Merge branches 'sched/devel', 'sched/cpu-hotplug', 'sched/cpusets' and 'sched/urgent' into sched/core

commit: 990d0f2ced23052abc7efa09bd05bff34e00cf73 [log] [tgz]
author: Ingo Molnar <mingo@elte.hu> Wed Oct 08 11:31:02 2008 +0200
committer: Ingo Molnar <mingo@elte.hu> Wed Oct 08 11:31:02 2008 +0200
tree: df9f3fe5c0417102586087cec63e1d813a8f29cb
parent: 85ba94ba0592296053f7f2846812173424afe1cb [diff]
parent: 34b3ede2353604ec9861c1d900b2a835ff85de47 [diff]
parent: e545a6140b698b2494daf0b32107bdcc5e901390 [diff]
parent: d294eb83d8d39a29f01dad391f15fc3a29aa04f9 [diff]
diff --git a/Documentation/HOWTO b/Documentation/HOWTO
index c2371c5..48a3955 100644
--- a/Documentation/HOWTO
+++ b/Documentation/HOWTO

@@ -77,7 +77,8 @@
 When a kernel change causes the interface that the kernel exposes to
 userspace to change, it is recommended that you send the information or
 a patch to the manual pages explaining the change to the manual pages
-maintainer at mtk.manpages@gmail.com.
+maintainer at mtk.manpages@gmail.com, and CC the list
+linux-api@vger.kernel.org.
 
 Here is a list of files that are in the kernel source tree that are
 required reading:

diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist
index da10e07..21f0795 100644
--- a/Documentation/SubmitChecklist
+++ b/Documentation/SubmitChecklist

@@ -67,6 +67,8 @@
 
 19: All new userspace interfaces are documented in Documentation/ABI/.
     See Documentation/ABI/README for more information.
+    Patches that change userspace interfaces should be CCed to
+    linux-api@vger.kernel.org.
 
 20: Check that it all passes `make headers_check'.
 

diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt
index 0bd3274..c6841ee 100644
--- a/Documentation/kernel-doc-nano-HOWTO.txt
+++ b/Documentation/kernel-doc-nano-HOWTO.txt

@@ -168,10 +168,10 @@
 mkdir $ARGV[0],0777;
 $state = 0;
 while (<STDIN>) {
-    if (/^\.TH \"[^\"]*\" 4 \"([^\"]*)\"/) {
+    if (/^\.TH \"[^\"]*\" 9 \"([^\"]*)\"/) {
 	if ($state == 1) { close OUT }
 	$state = 1;
-	$fn = "$ARGV[0]/$1.4";
+	$fn = "$ARGV[0]/$1.9";
 	print STDERR "Creating $fn\n";
 	open OUT, ">$fn" or die "can't open $fn: $!\n";
 	print OUT $_;

diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt
index 88bcb87..9d8eb55 100644
--- a/Documentation/scheduler/sched-design-CFS.txt
+++ b/Documentation/scheduler/sched-design-CFS.txt

@@ -1,151 +1,242 @@
-
-This is the CFS scheduler.
-
-80% of CFS's design can be summed up in a single sentence: CFS basically
-models an "ideal, precise multi-tasking CPU" on real hardware.
-
-"Ideal multi-tasking CPU" is a (non-existent  :-))  CPU that has 100%
-physical power and which can run each task at precise equal speed, in
-parallel, each at 1/nr_running speed. For example: if there are 2 tasks
-running then it runs each at 50% physical power - totally in parallel.
-
-On real hardware, we can run only a single task at once, so while that
-one task runs, the other tasks that are waiting for the CPU are at a
-disadvantage - the current task gets an unfair amount of CPU time. In
-CFS this fairness imbalance is expressed and tracked via the per-task
-p->wait_runtime (nanosec-unit) value. "wait_runtime" is the amount of
-time the task should now run on the CPU for it to become completely fair
-and balanced.
-
-( small detail: on 'ideal' hardware, the p->wait_runtime value would
-  always be zero - no task would ever get 'out of balance' from the
-  'ideal' share of CPU time. )
-
-CFS's task picking logic is based on this p->wait_runtime value and it
-is thus very simple: it always tries to run the task with the largest
-p->wait_runtime value. In other words, CFS tries to run the task with
-the 'gravest need' for more CPU time. So CFS always tries to split up
-CPU time between runnable tasks as close to 'ideal multitasking
-hardware' as possible.
-
-Most of the rest of CFS's design just falls out of this really simple
-concept, with a few add-on embellishments like nice levels,
-multiprocessing and various algorithm variants to recognize sleepers.
-
-In practice it works like this: the system runs a task a bit, and when
-the task schedules (or a scheduler tick happens) the task's CPU usage is
-'accounted for': the (small) time it just spent using the physical CPU
-is deducted from p->wait_runtime. [minus the 'fair share' it would have
-gotten anyway]. Once p->wait_runtime gets low enough so that another
-task becomes the 'leftmost task' of the time-ordered rbtree it maintains
-(plus a small amount of 'granularity' distance relative to the leftmost
-task so that we do not over-schedule tasks and trash the cache) then the
-new leftmost task is picked and the current task is preempted.
-
-The rq->fair_clock value tracks the 'CPU time a runnable task would have
-fairly gotten, had it been runnable during that time'. So by using
-rq->fair_clock values we can accurately timestamp and measure the
-'expected CPU time' a task should have gotten. All runnable tasks are
-sorted in the rbtree by the "rq->fair_clock - p->wait_runtime" key, and
-CFS picks the 'leftmost' task and sticks to it. As the system progresses
-forwards, newly woken tasks are put into the tree more and more to the
-right - slowly but surely giving a chance for every task to become the
-'leftmost task' and thus get on the CPU within a deterministic amount of
-time.
-
-Some implementation details:
-
- - the introduction of Scheduling Classes: an extensible hierarchy of
-   scheduler modules. These modules encapsulate scheduling policy
-   details and are handled by the scheduler core without the core
-   code assuming about them too much.
-
- - sched_fair.c implements the 'CFS desktop scheduler': it is a
-   replacement for the vanilla scheduler's SCHED_OTHER interactivity
-   code.
-
-   I'd like to give credit to Con Kolivas for the general approach here:
-   he has proven via RSDL/SD that 'fair scheduling' is possible and that
-   it results in better desktop scheduling. Kudos Con!
-
-   The CFS patch uses a completely different approach and implementation
-   from RSDL/SD. My goal was to make CFS's interactivity quality exceed
-   that of RSDL/SD, which is a high standard to meet :-) Testing
-   feedback is welcome to decide this one way or another. [ and, in any
-   case, all of SD's logic could be added via a kernel/sched_sd.c module
-   as well, if Con is interested in such an approach. ]
-
-   CFS's design is quite radical: it does not use runqueues, it uses a
-   time-ordered rbtree to build a 'timeline' of future task execution,
-   and thus has no 'array switch' artifacts (by which both the vanilla
-   scheduler and RSDL/SD are affected).
-
-   CFS uses nanosecond granularity accounting and does not rely on any
-   jiffies or other HZ detail. Thus the CFS scheduler has no notion of
-   'timeslices' and has no heuristics whatsoever. There is only one
-   central tunable (you have to switch on CONFIG_SCHED_DEBUG):
-
-         /proc/sys/kernel/sched_granularity_ns
-
-   which can be used to tune the scheduler from 'desktop' (low
-   latencies) to 'server' (good batching) workloads. It defaults to a
-   setting suitable for desktop workloads. SCHED_BATCH is handled by the
-   CFS scheduler module too.
-
-   Due to its design, the CFS scheduler is not prone to any of the
-   'attacks' that exist today against the heuristics of the stock
-   scheduler: fiftyp.c, thud.c, chew.c, ring-test.c, massive_intr.c all
-   work fine and do not impact interactivity and produce the expected
-   behavior.
-
-   the CFS scheduler has a much stronger handling of nice levels and
-   SCHED_BATCH: both types of workloads should be isolated much more
-   agressively than under the vanilla scheduler.
-
-   ( another detail: due to nanosec accounting and timeline sorting,
-     sched_yield() support is very simple under CFS, and in fact under
-     CFS sched_yield() behaves much better than under any other
-     scheduler i have tested so far. )
-
- - sched_rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler
-   way than the vanilla scheduler does. It uses 100 runqueues (for all
-   100 RT priority levels, instead of 140 in the vanilla scheduler)
-   and it needs no expired array.
-
- - reworked/sanitized SMP load-balancing: the runqueue-walking
-   assumptions are gone from the load-balancing code now, and
-   iterators of the scheduling modules are used. The balancing code got
-   quite a bit simpler as a result.
+                      =============
+                      CFS Scheduler
+                      =============
 
 
-Group scheduler extension to CFS
-================================
+1.  OVERVIEW
 
-Normally the scheduler operates on individual tasks and strives to provide
-fair CPU time to each task. Sometimes, it may be desirable to group tasks
-and provide fair CPU time to each such task group. For example, it may
-be desirable to first provide fair CPU time to each user on the system
-and then to each task belonging to a user.
+CFS stands for "Completely Fair Scheduler," and is the new "desktop" process
+scheduler implemented by Ingo Molnar and merged in Linux 2.6.23.  It is the
+replacement for the previous vanilla scheduler's SCHED_OTHER interactivity
+code.
 
-CONFIG_FAIR_GROUP_SCHED strives to achieve exactly that. It lets
-SCHED_NORMAL/BATCH tasks be be grouped and divides CPU time fairly among such
-groups. At present, there are two (mutually exclusive) mechanisms to group
-tasks for CPU bandwidth control purpose:
+80% of CFS's design can be summed up in a single sentence: CFS basically models
+an "ideal, precise multi-tasking CPU" on real hardware.
 
-	- Based on user id (CONFIG_FAIR_USER_SCHED)
-		In this option, tasks are grouped according to their user id.
-	- Based on "cgroup" pseudo filesystem (CONFIG_FAIR_CGROUP_SCHED)
-		This options lets the administrator create arbitrary groups
-		of tasks, using the "cgroup" pseudo filesystem. See
-		Documentation/cgroups.txt for more information about this
-		filesystem.
+"Ideal multi-tasking CPU" is a (non-existent  :-)) CPU that has 100% physical
+power and which can run each task at precise equal speed, in parallel, each at
+1/nr_running speed.  For example: if there are 2 tasks running, then it runs
+each at 50% physical power --- i.e., actually in parallel.
+
+On real hardware, we can run only a single task at once, so we have to
+introduce the concept of "virtual runtime."  The virtual runtime of a task
+specifies when its next timeslice would start execution on the ideal
+multi-tasking CPU described above.  In practice, the virtual runtime of a task
+is its actual runtime normalized to the total number of running tasks.
+
+
+
+2.  FEW IMPLEMENTATION DETAILS
+
+In CFS the virtual runtime is expressed and tracked via the per-task
+p->se.vruntime (nanosec-unit) value.  This way, it's possible to accurately
+timestamp and measure the "expected CPU time" a task should have gotten.
+
+[ small detail: on "ideal" hardware, at any time all tasks would have the same
+  p->se.vruntime value --- i.e., tasks would execute simultaneously and no task
+  would ever get "out of balance" from the "ideal" share of CPU time.  ]
+
+CFS's task picking logic is based on this p->se.vruntime value and it is thus
+very simple: it always tries to run the task with the smallest p->se.vruntime
+value (i.e., the task which executed least so far).  CFS always tries to split
+up CPU time between runnable tasks as close to "ideal multitasking hardware" as
+possible.
+
+Most of the rest of CFS's design just falls out of this really simple concept,
+with a few add-on embellishments like nice levels, multiprocessing and various
+algorithm variants to recognize sleepers.
+
+
+
+3.  THE RBTREE
+
+CFS's design is quite radical: it does not use the old data structures for the
+runqueues, but it uses a time-ordered rbtree to build a "timeline" of future
+task execution, and thus has no "array switch" artifacts (by which both the
+previous vanilla scheduler and RSDL/SD are affected).
+
+CFS also maintains the rq->cfs.min_vruntime value, which is a monotonic
+increasing value tracking the smallest vruntime among all tasks in the
+runqueue.  The total amount of work done by the system is tracked using
+min_vruntime; that value is used to place newly activated entities on the left
+side of the tree as much as possible.
+
+The total number of running tasks in the runqueue is accounted through the
+rq->cfs.load value, which is the sum of the weights of the tasks queued on the
+runqueue.
+
+CFS maintains a time-ordered rbtree, where all runnable tasks are sorted by the
+p->se.vruntime key (there is a subtraction using rq->cfs.min_vruntime to
+account for possible wraparounds).  CFS picks the "leftmost" task from this
+tree and sticks to it.
+As the system progresses forwards, the executed tasks are put into the tree
+more and more to the right --- slowly but surely giving a chance for every task
+to become the "leftmost task" and thus get on the CPU within a deterministic
+amount of time.
+
+Summing up, CFS works like this: it runs a task a bit, and when the task
+schedules (or a scheduler tick happens) the task's CPU usage is "accounted
+for": the (small) time it just spent using the physical CPU is added to
+p->se.vruntime.  Once p->se.vruntime gets high enough so that another task
+becomes the "leftmost task" of the time-ordered rbtree it maintains (plus a
+small amount of "granularity" distance relative to the leftmost task so that we
+do not over-schedule tasks and trash the cache), then the new leftmost task is
+picked and the current task is preempted.
+
+
+
+4.  SOME FEATURES OF CFS
+
+CFS uses nanosecond granularity accounting and does not rely on any jiffies or
+other HZ detail.  Thus the CFS scheduler has no notion of "timeslices" in the
+way the previous scheduler had, and has no heuristics whatsoever.  There is
+only one central tunable (you have to switch on CONFIG_SCHED_DEBUG):
+
+   /proc/sys/kernel/sched_granularity_ns
+
+which can be used to tune the scheduler from "desktop" (i.e., low latencies) to
+"server" (i.e., good batching) workloads.  It defaults to a setting suitable
+for desktop workloads.  SCHED_BATCH is handled by the CFS scheduler module too.
+
+Due to its design, the CFS scheduler is not prone to any of the "attacks" that
+exist today against the heuristics of the stock scheduler: fiftyp.c, thud.c,
+chew.c, ring-test.c, massive_intr.c all work fine and do not impact
+interactivity and produce the expected behavior.
+
+The CFS scheduler has a much stronger handling of nice levels and SCHED_BATCH
+than the previous vanilla scheduler: both types of workloads are isolated much
+more aggressively.
+
+SMP load-balancing has been reworked/sanitized: the runqueue-walking
+assumptions are gone from the load-balancing code now, and iterators of the
+scheduling modules are used.  The balancing code got quite a bit simpler as a
+result.
+
+
+
+5. Scheduling policies
+
+CFS implements three scheduling policies:
+
+  - SCHED_NORMAL (traditionally called SCHED_OTHER): The scheduling
+    policy that is used for regular tasks.
+
+  - SCHED_BATCH: Does not preempt nearly as often as regular tasks
+    would, thereby allowing tasks to run longer and make better use of
+    caches but at the cost of interactivity. This is well suited for
+    batch jobs.
+
+  - SCHED_IDLE: This is even weaker than nice 19, but its not a true
+    idle timer scheduler in order to avoid to get into priority
+    inversion problems which would deadlock the machine.
+
+SCHED_FIFO/_RR are implemented in sched_rt.c and are as specified by
+POSIX.
+
+The command chrt from util-linux-ng 2.13.1.1 can set all of these except
+SCHED_IDLE.
+
+
+
+6.  SCHEDULING CLASSES
+
+The new CFS scheduler has been designed in such a way to introduce "Scheduling
+Classes," an extensible hierarchy of scheduler modules.  These modules
+encapsulate scheduling policy details and are handled by the scheduler core
+without the core code assuming too much about them.
+
+sched_fair.c implements the CFS scheduler described above.
+
+sched_rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler way than
+the previous vanilla scheduler did.  It uses 100 runqueues (for all 100 RT
+priority levels, instead of 140 in the previous scheduler) and it needs no
+expired array.
+
+Scheduling classes are implemented through the sched_class structure, which
+contains hooks to functions that must be called whenever an interesting event
+occurs.
+
+This is the (partial) list of the hooks:
+
+ - enqueue_task(...)
+
+   Called when a task enters a runnable state.
+   It puts the scheduling entity (task) into the red-black tree and
+   increments the nr_running variable.
+
+ - dequeue_tree(...)
+
+   When a task is no longer runnable, this function is called to keep the
+   corresponding scheduling entity out of the red-black tree.  It decrements
+   the nr_running variable.
+
+ - yield_task(...)
+
+   This function is basically just a dequeue followed by an enqueue, unless the
+   compat_yield sysctl is turned on; in that case, it places the scheduling
+   entity at the right-most end of the red-black tree.
+
+ - check_preempt_curr(...)
+
+   This function checks if a task that entered the runnable state should
+   preempt the currently running task.
+
+ - pick_next_task(...)
+
+   This function chooses the most appropriate task eligible to run next.
+
+ - set_curr_task(...)
+
+   This function is called when a task changes its scheduling class or changes
+   its task group.
+
+ - task_tick(...)
+
+   This function is mostly called from time tick functions; it might lead to
+   process switch.  This drives the running preemption.
+
+ - task_new(...)
+
+   The core scheduler gives the scheduling module an opportunity to manage new
+   task startup.  The CFS scheduling module uses it for group scheduling, while
+   the scheduling module for a real-time task does not use it.
+
+
+
+7.  GROUP SCHEDULER EXTENSIONS TO CFS
+
+Normally, the scheduler operates on individual tasks and strives to provide
+fair CPU time to each task.  Sometimes, it may be desirable to group tasks and
+provide fair CPU time to each such task group.  For example, it may be
+desirable to first provide fair CPU time to each user on the system and then to
+each task belonging to a user.
+
+CONFIG_GROUP_SCHED strives to achieve exactly that.  It lets tasks to be
+grouped and divides CPU time fairly among such groups.
+
+CONFIG_RT_GROUP_SCHED permits to group real-time (i.e., SCHED_FIFO and
+SCHED_RR) tasks.
+
+CONFIG_FAIR_GROUP_SCHED permits to group CFS (i.e., SCHED_NORMAL and
+SCHED_BATCH) tasks.
+
+At present, there are two (mutually exclusive) mechanisms to group tasks for
+CPU bandwidth control purposes:
+
+ - Based on user id (CONFIG_USER_SCHED)
+
+   With this option, tasks are grouped according to their user id.
+
+ - Based on "cgroup" pseudo filesystem (CONFIG_CGROUP_SCHED)
+
+   This options needs CONFIG_CGROUPS to be defined, and lets the administrator
+   create arbitrary groups of tasks, using the "cgroup" pseudo filesystem.  See
+   Documentation/cgroups.txt for more information about this filesystem.
 
 Only one of these options to group tasks can be chosen and not both.
 
-Group scheduler tunables:
-
-When CONFIG_FAIR_USER_SCHED is defined, a directory is created in sysfs for
-each new user and a "cpu_share" file is added in that directory.
+When CONFIG_USER_SCHED is defined, a directory is created in sysfs for each new
+user and a "cpu_share" file is added in that directory.
 
 	# cd /sys/kernel/uids
 	# cat 512/cpu_share		# Display user 512's CPU share
@@ -155,16 +246,14 @@
 	2048
 	#
 
-CPU bandwidth between two users are divided in the ratio of their CPU shares.
-For ex: if you would like user "root" to get twice the bandwidth of user
-"guest", then set the cpu_share for both the users such that "root"'s
-cpu_share is twice "guest"'s cpu_share
+CPU bandwidth between two users is divided in the ratio of their CPU shares.
+For example: if you would like user "root" to get twice the bandwidth of user
+"guest," then set the cpu_share for both the users such that "root"'s cpu_share
+is twice "guest"'s cpu_share.
 
-
-When CONFIG_FAIR_CGROUP_SCHED is defined, a "cpu.shares" file is created
-for each group created using the pseudo filesystem. See example steps
-below to create task groups and modify their CPU share using the "cgroups"
-pseudo filesystem
+When CONFIG_CGROUP_SCHED is defined, a "cpu.shares" file is created for each
+group created using the pseudo filesystem.  See example steps below to create
+task groups and modify their CPU share using the "cgroups" pseudo filesystem.
 
 	# mkdir /dev/cpuctl
 	# mount -t cgroup -ocpu none /dev/cpuctl

diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx
index 89c7f32..53449cb 100644
--- a/Documentation/video4linux/CARDLIST.em28xx
+++ b/Documentation/video4linux/CARDLIST.em28xx

@@ -46,7 +46,7 @@
  45 -> Pinnacle PCTV DVB-T                      (em2870)
  46 -> Compro, VideoMate U3                     (em2870)        [185b:2870]
  47 -> KWorld DVB-T 305U                        (em2880)        [eb1a:e305]
- 48 -> KWorld DVB-T 310U                        (em2880)
+ 48 -> KWorld DVB-T 310U                        (em2880)        [eb1a:e310]
  49 -> MSI DigiVox A/D                          (em2880)        [eb1a:e310]
  50 -> MSI DigiVox A/D II                       (em2880)        [eb1a:e320]
  51 -> Terratec Hybrid XS Secam                 (em2880)        [0ccd:004c]

diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt
index 0f03900..9a3e4d7 100644
--- a/Documentation/video4linux/gspca.txt
+++ b/Documentation/video4linux/gspca.txt

@@ -190,6 +190,7 @@
 pac7311		093a:2621	PAC731x
 pac7311		093a:2624	PAC7302
 pac7311		093a:2626	Labtec 2200
+pac7311		093a:262a	Webcam 300k
 zc3xx		0ac8:0302	Z-star Vimicro zc0302
 vc032x		0ac8:0321	Vimicro generic vc0321
 vc032x		0ac8:0323	Vimicro Vc0323

diff --git a/MAINTAINERS b/MAINTAINERS
index 3596d17..8dae455 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS

@@ -1198,9 +1198,7 @@
 S:	Maintained
 
 CPUSETS
-P:	Paul Jackson
 P:	Paul Menage
-M:	pj@sgi.com
 M:	menage@google.com
 L:	linux-kernel@vger.kernel.org
 W:	http://www.bullopensource.org/cpuset/
@@ -2706,6 +2704,7 @@
 P:	Michael Kerrisk
 M:	mtk.manpages@gmail.com
 W:	http://www.kernel.org/doc/man-pages
+L:	linux-man@vger.kernel.org
 S:	Supported
 
 MARVELL LIBERTAS WIRELESS DRIVER

diff --git a/Makefile b/Makefile
index 1d03c16..ce9eceb 100644
--- a/Makefile
+++ b/Makefile

@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 27
-EXTRAVERSION = -rc8
+EXTRAVERSION = -rc9
 NAME = Rotary Wombat
 
 # *DOCUMENTATION*

diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 83df541..06b6fda 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c

@@ -149,6 +149,9 @@
 	atomic_inc(&init_mm.mm_count);
 	current->active_mm = &init_mm;
 
+	/* inform the notifiers about the new cpu */
+	notify_cpu_starting(cpuid);
+
 	/* Must have completely accurate bogos.  */
 	local_irq_enable();
 

diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index e9842f6..e42a749 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c

@@ -277,6 +277,7 @@
 	/*
 	 * Enable local interrupts.
 	 */
+	notify_cpu_starting(cpu);
 	local_irq_enable();
 	local_fiq_enable();
 

diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index 952a24b..52e16c6 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c

@@ -178,6 +178,7 @@
 	unmask_irq(IPI_INTR_VECT);
 	unmask_irq(TIMER0_INTR_VECT);
 	preempt_disable();
+	notify_cpu_starting(cpu);
 	local_irq_enable();
 
 	cpu_set(cpu, cpu_online_map);

diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index d8f05e5..1dcbb85 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c

@@ -401,6 +401,7 @@
 	spin_lock(&vector_lock);
 	/* Setup the per cpu irq handling data structures */
 	__setup_vector_irq(cpuid);
+	notify_cpu_starting(cpuid);
 	cpu_set(cpuid, cpu_online_map);
 	per_cpu(cpu_state, cpuid) = CPU_ONLINE;
 	spin_unlock(&vector_lock);

diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index 2c03ac1..fc29948 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c

@@ -498,6 +498,8 @@
 {
 	int cpu_id = smp_processor_id();
 
+	notify_cpu_starting(cpu_id);
+
 	local_irq_enable();
 
 	/* Get our bogomips. */

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 49896a2..1e06d23 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig

@@ -211,6 +211,7 @@
 	select SYS_SUPPORTS_64BIT_KERNEL
 	select SYS_SUPPORTS_BIG_ENDIAN
 	select SYS_SUPPORTS_LITTLE_ENDIAN
+	select SYS_SUPPORTS_MIPS_CMP if BROKEN	# because SYNC_R4K is broken
 	select SYS_SUPPORTS_MULTITHREADING
 	select SYS_SUPPORTS_SMARTMIPS
 	help
@@ -1403,7 +1404,6 @@
 	depends on CPU_MIPS32_R2
 	#depends on CPU_MIPS64_R2		# once there is hardware ...
 	depends on SYS_SUPPORTS_MULTITHREADING
-	select GENERIC_CLOCKEVENTS_BROADCAST
 	select CPU_MIPSR2_IRQ_VI
 	select CPU_MIPSR2_IRQ_EI
 	select MIPS_MT
@@ -1451,32 +1451,17 @@
 	  Includes a loader for loading an elf relocatable object
 	  onto another VPE and running it.
 
-config MIPS_MT_SMTC_INSTANT_REPLAY
-	bool "Low-latency Dispatch of Deferred SMTC IPIs"
-	depends on MIPS_MT_SMTC && !PREEMPT
-	default y
-	help
-	  SMTC pseudo-interrupts between TCs are deferred and queued
-	  if the target TC is interrupt-inhibited (IXMT). In the first
-	  SMTC prototypes, these queued IPIs were serviced on return
-	  to user mode, or on entry into the kernel idle loop. The
-	  INSTANT_REPLAY option dispatches them as part of local_irq_restore()
-	  processing, which adds runtime overhead (hence the option to turn
-	  it off), but ensures that IPIs are handled promptly even under
-	  heavy I/O interrupt load.
-
 config MIPS_MT_SMTC_IM_BACKSTOP
 	bool "Use per-TC register bits as backstop for inhibited IM bits"
 	depends on MIPS_MT_SMTC
-	default y
+	default n
 	help
 	  To support multiple TC microthreads acting as "CPUs" within
 	  a VPE, VPE-wide interrupt mask bits must be specially manipulated
 	  during interrupt handling. To support legacy drivers and interrupt
 	  controller management code, SMTC has a "backstop" to track and
 	  if necessary restore the interrupt mask. This has some performance
-	  impact on interrupt service overhead. Disable it only if you know
-	  what you are doing.
+	  impact on interrupt service overhead.
 
 config MIPS_MT_SMTC_IRQAFF
 	bool "Support IRQ affinity API"
@@ -1486,10 +1471,8 @@
 	  Enables SMP IRQ affinity API (/proc/irq/*/smp_affinity, etc.)
 	  for SMTC Linux kernel. Requires platform support, of which
 	  an example can be found in the MIPS kernel i8259 and Malta
-	  platform code.  It is recommended that MIPS_MT_SMTC_INSTANT_REPLAY
-	  be enabled if MIPS_MT_SMTC_IRQAFF is used. Adds overhead to
-	  interrupt dispatch, and should be used only if you know what
-	  you are doing.
+	  platform code.  Adds some overhead to interrupt dispatch, and
+	  should be used only if you know what you are doing.
 
 config MIPS_VPE_LOADER_TOM
 	bool "Load VPE program into memory hidden from linux"
@@ -1517,6 +1500,18 @@
 	  "exit" syscall notifying other kernel modules the SP program is
 	  exiting.  You probably want to say yes here.
 
+config MIPS_CMP
+	bool "MIPS CMP framework support"
+	depends on SYS_SUPPORTS_MIPS_CMP
+	select SYNC_R4K if BROKEN
+	select SYS_SUPPORTS_SMP
+	select SYS_SUPPORTS_SCHED_SMT if SMP
+	select WEAK_ORDERING
+	default n
+	help
+	  This is a placeholder option for the GCMP work. It will need to
+	  be handled differently...
+
 config SB1_PASS_1_WORKAROUNDS
 	bool
 	depends on CPU_SB1_PASS_1
@@ -1693,6 +1688,9 @@
 config SMP_UP
 	bool
 
+config SYS_SUPPORTS_MIPS_CMP
+	bool
+
 config SYS_SUPPORTS_SMP
 	bool
 
@@ -1740,17 +1738,6 @@
 	  performance should round up your number of processors to the next
 	  power of two.
 
-config MIPS_CMP
-	bool "MIPS CMP framework support"
-	depends on SMP
-	select SYNC_R4K
-	select SYS_SUPPORTS_SCHED_SMT
-	select WEAK_ORDERING
-	default n
-	help
-	  This is a placeholder option for the GCMP work. It will need to
-	  be handled differently...
-
 source "kernel/time/Kconfig"
 
 #

diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 706f939..25775cb 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile

@@ -10,6 +10,7 @@
 
 obj-$(CONFIG_CEVT_BCM1480)	+= cevt-bcm1480.o
 obj-$(CONFIG_CEVT_R4K)		+= cevt-r4k.o
+obj-$(CONFIG_MIPS_MT_SMTC)	+= cevt-smtc.o
 obj-$(CONFIG_CEVT_DS1287)	+= cevt-ds1287.o
 obj-$(CONFIG_CEVT_GT641XX)	+= cevt-gt641xx.o
 obj-$(CONFIG_CEVT_SB1250)	+= cevt-sb1250.o

diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 24a2d90..4a4c59f 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c

@@ -12,6 +12,14 @@
 
 #include <asm/smtc_ipi.h>
 #include <asm/time.h>
+#include <asm/cevt-r4k.h>
+
+/*
+ * The SMTC Kernel for the 34K, 1004K, et. al. replaces several
+ * of these routines with SMTC-specific variants.
+ */
+
+#ifndef CONFIG_MIPS_MT_SMTC
 
 static int mips_next_event(unsigned long delta,
                            struct clock_event_device *evt)
@@ -19,60 +27,27 @@
 	unsigned int cnt;
 	int res;
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	{
-	unsigned long flags, vpflags;
-	local_irq_save(flags);
-	vpflags = dvpe();
-#endif
 	cnt = read_c0_count();
 	cnt += delta;
 	write_c0_compare(cnt);
 	res = ((int)(read_c0_count() - cnt) > 0) ? -ETIME : 0;
-#ifdef CONFIG_MIPS_MT_SMTC
-	evpe(vpflags);
-	local_irq_restore(flags);
-	}
-#endif
 	return res;
 }
 
-static void mips_set_mode(enum clock_event_mode mode,
-                          struct clock_event_device *evt)
+#endif /* CONFIG_MIPS_MT_SMTC */
+
+void mips_set_clock_mode(enum clock_event_mode mode,
+				struct clock_event_device *evt)
 {
 	/* Nothing to do ...  */
 }
 
-static DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device);
-static int cp0_timer_irq_installed;
+DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device);
+int cp0_timer_irq_installed;
 
-/*
- * Timer ack for an R4k-compatible timer of a known frequency.
- */
-static void c0_timer_ack(void)
-{
-	write_c0_compare(read_c0_compare());
-}
+#ifndef CONFIG_MIPS_MT_SMTC
 
-/*
- * Possibly handle a performance counter interrupt.
- * Return true if the timer interrupt should not be checked
- */
-static inline int handle_perf_irq(int r2)
-{
-	/*
-	 * The performance counter overflow interrupt may be shared with the
-	 * timer interrupt (cp0_perfcount_irq < 0). If it is and a
-	 * performance counter has overflowed (perf_irq() == IRQ_HANDLED)
-	 * and we can't reliably determine if a counter interrupt has also
-	 * happened (!r2) then don't check for a timer interrupt.
-	 */
-	return (cp0_perfcount_irq < 0) &&
-		perf_irq() == IRQ_HANDLED &&
-		!r2;
-}
-
-static irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
+irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
 {
 	const int r2 = cpu_has_mips_r2;
 	struct clock_event_device *cd;
@@ -93,12 +68,8 @@
 	 * interrupt.  Being the paranoiacs we are we check anyway.
 	 */
 	if (!r2 || (read_c0_cause() & (1 << 30))) {
-		c0_timer_ack();
-#ifdef CONFIG_MIPS_MT_SMTC
-		if (cpu_data[cpu].vpe_id)
-			goto out;
-		cpu = 0;
-#endif
+		/* Clear Count/Compare Interrupt */
+		write_c0_compare(read_c0_compare());
 		cd = &per_cpu(mips_clockevent_device, cpu);
 		cd->event_handler(cd);
 	}
@@ -107,65 +78,16 @@
 	return IRQ_HANDLED;
 }
 
-static struct irqaction c0_compare_irqaction = {
+#endif /* Not CONFIG_MIPS_MT_SMTC */
+
+struct irqaction c0_compare_irqaction = {
 	.handler = c0_compare_interrupt,
-#ifdef CONFIG_MIPS_MT_SMTC
-	.flags = IRQF_DISABLED,
-#else
 	.flags = IRQF_DISABLED | IRQF_PERCPU,
-#endif
 	.name = "timer",
 };
 
-#ifdef CONFIG_MIPS_MT_SMTC
-DEFINE_PER_CPU(struct clock_event_device, smtc_dummy_clockevent_device);
 
-static void smtc_set_mode(enum clock_event_mode mode,
-                          struct clock_event_device *evt)
-{
-}
-
-static void mips_broadcast(cpumask_t mask)
-{
-	unsigned int cpu;
-
-	for_each_cpu_mask(cpu, mask)
-		smtc_send_ipi(cpu, SMTC_CLOCK_TICK, 0);
-}
-
-static void setup_smtc_dummy_clockevent_device(void)
-{
-	//uint64_t mips_freq = mips_hpt_^frequency;
-	unsigned int cpu = smp_processor_id();
-	struct clock_event_device *cd;
-
-	cd = &per_cpu(smtc_dummy_clockevent_device, cpu);
-
-	cd->name		= "SMTC";
-	cd->features		= CLOCK_EVT_FEAT_DUMMY;
-
-	/* Calculate the min / max delta */
-	cd->mult	= 0; //div_sc((unsigned long) mips_freq, NSEC_PER_SEC, 32);
-	cd->shift		= 0; //32;
-	cd->max_delta_ns	= 0; //clockevent_delta2ns(0x7fffffff, cd);
-	cd->min_delta_ns	= 0; //clockevent_delta2ns(0x30, cd);
-
-	cd->rating		= 200;
-	cd->irq			= 17; //-1;
-//	if (cpu)
-//		cd->cpumask	= CPU_MASK_ALL; // cpumask_of_cpu(cpu);
-//	else
-		cd->cpumask	= cpumask_of_cpu(cpu);
-
-	cd->set_mode		= smtc_set_mode;
-
-	cd->broadcast		= mips_broadcast;
-
-	clockevents_register_device(cd);
-}
-#endif
-
-static void mips_event_handler(struct clock_event_device *dev)
+void mips_event_handler(struct clock_event_device *dev)
 {
 }
 
@@ -177,7 +99,23 @@
 	return (read_c0_cause() >> cp0_compare_irq) & 0x100;
 }
 
-static int c0_compare_int_usable(void)
+/*
+ * Compare interrupt can be routed and latched outside the core,
+ * so a single execution hazard barrier may not be enough to give
+ * it time to clear as seen in the Cause register.  4 time the
+ * pipeline depth seems reasonably conservative, and empirically
+ * works better in configurations with high CPU/bus clock ratios.
+ */
+
+#define compare_change_hazard() \
+	do { \
+		irq_disable_hazard(); \
+		irq_disable_hazard(); \
+		irq_disable_hazard(); \
+		irq_disable_hazard(); \
+	} while (0)
+
+int c0_compare_int_usable(void)
 {
 	unsigned int delta;
 	unsigned int cnt;
@@ -187,7 +125,7 @@
 	 */
 	if (c0_compare_int_pending()) {
 		write_c0_compare(read_c0_count());
-		irq_disable_hazard();
+		compare_change_hazard();
 		if (c0_compare_int_pending())
 			return 0;
 	}
@@ -196,7 +134,7 @@
 		cnt = read_c0_count();
 		cnt += delta;
 		write_c0_compare(cnt);
-		irq_disable_hazard();
+		compare_change_hazard();
 		if ((int)(read_c0_count() - cnt) < 0)
 		    break;
 		/* increase delta if the timer was already expired */
@@ -205,11 +143,12 @@
 	while ((int)(read_c0_count() - cnt) <= 0)
 		;	/* Wait for expiry  */
 
+	compare_change_hazard();
 	if (!c0_compare_int_pending())
 		return 0;
 
 	write_c0_compare(read_c0_count());
-	irq_disable_hazard();
+	compare_change_hazard();
 	if (c0_compare_int_pending())
 		return 0;
 
@@ -219,6 +158,8 @@
 	return 1;
 }
 
+#ifndef CONFIG_MIPS_MT_SMTC
+
 int __cpuinit mips_clockevent_init(void)
 {
 	uint64_t mips_freq = mips_hpt_frequency;
@@ -229,17 +170,6 @@
 	if (!cpu_has_counter || !mips_hpt_frequency)
 		return -ENXIO;
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	setup_smtc_dummy_clockevent_device();
-
-	/*
-	 * On SMTC we only register VPE0's compare interrupt as clockevent
-	 * device.
-	 */
-	if (cpu)
-		return 0;
-#endif
-
 	if (!c0_compare_int_usable())
 		return -ENXIO;
 
@@ -265,13 +195,9 @@
 
 	cd->rating		= 300;
 	cd->irq			= irq;
-#ifdef CONFIG_MIPS_MT_SMTC
-	cd->cpumask		= CPU_MASK_ALL;
-#else
 	cd->cpumask		= cpumask_of_cpu(cpu);
-#endif
 	cd->set_next_event	= mips_next_event;
-	cd->set_mode		= mips_set_mode;
+	cd->set_mode		= mips_set_clock_mode;
 	cd->event_handler	= mips_event_handler;
 
 	clockevents_register_device(cd);
@@ -281,12 +207,9 @@
 
 	cp0_timer_irq_installed = 1;
 
-#ifdef CONFIG_MIPS_MT_SMTC
-#define CPUCTR_IMASKBIT (0x100 << cp0_compare_irq)
-	setup_irq_smtc(irq, &c0_compare_irqaction, CPUCTR_IMASKBIT);
-#else
 	setup_irq(irq, &c0_compare_irqaction);
-#endif
 
 	return 0;
 }
+
+#endif /* Not CONFIG_MIPS_MT_SMTC */

diff --git a/arch/mips/kernel/cevt-smtc.c b/arch/mips/kernel/cevt-smtc.c
new file mode 100644
index 0000000..5162fe4
--- /dev/null
+++ b/arch/mips/kernel/cevt-smtc.c

@@ -0,0 +1,321 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2007 MIPS Technologies, Inc.
+ * Copyright (C) 2007 Ralf Baechle <ralf@linux-mips.org>
+ * Copyright (C) 2008 Kevin D. Kissell, Paralogos sarl
+ */
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/percpu.h>
+
+#include <asm/smtc_ipi.h>
+#include <asm/time.h>
+#include <asm/cevt-r4k.h>
+
+/*
+ * Variant clock event timer support for SMTC on MIPS 34K, 1004K
+ * or other MIPS MT cores.
+ *
+ * Notes on SMTC Support:
+ *
+ * SMTC has multiple microthread TCs pretending to be Linux CPUs.
+ * But there's only one Count/Compare pair per VPE, and Compare
+ * interrupts are taken opportunisitically by available TCs
+ * bound to the VPE with the Count register.  The new timer
+ * framework provides for global broadcasts, but we really
+ * want VPE-level multicasts for best behavior. So instead
+ * of invoking the high-level clock-event broadcast code,
+ * this version of SMTC support uses the historical SMTC
+ * multicast mechanisms "under the hood", appearing to the
+ * generic clock layer as if the interrupts are per-CPU.
+ *
+ * The approach taken here is to maintain a set of NR_CPUS
+ * virtual timers, and track which "CPU" needs to be alerted
+ * at each event.
+ *
+ * It's unlikely that we'll see a MIPS MT core with more than
+ * 2 VPEs, but we *know* that we won't need to handle more
+ * VPEs than we have "CPUs".  So NCPUs arrays of NCPUs elements
+ * is always going to be overkill, but always going to be enough.
+ */
+
+unsigned long smtc_nexttime[NR_CPUS][NR_CPUS];
+static int smtc_nextinvpe[NR_CPUS];
+
+/*
+ * Timestamps stored are absolute values to be programmed
+ * into Count register.  Valid timestamps will never be zero.
+ * If a Zero Count value is actually calculated, it is converted
+ * to be a 1, which will introduce 1 or two CPU cycles of error
+ * roughly once every four billion events, which at 1000 HZ means
+ * about once every 50 days.  If that's actually a problem, one
+ * could alternate squashing 0 to 1 and to -1.
+ */
+
+#define MAKEVALID(x) (((x) == 0L) ? 1L : (x))
+#define ISVALID(x) ((x) != 0L)
+
+/*
+ * Time comparison is subtle, as it's really truncated
+ * modular arithmetic.
+ */
+
+#define IS_SOONER(a, b, reference) \
+    (((a) - (unsigned long)(reference)) < ((b) - (unsigned long)(reference)))
+
+/*
+ * CATCHUP_INCREMENT, used when the function falls behind the counter.
+ * Could be an increasing function instead of a constant;
+ */
+
+#define CATCHUP_INCREMENT 64
+
+static int mips_next_event(unsigned long delta,
+				struct clock_event_device *evt)
+{
+	unsigned long flags;
+	unsigned int mtflags;
+	unsigned long timestamp, reference, previous;
+	unsigned long nextcomp = 0L;
+	int vpe = current_cpu_data.vpe_id;
+	int cpu = smp_processor_id();
+	local_irq_save(flags);
+	mtflags = dmt();
+
+	/*
+	 * Maintain the per-TC virtual timer
+	 * and program the per-VPE shared Count register
+	 * as appropriate here...
+	 */
+	reference = (unsigned long)read_c0_count();
+	timestamp = MAKEVALID(reference + delta);
+	/*
+	 * To really model the clock, we have to catch the case
+	 * where the current next-in-VPE timestamp is the old
+	 * timestamp for the calling CPE, but the new value is
+	 * in fact later.  In that case, we have to do a full
+	 * scan and discover the new next-in-VPE CPU id and
+	 * timestamp.
+	 */
+	previous = smtc_nexttime[vpe][cpu];
+	if (cpu == smtc_nextinvpe[vpe] && ISVALID(previous)
+	    && IS_SOONER(previous, timestamp, reference)) {
+		int i;
+		int soonest = cpu;
+
+		/*
+		 * Update timestamp array here, so that new
+		 * value gets considered along with those of
+		 * other virtual CPUs on the VPE.
+		 */
+		smtc_nexttime[vpe][cpu] = timestamp;
+		for_each_online_cpu(i) {
+			if (ISVALID(smtc_nexttime[vpe][i])
+			    && IS_SOONER(smtc_nexttime[vpe][i],
+				smtc_nexttime[vpe][soonest], reference)) {
+				    soonest = i;
+			}
+		}
+		smtc_nextinvpe[vpe] = soonest;
+		nextcomp = smtc_nexttime[vpe][soonest];
+	/*
+	 * Otherwise, we don't have to process the whole array rank,
+	 * we just have to see if the event horizon has gotten closer.
+	 */
+	} else {
+		if (!ISVALID(smtc_nexttime[vpe][smtc_nextinvpe[vpe]]) ||
+		    IS_SOONER(timestamp,
+			smtc_nexttime[vpe][smtc_nextinvpe[vpe]], reference)) {
+			    smtc_nextinvpe[vpe] = cpu;
+			    nextcomp = timestamp;
+		}
+		/*
+		 * Since next-in-VPE may me the same as the executing
+		 * virtual CPU, we update the array *after* checking
+		 * its value.
+		 */
+		smtc_nexttime[vpe][cpu] = timestamp;
+	}
+
+	/*
+	 * It may be that, in fact, we don't need to update Compare,
+	 * but if we do, we want to make sure we didn't fall into
+	 * a crack just behind Count.
+	 */
+	if (ISVALID(nextcomp)) {
+		write_c0_compare(nextcomp);
+		ehb();
+		/*
+		 * We never return an error, we just make sure
+		 * that we trigger the handlers as quickly as
+		 * we can if we fell behind.
+		 */
+		while ((nextcomp - (unsigned long)read_c0_count())
+			> (unsigned long)LONG_MAX) {
+			nextcomp += CATCHUP_INCREMENT;
+			write_c0_compare(nextcomp);
+			ehb();
+		}
+	}
+	emt(mtflags);
+	local_irq_restore(flags);
+	return 0;
+}
+
+
+void smtc_distribute_timer(int vpe)
+{
+	unsigned long flags;
+	unsigned int mtflags;
+	int cpu;
+	struct clock_event_device *cd;
+	unsigned long nextstamp = 0L;
+	unsigned long reference;
+
+
+repeat:
+	for_each_online_cpu(cpu) {
+	    /*
+	     * Find virtual CPUs within the current VPE who have
+	     * unserviced timer requests whose time is now past.
+	     */
+	    local_irq_save(flags);
+	    mtflags = dmt();
+	    if (cpu_data[cpu].vpe_id == vpe &&
+		ISVALID(smtc_nexttime[vpe][cpu])) {
+		reference = (unsigned long)read_c0_count();
+		if ((smtc_nexttime[vpe][cpu] - reference)
+			 > (unsigned long)LONG_MAX) {
+			    smtc_nexttime[vpe][cpu] = 0L;
+			    emt(mtflags);
+			    local_irq_restore(flags);
+			    /*
+			     * We don't send IPIs to ourself.
+			     */
+			    if (cpu != smp_processor_id()) {
+				smtc_send_ipi(cpu, SMTC_CLOCK_TICK, 0);
+			    } else {
+				cd = &per_cpu(mips_clockevent_device, cpu);
+				cd->event_handler(cd);
+			    }
+		} else {
+			/* Local to VPE but Valid Time not yet reached. */
+			if (!ISVALID(nextstamp) ||
+			    IS_SOONER(smtc_nexttime[vpe][cpu], nextstamp,
+			    reference)) {
+				smtc_nextinvpe[vpe] = cpu;
+				nextstamp = smtc_nexttime[vpe][cpu];
+			}
+			emt(mtflags);
+			local_irq_restore(flags);
+		}
+	    } else {
+		emt(mtflags);
+		local_irq_restore(flags);
+
+	    }
+	}
+	/* Reprogram for interrupt at next soonest timestamp for VPE */
+	if (ISVALID(nextstamp)) {
+		write_c0_compare(nextstamp);
+		ehb();
+		if ((nextstamp - (unsigned long)read_c0_count())
+			> (unsigned long)LONG_MAX)
+				goto repeat;
+	}
+}
+
+
+irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+
+	/* If we're running SMTC, we've got MIPS MT and therefore MIPS32R2 */
+	handle_perf_irq(1);
+
+	if (read_c0_cause() & (1 << 30)) {
+		/* Clear Count/Compare Interrupt */
+		write_c0_compare(read_c0_compare());
+		smtc_distribute_timer(cpu_data[cpu].vpe_id);
+	}
+	return IRQ_HANDLED;
+}
+
+
+int __cpuinit mips_clockevent_init(void)
+{
+	uint64_t mips_freq = mips_hpt_frequency;
+	unsigned int cpu = smp_processor_id();
+	struct clock_event_device *cd;
+	unsigned int irq;
+	int i;
+	int j;
+
+	if (!cpu_has_counter || !mips_hpt_frequency)
+		return -ENXIO;
+	if (cpu == 0) {
+		for (i = 0; i < num_possible_cpus(); i++) {
+			smtc_nextinvpe[i] = 0;
+			for (j = 0; j < num_possible_cpus(); j++)
+				smtc_nexttime[i][j] = 0L;
+		}
+		/*
+		 * SMTC also can't have the usablility test
+		 * run by secondary TCs once Compare is in use.
+		 */
+		if (!c0_compare_int_usable())
+			return -ENXIO;
+	}
+
+	/*
+	 * With vectored interrupts things are getting platform specific.
+	 * get_c0_compare_int is a hook to allow a platform to return the
+	 * interrupt number of it's liking.
+	 */
+	irq = MIPS_CPU_IRQ_BASE + cp0_compare_irq;
+	if (get_c0_compare_int)
+		irq = get_c0_compare_int();
+
+	cd = &per_cpu(mips_clockevent_device, cpu);
+
+	cd->name		= "MIPS";
+	cd->features		= CLOCK_EVT_FEAT_ONESHOT;
+
+	/* Calculate the min / max delta */
+	cd->mult	= div_sc((unsigned long) mips_freq, NSEC_PER_SEC, 32);
+	cd->shift		= 32;
+	cd->max_delta_ns	= clockevent_delta2ns(0x7fffffff, cd);
+	cd->min_delta_ns	= clockevent_delta2ns(0x300, cd);
+
+	cd->rating		= 300;
+	cd->irq			= irq;
+	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->set_next_event	= mips_next_event;
+	cd->set_mode		= mips_set_clock_mode;
+	cd->event_handler	= mips_event_handler;
+
+	clockevents_register_device(cd);
+
+	/*
+	 * On SMTC we only want to do the data structure
+	 * initialization and IRQ setup once.
+	 */
+	if (cpu)
+		return 0;
+	/*
+	 * And we need the hwmask associated with the c0_compare
+	 * vector to be initialized.
+	 */
+	irq_hwmask[irq] = (0x100 << cp0_compare_irq);
+	if (cp0_timer_irq_installed)
+		return 0;
+
+	cp0_timer_irq_installed = 1;
+
+	setup_irq(irq, &c0_compare_irqaction);
+
+	return 0;
+}

diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 11c92dc..e621fda 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c

@@ -54,14 +54,18 @@
  * interrupt is requested" restriction in the MIPS32/MIPS64 architecture makes
  * using this version a gamble.
  */
-static void r4k_wait_irqoff(void)
+void r4k_wait_irqoff(void)
 {
 	local_irq_disable();
 	if (!need_resched())
-		__asm__("	.set	mips3		\n"
+		__asm__("	.set	push		\n"
+			"	.set	mips3		\n"
 			"	wait			\n"
-			"	.set	mips0		\n");
+			"	.set	pop		\n");
 	local_irq_enable();
+	__asm__(" 	.globl __pastwait	\n"
+		"__pastwait:			\n");
+	return;
 }
 
 /*

diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index e29598a..ffa3310 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S

@@ -79,11 +79,6 @@
 
 FEXPORT(restore_all)			# restore full frame
 #ifdef CONFIG_MIPS_MT_SMTC
-/* Detect and execute deferred IPI "interrupts" */
-	LONG_L	s0, TI_REGS($28)
-	LONG_S	sp, TI_REGS($28)
-	jal	deferred_smtc_ipi
-	LONG_S	s0, TI_REGS($28)
 #ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP
 /* Re-arm any temporarily masked interrupts not explicitly "acked" */
 	mfc0	v0, CP0_TCSTATUS
@@ -112,6 +107,11 @@
 	xor	t0, t0, t3
 	mtc0	t0, CP0_TCCONTEXT
 #endif /* CONFIG_MIPS_MT_SMTC_IM_BACKSTOP */
+/* Detect and execute deferred IPI "interrupts" */
+	LONG_L	s0, TI_REGS($28)
+	LONG_S	sp, TI_REGS($28)
+	jal	deferred_smtc_ipi
+	LONG_S	s0, TI_REGS($28)
 #endif /* CONFIG_MIPS_MT_SMTC */
 	.set	noat
 	RESTORE_TEMP

diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index f886dd7..01dcbe3 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S

@@ -282,8 +282,8 @@
 	and	t0, a0, t1
 #ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP
 	mfc0	t2, CP0_TCCONTEXT
-	or	t0, t0, t2
-	mtc0	t0, CP0_TCCONTEXT
+	or	t2, t0, t2
+	mtc0	t2, CP0_TCCONTEXT
 #endif /* CONFIG_MIPS_MT_SMTC_IM_BACKSTOP */
 	xor	t1, t1, t0
 	mtc0	t1, CP0_STATUS

diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
index 3613645..492a0a8 100644
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S

@@ -22,6 +22,7 @@
 #include <asm/irqflags.h>
 #include <asm/regdef.h>
 #include <asm/page.h>
+#include <asm/pgtable-bits.h>
 #include <asm/mipsregs.h>
 #include <asm/stackframe.h>
 

diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c
index df4d3f2..dc9eb72 100644
--- a/arch/mips/kernel/mips-mt-fpaff.c
+++ b/arch/mips/kernel/mips-mt-fpaff.c

@@ -159,7 +159,7 @@
 /*
  * FPU Use Factor empirically derived from experiments on 34K
  */
-#define FPUSEFACTOR 333
+#define FPUSEFACTOR 2000
 
 static __init int mt_fp_affinity_init(void)
 {

diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index ce76843..22fc19b 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c

@@ -55,7 +55,7 @@
 	while (1) {
 		tick_nohz_stop_sched_tick(1);
 		while (!need_resched()) {
-#ifdef CONFIG_SMTC_IDLE_HOOK_DEBUG
+#ifdef CONFIG_MIPS_MT_SMTC
 			extern void smtc_idle_loop_hook(void);
 
 			smtc_idle_loop_hook();
@@ -145,19 +145,18 @@
 	 */
 	p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
 	childregs->cp0_status &= ~(ST0_CU2|ST0_CU1);
+
+#ifdef CONFIG_MIPS_MT_SMTC
+	/*
+	 * SMTC restores TCStatus after Status, and the CU bits
+	 * are aliased there.
+	 */
+	childregs->cp0_tcstatus &= ~(ST0_CU2|ST0_CU1);
+#endif
 	clear_tsk_thread_flag(p, TIF_USEDFPU);
 
 #ifdef CONFIG_MIPS_MT_FPAFF
 	clear_tsk_thread_flag(p, TIF_FPUBOUND);
-
-	/*
-	 * FPU affinity support is cleaner if we track the
-	 * user-visible CPU affinity from the very beginning.
-	 * The generic cpus_allowed mask will already have
-	 * been copied from the parent before copy_thread
-	 * is invoked.
-	 */
-	p->thread.user_cpus_allowed = p->cpus_allowed;
 #endif /* CONFIG_MIPS_MT_FPAFF */
 
 	if (clone_flags & CLONE_SETTLS)

diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 35234b9..96ffc9c 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c

@@ -238,7 +238,7 @@
 		case FPC_EIR: {	/* implementation / version register */
 			unsigned int flags;
 #ifdef CONFIG_MIPS_MT_SMTC
-			unsigned int irqflags;
+			unsigned long irqflags;
 			unsigned int mtflags;
 #endif /* CONFIG_MIPS_MT_SMTC */
 

diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 4410f17..7b59cfb 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c

@@ -121,6 +121,8 @@
 	cpu = smp_processor_id();
 	cpu_data[cpu].udelay_val = loops_per_jiffy;
 
+	notify_cpu_starting(cpu);
+
 	mp_ops->smp_finish();
 	set_cpu_sibling_map(cpu);
 

diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index a516286..897fb2b 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c

@@ -1,4 +1,21 @@
-/* Copyright (C) 2004 Mips Technologies, Inc */
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * Copyright (C) 2004 Mips Technologies, Inc
+ * Copyright (C) 2008 Kevin D. Kissell
+ */
 
 #include <linux/clockchips.h>
 #include <linux/kernel.h>
@@ -21,7 +38,6 @@
 #include <asm/time.h>
 #include <asm/addrspace.h>
 #include <asm/smtc.h>
-#include <asm/smtc_ipi.h>
 #include <asm/smtc_proc.h>
 
 /*
@@ -58,11 +74,6 @@
 
 asiduse smtc_live_asid[MAX_SMTC_TLBS][MAX_SMTC_ASIDS];
 
-/*
- * Clock interrupt "latch" buffers, per "CPU"
- */
-
-static atomic_t ipi_timer_latch[NR_CPUS];
 
 /*
  * Number of InterProcessor Interrupt (IPI) message buffers to allocate
@@ -70,7 +81,7 @@
 
 #define IPIBUF_PER_CPU 4
 
-static struct smtc_ipi_q IPIQ[NR_CPUS];
+struct smtc_ipi_q IPIQ[NR_CPUS];
 static struct smtc_ipi_q freeIPIq;
 
 
@@ -282,7 +293,7 @@
  * phys_cpu_present_map and the logical/physical mappings.
  */
 
-int __init mipsmt_build_cpu_map(int start_cpu_slot)
+int __init smtc_build_cpu_map(int start_cpu_slot)
 {
 	int i, ntcs;
 
@@ -325,7 +336,12 @@
 	write_tc_c0_tcstatus((read_tc_c0_tcstatus()
 			& ~(TCSTATUS_TKSU | TCSTATUS_DA | TCSTATUS_IXMT))
 			| TCSTATUS_A);
-	write_tc_c0_tccontext(0);
+	/*
+	 * TCContext gets an offset from the base of the IPIQ array
+	 * to be used in low-level code to detect the presence of
+	 * an active IPI queue
+	 */
+	write_tc_c0_tccontext((sizeof(struct smtc_ipi_q) * cpu) << 16);
 	/* Bind tc to vpe */
 	write_tc_c0_tcbind(vpe);
 	/* In general, all TCs should have the same cpu_data indications */
@@ -336,10 +352,18 @@
 		cpu_data[cpu].options &= ~MIPS_CPU_FPU;
 	cpu_data[cpu].vpe_id = vpe;
 	cpu_data[cpu].tc_id = tc;
+	/* Multi-core SMTC hasn't been tested, but be prepared */
+	cpu_data[cpu].core = (read_vpe_c0_ebase() >> 1) & 0xff;
 }
 
+/*
+ * Tweak to get Count registes in as close a sync as possible.
+ * Value seems good for 34K-class cores.
+ */
 
-void mipsmt_prepare_cpus(void)
+#define CP0_SKEW 8
+
+void smtc_prepare_cpus(int cpus)
 {
 	int i, vpe, tc, ntc, nvpe, tcpervpe[NR_CPUS], slop, cpu;
 	unsigned long flags;
@@ -363,13 +387,13 @@
 		IPIQ[i].head = IPIQ[i].tail = NULL;
 		spin_lock_init(&IPIQ[i].lock);
 		IPIQ[i].depth = 0;
-		atomic_set(&ipi_timer_latch[i], 0);
 	}
 
 	/* cpu_data index starts at zero */
 	cpu = 0;
 	cpu_data[cpu].vpe_id = 0;
 	cpu_data[cpu].tc_id = 0;
+	cpu_data[cpu].core = (read_c0_ebase() >> 1) & 0xff;
 	cpu++;
 
 	/* Report on boot-time options */
@@ -484,7 +508,8 @@
 			write_vpe_c0_compare(0);
 			/* Propagate Config7 */
 			write_vpe_c0_config7(read_c0_config7());
-			write_vpe_c0_count(read_c0_count());
+			write_vpe_c0_count(read_c0_count() + CP0_SKEW);
+			ehb();
 		}
 		/* enable multi-threading within VPE */
 		write_vpe_c0_vpecontrol(read_vpe_c0_vpecontrol() | VPECONTROL_TE);
@@ -556,7 +581,7 @@
 void __cpuinit smtc_boot_secondary(int cpu, struct task_struct *idle)
 {
 	extern u32 kernelsp[NR_CPUS];
-	long flags;
+	unsigned long flags;
 	int mtflags;
 
 	LOCK_MT_PRA();
@@ -585,24 +610,22 @@
 
 void smtc_init_secondary(void)
 {
-	/*
-	 * Start timer on secondary VPEs if necessary.
-	 * plat_timer_setup has already have been invoked by init/main
-	 * on "boot" TC.  Like per_cpu_trap_init() hack, this assumes that
-	 * SMTC init code assigns TCs consdecutively and in ascending order
-	 * to across available VPEs.
-	 */
-	if (((read_c0_tcbind() & TCBIND_CURTC) != 0) &&
-	    ((read_c0_tcbind() & TCBIND_CURVPE)
-	    != cpu_data[smp_processor_id() - 1].vpe_id)){
-		write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ);
-	}
-
 	local_irq_enable();
 }
 
 void smtc_smp_finish(void)
 {
+	int cpu = smp_processor_id();
+
+	/*
+	 * Lowest-numbered CPU per VPE starts a clock tick.
+	 * Like per_cpu_trap_init() hack, this assumes that
+	 * SMTC init code assigns TCs consdecutively and
+	 * in ascending order across available VPEs.
+	 */
+	if (cpu > 0 && (cpu_data[cpu].vpe_id != cpu_data[cpu - 1].vpe_id))
+		write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ);
+
 	printk("TC %d going on-line as CPU %d\n",
 		cpu_data[smp_processor_id()].tc_id, smp_processor_id());
 }
@@ -753,8 +776,10 @@
 {
 	int tcstatus;
 	struct smtc_ipi *pipi;
-	long flags;
+	unsigned long flags;
 	int mtflags;
+	unsigned long tcrestart;
+	extern void r4k_wait_irqoff(void), __pastwait(void);
 
 	if (cpu == smp_processor_id()) {
 		printk("Cannot Send IPI to self!\n");
@@ -771,8 +796,6 @@
 	pipi->arg = (void *)action;
 	pipi->dest = cpu;
 	if (cpu_data[cpu].vpe_id != cpu_data[smp_processor_id()].vpe_id) {
-		if (type == SMTC_CLOCK_TICK)
-			atomic_inc(&ipi_timer_latch[cpu]);
 		/* If not on same VPE, enqueue and send cross-VPE interrupt */
 		smtc_ipi_nq(&IPIQ[cpu], pipi);
 		LOCK_CORE_PRA();
@@ -800,22 +823,29 @@
 
 		if ((tcstatus & TCSTATUS_IXMT) != 0) {
 			/*
-			 * Spin-waiting here can deadlock,
-			 * so we queue the message for the target TC.
+			 * If we're in the the irq-off version of the wait
+			 * loop, we need to force exit from the wait and
+			 * do a direct post of the IPI.
+			 */
+			if (cpu_wait == r4k_wait_irqoff) {
+				tcrestart = read_tc_c0_tcrestart();
+				if (tcrestart >= (unsigned long)r4k_wait_irqoff
+				    && tcrestart < (unsigned long)__pastwait) {
+					write_tc_c0_tcrestart(__pastwait);
+					tcstatus &= ~TCSTATUS_IXMT;
+					write_tc_c0_tcstatus(tcstatus);
+					goto postdirect;
+				}
+			}
+			/*
+			 * Otherwise we queue the message for the target TC
+			 * to pick up when he does a local_irq_restore()
 			 */
 			write_tc_c0_tchalt(0);
 			UNLOCK_CORE_PRA();
-			/* Try to reduce redundant timer interrupt messages */
-			if (type == SMTC_CLOCK_TICK) {
-			    if (atomic_postincrement(&ipi_timer_latch[cpu])!=0){
-				smtc_ipi_nq(&freeIPIq, pipi);
-				return;
-			    }
-			}
 			smtc_ipi_nq(&IPIQ[cpu], pipi);
 		} else {
-			if (type == SMTC_CLOCK_TICK)
-				atomic_inc(&ipi_timer_latch[cpu]);
+postdirect:
 			post_direct_ipi(cpu, pipi);
 			write_tc_c0_tchalt(0);
 			UNLOCK_CORE_PRA();
@@ -883,7 +913,7 @@
 	smp_call_function_interrupt();
 }
 
-DECLARE_PER_CPU(struct clock_event_device, smtc_dummy_clockevent_device);
+DECLARE_PER_CPU(struct clock_event_device, mips_clockevent_device);
 
 void ipi_decode(struct smtc_ipi *pipi)
 {
@@ -891,20 +921,13 @@
 	struct clock_event_device *cd;
 	void *arg_copy = pipi->arg;
 	int type_copy = pipi->type;
-	int ticks;
-
 	smtc_ipi_nq(&freeIPIq, pipi);
 	switch (type_copy) {
 	case SMTC_CLOCK_TICK:
 		irq_enter();
 		kstat_this_cpu.irqs[MIPS_CPU_IRQ_BASE + 1]++;
-		cd = &per_cpu(smtc_dummy_clockevent_device, cpu);
-		ticks = atomic_read(&ipi_timer_latch[cpu]);
-		atomic_sub(ticks, &ipi_timer_latch[cpu]);
-		while (ticks) {
-			cd->event_handler(cd);
-			ticks--;
-		}
+		cd = &per_cpu(mips_clockevent_device, cpu);
+		cd->event_handler(cd);
 		irq_exit();
 		break;
 
@@ -937,24 +960,48 @@
 	}
 }
 
+/*
+ * Similar to smtc_ipi_replay(), but invoked from context restore,
+ * so it reuses the current exception frame rather than set up a
+ * new one with self_ipi.
+ */
+
 void deferred_smtc_ipi(void)
 {
-	struct smtc_ipi *pipi;
-	unsigned long flags;
-/* DEBUG */
-	int q = smp_processor_id();
+	int cpu = smp_processor_id();
 
 	/*
 	 * Test is not atomic, but much faster than a dequeue,
 	 * and the vast majority of invocations will have a null queue.
+	 * If irq_disabled when this was called, then any IPIs queued
+	 * after we test last will be taken on the next irq_enable/restore.
+	 * If interrupts were enabled, then any IPIs added after the
+	 * last test will be taken directly.
 	 */
-	if (IPIQ[q].head != NULL) {
-		while((pipi = smtc_ipi_dq(&IPIQ[q])) != NULL) {
-			/* ipi_decode() should be called with interrupts off */
-			local_irq_save(flags);
+
+	while (IPIQ[cpu].head != NULL) {
+		struct smtc_ipi_q *q = &IPIQ[cpu];
+		struct smtc_ipi *pipi;
+		unsigned long flags;
+
+		/*
+		 * It may be possible we'll come in with interrupts
+		 * already enabled.
+		 */
+		local_irq_save(flags);
+
+		spin_lock(&q->lock);
+		pipi = __smtc_ipi_dq(q);
+		spin_unlock(&q->lock);
+		if (pipi != NULL)
 			ipi_decode(pipi);
-			local_irq_restore(flags);
-		}
+		/*
+		 * The use of the __raw_local restore isn't
+		 * as obviously necessary here as in smtc_ipi_replay(),
+		 * but it's more efficient, given that we're already
+		 * running down the IPI queue.
+		 */
+		__raw_local_irq_restore(flags);
 	}
 }
 
@@ -975,7 +1022,7 @@
 	struct smtc_ipi *pipi;
 	unsigned long tcstatus;
 	int sent;
-	long flags;
+	unsigned long flags;
 	unsigned int mtflags;
 	unsigned int vpflags;
 
@@ -1066,55 +1113,53 @@
 
 /*
  * SMTC-specific hacks invoked from elsewhere in the kernel.
- *
- * smtc_ipi_replay is called from raw_local_irq_restore which is only ever
- * called with interrupts disabled.  We do rely on interrupts being disabled
- * here because using spin_lock_irqsave()/spin_unlock_irqrestore() would
- * result in a recursive call to raw_local_irq_restore().
  */
 
-static void __smtc_ipi_replay(void)
+ /*
+  * smtc_ipi_replay is called from raw_local_irq_restore
+  */
+
+void smtc_ipi_replay(void)
 {
 	unsigned int cpu = smp_processor_id();
 
 	/*
 	 * To the extent that we've ever turned interrupts off,
 	 * we may have accumulated deferred IPIs.  This is subtle.
-	 * If we use the smtc_ipi_qdepth() macro, we'll get an
-	 * exact number - but we'll also disable interrupts
-	 * and create a window of failure where a new IPI gets
-	 * queued after we test the depth but before we re-enable
-	 * interrupts. So long as IXMT never gets set, however,
 	 * we should be OK:  If we pick up something and dispatch
 	 * it here, that's great. If we see nothing, but concurrent
 	 * with this operation, another TC sends us an IPI, IXMT
 	 * is clear, and we'll handle it as a real pseudo-interrupt
-	 * and not a pseudo-pseudo interrupt.
+	 * and not a pseudo-pseudo interrupt.  The important thing
+	 * is to do the last check for queued message *after* the
+	 * re-enabling of interrupts.
 	 */
-	if (IPIQ[cpu].depth > 0) {
-		while (1) {
-			struct smtc_ipi_q *q = &IPIQ[cpu];
-			struct smtc_ipi *pipi;
-			extern void self_ipi(struct smtc_ipi *);
+	while (IPIQ[cpu].head != NULL) {
+		struct smtc_ipi_q *q = &IPIQ[cpu];
+		struct smtc_ipi *pipi;
+		unsigned long flags;
 
-			spin_lock(&q->lock);
-			pipi = __smtc_ipi_dq(q);
-			spin_unlock(&q->lock);
-			if (!pipi)
-				break;
+		/*
+		 * It's just possible we'll come in with interrupts
+		 * already enabled.
+		 */
+		local_irq_save(flags);
 
+		spin_lock(&q->lock);
+		pipi = __smtc_ipi_dq(q);
+		spin_unlock(&q->lock);
+		/*
+		 ** But use a raw restore here to avoid recursion.
+		 */
+		__raw_local_irq_restore(flags);
+
+		if (pipi) {
 			self_ipi(pipi);
 			smtc_cpu_stats[cpu].selfipis++;
 		}
 	}
 }
 
-void smtc_ipi_replay(void)
-{
-	raw_local_irq_disable();
-	__smtc_ipi_replay();
-}
-
 EXPORT_SYMBOL(smtc_ipi_replay);
 
 void smtc_idle_loop_hook(void)
@@ -1193,40 +1238,13 @@
 		}
 	}
 
-	/*
-	 * Now that we limit outstanding timer IPIs, check for hung TC
-	 */
-	for (tc = 0; tc < NR_CPUS; tc++) {
-		/* Don't check ourself - we'll dequeue IPIs just below */
-		if ((tc != smp_processor_id()) &&
-		    atomic_read(&ipi_timer_latch[tc]) > timerq_limit) {
-		    if (clock_hang_reported[tc] == 0) {
-			pdb_msg += sprintf(pdb_msg,
-				"TC %d looks hung with timer latch at %d\n",
-				tc, atomic_read(&ipi_timer_latch[tc]));
-			clock_hang_reported[tc]++;
-			}
-		}
-	}
 	emt(mtflags);
 	local_irq_restore(flags);
 	if (pdb_msg != &id_ho_db_msg[0])
 		printk("CPU%d: %s", smp_processor_id(), id_ho_db_msg);
 #endif /* CONFIG_SMTC_IDLE_HOOK_DEBUG */
 
-	/*
-	 * Replay any accumulated deferred IPIs. If "Instant Replay"
-	 * is in use, there should never be any.
-	 */
-#ifndef CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY
-	{
-		unsigned long flags;
-
-		local_irq_save(flags);
-		__smtc_ipi_replay();
-		local_irq_restore(flags);
-	}
-#endif /* CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY */
+	smtc_ipi_replay();
 }
 
 void smtc_soft_dump(void)
@@ -1242,10 +1260,6 @@
 		printk("%d: %ld\n", i, smtc_cpu_stats[i].selfipis);
 	}
 	smtc_ipi_qdump();
-	printk("Timer IPI Backlogs:\n");
-	for (i=0; i < NR_CPUS; i++) {
-		printk("%d: %d\n", i, atomic_read(&ipi_timer_latch[i]));
-	}
 	printk("%d Recoveries of \"stolen\" FPU\n",
 	       atomic_read(&smtc_fpu_recoveries));
 }

diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 5fd0cd0..b602ac6 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c

@@ -825,8 +825,10 @@
 		if (cpus_intersects(current->cpus_allowed, mt_fpu_cpumask)) {
 			cpumask_t tmask;
 
-			cpus_and(tmask, current->thread.user_cpus_allowed,
-			         mt_fpu_cpumask);
+			current->thread.user_cpus_allowed
+				= current->cpus_allowed;
+			cpus_and(tmask, current->cpus_allowed,
+				mt_fpu_cpumask);
 			set_cpus_allowed(current, tmask);
 			set_thread_flag(TIF_FPUBOUND);
 		}

diff --git a/arch/mips/mti-malta/Makefile b/arch/mips/mti-malta/Makefile
index 3b7dd72..cef2db8 100644
--- a/arch/mips/mti-malta/Makefile
+++ b/arch/mips/mti-malta/Makefile

@@ -15,6 +15,6 @@
 obj-$(CONFIG_PCI)		+= malta-pci.o
 
 # FIXME FIXME FIXME
-obj-$(CONFIG_MIPS_MT_SMTC)	+= malta_smtc.o
+obj-$(CONFIG_MIPS_MT_SMTC)	+= malta-smtc.o
 
 EXTRA_CFLAGS += -Werror

diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
index 5ea705e..f84a46a 100644
--- a/arch/mips/mti-malta/malta-smtc.c
+++ b/arch/mips/mti-malta/malta-smtc.c

@@ -84,12 +84,17 @@
 
 static void __init msmtc_smp_setup(void)
 {
-	mipsmt_build_cpu_map(0);
+	/*
+	 * we won't get the definitive value until
+	 * we've run smtc_prepare_cpus later, but
+	 * we would appear to need an upper bound now.
+	 */
+	smp_num_siblings = smtc_build_cpu_map(0);
 }
 
 static void __init msmtc_prepare_cpus(unsigned int max_cpus)
 {
-	mipsmt_prepare_cpus();
+	smtc_prepare_cpus(max_cpus);
 }
 
 struct plat_smp_ops msmtc_smp_ops = {

diff --git a/arch/mips/sibyte/swarm/Makefile b/arch/mips/sibyte/swarm/Makefile
index f18ba92..7b45f19 100644
--- a/arch/mips/sibyte/swarm/Makefile
+++ b/arch/mips/sibyte/swarm/Makefile

@@ -1,3 +1,4 @@
-obj-y				:= setup.o rtc_xicor1241.o rtc_m41t81.o
+obj-y				:= platform.o setup.o rtc_xicor1241.o \
+				   rtc_m41t81.o
 
 obj-$(CONFIG_I2C_BOARDINFO)	+= swarm-i2c.o

diff --git a/arch/mips/sibyte/swarm/platform.c b/arch/mips/sibyte/swarm/platform.c
new file mode 100644
index 0000000..dd0e5b9
--- /dev/null
+++ b/arch/mips/sibyte/swarm/platform.c

@@ -0,0 +1,81 @@
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/ata_platform.h>
+
+#include <asm/sibyte/board.h>
+#include <asm/sibyte/sb1250_genbus.h>
+#include <asm/sibyte/sb1250_regs.h>
+
+#define DRV_NAME	"pata-swarm"
+
+#define SWARM_IDE_SHIFT	5
+#define SWARM_IDE_BASE	0x1f0
+#define SWARM_IDE_CTRL	0x3f6
+
+static struct resource swarm_pata_resource[] = {
+	{
+		.name	= "Swarm GenBus IDE",
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.name	= "Swarm GenBus IDE",
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.name	= "Swarm GenBus IDE",
+		.flags	= IORESOURCE_IRQ,
+		.start	= K_INT_GB_IDE,
+		.end	= K_INT_GB_IDE,
+	},
+};
+
+static struct pata_platform_info pata_platform_data = {
+	.ioport_shift	= SWARM_IDE_SHIFT,
+};
+
+static struct platform_device swarm_pata_device = {
+	.name		= "pata_platform",
+	.id		= -1,
+	.resource	= swarm_pata_resource,
+	.num_resources	= ARRAY_SIZE(swarm_pata_resource),
+	.dev  = {
+		.platform_data		= &pata_platform_data,
+		.coherent_dma_mask	= ~0,	/* grumble */
+	},
+};
+
+static int __init swarm_pata_init(void)
+{
+	u8 __iomem *base;
+	phys_t offset, size;
+	struct resource *r;
+
+	if (!SIBYTE_HAVE_IDE)
+		return -ENODEV;
+
+	base = ioremap(A_IO_EXT_BASE, 0x800);
+	offset = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_START_ADDR, IDE_CS));
+	size = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_MULT_SIZE, IDE_CS));
+	iounmap(base);
+
+	offset = G_IO_START_ADDR(offset) << S_IO_ADDRBASE;
+	size = (G_IO_MULT_SIZE(size) + 1) << S_IO_REGSIZE;
+	if (offset < A_PHYS_GENBUS || offset >= A_PHYS_GENBUS_END) {
+		pr_info(DRV_NAME ": PATA interface at GenBus disabled\n");
+
+		return -EBUSY;
+	}
+
+	pr_info(DRV_NAME ": PATA interface at GenBus slot %i\n", IDE_CS);
+
+	r = swarm_pata_resource;
+	r[0].start = offset + (SWARM_IDE_BASE << SWARM_IDE_SHIFT);
+	r[0].end   = offset + ((SWARM_IDE_BASE + 8) << SWARM_IDE_SHIFT) - 1;
+	r[1].start = offset + (SWARM_IDE_CTRL << SWARM_IDE_SHIFT);
+	r[1].end   = offset + ((SWARM_IDE_CTRL + 1) << SWARM_IDE_SHIFT) - 1;
+
+	return platform_device_register(&swarm_pata_device);
+}
+
+device_initcall(swarm_pata_init);

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 5337ca7..c27b10a 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c

@@ -453,6 +453,7 @@
 	secondary_cpu_time_init();
 
 	ipi_call_lock();
+	notify_cpu_starting(cpu);
 	cpu_set(cpu, cpu_online_map);
 	/* Update sibling maps */
 	base = cpu_first_thread_in_core(cpu);

diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 00b9b4d..9e8b1f9 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c

@@ -585,6 +585,8 @@
 	/* Enable pfault pseudo page faults on this cpu. */
 	pfault_init();
 
+	/* call cpu notifiers */
+	notify_cpu_starting(smp_processor_id());
 	/* Mark this cpu as online */
 	spin_lock(&call_lock);
 	cpu_set(smp_processor_id(), cpu_online_map);

diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index ca114fe..06acb1a 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c

@@ -169,6 +169,8 @@
 
 static void clock_comparator_interrupt(__u16 code)
 {
+	if (S390_lowcore.clock_comparator == -1ULL)
+		set_clock_comparator(S390_lowcore.clock_comparator);
 }
 
 static void etr_timing_alert(struct etr_irq_parm *);

diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index fc6ab60..0953cee 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c

@@ -1,14 +1,9 @@
 /*
- *  arch/s390/lib/delay.c
  *    Precise Delay Loops for S390
  *
- *  S390 version
- *    Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
- *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- *
- *  Derived from "arch/i386/lib/delay.c"
- *    Copyright (C) 1993 Linus Torvalds
- *    Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *    Copyright IBM Corp. 1999,2008
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Heiko Carstens <heiko.carstens@de.ibm.com>,
  */
 
 #include <linux/sched.h>
@@ -29,30 +24,31 @@
 	asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
 }
 
-/*
- * Waits for 'usecs' microseconds using the TOD clock comparator.
- */
-void __udelay(unsigned long usecs)
+static void __udelay_disabled(unsigned long usecs)
 {
-	u64 end, time, old_cc = 0;
-	unsigned long flags, cr0, mask, dummy;
-	int irq_context;
+	unsigned long mask, cr0, cr0_saved;
+	u64 clock_saved;
 
-	irq_context = in_interrupt();
-	if (!irq_context)
-		local_bh_disable();
-	local_irq_save(flags);
-	if (raw_irqs_disabled_flags(flags)) {
-		old_cc = local_tick_disable();
-		S390_lowcore.clock_comparator = -1ULL;
-		__ctl_store(cr0, 0, 0);
-		dummy = (cr0 & 0xffff00e0) | 0x00000800;
-		__ctl_load(dummy , 0, 0);
-		mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT;
-	} else
-		mask = psw_kernel_bits | PSW_MASK_WAIT |
-			PSW_MASK_EXT | PSW_MASK_IO;
+	clock_saved = local_tick_disable();
+	set_clock_comparator(get_clock() + ((u64) usecs << 12));
+	__ctl_store(cr0_saved, 0, 0);
+	cr0 = (cr0_saved & 0xffff00e0) | 0x00000800;
+	__ctl_load(cr0 , 0, 0);
+	mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT;
+	trace_hardirqs_on();
+	__load_psw_mask(mask);
+	local_irq_disable();
+	__ctl_load(cr0_saved, 0, 0);
+	local_tick_enable(clock_saved);
+	set_clock_comparator(S390_lowcore.clock_comparator);
+}
 
+static void __udelay_enabled(unsigned long usecs)
+{
+	unsigned long mask;
+	u64 end, time;
+
+	mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT | PSW_MASK_IO;
 	end = get_clock() + ((u64) usecs << 12);
 	do {
 		time = end < S390_lowcore.clock_comparator ?
@@ -62,13 +58,37 @@
 		__load_psw_mask(mask);
 		local_irq_disable();
 	} while (get_clock() < end);
-
-	if (raw_irqs_disabled_flags(flags)) {
-		__ctl_load(cr0, 0, 0);
-		local_tick_enable(old_cc);
-	}
-	if (!irq_context)
-		_local_bh_enable();
 	set_clock_comparator(S390_lowcore.clock_comparator);
+}
+
+/*
+ * Waits for 'usecs' microseconds using the TOD clock comparator.
+ */
+void __udelay(unsigned long usecs)
+{
+	unsigned long flags;
+
+	preempt_disable();
+	local_irq_save(flags);
+	if (in_irq()) {
+		__udelay_disabled(usecs);
+		goto out;
+	}
+	if (in_softirq()) {
+		if (raw_irqs_disabled_flags(flags))
+			__udelay_disabled(usecs);
+		else
+			__udelay_enabled(usecs);
+		goto out;
+	}
+	if (raw_irqs_disabled_flags(flags)) {
+		local_bh_disable();
+		__udelay_disabled(usecs);
+		_local_bh_enable();
+		goto out;
+	}
+	__udelay_enabled(usecs);
+out:
 	local_irq_restore(flags);
+	preempt_enable();
 }

diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 60c5084..001778f 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c

@@ -82,6 +82,8 @@
 
 	preempt_disable();
 
+	notify_cpu_starting(smp_processor_id());
+
 	local_irq_enable();
 
 	calibrate_delay();

diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c
index 6959640..446767e 100644
--- a/arch/sparc/kernel/sun4d_smp.c
+++ b/arch/sparc/kernel/sun4d_smp.c

@@ -88,6 +88,7 @@
 	local_flush_cache_all();
 	local_flush_tlb_all();
 
+	notify_cpu_starting(cpuid);
 	/*
 	 * Unblock the master CPU _only_ when the scheduler state
 	 * of all secondary CPUs will be up-to-date, so after

diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c
index a14a76a..9964890 100644
--- a/arch/sparc/kernel/sun4m_smp.c
+++ b/arch/sparc/kernel/sun4m_smp.c

@@ -71,6 +71,8 @@
 	local_flush_cache_all();
 	local_flush_tlb_all();
 
+	notify_cpu_starting(cpuid);
+
 	/* Get our local ticker going. */
 	smp_setup_percpu_timer();
 

diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index be2d50c..0457721 100644
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c

@@ -85,6 +85,7 @@
 	while (!cpu_isset(cpu, smp_commenced_mask))
 		cpu_relax();
 
+	notify_cpu_starting(cpu);
 	cpu_set(cpu, cpu_online_map);
 	default_idle();
 	return 0;

diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index a1310c5..857e492 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c

@@ -492,7 +492,7 @@
 			continue;
 		}
 		sh_symtab = sec_symtab->symtab;
-		sym_strtab = sec->link->strtab;
+		sym_strtab = sec_symtab->link->strtab;
 		for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) {
 			Elf32_Rel *rel;
 			Elf32_Sym *sym;

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index bfd10fd..c102af8 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c

@@ -1605,6 +1605,14 @@
 	 */
 	{
 	 .callback = dmi_ignore_irq0_timer_override,
+	 .ident = "HP nx6115 laptop",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6115"),
+		     },
+	 },
+	{
+	 .callback = dmi_ignore_irq0_timer_override,
 	 .ident = "HP NX6125 laptop",
 	 .matches = {
 		     DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
@@ -1619,6 +1627,14 @@
 		     DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"),
 		     },
 	 },
+	{
+	 .callback = dmi_ignore_irq0_timer_override,
+	 .ident = "HP 6715b laptop",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"),
+		     },
+	 },
 	{}
 };
 

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 8282a21..10435a1 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c

@@ -455,12 +455,7 @@
 		return NOTIFY_DONE;
 
 	case DIE_NMI_IPI:
-		if (atomic_read(&kgdb_active) != -1) {
-			/* KGDB CPU roundup */
-			kgdb_nmicallback(raw_smp_processor_id(), regs);
-			was_in_debug_nmi[raw_smp_processor_id()] = 1;
-			touch_nmi_watchdog();
-		}
+		/* Just ignore, we will handle the roundup on DIE_NMI. */
 		return NOTIFY_DONE;
 
 	case DIE_NMIUNKNOWN:

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 49285f8..be33a54 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c

@@ -626,7 +626,6 @@
 	struct pci_dev *dev;
 	void *gatt;
 	int i, error;
-	unsigned long start_pfn, end_pfn;
 
 	printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
 	aper_size = aper_base = info->aper_size = 0;
@@ -672,12 +671,6 @@
 	printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
 	       aper_base, aper_size>>10);
 
-	/* need to map that range */
-	end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
-	if (end_pfn > max_low_pfn_mapped) {
-		start_pfn = (aper_base>>PAGE_SHIFT);
-		init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
-	}
 	return 0;
 
  nommu:
@@ -727,7 +720,8 @@
 {
 	struct agp_kern_info info;
 	unsigned long iommu_start;
-	unsigned long aper_size;
+	unsigned long aper_base, aper_size;
+	unsigned long start_pfn, end_pfn;
 	unsigned long scratch;
 	long i;
 
@@ -765,8 +759,16 @@
 		return;
 	}
 
+	/* need to map that range */
+	aper_size = info.aper_size << 20;
+	aper_base = info.aper_base;
+	end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
+	if (end_pfn > max_low_pfn_mapped) {
+		start_pfn = (aper_base>>PAGE_SHIFT);
+		init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
+	}
+
 	printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
-	aper_size = info.aper_size * 1024 * 1024;
 	iommu_size = check_iommu_size(info.aper_base, aper_size);
 	iommu_pages = iommu_size >> PAGE_SHIFT;
 

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7985c5b..0b8261c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c

@@ -257,6 +257,7 @@
 	end_local_APIC_setup();
 	map_cpu_to_logical_apicid();
 
+	notify_cpu_starting(cpuid);
 	/*
 	 * Get our bogomips.
 	 *

diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index ee0fba0..199a5f4 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c

@@ -448,6 +448,8 @@
 
 	VDEBUG(("VOYAGER SMP: CPU%d, stack at about %p\n", cpuid, &cpuid));
 
+	notify_cpu_starting(cpuid);
+
 	/* enable interrupts */
 	local_irq_enable();
 

diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 0841095..8dd3336 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c

@@ -165,8 +165,11 @@
 				"firmware_node");
 		ret = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj,
 				"physical_node");
-		if (acpi_dev->wakeup.flags.valid)
+		if (acpi_dev->wakeup.flags.valid) {
 			device_set_wakeup_capable(dev, true);
+			device_set_wakeup_enable(dev,
+						acpi_dev->wakeup.state.enabled);
+		}
 	}
 
 	return 0;

diff --git a/drivers/acpi/sleep/proc.c b/drivers/acpi/sleep/proc.c
index 4ebbba2..bf5b04d 100644
--- a/drivers/acpi/sleep/proc.c
+++ b/drivers/acpi/sleep/proc.c

@@ -377,6 +377,14 @@
 	return 0;
 }
 
+static void physical_device_enable_wakeup(struct acpi_device *adev)
+{
+	struct device *dev = acpi_get_physical_device(adev->handle);
+
+	if (dev && device_can_wakeup(dev))
+		device_set_wakeup_enable(dev, adev->wakeup.state.enabled);
+}
+
 static ssize_t
 acpi_system_write_wakeup_device(struct file *file,
 				const char __user * buffer,
@@ -411,6 +419,7 @@
 		}
 	}
 	if (found_dev) {
+		physical_device_enable_wakeup(found_dev);
 		list_for_each_safe(node, next, &acpi_wakeup_device_list) {
 			struct acpi_device *dev = container_of(node,
 							       struct
@@ -428,6 +437,7 @@
 				       dev->pnp.bus_id, found_dev->pnp.bus_id);
 				dev->wakeup.state.enabled =
 				    found_dev->wakeup.state.enabled;
+				physical_device_enable_wakeup(dev);
 			}
 		}
 	}

diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 94df917..0778d99 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c

@@ -364,7 +364,7 @@
 	int i;
 
 	status_block = dma_readl(dw, RAW.BLOCK);
-	status_xfer = dma_readl(dw, RAW.BLOCK);
+	status_xfer = dma_readl(dw, RAW.XFER);
 	status_err = dma_readl(dw, RAW.ERROR);
 
 	dev_vdbg(dw->dma.dev, "tasklet: status_block=%x status_err=%x\n",

diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 8e93a79..052879a 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig

@@ -780,10 +780,6 @@
 	  to transfer data to and from memory.  Saying Y is safe and improves
 	  performance.
 
-config BLK_DEV_IDE_SWARM
-	tristate "IDE for Sibyte evaluation boards"
-	depends on SIBYTE_SB1xxx_SOC
-
 config BLK_DEV_IDE_AU1XXX
        bool "IDE for AMD Alchemy Au1200"
        depends on SOC_AU1200

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 49a8c58..f16bb46 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c

@@ -1661,7 +1661,9 @@
 		cdi->mask &= ~CDC_PLAY_AUDIO;
 
 	mechtype = buf[8 + 6] >> 5;
-	if (mechtype == mechtype_caddy || mechtype == mechtype_popup)
+	if (mechtype == mechtype_caddy ||
+	    mechtype == mechtype_popup ||
+	    (drive->atapi_flags & IDE_AFLAG_NO_AUTOCLOSE))
 		cdi->mask |= CDC_CLOSE_TRAY;
 
 	if (cdi->sanyo_slot > 0) {
@@ -1859,6 +1861,8 @@
 	{ "MATSHITADVD-ROM SR-8176", NULL,   IDE_AFLAG_PLAY_AUDIO_OK	     },
 	{ "MATSHITADVD-ROM SR-8174", NULL,   IDE_AFLAG_PLAY_AUDIO_OK	     },
 	{ "Optiarc DVD RW AD-5200A", NULL,   IDE_AFLAG_PLAY_AUDIO_OK	     },
+	{ "Optiarc DVD RW AD-7200A", NULL,   IDE_AFLAG_PLAY_AUDIO_OK	     },
+	{ "Optiarc DVD RW AD-7543A", NULL,   IDE_AFLAG_NO_AUTOCLOSE	     },
 	{ NULL, NULL, 0 }
 };
 

diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index adc6827..3fa07c0 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c

@@ -211,7 +211,7 @@
 				xcount = bcount & 0xffff;
 				if (is_trm290)
 					xcount = ((xcount >> 2) - 1) << 16;
-				if (xcount == 0x0000) {
+				else if (xcount == 0x0000) {
 	/* 
 	 * Most chipsets correctly interpret a length of 0x0000 as 64KB,
 	 * but at least one (e.g. CS5530) misinterprets it as zero (!).

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 994e410..a51a30e 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c

@@ -1492,7 +1492,7 @@
 
 static int ide_sysfs_register_port(ide_hwif_t *hwif)
 {
-	int i, rc;
+	int i, uninitialized_var(rc);
 
 	for (i = 0; ide_port_attrs[i]; i++) {
 		rc = device_create_file(hwif->portdev, ide_port_attrs[i]);

diff --git a/drivers/ide/mips/Makefile b/drivers/ide/mips/Makefile
index 677c7b2..5873fa0 100644
--- a/drivers/ide/mips/Makefile
+++ b/drivers/ide/mips/Makefile

@@ -1,4 +1,3 @@
-obj-$(CONFIG_BLK_DEV_IDE_SWARM)		+= swarm.o
 obj-$(CONFIG_BLK_DEV_IDE_AU1XXX)	+= au1xxx-ide.o
 
 EXTRA_CFLAGS    := -Idrivers/ide

diff --git a/drivers/ide/mips/swarm.c b/drivers/ide/mips/swarm.c
deleted file mode 100644
index 39c9ee9..0000000
--- a/drivers/ide/mips/swarm.c
+++ /dev/null

@@ -1,197 +0,0 @@
-/*
- * Copyright (C) 2001, 2002, 2003 Broadcom Corporation
- * Copyright (C) 2004 MontaVista Software Inc.
- *	Author:	Manish Lachwani, mlachwani@mvista.com
- * Copyright (C) 2004  MIPS Technologies, Inc.  All rights reserved.
- *	Author: Maciej W. Rozycki <macro@mips.com>
- * Copyright (c) 2006, 2008  Maciej W. Rozycki
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- */
-
-/*
- *  Derived loosely from ide-pmac.c, so:
- *  Copyright (C) 1998 Paul Mackerras.
- *  Copyright (C) 1995-1998 Mark Lord
- */
-
-/*
- * Boards with SiByte processors so far have supported IDE devices via
- * the Generic Bus, PCI bus, and built-in PCMCIA interface.  In all
- * cases, byte-swapping must be avoided for these devices (whereas
- * other PCI devices, for example, will require swapping).  Any
- * SiByte-targetted kernel including IDE support will include this
- * file.  Probing of a Generic Bus for an IDE device is controlled by
- * the definition of "SIBYTE_HAVE_IDE", which is provided by
- * <asm/sibyte/board.h> for Broadcom boards.
- */
-
-#include <linux/ide.h>
-#include <linux/ioport.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/platform_device.h>
-
-#include <asm/io.h>
-
-#include <asm/sibyte/board.h>
-#include <asm/sibyte/sb1250_genbus.h>
-#include <asm/sibyte/sb1250_regs.h>
-
-#define DRV_NAME "ide-swarm"
-
-static char swarm_ide_string[] = DRV_NAME;
-
-static struct resource swarm_ide_resource = {
-	.name	= "SWARM GenBus IDE",
-	.flags	= IORESOURCE_MEM,
-};
-
-static struct platform_device *swarm_ide_dev;
-
-static const struct ide_port_info swarm_port_info = {
-	.name			= DRV_NAME,
-	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
-};
-
-/*
- * swarm_ide_probe - if the board header indicates the existence of
- * Generic Bus IDE, allocate a HWIF for it.
- */
-static int __devinit swarm_ide_probe(struct device *dev)
-{
-	u8 __iomem *base;
-	struct ide_host *host;
-	phys_t offset, size;
-	int i, rc;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
-
-	if (!SIBYTE_HAVE_IDE)
-		return -ENODEV;
-
-	base = ioremap(A_IO_EXT_BASE, 0x800);
-	offset = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_START_ADDR, IDE_CS));
-	size = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_MULT_SIZE, IDE_CS));
-	iounmap(base);
-
-	offset = G_IO_START_ADDR(offset) << S_IO_ADDRBASE;
-	size = (G_IO_MULT_SIZE(size) + 1) << S_IO_REGSIZE;
-	if (offset < A_PHYS_GENBUS || offset >= A_PHYS_GENBUS_END) {
-		printk(KERN_INFO DRV_NAME
-		       ": IDE interface at GenBus disabled\n");
-		return -EBUSY;
-	}
-
-	printk(KERN_INFO DRV_NAME ": IDE interface at GenBus slot %i\n",
-	       IDE_CS);
-
-	swarm_ide_resource.start = offset;
-	swarm_ide_resource.end = offset + size - 1;
-	if (request_resource(&iomem_resource, &swarm_ide_resource)) {
-		printk(KERN_ERR DRV_NAME
-		       ": can't request I/O memory resource\n");
-		return -EBUSY;
-	}
-
-	base = ioremap(offset, size);
-
-	memset(&hw, 0, sizeof(hw));
-	for (i = 0; i <= 7; i++)
-		hw.io_ports_array[i] =
-				(unsigned long)(base + ((0x1f0 + i) << 5));
-	hw.io_ports.ctl_addr =
-				(unsigned long)(base + (0x3f6 << 5));
-	hw.irq = K_INT_GB_IDE;
-	hw.chipset = ide_generic;
-
-	rc = ide_host_add(&swarm_port_info, hws, &host);
-	if (rc)
-		goto err;
-
-	dev_set_drvdata(dev, host);
-
-	return 0;
-err:
-	release_resource(&swarm_ide_resource);
-	iounmap(base);
-	return rc;
-}
-
-static struct device_driver swarm_ide_driver = {
-	.name	= swarm_ide_string,
-	.bus	= &platform_bus_type,
-	.probe	= swarm_ide_probe,
-};
-
-static void swarm_ide_platform_release(struct device *device)
-{
-	struct platform_device *pldev;
-
-	/* free device */
-	pldev = to_platform_device(device);
-	kfree(pldev);
-}
-
-static int __devinit swarm_ide_init_module(void)
-{
-	struct platform_device *pldev;
-	int err;
-
-	printk(KERN_INFO "SWARM IDE driver\n");
-
-	if (driver_register(&swarm_ide_driver)) {
-		printk(KERN_ERR "Driver registration failed\n");
-		err = -ENODEV;
-		goto out;
-	}
-
-        if (!(pldev = kzalloc(sizeof (*pldev), GFP_KERNEL))) {
-		err = -ENOMEM;
-		goto out_unregister_driver;
-	}
-
-	pldev->name		= swarm_ide_string;
-	pldev->id		= 0;
-	pldev->dev.release	= swarm_ide_platform_release;
-
-	if (platform_device_register(pldev)) {
-		err = -ENODEV;
-		goto out_free_pldev;
-	}
-
-        if (!pldev->dev.driver) {
-		/*
-		 * The driver was not bound to this device, there was
-                 * no hardware at this address. Unregister it, as the
-		 * release fuction will take care of freeing the
-		 * allocated structure
-		 */
-		platform_device_unregister (pldev);
-	}
-
-	swarm_ide_dev = pldev;
-
-	return 0;
-
-out_free_pldev:
-	kfree(pldev);
-
-out_unregister_driver:
-	driver_unregister(&swarm_ide_driver);
-out:
-	return err;
-}
-
-module_init(swarm_ide_init_module);

diff --git a/drivers/leds/leds-fsg.c b/drivers/leds/leds-fsg.c
index be0e121..3493515 100644
--- a/drivers/leds/leds-fsg.c
+++ b/drivers/leds/leds-fsg.c

@@ -161,6 +161,16 @@
 {
 	int ret;
 
+	/* Map the LED chip select address space */
+	latch_address = (unsigned short *) ioremap(IXP4XX_EXP_BUS_BASE(2), 512);
+	if (!latch_address) {
+		ret = -ENOMEM;
+		goto failremap;
+	}
+
+	latch_value = 0xffff;
+	*latch_address = latch_value;
+
 	ret = led_classdev_register(&pdev->dev, &fsg_wlan_led);
 	if (ret < 0)
 		goto failwlan;
@@ -185,20 +195,8 @@
 	if (ret < 0)
 		goto failring;
 
-	/* Map the LED chip select address space */
-	latch_address = (unsigned short *) ioremap(IXP4XX_EXP_BUS_BASE(2), 512);
-	if (!latch_address) {
-		ret = -ENOMEM;
-		goto failremap;
-	}
-
-	latch_value = 0xffff;
-	*latch_address = latch_value;
-
 	return ret;
 
- failremap:
-	led_classdev_unregister(&fsg_ring_led);
  failring:
 	led_classdev_unregister(&fsg_sync_led);
  failsync:
@@ -210,14 +208,14 @@
  failwan:
 	led_classdev_unregister(&fsg_wlan_led);
  failwlan:
+	iounmap(latch_address);
+ failremap:
 
 	return ret;
 }
 
 static int fsg_led_remove(struct platform_device *pdev)
 {
-	iounmap(latch_address);
-
 	led_classdev_unregister(&fsg_wlan_led);
 	led_classdev_unregister(&fsg_wan_led);
 	led_classdev_unregister(&fsg_sata_led);
@@ -225,6 +223,8 @@
 	led_classdev_unregister(&fsg_sync_led);
 	led_classdev_unregister(&fsg_ring_led);
 
+	iounmap(latch_address);
+
 	return 0;
 }
 

diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c
index 146c069..f508729 100644
--- a/drivers/leds/leds-pca955x.c
+++ b/drivers/leds/leds-pca955x.c

@@ -248,11 +248,10 @@
 					const struct i2c_device_id *id)
 {
 	struct pca955x_led *pca955x;
-	int i;
-	int err = -ENODEV;
 	struct pca955x_chipdef *chip;
 	struct i2c_adapter *adapter;
 	struct led_platform_data *pdata;
+	int i, err;
 
 	chip = &pca955x_chipdefs[id->driver_data];
 	adapter = to_i2c_adapter(client->dev.parent);
@@ -282,43 +281,41 @@
 		}
 	}
 
-	for (i = 0; i < chip->bits; i++) {
-		pca955x = kzalloc(sizeof(struct pca955x_led), GFP_KERNEL);
-		if (!pca955x) {
-			err = -ENOMEM;
-			goto exit;
-		}
+	pca955x = kzalloc(sizeof(*pca955x) * chip->bits, GFP_KERNEL);
+	if (!pca955x)
+		return -ENOMEM;
 
-		pca955x->chipdef = chip;
-		pca955x->client = client;
-		pca955x->led_num = i;
+	i2c_set_clientdata(client, pca955x);
+
+	for (i = 0; i < chip->bits; i++) {
+		pca955x[i].chipdef = chip;
+		pca955x[i].client = client;
+		pca955x[i].led_num = i;
+
 		/* Platform data can specify LED names and default triggers */
 		if (pdata) {
 			if (pdata->leds[i].name)
-				snprintf(pca955x->name, 32, "pca955x:%s",
-							pdata->leds[i].name);
+				snprintf(pca955x[i].name,
+					 sizeof(pca955x[i].name), "pca955x:%s",
+					 pdata->leds[i].name);
 			if (pdata->leds[i].default_trigger)
-				pca955x->led_cdev.default_trigger =
+				pca955x[i].led_cdev.default_trigger =
 					pdata->leds[i].default_trigger;
 		} else {
-			snprintf(pca955x->name, 32, "pca955x:%d", i);
+			snprintf(pca955x[i].name, sizeof(pca955x[i].name),
+				 "pca955x:%d", i);
 		}
-		spin_lock_init(&pca955x->lock);
 
-		pca955x->led_cdev.name = pca955x->name;
-		pca955x->led_cdev.brightness_set =
-				pca955x_led_set;
+		spin_lock_init(&pca955x[i].lock);
 
-		/*
-		 * Client data is a pointer to the _first_ pca955x_led
-		 * struct
-		 */
-		if (i == 0)
-			i2c_set_clientdata(client, pca955x);
+		pca955x[i].led_cdev.name = pca955x[i].name;
+		pca955x[i].led_cdev.brightness_set = pca955x_led_set;
 
-		INIT_WORK(&(pca955x->work), pca955x_led_work);
+		INIT_WORK(&pca955x[i].work, pca955x_led_work);
 
-		led_classdev_register(&client->dev, &(pca955x->led_cdev));
+		err = led_classdev_register(&client->dev, &pca955x[i].led_cdev);
+		if (err < 0)
+			goto exit;
 	}
 
 	/* Turn off LEDs */
@@ -336,23 +333,32 @@
 	pca955x_write_psc(client, 1, 0);
 
 	return 0;
+
 exit:
+	while (i--) {
+		led_classdev_unregister(&pca955x[i].led_cdev);
+		cancel_work_sync(&pca955x[i].work);
+	}
+
+	kfree(pca955x);
+	i2c_set_clientdata(client, NULL);
+
 	return err;
 }
 
 static int __devexit pca955x_remove(struct i2c_client *client)
 {
 	struct pca955x_led *pca955x = i2c_get_clientdata(client);
-	int leds = pca955x->chipdef->bits;
 	int i;
 
-	for (i = 0; i < leds; i++) {
-		led_classdev_unregister(&(pca955x->led_cdev));
-		cancel_work_sync(&(pca955x->work));
-		kfree(pca955x);
-		pca955x = pca955x + 1;
+	for (i = 0; i < pca955x->chipdef->bits; i++) {
+		led_classdev_unregister(&pca955x[i].led_cdev);
+		cancel_work_sync(&pca955x[i].work);
 	}
 
+	kfree(pca955x);
+	i2c_set_clientdata(client, NULL);
+
 	return 0;
 }
 

diff --git a/drivers/media/common/tuners/tuner-xc2028.h b/drivers/media/common/tuners/tuner-xc2028.h
index 216025c..2c5b628 100644
--- a/drivers/media/common/tuners/tuner-xc2028.h
+++ b/drivers/media/common/tuners/tuner-xc2028.h

@@ -10,6 +10,7 @@
 #include "dvb_frontend.h"
 
 #define XC2028_DEFAULT_FIRMWARE "xc3028-v27.fw"
+#define XC3028L_DEFAULT_FIRMWARE "xc3028L-v36.fw"
 
 /*      Dmoduler		IF (kHz) */
 #define	XC3028_FE_DEFAULT	0		/* Don't load SCODE */

diff --git a/drivers/media/dvb/b2c2/flexcop-fe-tuner.c b/drivers/media/dvb/b2c2/flexcop-fe-tuner.c
index 4eed783..a127a41 100644
--- a/drivers/media/dvb/b2c2/flexcop-fe-tuner.c
+++ b/drivers/media/dvb/b2c2/flexcop-fe-tuner.c

@@ -491,6 +491,7 @@
 	.demod_address = 0x53,
 	.invert = 1,
 	.repeated_start_workaround = 1,
+	.serial_mpeg = 1,
 };
 
 static struct itd1000_config skystar2_rev2_7_itd1000_config = {

diff --git a/drivers/media/dvb/dvb-core/dmxdev.c b/drivers/media/dvb/dvb-core/dmxdev.c
index 069d847..0c733c6 100644
--- a/drivers/media/dvb/dvb-core/dmxdev.c
+++ b/drivers/media/dvb/dvb-core/dmxdev.c

@@ -364,15 +364,16 @@
 				       enum dmx_success success)
 {
 	struct dmxdev_filter *dmxdevfilter = filter->priv;
+	unsigned long flags;
 	int ret;
 
 	if (dmxdevfilter->buffer.error) {
 		wake_up(&dmxdevfilter->buffer.queue);
 		return 0;
 	}
-	spin_lock(&dmxdevfilter->dev->lock);
+	spin_lock_irqsave(&dmxdevfilter->dev->lock, flags);
 	if (dmxdevfilter->state != DMXDEV_STATE_GO) {
-		spin_unlock(&dmxdevfilter->dev->lock);
+		spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags);
 		return 0;
 	}
 	del_timer(&dmxdevfilter->timer);
@@ -391,7 +392,7 @@
 	}
 	if (dmxdevfilter->params.sec.flags & DMX_ONESHOT)
 		dmxdevfilter->state = DMXDEV_STATE_DONE;
-	spin_unlock(&dmxdevfilter->dev->lock);
+	spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags);
 	wake_up(&dmxdevfilter->buffer.queue);
 	return 0;
 }
@@ -403,11 +404,12 @@
 {
 	struct dmxdev_filter *dmxdevfilter = feed->priv;
 	struct dvb_ringbuffer *buffer;
+	unsigned long flags;
 	int ret;
 
-	spin_lock(&dmxdevfilter->dev->lock);
+	spin_lock_irqsave(&dmxdevfilter->dev->lock, flags);
 	if (dmxdevfilter->params.pes.output == DMX_OUT_DECODER) {
-		spin_unlock(&dmxdevfilter->dev->lock);
+		spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags);
 		return 0;
 	}
 
@@ -417,7 +419,7 @@
 	else
 		buffer = &dmxdevfilter->dev->dvr_buffer;
 	if (buffer->error) {
-		spin_unlock(&dmxdevfilter->dev->lock);
+		spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags);
 		wake_up(&buffer->queue);
 		return 0;
 	}
@@ -428,7 +430,7 @@
 		dvb_ringbuffer_flush(buffer);
 		buffer->error = ret;
 	}
-	spin_unlock(&dmxdevfilter->dev->lock);
+	spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags);
 	wake_up(&buffer->queue);
 	return 0;
 }

diff --git a/drivers/media/dvb/dvb-core/dvb_demux.c b/drivers/media/dvb/dvb-core/dvb_demux.c
index e2eca0b..a2c1fd5 100644
--- a/drivers/media/dvb/dvb-core/dvb_demux.c
+++ b/drivers/media/dvb/dvb-core/dvb_demux.c

@@ -399,7 +399,9 @@
 void dvb_dmx_swfilter_packets(struct dvb_demux *demux, const u8 *buf,
 			      size_t count)
 {
-	spin_lock(&demux->lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&demux->lock, flags);
 
 	while (count--) {
 		if (buf[0] == 0x47)
@@ -407,16 +409,17 @@
 		buf += 188;
 	}
 
-	spin_unlock(&demux->lock);
+	spin_unlock_irqrestore(&demux->lock, flags);
 }
 
 EXPORT_SYMBOL(dvb_dmx_swfilter_packets);
 
 void dvb_dmx_swfilter(struct dvb_demux *demux, const u8 *buf, size_t count)
 {
+	unsigned long flags;
 	int p = 0, i, j;
 
-	spin_lock(&demux->lock);
+	spin_lock_irqsave(&demux->lock, flags);
 
 	if (demux->tsbufp) {
 		i = demux->tsbufp;
@@ -449,17 +452,18 @@
 	}
 
 bailout:
-	spin_unlock(&demux->lock);
+	spin_unlock_irqrestore(&demux->lock, flags);
 }
 
 EXPORT_SYMBOL(dvb_dmx_swfilter);
 
 void dvb_dmx_swfilter_204(struct dvb_demux *demux, const u8 *buf, size_t count)
 {
+	unsigned long flags;
 	int p = 0, i, j;
 	u8 tmppack[188];
 
-	spin_lock(&demux->lock);
+	spin_lock_irqsave(&demux->lock, flags);
 
 	if (demux->tsbufp) {
 		i = demux->tsbufp;
@@ -500,7 +504,7 @@
 	}
 
 bailout:
-	spin_unlock(&demux->lock);
+	spin_unlock_irqrestore(&demux->lock, flags);
 }
 
 EXPORT_SYMBOL(dvb_dmx_swfilter_204);

diff --git a/drivers/media/dvb/frontends/s5h1420.c b/drivers/media/dvb/frontends/s5h1420.c
index 747d3fa..2e9fd28 100644
--- a/drivers/media/dvb/frontends/s5h1420.c
+++ b/drivers/media/dvb/frontends/s5h1420.c

@@ -59,7 +59,7 @@
 	 * it does not support repeated-start, workaround: write addr-1
 	 * and then read
 	 */
-	u8 shadow[255];
+	u8 shadow[256];
 };
 
 static u32 s5h1420_getsymbolrate(struct s5h1420_state* state);
@@ -94,8 +94,11 @@
 		if (ret != 3)
 			return ret;
 	} else {
-		ret = i2c_transfer(state->i2c, &msg[1], 2);
-		if (ret != 2)
+		ret = i2c_transfer(state->i2c, &msg[1], 1);
+		if (ret != 1)
+			return ret;
+		ret = i2c_transfer(state->i2c, &msg[2], 1);
+		if (ret != 1)
 			return ret;
 	}
 
@@ -823,7 +826,7 @@
 	struct s5h1420_state* state = fe->demodulator_priv;
 
 	/* disable power down and do reset */
-	state->CON_1_val = 0x10;
+	state->CON_1_val = state->config->serial_mpeg << 4;
 	s5h1420_writereg(state, 0x02, state->CON_1_val);
 	msleep(10);
 	s5h1420_reset(state);

diff --git a/drivers/media/dvb/frontends/s5h1420.h b/drivers/media/dvb/frontends/s5h1420.h
index 4c913f1..ff30813 100644
--- a/drivers/media/dvb/frontends/s5h1420.h
+++ b/drivers/media/dvb/frontends/s5h1420.h

@@ -32,10 +32,12 @@
 	u8 demod_address;
 
 	/* does the inversion require inversion? */
-	u8 invert : 1;
+	u8 invert:1;
 
-	u8 repeated_start_workaround : 1;
-	u8 cdclk_polarity : 1; /* 1 == falling edge, 0 == raising edge */
+	u8 repeated_start_workaround:1;
+	u8 cdclk_polarity:1; /* 1 == falling edge, 0 == raising edge */
+
+	u8 serial_mpeg:1;
 };
 
 #if defined(CONFIG_DVB_S5H1420) || (defined(CONFIG_DVB_S5H1420_MODULE) && defined(MODULE))

diff --git a/drivers/media/dvb/siano/sms-cards.c b/drivers/media/dvb/siano/sms-cards.c
index cc5efb6..9da260f 100644
--- a/drivers/media/dvb/siano/sms-cards.c
+++ b/drivers/media/dvb/siano/sms-cards.c

@@ -40,6 +40,8 @@
 		.driver_info = SMS1XXX_BOARD_HAUPPAUGE_OKEMO_B },
 	{ USB_DEVICE(0x2040, 0x5500),
 		.driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM },
+	{ USB_DEVICE(0x2040, 0x5510),
+		.driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM },
 	{ USB_DEVICE(0x2040, 0x5580),
 		.driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM },
 	{ USB_DEVICE(0x2040, 0x5590),
@@ -87,7 +89,7 @@
 		.fw[DEVICE_MODE_DVBT_BDA] = "sms1xxx-nova-b-dvbt-01.fw",
 	},
 	[SMS1XXX_BOARD_HAUPPAUGE_WINDHAM] = {
-		.name	= "Hauppauge WinTV-Nova-T-MiniStick",
+		.name	= "Hauppauge WinTV MiniStick",
 		.type	= SMS_NOVA_B0,
 		.fw[DEVICE_MODE_DVBT_BDA] = "sms1xxx-hcw-55xxx-dvbt-01.fw",
 	},

diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c
index 6ae4cc8..933eaef 100644
--- a/drivers/media/video/bt8xx/bttv-driver.c
+++ b/drivers/media/video/bt8xx/bttv-driver.c

@@ -3431,7 +3431,7 @@
 	dprintk("bttv: open minor=%d\n",minor);
 
 	for (i = 0; i < bttv_num; i++) {
-		if (bttvs[i].radio_dev->minor == minor) {
+		if (bttvs[i].radio_dev && bttvs[i].radio_dev->minor == minor) {
 			btv = &bttvs[i];
 			break;
 		}

diff --git a/drivers/media/video/cafe_ccic.c b/drivers/media/video/cafe_ccic.c
index c149b7d..5405c30 100644
--- a/drivers/media/video/cafe_ccic.c
+++ b/drivers/media/video/cafe_ccic.c

@@ -19,6 +19,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/fs.h>
+#include <linux/mm.h>
 #include <linux/pci.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>

diff --git a/drivers/media/video/cpia2/cpia2_usb.c b/drivers/media/video/cpia2/cpia2_usb.c
index a4574740..a8a1990 100644
--- a/drivers/media/video/cpia2/cpia2_usb.c
+++ b/drivers/media/video/cpia2/cpia2_usb.c

@@ -632,7 +632,7 @@
 static int submit_urbs(struct camera_data *cam)
 {
 	struct urb *urb;
-	int fx, err, i;
+	int fx, err, i, j;
 
 	for(i=0; i<NUM_SBUF; ++i) {
 		if (cam->sbuf[i].data)
@@ -657,6 +657,9 @@
 		}
 		urb = usb_alloc_urb(FRAMES_PER_DESC, GFP_KERNEL);
 		if (!urb) {
+			ERR("%s: usb_alloc_urb error!\n", __func__);
+			for (j = 0; j < i; j++)
+				usb_free_urb(cam->sbuf[j].urb);
 			return -ENOMEM;
 		}
 

diff --git a/drivers/media/video/cx18/cx18-cards.c b/drivers/media/video/cx18/cx18-cards.c
index 8fe5f38..3cb9734 100644
--- a/drivers/media/video/cx18/cx18-cards.c
+++ b/drivers/media/video/cx18/cx18-cards.c

@@ -163,7 +163,7 @@
 	},
 	.audio_inputs = {
 		{ CX18_CARD_INPUT_AUD_TUNER,
-		  CX18_AV_AUDIO8, 0 },
+		  CX18_AV_AUDIO5, 0 },
 		{ CX18_CARD_INPUT_LINE_IN1,
 		  CX18_AV_AUDIO_SERIAL1, 0 },
 	},

diff --git a/drivers/media/video/em28xx/em28xx-audio.c b/drivers/media/video/em28xx/em28xx-audio.c
index 3c00610..ac3292d 100644
--- a/drivers/media/video/em28xx/em28xx-audio.c
+++ b/drivers/media/video/em28xx/em28xx-audio.c

@@ -117,10 +117,10 @@
 
 			if (oldptr + length >= runtime->buffer_size) {
 				unsigned int cnt =
-				    runtime->buffer_size - oldptr - 1;
+				    runtime->buffer_size - oldptr;
 				memcpy(runtime->dma_area + oldptr * stride, cp,
 				       cnt * stride);
-				memcpy(runtime->dma_area, cp + cnt,
+				memcpy(runtime->dma_area, cp + cnt * stride,
 				       length * stride - cnt * stride);
 			} else {
 				memcpy(runtime->dma_area + oldptr * stride, cp,
@@ -161,8 +161,14 @@
 
 		memset(dev->adev->transfer_buffer[i], 0x80, sb_size);
 		urb = usb_alloc_urb(EM28XX_NUM_AUDIO_PACKETS, GFP_ATOMIC);
-		if (!urb)
+		if (!urb) {
+			em28xx_errdev("usb_alloc_urb failed!\n");
+			for (j = 0; j < i; j++) {
+				usb_free_urb(dev->adev->urb[j]);
+				kfree(dev->adev->transfer_buffer[j]);
+			}
 			return -ENOMEM;
+		}
 
 		urb->dev = dev->udev;
 		urb->context = dev;

diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c
index 452da70..de943cf 100644
--- a/drivers/media/video/em28xx/em28xx-cards.c
+++ b/drivers/media/video/em28xx/em28xx-cards.c

@@ -93,28 +93,6 @@
 			.amux     = 0,
 		} },
 	},
-	[EM2800_BOARD_KWORLD_USB2800] = {
-		.name         = "Kworld USB2800",
-		.valid        = EM28XX_BOARD_NOT_VALIDATED,
-		.is_em2800    = 1,
-		.vchannels    = 3,
-		.tuner_type   = TUNER_PHILIPS_FCV1236D,
-		.tda9887_conf = TDA9887_PRESENT,
-		.decoder      = EM28XX_SAA7113,
-		.input          = { {
-			.type     = EM28XX_VMUX_TELEVISION,
-			.vmux     = SAA7115_COMPOSITE2,
-			.amux     = 0,
-		}, {
-			.type     = EM28XX_VMUX_COMPOSITE1,
-			.vmux     = SAA7115_COMPOSITE0,
-			.amux     = 1,
-		}, {
-			.type     = EM28XX_VMUX_SVIDEO,
-			.vmux     = SAA7115_SVIDEO3,
-			.amux     = 1,
-		} },
-	},
 	[EM2820_BOARD_KWORLD_PVRTV2800RF] = {
 		.name         = "Kworld PVR TV 2800 RF",
 		.is_em2800    = 0,
@@ -599,7 +577,7 @@
 		}, {
 			.type     = EM28XX_VMUX_COMPOSITE1,
 			.vmux     = TVP5150_COMPOSITE1,
-			.amux     = 1,
+			.amux     = 3,
 		}, {
 			.type     = EM28XX_VMUX_SVIDEO,
 			.vmux     = TVP5150_SVIDEO,
@@ -952,22 +930,23 @@
 	},
 	[EM2880_BOARD_KWORLD_DVB_310U] = {
 		.name	      = "KWorld DVB-T 310U",
-		.valid        = EM28XX_BOARD_NOT_VALIDATED,
 		.vchannels    = 3,
 		.tuner_type   = TUNER_XC2028,
+		.has_dvb      = 1,
+		.mts_firmware = 1,
 		.decoder      = EM28XX_TVP5150,
 		.input          = { {
 			.type     = EM28XX_VMUX_TELEVISION,
 			.vmux     = TVP5150_COMPOSITE0,
-			.amux     = 0,
+			.amux     = EM28XX_AMUX_VIDEO,
 		}, {
 			.type     = EM28XX_VMUX_COMPOSITE1,
 			.vmux     = TVP5150_COMPOSITE1,
-			.amux     = 1,
-		}, {
+			.amux     = EM28XX_AMUX_AC97_LINE_IN,
+		}, {	/* S-video has not been tested yet */
 			.type     = EM28XX_VMUX_SVIDEO,
 			.vmux     = TVP5150_SVIDEO,
-			.amux     = 1,
+			.amux     = EM28XX_AMUX_AC97_LINE_IN,
 		} },
 	},
 	[EM2881_BOARD_DNT_DA2_HYBRID] = {
@@ -1282,6 +1261,7 @@
 static struct em28xx_hash_table em28xx_eeprom_hash [] = {
 	/* P/N: SA 60002070465 Tuner: TVF7533-MF */
 	{0x6ce05a8f, EM2820_BOARD_PROLINK_PLAYTV_USB2, TUNER_YMEC_TVF_5533MF},
+	{0x966a0441, EM2880_BOARD_KWORLD_DVB_310U, TUNER_XC2028},
 };
 
 /* I2C devicelist hash table for devices with generic USB IDs */
@@ -1552,9 +1532,12 @@
 		/* djh - Not sure which demod we need here */
 		ctl->demod = XC3028_FE_DEFAULT;
 		break;
+	case EM2880_BOARD_AMD_ATI_TV_WONDER_HD_600:
+		ctl->demod = XC3028_FE_DEFAULT;
+		ctl->fname = XC3028L_DEFAULT_FIRMWARE;
+		break;
 	case EM2883_BOARD_HAUPPAUGE_WINTV_HVR_950:
 	case EM2880_BOARD_PINNACLE_PCTV_HD_PRO:
-	case EM2880_BOARD_AMD_ATI_TV_WONDER_HD_600:
 		/* FIXME: Better to specify the needed IF */
 		ctl->demod = XC3028_FE_DEFAULT;
 		break;
@@ -1764,6 +1747,20 @@
 		break;
 	case EM2820_BOARD_UNKNOWN:
 	case EM2800_BOARD_UNKNOWN:
+		/*
+		 * The K-WORLD DVB-T 310U is detected as an MSI Digivox AD.
+		 *
+		 * This occurs because they share identical USB vendor and
+		 * product IDs.
+		 *
+		 * What we do here is look up the EEPROM hash of the K-WORLD
+		 * and if it is found then we decide that we do not have
+		 * a DIGIVOX and reset the device to the K-WORLD instead.
+		 *
+		 * This solution is only valid if they do not share eeprom
+		 * hash identities which has not been determined as yet.
+		 */
+	case EM2880_BOARD_MSI_DIGIVOX_AD:
 		if (!em28xx_hint_board(dev))
 			em28xx_set_model(dev);
 		break;

diff --git a/drivers/media/video/em28xx/em28xx-dvb.c b/drivers/media/video/em28xx/em28xx-dvb.c
index 4b992bc..d2b1a1a 100644
--- a/drivers/media/video/em28xx/em28xx-dvb.c
+++ b/drivers/media/video/em28xx/em28xx-dvb.c

@@ -452,6 +452,15 @@
 			goto out_free;
 		}
 		break;
+	case EM2880_BOARD_KWORLD_DVB_310U:
+		dvb->frontend = dvb_attach(zl10353_attach,
+						&em28xx_zl10353_with_xc3028,
+						&dev->i2c_adap);
+		if (attach_xc3028(0x61, dev) < 0) {
+			result = -EINVAL;
+			goto out_free;
+		}
+		break;
 	default:
 		printk(KERN_ERR "%s/2: The frontend of your DVB/ATSC card"
 				" isn't supported yet\n",

diff --git a/drivers/media/video/gspca/gspca.c b/drivers/media/video/gspca/gspca.c
index 7be6928..ac95c55 100644
--- a/drivers/media/video/gspca/gspca.c
+++ b/drivers/media/video/gspca/gspca.c

@@ -459,6 +459,7 @@
 		urb = usb_alloc_urb(npkt, GFP_KERNEL);
 		if (!urb) {
 			err("usb_alloc_urb failed");
+			destroy_urbs(gspca_dev);
 			return -ENOMEM;
 		}
 		urb->transfer_buffer = usb_buffer_alloc(gspca_dev->dev,
@@ -468,8 +469,8 @@
 
 		if (urb->transfer_buffer == NULL) {
 			usb_free_urb(urb);
-			destroy_urbs(gspca_dev);
 			err("usb_buffer_urb failed");
+			destroy_urbs(gspca_dev);
 			return -ENOMEM;
 		}
 		gspca_dev->urb[n] = urb;

diff --git a/drivers/media/video/gspca/pac7311.c b/drivers/media/video/gspca/pac7311.c
index d4be518..ba865b7 100644
--- a/drivers/media/video/gspca/pac7311.c
+++ b/drivers/media/video/gspca/pac7311.c

@@ -1063,6 +1063,7 @@
 	{USB_DEVICE(0x093a, 0x2621), .driver_info = SENSOR_PAC7302},
 	{USB_DEVICE(0x093a, 0x2624), .driver_info = SENSOR_PAC7302},
 	{USB_DEVICE(0x093a, 0x2626), .driver_info = SENSOR_PAC7302},
+	{USB_DEVICE(0x093a, 0x262a), .driver_info = SENSOR_PAC7302},
 	{}
 };
 MODULE_DEVICE_TABLE(usb, device_table);

diff --git a/drivers/media/video/gspca/sonixb.c b/drivers/media/video/gspca/sonixb.c
index 5dd78c6..12b81ae 100644
--- a/drivers/media/video/gspca/sonixb.c
+++ b/drivers/media/video/gspca/sonixb.c

@@ -232,7 +232,7 @@
 static struct v4l2_pix_format vga_mode[] = {
 	{160, 120, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
 		.bytesperline = 160,
-		.sizeimage = 160 * 120 * 5 / 4,
+		.sizeimage = 160 * 120,
 		.colorspace = V4L2_COLORSPACE_SRGB,
 		.priv = 2 | MODE_RAW},
 	{160, 120, V4L2_PIX_FMT_SN9C10X, V4L2_FIELD_NONE,
@@ -264,7 +264,7 @@
 		.priv = 1 | MODE_REDUCED_SIF},
 	{176, 144, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
 		.bytesperline = 176,
-		.sizeimage = 176 * 144 * 5 / 4,
+		.sizeimage = 176 * 144,
 		.colorspace = V4L2_COLORSPACE_SRGB,
 		.priv = 1 | MODE_RAW},
 	{176, 144, V4L2_PIX_FMT_SN9C10X, V4L2_FIELD_NONE,

diff --git a/drivers/media/video/gspca/sonixj.c b/drivers/media/video/gspca/sonixj.c
index d75b1d2..572b0f3 100644
--- a/drivers/media/video/gspca/sonixj.c
+++ b/drivers/media/video/gspca/sonixj.c

@@ -707,6 +707,7 @@
 			0x08, 0,		/* value, index */
 			gspca_dev->usb_buf, 8,
 			500);
+	msleep(2);
 }
 
 /* read 5 bytes in gspca_dev->usb_buf */
@@ -976,13 +977,13 @@
 	case BRIDGE_SN9C105:
 		if (regF1 != 0x11)
 			return -ENODEV;
-		reg_w(gspca_dev, 0x02, regGpio, 2);
+		reg_w(gspca_dev, 0x01, regGpio, 2);
 		break;
 	case BRIDGE_SN9C120:
 		if (regF1 != 0x12)
 			return -ENODEV;
 		regGpio[1] = 0x70;
-		reg_w(gspca_dev, 0x02, regGpio, 2);
+		reg_w(gspca_dev, 0x01, regGpio, 2);
 		break;
 	default:
 /*	case BRIDGE_SN9C110: */
@@ -1183,7 +1184,7 @@
 	static const __u8 CA[] = { 0x28, 0xd8, 0x14, 0xec };
 	static const __u8 CE[] = { 0x32, 0xdd, 0x2d, 0xdd };	/* MI0360 */
 	static const __u8 CE_ov76xx[] =
-			{ 0x32, 0xdd, 0x32, 0xdd };	/* OV7630/48 */
+				{ 0x32, 0xdd, 0x32, 0xdd };
 
 	sn9c1xx = sn_tb[(int) sd->sensor];
 	configure_gpio(gspca_dev, sn9c1xx);
@@ -1223,8 +1224,15 @@
 	reg_w(gspca_dev, 0x20, gamma_def, sizeof gamma_def);
 	for (i = 0; i < 8; i++)
 		reg_w(gspca_dev, 0x84, reg84, sizeof reg84);
+	switch (sd->sensor) {
+	case SENSOR_OV7660:
+		reg_w1(gspca_dev, 0x9a, 0x05);
+		break;
+	default:
 		reg_w1(gspca_dev, 0x9a, 0x08);
 		reg_w1(gspca_dev, 0x99, 0x59);
+		break;
+	}
 
 	mode = gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv;
 	if (mode)
@@ -1275,8 +1283,8 @@
 /*			reg1 = 0x44; */
 /*			reg1 = 0x46;	(done) */
 		} else {
-			reg17 = 0x22;	/* 640 MCKSIZE */
-			reg1 = 0x06;
+			reg17 = 0xa2;	/* 640 */
+			reg1 = 0x44;
 		}
 		break;
 	}
@@ -1285,6 +1293,7 @@
 	switch (sd->sensor) {
 	case SENSOR_OV7630:
 	case SENSOR_OV7648:
+	case SENSOR_OV7660:
 		reg_w(gspca_dev, 0xce, CE_ov76xx, 4);
 		break;
 	default:

diff --git a/drivers/media/video/gspca/spca561.c b/drivers/media/video/gspca/spca561.c
index cfbc9eb..95fcfcb 100644
--- a/drivers/media/video/gspca/spca561.c
+++ b/drivers/media/video/gspca/spca561.c

@@ -225,7 +225,7 @@
 	reg_w_val(gspca_dev->dev, 0x8802, (mode | 0x01));
 	do {
 		reg_r(gspca_dev, 0x8803, 1);
-		if (!gspca_dev->usb_buf)
+		if (!gspca_dev->usb_buf[0])
 			break;
 	} while (--retry);
 	if (retry == 0)

diff --git a/drivers/media/video/gspca/zc3xx.c b/drivers/media/video/gspca/zc3xx.c
index 8d7c27e..d61ef72 100644
--- a/drivers/media/video/gspca/zc3xx.c
+++ b/drivers/media/video/gspca/zc3xx.c

@@ -6576,8 +6576,8 @@
 		 cs2102_60HZ, cs2102_60HZScale},
 /* SENSOR_CS2102K 1 */
 		{cs2102_NoFliker, cs2102_NoFlikerScale,
-		 cs2102_50HZ, cs2102_50HZScale,
-		 cs2102_60HZ, cs2102_60HZScale},
+		 NULL, NULL, /* currently disabled */
+		 NULL, NULL},
 /* SENSOR_GC0305 2 */
 		{gc0305_NoFliker, gc0305_NoFliker,
 		 gc0305_50HZ, gc0305_50HZ,

diff --git a/drivers/media/video/ov511.c b/drivers/media/video/ov511.c
index 3d3c48d..c685240 100644
--- a/drivers/media/video/ov511.c
+++ b/drivers/media/video/ov511.c

@@ -3591,7 +3591,7 @@
 ov51x_init_isoc(struct usb_ov511 *ov)
 {
 	struct urb *urb;
-	int fx, err, n, size;
+	int fx, err, n, i, size;
 
 	PDEBUG(3, "*** Initializing capture ***");
 
@@ -3662,6 +3662,8 @@
 		urb = usb_alloc_urb(FRAMES_PER_DESC, GFP_KERNEL);
 		if (!urb) {
 			err("init isoc: usb_alloc_urb ret. NULL");
+			for (i = 0; i < n; i++)
+				usb_free_urb(ov->sbuf[i].urb);
 			return -ENOMEM;
 		}
 		ov->sbuf[n].urb = urb;
@@ -5651,7 +5653,7 @@
 	if (!ov->dev)
 		return -ENODEV;
 	sensor_get_exposure(ov, &exp);
-	return sprintf(buf, "%d\n", exp >> 8);
+	return sprintf(buf, "%d\n", exp);
 }
 static DEVICE_ATTR(exposure, S_IRUGO, show_exposure, NULL);
 

diff --git a/drivers/media/video/pvrusb2/pvrusb2-devattr.c b/drivers/media/video/pvrusb2/pvrusb2-devattr.c
index 88e1751..cbe2a34 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-devattr.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-devattr.c

@@ -489,6 +489,8 @@
 struct usb_device_id pvr2_device_table[] = {
 	{ USB_DEVICE(0x2040, 0x2900),
 	  .driver_info = (kernel_ulong_t)&pvr2_device_29xxx},
+	{ USB_DEVICE(0x2040, 0x2950), /* Logically identical to 2900 */
+	  .driver_info = (kernel_ulong_t)&pvr2_device_29xxx},
 	{ USB_DEVICE(0x2040, 0x2400),
 	  .driver_info = (kernel_ulong_t)&pvr2_device_24xxx},
 	{ USB_DEVICE(0x1164, 0x0622),

diff --git a/drivers/media/video/s2255drv.c b/drivers/media/video/s2255drv.c
index b1d09d8..92b83fe 100644
--- a/drivers/media/video/s2255drv.c
+++ b/drivers/media/video/s2255drv.c

@@ -669,7 +669,7 @@
 		(unsigned long)vbuf, pos);
 	/* tell v4l buffer was filled */
 
-	buf->vb.field_count++;
+	buf->vb.field_count = dev->frame_count[chn] * 2;
 	do_gettimeofday(&ts);
 	buf->vb.ts = ts;
 	buf->vb.state = VIDEOBUF_DONE;
@@ -1268,6 +1268,7 @@
 	dev->last_frame[chn] = -1;
 	dev->bad_payload[chn] = 0;
 	dev->cur_frame[chn] = 0;
+	dev->frame_count[chn] = 0;
 	for (j = 0; j < SYS_FRAMES; j++) {
 		dev->buffer[chn].frame[j].ulState = 0;
 		dev->buffer[chn].frame[j].cur_size = 0;

diff --git a/drivers/media/video/uvc/uvc_ctrl.c b/drivers/media/video/uvc/uvc_ctrl.c
index 6ef3e52..feab12a 100644
--- a/drivers/media/video/uvc/uvc_ctrl.c
+++ b/drivers/media/video/uvc/uvc_ctrl.c

@@ -592,7 +592,7 @@
 	if (ctrl == NULL)
 		return -EINVAL;
 
-	data = kmalloc(8, GFP_KERNEL);
+	data = kmalloc(ctrl->info->size, GFP_KERNEL);
 	if (data == NULL)
 		return -ENOMEM;
 

diff --git a/drivers/media/video/w9968cf.c b/drivers/media/video/w9968cf.c
index 168baab..11edf79 100644
--- a/drivers/media/video/w9968cf.c
+++ b/drivers/media/video/w9968cf.c

@@ -911,7 +911,6 @@
 
 	for (i = 0; i < W9968CF_URBS; i++) {
 		urb = usb_alloc_urb(W9968CF_ISO_PACKETS, GFP_KERNEL);
-		cam->urb[i] = urb;
 		if (!urb) {
 			for (j = 0; j < i; j++)
 				usb_free_urb(cam->urb[j]);
@@ -919,6 +918,7 @@
 			return -ENOMEM;
 		}
 
+		cam->urb[i] = urb;
 		urb->dev = udev;
 		urb->context = (void*)cam;
 		urb->pipe = usb_rcvisocpipe(udev, 1);

diff --git a/drivers/media/video/wm8739.c b/drivers/media/video/wm8739.c
index 95c79ad..54ac3fe 100644
--- a/drivers/media/video/wm8739.c
+++ b/drivers/media/video/wm8739.c

@@ -274,10 +274,8 @@
 			client->addr << 1, client->adapter->name);
 
 	state = kmalloc(sizeof(struct wm8739_state), GFP_KERNEL);
-	if (state == NULL) {
-		kfree(client);
+	if (state == NULL)
 		return -ENOMEM;
-	}
 	state->vol_l = 0x17; /* 0dB */
 	state->vol_r = 0x17; /* 0dB */
 	state->muted = 0;

diff --git a/drivers/media/video/zoran_card.c b/drivers/media/video/zoran_card.c
index d842a7c..3282be7 100644
--- a/drivers/media/video/zoran_card.c
+++ b/drivers/media/video/zoran_card.c

@@ -988,7 +988,7 @@
 	zr->v4l_grab_seq = 0;
 	zr->v4l_settings.width = 192;
 	zr->v4l_settings.height = 144;
-	zr->v4l_settings.format = &zoran_formats[4];	/* YUY2 - YUV-4:2:2 packed */
+	zr->v4l_settings.format = &zoran_formats[7];	/* YUY2 - YUV-4:2:2 packed */
 	zr->v4l_settings.bytesperline =
 	    zr->v4l_settings.width *
 	    ((zr->v4l_settings.format->depth + 7) / 8);

diff --git a/drivers/media/video/zoran_driver.c b/drivers/media/video/zoran_driver.c
index ec6f596..2dab9ee 100644
--- a/drivers/media/video/zoran_driver.c
+++ b/drivers/media/video/zoran_driver.c

@@ -134,7 +134,7 @@
 	}, {
 		.name = "16-bit RGB BE",
 		ZFMT(-1,
-		     V4L2_PIX_FMT_RGB565, V4L2_COLORSPACE_SRGB),
+		     V4L2_PIX_FMT_RGB565X, V4L2_COLORSPACE_SRGB),
 		.depth = 16,
 		.flags = ZORAN_FORMAT_CAPTURE |
 			 ZORAN_FORMAT_OVERLAY,
@@ -2737,7 +2737,8 @@
 				    fh->v4l_settings.format->fourcc;
 				fmt->fmt.pix.colorspace =
 				    fh->v4l_settings.format->colorspace;
-				fmt->fmt.pix.bytesperline = 0;
+				fmt->fmt.pix.bytesperline =
+				    fh->v4l_settings.bytesperline;
 				if (BUZ_MAX_HEIGHT <
 				    (fh->v4l_settings.height * 2))
 					fmt->fmt.pix.field =
@@ -2833,13 +2834,6 @@
 				fmt->fmt.pix.pixelformat,
 				(char *) &printformat);
 
-			if (fmt->fmt.pix.bytesperline > 0) {
-				dprintk(5,
-					KERN_ERR "%s: bpl not supported\n",
-					ZR_DEVNAME(zr));
-				return -EINVAL;
-			}
-
 			/* we can be requested to do JPEG/raw playback/capture */
 			if (!
 			    (fmt->type == V4L2_BUF_TYPE_VIDEO_CAPTURE ||
@@ -2923,6 +2917,7 @@
 				fh->jpg_buffers.buffer_size =
 				    zoran_v4l2_calc_bufsize(&fh->
 							    jpg_settings);
+				fmt->fmt.pix.bytesperline = 0;
 				fmt->fmt.pix.sizeimage =
 				    fh->jpg_buffers.buffer_size;
 
@@ -2979,6 +2974,8 @@
 
 				/* tell the user the
 				 * results/missing stuff */
+				fmt->fmt.pix.bytesperline =
+					fh->v4l_settings.bytesperline;
 				fmt->fmt.pix.sizeimage =
 					fh->v4l_settings.height *
 					fh->v4l_settings.bytesperline;

diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 917035e..0000896 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c

@@ -426,8 +426,6 @@
 	host->sg = NULL;
 	host->data = data;
 
-	mci_writel(host, BLKR, MCI_BCNT(data->blocks)
-			| MCI_BLKLEN(data->blksz));
 	dev_vdbg(&mmc->class_dev, "BLKR=0x%08x\n",
 			MCI_BCNT(data->blocks) | MCI_BLKLEN(data->blksz));
 
@@ -483,6 +481,10 @@
 		if (data->blocks > 1 && data->blksz & 3)
 			goto fail;
 		atmci_set_timeout(host, data);
+
+		/* Must set block count/size before sending command */
+		mci_writel(host, BLKR, MCI_BCNT(data->blocks)
+				| MCI_BLKLEN(data->blksz));
 	}
 
 	iflags = MCI_CMDRDY;

diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c
index 0b6095b..bcd2bc4 100644
--- a/drivers/net/e1000e/ich8lan.c
+++ b/drivers/net/e1000e/ich8lan.c

@@ -396,7 +396,7 @@
 	u32 extcnf_ctrl;
 	u32 timeout = PHY_CFG_TIMEOUT;
 
-	WARN_ON(preempt_count());
+	might_sleep();
 
 	if (!mutex_trylock(&nvm_mutex)) {
 		WARN(1, KERN_ERR "e1000e mutex contention. Owned by pid %d\n",

diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index f118252..52e2743 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c

@@ -422,6 +422,12 @@
 	return err;
 }
 
+static int rtc_dev_fasync(int fd, struct file *file, int on)
+{
+	struct rtc_device *rtc = file->private_data;
+	return fasync_helper(fd, file, on, &rtc->async_queue);
+}
+
 static int rtc_dev_release(struct inode *inode, struct file *file)
 {
 	struct rtc_device *rtc = file->private_data;
@@ -434,16 +440,13 @@
 	if (rtc->ops->release)
 		rtc->ops->release(rtc->dev.parent);
 
+	if (file->f_flags & FASYNC)
+		rtc_dev_fasync(-1, file, 0);
+
 	clear_bit_unlock(RTC_DEV_BUSY, &rtc->flags);
 	return 0;
 }
 
-static int rtc_dev_fasync(int fd, struct file *file, int on)
-{
-	struct rtc_device *rtc = file->private_data;
-	return fasync_helper(fd, file, on, &rtc->async_queue);
-}
-
 static const struct file_operations rtc_dev_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,

diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c
index 1679e2f..a0b6b46 100644
--- a/drivers/s390/cio/qdio_setup.c
+++ b/drivers/s390/cio/qdio_setup.c

@@ -447,51 +447,36 @@
 {
 	char s[80];
 
-	sprintf(s, "%s sc:%x ", cdev->dev.bus_id, irq_ptr->schid.sch_no);
-
+	sprintf(s, "qdio: %s ", dev_name(&cdev->dev));
 	switch (irq_ptr->qib.qfmt) {
 	case QDIO_QETH_QFMT:
-		sprintf(s + strlen(s), "OSADE ");
+		sprintf(s + strlen(s), "OSA ");
 		break;
 	case QDIO_ZFCP_QFMT:
 		sprintf(s + strlen(s), "ZFCP ");
 		break;
 	case QDIO_IQDIO_QFMT:
-		sprintf(s + strlen(s), "HiperSockets ");
+		sprintf(s + strlen(s), "HS ");
 		break;
 	}
-	sprintf(s + strlen(s), "using: ");
-
-	if (!is_thinint_irq(irq_ptr))
-		sprintf(s + strlen(s), "no");
-	sprintf(s + strlen(s), "AdapterInterrupts ");
-	if (!(irq_ptr->sch_token != 0))
-		sprintf(s + strlen(s), "no");
-	sprintf(s + strlen(s), "QEBSM ");
-	if (!(irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED))
-		sprintf(s + strlen(s), "no");
-	sprintf(s + strlen(s), "OutboundPCI ");
-	if (!css_general_characteristics.aif_tdd)
-		sprintf(s + strlen(s), "no");
-	sprintf(s + strlen(s), "TDD\n");
-	printk(KERN_INFO "qdio: %s", s);
-
-	memset(s, 0, sizeof(s));
-	sprintf(s, "%s SIGA required: ", cdev->dev.bus_id);
-	if (irq_ptr->siga_flag.input)
-		sprintf(s + strlen(s), "Read ");
-	if (irq_ptr->siga_flag.output)
-		sprintf(s + strlen(s), "Write ");
-	if (irq_ptr->siga_flag.sync)
-		sprintf(s + strlen(s), "Sync ");
-	if (!irq_ptr->siga_flag.no_sync_ti)
-		sprintf(s + strlen(s), "SyncAI ");
-	if (!irq_ptr->siga_flag.no_sync_out_ti)
-		sprintf(s + strlen(s), "SyncOutAI ");
-	if (!irq_ptr->siga_flag.no_sync_out_pci)
-		sprintf(s + strlen(s), "SyncOutPCI");
+	sprintf(s + strlen(s), "on SC %x using ", irq_ptr->schid.sch_no);
+	sprintf(s + strlen(s), "AI:%d ", is_thinint_irq(irq_ptr));
+	sprintf(s + strlen(s), "QEBSM:%d ", (irq_ptr->sch_token) ? 1 : 0);
+	sprintf(s + strlen(s), "PCI:%d ",
+		(irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED) ? 1 : 0);
+	sprintf(s + strlen(s), "TDD:%d ", css_general_characteristics.aif_tdd);
+	sprintf(s + strlen(s), "SIGA:");
+	sprintf(s + strlen(s), "%s", (irq_ptr->siga_flag.input) ? "R" : " ");
+	sprintf(s + strlen(s), "%s", (irq_ptr->siga_flag.output) ? "W" : " ");
+	sprintf(s + strlen(s), "%s", (irq_ptr->siga_flag.sync) ? "S" : " ");
+	sprintf(s + strlen(s), "%s",
+		(!irq_ptr->siga_flag.no_sync_ti) ? "A" : " ");
+	sprintf(s + strlen(s), "%s",
+		(!irq_ptr->siga_flag.no_sync_out_ti) ? "O" : " ");
+	sprintf(s + strlen(s), "%s",
+		(!irq_ptr->siga_flag.no_sync_out_pci) ? "P" : " ");
 	sprintf(s + strlen(s), "\n");
-	printk(KERN_INFO "qdio: %s", s);
+	printk(KERN_INFO "%s", s);
 }
 
 int __init qdio_setup_init(void)

diff --git a/drivers/spi/orion_spi.c b/drivers/spi/orion_spi.c
index c4eaacd..b872bfa 100644
--- a/drivers/spi/orion_spi.c
+++ b/drivers/spi/orion_spi.c

@@ -427,7 +427,7 @@
 			goto msg_rejected;
 		}
 
-		if (t->speed_hz < orion_spi->min_speed) {
+		if (t->speed_hz && t->speed_hz < orion_spi->min_speed) {
 			dev_err(&spi->dev,
 				"message rejected : "
 				"device min speed (%d Hz) exceeds "

diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index c6299e8..9cbff84 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c

@@ -2400,11 +2400,15 @@
 
  	if (!fbcon_is_inactive(vc, info)) {
 		if (ops->blank_state != blank) {
+			int ret = 1;
+
 			ops->blank_state = blank;
 			fbcon_cursor(vc, blank ? CM_ERASE : CM_DRAW);
 			ops->cursor_flash = (!blank);
 
-			if (fb_blank(info, blank))
+			if (info->fbops->fb_blank)
+				ret = info->fbops->fb_blank(blank, info);
+			if (ret)
 				fbcon_generic_blank(vc, info, blank);
 		}
 

diff --git a/include/asm-mips/cevt-r4k.h b/include/asm-mips/cevt-r4k.h
new file mode 100644
index 0000000..fa4328f
--- /dev/null
+++ b/include/asm-mips/cevt-r4k.h

@@ -0,0 +1,46 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2008 Kevin D. Kissell
+ */
+
+/*
+ * Definitions used for common event timer implementation
+ * for MIPS 4K-type processors and their MIPS MT variants.
+ * Avoids unsightly extern declarations in C files.
+ */
+#ifndef __ASM_CEVT_R4K_H
+#define __ASM_CEVT_R4K_H
+
+DECLARE_PER_CPU(struct clock_event_device, mips_clockevent_device);
+
+void mips_event_handler(struct clock_event_device *dev);
+int c0_compare_int_usable(void);
+void mips_set_clock_mode(enum clock_event_mode, struct clock_event_device *);
+irqreturn_t c0_compare_interrupt(int, void *);
+
+extern struct irqaction c0_compare_irqaction;
+extern int cp0_timer_irq_installed;
+
+/*
+ * Possibly handle a performance counter interrupt.
+ * Return true if the timer interrupt should not be checked
+ */
+
+static inline int handle_perf_irq(int r2)
+{
+	/*
+	 * The performance counter overflow interrupt may be shared with the
+	 * timer interrupt (cp0_perfcount_irq < 0). If it is and a
+	 * performance counter has overflowed (perf_irq() == IRQ_HANDLED)
+	 * and we can't reliably determine if a counter interrupt has also
+	 * happened (!r2) then don't check for a timer interrupt.
+	 */
+	return (cp0_perfcount_irq < 0) &&
+		perf_irq() == IRQ_HANDLED &&
+		!r2;
+}
+
+#endif /* __ASM_CEVT_R4K_H */

diff --git a/include/asm-mips/irqflags.h b/include/asm-mips/irqflags.h
index 881e886..701ec0b 100644
--- a/include/asm-mips/irqflags.h
+++ b/include/asm-mips/irqflags.h

@@ -38,8 +38,17 @@
 	"	.set	pop						\n"
 	"	.endm");
 
+extern void smtc_ipi_replay(void);
+
 static inline void raw_local_irq_enable(void)
 {
+#ifdef CONFIG_MIPS_MT_SMTC
+	/*
+	 * SMTC kernel needs to do a software replay of queued
+	 * IPIs, at the cost of call overhead on each local_irq_enable()
+	 */
+	smtc_ipi_replay();
+#endif
 	__asm__ __volatile__(
 		"raw_local_irq_enable"
 		: /* no outputs */
@@ -47,6 +56,7 @@
 		: "memory");
 }
 
+
 /*
  * For cli() we have to insert nops to make sure that the new value
  * has actually arrived in the status register before the end of this
@@ -185,15 +195,14 @@
 	"	.set	pop						\n"
 	"	.endm							\n");
 
-extern void smtc_ipi_replay(void);
 
 static inline void raw_local_irq_restore(unsigned long flags)
 {
 	unsigned long __tmp1;
 
-#ifdef CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY
+#ifdef CONFIG_MIPS_MT_SMTC
 	/*
-	 * CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY does prompt replay of deferred
+	 * SMTC kernel needs to do a software replay of queued
 	 * IPIs, at the cost of branch and call overhead on each
 	 * local_irq_restore()
 	 */
@@ -208,6 +217,17 @@
 		: "memory");
 }
 
+static inline void __raw_local_irq_restore(unsigned long flags)
+{
+	unsigned long __tmp1;
+
+	__asm__ __volatile__(
+		"raw_local_irq_restore\t%0"
+		: "=r" (__tmp1)
+		: "0" (flags)
+		: "memory");
+}
+
 static inline int raw_irqs_disabled_flags(unsigned long flags)
 {
 #ifdef CONFIG_MIPS_MT_SMTC

diff --git a/include/asm-mips/mipsregs.h b/include/asm-mips/mipsregs.h
index a46f8e2..9798660 100644
--- a/include/asm-mips/mipsregs.h
+++ b/include/asm-mips/mipsregs.h

@@ -1462,7 +1462,7 @@
 {								\
 	unsigned int res;					\
 	unsigned int omt;					\
-	unsigned int flags;					\
+	unsigned long flags;					\
 								\
 	local_irq_save(flags);					\
 	omt = __dmt();						\
@@ -1480,7 +1480,7 @@
 {								\
 	unsigned int res;					\
 	unsigned int omt;					\
-	unsigned int flags;					\
+	unsigned long flags;					\
 								\
 	local_irq_save(flags);					\
 	omt = __dmt();						\
@@ -1498,7 +1498,7 @@
 {								\
 	unsigned int res;					\
 	unsigned int omt;					\
-	unsigned int flags;					\
+	unsigned long flags;					\
 								\
 	local_irq_save(flags);					\
 								\

diff --git a/include/asm-mips/smtc.h b/include/asm-mips/smtc.h
index 3639b28..ea60bf0 100644
--- a/include/asm-mips/smtc.h
+++ b/include/asm-mips/smtc.h

@@ -6,6 +6,7 @@
  */
 
 #include <asm/mips_mt.h>
+#include <asm/smtc_ipi.h>
 
 /*
  * System-wide SMTC status information
@@ -38,14 +39,15 @@
 struct task_struct;
 
 void smtc_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu);
-
+void self_ipi(struct smtc_ipi *);
 void smtc_flush_tlb_asid(unsigned long asid);
-extern int mipsmt_build_cpu_map(int startslot);
-extern void mipsmt_prepare_cpus(void);
+extern int smtc_build_cpu_map(int startslot);
+extern void smtc_prepare_cpus(int cpus);
 extern void smtc_smp_finish(void);
 extern void smtc_boot_secondary(int cpu, struct task_struct *t);
 extern void smtc_cpus_done(void);
 
+
 /*
  * Sharing the TLB between multiple VPEs means that the
  * "random" index selection function is not allowed to

diff --git a/include/asm-mips/sn/mapped_kernel.h b/include/asm-mips/sn/mapped_kernel.h
index c3dd5d0..721496a 100644
--- a/include/asm-mips/sn/mapped_kernel.h
+++ b/include/asm-mips/sn/mapped_kernel.h

@@ -5,6 +5,8 @@
 #ifndef __ASM_SN_MAPPED_KERNEL_H
 #define __ASM_SN_MAPPED_KERNEL_H
 
+#include <linux/mmzone.h>
+
 /*
  * Note on how mapped kernels work: the text and data section is
  * compiled at cksseg segment (LOADADDR = 0xc001c000), and the
@@ -29,10 +31,8 @@
 #define MAPPED_ADDR_RO_TO_PHYS(x)	(x - REP_BASE)
 #define MAPPED_ADDR_RW_TO_PHYS(x)	(x - REP_BASE - 16777216)
 
-#define MAPPED_KERN_RO_PHYSBASE(n) \
-			(PLAT_NODE_DATA(n)->kern_vars.kv_ro_baseaddr)
-#define MAPPED_KERN_RW_PHYSBASE(n) \
-			(PLAT_NODE_DATA(n)->kern_vars.kv_rw_baseaddr)
+#define MAPPED_KERN_RO_PHYSBASE(n) (hub_data(n)->kern_vars.kv_ro_baseaddr)
+#define MAPPED_KERN_RW_PHYSBASE(n) (hub_data(n)->kern_vars.kv_rw_baseaddr)
 
 #define MAPPED_KERN_RO_TO_PHYS(x) \
 				((unsigned long)MAPPED_ADDR_RO_TO_PHYS(x) | \

diff --git a/include/asm-mips/stackframe.h b/include/asm-mips/stackframe.h
index 051e1af..4c37c4e5 100644
--- a/include/asm-mips/stackframe.h
+++ b/include/asm-mips/stackframe.h

@@ -297,14 +297,31 @@
 #ifdef CONFIG_MIPS_MT_SMTC
 		.set	mips32r2
 		/*
-		 * This may not really be necessary if ints are already
-		 * inhibited here.
+		 * We need to make sure the read-modify-write
+		 * of Status below isn't perturbed by an interrupt
+		 * or cross-TC access, so we need to do at least a DMT,
+		 * protected by an interrupt-inhibit. But setting IXMT
+		 * also creates a few-cycle window where an IPI could
+		 * be queued and not be detected before potentially
+		 * returning to a WAIT or user-mode loop. It must be
+		 * replayed.
+		 *
+		 * We're in the middle of a context switch, and
+		 * we can't dispatch it directly without trashing
+		 * some registers, so we'll try to detect this unlikely
+		 * case and program a software interrupt in the VPE,
+		 * as would be done for a cross-VPE IPI.  To accomodate
+		 * the handling of that case, we're doing a DVPE instead
+		 * of just a DMT here to protect against other threads.
+		 * This is a lot of cruft to cover a tiny window.
+		 * If you can find a better design, implement it!
+		 *
 		 */
 		mfc0	v0, CP0_TCSTATUS
 		ori	v0, TCSTATUS_IXMT
 		mtc0	v0, CP0_TCSTATUS
 		_ehb
-		DMT	5				# dmt a1
+		DVPE	5				# dvpe a1
 		jal	mips_ihb
 #endif /* CONFIG_MIPS_MT_SMTC */
 		mfc0	a0, CP0_STATUS
@@ -325,17 +342,50 @@
  */
 		LONG_L	v1, PT_TCSTATUS(sp)
 		_ehb
-		mfc0	v0, CP0_TCSTATUS
+		mfc0	a0, CP0_TCSTATUS
 		andi	v1, TCSTATUS_IXMT
-		/* We know that TCStatua.IXMT should be set from above */
-		xori	v0, v0, TCSTATUS_IXMT
-		or	v0, v0, v1
-		mtc0	v0, CP0_TCSTATUS
-		_ehb
-		andi	a1, a1, VPECONTROL_TE
+		bnez	v1, 0f
+
+/*
+ * We'd like to detect any IPIs queued in the tiny window
+ * above and request an software interrupt to service them
+ * when we ERET.
+ *
+ * Computing the offset into the IPIQ array of the executing
+ * TC's IPI queue in-line would be tedious.  We use part of
+ * the TCContext register to hold 16 bits of offset that we
+ * can add in-line to find the queue head.
+ */
+		mfc0	v0, CP0_TCCONTEXT
+		la	a2, IPIQ
+		srl	v0, v0, 16
+		addu	a2, a2, v0
+		LONG_L	v0, 0(a2)
+		beqz	v0, 0f
+/*
+ * If we have a queue, provoke dispatch within the VPE by setting C_SW1
+ */
+		mfc0	v0, CP0_CAUSE
+		ori	v0, v0, C_SW1
+		mtc0	v0, CP0_CAUSE
+0:
+		/*
+		 * This test should really never branch but
+		 * let's be prudent here.  Having atomized
+		 * the shared register modifications, we can
+		 * now EVPE, and must do so before interrupts
+		 * are potentially re-enabled.
+		 */
+		andi	a1, a1, MVPCONTROL_EVP
 		beqz	a1, 1f
-		emt
+		evpe
 1:
+		/* We know that TCStatua.IXMT should be set from above */
+		xori	a0, a0, TCSTATUS_IXMT
+		or	a0, a0, v1
+		mtc0	a0, CP0_TCSTATUS
+		_ehb
+
 		.set	mips0
 #endif /* CONFIG_MIPS_MT_SMTC */
 		LONG_L	v1, PT_EPC(sp)

diff --git a/include/linux/completion.h b/include/linux/completion.h
index 02ef883..4a6b604 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h

@@ -10,6 +10,18 @@
 
 #include <linux/wait.h>
 
+/**
+ * struct completion - structure used to maintain state for a "completion"
+ *
+ * This is the opaque structure used to maintain the state for a "completion".
+ * Completions currently use a FIFO to queue threads that have to wait for
+ * the "completion" event.
+ *
+ * See also:  complete(), wait_for_completion() (and friends _timeout,
+ * _interruptible, _interruptible_timeout, and _killable), init_completion(),
+ * and macros DECLARE_COMPLETION(), DECLARE_COMPLETION_ONSTACK(), and
+ * INIT_COMPLETION().
+ */
 struct completion {
 	unsigned int done;
 	wait_queue_head_t wait;
@@ -21,6 +33,14 @@
 #define COMPLETION_INITIALIZER_ONSTACK(work) \
 	({ init_completion(&work); work; })
 
+/**
+ * DECLARE_COMPLETION: - declare and initialize a completion structure
+ * @work:  identifier for the completion structure
+ *
+ * This macro declares and initializes a completion structure. Generally used
+ * for static declarations. You should use the _ONSTACK variant for automatic
+ * variables.
+ */
 #define DECLARE_COMPLETION(work) \
 	struct completion work = COMPLETION_INITIALIZER(work)
 
@@ -29,6 +49,13 @@
  * completions - so we use the _ONSTACK() variant for those that
  * are on the kernel stack:
  */
+/**
+ * DECLARE_COMPLETION_ONSTACK: - declare and initialize a completion structure
+ * @work:  identifier for the completion structure
+ *
+ * This macro declares and initializes a completion structure on the kernel
+ * stack.
+ */
 #ifdef CONFIG_LOCKDEP
 # define DECLARE_COMPLETION_ONSTACK(work) \
 	struct completion work = COMPLETION_INITIALIZER_ONSTACK(work)
@@ -36,6 +63,13 @@
 # define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work)
 #endif
 
+/**
+ * init_completion: - Initialize a dynamically allocated completion
+ * @x:  completion structure that is to be initialized
+ *
+ * This inline function will initialize a dynamically created completion
+ * structure.
+ */
 static inline void init_completion(struct completion *x)
 {
 	x->done = 0;
@@ -55,6 +89,13 @@
 extern void complete(struct completion *);
 extern void complete_all(struct completion *);
 
+/**
+ * INIT_COMPLETION: - reinitialize a completion structure
+ * @x:  completion structure to be reinitialized
+ *
+ * This macro should be used to reinitialize a completion structure so it can
+ * be reused. This is especially important after complete_all() is used.
+ */
 #define INIT_COMPLETION(x)	((x).done = 0)
 
 

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index d7faf88..c2747ac 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h

@@ -69,6 +69,7 @@
 #endif
 
 int cpu_up(unsigned int cpu);
+void notify_cpu_starting(unsigned int cpu);
 extern void cpu_hotplug_init(void);
 extern void cpu_maps_update_begin(void);
 extern void cpu_maps_update_done(void);

diff --git a/include/linux/ide.h b/include/linux/ide.h
index 1524829..6514db8 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h

@@ -366,7 +366,9 @@
 	/* Currently on a filemark */
 	IDE_AFLAG_FILEMARK		= (1 << 25),
 	/* 0 = no tape is loaded, so we don't rewind after ejecting */
-	IDE_AFLAG_MEDIUM_PRESENT	= (1 << 26)
+	IDE_AFLAG_MEDIUM_PRESENT	= (1 << 26),
+
+	IDE_AFLAG_NO_AUTOCLOSE		= (1 << 27),
 };
 
 struct ide_drive_s {

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index da2698b..b86fa2f 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h

@@ -213,9 +213,16 @@
 #define CPU_DOWN_FAILED		0x0006 /* CPU (unsigned)v NOT going down */
 #define CPU_DEAD		0x0007 /* CPU (unsigned)v dead */
 #define CPU_DYING		0x0008 /* CPU (unsigned)v not running any task,
-				        * not handling interrupts, soon dead */
+					* not handling interrupts, soon dead.
+					* Called on the dying cpu, interrupts
+					* are already disabled. Must not
+					* sleep, must not fail */
 #define CPU_POST_DEAD		0x0009 /* CPU (unsigned)v dead, cpu_hotplug
 					* lock is dropped */
+#define CPU_STARTING		0x000A /* CPU (unsigned)v soon running.
+					* Called on the new cpu, just before
+					* enabling interrupts. Must not sleep,
+					* must not fail */
 
 /* Used for CPU hotplug events occuring while tasks are frozen due to a suspend
  * operation in progress
@@ -229,6 +236,7 @@
 #define CPU_DOWN_FAILED_FROZEN	(CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
 #define CPU_DEAD_FROZEN		(CPU_DEAD | CPU_TASKS_FROZEN)
 #define CPU_DYING_FROZEN	(CPU_DYING | CPU_TASKS_FROZEN)
+#define CPU_STARTING_FROZEN	(CPU_STARTING | CPU_TASKS_FROZEN)
 
 /* Hibernation and suspend events */
 #define PM_HIBERNATION_PREPARE	0x0001 /* Going to hibernate */

diff --git a/include/linux/proportions.h b/include/linux/proportions.h
index 5afc1b2..cf793bb 100644
--- a/include/linux/proportions.h
+++ b/include/linux/proportions.h

@@ -104,8 +104,8 @@
 	 * snapshot of the last seen global state
 	 * and a lock protecting this state
 	 */
-	int shift;
 	unsigned long period;
+	int shift;
 	spinlock_t lock;		/* protect the snapshot state */
 };
 

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3d9120c..d8e699b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h

@@ -451,8 +451,8 @@
 	 * - everyone except group_exit_task is stopped during signal delivery
 	 *   of fatal signals, group_exit_task processes the signal.
 	 */
-	struct task_struct	*group_exit_task;
 	int			notify_count;
+	struct task_struct	*group_exit_task;
 
 	/* thread group stop support, overloads group_exit_code too */
 	int			group_stop_count;
@@ -897,7 +897,7 @@
 	void (*yield_task) (struct rq *rq);
 	int  (*select_task_rq)(struct task_struct *p, int sync);
 
-	void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
+	void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync);
 
 	struct task_struct * (*pick_next_task) (struct rq *rq);
 	void (*put_prev_task) (struct rq *rq, struct task_struct *p);
@@ -1010,8 +1010,8 @@
 
 struct sched_rt_entity {
 	struct list_head run_list;
-	unsigned int time_slice;
 	unsigned long timeout;
+	unsigned int time_slice;
 	int nr_cpus_allowed;
 
 	struct sched_rt_entity *back;

diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 5da9794..b106fd8 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h

@@ -1,6 +1,8 @@
 #ifndef __LINUX_STACKTRACE_H
 #define __LINUX_STACKTRACE_H
 
+struct task_struct;
+
 #ifdef CONFIG_STACKTRACE
 struct stack_trace {
 	unsigned int nr_entries, max_entries;

diff --git a/init/main.c b/init/main.c
index f6f7042..3820323 100644
--- a/init/main.c
+++ b/init/main.c

@@ -708,7 +708,7 @@
 	int result;
 
 	if (initcall_debug) {
-		print_fn_descriptor_symbol("calling  %s\n", fn);
+		printk("calling  %pF\n", fn);
 		t0 = ktime_get();
 	}
 
@@ -718,8 +718,8 @@
 		t1 = ktime_get();
 		delta = ktime_sub(t1, t0);
 
-		print_fn_descriptor_symbol("initcall %s", fn);
-		printk(" returned %d after %Ld msecs\n", result,
+		printk("initcall %pF returned %d after %Ld msecs\n",
+			fn, result,
 			(unsigned long long) delta.tv64 >> 20);
 	}
 
@@ -737,8 +737,7 @@
 		local_irq_enable();
 	}
 	if (msgbuf[0]) {
-		print_fn_descriptor_symbol(KERN_WARNING "initcall %s", fn);
-		printk(" returned with %s\n", msgbuf);
+		printk("initcall %pF returned with %s\n", fn, msgbuf);
 	}
 
 	return result;

diff --git a/kernel/cpu.c b/kernel/cpu.c
index f17e985..86d4904 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c

@@ -199,13 +199,14 @@
 	struct take_cpu_down_param *param = _param;
 	int err;
 
-	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
-				param->hcpu);
 	/* Ensure this CPU doesn't handle any more interrupts. */
 	err = __cpu_disable();
 	if (err < 0)
 		return err;
 
+	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
+				param->hcpu);
+
 	/* Force idle task to run as soon as we yield: it should
 	   immediately notice cpu is offline and die quickly. */
 	sched_idle_next();
@@ -453,6 +454,25 @@
 }
 #endif /* CONFIG_PM_SLEEP_SMP */
 
+/**
+ * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
+ * @cpu: cpu that just started
+ *
+ * This function calls the cpu_chain notifiers with CPU_STARTING.
+ * It must be called by the arch code on the new cpu, before the new cpu
+ * enables interrupts and before the "boot" cpu returns from __cpu_up().
+ */
+void notify_cpu_starting(unsigned int cpu)
+{
+	unsigned long val = CPU_STARTING;
+
+#ifdef CONFIG_PM_SLEEP_SMP
+	if (cpu_isset(cpu, frozen_cpus))
+		val = CPU_STARTING_FROZEN;
+#endif /* CONFIG_PM_SLEEP_SMP */
+	raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu);
+}
+
 #endif /* CONFIG_SMP */
 
 /*

diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 25d955d..e4dcfb2 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c

@@ -590,6 +590,7 @@
 
 	/* Signal the primary CPU that we are done: */
 	atomic_set(&cpu_in_kgdb[cpu], 0);
+	touch_softlockup_watchdog();
 	clocksource_touch_watchdog();
 	local_irq_restore(flags);
 }
@@ -1432,6 +1433,7 @@
 	    atomic_read(&kgdb_cpu_doing_single_step) != cpu) {
 
 		atomic_set(&kgdb_active, -1);
+		touch_softlockup_watchdog();
 		clocksource_touch_watchdog();
 		local_irq_restore(flags);
 
@@ -1524,6 +1526,7 @@
 kgdb_restore:
 	/* Free kgdb_active */
 	atomic_set(&kgdb_active, -1);
+	touch_softlockup_watchdog();
 	clocksource_touch_watchdog();
 	local_irq_restore(flags);
 

diff --git a/kernel/sched.c b/kernel/sched.c
index ad1962d..9715f4c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c

@@ -204,11 +204,16 @@
 	rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
 }
 
+static inline int rt_bandwidth_enabled(void)
+{
+	return sysctl_sched_rt_runtime >= 0;
+}
+
 static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 {
 	ktime_t now;
 
-	if (rt_b->rt_runtime == RUNTIME_INF)
+	if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
 		return;
 
 	if (hrtimer_active(&rt_b->rt_period_timer))
@@ -298,9 +303,9 @@
 static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
 static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
 #endif /* CONFIG_RT_GROUP_SCHED */
-#else /* !CONFIG_FAIR_GROUP_SCHED */
+#else /* !CONFIG_USER_SCHED */
 #define root_task_group init_task_group
-#endif /* CONFIG_FAIR_GROUP_SCHED */
+#endif /* CONFIG_USER_SCHED */
 
 /* task_group_lock serializes add/remove of task groups and also changes to
  * a task group's cpu shares.
@@ -604,9 +609,9 @@
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
-static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
+static inline void check_preempt_curr(struct rq *rq, struct task_struct *p, int sync)
 {
-	rq->curr->sched_class->check_preempt_curr(rq, p);
+	rq->curr->sched_class->check_preempt_curr(rq, p, sync);
 }
 
 static inline int cpu_of(struct rq *rq)
@@ -1102,7 +1107,7 @@
 	hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
 }
 
-static void init_hrtick(void)
+static inline void init_hrtick(void)
 {
 }
 #endif /* CONFIG_SMP */
@@ -1121,7 +1126,7 @@
 	rq->hrtick_timer.function = hrtick;
 	rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 }
-#else
+#else	/* CONFIG_SCHED_HRTICK */
 static inline void hrtick_clear(struct rq *rq)
 {
 }
@@ -1133,7 +1138,7 @@
 static inline void init_hrtick(void)
 {
 }
-#endif
+#endif	/* CONFIG_SCHED_HRTICK */
 
 /*
  * resched_task - mark a task 'to be rescheduled now'.
@@ -1380,6 +1385,51 @@
 	update_load_sub(&rq->load, load);
 }
 
+#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED)
+typedef int (*tg_visitor)(struct task_group *, void *);
+
+/*
+ * Iterate the full tree, calling @down when first entering a node and @up when
+ * leaving it for the final time.
+ */
+static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
+{
+	struct task_group *parent, *child;
+	int ret;
+
+	rcu_read_lock();
+	parent = &root_task_group;
+down:
+	ret = (*down)(parent, data);
+	if (ret)
+		goto out_unlock;
+	list_for_each_entry_rcu(child, &parent->children, siblings) {
+		parent = child;
+		goto down;
+
+up:
+		continue;
+	}
+	ret = (*up)(parent, data);
+	if (ret)
+		goto out_unlock;
+
+	child = parent;
+	parent = parent->parent;
+	if (parent)
+		goto up;
+out_unlock:
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static int tg_nop(struct task_group *tg, void *data)
+{
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_SMP
 static unsigned long source_load(int cpu, int type);
 static unsigned long target_load(int cpu, int type);
@@ -1397,37 +1447,6 @@
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *);
-
-/*
- * Iterate the full tree, calling @down when first entering a node and @up when
- * leaving it for the final time.
- */
-static void
-walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd)
-{
-	struct task_group *parent, *child;
-
-	rcu_read_lock();
-	parent = &root_task_group;
-down:
-	(*down)(parent, cpu, sd);
-	list_for_each_entry_rcu(child, &parent->children, siblings) {
-		parent = child;
-		goto down;
-
-up:
-		continue;
-	}
-	(*up)(parent, cpu, sd);
-
-	child = parent;
-	parent = parent->parent;
-	if (parent)
-		goto up;
-	rcu_read_unlock();
-}
-
 static void __set_se_shares(struct sched_entity *se, unsigned long shares);
 
 /*
@@ -1486,11 +1505,11 @@
  * This needs to be done in a bottom-up fashion because the rq weight of a
  * parent group depends on the shares of its child groups.
  */
-static void
-tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
+static int tg_shares_up(struct task_group *tg, void *data)
 {
 	unsigned long rq_weight = 0;
 	unsigned long shares = 0;
+	struct sched_domain *sd = data;
 	int i;
 
 	for_each_cpu_mask(i, sd->span) {
@@ -1515,6 +1534,8 @@
 		__update_group_shares_cpu(tg, i, shares, rq_weight);
 		spin_unlock_irqrestore(&rq->lock, flags);
 	}
+
+	return 0;
 }
 
 /*
@@ -1522,10 +1543,10 @@
  * This needs to be done in a top-down fashion because the load of a child
  * group is a fraction of its parents load.
  */
-static void
-tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
+static int tg_load_down(struct task_group *tg, void *data)
 {
 	unsigned long load;
+	long cpu = (long)data;
 
 	if (!tg->parent) {
 		load = cpu_rq(cpu)->load.weight;
@@ -1536,11 +1557,8 @@
 	}
 
 	tg->cfs_rq[cpu]->h_load = load;
-}
 
-static void
-tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd)
-{
+	return 0;
 }
 
 static void update_shares(struct sched_domain *sd)
@@ -1550,7 +1568,7 @@
 
 	if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
 		sd->last_update = now;
-		walk_tg_tree(tg_nop, tg_shares_up, 0, sd);
+		walk_tg_tree(tg_nop, tg_shares_up, sd);
 	}
 }
 
@@ -1561,9 +1579,9 @@
 	spin_lock(&rq->lock);
 }
 
-static void update_h_load(int cpu)
+static void update_h_load(long cpu)
 {
-	walk_tg_tree(tg_load_down, tg_nop, cpu, NULL);
+	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 }
 
 #else
@@ -1921,11 +1939,8 @@
 		running = task_running(rq, p);
 		on_rq = p->se.on_rq;
 		ncsw = 0;
-		if (!match_state || p->state == match_state) {
-			ncsw = p->nivcsw + p->nvcsw;
-			if (unlikely(!ncsw))
-				ncsw = 1;
-		}
+		if (!match_state || p->state == match_state)
+			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
 		task_rq_unlock(rq, &flags);
 
 		/*
@@ -2285,7 +2300,7 @@
 	trace_mark(kernel_sched_wakeup,
 		"pid %d state %ld ## rq %p task %p rq->curr %p",
 		p->pid, p->state, rq, p, rq->curr);
-	check_preempt_curr(rq, p);
+	check_preempt_curr(rq, p, sync);
 
 	p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
@@ -2420,7 +2435,7 @@
 	trace_mark(kernel_sched_wakeup_new,
 		"pid %d state %ld ## rq %p task %p rq->curr %p",
 		p->pid, p->state, rq, p, rq->curr);
-	check_preempt_curr(rq, p);
+	check_preempt_curr(rq, p, 0);
 #ifdef CONFIG_SMP
 	if (p->sched_class->task_wake_up)
 		p->sched_class->task_wake_up(rq, p);
@@ -2880,7 +2895,7 @@
 	 * Note that idle threads have a prio of MAX_PRIO, for this test
 	 * to be always true for them.
 	 */
-	check_preempt_curr(this_rq, p);
+	check_preempt_curr(this_rq, p, 0);
 }
 
 /*
@@ -4627,6 +4642,15 @@
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
 
+/**
+ * complete: - signals a single thread waiting on this completion
+ * @x:  holds the state of this particular completion
+ *
+ * This will wake up a single thread waiting on this completion. Threads will be
+ * awakened in the same order in which they were queued.
+ *
+ * See also complete_all(), wait_for_completion() and related routines.
+ */
 void complete(struct completion *x)
 {
 	unsigned long flags;
@@ -4638,6 +4662,12 @@
 }
 EXPORT_SYMBOL(complete);
 
+/**
+ * complete_all: - signals all threads waiting on this completion
+ * @x:  holds the state of this particular completion
+ *
+ * This will wake up all threads waiting on this particular completion event.
+ */
 void complete_all(struct completion *x)
 {
 	unsigned long flags;
@@ -4658,10 +4688,7 @@
 		wait.flags |= WQ_FLAG_EXCLUSIVE;
 		__add_wait_queue_tail(&x->wait, &wait);
 		do {
-			if ((state == TASK_INTERRUPTIBLE &&
-			     signal_pending(current)) ||
-			    (state == TASK_KILLABLE &&
-			     fatal_signal_pending(current))) {
+			if (signal_pending_state(state, current)) {
 				timeout = -ERESTARTSYS;
 				break;
 			}
@@ -4689,12 +4716,31 @@
 	return timeout;
 }
 
+/**
+ * wait_for_completion: - waits for completion of a task
+ * @x:  holds the state of this particular completion
+ *
+ * This waits to be signaled for completion of a specific task. It is NOT
+ * interruptible and there is no timeout.
+ *
+ * See also similar routines (i.e. wait_for_completion_timeout()) with timeout
+ * and interrupt capability. Also see complete().
+ */
 void __sched wait_for_completion(struct completion *x)
 {
 	wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(wait_for_completion);
 
+/**
+ * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
+ * @x:  holds the state of this particular completion
+ * @timeout:  timeout value in jiffies
+ *
+ * This waits for either a completion of a specific task to be signaled or for a
+ * specified timeout to expire. The timeout is in jiffies. It is not
+ * interruptible.
+ */
 unsigned long __sched
 wait_for_completion_timeout(struct completion *x, unsigned long timeout)
 {
@@ -4702,6 +4748,13 @@
 }
 EXPORT_SYMBOL(wait_for_completion_timeout);
 
+/**
+ * wait_for_completion_interruptible: - waits for completion of a task (w/intr)
+ * @x:  holds the state of this particular completion
+ *
+ * This waits for completion of a specific task to be signaled. It is
+ * interruptible.
+ */
 int __sched wait_for_completion_interruptible(struct completion *x)
 {
 	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
@@ -4711,6 +4764,14 @@
 }
 EXPORT_SYMBOL(wait_for_completion_interruptible);
 
+/**
+ * wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
+ * @x:  holds the state of this particular completion
+ * @timeout:  timeout value in jiffies
+ *
+ * This waits for either a completion of a specific task to be signaled or for a
+ * specified timeout to expire. It is interruptible. The timeout is in jiffies.
+ */
 unsigned long __sched
 wait_for_completion_interruptible_timeout(struct completion *x,
 					  unsigned long timeout)
@@ -4719,6 +4780,13 @@
 }
 EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
 
+/**
+ * wait_for_completion_killable: - waits for completion of a task (killable)
+ * @x:  holds the state of this particular completion
+ *
+ * This waits to be signaled for completion of a specific task. It can be
+ * interrupted by a kill signal.
+ */
 int __sched wait_for_completion_killable(struct completion *x)
 {
 	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
@@ -5121,7 +5189,8 @@
 		 * Do not allow realtime tasks into groups that have no runtime
 		 * assigned.
 		 */
-		if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
+		if (rt_bandwidth_enabled() && rt_policy(policy) &&
+				task_group(p)->rt_bandwidth.rt_runtime == 0)
 			return -EPERM;
 #endif
 
@@ -5957,7 +6026,7 @@
 	set_task_cpu(p, dest_cpu);
 	if (on_rq) {
 		activate_task(rq_dest, p, 0);
-		check_preempt_curr(rq_dest, p);
+		check_preempt_curr(rq_dest, p, 0);
 	}
 done:
 	ret = 1;
@@ -8242,20 +8311,25 @@
 #ifdef in_atomic
 	static unsigned long prev_jiffy;	/* ratelimiting */
 
-	if ((in_atomic() || irqs_disabled()) &&
-	    system_state == SYSTEM_RUNNING && !oops_in_progress) {
-		if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
-			return;
-		prev_jiffy = jiffies;
-		printk(KERN_ERR "BUG: sleeping function called from invalid"
-				" context at %s:%d\n", file, line);
-		printk("in_atomic():%d, irqs_disabled():%d\n",
-			in_atomic(), irqs_disabled());
-		debug_show_held_locks(current);
-		if (irqs_disabled())
-			print_irqtrace_events(current);
-		dump_stack();
-	}
+	if ((!in_atomic() && !irqs_disabled()) ||
+		    system_state != SYSTEM_RUNNING || oops_in_progress)
+		return;
+	if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
+		return;
+	prev_jiffy = jiffies;
+
+	printk(KERN_ERR
+		"BUG: sleeping function called from invalid context at %s:%d\n",
+			file, line);
+	printk(KERN_ERR
+		"in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
+			in_atomic(), irqs_disabled(),
+			current->pid, current->comm);
+
+	debug_show_held_locks(current);
+	if (irqs_disabled())
+		print_irqtrace_events(current);
+	dump_stack();
 #endif
 }
 EXPORT_SYMBOL(__might_sleep);
@@ -8753,75 +8827,97 @@
 static unsigned long to_ratio(u64 period, u64 runtime)
 {
 	if (runtime == RUNTIME_INF)
-		return 1ULL << 16;
+		return 1ULL << 20;
 
-	return div64_u64(runtime << 16, period);
+	return div64_u64(runtime << 20, period);
 }
 
-#ifdef CONFIG_CGROUP_SCHED
-static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
-{
-	struct task_group *tgi, *parent = tg->parent;
-	unsigned long total = 0;
-
-	if (!parent) {
-		if (global_rt_period() < period)
-			return 0;
-
-		return to_ratio(period, runtime) <
-			to_ratio(global_rt_period(), global_rt_runtime());
-	}
-
-	if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period)
-		return 0;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(tgi, &parent->children, siblings) {
-		if (tgi == tg)
-			continue;
-
-		total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
-				tgi->rt_bandwidth.rt_runtime);
-	}
-	rcu_read_unlock();
-
-	return total + to_ratio(period, runtime) <=
-		to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period),
-				parent->rt_bandwidth.rt_runtime);
-}
-#elif defined CONFIG_USER_SCHED
-static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
-{
-	struct task_group *tgi;
-	unsigned long total = 0;
-	unsigned long global_ratio =
-		to_ratio(global_rt_period(), global_rt_runtime());
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(tgi, &task_groups, list) {
-		if (tgi == tg)
-			continue;
-
-		total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
-				tgi->rt_bandwidth.rt_runtime);
-	}
-	rcu_read_unlock();
-
-	return total + to_ratio(period, runtime) < global_ratio;
-}
-#endif
-
 /* Must be called with tasklist_lock held */
 static inline int tg_has_rt_tasks(struct task_group *tg)
 {
 	struct task_struct *g, *p;
+
 	do_each_thread(g, p) {
 		if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
 			return 1;
 	} while_each_thread(g, p);
+
 	return 0;
 }
 
+struct rt_schedulable_data {
+	struct task_group *tg;
+	u64 rt_period;
+	u64 rt_runtime;
+};
+
+static int tg_schedulable(struct task_group *tg, void *data)
+{
+	struct rt_schedulable_data *d = data;
+	struct task_group *child;
+	unsigned long total, sum = 0;
+	u64 period, runtime;
+
+	period = ktime_to_ns(tg->rt_bandwidth.rt_period);
+	runtime = tg->rt_bandwidth.rt_runtime;
+
+	if (tg == d->tg) {
+		period = d->rt_period;
+		runtime = d->rt_runtime;
+	}
+
+	/*
+	 * Cannot have more runtime than the period.
+	 */
+	if (runtime > period && runtime != RUNTIME_INF)
+		return -EINVAL;
+
+	/*
+	 * Ensure we don't starve existing RT tasks.
+	 */
+	if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
+		return -EBUSY;
+
+	total = to_ratio(period, runtime);
+
+	/*
+	 * Nobody can have more than the global setting allows.
+	 */
+	if (total > to_ratio(global_rt_period(), global_rt_runtime()))
+		return -EINVAL;
+
+	/*
+	 * The sum of our children's runtime should not exceed our own.
+	 */
+	list_for_each_entry_rcu(child, &tg->children, siblings) {
+		period = ktime_to_ns(child->rt_bandwidth.rt_period);
+		runtime = child->rt_bandwidth.rt_runtime;
+
+		if (child == d->tg) {
+			period = d->rt_period;
+			runtime = d->rt_runtime;
+		}
+
+		sum += to_ratio(period, runtime);
+	}
+
+	if (sum > total)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
+{
+	struct rt_schedulable_data data = {
+		.tg = tg,
+		.rt_period = period,
+		.rt_runtime = runtime,
+	};
+
+	return walk_tg_tree(tg_schedulable, tg_nop, &data);
+}
+
 static int tg_set_bandwidth(struct task_group *tg,
 		u64 rt_period, u64 rt_runtime)
 {
@@ -8829,14 +8925,9 @@
 
 	mutex_lock(&rt_constraints_mutex);
 	read_lock(&tasklist_lock);
-	if (rt_runtime == 0 && tg_has_rt_tasks(tg)) {
-		err = -EBUSY;
+	err = __rt_schedulable(tg, rt_period, rt_runtime);
+	if (err)
 		goto unlock;
-	}
-	if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
-		err = -EINVAL;
-		goto unlock;
-	}
 
 	spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
 	tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
@@ -8905,19 +8996,25 @@
 
 static int sched_rt_global_constraints(void)
 {
-	struct task_group *tg = &root_task_group;
-	u64 rt_runtime, rt_period;
+	u64 runtime, period;
 	int ret = 0;
 
 	if (sysctl_sched_rt_period <= 0)
 		return -EINVAL;
 
-	rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
-	rt_runtime = tg->rt_bandwidth.rt_runtime;
+	runtime = global_rt_runtime();
+	period = global_rt_period();
+
+	/*
+	 * Sanity check on the sysctl variables.
+	 */
+	if (runtime > period && runtime != RUNTIME_INF)
+		return -EINVAL;
 
 	mutex_lock(&rt_constraints_mutex);
-	if (!__rt_schedulable(tg, rt_period, rt_runtime))
-		ret = -EINVAL;
+	read_lock(&tasklist_lock);
+	ret = __rt_schedulable(NULL, 0, 0);
+	read_unlock(&tasklist_lock);
 	mutex_unlock(&rt_constraints_mutex);
 
 	return ret;
@@ -8991,7 +9088,6 @@
 
 	if (!cgrp->parent) {
 		/* This is early initialization for the top cgroup */
-		init_task_group.css.cgroup = cgrp;
 		return &init_task_group.css;
 	}
 
@@ -9000,9 +9096,6 @@
 	if (IS_ERR(tg))
 		return ERR_PTR(-ENOMEM);
 
-	/* Bind the cgroup to task_group object we just created */
-	tg->css.cgroup = cgrp;
-
 	return &tg->css;
 }
 

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index fb8994c..fcbe850a 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c

@@ -409,64 +409,6 @@
 }
 
 /*
- * The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in
- * that it favours >=0 over <0.
- *
- *   -20         |
- *               |
- *     0 --------+-------
- *             .'
- *    19     .'
- *
- */
-static unsigned long
-calc_delta_asym(unsigned long delta, struct sched_entity *se)
-{
-	struct load_weight lw = {
-		.weight = NICE_0_LOAD,
-		.inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT)
-	};
-
-	for_each_sched_entity(se) {
-		struct load_weight *se_lw = &se->load;
-		unsigned long rw = cfs_rq_of(se)->load.weight;
-
-#ifdef CONFIG_FAIR_SCHED_GROUP
-		struct cfs_rq *cfs_rq = se->my_q;
-		struct task_group *tg = NULL
-
-		if (cfs_rq)
-			tg = cfs_rq->tg;
-
-		if (tg && tg->shares < NICE_0_LOAD) {
-			/*
-			 * scale shares to what it would have been had
-			 * tg->weight been NICE_0_LOAD:
-			 *
-			 *   weight = 1024 * shares / tg->weight
-			 */
-			lw.weight *= se->load.weight;
-			lw.weight /= tg->shares;
-
-			lw.inv_weight = 0;
-
-			se_lw = &lw;
-			rw += lw.weight - se->load.weight;
-		} else
-#endif
-
-		if (se->load.weight < NICE_0_LOAD) {
-			se_lw = &lw;
-			rw += NICE_0_LOAD - se->load.weight;
-		}
-
-		delta = calc_delta_mine(delta, rw, se_lw);
-	}
-
-	return delta;
-}
-
-/*
  * Update the current task's runtime statistics. Skip current tasks that
  * are not in our scheduling class.
  */
@@ -586,11 +528,12 @@
 	update_load_add(&cfs_rq->load, se->load.weight);
 	if (!parent_entity(se))
 		inc_cpu_load(rq_of(cfs_rq), se->load.weight);
-	if (entity_is_task(se))
+	if (entity_is_task(se)) {
 		add_cfs_task_weight(cfs_rq, se->load.weight);
+		list_add(&se->group_node, &cfs_rq->tasks);
+	}
 	cfs_rq->nr_running++;
 	se->on_rq = 1;
-	list_add(&se->group_node, &cfs_rq->tasks);
 }
 
 static void
@@ -599,11 +542,12 @@
 	update_load_sub(&cfs_rq->load, se->load.weight);
 	if (!parent_entity(se))
 		dec_cpu_load(rq_of(cfs_rq), se->load.weight);
-	if (entity_is_task(se))
+	if (entity_is_task(se)) {
 		add_cfs_task_weight(cfs_rq, -se->load.weight);
+		list_del_init(&se->group_node);
+	}
 	cfs_rq->nr_running--;
 	se->on_rq = 0;
-	list_del_init(&se->group_node);
 }
 
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -1085,7 +1029,6 @@
 		long wl, long wg)
 {
 	struct sched_entity *se = tg->se[cpu];
-	long more_w;
 
 	if (!tg->parent)
 		return wl;
@@ -1097,18 +1040,17 @@
 	if (!wl && sched_feat(ASYM_EFF_LOAD))
 		return wl;
 
-	/*
-	 * Instead of using this increment, also add the difference
-	 * between when the shares were last updated and now.
-	 */
-	more_w = se->my_q->load.weight - se->my_q->rq_weight;
-	wl += more_w;
-	wg += more_w;
-
 	for_each_sched_entity(se) {
-#define D(n) (likely(n) ? (n) : 1)
-
 		long S, rw, s, a, b;
+		long more_w;
+
+		/*
+		 * Instead of using this increment, also add the difference
+		 * between when the shares were last updated and now.
+		 */
+		more_w = se->my_q->load.weight - se->my_q->rq_weight;
+		wl += more_w;
+		wg += more_w;
 
 		S = se->my_q->tg->shares;
 		s = se->my_q->shares;
@@ -1117,7 +1059,11 @@
 		a = S*(rw + wl);
 		b = S*rw + s*wg;
 
-		wl = s*(a-b)/D(b);
+		wl = s*(a-b);
+
+		if (likely(b))
+			wl /= b;
+
 		/*
 		 * Assume the group is already running and will
 		 * thus already be accounted for in the weight.
@@ -1126,7 +1072,6 @@
 		 * alter the group weight.
 		 */
 		wg = 0;
-#undef D
 	}
 
 	return wl;
@@ -1143,7 +1088,7 @@
 #endif
 
 static int
-wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
+wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 	    struct task_struct *p, int prev_cpu, int this_cpu, int sync,
 	    int idx, unsigned long load, unsigned long this_load,
 	    unsigned int imbalance)
@@ -1191,8 +1136,8 @@
 	schedstat_inc(p, se.nr_wakeups_affine_attempts);
 	tl_per_task = cpu_avg_load_per_task(this_cpu);
 
-	if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) ||
-			balanced) {
+	if (balanced || (tl <= load && tl + target_load(prev_cpu, idx) <=
+			tl_per_task)) {
 		/*
 		 * This domain has SD_WAKE_AFFINE and
 		 * p is cache cold in this domain, and
@@ -1211,16 +1156,17 @@
 	struct sched_domain *sd, *this_sd = NULL;
 	int prev_cpu, this_cpu, new_cpu;
 	unsigned long load, this_load;
-	struct rq *rq, *this_rq;
+	struct rq *this_rq;
 	unsigned int imbalance;
 	int idx;
 
 	prev_cpu	= task_cpu(p);
-	rq		= task_rq(p);
 	this_cpu	= smp_processor_id();
 	this_rq		= cpu_rq(this_cpu);
 	new_cpu		= prev_cpu;
 
+	if (prev_cpu == this_cpu)
+		goto out;
 	/*
 	 * 'this_sd' is the first domain that both
 	 * this_cpu and prev_cpu are present in:
@@ -1248,13 +1194,10 @@
 	load = source_load(prev_cpu, idx);
 	this_load = target_load(this_cpu, idx);
 
-	if (wake_affine(rq, this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
+	if (wake_affine(this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
 				     load, this_load, imbalance))
 		return this_cpu;
 
-	if (prev_cpu == this_cpu)
-		goto out;
-
 	/*
 	 * Start passive balancing when half the imbalance_pct
 	 * limit is reached.
@@ -1281,62 +1224,20 @@
 	 * + nice tasks.
 	 */
 	if (sched_feat(ASYM_GRAN))
-		gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se);
-	else
-		gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
+		gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load);
 
 	return gran;
 }
 
 /*
- * Should 'se' preempt 'curr'.
- *
- *             |s1
- *        |s2
- *   |s3
- *         g
- *      |<--->|c
- *
- *  w(c, s1) = -1
- *  w(c, s2) =  0
- *  w(c, s3) =  1
- *
- */
-static int
-wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
-{
-	s64 gran, vdiff = curr->vruntime - se->vruntime;
-
-	if (vdiff < 0)
-		return -1;
-
-	gran = wakeup_gran(curr);
-	if (vdiff > gran)
-		return 1;
-
-	return 0;
-}
-
-/* return depth at which a sched entity is present in the hierarchy */
-static inline int depth_se(struct sched_entity *se)
-{
-	int depth = 0;
-
-	for_each_sched_entity(se)
-		depth++;
-
-	return depth;
-}
-
-/*
  * Preempt the current task with a newly woken task if needed:
  */
-static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
+static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 {
 	struct task_struct *curr = rq->curr;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	struct sched_entity *se = &curr->se, *pse = &p->se;
-	int se_depth, pse_depth;
+	s64 delta_exec;
 
 	if (unlikely(rt_prio(p->prio))) {
 		update_rq_clock(rq);
@@ -1351,6 +1252,13 @@
 	cfs_rq_of(pse)->next = pse;
 
 	/*
+	 * We can come here with TIF_NEED_RESCHED already set from new task
+	 * wake up path.
+	 */
+	if (test_tsk_need_resched(curr))
+		return;
+
+	/*
 	 * Batch tasks do not preempt (their preemption is driven by
 	 * the tick):
 	 */
@@ -1360,33 +1268,15 @@
 	if (!sched_feat(WAKEUP_PREEMPT))
 		return;
 
-	/*
-	 * preemption test can be made between sibling entities who are in the
-	 * same cfs_rq i.e who have a common parent. Walk up the hierarchy of
-	 * both tasks until we find their ancestors who are siblings of common
-	 * parent.
-	 */
-
-	/* First walk up until both entities are at same depth */
-	se_depth = depth_se(se);
-	pse_depth = depth_se(pse);
-
-	while (se_depth > pse_depth) {
-		se_depth--;
-		se = parent_entity(se);
+	if (sched_feat(WAKEUP_OVERLAP) && sync &&
+			se->avg_overlap < sysctl_sched_migration_cost &&
+			pse->avg_overlap < sysctl_sched_migration_cost) {
+		resched_task(curr);
+		return;
 	}
 
-	while (pse_depth > se_depth) {
-		pse_depth--;
-		pse = parent_entity(pse);
-	}
-
-	while (!is_same_group(se, pse)) {
-		se = parent_entity(se);
-		pse = parent_entity(pse);
-	}
-
-	if (wakeup_preempt_entity(se, pse) == 1)
+	delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
+	if (delta_exec > wakeup_gran(pse))
 		resched_task(curr);
 }
 
@@ -1445,19 +1335,9 @@
 	if (next == &cfs_rq->tasks)
 		return NULL;
 
-	/* Skip over entities that are not tasks */
-	do {
-		se = list_entry(next, struct sched_entity, group_node);
-		next = next->next;
-	} while (next != &cfs_rq->tasks && !entity_is_task(se));
-
-	if (next == &cfs_rq->tasks)
-		return NULL;
-
-	cfs_rq->balance_iterator = next;
-
-	if (entity_is_task(se))
-		p = task_of(se);
+	se = list_entry(next, struct sched_entity, group_node);
+	p = task_of(se);
+	cfs_rq->balance_iterator = next->next;
 
 	return p;
 }
@@ -1507,7 +1387,7 @@
 	rcu_read_lock();
 	update_h_load(busiest_cpu);
 
-	list_for_each_entry(tg, &task_groups, list) {
+	list_for_each_entry_rcu(tg, &task_groups, list) {
 		struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
 		unsigned long busiest_h_load = busiest_cfs_rq->h_load;
 		unsigned long busiest_weight = busiest_cfs_rq->load.weight;
@@ -1620,10 +1500,10 @@
 		 * 'current' within the tree based on its new key value.
 		 */
 		swap(curr->vruntime, se->vruntime);
+		resched_task(rq->curr);
 	}
 
 	enqueue_task_fair(rq, p, 0);
-	resched_task(rq->curr);
 }
 
 /*
@@ -1642,7 +1522,7 @@
 		if (p->prio > oldprio)
 			resched_task(rq->curr);
 	} else
-		check_preempt_curr(rq, p);
+		check_preempt_curr(rq, p, 0);
 }
 
 /*
@@ -1659,7 +1539,7 @@
 	if (running)
 		resched_task(rq->curr);
 	else
-		check_preempt_curr(rq, p);
+		check_preempt_curr(rq, p, 0);
 }
 
 /* Account for a task changing its policy or group.

diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 9353ca7..7c9e8f4 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h

@@ -11,3 +11,4 @@
 SCHED_FEAT(LB_BIAS, 1)
 SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
 SCHED_FEAT(ASYM_EFF_LOAD, 1)
+SCHED_FEAT(WAKEUP_OVERLAP, 0)

diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 3a4f92d..dec4cca 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c

@@ -14,7 +14,7 @@
 /*
  * Idle tasks are unconditionally rescheduled:
  */
-static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p)
+static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sync)
 {
 	resched_task(rq->idle);
 }
@@ -76,7 +76,7 @@
 	if (running)
 		resched_task(rq->curr);
 	else
-		check_preempt_curr(rq, p);
+		check_preempt_curr(rq, p, 0);
 }
 
 static void prio_changed_idle(struct rq *rq, struct task_struct *p,
@@ -93,7 +93,7 @@
 		if (p->prio > oldprio)
 			resched_task(rq->curr);
 	} else
-		check_preempt_curr(rq, p);
+		check_preempt_curr(rq, p, 0);
 }
 
 /*

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 1113157..cdf5740 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c

@@ -102,12 +102,12 @@
 
 static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
+	struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
 	struct sched_rt_entity *rt_se = rt_rq->rt_se;
 
-	if (rt_se && !on_rt_rq(rt_se) && rt_rq->rt_nr_running) {
-		struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
-
-		enqueue_rt_entity(rt_se);
+	if (rt_rq->rt_nr_running) {
+		if (rt_se && !on_rt_rq(rt_se))
+			enqueue_rt_entity(rt_se);
 		if (rt_rq->highest_prio < curr->prio)
 			resched_task(curr);
 	}
@@ -231,6 +231,9 @@
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 #ifdef CONFIG_SMP
+/*
+ * We ran out of runtime, see if we can borrow some from our neighbours.
+ */
 static int do_balance_runtime(struct rt_rq *rt_rq)
 {
 	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
@@ -250,9 +253,18 @@
 			continue;
 
 		spin_lock(&iter->rt_runtime_lock);
+		/*
+		 * Either all rqs have inf runtime and there's nothing to steal
+		 * or __disable_runtime() below sets a specific rq to inf to
+		 * indicate its been disabled and disalow stealing.
+		 */
 		if (iter->rt_runtime == RUNTIME_INF)
 			goto next;
 
+		/*
+		 * From runqueues with spare time, take 1/n part of their
+		 * spare time, but no more than our period.
+		 */
 		diff = iter->rt_runtime - iter->rt_time;
 		if (diff > 0) {
 			diff = div_u64((u64)diff, weight);
@@ -274,6 +286,9 @@
 	return more;
 }
 
+/*
+ * Ensure this RQ takes back all the runtime it lend to its neighbours.
+ */
 static void __disable_runtime(struct rq *rq)
 {
 	struct root_domain *rd = rq->rd;
@@ -289,17 +304,33 @@
 
 		spin_lock(&rt_b->rt_runtime_lock);
 		spin_lock(&rt_rq->rt_runtime_lock);
+		/*
+		 * Either we're all inf and nobody needs to borrow, or we're
+		 * already disabled and thus have nothing to do, or we have
+		 * exactly the right amount of runtime to take out.
+		 */
 		if (rt_rq->rt_runtime == RUNTIME_INF ||
 				rt_rq->rt_runtime == rt_b->rt_runtime)
 			goto balanced;
 		spin_unlock(&rt_rq->rt_runtime_lock);
 
+		/*
+		 * Calculate the difference between what we started out with
+		 * and what we current have, that's the amount of runtime
+		 * we lend and now have to reclaim.
+		 */
 		want = rt_b->rt_runtime - rt_rq->rt_runtime;
 
+		/*
+		 * Greedy reclaim, take back as much as we can.
+		 */
 		for_each_cpu_mask(i, rd->span) {
 			struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
 			s64 diff;
 
+			/*
+			 * Can't reclaim from ourselves or disabled runqueues.
+			 */
 			if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
 				continue;
 
@@ -319,8 +350,16 @@
 		}
 
 		spin_lock(&rt_rq->rt_runtime_lock);
+		/*
+		 * We cannot be left wanting - that would mean some runtime
+		 * leaked out of the system.
+		 */
 		BUG_ON(want);
 balanced:
+		/*
+		 * Disable all the borrow logic by pretending we have inf
+		 * runtime - in which case borrowing doesn't make sense.
+		 */
 		rt_rq->rt_runtime = RUNTIME_INF;
 		spin_unlock(&rt_rq->rt_runtime_lock);
 		spin_unlock(&rt_b->rt_runtime_lock);
@@ -343,6 +382,9 @@
 	if (unlikely(!scheduler_running))
 		return;
 
+	/*
+	 * Reset each runqueue's bandwidth settings
+	 */
 	for_each_leaf_rt_rq(rt_rq, rq) {
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 
@@ -389,7 +431,7 @@
 	int i, idle = 1;
 	cpumask_t span;
 
-	if (rt_b->rt_runtime == RUNTIME_INF)
+	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
 		return 1;
 
 	span = sched_rt_period_mask();
@@ -487,6 +529,9 @@
 	curr->se.exec_start = rq->clock;
 	cpuacct_charge(curr, delta_exec);
 
+	if (!rt_bandwidth_enabled())
+		return;
+
 	for_each_sched_rt_entity(rt_se) {
 		rt_rq = rt_rq_of_se(rt_se);
 
@@ -784,7 +829,7 @@
 /*
  * Preempt the current task with a newly woken task if needed:
  */
-static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
+static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync)
 {
 	if (p->prio < rq->curr->prio) {
 		resched_task(rq->curr);

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index bd70345..cb01cd8 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c

@@ -235,7 +235,8 @@
 	case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
 		if (!cpu_isset(cpu, tick_broadcast_mask)) {
 			cpu_set(cpu, tick_broadcast_mask);
-			if (bc->mode == TICKDEV_MODE_PERIODIC)
+			if (tick_broadcast_device.mode ==
+			    TICKDEV_MODE_PERIODIC)
 				clockevents_shutdown(dev);
 		}
 		if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
@@ -245,7 +246,8 @@
 		if (!tick_broadcast_force &&
 		    cpu_isset(cpu, tick_broadcast_mask)) {
 			cpu_clear(cpu, tick_broadcast_mask);
-			if (bc->mode == TICKDEV_MODE_PERIODIC)
+			if (tick_broadcast_device.mode ==
+			    TICKDEV_MODE_PERIODIC)
 				tick_setup_periodic(dev, 0);
 		}
 		break;

diff --git a/kernel/user.c b/kernel/user.c
index 865ecf57..39d6159 100644
--- a/kernel/user.c
+++ b/kernel/user.c

@@ -169,7 +169,7 @@
 {
 	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
 
-	return sprintf(buf, "%lu\n", sched_group_rt_runtime(up->tg));
+	return sprintf(buf, "%ld\n", sched_group_rt_runtime(up->tg));
 }
 
 static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
@@ -180,7 +180,7 @@
 	unsigned long rt_runtime;
 	int rc;
 
-	sscanf(buf, "%lu", &rt_runtime);
+	sscanf(buf, "%ld", &rt_runtime);
 
 	rc = sched_group_set_rt_runtime(up->tg, rt_runtime);
 

diff --git a/mm/slob.c b/mm/slob.c
index 4c82dd4..62b679d 100644
--- a/mm/slob.c
+++ b/mm/slob.c

@@ -515,7 +515,7 @@
 
 	sp = (struct slob_page *)virt_to_page(block);
 	if (slob_page(sp))
-		return ((slob_t *)block - 1)->units + SLOB_UNIT;
+		return (((slob_t *)block - 1)->units - 1) * SLOB_UNIT;
 	else
 		return sp->page.private;
 }

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 418cd7d..8e0de6a 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c

@@ -1986,11 +1986,13 @@
 
 		mod = find_module(modname);
 		if (!mod) {
-			if (is_vmlinux(modname))
-				have_vmlinux = 1;
 			mod = new_module(NOFAIL(strdup(modname)));
 			mod->skip = 1;
 		}
+		if (is_vmlinux(modname)) {
+			have_vmlinux = 1;
+			mod->skip = 0;
+		}
 
 		if (!mod->skip)
 			add_marker(mod, marker, fmt);

diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index d11a815..8551952 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c

@@ -2737,6 +2737,7 @@
 		if (ctx == NULL)
 			goto netlbl_secattr_to_sid_return;
 
+		context_init(&ctx_new);
 		ctx_new.user = ctx->user;
 		ctx_new.role = ctx->role;
 		ctx_new.type = ctx->type;
@@ -2745,13 +2746,9 @@
 			if (ebitmap_netlbl_import(&ctx_new.range.level[0].cat,
 						  secattr->attr.mls.cat) != 0)
 				goto netlbl_secattr_to_sid_return;
-			ctx_new.range.level[1].cat.highbit =
-				ctx_new.range.level[0].cat.highbit;
-			ctx_new.range.level[1].cat.node =
-				ctx_new.range.level[0].cat.node;
-		} else {
-			ebitmap_init(&ctx_new.range.level[0].cat);
-			ebitmap_init(&ctx_new.range.level[1].cat);
+			memcpy(&ctx_new.range.level[1].cat,
+			       &ctx_new.range.level[0].cat,
+			       sizeof(ctx_new.range.level[0].cat));
 		}
 		if (mls_context_isvalid(&policydb, &ctx_new) != 1)
 			goto netlbl_secattr_to_sid_return_cleanup;
commit	990d0f2ced23052abc7efa09bd05bff34e00cf73	[log] [tgz]
author	Ingo Molnar <mingo@elte.hu>	Wed Oct 08 11:31:02 2008 +0200
committer	Ingo Molnar <mingo@elte.hu>	Wed Oct 08 11:31:02 2008 +0200
tree	df9f3fe5c0417102586087cec63e1d813a8f29cb
parent	85ba94ba0592296053f7f2846812173424afe1cb [diff]
parent	34b3ede2353604ec9861c1d900b2a835ff85de47 [diff]
parent	e545a6140b698b2494daf0b32107bdcc5e901390 [diff]
parent	d294eb83d8d39a29f01dad391f15fc3a29aa04f9 [diff]