Pull context-bitmap into release branch
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 3af6de3..5add0bc 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -461,6 +461,7 @@
#endif
cpu_init(); /* initialize the bootstrap CPU */
+ mmu_context_init(); /* initialize context_id bitmap */
#ifdef CONFIG_ACPI
acpi_boot_init();
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index c79a9b9..41105d4 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -8,6 +8,8 @@
* Modified RID allocation for SMP
* Goutham Rao <goutham.rao@intel.com>
* IPI based ptc implementation and A-step IPI implementation.
+ * Rohit Seth <rohit.seth@intel.com>
+ * Ken Chen <kenneth.w.chen@intel.com>
*/
#include <linux/config.h>
#include <linux/module.h>
@@ -16,78 +18,75 @@
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/mm.h>
+#include <linux/bootmem.h>
#include <asm/delay.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#include <asm/pal.h>
#include <asm/tlbflush.h>
+#include <asm/dma.h>
static struct {
unsigned long mask; /* mask of supported purge page-sizes */
- unsigned long max_bits; /* log2() of largest supported purge page-size */
+ unsigned long max_bits; /* log2 of largest supported purge page-size */
} purge;
struct ia64_ctx ia64_ctx = {
.lock = SPIN_LOCK_UNLOCKED,
.next = 1,
- .limit = (1 << 15) - 1, /* start out with the safe (architected) limit */
.max_ctx = ~0U
};
DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
/*
+ * Initializes the ia64_ctx.bitmap array based on max_ctx+1.
+ * Called after cpu_init() has setup ia64_ctx.max_ctx based on
+ * maximum RID that is supported by boot CPU.
+ */
+void __init
+mmu_context_init (void)
+{
+ ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+ ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
+}
+
+/*
* Acquire the ia64_ctx.lock before calling this function!
*/
void
wrap_mmu_context (struct mm_struct *mm)
{
- unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;
- struct task_struct *tsk;
- int i;
+ int i, cpu;
+ unsigned long flush_bit;
- if (ia64_ctx.next > max_ctx)
- ia64_ctx.next = 300; /* skip daemons */
- ia64_ctx.limit = max_ctx + 1;
+ for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
+ flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
+ ia64_ctx.bitmap[i] ^= flush_bit;
+ }
+
+ /* use offset at 300 to skip daemons */
+ ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
+ ia64_ctx.max_ctx, 300);
+ ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
+ ia64_ctx.max_ctx, ia64_ctx.next);
/*
- * Scan all the task's mm->context and set proper safe range
+ * can't call flush_tlb_all() here because of race condition
+ * with O(1) scheduler [EF]
*/
-
- read_lock(&tasklist_lock);
- repeat:
- for_each_process(tsk) {
- if (!tsk->mm)
- continue;
- tsk_context = tsk->mm->context;
- if (tsk_context == ia64_ctx.next) {
- if (++ia64_ctx.next >= ia64_ctx.limit) {
- /* empty range: reset the range limit and start over */
- if (ia64_ctx.next > max_ctx)
- ia64_ctx.next = 300;
- ia64_ctx.limit = max_ctx + 1;
- goto repeat;
- }
- }
- if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit))
- ia64_ctx.limit = tsk_context;
- }
- read_unlock(&tasklist_lock);
- /* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
- {
- int cpu = get_cpu(); /* prevent preemption/migration */
- for_each_online_cpu(i) {
- if (i != cpu)
- per_cpu(ia64_need_tlb_flush, i) = 1;
- }
- put_cpu();
- }
+ cpu = get_cpu(); /* prevent preemption/migration */
+ for_each_online_cpu(i)
+ if (i != cpu)
+ per_cpu(ia64_need_tlb_flush, i) = 1;
+ put_cpu();
local_flush_tlb_all();
}
void
-ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long nbits)
+ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
+ unsigned long end, unsigned long nbits)
{
static DEFINE_SPINLOCK(ptcg_lock);
@@ -135,7 +134,8 @@
}
void
-flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end)
+flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long size = end - start;
@@ -149,7 +149,8 @@
#endif
nbits = ia64_fls(size + 0xfff);
- while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits))
+ while (unlikely (((1UL << nbits) & purge.mask) == 0) &&
+ (nbits < purge.max_bits))
++nbits;
if (nbits > purge.max_bits)
nbits = purge.max_bits;
@@ -191,5 +192,5 @@
local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
- local_flush_tlb_all(); /* nuke left overs from bootstrapping... */
+ local_flush_tlb_all(); /* nuke left overs from bootstrapping... */
}
diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h
index 8d6e72f..b5c6508 100644
--- a/include/asm-ia64/mmu_context.h
+++ b/include/asm-ia64/mmu_context.h
@@ -7,12 +7,13 @@
*/
/*
- * Routines to manage the allocation of task context numbers. Task context numbers are
- * used to reduce or eliminate the need to perform TLB flushes due to context switches.
- * Context numbers are implemented using ia-64 region ids. Since the IA-64 TLB does not
- * consider the region number when performing a TLB lookup, we need to assign a unique
- * region id to each region in a process. We use the least significant three bits in a
- * region id for this purpose.
+ * Routines to manage the allocation of task context numbers. Task context
+ * numbers are used to reduce or eliminate the need to perform TLB flushes
+ * due to context switches. Context numbers are implemented using ia-64
+ * region ids. Since the IA-64 TLB does not consider the region number when
+ * performing a TLB lookup, we need to assign a unique region id to each
+ * region in a process. We use the least significant three bits in aregion
+ * id for this purpose.
*/
#define IA64_REGION_ID_KERNEL 0 /* the kernel's region id (tlb.c depends on this being 0) */
@@ -32,13 +33,17 @@
struct ia64_ctx {
spinlock_t lock;
unsigned int next; /* next context number to use */
- unsigned int limit; /* next >= limit => must call wrap_mmu_context() */
- unsigned int max_ctx; /* max. context value supported by all CPUs */
+ unsigned int limit; /* available free range */
+ unsigned int max_ctx; /* max. context value supported by all CPUs */
+ /* call wrap_mmu_context when next >= max */
+ unsigned long *bitmap; /* bitmap size is max_ctx+1 */
+ unsigned long *flushmap;/* pending rid to be flushed */
};
extern struct ia64_ctx ia64_ctx;
DECLARE_PER_CPU(u8, ia64_need_tlb_flush);
+extern void mmu_context_init (void);
extern void wrap_mmu_context (struct mm_struct *mm);
static inline void
@@ -47,10 +52,10 @@
}
/*
- * When the context counter wraps around all TLBs need to be flushed because an old
- * context number might have been reused. This is signalled by the ia64_need_tlb_flush
- * per-CPU variable, which is checked in the routine below. Called by activate_mm().
- * <efocht@ess.nec.de>
+ * When the context counter wraps around all TLBs need to be flushed because
+ * an old context number might have been reused. This is signalled by the
+ * ia64_need_tlb_flush per-CPU variable, which is checked in the routine
+ * below. Called by activate_mm(). <efocht@ess.nec.de>
*/
static inline void
delayed_tlb_flush (void)
@@ -60,11 +65,9 @@
if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) {
spin_lock_irqsave(&ia64_ctx.lock, flags);
- {
- if (__ia64_per_cpu_var(ia64_need_tlb_flush)) {
- local_flush_tlb_all();
- __ia64_per_cpu_var(ia64_need_tlb_flush) = 0;
- }
+ if (__ia64_per_cpu_var(ia64_need_tlb_flush)) {
+ local_flush_tlb_all();
+ __ia64_per_cpu_var(ia64_need_tlb_flush) = 0;
}
spin_unlock_irqrestore(&ia64_ctx.lock, flags);
}
@@ -76,20 +79,27 @@
unsigned long flags;
nv_mm_context_t context = mm->context;
- if (unlikely(!context)) {
- spin_lock_irqsave(&ia64_ctx.lock, flags);
- {
- /* re-check, now that we've got the lock: */
- context = mm->context;
- if (context == 0) {
- cpus_clear(mm->cpu_vm_mask);
- if (ia64_ctx.next >= ia64_ctx.limit)
- wrap_mmu_context(mm);
- mm->context = context = ia64_ctx.next++;
- }
+ if (likely(context))
+ goto out;
+
+ spin_lock_irqsave(&ia64_ctx.lock, flags);
+ /* re-check, now that we've got the lock: */
+ context = mm->context;
+ if (context == 0) {
+ cpus_clear(mm->cpu_vm_mask);
+ if (ia64_ctx.next >= ia64_ctx.limit) {
+ ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
+ ia64_ctx.max_ctx, ia64_ctx.next);
+ ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
+ ia64_ctx.max_ctx, ia64_ctx.next);
+ if (ia64_ctx.next >= ia64_ctx.max_ctx)
+ wrap_mmu_context(mm);
}
- spin_unlock_irqrestore(&ia64_ctx.lock, flags);
+ mm->context = context = ia64_ctx.next++;
+ __set_bit(context, ia64_ctx.bitmap);
}
+ spin_unlock_irqrestore(&ia64_ctx.lock, flags);
+out:
/*
* Ensure we're not starting to use "context" before any old
* uses of it are gone from our TLB.
@@ -100,8 +110,8 @@
}
/*
- * Initialize context number to some sane value. MM is guaranteed to be a brand-new
- * address-space, so no TLB flushing is needed, ever.
+ * Initialize context number to some sane value. MM is guaranteed to be a
+ * brand-new address-space, so no TLB flushing is needed, ever.
*/
static inline int
init_new_context (struct task_struct *p, struct mm_struct *mm)
@@ -162,7 +172,10 @@
if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
cpu_set(smp_processor_id(), mm->cpu_vm_mask);
reload_context(context);
- /* in the unlikely event of a TLB-flush by another thread, redo the load: */
+ /*
+ * in the unlikely event of a TLB-flush by another thread,
+ * redo the load.
+ */
} while (unlikely(context != mm->context));
}
@@ -175,8 +188,8 @@
activate_mm (struct mm_struct *prev, struct mm_struct *next)
{
/*
- * We may get interrupts here, but that's OK because interrupt handlers cannot
- * touch user-space.
+ * We may get interrupts here, but that's OK because interrupt
+ * handlers cannot touch user-space.
*/
ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pgd));
activate_context(next);
diff --git a/include/asm-ia64/tlbflush.h b/include/asm-ia64/tlbflush.h
index b65c627..a35b323 100644
--- a/include/asm-ia64/tlbflush.h
+++ b/include/asm-ia64/tlbflush.h
@@ -51,6 +51,7 @@
if (!mm)
return;
+ set_bit(mm->context, ia64_ctx.flushmap);
mm->context = 0;
if (atomic_read(&mm->mm_users) == 0)