Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!
diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile
new file mode 100644
index 0000000..9489a14
--- /dev/null
+++ b/arch/sh/mm/Makefile
@@ -0,0 +1,25 @@
+#
+# Makefile for the Linux SuperH-specific parts of the memory manager.
+#
+
+obj-y			:= init.o extable.o consistent.o
+
+obj-$(CONFIG_CPU_SH2)	+= cache-sh2.o
+obj-$(CONFIG_CPU_SH3)	+= cache-sh3.o
+obj-$(CONFIG_CPU_SH4)	+= cache-sh4.o pg-sh4.o
+
+obj-$(CONFIG_DMA_PAGE_OPS)	+= pg-dma.o
+obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
+
+mmu-y			:= fault-nommu.o tlb-nommu.o pg-nommu.o
+mmu-$(CONFIG_MMU)	:= fault.o clear_page.o copy_page.o
+
+obj-y			+= $(mmu-y)
+
+ifdef CONFIG_MMU
+obj-$(CONFIG_CPU_SH3)	+= tlb-sh3.o
+obj-$(CONFIG_CPU_SH4)	+= tlb-sh4.o ioremap.o
+obj-$(CONFIG_SH7705_CACHE_32KB) += pg-sh7705.o
+endif
+
+obj-$(CONFIG_SH7705_CACHE_32KB) += cache-sh7705.o
diff --git a/arch/sh/mm/cache-sh2.c b/arch/sh/mm/cache-sh2.c
new file mode 100644
index 0000000..2689cb2
--- /dev/null
+++ b/arch/sh/mm/cache-sh2.c
@@ -0,0 +1,50 @@
+/*
+ * arch/sh/mm/cache-sh2.c
+ *
+ * Copyright (C) 2002 Paul Mundt
+ *
+ * Released under the terms of the GNU GPL v2.0.
+ */
+#include <linux/init.h>
+#include <linux/mm.h>
+
+#include <asm/cache.h>
+#include <asm/addrspace.h>
+#include <asm/processor.h>
+#include <asm/cacheflush.h>
+#include <asm/io.h>
+
+/*
+ * Calculate the OC address and set the way bit on the SH-2.
+ *
+ * We must have already jump_to_P2()'ed prior to calling this
+ * function, since we rely on CCR manipulation to do the
+ * Right Thing(tm).
+ */
+unsigned long __get_oc_addr(unsigned long set, unsigned long way)
+{
+	unsigned long ccr;
+
+	/*
+	 * On SH-2 the way bit isn't tracked in the address field
+	 * if we're doing address array access. Instead, we need
+	 * to manually switch out the way in the CCR.
+	 */
+	ccr = ctrl_inl(CCR);
+	ccr &= ~0x00c0;
+	ccr |= way << cpu_data->dcache.way_shift;
+
+	/*
+	 * Despite the number of sets being halved, we end up losing
+	 * the first 2 ways to OCRAM instead of the last 2 (if we're
+	 * 4-way). As a result, forcibly setting the W1 bit handily
+	 * bumps us up 2 ways.
+	 */
+	if (ccr & CCR_CACHE_ORA)
+		ccr |= 1 << (cpu_data->dcache.way_shift + 1);
+
+	ctrl_outl(ccr, CCR);
+
+	return CACHE_OC_ADDRESS_ARRAY | (set << cpu_data->dcache.entry_shift);
+}
+
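
For illustration, here is a standalone C sketch of the set/way-to-address
arithmetic behind __get_oc_addr(). The geometry constants (ENTRY_SHIFT,
WAY_SHIFT, base address) are assumed placeholder values, not taken from a
real SH-2 part, and the way is folded into the address purely to show the
bit layout; as the comment above notes, the real SH-2 code routes the way
through CCR instead.

#include <stdio.h>

#define CACHE_OC_ADDRESS_ARRAY	0xf0000000UL
#define ENTRY_SHIFT		4	/* assumed: log2(line size) */
#define WAY_SHIFT		12	/* assumed way-field position */

/* Fold set and way into an address-array address (bit layout only). */
static unsigned long oc_addr(unsigned long set, unsigned long way)
{
	return CACHE_OC_ADDRESS_ARRAY
		| (way << WAY_SHIFT)
		| (set << ENTRY_SHIFT);
}

int main(void)
{
	printf("set 0, way 0: %#lx\n", oc_addr(0, 0));
	printf("set 5, way 2: %#lx\n", oc_addr(5, 2));
	return 0;
}
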
diff --git a/arch/sh/mm/cache-sh3.c b/arch/sh/mm/cache-sh3.c
new file mode 100644
index 0000000..838731f
--- /dev/null
+++ b/arch/sh/mm/cache-sh3.c
@@ -0,0 +1,100 @@
+/*
+ * arch/sh/mm/cache-sh3.c
+ *
+ * Copyright (C) 1999, 2000  Niibe Yutaka
+ * Copyright (C) 2002 Paul Mundt
+ *
+ * Released under the terms of the GNU GPL v2.0.
+ */
+
+#include <linux/init.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/threads.h>
+#include <asm/addrspace.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+/*
+ * Write back the dirty D-caches, but do not invalidate them.
+ *
+ * Is this really worth it, or should we just alias this routine
+ * to __flush_purge_region too?
+ *
+ * START: Virtual Address (U0, P1, or P3)
+ * SIZE: Size of the region.
+ */
+
+void __flush_wback_region(void *start, int size)
+{
+	unsigned long v, j;
+	unsigned long begin, end;
+	unsigned long flags;
+
+	begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
+	end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
+		& ~(L1_CACHE_BYTES-1);
+
+	for (v = begin; v < end; v+=L1_CACHE_BYTES) {
+		unsigned long addrstart = CACHE_OC_ADDRESS_ARRAY;
+		for (j = 0; j < cpu_data->dcache.ways; j++) {
+			unsigned long data, addr, p;
+
+			p = __pa(v);
+			addr = addrstart | (v & cpu_data->dcache.entry_mask);
+			local_irq_save(flags);
+			data = ctrl_inl(addr);
+
+			if ((data & CACHE_PHYSADDR_MASK) ==
+			    (p & CACHE_PHYSADDR_MASK)) {
+				data &= ~SH_CACHE_UPDATED;
+				ctrl_outl(data, addr);
+				local_irq_restore(flags);
+				break;
+			}
+			local_irq_restore(flags);
+			addrstart += cpu_data->dcache.way_incr;
+		}
+	}
+}
+
+/*
+ * Write back the dirty D-caches and invalidate them.
+ *
+ * START: Virtual Address (U0, P1, or P3)
+ * SIZE: Size of the region.
+ */
+void __flush_purge_region(void *start, int size)
+{
+	unsigned long v;
+	unsigned long begin, end;
+
+	begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
+	end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
+		& ~(L1_CACHE_BYTES-1);
+
+	for (v = begin; v < end; v+=L1_CACHE_BYTES) {
+		unsigned long data, addr;
+
+		data = (v & 0xfffffc00); /* _Virtual_ address, ~U, ~V */
+		addr = CACHE_OC_ADDRESS_ARRAY |
+			(v & cpu_data->dcache.entry_mask) | SH_CACHE_ASSOC;
+		ctrl_outl(data, addr);
+	}
+}
+
+/*
+ * No write back please
+ *
+ * Except I don't think there's any way to avoid the writeback. So we
+ * just alias it to __flush_purge_region(). dwmw2.
+ */
+void __flush_invalidate_region(void *start, int size)
+	__attribute__((alias("__flush_purge_region")));
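
A standalone sketch of the cache-line rounding that both flush routines
above perform on their (start, size) argument pair, assuming a 32-byte
line size for illustration:

#include <stdio.h>

#define L1_CACHE_BYTES	32	/* assumed line size */

int main(void)
{
	unsigned long start = 0x1003, size = 100;
	unsigned long begin, end;

	/* Same rounding as __flush_wback_region()/__flush_purge_region(). */
	begin = start & ~(L1_CACHE_BYTES - 1);
	end = (start + size + L1_CACHE_BYTES - 1) & ~(L1_CACHE_BYTES - 1);

	printf("begin=%#lx end=%#lx lines=%lu\n",
	       begin, end, (end - begin) / L1_CACHE_BYTES);
	return 0;
}
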
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
new file mode 100644
index 0000000..ab833ad
--- /dev/null
+++ b/arch/sh/mm/cache-sh4.c
@@ -0,0 +1,362 @@
+/*
+ * arch/sh/mm/cache-sh4.c
+ *
+ * Copyright (C) 1999, 2000, 2002  Niibe Yutaka
+ * Copyright (C) 2001, 2002, 2003, 2004  Paul Mundt
+ * Copyright (C) 2003  Richard Curnow
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/threads.h>
+#include <asm/addrspace.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+extern void __flush_cache_4096_all(unsigned long start);
+static void __flush_cache_4096_all_ex(unsigned long start);
+extern void __flush_dcache_all(void);
+static void __flush_dcache_all_ex(void);
+
+/*
+ * SH-4 has virtually indexed and physically tagged cache.
+ */
+
+struct semaphore p3map_sem[4];
+
+void __init p3_cache_init(void)
+{
+	if (remap_area_pages(P3SEG, 0, PAGE_SIZE*4, _PAGE_CACHABLE))
+		panic("%s failed.", __FUNCTION__);
+
+	sema_init(&p3map_sem[0], 1);
+	sema_init(&p3map_sem[1], 1);
+	sema_init(&p3map_sem[2], 1);
+	sema_init(&p3map_sem[3], 1);
+}
+
+/*
+ * Write back the dirty D-caches, but do not invalidate them.
+ *
+ * START: Virtual Address (U0, P1, or P3)
+ * SIZE: Size of the region.
+ */
+void __flush_wback_region(void *start, int size)
+{
+	unsigned long v;
+	unsigned long begin, end;
+
+	begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
+	end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
+		& ~(L1_CACHE_BYTES-1);
+	for (v = begin; v < end; v+=L1_CACHE_BYTES) {
+		asm volatile("ocbwb	%0"
+			     : /* no output */
+			     : "m" (__m(v)));
+	}
+}
+
+/*
+ * Write back the dirty D-caches and invalidate them.
+ *
+ * START: Virtual Address (U0, P1, or P3)
+ * SIZE: Size of the region.
+ */
+void __flush_purge_region(void *start, int size)
+{
+	unsigned long v;
+	unsigned long begin, end;
+
+	begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
+	end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
+		& ~(L1_CACHE_BYTES-1);
+	for (v = begin; v < end; v+=L1_CACHE_BYTES) {
+		asm volatile("ocbp	%0"
+			     : /* no output */
+			     : "m" (__m(v)));
+	}
+}
+
+
+/*
+ * No write back please
+ */
+void __flush_invalidate_region(void *start, int size)
+{
+	unsigned long v;
+	unsigned long begin, end;
+
+	begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
+	end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
+		& ~(L1_CACHE_BYTES-1);
+	for (v = begin; v < end; v+=L1_CACHE_BYTES) {
+		asm volatile("ocbi	%0"
+			     : /* no output */
+			     : "m" (__m(v)));
+	}
+}
+
+static void __flush_dcache_all_ex(void)
+{
+	unsigned long addr, end_addr, entry_offset;
+
+	end_addr = CACHE_OC_ADDRESS_ARRAY +
+		(cpu_data->dcache.sets << cpu_data->dcache.entry_shift) *
+		cpu_data->dcache.ways;
+	entry_offset = 1 << cpu_data->dcache.entry_shift;
+	for (addr = CACHE_OC_ADDRESS_ARRAY; addr < end_addr;
+	     addr += entry_offset) {
+		ctrl_outl(0, addr);
+	}
+}
+
+static void __flush_cache_4096_all_ex(unsigned long start)
+{
+	unsigned long addr, entry_offset;
+	int i;
+
+	entry_offset = 1 << cpu_data->dcache.entry_shift;
+	for (i = 0; i < cpu_data->dcache.ways;
+	     i++, start += cpu_data->dcache.way_incr) {
+		for (addr = CACHE_OC_ADDRESS_ARRAY + start;
+		     addr < CACHE_OC_ADDRESS_ARRAY + 4096 + start;
+		     addr += entry_offset) {
+			ctrl_outl(0, addr);
+		}
+	}
+}
+
+void flush_cache_4096_all(unsigned long start)
+{
+	if (cpu_data->dcache.ways == 1)
+		__flush_cache_4096_all(start);
+	else
+		__flush_cache_4096_all_ex(start);
+}
+
+/*
+ * Write back the range of D-cache, and purge the I-cache.
+ *
+ * Called from kernel/module.c:sys_init_module and from the routine
+ * for the a.out format.
+ */
+void flush_icache_range(unsigned long start, unsigned long end)
+{
+	flush_cache_all();
+}
+
+/*
+ * Write back the D-cache and purge the I-cache for the signal trampoline,
+ * which happens to be the same behavior as flush_icache_range().
+ * So, we simply flush out a line.
+ */
+void flush_cache_sigtramp(unsigned long addr)
+{
+	unsigned long v, index;
+	unsigned long flags; 
+	int i;
+
+	v = addr & ~(L1_CACHE_BYTES-1);
+	asm volatile("ocbwb	%0"
+		     : /* no output */
+		     : "m" (__m(v)));
+
+	index = CACHE_IC_ADDRESS_ARRAY | (v & cpu_data->icache.entry_mask);
+
+	local_irq_save(flags);
+	jump_to_P2();
+	for (i = 0; i < cpu_data->icache.ways;
+	     i++, index += cpu_data->icache.way_incr)
+		ctrl_outl(0, index);	/* Clear out Valid-bit */
+	back_to_P1();
+	local_irq_restore(flags);
+}
+
+static inline void flush_cache_4096(unsigned long start,
+				    unsigned long phys)
+{
+	unsigned long flags; 
+	extern void __flush_cache_4096(unsigned long addr, unsigned long phys,
+				       unsigned long exec_offset);
+
+	/*
+	 * SH7751, SH7751R, and ST40 have no restrictions on cache handling.
+	 * (The SH7750, however, must do this from the P2 area.)
+	 */
+	if ((cpu_data->flags & CPU_HAS_P2_FLUSH_BUG)
+	   || start < CACHE_OC_ADDRESS_ARRAY) {
+		local_irq_save(flags);
+		__flush_cache_4096(start | SH_CACHE_ASSOC, P1SEGADDR(phys), 0x20000000);
+		local_irq_restore(flags);
+	} else {
+		__flush_cache_4096(start | SH_CACHE_ASSOC, P1SEGADDR(phys), 0);
+	}
+}
+
+/*
+ * Write back & invalidate the D-cache of the page.
+ * (To avoid "alias" issues)
+ */
+void flush_dcache_page(struct page *page)
+{
+	if (test_bit(PG_mapped, &page->flags)) {
+		unsigned long phys = PHYSADDR(page_address(page));
+
+		/* Loop all the D-cache */
+		flush_cache_4096(CACHE_OC_ADDRESS_ARRAY,          phys);
+		flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x1000, phys);
+		flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x2000, phys);
+		flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x3000, phys);
+	}
+}
+
+static inline void flush_icache_all(void)
+{
+	unsigned long flags, ccr;
+
+	local_irq_save(flags);
+	jump_to_P2();
+
+	/* Flush I-cache */
+	ccr = ctrl_inl(CCR);
+	ccr |= CCR_CACHE_ICI;
+	ctrl_outl(ccr, CCR);
+
+	back_to_P1();
+	local_irq_restore(flags);
+}
+
+void flush_cache_all(void)
+{
+	if (cpu_data->dcache.ways == 1)
+		__flush_dcache_all();
+	else
+		__flush_dcache_all_ex();
+	flush_icache_all();
+}
+
+void flush_cache_mm(struct mm_struct *mm)
+{
+	/* Is there any good way? */
+	/* XXX: possibly call flush_cache_range for each vm area */
+	/* 
+	 * FIXME: Really, the optimal solution here would be able to flush out
+	 * individual lines created by the specified context, but this isn't
+	 * feasible for a number of architectures (such as MIPS, and some
+	 * SPARC). Is this possible for SuperH?
+	 *
+	 * In the meantime, we'll just flush all of the caches.. this
+	 * seems to be the simplest way to avoid at least a few wasted
+	 * cache flushes. -Lethal
+	 */
+	flush_cache_all();
+}
+
+/*
+ * Write back and invalidate I/D-caches for the page.
+ *
+ * ADDR: Virtual Address (U0 address)
+ * PFN: Physical page number
+ */
+void flush_cache_page(struct vm_area_struct *vma, unsigned long address,
+		      unsigned long pfn)
+{
+	unsigned long phys = pfn << PAGE_SHIFT;
+
+	/* We only need to flush the D-cache when we have an alias */
+	if ((address^phys) & CACHE_ALIAS) {
+		/* Loop 4K of the D-cache */
+		flush_cache_4096(
+			CACHE_OC_ADDRESS_ARRAY | (address & CACHE_ALIAS),
+			phys);
+		/* Loop another 4K of the D-cache */
+		flush_cache_4096(
+			CACHE_OC_ADDRESS_ARRAY | (phys & CACHE_ALIAS),
+			phys);
+	}
+
+	if (vma->vm_flags & VM_EXEC)
+		/* Loop 4K (half) of the I-cache */
+		flush_cache_4096(
+			CACHE_IC_ADDRESS_ARRAY | (address & 0x1000),
+			phys);
+}
+
+/*
+ * Write back and invalidate D-caches.
+ *
+ * START, END: Virtual Address (U0 address)
+ *
+ * NOTE: We need to flush the _physical_ page entry.
+ * Flushing the cache lines for U0 only isn't enough.
+ * We need to flush for P1 too, which may contain aliases.
+ */
+void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
+		       unsigned long end)
+{
+	unsigned long p = start & PAGE_MASK;
+	pgd_t *dir;
+	pmd_t *pmd;
+	pte_t *pte;
+	pte_t entry;
+	unsigned long phys;
+	unsigned long d = 0;
+
+	dir = pgd_offset(vma->vm_mm, p);
+	pmd = pmd_offset(dir, p);
+
+	do {
+		if (pmd_none(*pmd) || pmd_bad(*pmd)) {
+			p &= ~((1 << PMD_SHIFT) -1);
+			p += (1 << PMD_SHIFT);
+			pmd++;
+			continue;
+		}
+		pte = pte_offset_kernel(pmd, p);
+		do {
+			entry = *pte;
+			if ((pte_val(entry) & _PAGE_PRESENT)) {
+				phys = pte_val(entry)&PTE_PHYS_MASK;
+				if ((p^phys) & CACHE_ALIAS) {
+					d |= 1 << ((p & CACHE_ALIAS)>>12); 
+					d |= 1 << ((phys & CACHE_ALIAS)>>12);
+					if (d == 0x0f)
+						goto loop_exit;
+				}
+			}
+			pte++;
+			p += PAGE_SIZE;
+		} while (p < end && ((unsigned long)pte & ~PAGE_MASK));
+		pmd++;
+	} while (p < end);
+ loop_exit:
+	if (d & 1)
+		flush_cache_4096_all(0);
+	if (d & 2)
+		flush_cache_4096_all(0x1000);
+	if (d & 4)
+		flush_cache_4096_all(0x2000);
+	if (d & 8)
+		flush_cache_4096_all(0x3000);
+	if (vma->vm_flags & VM_EXEC)
+		flush_icache_all();
+}
+
+/*
+ * flush_icache_user_range
+ * @vma: VMA of the process
+ * @page: page
+ * @addr: U0 address
+ * @len: length of the range (< page size)
+ */
+void flush_icache_user_range(struct vm_area_struct *vma,
+			     struct page *page, unsigned long addr, int len)
+{
+	flush_cache_page(vma, addr, page_to_pfn(page));
+}
+
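
The color bitmask trick in flush_cache_range() above can be seen in
isolation with a standalone sketch. CACHE_ALIAS is assumed here to be
0x3000 (a 16KB way with 4KB pages, giving four alias colors); each bit
of d selects one 4KB slice of the way that flush_cache_4096_all() must
visit:

#include <stdio.h>

#define CACHE_ALIAS	0x3000UL	/* assumed: 16KB way / 4KB pages */

int main(void)
{
	unsigned long vaddr = 0x00402000, paddr = 0x08001000;
	unsigned long d = 0;

	/* Same test as the loop body in flush_cache_range(). */
	if ((vaddr ^ paddr) & CACHE_ALIAS) {
		d |= 1UL << ((vaddr & CACHE_ALIAS) >> 12);
		d |= 1UL << ((paddr & CACHE_ALIAS) >> 12);
	}
	printf("colors to flush: %#lx\n", d);	/* 0x6: slices 1 and 2 */
	return 0;
}
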
diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c
new file mode 100644
index 0000000..ad8ed7d
--- /dev/null
+++ b/arch/sh/mm/cache-sh7705.c
@@ -0,0 +1,206 @@
+/*
+ * arch/sh/mm/cache-sh7705.c
+ *
+ * Copyright (C) 1999, 2000  Niibe Yutaka
+ * Copyright (C) 2004  Alex Song
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/threads.h>
+#include <asm/addrspace.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+/*
+ * The 32KB cache on the SH7705 suffers from the same synonym problem
+ * as SH-4 CPUs.
+ */
+
+#define __pte_offset(address) \
+		((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_offset(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \
+		__pte_offset(address))
+
+static inline void cache_wback_all(void)
+{
+	unsigned long ways, waysize, addrstart;
+
+	ways = cpu_data->dcache.ways;
+	waysize = cpu_data->dcache.sets;
+	waysize <<= cpu_data->dcache.entry_shift;
+
+	addrstart = CACHE_OC_ADDRESS_ARRAY;
+
+	do {
+		unsigned long addr;
+
+		for (addr = addrstart;
+		     addr < addrstart + waysize;
+		     addr += cpu_data->dcache.linesz) {
+			unsigned long data;
+			int v = SH_CACHE_UPDATED | SH_CACHE_VALID;
+
+			data = ctrl_inl(addr);
+
+			if ((data & v) == v)
+				ctrl_outl(data & ~v, addr);
+
+		}
+
+		addrstart += cpu_data->dcache.way_incr;
+	} while (--ways);
+}
+
+/*
+ * Write back the range of D-cache, and purge the I-cache.
+ *
+ * Called from kernel/module.c:sys_init_module and from the routine
+ * for the a.out format.
+ */
+void flush_icache_range(unsigned long start, unsigned long end)
+{
+	__flush_wback_region((void *)start, end - start);
+}
+
+
+/*
+ * Writeback&Invalidate the D-cache of the page
+ */
+static void __flush_dcache_page(unsigned long phys)
+{
+	unsigned long ways, waysize, addrstart;
+	unsigned long flags;
+
+	phys |= SH_CACHE_VALID;
+
+	/*
+	 * Here, phys is the physical address of the page. We check all the
+	 * tags in the cache for those with the same page number as this page
+	 * (by masking off the lowest 2 bits of the 19-bit tag; these bits are
+	 * derived from the offset within the 4K page). Matching valid
+	 * entries are invalidated.
+	 *
+	 * Since 2 bits of the cache index are derived from the virtual page
+	 * number, knowing this would reduce the number of cache entries to be
+	 * searched by a factor of 4. However this function exists to deal with
+	 * potential cache aliasing, therefore the optimisation is probably not
+	 * possible.
+	 */
+	local_irq_save(flags);
+	jump_to_P2();
+
+	ways = cpu_data->dcache.ways;
+	waysize = cpu_data->dcache.sets;
+	waysize <<= cpu_data->dcache.entry_shift;
+
+	addrstart = CACHE_OC_ADDRESS_ARRAY;
+
+	do {
+		unsigned long addr;
+
+		for (addr = addrstart;
+		     addr < addrstart + waysize;
+		     addr += cpu_data->dcache.linesz) {
+			unsigned long data;
+
+			data = ctrl_inl(addr) & (0x1ffffc00 | SH_CACHE_VALID);
+			if (data == phys) {
+				data &= ~(SH_CACHE_VALID | SH_CACHE_UPDATED);
+				ctrl_outl(data, addr);
+			}
+		}
+
+		addrstart += cpu_data->dcache.way_incr;
+	} while (--ways);
+
+	back_to_P1();
+	local_irq_restore(flags);
+}
+
+
+/*
+ * Write back & invalidate the D-cache of the page.
+ * (To avoid "alias" issues)
+ */
+void flush_dcache_page(struct page *page)
+{
+	if (test_bit(PG_mapped, &page->flags))
+		__flush_dcache_page(PHYSADDR(page_address(page)));
+}
+
+void flush_cache_all(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	jump_to_P2();
+
+	cache_wback_all();
+	back_to_P1();
+	local_irq_restore(flags);
+}
+
+void flush_cache_mm(struct mm_struct *mm)
+{
+	/* Is there any good way? */
+	/* XXX: possibly call flush_cache_range for each vm area */
+	flush_cache_all();
+}
+
+/*
+ * Write back and invalidate D-caches.
+ *
+ * START, END: Virtual Address (U0 address)
+ *
+ * NOTE: We need to flush the _physical_ page entry.
+ * Flushing the cache lines for U0 only isn't enough.
+ * We need to flush for P1 too, which may contain aliases.
+ */
+void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
+		       unsigned long end)
+{
+
+	/*
+	 * We could call flush_cache_page for the pages of these range,
+	 * but it's not efficient (scan the caches all the time...).
+	 *
+	 * We can't use A-bit magic, as there's the case we don't have
+	 * valid entry on TLB.
+	 */
+	flush_cache_all();
+}
+
+/*
+ * Write back and invalidate I/D-caches for the page.
+ *
+ * ADDRESS: Virtual Address (U0 address)
+ */
+void flush_cache_page(struct vm_area_struct *vma, unsigned long address,
+		      unsigned long pfn)
+{
+	__flush_dcache_page(pfn << PAGE_SHIFT);
+}
+
+/*
+ * This is called when a page-cache page is about to be mapped into a
+ * user process' address space.  It offers an opportunity for a
+ * port to ensure d-cache/i-cache coherency if necessary.
+ *
+ * Not entirely sure why this is necessary on SH3 with 32K cache but
+ * without it we get occasional "Memory fault" when loading a program.
+ */
+void flush_icache_page(struct vm_area_struct *vma, struct page *page)
+{
+	__flush_purge_region(page_address(page), PAGE_SIZE);
+}
+
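
A standalone sketch of the ways x sets traversal that cache_wback_all()
and __flush_dcache_page() perform above, with assumed geometry for a
32KB 4-way cache (the real values come from cpu_data->dcache):

#include <stdio.h>

int main(void)
{
	/* Assumed geometry: 32KB, 4-way, 32-byte lines. */
	unsigned long ways = 4, sets = 256, linesz = 32;
	unsigned long entry_shift = 5;		/* log2(linesz) */
	unsigned long waysize = sets << entry_shift;
	unsigned long way, addr, probes = 0;

	/* Same traversal shape as cache_wback_all(). */
	for (way = 0; way < ways; way++)
		for (addr = 0; addr < waysize; addr += linesz)
			probes++;		/* one tag read per line */

	printf("lines probed: %lu (%lu bytes)\n", probes, probes * linesz);
	return 0;
}
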
diff --git a/arch/sh/mm/clear_page.S b/arch/sh/mm/clear_page.S
new file mode 100644
index 0000000..ae58a61
--- /dev/null
+++ b/arch/sh/mm/clear_page.S
@@ -0,0 +1,295 @@
+/* $Id: clear_page.S,v 1.13 2003/08/25 17:03:10 lethal Exp $
+ *
+ * __clear_user_page, __clear_user, clear_page implementation of SuperH
+ *
+ * Copyright (C) 2001  Kaz Kojima
+ * Copyright (C) 2001, 2002  Niibe Yutaka
+ *
+ */
+#include <linux/config.h>
+#include <linux/linkage.h>
+
+/*
+ * clear_page_slow
+ * @to: P1 address
+ *
+ * void clear_page_slow(void *to)
+ */
+
+/*
+ * r0 --- scratch
+ * r4 --- to
+ * r5 --- to + 4096
+ */
+ENTRY(clear_page_slow)
+	mov	r4,r5
+	mov.w	.Llimit,r0
+	add	r0,r5
+	mov	#0,r0
+	!
+1:
+#if defined(CONFIG_CPU_SH3)
+	mov.l	r0,@r4
+#elif defined(CONFIG_CPU_SH4)
+	movca.l	r0,@r4
+	mov	r4,r1
+#endif
+	add	#32,r4
+	mov.l	r0,@-r4
+	mov.l	r0,@-r4
+	mov.l	r0,@-r4
+	mov.l	r0,@-r4
+	mov.l	r0,@-r4
+	mov.l	r0,@-r4
+	mov.l	r0,@-r4
+#if defined(CONFIG_CPU_SH4)
+	ocbwb	@r1
+#endif
+	cmp/eq	r5,r4
+	bf/s	1b
+	 add	#28,r4
+	!
+	rts
+	 nop
+.Llimit:	.word	(4096-28)
+
+ENTRY(__clear_user)
+	!
+	mov	#0, r0
+	mov	#0xe0, r1	! 0xffffffe0
+	!
+	! r4..(r4+31)&~31 	   -------- not aligned	[ Area 0 ]
+	! (r4+31)&~31..(r4+r5)&~31 -------- aligned	[ Area 1 ]
+	! (r4+r5)&~31..r4+r5       -------- not aligned	[ Area 2 ]
+	!
+	! Clear area 0
+	mov	r4, r2
+	!
+	tst	r1, r5		! length < 32
+	bt	.Larea2		! skip to remainder
+	!
+	add	#31, r2
+	and	r1, r2
+	cmp/eq	r4, r2
+	bt	.Larea1
+	mov	r2, r3
+	sub	r4, r3
+	mov	r3, r7
+	mov	r4, r2
+	!
+.L0:	dt	r3
+0:	mov.b	r0, @r2
+	bf/s	.L0
+	 add	#1, r2
+	!
+	sub	r7, r5
+	mov	r2, r4
+.Larea1:
+	mov	r4, r3
+	add	r5, r3
+	and	r1, r3
+	cmp/hi	r2, r3
+	bf	.Larea2
+	!
+	! Clear area 1
+#if defined(CONFIG_CPU_SH4)
+1:	movca.l	r0, @r2
+#else
+1:	mov.l	r0, @r2
+#endif
+	add	#4, r2
+2:	mov.l	r0, @r2
+	add	#4, r2
+3:	mov.l	r0, @r2
+	add	#4, r2
+4:	mov.l	r0, @r2
+	add	#4, r2
+5:	mov.l	r0, @r2
+	add	#4, r2
+6:	mov.l	r0, @r2
+	add	#4, r2
+7:	mov.l	r0, @r2
+	add	#4, r2
+8:	mov.l	r0, @r2
+	add	#4, r2
+	cmp/hi	r2, r3
+	bt/s	1b
+	 nop
+	!
+	! Clear area 2
+.Larea2:
+	mov	r4, r3
+	add	r5, r3
+	cmp/hs	r3, r2
+	bt/s	.Ldone
+	 sub	r2, r3
+.L2:	dt	r3
+9:	mov.b	r0, @r2
+	bf/s	.L2
+	 add	#1, r2
+	!
+.Ldone:	rts
+	 mov	#0, r0	! return 0 as normal return
+
+	! return the number of bytes remaining
+.Lbad_clear_user:
+	mov	r4, r0
+	add	r5, r0
+	rts
+	 sub	r2, r0
+
+.section __ex_table,"a"
+	.align 2
+	.long	0b, .Lbad_clear_user
+	.long	1b, .Lbad_clear_user
+	.long	2b, .Lbad_clear_user
+	.long	3b, .Lbad_clear_user
+	.long	4b, .Lbad_clear_user
+	.long	5b, .Lbad_clear_user
+	.long	6b, .Lbad_clear_user
+	.long	7b, .Lbad_clear_user
+	.long	8b, .Lbad_clear_user
+	.long	9b, .Lbad_clear_user
+.previous
+
+#if defined(CONFIG_CPU_SH4)
+/*
+ * __clear_user_page
+ * @to: P3 address (with same color)
+ * @orig_to: P1 address
+ *
+ * void __clear_user_page(void *to, void *orig_to)
+ */
+
+/*
+ * r0 --- scratch 
+ * r4 --- to
+ * r5 --- orig_to
+ * r6 --- to + 4096
+ */
+ENTRY(__clear_user_page)
+	mov.w	.L4096,r0
+	mov	r4,r6
+	add	r0,r6
+	mov	#0,r0
+	!
+1:	ocbi	@r5
+	add	#32,r5
+	movca.l	r0,@r4
+	mov	r4,r1
+	add	#32,r4
+	mov.l	r0,@-r4
+	mov.l	r0,@-r4
+	mov.l	r0,@-r4
+	mov.l	r0,@-r4
+	mov.l	r0,@-r4
+	mov.l	r0,@-r4
+	mov.l	r0,@-r4
+	add	#28,r4
+	cmp/eq	r6,r4
+	bf/s	1b
+	 ocbwb	@r1
+	!
+	rts
+	 nop
+.L4096:	.word	4096
+
+ENTRY(__flush_cache_4096)
+	mov.l	1f,r3
+	add	r6,r3
+	mov	r4,r0
+	mov	#64,r2
+	shll	r2
+	mov	#64,r6
+	jmp	@r3
+	 mov	#96,r7
+	.align	2
+1:	.long	2f
+2:
+	.rept	32
+	mov.l	r5,@r0
+	mov.l	r5,@(32,r0)
+	mov.l	r5,@(r0,r6)
+	mov.l	r5,@(r0,r7)
+	add	r2,r5
+	add	r2,r0
+	.endr
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	rts
+	 nop
+
+ENTRY(__flush_dcache_all)
+	mov.l	2f,r0
+	mov.l	3f,r4
+	and	r0,r4		! r4 = (unsigned long)&empty_zero_page[0] & 0xffffc000
+	stc	sr,r1		! save SR
+	mov.l	4f,r2
+	or	r1,r2
+	mov	#32,r3
+	shll2	r3
+1:
+	ldc	r2,sr		! set BL bit
+	movca.l	r0,@r4
+	ocbi	@r4
+	add	#32,r4
+	movca.l	r0,@r4
+	ocbi	@r4
+	add	#32,r4
+	movca.l	r0,@r4
+	ocbi	@r4
+	add	#32,r4
+	movca.l	r0,@r4
+	ocbi	@r4
+	ldc	r1,sr		! restore SR
+	dt	r3
+	bf/s	1b
+	 add	#32,r4
+
+	rts
+	 nop
+	.align	2
+2:	.long	0xffffc000
+3:	.long	empty_zero_page
+4:	.long	0x10000000	! BL bit
+
+/* __flush_cache_4096_all(unsigned long addr) */
+ENTRY(__flush_cache_4096_all)
+	mov.l	2f,r0
+	mov.l	3f,r2
+	and	r0,r2
+	or	r2,r4		! r4 = addr | (unsigned long)&empty_zero_page[0] & ~0x3fff
+	stc	sr,r1		! save SR
+	mov.l	4f,r2
+	or	r1,r2
+	mov	#32,r3
+1:
+	ldc	r2,sr		! set BL bit
+	movca.l	r0,@r4
+	ocbi	@r4
+	add	#32,r4
+	movca.l	r0,@r4
+	ocbi	@r4
+	add	#32,r4
+	movca.l	r0,@r4
+	ocbi	@r4
+	add	#32,r4
+	movca.l	r0,@r4
+	ocbi	@r4
+	ldc	r1,sr		! restore SR
+	dt	r3
+	bf/s	1b
+	 add	#32,r4
+
+	rts
+	 nop
+	.align	2
+2:	.long	0xffffc000
+3:	.long	empty_zero_page
+4:	.long	0x10000000	! BL bit
+#endif
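
For readers not fluent in SH assembly, here is a rough C equivalent of
clear_page_slow()'s unrolled inner loop: zero the page 32 bytes per pass.
This is a structural sketch only; it omits the movca.l/ocbwb cache
interplay that the SH-4 path relies on.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE	4096

/* Zero a page 32 bytes per pass, like the unrolled loop above. */
static void clear_page_c(void *to)
{
	uint32_t *p = to;
	uint32_t *end = p + PAGE_SIZE / sizeof(*p);

	while (p < end) {
		p[0] = p[1] = p[2] = p[3] = 0;
		p[4] = p[5] = p[6] = p[7] = 0;
		p += 8;				/* 32 bytes done */
	}
}

int main(void)
{
	static uint32_t page[PAGE_SIZE / sizeof(uint32_t)];

	memset(page, 0xff, sizeof(page));
	clear_page_c(page);
	printf("first=%u last=%u\n", page[0], page[1023]);
	return 0;
}
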
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
new file mode 100644
index 0000000..1f7af0c
--- /dev/null
+++ b/arch/sh/mm/consistent.c
@@ -0,0 +1,85 @@
+/*
+ * arch/sh/mm/consistent.c
+ *
+ * Copyright (C) 2004  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <asm/io.h>
+
+void *consistent_alloc(int gfp, size_t size, dma_addr_t *handle)
+{
+	struct page *page, *end, *free;
+	void *ret;
+	int order;
+
+	size = PAGE_ALIGN(size);
+	order = get_order(size);
+
+	page = alloc_pages(gfp, order);
+	if (!page)
+		return NULL;
+
+	ret = page_address(page);
+	*handle = virt_to_phys(ret);
+
+	/*
+	 * We must flush the cache before we pass it on to the device
+	 */
+	dma_cache_wback_inv(ret, size);
+
+	page = virt_to_page(ret);
+	free = page + (size >> PAGE_SHIFT);
+	end  = page + (1 << order);
+
+	while (++page < end) {
+		set_page_count(page, 1);
+
+		/* Free any unused pages */
+		if (page >= free) {
+			__free_page(page);
+		}
+	}
+
+	return P2SEGADDR(ret);
+}
+
+void consistent_free(void *vaddr, size_t size)
+{
+	unsigned long addr = P1SEGADDR((unsigned long)vaddr);
+	struct page *page = virt_to_page(addr);
+	int num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	int i;
+
+	for (i = 0; i < num_pages; i++) {
+		__free_page(page + i);
+	}
+}
+
+void consistent_sync(void *vaddr, size_t size, int direction)
+{
+	void *p1addr = (void *)P1SEGADDR((unsigned long)vaddr);
+
+	switch (direction) {
+	case DMA_FROM_DEVICE:		/* invalidate only */
+		dma_cache_inv(p1addr, size);
+		break;
+	case DMA_TO_DEVICE:		/* writeback only */
+		dma_cache_wback(p1addr, size);
+		break;
+	case DMA_BIDIRECTIONAL:		/* writeback and invalidate */
+		dma_cache_wback_inv(p1addr, size);
+		break;
+	default:
+		BUG();
+	}
+}
+
+EXPORT_SYMBOL(consistent_alloc);
+EXPORT_SYMBOL(consistent_free);
+EXPORT_SYMBOL(consistent_sync);
+
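
consistent_alloc() above rounds the request up to a power-of-two page
order and then frees the unused tail pages. A standalone sketch of that
arithmetic follows; get_order_c() mirrors the kernel's get_order()
logic, and the numbers are illustrative only.

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

/* Mirrors get_order(): smallest order whose block covers size. */
static int get_order_c(unsigned long size)
{
	int order = 0;

	size = (size - 1) >> PAGE_SHIFT;
	while (size) {
		order++;
		size >>= 1;
	}
	return order;
}

int main(void)
{
	unsigned long size = 5 * PAGE_SIZE;
	int order = get_order_c(size);
	unsigned long got = 1UL << order;

	/* consistent_alloc() hands back the unused tail pages. */
	printf("order=%d pages=%lu tail freed=%lu\n",
	       order, got, got - size / PAGE_SIZE);
	return 0;
}
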
diff --git a/arch/sh/mm/copy_page.S b/arch/sh/mm/copy_page.S
new file mode 100644
index 0000000..1addffe
--- /dev/null
+++ b/arch/sh/mm/copy_page.S
@@ -0,0 +1,397 @@
+/* $Id: copy_page.S,v 1.8 2003/08/25 17:03:10 lethal Exp $
+ *
+ * copy_page, __copy_user_page, __copy_user implementation of SuperH
+ *
+ * Copyright (C) 2001  Niibe Yutaka & Kaz Kojima
+ * Copyright (C) 2002  Toshinobu Sugioka
+ *
+ */
+#include <linux/linkage.h>
+
+/*
+ * copy_page_slow
+ * @to: P1 address
+ * @from: P1 address
+ *
+ * void copy_page_slow(void *to, void *from)
+ */
+
+/*
+ * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch 
+ * r8 --- from + 4096
+ * r9 --- not used
+ * r10 --- to
+ * r11 --- from
+ */
+ENTRY(copy_page_slow)
+	mov.l	r8,@-r15
+	mov.l	r10,@-r15
+	mov.l	r11,@-r15
+	mov	r4,r10
+	mov	r5,r11
+	mov	r5,r8
+	mov.w	.L4096,r0
+	add	r0,r8
+	!
+1:	mov.l	@r11+,r0
+	mov.l	@r11+,r1
+	mov.l	@r11+,r2
+	mov.l	@r11+,r3
+	mov.l	@r11+,r4
+	mov.l	@r11+,r5
+	mov.l	@r11+,r6
+	mov.l	@r11+,r7
+#if defined(CONFIG_CPU_SH3)
+	mov.l	r0,@r10
+#elif defined(CONFIG_CPU_SH4)
+	movca.l	r0,@r10
+	mov	r10,r0
+#endif
+	add	#32,r10
+	mov.l	r7,@-r10
+	mov.l	r6,@-r10
+	mov.l	r5,@-r10
+	mov.l	r4,@-r10
+	mov.l	r3,@-r10
+	mov.l	r2,@-r10
+	mov.l	r1,@-r10
+#if defined(CONFIG_CPU_SH4)
+	ocbwb	@r0
+#endif
+	cmp/eq	r11,r8
+	bf/s	1b
+	 add	#28,r10
+	!
+	mov.l	@r15+,r11
+	mov.l	@r15+,r10
+	mov.l	@r15+,r8
+	rts
+	 nop
+
+#if defined(CONFIG_CPU_SH4)
+/*
+ * __copy_user_page
+ * @to: P1 address (with same color)
+ * @from: P1 address
+ * @orig_to: P1 address
+ *
+ * void __copy_user_page(void *to, void *from, void *orig_to)
+ */
+
+/*
+ * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch 
+ * r8 --- from + 4096
+ * r9 --- orig_to
+ * r10 --- to
+ * r11 --- from
+ */
+ENTRY(__copy_user_page)
+	mov.l	r8,@-r15
+	mov.l	r9,@-r15
+	mov.l	r10,@-r15
+	mov.l	r11,@-r15
+	mov	r4,r10
+	mov	r5,r11
+	mov	r6,r9
+	mov	r5,r8
+	mov.w	.L4096,r0
+	add	r0,r8
+	!
+1:	ocbi	@r9
+	add	#32,r9
+	mov.l	@r11+,r0
+	mov.l	@r11+,r1
+	mov.l	@r11+,r2
+	mov.l	@r11+,r3
+	mov.l	@r11+,r4
+	mov.l	@r11+,r5
+	mov.l	@r11+,r6
+	mov.l	@r11+,r7
+	movca.l	r0,@r10
+	mov	r10,r0
+	add	#32,r10
+	mov.l	r7,@-r10
+	mov.l	r6,@-r10
+	mov.l	r5,@-r10
+	mov.l	r4,@-r10
+	mov.l	r3,@-r10
+	mov.l	r2,@-r10
+	mov.l	r1,@-r10
+	ocbwb	@r0
+	cmp/eq	r11,r8
+	bf/s	1b
+	 add	#28,r10
+	!
+	mov.l	@r15+,r11
+	mov.l	@r15+,r10
+	mov.l	@r15+,r9
+	mov.l	@r15+,r8
+	rts
+	 nop
+#endif
+.L4096:	.word	4096
+/*
+ * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n);
+ * Return the number of bytes NOT copied
+ */
+#define EX(...)			\
+	9999: __VA_ARGS__ ;		\
+	.section __ex_table, "a";	\
+	.long 9999b, 6000f	;	\
+	.previous
+ENTRY(__copy_user)
+	tst	r6,r6		! Check explicitly for zero
+	bf	1f
+	rts
+	 mov	#0,r0		! normal return
+1:
+	mov.l	r10,@-r15
+	mov.l	r9,@-r15
+	mov.l	r8,@-r15
+	mov	r4,r3
+	add	r6,r3		! last destination address
+	mov	#12,r0		! Check if small number of bytes
+	cmp/gt	r0,r6
+	bt	2f
+	bra	.L_cleanup_loop
+	 nop
+2:
+	neg	r5,r0		! Calculate bytes needed to align source
+	add	#4,r0
+	and	#3,r0
+	tst	r0,r0
+	bt	.L_jump
+	mov	r0,r1
+
+.L_loop1:
+	! Copy bytes to align source
+EX(	mov.b	@r5+,r0		)
+	dt	r1
+EX(	mov.b	r0,@r4		)
+	add	#-1,r6
+	bf/s	.L_loop1
+	 add	#1,r4
+
+.L_jump:
+	mov	r6,r2		! Calculate number of longwords to copy
+	shlr2	r2
+	tst	r2,r2
+	bt	.L_cleanup
+
+	mov	r4,r0		! Jump to appropriate routine
+	and	#3,r0
+	mov	r0,r1
+	shll2	r1
+	mova	.L_jump_tbl,r0
+	mov.l	@(r0,r1),r1
+	jmp	@r1
+	 nop
+
+	.align 2
+.L_jump_tbl:
+	.long	.L_dest00
+	.long	.L_dest01
+	.long	.L_dest10
+	.long	.L_dest11
+
+! Destination = 00
+
+.L_dest00:
+	mov	r2,r7
+	shlr2	r7
+	shlr	r7
+	tst	r7,r7
+	mov	#7,r0
+	bt/s	1f
+	 and	r0,r2
+	.align 2
+2:
+EX(	mov.l	@r5+,r0		)
+EX(	mov.l	@r5+,r8		)
+EX(	mov.l	@r5+,r9		)
+EX(	mov.l	@r5+,r10	)
+EX(	mov.l	r0,@r4		)
+EX(	mov.l	r8,@(4,r4)	)
+EX(	mov.l	r9,@(8,r4)	)
+EX(	mov.l	r10,@(12,r4)	)
+EX(	mov.l	@r5+,r0		)
+EX(	mov.l	@r5+,r8		)
+EX(	mov.l	@r5+,r9		)
+EX(	mov.l	@r5+,r10	)
+	dt	r7
+EX(	mov.l	r0,@(16,r4)	)
+EX(	mov.l	r8,@(20,r4)	)
+EX(	mov.l	r9,@(24,r4)	)
+EX(	mov.l	r10,@(28,r4)	)
+	bf/s	2b
+	 add	#32,r4
+	tst	r2,r2
+	bt	.L_cleanup
+1:
+EX(	mov.l	@r5+,r0		)
+	dt	r2
+EX(	mov.l	r0,@r4		)
+	bf/s	1b
+	 add	#4,r4
+
+	bra	.L_cleanup
+	 nop
+
+! Destination = 10
+
+.L_dest10:
+	mov	r2,r7
+	shlr2	r7
+	shlr	r7
+	tst	r7,r7
+	mov	#7,r0
+	bt/s	1f
+	 and	r0,r2
+2:
+	dt	r7
+#ifdef __LITTLE_ENDIAN__
+EX(	mov.l	@r5+,r0		)
+EX(	mov.l	@r5+,r1		)
+EX(	mov.l	@r5+,r8		)
+EX(	mov.l	@r5+,r9		)
+EX(	mov.l	@r5+,r10	)
+EX(	mov.w	r0,@r4		)
+	add	#2,r4
+	xtrct	r1,r0
+	xtrct	r8,r1
+	xtrct	r9,r8
+	xtrct	r10,r9
+
+EX(	mov.l	r0,@r4		)
+EX(	mov.l	r1,@(4,r4)	)
+EX(	mov.l	r8,@(8,r4)	)
+EX(	mov.l	r9,@(12,r4)	)
+
+EX(	mov.l	@r5+,r1		)
+EX(	mov.l	@r5+,r8		)
+EX(	mov.l	@r5+,r0		)
+	xtrct	r1,r10
+	xtrct	r8,r1
+	xtrct	r0,r8
+	shlr16	r0
+EX(	mov.l	r10,@(16,r4)	)
+EX(	mov.l	r1,@(20,r4)	)
+EX(	mov.l	r8,@(24,r4)	)
+EX(	mov.w	r0,@(28,r4)	)
+	bf/s	2b
+	 add	#30,r4
+#else
+EX(	mov.l	@(28,r5),r0	)
+EX(	mov.l	@(24,r5),r8	)
+EX(	mov.l	@(20,r5),r9	)
+EX(	mov.l	@(16,r5),r10	)
+EX(	mov.w	r0,@(30,r4)	)
+	add	#-2,r4
+	xtrct	r8,r0
+	xtrct	r9,r8
+	xtrct	r10,r9
+EX(	mov.l	r0,@(28,r4)	)
+EX(	mov.l	r8,@(24,r4)	)
+EX(	mov.l	r9,@(20,r4)	)
+
+EX(	mov.l	@(12,r5),r0	)
+EX(	mov.l	@(8,r5),r8	)
+	xtrct	r0,r10
+EX(	mov.l	@(4,r5),r9	)
+	mov.l	r10,@(16,r4)
+EX(	mov.l	@r5,r10		)
+	xtrct	r8,r0
+	xtrct	r9,r8
+	xtrct	r10,r9
+EX(	mov.l	r0,@(12,r4)	)
+EX(	mov.l	r8,@(8,r4)	)
+	swap.w	r10,r0
+EX(	mov.l	r9,@(4,r4)	)
+EX(	mov.w	r0,@(2,r4)	)
+
+	add	#32,r5
+	bf/s	2b
+	 add	#34,r4
+#endif
+	tst	r2,r2
+	bt	.L_cleanup
+
+1:	! Read longword, write two words per iteration
+EX(	mov.l	@r5+,r0		)
+	dt	r2
+#ifdef __LITTLE_ENDIAN__
+EX(	mov.w	r0,@r4		)
+	shlr16	r0
+EX(	mov.w 	r0,@(2,r4)	)
+#else
+EX(	mov.w	r0,@(2,r4)	)
+	shlr16	r0
+EX(	mov.w	r0,@r4		)
+#endif
+	bf/s	1b
+	 add	#4,r4
+
+	bra	.L_cleanup
+	 nop
+
+! Destination = 01 or 11
+
+.L_dest01:
+.L_dest11:
+	! Read longword, write byte, word, byte per iteration
+EX(	mov.l	@r5+,r0		)
+	dt	r2
+#ifdef __LITTLE_ENDIAN__
+EX(	mov.b	r0,@r4		)
+	shlr8	r0
+	add	#1,r4
+EX(	mov.w	r0,@r4		)
+	shlr16	r0
+EX(	mov.b	r0,@(2,r4)	)
+	bf/s	.L_dest01
+	 add	#3,r4
+#else
+EX(	mov.b	r0,@(3,r4)	)
+	shlr8	r0
+	swap.w	r0,r7
+EX(	mov.b	r7,@r4		)
+	add	#1,r4
+EX(	mov.w	r0,@r4		)
+	bf/s	.L_dest01
+	 add	#3,r4
+#endif
+
+! Cleanup last few bytes
+.L_cleanup:
+	mov	r6,r0
+	and	#3,r0
+	tst	r0,r0
+	bt	.L_exit
+	mov	r0,r6
+
+.L_cleanup_loop:
+EX(	mov.b	@r5+,r0		)
+	dt	r6
+EX(	mov.b	r0,@r4		)
+	bf/s	.L_cleanup_loop
+	 add	#1,r4
+
+.L_exit:
+	mov	#0,r0		! normal return
+5000:
+
+# Exception handler:
+.section .fixup, "ax"
+6000:
+	mov.l	8000f,r1
+	mov	r3,r0
+	jmp	@r1
+	 sub	r4,r0
+	.align	2
+8000:	.long	5000b
+
+.previous
+	mov.l	@r15+,r8
+	mov.l	@r15+,r9
+	rts
+	 mov.l	@r15+,r10
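
A rough C rendering of __copy_user()'s overall shape: copy head bytes
until the source is aligned, move the bulk as longwords, then clean up
the tail. This sketch deliberately omits the EX()/exception-table fault
handling and the destination-alignment jump table.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Head/body/tail structure of __copy_user(), minus fault handling. */
static unsigned long copy_c(void *to, const void *from, unsigned long n)
{
	unsigned char *d = to;
	const unsigned char *s = from;

	while (n && ((uintptr_t)s & 3)) {	/* align source (.L_loop1) */
		*d++ = *s++;
		n--;
	}
	while (n >= 4) {			/* longword body */
		uint32_t w;

		memcpy(&w, s, sizeof(w));	/* dest may stay unaligned */
		memcpy(d, &w, sizeof(w));
		s += 4; d += 4; n -= 4;
	}
	while (n--)				/* tail (.L_cleanup_loop) */
		*d++ = *s++;
	return 0;				/* "normal return" */
}

int main(void)
{
	char src[] = "xsuperh copy_user sketch";
	char dst[32];

	copy_c(dst, src + 1, sizeof(src) - 1);	/* misaligned source */
	printf("%s\n", dst);
	return 0;
}
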
diff --git a/arch/sh/mm/extable.c b/arch/sh/mm/extable.c
new file mode 100644
index 0000000..505ede7
--- /dev/null
+++ b/arch/sh/mm/extable.c
@@ -0,0 +1,22 @@
+/*
+ * linux/arch/sh/mm/extable.c
+ *  Taken from:
+ *   linux/arch/i386/mm/extable.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+
+int fixup_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+
+	fixup = search_exception_tables(regs->pc);
+	if (fixup) {
+		regs->pc = fixup->fixup;
+		return 1;
+	}
+
+	return 0;
+}
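
The exception table that fixup_exception() searches is conceptually an
array of (faulting PC, fixup PC) pairs, populated by macros like EX()
in copy_page.S. A toy standalone version with made-up addresses and a
linear scan (the kernel's search_exception_tables() does a sorted
binary search instead):

#include <stdio.h>

/* Shape of struct exception_table_entry: faulting PC -> fixup PC. */
struct entry {
	unsigned long insn;
	unsigned long fixup;
};

/* Hypothetical addresses, for illustration only. */
static const struct entry table[] = {
	{ 0x8c001000UL, 0x8c009000UL },
	{ 0x8c001040UL, 0x8c009010UL },
};

/* Linear-scan stand-in for search_exception_tables(). */
static unsigned long search(unsigned long pc)
{
	unsigned int i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].insn == pc)
			return table[i].fixup;
	return 0;
}

int main(void)
{
	/* fixup_exception() would do: regs->pc = search(regs->pc); */
	printf("redirect pc to %#lx\n", search(0x8c001040UL));
	return 0;
}
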
diff --git a/arch/sh/mm/fault-nommu.c b/arch/sh/mm/fault-nommu.c
new file mode 100644
index 0000000..34d4e0c
--- /dev/null
+++ b/arch/sh/mm/fault-nommu.c
@@ -0,0 +1,82 @@
+/* 
+ * arch/sh/mm/fault-nommu.c
+ *
+ * Copyright (C) 2002 Paul Mundt
+ *
+ * Based on linux/arch/sh/mm/fault.c:
+ *  Copyright (C) 1999  Niibe Yutaka
+ *
+ * Released under the terms of the GNU GPL v2.0.
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+#if defined(CONFIG_SH_KGDB)
+#include <asm/kgdb.h>
+#endif
+
+extern void die(const char *,struct pt_regs *,long);
+
+/*
+ * This routine handles page faults.  It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ */
+asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
+			      unsigned long address)
+{
+#if defined(CONFIG_SH_KGDB)
+	if (kgdb_nofault && kgdb_bus_err_hook)
+		kgdb_bus_err_hook();
+#endif
+
+	/*
+	 * Oops. The kernel tried to access some bad page. We'll have to
+	 * terminate things with extreme prejudice.
+	 *
+	 */
+	if (address < PAGE_SIZE) {
+		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
+	} else {
+		printk(KERN_ALERT "Unable to handle kernel paging request");
+	}
+
+	printk(" at virtual address %08lx\n", address);
+	printk(KERN_ALERT "pc = %08lx\n", regs->pc);
+
+	die("Oops", regs, writeaccess);
+	do_exit(SIGKILL);
+}
+
+asmlinkage int __do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
+			       unsigned long address)
+{
+#if defined(CONFIG_SH_KGDB)
+	if (kgdb_nofault && kgdb_bus_err_hook)
+		kgdb_bus_err_hook();
+#endif
+
+	if (address >= TASK_SIZE)
+		return 1;
+
+	return 0;
+}
+
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
new file mode 100644
index 0000000..7abba21
--- /dev/null
+++ b/arch/sh/mm/fault.c
@@ -0,0 +1,374 @@
+/* $Id: fault.c,v 1.14 2004/01/13 05:52:11 kkojima Exp $
+ *
+ *  linux/arch/sh/mm/fault.c
+ *  Copyright (C) 1999  Niibe Yutaka
+ *  Copyright (C) 2003  Paul Mundt
+ *
+ *  Based on linux/arch/i386/mm/fault.c:
+ *   Copyright (C) 1995  Linus Torvalds
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+#include <asm/kgdb.h>
+
+extern void die(const char *,struct pt_regs *,long);
+
+/*
+ * This routine handles page faults.  It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ */
+asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
+			      unsigned long address)
+{
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	struct vm_area_struct * vma;
+	unsigned long page;
+
+#ifdef CONFIG_SH_KGDB
+	if (kgdb_nofault && kgdb_bus_err_hook)
+		kgdb_bus_err_hook();
+#endif
+
+	tsk = current;
+	mm = tsk->mm;
+
+	/*
+	 * If we're in an interrupt or have no user
+	 * context, we must not take the fault..
+	 */
+	if (in_atomic() || !mm)
+		goto no_context;
+
+	down_read(&mm->mmap_sem);
+
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start <= address)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+	if (expand_stack(vma, address))
+		goto bad_area;
+/*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+good_area:
+	if (writeaccess) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+	} else {
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto bad_area;
+	}
+
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+survive:
+	switch (handle_mm_fault(mm, vma, address, writeaccess)) {
+		case VM_FAULT_MINOR:
+			tsk->min_flt++;
+			break;
+		case VM_FAULT_MAJOR:
+			tsk->maj_flt++;
+			break;
+		case VM_FAULT_SIGBUS:
+			goto do_sigbus;
+		case VM_FAULT_OOM:
+			goto out_of_memory;
+		default:
+			BUG();
+	}
+
+	up_read(&mm->mmap_sem);
+	return;
+
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+bad_area:
+	up_read(&mm->mmap_sem);
+
+	if (user_mode(regs)) {
+		tsk->thread.address = address;
+		tsk->thread.error_code = writeaccess;
+		force_sig(SIGSEGV, tsk);
+		return;
+	}
+
+no_context:
+	/* Are we prepared to handle this kernel fault?  */
+	if (fixup_exception(regs))
+		return;
+
+/*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ *
+ */
+	if (address < PAGE_SIZE)
+		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
+	else
+		printk(KERN_ALERT "Unable to handle kernel paging request");
+	printk(" at virtual address %08lx\n", address);
+	printk(KERN_ALERT "pc = %08lx\n", regs->pc);
+	asm volatile("mov.l	%1, %0"
+		     : "=r" (page)
+		     : "m" (__m(MMU_TTB)));
+	if (page) {
+		page = ((unsigned long *) page)[address >> 22];
+		printk(KERN_ALERT "*pde = %08lx\n", page);
+		if (page & _PAGE_PRESENT) {
+			page &= PAGE_MASK;
+			address &= 0x003ff000;
+			page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
+			printk(KERN_ALERT "*pte = %08lx\n", page);
+		}
+	}
+	die("Oops", regs, writeaccess);
+	do_exit(SIGKILL);
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+	up_read(&mm->mmap_sem);
+	if (current->pid == 1) {
+		yield();
+		down_read(&mm->mmap_sem);
+		goto survive;
+	}
+	printk("VM: killing process %s\n", tsk->comm);
+	if (user_mode(regs))
+		do_exit(SIGKILL);
+	goto no_context;
+
+do_sigbus:
+	up_read(&mm->mmap_sem);
+
+	/*
+	 * Send a sigbus, regardless of whether we were in kernel
+	 * or user mode.
+	 */
+	tsk->thread.address = address;
+	tsk->thread.error_code = writeaccess;
+	tsk->thread.trap_no = 14;
+	force_sig(SIGBUS, tsk);
+
+	/* Kernel mode? Handle exceptions or die */
+	if (!user_mode(regs))
+		goto no_context;
+}
+
+/*
+ * Called with interrupt disabled.
+ */
+asmlinkage int __do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
+			       unsigned long address)
+{
+	unsigned long addrmax = P4SEG;
+	pgd_t *dir;
+	pmd_t *pmd;
+	pte_t *pte;
+	pte_t entry;
+
+#ifdef CONFIG_SH_KGDB
+	if (kgdb_nofault && kgdb_bus_err_hook)
+		kgdb_bus_err_hook();
+#endif
+
+#ifdef CONFIG_SH_STORE_QUEUES
+	addrmax = P4SEG_STORE_QUE + 0x04000000;
+#endif
+
+	if (address >= P3SEG && address < addrmax)
+		dir = pgd_offset_k(address);
+	else if (address >= TASK_SIZE)
+		return 1;
+	else if (!current->mm)
+		return 1;
+	else
+		dir = pgd_offset(current->mm, address);
+
+	pmd = pmd_offset(dir, address);
+	if (pmd_none(*pmd))
+		return 1;
+	if (pmd_bad(*pmd)) {
+		pmd_ERROR(*pmd);
+		pmd_clear(pmd);
+		return 1;
+	}
+	pte = pte_offset_kernel(pmd, address);
+	entry = *pte;
+	if (pte_none(entry) || pte_not_present(entry)
+	    || (writeaccess && !pte_write(entry)))
+		return 1;
+
+	if (writeaccess)
+		entry = pte_mkdirty(entry);
+	entry = pte_mkyoung(entry);
+
+#ifdef CONFIG_CPU_SH4
+	/*
+	 * ITLB is not affected by "ldtlb" instruction.
+	 * So, we need to flush the entry by ourselves.
+	 */
+
+	{
+		unsigned long flags;
+		local_irq_save(flags);
+		__flush_tlb_page(get_asid(), address&PAGE_MASK);
+		local_irq_restore(flags);
+	}
+#endif
+
+	set_pte(pte, entry);
+	update_mmu_cache(NULL, address, entry);
+
+	return 0;
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+	if (vma->vm_mm && vma->vm_mm->context != NO_CONTEXT) {
+		unsigned long flags;
+		unsigned long asid;
+		unsigned long saved_asid = MMU_NO_ASID;
+
+		asid = vma->vm_mm->context & MMU_CONTEXT_ASID_MASK;
+		page &= PAGE_MASK;
+
+		local_irq_save(flags);
+		if (vma->vm_mm != current->mm) {
+			saved_asid = get_asid();
+			set_asid(asid);
+		}
+		__flush_tlb_page(asid, page);
+		if (saved_asid != MMU_NO_ASID)
+			set_asid(saved_asid);
+		local_irq_restore(flags);
+	}
+}
+
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		     unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (mm->context != NO_CONTEXT) {
+		unsigned long flags;
+		int size;
+
+		local_irq_save(flags);
+		size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+		if (size > (MMU_NTLB_ENTRIES/4)) { /* Too many TLB entries to flush */
+			mm->context = NO_CONTEXT;
+			if (mm == current->mm)
+				activate_context(mm);
+		} else {
+			unsigned long asid = mm->context&MMU_CONTEXT_ASID_MASK;
+			unsigned long saved_asid = MMU_NO_ASID;
+
+			start &= PAGE_MASK;
+			end += (PAGE_SIZE - 1);
+			end &= PAGE_MASK;
+			if (mm != current->mm) {
+				saved_asid = get_asid();
+				set_asid(asid);
+			}
+			while (start < end) {
+				__flush_tlb_page(asid, start);
+				start += PAGE_SIZE;
+			}
+			if (saved_asid != MMU_NO_ASID)
+				set_asid(saved_asid);
+		}
+		local_irq_restore(flags);
+	}
+}
+
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	unsigned long flags;
+	int size;
+
+	local_irq_save(flags);
+	size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+	if (size > (MMU_NTLB_ENTRIES/4)) { /* Too many TLB entries to flush */
+		flush_tlb_all();
+	} else {
+		unsigned long asid = init_mm.context&MMU_CONTEXT_ASID_MASK;
+		unsigned long saved_asid = get_asid();
+
+		start &= PAGE_MASK;
+		end += (PAGE_SIZE - 1);
+		end &= PAGE_MASK;
+		set_asid(asid);
+		while (start < end) {
+			__flush_tlb_page(asid, start);
+			start += PAGE_SIZE;
+		}
+		set_asid(saved_asid);
+	}
+	local_irq_restore(flags);
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	/* Invalidate all TLB entries for this process. */
+	/* Instead of invalidating each entry, we get a new MMU context. */
+	if (mm->context != NO_CONTEXT) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		mm->context = NO_CONTEXT;
+		if (mm == current->mm)
+			activate_context(mm);
+		local_irq_restore(flags);
+	}
+}
+
+void flush_tlb_all(void)
+{
+	unsigned long flags, status;
+
+	/*
+	 * Flush all the TLB.
+	 *
+	 * Write to the MMU control register's bit:
+	 *	TF-bit for SH-3, TI-bit for SH-4.
+	 *	It's the same position, bit #2.
+	 */
+	local_irq_save(flags);
+	status = ctrl_inl(MMUCR);
+	status |= 0x04;		
+	ctrl_outl(status, MMUCR);
+	local_irq_restore(flags);
+}
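
flush_tlb_page() and flush_tlb_range() above share a borrow-and-restore
ASID pattern: temporarily switch the hardware ASID to the target mm's,
flush, then switch back. A standalone simulation of just that pattern,
where get_asid()/set_asid() are stand-ins for the real PTEH register
accessors:

#include <stdio.h>

#define MMU_NO_ASID	0xffffffffUL

static unsigned long hw_asid = 3;	/* stand-in for PTEH.ASID */

static unsigned long get_asid(void) { return hw_asid; }
static void set_asid(unsigned long a) { hw_asid = a; }

/* The borrow-and-restore pattern from flush_tlb_page(). */
static void flush_one(unsigned long asid, unsigned long page)
{
	unsigned long saved_asid = MMU_NO_ASID;

	if (asid != get_asid()) {		/* not the current mm */
		saved_asid = get_asid();
		set_asid(asid);
	}
	printf("flush page %#lx under asid %lu\n", page, get_asid());
	if (saved_asid != MMU_NO_ASID)
		set_asid(saved_asid);
}

int main(void)
{
	flush_one(7, 0x400000);
	printf("asid restored to %lu\n", get_asid());
	return 0;
}
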
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
new file mode 100644
index 0000000..1f897ba
--- /dev/null
+++ b/arch/sh/mm/hugetlbpage.c
@@ -0,0 +1,264 @@
+/*
+ * arch/sh/mm/hugetlbpage.c
+ *
+ * SuperH HugeTLB page support.
+ *
+ * Cloned from sparc64 by Paul Mundt.
+ *
+ * Copyright (C) 2002, 2003 David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+#include <linux/slab.h>
+#include <linux/sysctl.h>
+
+#include <asm/mman.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+
+static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd) {
+		pmd = pmd_alloc(mm, pgd, addr);
+		if (pmd)
+			pte = pte_alloc_map(mm, pmd, addr);
+	}
+	return pte;
+}
+
+static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd) {
+		pmd = pmd_offset(pgd, addr);
+		if (pmd)
+			pte = pte_offset_map(pmd, addr);
+	}
+	return pte;
+}
+
+#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0)
+
+static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
+			 struct page *page, pte_t * page_table, int write_access)
+{
+	unsigned long i;
+	pte_t entry;
+
+	add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
+
+	if (write_access)
+		entry = pte_mkwrite(pte_mkdirty(mk_pte(page,
+						       vma->vm_page_prot)));
+	else
+		entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
+	entry = pte_mkyoung(entry);
+	mk_pte_huge(entry);
+
+	for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
+		set_pte(page_table, entry);
+		page_table++;
+
+		pte_val(entry) += PAGE_SIZE;
+	}
+}
+
+/*
+ * This function checks for proper alignment of input addr and len parameters.
+ */
+int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
+{
+	if (len & ~HPAGE_MASK)
+		return -EINVAL;
+	if (addr & ~HPAGE_MASK)
+		return -EINVAL;
+	return 0;
+}
+
+int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
+			    struct vm_area_struct *vma)
+{
+	pte_t *src_pte, *dst_pte, entry;
+	struct page *ptepage;
+	unsigned long addr = vma->vm_start;
+	unsigned long end = vma->vm_end;
+	int i;
+
+	while (addr < end) {
+		dst_pte = huge_pte_alloc(dst, addr);
+		if (!dst_pte)
+			goto nomem;
+		src_pte = huge_pte_offset(src, addr);
+		BUG_ON(!src_pte || pte_none(*src_pte));
+		entry = *src_pte;
+		ptepage = pte_page(entry);
+		get_page(ptepage);
+		for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
+			set_pte(dst_pte, entry);
+			pte_val(entry) += PAGE_SIZE;
+			dst_pte++;
+		}
+		add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
+		addr += HPAGE_SIZE;
+	}
+	return 0;
+
+nomem:
+	return -ENOMEM;
+}
+
+int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
+			struct page **pages, struct vm_area_struct **vmas,
+			unsigned long *position, int *length, int i)
+{
+	unsigned long vaddr = *position;
+	int remainder = *length;
+
+	WARN_ON(!is_vm_hugetlb_page(vma));
+
+	while (vaddr < vma->vm_end && remainder) {
+		if (pages) {
+			pte_t *pte;
+			struct page *page;
+
+			pte = huge_pte_offset(mm, vaddr);
+
+			/* hugetlb should be locked, and hence, prefaulted */
+			BUG_ON(!pte || pte_none(*pte));
+
+			page = pte_page(*pte);
+
+			WARN_ON(!PageCompound(page));
+
+			get_page(page);
+			pages[i] = page;
+		}
+
+		if (vmas)
+			vmas[i] = vma;
+
+		vaddr += PAGE_SIZE;
+		--remainder;
+		++i;
+	}
+
+	*length = remainder;
+	*position = vaddr;
+
+	return i;
+}
+
+struct page *follow_huge_addr(struct mm_struct *mm,
+			      unsigned long address, int write)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+int pmd_huge(pmd_t pmd)
+{
+	return 0;
+}
+
+struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+			     pmd_t *pmd, int write)
+{
+	return NULL;
+}
+
+void unmap_hugepage_range(struct vm_area_struct *vma,
+			  unsigned long start, unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long address;
+	pte_t *pte;
+	struct page *page;
+	int i;
+
+	BUG_ON(start & (HPAGE_SIZE - 1));
+	BUG_ON(end & (HPAGE_SIZE - 1));
+
+	for (address = start; address < end; address += HPAGE_SIZE) {
+		pte = huge_pte_offset(mm, address);
+		BUG_ON(!pte);
+		if (pte_none(*pte))
+			continue;
+		page = pte_page(*pte);
+		put_page(page);
+		for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
+			pte_clear(mm, address+(i*PAGE_SIZE), pte);
+			pte++;
+		}
+	}
+	add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
+	flush_tlb_range(vma, start, end);
+}
+
+int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr;
+	int ret = 0;
+
+	BUG_ON(vma->vm_start & ~HPAGE_MASK);
+	BUG_ON(vma->vm_end & ~HPAGE_MASK);
+
+	spin_lock(&mm->page_table_lock);
+	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
+		unsigned long idx;
+		pte_t *pte = huge_pte_alloc(mm, addr);
+		struct page *page;
+
+		if (!pte) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		if (!pte_none(*pte))
+			continue;
+
+		idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
+			+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
+		page = find_get_page(mapping, idx);
+		if (!page) {
+			/* charge the fs quota first */
+			if (hugetlb_get_quota(mapping)) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			page = alloc_huge_page();
+			if (!page) {
+				hugetlb_put_quota(mapping);
+				ret = -ENOMEM;
+				goto out;
+			}
+			ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
+			if (! ret) {
+				unlock_page(page);
+			} else {
+				hugetlb_put_quota(mapping);
+				free_huge_page(page);
+				goto out;
+			}
+		}
+		set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
+	}
+out:
+	spin_unlock(&mm->page_table_lock);
+	return ret;
+}
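
set_huge_pte() and copy_hugetlb_page_range() above both represent one
huge page as 2^HUGETLB_PAGE_ORDER consecutive normal PTEs whose values
step by PAGE_SIZE. A standalone sketch of that fill loop, with
HUGETLB_PAGE_ORDER assumed to be 4 (64KB huge pages) for illustration:

#include <stdio.h>

#define PAGE_SIZE		4096UL
#define HUGETLB_PAGE_ORDER	4	/* assumed: 64KB huge pages */

int main(void)
{
	unsigned long entry = 0x08000000UL;	/* first PTE value */
	unsigned long pte[1 << HUGETLB_PAGE_ORDER];
	int i;

	/* Same fill loop as set_huge_pte(): consecutive 4KB PTEs. */
	for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
		pte[i] = entry;
		entry += PAGE_SIZE;
	}
	printf("pte[0]=%#lx ... pte[%d]=%#lx\n",
	       pte[0], i - 1, pte[i - 1]);
	return 0;
}
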
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
new file mode 100644
index 0000000..4e9c854
--- /dev/null
+++ b/arch/sh/mm/init.c
@@ -0,0 +1,313 @@
+/* $Id: init.c,v 1.19 2004/02/21 04:42:16 kkojima Exp $
+ *
+ *  linux/arch/sh/mm/init.c
+ *
+ *  Copyright (C) 1999  Niibe Yutaka
+ *  Copyright (C) 2002, 2004  Paul Mundt
+ *
+ *  Based on linux/arch/i386/mm/init.c:
+ *   Copyright (C) 1995  Linus Torvalds
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <linux/pagemap.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
+#include <asm/io.h>
+#include <asm/tlb.h>
+#include <asm/cacheflush.h>
+#include <asm/cache.h>
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+pgd_t swapper_pg_dir[PTRS_PER_PGD];
+
+/*
+ * Cache of MMU context last used.
+ */
+unsigned long mmu_context_cache = NO_CONTEXT;
+
+#ifdef CONFIG_MMU
+/* It'd be good if these lines were in the standard header file. */
+#define START_PFN	(NODE_DATA(0)->bdata->node_boot_start >> PAGE_SHIFT)
+#define MAX_LOW_PFN	(NODE_DATA(0)->bdata->node_low_pfn)
+#endif
+
+#ifdef CONFIG_DISCONTIGMEM
+pg_data_t discontig_page_data[MAX_NUMNODES];
+bootmem_data_t discontig_node_bdata[MAX_NUMNODES];
+#endif
+
+void (*copy_page)(void *from, void *to);
+void (*clear_page)(void *to);
+
+void show_mem(void)
+{
+	int i, total = 0, reserved = 0;
+	int shared = 0, cached = 0;
+
+	printk("Mem-info:\n");
+	show_free_areas();
+	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+	i = max_mapnr;
+	while (i-- > 0) {
+		total++;
+		if (PageReserved(mem_map+i))
+			reserved++;
+		else if (PageSwapCache(mem_map+i))
+			cached++;
+		else if (page_count(mem_map+i))
+			shared += page_count(mem_map+i) - 1;
+	}
+	printk("%d pages of RAM\n",total);
+	printk("%d reserved pages\n",reserved);
+	printk("%d pages shared\n",shared);
+	printk("%d pages swap cached\n",cached);
+}
+
+static void set_pte_phys(unsigned long addr, unsigned long phys, pgprot_t prot)
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = swapper_pg_dir + pgd_index(addr);
+	if (pgd_none(*pgd)) {
+		pgd_ERROR(*pgd);
+		return;
+	}
+
+	pmd = pmd_offset(pgd, addr);
+	if (pmd_none(*pmd)) {
+		pte = (pte_t *)get_zeroed_page(GFP_ATOMIC);
+		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
+		if (pte != pte_offset_kernel(pmd, 0)) {
+			pmd_ERROR(*pmd);
+			return;
+		}
+	}
+
+	pte = pte_offset_kernel(pmd, addr);
+	if (!pte_none(*pte)) {
+		pte_ERROR(*pte);
+		return;
+	}
+
+	set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, prot));
+
+	__flush_tlb_page(get_asid(), addr);
+}
+
+/*
+ * As a performance optimization, other platforms preserve the fixmap mapping
+ * across a context switch. We don't presently do this, but it could be done
+ * in a similar fashion to the wired TLB interface that sh64 uses (by way
+ * of the memory mapped UTLB configuration) -- this unfortunately forces us to
+ * give up a TLB entry for each mapping we want to preserve. While this may be
+ * viable for a small number of fixmaps, it's not particularly useful for
+ * everything and needs to be carefully evaluated. (i.e., we may want this for
+ * the vsyscall page.)
+ *
+ * XXX: Perhaps add a _PAGE_WIRED flag or something similar that we can pass
+ * in at __set_fixmap() time to determine the appropriate behavior to follow.
+ *
+ *					 -- PFM.
+ */
+void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
+{
+	unsigned long address = __fix_to_virt(idx);
+
+	if (idx >= __end_of_fixed_addresses) {
+		BUG();
+		return;
+	}
+
+	set_pte_phys(address, phys, prot);
+}
+
+/* References to section boundaries */
+
+extern char _text, _etext, _edata, __bss_start, _end;
+extern char __init_begin, __init_end;
+
+/*
+ * paging_init() sets up the page tables
+ *
+ * This routine also unmaps the page at virtual kernel address 0, so
+ * that we can trap those pesky NULL-reference errors in the kernel.
+ */
+void __init paging_init(void)
+{
+	unsigned long zones_size[MAX_NR_ZONES] = { 0, };
+
+	/*
+	 * Set up some defaults for the zone sizes. These should be safe
+	 * regardless of discontiguous memory or MMU settings.
+	 */
+	zones_size[ZONE_DMA] = 0 >> PAGE_SHIFT;
+	zones_size[ZONE_NORMAL] = __MEMORY_SIZE >> PAGE_SHIFT;
+#ifdef CONFIG_HIGHMEM
+	zones_size[ZONE_HIGHMEM] = 0 >> PAGE_SHIFT;
+#endif
+
+#ifdef CONFIG_MMU
+	/*
+	 * If we have an MMU, and want to be using it .. we need to adjust
+	 * the zone sizes accordingly, in addition to turning it on.
+	 */
+	{
+		unsigned long max_dma, low, start_pfn;
+		pgd_t *pg_dir;
+		int i;
+
+		/* We don't need a kernel mapping, as the hardware supports that. */
+		pg_dir = swapper_pg_dir;
+
+		for (i = 0; i < PTRS_PER_PGD; i++)
+			pgd_val(pg_dir[i]) = 0;
+
+		/* Turn on the MMU */
+		enable_mmu();
+
+		/* Fixup the zone sizes */
+		start_pfn = START_PFN;
+		max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+		low = MAX_LOW_PFN;
+
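+		/*
+		 * Everything below MAX_DMA_ADDRESS lands in ZONE_DMA; if
+		 * all of low memory fits there, ZONE_NORMAL stays empty.
+		 */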
+		if (low < max_dma) {
+			zones_size[ZONE_DMA] = low - start_pfn;
+			zones_size[ZONE_NORMAL] = 0;
+		} else {
+			zones_size[ZONE_DMA] = max_dma - start_pfn;
+			zones_size[ZONE_NORMAL] = low - max_dma;
+		}
+	}
+
+#elif defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4)
+	/*
+	 * If we don't have CONFIG_MMU set and the processor in question
+	 * still has an MMU, care needs to be taken to make sure it doesn't
+	 * stay on. Since the boot loader could have already turned it on,
+	 * and we clearly don't want it, we simply turn it off.
+	 *
+	 * We don't need to do anything special for the zone sizes, since the
+	 * default values that were already configured up above should be
+	 * satisfactory.
+	 */
+	disable_mmu();
+#endif
+	NODE_DATA(0)->node_mem_map = NULL;
+	free_area_init_node(0, NODE_DATA(0), zones_size, __MEMORY_START >> PAGE_SHIFT, 0);
+
+#ifdef CONFIG_DISCONTIGMEM
+	/*
+	 * And for discontig, do some more fixups on the zone sizes..
+	 */
+	zones_size[ZONE_DMA] = __MEMORY_SIZE_2ND >> PAGE_SHIFT;
+	zones_size[ZONE_NORMAL] = 0;
+	free_area_init_node(1, NODE_DATA(1), zones_size, __MEMORY_START_2ND >> PAGE_SHIFT, 0);
+#endif
+}
+
+void __init mem_init(void)
+{
+	extern unsigned long empty_zero_page[1024];
+	int codesize, reservedpages, datasize, initsize;
+	int tmp;
+	extern unsigned long memory_start;
+
+#ifdef CONFIG_MMU
+	high_memory = (void *)__va(MAX_LOW_PFN * PAGE_SIZE);
+#else
+	extern unsigned long memory_end;
+
+	high_memory = (void *)(memory_end & PAGE_MASK);
+#endif
+
+	max_mapnr = num_physpages = MAP_NR(high_memory) - MAP_NR(memory_start);
+
+	/* clear the zero-page */
+	memset(empty_zero_page, 0, PAGE_SIZE);
+	__flush_wback_region(empty_zero_page, PAGE_SIZE);
+
+	/*
+	 * Set up wrappers for copy/clear_page(); these will get overridden
+	 * later in the boot process if a better method is available.
+	 */
+	copy_page = copy_page_slow;
+	clear_page = clear_page_slow;
+
+	/* this will put all low memory onto the freelists */
+	totalram_pages += free_all_bootmem_node(NODE_DATA(0));
+#ifdef CONFIG_DISCONTIGMEM
+	totalram_pages += free_all_bootmem_node(NODE_DATA(1));
+#endif
+	reservedpages = 0;
+	for (tmp = 0; tmp < num_physpages; tmp++)
+		/*
+		 * Only count reserved RAM pages
+		 */
+		if (PageReserved(mem_map+tmp))
+			reservedpages++;
+
+	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
+	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
+	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
+
+	printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
+		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+		max_mapnr << (PAGE_SHIFT-10),
+		codesize >> 10,
+		reservedpages << (PAGE_SHIFT-10),
+		datasize >> 10,
+		initsize >> 10);
+
+	p3_cache_init();
+}
+
+void free_initmem(void)
+{
+	unsigned long addr;
+
+	addr = (unsigned long)(&__init_begin);
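+	/*
+	 * Init pages were marked reserved at boot; clear the flag and
+	 * give each page a refcount of 1 so free_page() returns it to
+	 * the page allocator.
+	 */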
+	for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(addr));
+		set_page_count(virt_to_page(addr), 1);
+		free_page(addr);
+		totalram_pages++;
+	}
+	printk("Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	unsigned long p;
+	for (p = start; p < end; p += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(p));
+		set_page_count(virt_to_page(p), 1);
+		free_page(p);
+		totalram_pages++;
+	}
+	printk("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+}
+#endif
+
diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c
new file mode 100644
index 0000000..9f490c2
--- /dev/null
+++ b/arch/sh/mm/ioremap.c
@@ -0,0 +1,163 @@
+/*
+ * arch/sh/mm/ioremap.c
+ *
+ * Re-map IO memory to kernel address space so that we can access it.
+ * This is needed for high PCI addresses that aren't mapped in the
+ * 640k-1MB IO memory area on PCs.
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+
+static inline void remap_area_pte(pte_t * pte, unsigned long address,
+	unsigned long size, unsigned long phys_addr, unsigned long flags)
+{
+	unsigned long end;
+	unsigned long pfn;
+	pgprot_t pgprot = __pgprot(_PAGE_PRESENT | _PAGE_RW |
+				   _PAGE_DIRTY | _PAGE_ACCESSED |
+				   _PAGE_HW_SHARED | _PAGE_FLAGS_HARD | flags);
+
+	address &= ~PMD_MASK;
+	end = address + size;
+	if (end > PMD_SIZE)
+		end = PMD_SIZE;
+	if (address >= end)
+		BUG();
+	pfn = phys_addr >> PAGE_SHIFT;
+	do {
+		if (!pte_none(*pte)) {
+			printk("remap_area_pte: page already exists\n");
+			BUG();
+		}
+		set_pte(pte, pfn_pte(pfn, pgprot));
+		address += PAGE_SIZE;
+		pfn++;
+		pte++;
+	} while (address && (address < end));
+}
+
+static inline int remap_area_pmd(pmd_t * pmd, unsigned long address,
+	unsigned long size, unsigned long phys_addr, unsigned long flags)
+{
+	unsigned long end;
+
+	address &= ~PGDIR_MASK;
+	end = address + size;
+	if (end > PGDIR_SIZE)
+		end = PGDIR_SIZE;
+	phys_addr -= address;
+	if (address >= end)
+		BUG();
+	do {
+		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		if (!pte)
+			return -ENOMEM;
+		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
+		address = (address + PMD_SIZE) & PMD_MASK;
+		pmd++;
+	} while (address && (address < end));
+	return 0;
+}
+
+int remap_area_pages(unsigned long address, unsigned long phys_addr,
+		     unsigned long size, unsigned long flags)
+{
+	int error;
+	pgd_t * dir;
+	unsigned long end = address + size;
+
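+	/*
+	 * Bias phys_addr by the virtual base so that adding the current
+	 * virtual address back in at each level yields the physical
+	 * address to map there.
+	 */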
+	phys_addr -= address;
+	dir = pgd_offset_k(address);
+	flush_cache_all();
+	if (address >= end)
+		BUG();
+	spin_lock(&init_mm.page_table_lock);
+	do {
+		pmd_t *pmd;
+		pmd = pmd_alloc(&init_mm, dir, address);
+		error = -ENOMEM;
+		if (!pmd)
+			break;
+		if (remap_area_pmd(pmd, address, end - address,
+					phys_addr + address, flags))
+			break;
+		error = 0;
+		address = (address + PGDIR_SIZE) & PGDIR_MASK;
+		dir++;
+	} while (address && (address < end));
+	spin_unlock(&init_mm.page_table_lock);
+	flush_tlb_all();
+	return error;
+}
+
+/*
+ * Generic mapping function (not visible outside):
+ */
+
+/*
+ * Remap an arbitrary physical address space into the kernel virtual
+ * address space. Needed when the kernel wants to access high addresses
+ * directly.
+ *
+ * NOTE! We need to allow non-page-aligned mappings too: we will obviously
+ * have to convert them into an offset in a page-aligned mapping, but the
+ * caller shouldn't need to know that small detail.
+ */
+void * p3_ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
+{
+	void * addr;
+	struct vm_struct * area;
+	unsigned long offset, last_addr;
+
+	/* Don't allow wraparound or zero size */
+	last_addr = phys_addr + size - 1;
+	if (!size || last_addr < phys_addr)
+		return NULL;
+
+	/*
+	 * Don't remap the low PCI/ISA area; it's always mapped.
+	 */
+	if (phys_addr >= 0xA0000 && last_addr < 0x100000)
+		return phys_to_virt(phys_addr);
+
+	/*
+	 * Don't allow anybody to remap normal RAM that we're using..
+	 */
+	if (phys_addr < virt_to_phys(high_memory))
+		return NULL;
+
+	/*
+	 * Mappings have to be page-aligned
+	 */
+	offset = phys_addr & ~PAGE_MASK;
+	phys_addr &= PAGE_MASK;
+	size = PAGE_ALIGN(last_addr+1) - phys_addr;
+
+	/*
+	 * Ok, go for it..
+	 */
+	area = get_vm_area(size, VM_IOREMAP);
+	if (!area)
+		return NULL;
+	area->phys_addr = phys_addr;
+	addr = area->addr;
+	if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) {
+		vunmap(addr);
+		return NULL;
+	}
+	return (void *) (offset + (char *)addr);
+}
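+
+/*
+ * Usage sketch (the physical address, size and flags are illustrative
+ * only):
+ *
+ *	void *regs = p3_ioremap(0xb8000000UL, 0x100, 0);
+ *	if (regs) {
+ *		... access the device through 'regs' ...
+ *		p3_iounmap(regs);
+ *	}
+ */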
+
+void p3_iounmap(void *addr)
+{
+	if (addr > high_memory)
+		vfree((void *)(PAGE_MASK & (unsigned long)addr));
+}
diff --git a/arch/sh/mm/pg-dma.c b/arch/sh/mm/pg-dma.c
new file mode 100644
index 0000000..1406d2e
--- /dev/null
+++ b/arch/sh/mm/pg-dma.c
@@ -0,0 +1,97 @@
+/*
+ * arch/sh/mm/pg-dma.c
+ *
+ * Fast clear_page()/copy_page() implementation using the SH DMAC
+ *
+ * Copyright (C) 2003  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/semaphore.h>
+#include <asm/mmu_context.h>
+#include <asm/addrspace.h>
+#include <asm/atomic.h>
+#include <asm/page.h>
+#include <asm/dma.h>
+#include <asm/io.h>
+
+/* Channel to use for page ops, must be dual-address mode capable. */
+static int dma_channel = CONFIG_DMA_PAGE_OPS_CHANNEL;
+
+static void copy_page_dma(void *to, void *from)
+{
+	/* 
+	 * This doesn't seem to get triggered until further along in the
+	 * boot process, at which point the DMAC is already initialized.
+	 * Fix this in the same fashion as clear_page_dma() in the event
+	 * that this crashes due to the DMAC not being initialized.
+	 */
+
+	/* flush_icache_range() takes a (start, end) range, not a length. */
+	flush_icache_range((unsigned long)from,
+			   (unsigned long)from + PAGE_SIZE);
+	dma_write_page(dma_channel, (unsigned long)from, (unsigned long)to);
+	dma_wait_for_completion(dma_channel);
+}
+
+static void clear_page_dma(void *to)
+{
+	extern unsigned long empty_zero_page[1024];
+
+	/*
+	 * We get invoked quite early on; if the DMAC hasn't been initialized
+	 * yet, fall back on the slow manual implementation.
+	 */
+	if (dma_info[dma_channel].chan != dma_channel) {
+		clear_page_slow(to);
+		return;
+	}
+
+	dma_write_page(dma_channel, (unsigned long)empty_zero_page,
+				    (unsigned long)to);
+
+	/*
+	 * FIXME: Something is a bit racy here; if we poll the counter right
+	 * away, we seem to lock up. Flushing the page from the dcache doesn't
+	 * seem to make a difference one way or the other, though either a full
+	 * icache or dcache flush does.
+	 *
+	 * The location of this is important as well, and must happen prior to
+	 * the completion loop but after the transfer was initiated.
+	 *
+	 * Oddly enough, this doesn't appear to be an issue for copy_page()..
+	 */
+	flush_icache_range((unsigned long)to, (unsigned long)to + PAGE_SIZE);
+
+	dma_wait_for_completion(dma_channel);
+}
+
+static int __init pg_dma_init(void)
+{
+	int ret;
+
+	ret = request_dma(dma_channel, "page ops");
+	if (ret != 0)
+		return ret;
+
+	copy_page = copy_page_dma;
+	clear_page = clear_page_dma;
+
+	return ret;
+}
+
+static void __exit pg_dma_exit(void)
+{
+	free_dma(dma_channel);
+}
+
+module_init(pg_dma_init);
+module_exit(pg_dma_exit);
+
+MODULE_AUTHOR("Paul Mundt <lethal@linux-sh.org>");
+MODULE_DESCRIPTION("Optimized page copy/clear routines using a dual-address mode capable DMAC channel");
+MODULE_LICENSE("GPL");
+
diff --git a/arch/sh/mm/pg-nommu.c b/arch/sh/mm/pg-nommu.c
new file mode 100644
index 0000000..8f9165a
--- /dev/null
+++ b/arch/sh/mm/pg-nommu.c
@@ -0,0 +1,36 @@
+/*
+ * arch/sh/mm/pg-nommu.c
+ *
+ * clear_page()/copy_page() implementation for MMUless SH.
+ *
+ * Copyright (C) 2003  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <asm/page.h>
+
+static void copy_page_nommu(void *to, void *from)
+{
+	memcpy(to, from, PAGE_SIZE);
+}
+
+static void clear_page_nommu(void *to)
+{
+	memset(to, 0, PAGE_SIZE);
+}
+
+static int __init pg_nommu_init(void)
+{
+	copy_page = copy_page_nommu;
+	clear_page = clear_page_nommu;
+
+	return 0;
+}
+
+subsys_initcall(pg_nommu_init);
+
diff --git a/arch/sh/mm/pg-sh4.c b/arch/sh/mm/pg-sh4.c
new file mode 100644
index 0000000..e5907c7
--- /dev/null
+++ b/arch/sh/mm/pg-sh4.c
@@ -0,0 +1,122 @@
+/*
+ * arch/sh/mm/pg-sh4.c
+ *
+ * Copyright (C) 1999, 2000, 2002  Niibe Yutaka
+ * Copyright (C) 2002  Paul Mundt
+ *
+ * Released under the terms of the GNU GPL v2.0.
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/threads.h>
+#include <asm/addrspace.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+extern struct semaphore p3map_sem[];
+
+/*
+ * clear_user_page
+ * @to: P1 address
+ * @address: U0 address to be mapped
+ * @page: page (virt_to_page(to))
+ */
+void clear_user_page(void *to, unsigned long address, struct page *page)
+{
+	__set_bit(PG_mapped, &page->flags);
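+	/*
+	 * If the kernel address and the user address fall on the same
+	 * cache alias, a plain clear_page() is safe; otherwise, build a
+	 * temporary P3 mapping that shares the user alias and clear
+	 * through that, so no stale lines are left behind.
+	 */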
+	if (((address ^ (unsigned long)to) & CACHE_ALIAS) == 0)
+		clear_page(to);
+	else {
+		pgprot_t pgprot = __pgprot(_PAGE_PRESENT | 
+					   _PAGE_RW | _PAGE_CACHABLE |
+					   _PAGE_DIRTY | _PAGE_ACCESSED | 
+					   _PAGE_HW_SHARED | _PAGE_FLAGS_HARD);
+		unsigned long phys_addr = PHYSADDR(to);
+		unsigned long p3_addr = P3SEG + (address & CACHE_ALIAS);
+		pgd_t *dir = pgd_offset_k(p3_addr);
+		pmd_t *pmd = pmd_offset(dir, p3_addr);
+		pte_t *pte = pte_offset_kernel(pmd, p3_addr);
+		pte_t entry;
+		unsigned long flags;
+
+		entry = pfn_pte(phys_addr >> PAGE_SHIFT, pgprot);
+		down(&p3map_sem[(address & CACHE_ALIAS)>>12]);
+		set_pte(pte, entry);
+		local_irq_save(flags);
+		__flush_tlb_page(get_asid(), p3_addr);
+		local_irq_restore(flags);
+		update_mmu_cache(NULL, p3_addr, entry);
+		__clear_user_page((void *)p3_addr, to);
+		pte_clear(&init_mm, p3_addr, pte);
+		up(&p3map_sem[(address & CACHE_ALIAS)>>12]);
+	}
+}
+
+/*
+ * copy_user_page
+ * @to: P1 address
+ * @from: P1 address
+ * @address: U0 address to be mapped
+ * @page: page (virt_to_page(to))
+ */
+void copy_user_page(void *to, void *from, unsigned long address, 
+		    struct page *page)
+{
+	__set_bit(PG_mapped, &page->flags);
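+	/* Same cache-aliasing considerations as in clear_user_page(). */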
+	if (((address ^ (unsigned long)to) & CACHE_ALIAS) == 0)
+		copy_page(to, from);
+	else {
+		pgprot_t pgprot = __pgprot(_PAGE_PRESENT | 
+					   _PAGE_RW | _PAGE_CACHABLE |
+					   _PAGE_DIRTY | _PAGE_ACCESSED | 
+					   _PAGE_HW_SHARED | _PAGE_FLAGS_HARD);
+		unsigned long phys_addr = PHYSADDR(to);
+		unsigned long p3_addr = P3SEG + (address & CACHE_ALIAS);
+		pgd_t *dir = pgd_offset_k(p3_addr);
+		pmd_t *pmd = pmd_offset(dir, p3_addr);
+		pte_t *pte = pte_offset_kernel(pmd, p3_addr);
+		pte_t entry;
+		unsigned long flags;
+
+		entry = pfn_pte(phys_addr >> PAGE_SHIFT, pgprot);
+		down(&p3map_sem[(address & CACHE_ALIAS)>>12]);
+		set_pte(pte, entry);
+		local_irq_save(flags);
+		__flush_tlb_page(get_asid(), p3_addr);
+		local_irq_restore(flags);
+		update_mmu_cache(NULL, p3_addr, entry);
+		__copy_user_page((void *)p3_addr, from, to);
+		pte_clear(&init_mm, p3_addr, pte);
+		up(&p3map_sem[(address & CACHE_ALIAS)>>12]);
+	}
+}
+
+/*
+ * For SH-4, we have our own implementation for ptep_get_and_clear
+ */
+inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	pte_t pte = *ptep;
+
+	pte_clear(mm, addr, ptep);
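+	/*
+	 * If the old PTE was present and the page is not writably mapped
+	 * elsewhere, drop PG_mapped so the next mapping of the page
+	 * re-triggers the cache writeback in update_mmu_cache().
+	 */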
+	if (!pte_not_present(pte)) {
+		unsigned long pfn = pte_pfn(pte);
+		if (pfn_valid(pfn)) {
+			struct page *page = pfn_to_page(pfn);
+			struct address_space *mapping = page_mapping(page);
+			if (!mapping || !mapping_writably_mapped(mapping))
+				__clear_bit(PG_mapped, &page->flags);
+		}
+	}
+	return pte;
+}
+
diff --git a/arch/sh/mm/pg-sh7705.c b/arch/sh/mm/pg-sh7705.c
new file mode 100644
index 0000000..ff9ece9
--- /dev/null
+++ b/arch/sh/mm/pg-sh7705.c
@@ -0,0 +1,137 @@
+/*
+ * arch/sh/mm/pg-sh7705.c
+ *
+ * Copyright (C) 1999, 2000  Niibe Yutaka
+ * Copyright (C) 2004  Alex Song
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/threads.h>
+#include <asm/addrspace.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+static inline void __flush_purge_virtual_region(void *p1, void *virt, int size)
+{
+	unsigned long v;
+	unsigned long begin, end;
+	unsigned long p1_begin;
+
+	begin = L1_CACHE_ALIGN((unsigned long)virt);
+	end = L1_CACHE_ALIGN((unsigned long)virt + size);
+
+	p1_begin = (unsigned long)p1 & ~(L1_CACHE_BYTES - 1);
+
+	/*
+	 * Do this the slow way, as we may not have TLB entries for virt
+	 * yet.
+	 */
+	for (v = begin; v < end; v += L1_CACHE_BYTES) {
+		unsigned long p;
+		unsigned long ways, addr;
+
+		p = __pa(p1_begin);
+
+		ways = cpu_data->dcache.ways;
+		addr = CACHE_OC_ADDRESS_ARRAY;
+
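+		/*
+		 * Probe this set index in every way and invalidate any
+		 * line whose tag matches the physical address.
+		 */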
+		do {
+			unsigned long data;
+
+			addr |= (v & cpu_data->dcache.entry_mask);
+
+			data = ctrl_inl(addr);
+			if ((data & CACHE_PHYSADDR_MASK) ==
+			       (p & CACHE_PHYSADDR_MASK)) {
+				data &= ~(SH_CACHE_UPDATED|SH_CACHE_VALID);
+				ctrl_outl(data, addr);
+			}
+
+			addr += cpu_data->dcache.way_incr;
+		} while (--ways);
+
+		p1_begin += L1_CACHE_BYTES;
+	}
+}
+
+/*
+ * clear_user_page
+ * @to: P1 address
+ * @address: U0 address to be mapped
+ */
+void clear_user_page(void *to, unsigned long address, struct page *pg)
+{
+	struct page *page = virt_to_page(to);
+
+	__set_bit(PG_mapped, &page->flags);
+	if (((address ^ (unsigned long)to) & CACHE_ALIAS) == 0) {
+		clear_page(to);
+		__flush_wback_region(to, PAGE_SIZE);
+	} else {
+		__flush_purge_virtual_region(to,
+					     (void *)(address & 0xfffff000),
+					     PAGE_SIZE);
+		clear_page(to);
+		__flush_wback_region(to, PAGE_SIZE);
+	}
+}
+
+/*
+ * copy_user_page
+ * @to: P1 address
+ * @from: P1 address
+ * @address: U0 address to be mapped
+ */
+void copy_user_page(void *to, void *from, unsigned long address, struct page *pg)
+{
+	struct page *page = virt_to_page(to);
+
+	__set_bit(PG_mapped, &page->flags);
+	if (((address ^ (unsigned long)to) & CACHE_ALIAS) == 0) {
+		copy_page(to, from);
+		__flush_wback_region(to, PAGE_SIZE);
+	} else {
+		__flush_purge_virtual_region(to,
+					     (void *)(address & 0xfffff000),
+					     PAGE_SIZE);
+		copy_page(to, from);
+		__flush_wback_region(to, PAGE_SIZE);
+	}
+}
+
+/*
+ * For SH7705, we have our own implementation for ptep_get_and_clear
+ * Copied from pg-sh4.c
+ */
+inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	pte_t pte = *ptep;
+
+	pte_clear(mm, addr, ptep);
+	if (!pte_not_present(pte)) {
+		unsigned long pfn = pte_pfn(pte);
+		if (pfn_valid(pfn)) {
+			struct page *page = pfn_to_page(pfn);
+			struct address_space *mapping = page_mapping(page);
+			if (!mapping || !mapping_writably_mapped(mapping))
+				__clear_bit(PG_mapped, &page->flags);
+		}
+	}
+
+	return pte;
+}
+
diff --git a/arch/sh/mm/tlb-nommu.c b/arch/sh/mm/tlb-nommu.c
new file mode 100644
index 0000000..e55cfea
--- /dev/null
+++ b/arch/sh/mm/tlb-nommu.c
@@ -0,0 +1,58 @@
+/*
+ * arch/sh/mm/tlb-nommu.c
+ *
+ * TLB Operations for MMUless SH.
+ *
+ * Copyright (C) 2002 Paul Mundt
+ *
+ * Released under the terms of the GNU GPL v2.0.
+ */
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+/*
+ * Nothing too terribly exciting here ..
+ */
+
+void flush_tlb(void)
+{
+	BUG();
+}
+
+void flush_tlb_all(void)
+{
+	BUG();
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	BUG();
+}
+
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			    unsigned long end)
+{
+	BUG();
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+	BUG();
+}
+
+void __flush_tlb_page(unsigned long asid, unsigned long page)
+{
+	BUG();
+}
+
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	BUG();
+}
+
+void update_mmu_cache(struct vm_area_struct * vma,
+		      unsigned long address, pte_t pte)
+{
+	BUG();
+}
+
diff --git a/arch/sh/mm/tlb-sh3.c b/arch/sh/mm/tlb-sh3.c
new file mode 100644
index 0000000..7a0d5c1
--- /dev/null
+++ b/arch/sh/mm/tlb-sh3.c
@@ -0,0 +1,92 @@
+/*
+ * arch/sh/mm/tlb-sh3.c
+ *
+ * SH-3 specific TLB operations
+ *
+ * Copyright (C) 1999  Niibe Yutaka
+ * Copyright (C) 2002  Paul Mundt
+ *
+ * Released under the terms of the GNU GPL v2.0.
+ */
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+void update_mmu_cache(struct vm_area_struct * vma,
+		      unsigned long address, pte_t pte)
+{
+	unsigned long flags;
+	unsigned long pteval;
+	unsigned long vpn;
+
+	/* Ptrace may call this routine. */
+	if (vma && current->active_mm != vma->vm_mm)
+		return;
+
+#if defined(CONFIG_SH7705_CACHE_32KB)
+	{
+		/* Scope the declaration so we don't mix declarations and
+		 * statements in the enclosing block. */
+		struct page *page = pte_page(pte);
+
+		if (VALID_PAGE(page) && !test_bit(PG_mapped, &page->flags)) {
+			unsigned long phys = pte_val(pte) & PTE_PHYS_MASK;
+			__flush_wback_region((void *)P1SEGADDR(phys),
+					     PAGE_SIZE);
+			__set_bit(PG_mapped, &page->flags);
+		}
+	}
+#endif
+
+	local_irq_save(flags);
+
+	/* Set PTEH register */
+	vpn = (address & MMU_VPN_MASK) | get_asid();
+	ctrl_outl(vpn, MMU_PTEH);
+
+	pteval = pte_val(pte);
+
+	/* Set PTEL register */
+	pteval &= _PAGE_FLAGS_HARDWARE_MASK; /* drop software flags */
+	/* conveniently, we want all the software flags to be 0 anyway */
+	ctrl_outl(pteval, MMU_PTEL);
+
+	/* Load the TLB */
+	asm volatile("ldtlb": /* no output */ : /* no input */ : "memory");
+	local_irq_restore(flags);
+}
+
+void __flush_tlb_page(unsigned long asid, unsigned long page)
+{
+	unsigned long addr, data;
+	int i, ways = MMU_NTLB_WAYS;
+
+	/*
+	 * NOTE: PTEH.ASID should be set to this MM
+	 *       _AND_ we need to write ASID to the array.
+	 *
+	 * It would be simple if we didn't need to set PTEH.ASID...
+	 */
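+	/*
+	 * Bits 12-16 of the address select one of the 32 TLB sets. The
+	 * data word carries the VPN and ASID with the valid bit clear,
+	 * so writing it back invalidates the entry.
+	 */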
+	addr = MMU_TLB_ADDRESS_ARRAY | (page & 0x1F000);
+	data = (page & 0xfffe0000) | asid; /* VALID bit is off */
+
+	if ((cpu_data->flags & CPU_HAS_MMU_PAGE_ASSOC)) {
+		addr |= MMU_PAGE_ASSOC_BIT;
+		ways = 1;	/* we already know the way .. */
+	}
+
+	for (i = 0; i < ways; i++)
+		ctrl_outl(data, addr + (i << 8));
+}
+
diff --git a/arch/sh/mm/tlb-sh4.c b/arch/sh/mm/tlb-sh4.c
new file mode 100644
index 0000000..115b1b6
--- /dev/null
+++ b/arch/sh/mm/tlb-sh4.c
@@ -0,0 +1,96 @@
+/*
+ * arch/sh/mm/tlb-sh4.c
+ *
+ * SH-4 specific TLB operations
+ *
+ * Copyright (C) 1999  Niibe Yutaka
+ * Copyright (C) 2002  Paul Mundt
+ *
+ * Released under the terms of the GNU GPL v2.0.
+ */
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+void update_mmu_cache(struct vm_area_struct * vma,
+		      unsigned long address, pte_t pte)
+{
+	unsigned long flags;
+	unsigned long pteval;
+	unsigned long vpn;
+	struct page *page;
+	unsigned long pfn;
+	unsigned long ptea;
+
+	/* Ptrace may call this routine. */
+	if (vma && current->active_mm != vma->vm_mm)
+		return;
+
+	pfn = pte_pfn(pte);
+	if (pfn_valid(pfn)) {
+		page = pfn_to_page(pfn);
+		if (!test_bit(PG_mapped, &page->flags)) {
+			unsigned long phys = pte_val(pte) & PTE_PHYS_MASK;
+			__flush_wback_region((void *)P1SEGADDR(phys), PAGE_SIZE);
+			__set_bit(PG_mapped, &page->flags);
+		}
+	}
+
+	local_irq_save(flags);
+
+	/* Set PTEH register */
+	vpn = (address & MMU_VPN_MASK) | get_asid();
+	ctrl_outl(vpn, MMU_PTEH);
+
+	pteval = pte_val(pte);
+	/* Set PTEA register */
+	/* TODO: make this look less hacky */
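+	/*
+	 * Presumably the extra space attribute bits for PCMCIA accesses
+	 * are stashed in the top bits of the PTE and packed down into
+	 * PTEA here, along with the lowest bit.
+	 */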
+	ptea = ((pteval >> 28) & 0xe) | (pteval & 0x1);
+	ctrl_outl(ptea, MMU_PTEA);
+
+	/* Set PTEL register */
+	pteval &= _PAGE_FLAGS_HARDWARE_MASK; /* drop software flags */
+#ifdef CONFIG_SH_WRITETHROUGH
+	pteval |= _PAGE_WT;
+#endif
+	/* conveniently, we want all the software flags to be 0 anyway */
+	ctrl_outl(pteval, MMU_PTEL);
+
+	/* Load the TLB */
+	asm volatile("ldtlb": /* no output */ : /* no input */ : "memory");
+	local_irq_restore(flags);
+}
+
+void __flush_tlb_page(unsigned long asid, unsigned long page)
+{
+	unsigned long addr, data;
+
+	/*
+	 * NOTE: PTEH.ASID should be set to this MM
+	 *       _AND_ we need to write ASID to the array.
+	 *
+	 * It would be simple if we didn't need to set PTEH.ASID...
+	 */
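+	/*
+	 * With the association bit set, a single write searches all UTLB
+	 * ways for a matching VPN/ASID and, on a hit, overwrites the
+	 * entry with the valid bit clear. The write itself must be done
+	 * from the uncached P2 segment, hence jump_to_P2().
+	 */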
+	addr = MMU_UTLB_ADDRESS_ARRAY | MMU_PAGE_ASSOC_BIT;
+	data = page | asid; /* VALID bit is off */
+	jump_to_P2();
+	ctrl_outl(data, addr);
+	back_to_P1();
+}
+