drm: add savage driver

Add driver for savage chipsets.

From: Felix Kuehling
Signed-off-by: Dave Airlie <airlied@linux.ie>
diff --git a/drivers/char/drm/Kconfig b/drivers/char/drm/Kconfig
index f31b970..56ace9d 100644
--- a/drivers/char/drm/Kconfig
+++ b/drivers/char/drm/Kconfig
@@ -96,3 +96,10 @@
 	  Choose this option if you have a Via unichrome or compatible video
 	  chipset. If M is selected the module will be called via.
 
+config DRM_SAVAGE
+	tristate "Savage video cards"
+	depends on DRM
+	help
+	  Choose this option if you have a Savage3D/4/SuperSavage/Pro/Twister
+	  chipset. If M is selected the module will be called savage.
+
diff --git a/drivers/char/drm/Makefile b/drivers/char/drm/Makefile
index 3f0cf8e..1945138 100644
--- a/drivers/char/drm/Makefile
+++ b/drivers/char/drm/Makefile
@@ -17,6 +17,7 @@
 radeon-objs := radeon_drv.o radeon_cp.o radeon_state.o radeon_mem.o radeon_irq.o
 ffb-objs    := ffb_drv.o ffb_context.o
 sis-objs    := sis_drv.o sis_ds.o sis_mm.o
+savage-objs := savage_drv.o savage_bci.o savage_state.o
 via-objs    := via_irq.o via_drv.o via_ds.o via_map.o via_mm.o via_dma.o via_verifier.o via_video.o
 
 ifeq ($(CONFIG_COMPAT),y)
@@ -37,5 +38,7 @@
 obj-$(CONFIG_DRM_I915)  += i915.o
 obj-$(CONFIG_DRM_FFB)   += ffb.o
 obj-$(CONFIG_DRM_SIS)   += sis.o
+obj-$(CONFIG_DRM_SAVAGE)+= savage.o
 obj-$(CONFIG_DRM_VIA)	+=via.o
 
+
diff --git a/drivers/char/drm/savage_bci.c b/drivers/char/drm/savage_bci.c
new file mode 100644
index 0000000..2fd40ba
--- /dev/null
+++ b/drivers/char/drm/savage_bci.c
@@ -0,0 +1,1096 @@
+/* savage_bci.c -- BCI support for Savage
+ *
+ * Copyright 2004  Felix Kuehling
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "drmP.h"
+#include "savage_drm.h"
+#include "savage_drv.h"
+
+/* Need a long timeout for shadow status updates can take a while
+ * and so can waiting for events when the queue is full. */
+#define SAVAGE_DEFAULT_USEC_TIMEOUT	1000000 /* 1s */
+#define SAVAGE_EVENT_USEC_TIMEOUT	5000000 /* 5s */
+#define SAVAGE_FREELIST_DEBUG		0
+
+static int
+savage_bci_wait_fifo_shadow(drm_savage_private_t *dev_priv, unsigned int n)
+{
+	uint32_t mask = dev_priv->status_used_mask;
+	uint32_t threshold = dev_priv->bci_threshold_hi;
+	uint32_t status;
+	int i;
+
+#if SAVAGE_BCI_DEBUG
+	if (n > dev_priv->cob_size + SAVAGE_BCI_FIFO_SIZE - threshold)
+		DRM_ERROR("Trying to emit %d words "
+			  "(more than guaranteed space in COB)\n", n);
+#endif
+
+	for (i = 0; i < SAVAGE_DEFAULT_USEC_TIMEOUT; i++) {
+		DRM_MEMORYBARRIER();
+		status = dev_priv->status_ptr[0];
+		if ((status & mask) < threshold)
+			return 0;
+		DRM_UDELAY(1);
+	}
+
+#if SAVAGE_BCI_DEBUG
+	DRM_ERROR("failed!\n");
+	DRM_INFO("   status=0x%08x, threshold=0x%08x\n", status, threshold);
+#endif
+	return DRM_ERR(EBUSY);
+}
+
+static int
+savage_bci_wait_fifo_s3d(drm_savage_private_t *dev_priv, unsigned int n)
+{
+	uint32_t maxUsed = dev_priv->cob_size + SAVAGE_BCI_FIFO_SIZE - n;
+	uint32_t status;
+	int i;
+
+	for (i = 0; i < SAVAGE_DEFAULT_USEC_TIMEOUT; i++) {
+		status = SAVAGE_READ(SAVAGE_STATUS_WORD0);
+		if ((status & SAVAGE_FIFO_USED_MASK_S3D) <= maxUsed)
+			return 0;
+		DRM_UDELAY(1);
+	}
+
+#if SAVAGE_BCI_DEBUG
+	DRM_ERROR("failed!\n");
+	DRM_INFO("   status=0x%08x\n", status);
+#endif
+	return DRM_ERR(EBUSY);
+}
+
+static int
+savage_bci_wait_fifo_s4(drm_savage_private_t *dev_priv, unsigned int n)
+{
+	uint32_t maxUsed = dev_priv->cob_size + SAVAGE_BCI_FIFO_SIZE - n;
+	uint32_t status;
+	int i;
+
+	for (i = 0; i < SAVAGE_DEFAULT_USEC_TIMEOUT; i++) {
+		status = SAVAGE_READ(SAVAGE_ALT_STATUS_WORD0);
+		if ((status & SAVAGE_FIFO_USED_MASK_S4) <= maxUsed)
+			return 0;
+		DRM_UDELAY(1);
+	}
+
+#if SAVAGE_BCI_DEBUG
+	DRM_ERROR("failed!\n");
+	DRM_INFO("   status=0x%08x\n", status);
+#endif
+	return DRM_ERR(EBUSY);
+}
+
+/*
+ * Waiting for events.
+ *
+ * The BIOSresets the event tag to 0 on mode changes. Therefore we
+ * never emit 0 to the event tag. If we find a 0 event tag we know the
+ * BIOS stomped on it and return success assuming that the BIOS waited
+ * for engine idle.
+ *
+ * Note: if the Xserver uses the event tag it has to follow the same
+ * rule. Otherwise there may be glitches every 2^16 events.
+ */
+static int
+savage_bci_wait_event_shadow(drm_savage_private_t *dev_priv, uint16_t e)
+{
+	uint32_t status;
+	int i;
+
+	for (i = 0; i < SAVAGE_EVENT_USEC_TIMEOUT; i++) {
+		DRM_MEMORYBARRIER();
+		status = dev_priv->status_ptr[1];
+		if ((((status & 0xffff) - e) & 0xffff) <= 0x7fff ||
+		    (status & 0xffff) == 0)
+			return 0;
+		DRM_UDELAY(1);
+	}
+
+#if SAVAGE_BCI_DEBUG
+	DRM_ERROR("failed!\n");
+	DRM_INFO("   status=0x%08x, e=0x%04x\n", status, e);
+#endif
+
+	return DRM_ERR(EBUSY);
+}
+
+static int
+savage_bci_wait_event_reg(drm_savage_private_t *dev_priv, uint16_t e)
+{
+	uint32_t status;
+	int i;
+
+	for (i = 0; i < SAVAGE_EVENT_USEC_TIMEOUT; i++) {
+		status = SAVAGE_READ(SAVAGE_STATUS_WORD1);
+		if ((((status & 0xffff) - e) & 0xffff) <= 0x7fff ||
+		    (status & 0xffff) == 0)
+			return 0;
+		DRM_UDELAY(1);
+	}
+
+#if SAVAGE_BCI_DEBUG
+	DRM_ERROR("failed!\n");
+	DRM_INFO("   status=0x%08x, e=0x%04x\n", status, e);
+#endif
+
+	return DRM_ERR(EBUSY);
+}
+
+uint16_t savage_bci_emit_event(drm_savage_private_t *dev_priv,
+			       unsigned int flags)
+{
+	uint16_t count;
+	BCI_LOCALS;
+
+	if (dev_priv->status_ptr) {
+		/* coordinate with Xserver */
+		count = dev_priv->status_ptr[1023];
+		if (count < dev_priv->event_counter)
+			dev_priv->event_wrap++;
+	} else {
+		count = dev_priv->event_counter;
+	}
+	count = (count + 1) & 0xffff;
+	if (count == 0) {
+		count++; /* See the comment above savage_wait_event_*. */
+		dev_priv->event_wrap++;
+	}
+	dev_priv->event_counter = count;
+	if (dev_priv->status_ptr)
+		dev_priv->status_ptr[1023] = (uint32_t)count;
+
+	if ((flags & (SAVAGE_WAIT_2D | SAVAGE_WAIT_3D))) {
+		unsigned int wait_cmd = BCI_CMD_WAIT;
+		if ((flags & SAVAGE_WAIT_2D))
+			wait_cmd |= BCI_CMD_WAIT_2D;
+		if ((flags & SAVAGE_WAIT_3D))
+			wait_cmd |= BCI_CMD_WAIT_3D;
+		BEGIN_BCI(2);
+		BCI_WRITE(wait_cmd);
+	} else {
+		BEGIN_BCI(1);
+	}
+	BCI_WRITE(BCI_CMD_UPDATE_EVENT_TAG | (uint32_t)count);
+
+	return count;
+}
+
+/*
+ * Freelist management
+ */
+static int savage_freelist_init(drm_device_t *dev)
+{
+	drm_savage_private_t *dev_priv = dev->dev_private;
+	drm_device_dma_t *dma = dev->dma;
+	drm_buf_t *buf;
+	drm_savage_buf_priv_t *entry;
+	int i;
+	DRM_DEBUG("count=%d\n", dma->buf_count);
+
+	dev_priv->head.next = &dev_priv->tail;
+	dev_priv->head.prev = NULL;
+	dev_priv->head.buf = NULL;
+
+	dev_priv->tail.next = NULL;
+	dev_priv->tail.prev = &dev_priv->head;
+	dev_priv->tail.buf = NULL;
+
+	for (i = 0; i < dma->buf_count; i++) {
+		buf = dma->buflist[i];
+		entry = buf->dev_private;
+
+		SET_AGE(&entry->age, 0, 0);
+		entry->buf = buf;
+
+		entry->next = dev_priv->head.next;
+		entry->prev = &dev_priv->head;
+		dev_priv->head.next->prev = entry;
+		dev_priv->head.next = entry;
+	}
+
+	return 0;
+}
+
+static drm_buf_t *savage_freelist_get(drm_device_t *dev)
+{
+	drm_savage_private_t *dev_priv = dev->dev_private;
+	drm_savage_buf_priv_t *tail = dev_priv->tail.prev;
+	uint16_t event;
+	unsigned int wrap;
+	DRM_DEBUG("\n");
+
+	UPDATE_EVENT_COUNTER();
+	if (dev_priv->status_ptr)
+		event = dev_priv->status_ptr[1] & 0xffff;
+	else
+		event = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff;
+	wrap = dev_priv->event_wrap;
+	if (event > dev_priv->event_counter)
+		wrap--; /* hardware hasn't passed the last wrap yet */
+
+	DRM_DEBUG("   tail=0x%04x %d\n", tail->age.event, tail->age.wrap);
+	DRM_DEBUG("   head=0x%04x %d\n", event, wrap);
+
+	if (tail->buf && (TEST_AGE(&tail->age, event, wrap) || event == 0)) {
+		drm_savage_buf_priv_t *next = tail->next;
+		drm_savage_buf_priv_t *prev = tail->prev;
+		prev->next = next;
+		next->prev = prev;
+		tail->next = tail->prev = NULL;
+		return tail->buf;
+	}
+
+	DRM_DEBUG("returning NULL, tail->buf=%p!\n", tail->buf);
+	return NULL;
+}
+
+void savage_freelist_put(drm_device_t *dev, drm_buf_t *buf)
+{
+	drm_savage_private_t *dev_priv = dev->dev_private;
+	drm_savage_buf_priv_t *entry = buf->dev_private, *prev, *next;
+
+	DRM_DEBUG("age=0x%04x wrap=%d\n", entry->age.event, entry->age.wrap);
+
+	if (entry->next != NULL || entry->prev != NULL) {
+		DRM_ERROR("entry already on freelist.\n");
+		return;
+	}
+
+	prev = &dev_priv->head;
+	next = prev->next;
+	prev->next = entry;
+	next->prev = entry;
+	entry->prev = prev;
+	entry->next = next;
+}
+
+/*
+ * Command DMA
+ */
+static int savage_dma_init(drm_savage_private_t *dev_priv)
+{
+	unsigned int i;
+
+	dev_priv->nr_dma_pages = dev_priv->cmd_dma->size /
+		(SAVAGE_DMA_PAGE_SIZE*4);
+	dev_priv->dma_pages = drm_alloc(sizeof(drm_savage_dma_page_t) *
+					dev_priv->nr_dma_pages,
+					DRM_MEM_DRIVER);
+	if (dev_priv->dma_pages == NULL)
+		return DRM_ERR(ENOMEM);
+
+	for (i = 0; i < dev_priv->nr_dma_pages; ++i) {
+		SET_AGE(&dev_priv->dma_pages[i].age, 0, 0);
+		dev_priv->dma_pages[i].used = 0;
+		dev_priv->dma_pages[i].flushed = 0;
+	}
+	SET_AGE(&dev_priv->last_dma_age, 0, 0);
+
+	dev_priv->first_dma_page = 0;
+	dev_priv->current_dma_page = 0;
+
+	return 0;
+}
+
+void savage_dma_reset(drm_savage_private_t *dev_priv)
+{
+	uint16_t event;
+	unsigned int wrap, i;
+	event = savage_bci_emit_event(dev_priv, 0);
+	wrap = dev_priv->event_wrap;
+	for (i = 0; i < dev_priv->nr_dma_pages; ++i) {
+		SET_AGE(&dev_priv->dma_pages[i].age, event, wrap);
+		dev_priv->dma_pages[i].used = 0;
+		dev_priv->dma_pages[i].flushed = 0;
+	}
+	SET_AGE(&dev_priv->last_dma_age, event, wrap);
+	dev_priv->first_dma_page = dev_priv->current_dma_page = 0;
+}
+
+void savage_dma_wait(drm_savage_private_t *dev_priv, unsigned int page)
+{
+	uint16_t event;
+	unsigned int wrap;
+
+	/* Faked DMA buffer pages don't age. */
+	if (dev_priv->cmd_dma == &dev_priv->fake_dma)
+		return;
+
+	UPDATE_EVENT_COUNTER();
+	if (dev_priv->status_ptr)
+		event = dev_priv->status_ptr[1] & 0xffff;
+	else
+		event = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff;
+	wrap = dev_priv->event_wrap;
+	if (event > dev_priv->event_counter)
+		wrap--; /* hardware hasn't passed the last wrap yet */
+
+	if (dev_priv->dma_pages[page].age.wrap > wrap ||
+	    (dev_priv->dma_pages[page].age.wrap == wrap &&
+	     dev_priv->dma_pages[page].age.event > event)) {
+		if (dev_priv->wait_evnt(dev_priv,
+					dev_priv->dma_pages[page].age.event)
+		    < 0)
+			DRM_ERROR("wait_evnt failed!\n");
+	}
+}
+
+uint32_t *savage_dma_alloc(drm_savage_private_t *dev_priv, unsigned int n)
+{
+	unsigned int cur = dev_priv->current_dma_page;
+	unsigned int rest = SAVAGE_DMA_PAGE_SIZE -
+		dev_priv->dma_pages[cur].used;
+	unsigned int nr_pages = (n - rest + SAVAGE_DMA_PAGE_SIZE-1) /
+		SAVAGE_DMA_PAGE_SIZE;
+	uint32_t *dma_ptr;
+	unsigned int i;
+
+	DRM_DEBUG("cur=%u, cur->used=%u, n=%u, rest=%u, nr_pages=%u\n",
+		  cur, dev_priv->dma_pages[cur].used, n, rest, nr_pages);
+
+	if (cur + nr_pages < dev_priv->nr_dma_pages) {
+		dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle +
+			cur*SAVAGE_DMA_PAGE_SIZE +
+			dev_priv->dma_pages[cur].used;
+		if (n < rest)
+			rest = n;
+		dev_priv->dma_pages[cur].used += rest;
+		n -= rest;
+		cur++;
+	} else {
+		dev_priv->dma_flush(dev_priv);
+		nr_pages = (n + SAVAGE_DMA_PAGE_SIZE-1) / SAVAGE_DMA_PAGE_SIZE;
+		for (i = cur; i < dev_priv->nr_dma_pages; ++i) {
+			dev_priv->dma_pages[i].age = dev_priv->last_dma_age;
+			dev_priv->dma_pages[i].used = 0;
+			dev_priv->dma_pages[i].flushed = 0;
+		}
+		dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle;
+		dev_priv->first_dma_page = cur = 0;
+	}
+	for (i = cur; nr_pages > 0; ++i, --nr_pages) {
+#if SAVAGE_DMA_DEBUG
+		if (dev_priv->dma_pages[i].used) {
+			DRM_ERROR("unflushed page %u: used=%u\n",
+				  i, dev_priv->dma_pages[i].used);
+		}
+#endif
+		if (n > SAVAGE_DMA_PAGE_SIZE)
+			dev_priv->dma_pages[i].used = SAVAGE_DMA_PAGE_SIZE;
+		else
+			dev_priv->dma_pages[i].used = n;
+		n -= SAVAGE_DMA_PAGE_SIZE;
+	}
+	dev_priv->current_dma_page = --i;
+
+	DRM_DEBUG("cur=%u, cur->used=%u, n=%u\n",
+		  i, dev_priv->dma_pages[i].used, n);
+
+	savage_dma_wait(dev_priv, dev_priv->current_dma_page);
+
+	return dma_ptr;
+}
+
+static void savage_dma_flush(drm_savage_private_t *dev_priv)
+{
+	unsigned int first = dev_priv->first_dma_page;
+	unsigned int cur = dev_priv->current_dma_page;
+	uint16_t event;
+	unsigned int wrap, pad, align, len, i;
+	unsigned long phys_addr;
+	BCI_LOCALS;
+
+	if (first == cur &&
+	    dev_priv->dma_pages[cur].used == dev_priv->dma_pages[cur].flushed)
+		return;
+
+	/* pad length to multiples of 2 entries
+	 * align start of next DMA block to multiles of 8 entries */
+	pad = -dev_priv->dma_pages[cur].used & 1;
+	align = -(dev_priv->dma_pages[cur].used + pad) & 7;
+
+	DRM_DEBUG("first=%u, cur=%u, first->flushed=%u, cur->used=%u, "
+		  "pad=%u, align=%u\n",
+		  first, cur, dev_priv->dma_pages[first].flushed,
+		  dev_priv->dma_pages[cur].used, pad, align);
+
+	/* pad with noops */
+	if (pad) {
+		uint32_t *dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle +
+			cur * SAVAGE_DMA_PAGE_SIZE +
+			dev_priv->dma_pages[cur].used;
+		dev_priv->dma_pages[cur].used += pad;
+		while(pad != 0) {
+			*dma_ptr++ = BCI_CMD_WAIT;
+			pad--;
+		}
+	}
+
+	DRM_MEMORYBARRIER();
+
+	/* do flush ... */
+	phys_addr = dev_priv->cmd_dma->offset +
+		(first * SAVAGE_DMA_PAGE_SIZE +
+		 dev_priv->dma_pages[first].flushed) * 4;
+	len = (cur - first) * SAVAGE_DMA_PAGE_SIZE +
+		dev_priv->dma_pages[cur].used -
+		dev_priv->dma_pages[first].flushed;
+
+	DRM_DEBUG("phys_addr=%lx, len=%u\n",
+		  phys_addr | dev_priv->dma_type, len);
+
+	BEGIN_BCI(3);
+	BCI_SET_REGISTERS(SAVAGE_DMABUFADDR, 1);
+	BCI_WRITE(phys_addr | dev_priv->dma_type);
+	BCI_DMA(len);
+
+	/* fix alignment of the start of the next block */
+	dev_priv->dma_pages[cur].used += align;
+
+	/* age DMA pages */
+	event = savage_bci_emit_event(dev_priv, 0);
+	wrap = dev_priv->event_wrap;
+	for (i = first; i < cur; ++i) {
+		SET_AGE(&dev_priv->dma_pages[i].age, event, wrap);
+		dev_priv->dma_pages[i].used = 0;
+		dev_priv->dma_pages[i].flushed = 0;
+	}
+	/* age the current page only when it's full */
+	if (dev_priv->dma_pages[cur].used == SAVAGE_DMA_PAGE_SIZE) {
+		SET_AGE(&dev_priv->dma_pages[cur].age, event, wrap);
+		dev_priv->dma_pages[cur].used = 0;
+		dev_priv->dma_pages[cur].flushed = 0;
+		/* advance to next page */
+		cur++;
+		if (cur == dev_priv->nr_dma_pages)
+			cur = 0;
+		dev_priv->first_dma_page = dev_priv->current_dma_page = cur;
+	} else {
+		dev_priv->first_dma_page = cur;
+		dev_priv->dma_pages[cur].flushed = dev_priv->dma_pages[i].used;
+	}
+	SET_AGE(&dev_priv->last_dma_age, event, wrap);
+
+	DRM_DEBUG("first=cur=%u, cur->used=%u, cur->flushed=%u\n", cur,
+		  dev_priv->dma_pages[cur].used,
+		  dev_priv->dma_pages[cur].flushed);
+}
+
+static void savage_fake_dma_flush(drm_savage_private_t *dev_priv)
+{
+	unsigned int i, j;
+	BCI_LOCALS;
+
+	if (dev_priv->first_dma_page == dev_priv->current_dma_page &&
+	    dev_priv->dma_pages[dev_priv->current_dma_page].used == 0)
+		return;
+
+	DRM_DEBUG("first=%u, cur=%u, cur->used=%u\n",
+		  dev_priv->first_dma_page, dev_priv->current_dma_page,
+		  dev_priv->dma_pages[dev_priv->current_dma_page].used);
+
+	for (i = dev_priv->first_dma_page;
+	     i <= dev_priv->current_dma_page && dev_priv->dma_pages[i].used;
+	     ++i) {
+		uint32_t *dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle +
+			i * SAVAGE_DMA_PAGE_SIZE;
+#if SAVAGE_DMA_DEBUG
+		/* Sanity check: all pages except the last one must be full. */
+		if (i < dev_priv->current_dma_page &&
+		    dev_priv->dma_pages[i].used != SAVAGE_DMA_PAGE_SIZE) {
+			DRM_ERROR("partial DMA page %u: used=%u",
+				  i, dev_priv->dma_pages[i].used);
+		}
+#endif
+		BEGIN_BCI(dev_priv->dma_pages[i].used);
+		for (j = 0; j < dev_priv->dma_pages[i].used; ++j) {
+			BCI_WRITE(dma_ptr[j]);
+		}
+		dev_priv->dma_pages[i].used = 0;
+	}
+
+	/* reset to first page */
+	dev_priv->first_dma_page = dev_priv->current_dma_page = 0;
+}
+
+/*
+ * Initalize mappings. On Savage4 and SavageIX the alignment
+ * and size of the aperture is not suitable for automatic MTRR setup
+ * in drm_addmap. Therefore we do it manually before the maps are
+ * initialized. We also need to take care of deleting the MTRRs in
+ * postcleanup.
+ */
+int savage_preinit(drm_device_t *dev, unsigned long chipset)
+{
+	drm_savage_private_t *dev_priv;
+	unsigned long mmio_base, fb_base, fb_size, aperture_base;
+	/* fb_rsrc and aper_rsrc aren't really used currently, but still exist
+	 * in case we decide we need information on the BAR for BSD in the
+	 * future.
+	 */
+	unsigned int fb_rsrc, aper_rsrc;
+	int ret = 0;
+
+	dev_priv = drm_alloc(sizeof(drm_savage_private_t), DRM_MEM_DRIVER);
+	if (dev_priv == NULL)
+		return DRM_ERR(ENOMEM);
+
+	memset(dev_priv, 0, sizeof(drm_savage_private_t));
+	dev->dev_private = (void *)dev_priv;
+	dev_priv->chipset = (enum savage_family)chipset;
+
+	dev_priv->mtrr[0].handle = -1;
+	dev_priv->mtrr[1].handle = -1;
+	dev_priv->mtrr[2].handle = -1;
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+		fb_rsrc = 0;
+		fb_base = drm_get_resource_start(dev, 0);
+		fb_size = SAVAGE_FB_SIZE_S3;
+		mmio_base = fb_base + SAVAGE_FB_SIZE_S3;
+		aper_rsrc = 0;
+		aperture_base = fb_base + SAVAGE_APERTURE_OFFSET;
+		/* this should always be true */
+		if (drm_get_resource_len(dev, 0) == 0x08000000) {
+			/* Don't make MMIO write-cobining! We need 3
+			 * MTRRs. */
+			dev_priv->mtrr[0].base = fb_base;
+			dev_priv->mtrr[0].size = 0x01000000;
+			dev_priv->mtrr[0].handle = mtrr_add(
+				dev_priv->mtrr[0].base, dev_priv->mtrr[0].size,
+				MTRR_TYPE_WRCOMB, 1);
+			dev_priv->mtrr[1].base = fb_base+0x02000000;
+			dev_priv->mtrr[1].size = 0x02000000;
+			dev_priv->mtrr[1].handle = mtrr_add(
+				dev_priv->mtrr[1].base, dev_priv->mtrr[1].size,
+				MTRR_TYPE_WRCOMB, 1);
+			dev_priv->mtrr[2].base = fb_base+0x04000000;
+			dev_priv->mtrr[2].size = 0x04000000;
+			dev_priv->mtrr[2].handle = mtrr_add(
+				dev_priv->mtrr[2].base, dev_priv->mtrr[2].size,
+				MTRR_TYPE_WRCOMB, 1);
+		} else {
+			DRM_ERROR("strange pci_resource_len %08lx\n",
+				  drm_get_resource_len(dev, 0));
+		}
+	} else if (chipset != S3_SUPERSAVAGE && chipset != S3_SAVAGE2000) {
+		mmio_base = drm_get_resource_start(dev, 0);
+		fb_rsrc = 1;
+		fb_base = drm_get_resource_start(dev, 1);
+		fb_size = SAVAGE_FB_SIZE_S4;
+		aper_rsrc = 1;
+		aperture_base = fb_base + SAVAGE_APERTURE_OFFSET;
+		/* this should always be true */
+		if (drm_get_resource_len(dev, 1) == 0x08000000) {
+			/* Can use one MTRR to cover both fb and
+			 * aperture. */
+			dev_priv->mtrr[0].base = fb_base;
+			dev_priv->mtrr[0].size = 0x08000000;
+			dev_priv->mtrr[0].handle = mtrr_add(
+				dev_priv->mtrr[0].base, dev_priv->mtrr[0].size,
+				MTRR_TYPE_WRCOMB, 1);
+		} else {
+			DRM_ERROR("strange pci_resource_len %08lx\n",
+				  drm_get_resource_len(dev, 1));
+		}
+	} else {
+		mmio_base = drm_get_resource_start(dev, 0);
+		fb_rsrc = 1;
+		fb_base = drm_get_resource_start(dev, 1);
+		fb_size = drm_get_resource_len(dev, 1);
+		aper_rsrc = 2;
+		aperture_base = drm_get_resource_start(dev, 2);
+		/* Automatic MTRR setup will do the right thing. */
+	}
+
+	ret = drm_addmap(dev, mmio_base, SAVAGE_MMIO_SIZE, _DRM_REGISTERS,
+			 _DRM_READ_ONLY, &dev_priv->mmio);
+	if (ret)
+		return ret;
+
+	ret = drm_addmap(dev, fb_base, fb_size, _DRM_FRAME_BUFFER,
+			 _DRM_WRITE_COMBINING, &dev_priv->fb);
+	if (ret)
+		return ret;
+
+	ret = drm_addmap(dev, aperture_base, SAVAGE_APERTURE_SIZE,
+			 _DRM_FRAME_BUFFER, _DRM_WRITE_COMBINING,
+			 &dev_priv->aperture);
+	if (ret)
+		return ret;
+
+	return ret;
+}
+
+/*
+ * Delete MTRRs and free device-private data.
+ */
+int savage_postcleanup(drm_device_t *dev)
+{
+	drm_savage_private_t *dev_priv = dev->dev_private;
+	int i;
+
+	for (i = 0; i < 3; ++i)
+		if (dev_priv->mtrr[i].handle >= 0)
+			mtrr_del(dev_priv->mtrr[i].handle,
+				 dev_priv->mtrr[i].base,
+				 dev_priv->mtrr[i].size);
+
+	drm_free(dev_priv, sizeof(drm_savage_private_t), DRM_MEM_DRIVER);
+
+	return 0;
+}
+
+static int savage_do_init_bci(drm_device_t *dev, drm_savage_init_t *init)
+{
+	drm_savage_private_t *dev_priv = dev->dev_private;
+
+	if (init->fb_bpp != 16 && init->fb_bpp != 32) {
+		DRM_ERROR("invalid frame buffer bpp %d!\n", init->fb_bpp);
+		return DRM_ERR(EINVAL);
+	}
+	if (init->depth_bpp != 16 && init->depth_bpp != 32) {
+		DRM_ERROR("invalid depth buffer bpp %d!\n", init->fb_bpp);
+		return DRM_ERR(EINVAL);
+	}
+	if (init->dma_type != SAVAGE_DMA_AGP &&
+	    init->dma_type != SAVAGE_DMA_PCI) {
+		DRM_ERROR("invalid dma memory type %d!\n", init->dma_type);
+		return DRM_ERR(EINVAL);
+	}
+
+	dev_priv->cob_size = init->cob_size;
+	dev_priv->bci_threshold_lo = init->bci_threshold_lo;
+	dev_priv->bci_threshold_hi = init->bci_threshold_hi;
+	dev_priv->dma_type = init->dma_type;
+
+	dev_priv->fb_bpp = init->fb_bpp;
+	dev_priv->front_offset = init->front_offset;
+	dev_priv->front_pitch = init->front_pitch;
+	dev_priv->back_offset = init->back_offset;
+	dev_priv->back_pitch = init->back_pitch;
+	dev_priv->depth_bpp = init->depth_bpp;
+	dev_priv->depth_offset = init->depth_offset;
+	dev_priv->depth_pitch = init->depth_pitch;
+
+	dev_priv->texture_offset = init->texture_offset;
+	dev_priv->texture_size = init->texture_size;
+
+	DRM_GETSAREA();
+	if (!dev_priv->sarea) {
+		DRM_ERROR("could not find sarea!\n");
+		savage_do_cleanup_bci(dev);
+		return DRM_ERR(EINVAL);
+	}
+	if (init->status_offset != 0) {
+		dev_priv->status = drm_core_findmap(dev, init->status_offset);
+		if (!dev_priv->status) {
+			DRM_ERROR("could not find shadow status region!\n");
+			savage_do_cleanup_bci(dev);
+			return DRM_ERR(EINVAL);
+		}
+	} else {
+		dev_priv->status = NULL;
+	}
+	if (dev_priv->dma_type == SAVAGE_DMA_AGP && init->buffers_offset) {
+		dev->agp_buffer_map = drm_core_findmap(dev,
+						       init->buffers_offset);
+		if (!dev->agp_buffer_map) {
+			DRM_ERROR("could not find DMA buffer region!\n");
+			savage_do_cleanup_bci(dev);
+			return DRM_ERR(EINVAL);
+		}
+		drm_core_ioremap(dev->agp_buffer_map, dev);
+		if (!dev->agp_buffer_map) {
+			DRM_ERROR("failed to ioremap DMA buffer region!\n");
+			savage_do_cleanup_bci(dev);
+			return DRM_ERR(ENOMEM);
+		}
+	}
+	if (init->agp_textures_offset) {
+		dev_priv->agp_textures =
+			drm_core_findmap(dev, init->agp_textures_offset);
+		if (!dev_priv->agp_textures) {
+			DRM_ERROR("could not find agp texture region!\n");
+			savage_do_cleanup_bci(dev);
+			return DRM_ERR(EINVAL);
+		}
+	} else {
+		dev_priv->agp_textures = NULL;
+	}
+
+	if (init->cmd_dma_offset) {
+		if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+			DRM_ERROR("command DMA not supported on "
+				  "Savage3D/MX/IX.\n");
+			savage_do_cleanup_bci(dev);
+			return DRM_ERR(EINVAL);
+		}
+		if (dev->dma && dev->dma->buflist) {
+			DRM_ERROR("command and vertex DMA not supported "
+				  "at the same time.\n");
+			savage_do_cleanup_bci(dev);
+			return DRM_ERR(EINVAL);
+		}
+		dev_priv->cmd_dma = drm_core_findmap(dev, init->cmd_dma_offset);
+		if (!dev_priv->cmd_dma) {
+			DRM_ERROR("could not find command DMA region!\n");
+			savage_do_cleanup_bci(dev);
+			return DRM_ERR(EINVAL);
+		}
+		if (dev_priv->dma_type == SAVAGE_DMA_AGP) {
+			if (dev_priv->cmd_dma->type != _DRM_AGP) {
+				DRM_ERROR("AGP command DMA region is not a "
+					  "_DRM_AGP map!\n");
+				savage_do_cleanup_bci(dev);
+				return DRM_ERR(EINVAL);
+			}
+			drm_core_ioremap(dev_priv->cmd_dma, dev);
+			if (!dev_priv->cmd_dma->handle) {
+				DRM_ERROR("failed to ioremap command "
+					  "DMA region!\n");
+				savage_do_cleanup_bci(dev);
+				return DRM_ERR(ENOMEM);
+			}
+		} else if (dev_priv->cmd_dma->type != _DRM_CONSISTENT) {
+			DRM_ERROR("PCI command DMA region is not a "
+				  "_DRM_CONSISTENT map!\n");
+			savage_do_cleanup_bci(dev);
+			return DRM_ERR(EINVAL);
+		}
+	} else {
+		dev_priv->cmd_dma = NULL;
+	}
+
+	dev_priv->dma_flush = savage_dma_flush;
+	if (!dev_priv->cmd_dma) {
+		DRM_DEBUG("falling back to faked command DMA.\n");
+		dev_priv->fake_dma.offset = 0;
+		dev_priv->fake_dma.size = SAVAGE_FAKE_DMA_SIZE;
+		dev_priv->fake_dma.type = _DRM_SHM;
+		dev_priv->fake_dma.handle = drm_alloc(SAVAGE_FAKE_DMA_SIZE,
+						      DRM_MEM_DRIVER);
+		if (!dev_priv->fake_dma.handle) {
+			DRM_ERROR("could not allocate faked DMA buffer!\n");
+			savage_do_cleanup_bci(dev);
+			return DRM_ERR(ENOMEM);
+		}
+		dev_priv->cmd_dma = &dev_priv->fake_dma;
+		dev_priv->dma_flush = savage_fake_dma_flush;
+	}
+
+	dev_priv->sarea_priv =
+		(drm_savage_sarea_t *)((uint8_t *)dev_priv->sarea->handle +
+				       init->sarea_priv_offset);
+
+	/* setup bitmap descriptors */
+	{
+		unsigned int color_tile_format;
+		unsigned int depth_tile_format;
+		unsigned int front_stride, back_stride, depth_stride;
+		if (dev_priv->chipset <= S3_SAVAGE4) {
+			color_tile_format = dev_priv->fb_bpp == 16 ?
+				SAVAGE_BD_TILE_16BPP : SAVAGE_BD_TILE_32BPP;
+			depth_tile_format = dev_priv->depth_bpp == 16 ?
+				SAVAGE_BD_TILE_16BPP : SAVAGE_BD_TILE_32BPP;
+		} else {
+			color_tile_format = SAVAGE_BD_TILE_DEST;
+			depth_tile_format = SAVAGE_BD_TILE_DEST;
+		}
+		front_stride = dev_priv->front_pitch / (dev_priv->fb_bpp/8);
+		back_stride  = dev_priv-> back_pitch / (dev_priv->fb_bpp/8);
+		depth_stride = dev_priv->depth_pitch / (dev_priv->depth_bpp/8);
+
+		dev_priv->front_bd = front_stride | SAVAGE_BD_BW_DISABLE |
+			(dev_priv->fb_bpp << SAVAGE_BD_BPP_SHIFT) |
+			(color_tile_format << SAVAGE_BD_TILE_SHIFT);
+
+		dev_priv-> back_bd =  back_stride | SAVAGE_BD_BW_DISABLE |
+			(dev_priv->fb_bpp << SAVAGE_BD_BPP_SHIFT) |
+			(color_tile_format << SAVAGE_BD_TILE_SHIFT);
+
+		dev_priv->depth_bd = depth_stride | SAVAGE_BD_BW_DISABLE |
+			(dev_priv->depth_bpp << SAVAGE_BD_BPP_SHIFT) |
+			(depth_tile_format << SAVAGE_BD_TILE_SHIFT);
+	}
+
+	/* setup status and bci ptr */
+	dev_priv->event_counter = 0;
+	dev_priv->event_wrap = 0;
+	dev_priv->bci_ptr = (volatile uint32_t *)
+	    ((uint8_t *)dev_priv->mmio->handle + SAVAGE_BCI_OFFSET);
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+		dev_priv->status_used_mask = SAVAGE_FIFO_USED_MASK_S3D;
+	} else {
+		dev_priv->status_used_mask = SAVAGE_FIFO_USED_MASK_S4;
+	}
+	if (dev_priv->status != NULL) {
+		dev_priv->status_ptr =
+			(volatile uint32_t *)dev_priv->status->handle;
+		dev_priv->wait_fifo = savage_bci_wait_fifo_shadow;
+		dev_priv->wait_evnt = savage_bci_wait_event_shadow;
+		dev_priv->status_ptr[1023] = dev_priv->event_counter;
+	} else {
+		dev_priv->status_ptr = NULL;
+		if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+			dev_priv->wait_fifo = savage_bci_wait_fifo_s3d;
+		} else {
+			dev_priv->wait_fifo = savage_bci_wait_fifo_s4;
+		}
+		dev_priv->wait_evnt = savage_bci_wait_event_reg;
+	}
+
+	/* cliprect functions */
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset))
+		dev_priv->emit_clip_rect = savage_emit_clip_rect_s3d;
+	else
+		dev_priv->emit_clip_rect = savage_emit_clip_rect_s4;
+
+	if (savage_freelist_init(dev) < 0) {
+		DRM_ERROR("could not initialize freelist\n");
+		savage_do_cleanup_bci(dev);
+		return DRM_ERR(ENOMEM);
+	}
+
+	if (savage_dma_init(dev_priv) <  0) {
+		DRM_ERROR("could not initialize command DMA\n");
+		savage_do_cleanup_bci(dev);
+		return DRM_ERR(ENOMEM);
+	}
+
+	return 0;
+}
+
+int savage_do_cleanup_bci(drm_device_t *dev)
+{
+	drm_savage_private_t *dev_priv = dev->dev_private;
+
+	if (dev_priv->cmd_dma == &dev_priv->fake_dma) {
+		if (dev_priv->fake_dma.handle)
+			drm_free(dev_priv->fake_dma.handle,
+				 SAVAGE_FAKE_DMA_SIZE, DRM_MEM_DRIVER);
+	} else if (dev_priv->cmd_dma && dev_priv->cmd_dma->handle &&
+		   dev_priv->cmd_dma->type == _DRM_AGP &&
+		   dev_priv->dma_type == SAVAGE_DMA_AGP)
+		drm_core_ioremapfree(dev_priv->cmd_dma, dev);
+
+	if (dev_priv->dma_type == SAVAGE_DMA_AGP &&
+	    dev->agp_buffer_map && dev->agp_buffer_map->handle) {
+		drm_core_ioremapfree(dev->agp_buffer_map, dev);
+		/* make sure the next instance (which may be running
+		 * in PCI mode) doesn't try to use an old
+		 * agp_buffer_map. */
+		dev->agp_buffer_map = NULL;
+	}
+
+	if (dev_priv->dma_pages)
+		drm_free(dev_priv->dma_pages,
+			 sizeof(drm_savage_dma_page_t)*dev_priv->nr_dma_pages,
+			 DRM_MEM_DRIVER);
+
+	return 0;
+}
+
+static int savage_bci_init(DRM_IOCTL_ARGS)
+{
+	DRM_DEVICE;
+	drm_savage_init_t init;
+
+	LOCK_TEST_WITH_RETURN(dev, filp);
+
+	DRM_COPY_FROM_USER_IOCTL(init, (drm_savage_init_t __user *)data,
+				 sizeof(init));
+
+	switch (init.func) {
+	case SAVAGE_INIT_BCI:
+		return savage_do_init_bci(dev, &init);
+	case SAVAGE_CLEANUP_BCI:
+		return savage_do_cleanup_bci(dev);
+	}
+
+	return DRM_ERR(EINVAL);
+}
+
+static int savage_bci_event_emit(DRM_IOCTL_ARGS)
+{
+	DRM_DEVICE;
+	drm_savage_private_t *dev_priv = dev->dev_private;
+	drm_savage_event_emit_t event;
+
+	DRM_DEBUG("\n");
+
+	LOCK_TEST_WITH_RETURN(dev, filp);
+
+	DRM_COPY_FROM_USER_IOCTL(event, (drm_savage_event_emit_t __user *)data,
+				 sizeof(event));
+
+	event.count = savage_bci_emit_event(dev_priv, event.flags);
+	event.count |= dev_priv->event_wrap << 16;
+	DRM_COPY_TO_USER_IOCTL(&((drm_savage_event_emit_t __user *)data)->count,
+			       event.count, sizeof(event.count));
+	return 0;
+}
+
+static int savage_bci_event_wait(DRM_IOCTL_ARGS)
+{
+	DRM_DEVICE;
+	drm_savage_private_t *dev_priv = dev->dev_private;
+	drm_savage_event_wait_t event;
+	unsigned int event_e, hw_e;
+	unsigned int event_w, hw_w;
+
+	DRM_DEBUG("\n");
+
+	DRM_COPY_FROM_USER_IOCTL(event, (drm_savage_event_wait_t __user *)data,
+				 sizeof(event));
+
+	UPDATE_EVENT_COUNTER();
+	if (dev_priv->status_ptr)
+		hw_e = dev_priv->status_ptr[1] & 0xffff;
+	else
+		hw_e = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff;
+	hw_w = dev_priv->event_wrap;
+	if (hw_e > dev_priv->event_counter)
+		hw_w--; /* hardware hasn't passed the last wrap yet */
+
+	event_e = event.count & 0xffff;
+	event_w = event.count >> 16;
+
+	/* Don't need to wait if
+	 * - event counter wrapped since the event was emitted or
+	 * - the hardware has advanced up to or over the event to wait for.
+	 */
+	if (event_w < hw_w || (event_w == hw_w && event_e <= hw_e) )
+		return 0;
+	else
+		return dev_priv->wait_evnt(dev_priv, event_e);
+}
+
+/*
+ * DMA buffer management
+ */
+
+static int savage_bci_get_buffers(DRMFILE filp, drm_device_t *dev, drm_dma_t *d)
+{
+	drm_buf_t *buf;
+	int i;
+
+	for (i = d->granted_count; i < d->request_count; i++) {
+		buf = savage_freelist_get(dev);
+		if (!buf)
+			return DRM_ERR(EAGAIN);
+
+		buf->filp = filp;
+
+		if (DRM_COPY_TO_USER(&d->request_indices[i],
+				     &buf->idx, sizeof(buf->idx)))
+			return DRM_ERR(EFAULT);
+		if (DRM_COPY_TO_USER(&d->request_sizes[i],
+				     &buf->total, sizeof(buf->total)))
+			return DRM_ERR(EFAULT);
+
+		d->granted_count++;
+	}
+	return 0;
+}
+
+int savage_bci_buffers(DRM_IOCTL_ARGS)
+{
+	DRM_DEVICE;
+	drm_device_dma_t *dma = dev->dma;
+	drm_dma_t d;
+	int ret = 0;
+
+	LOCK_TEST_WITH_RETURN(dev, filp);
+
+	DRM_COPY_FROM_USER_IOCTL(d, (drm_dma_t __user *)data, sizeof(d));
+
+	/* Please don't send us buffers.
+	 */
+	if (d.send_count != 0) {
+		DRM_ERROR("Process %d trying to send %d buffers via drmDMA\n",
+			  DRM_CURRENTPID, d.send_count);
+		return DRM_ERR(EINVAL);
+	}
+
+	/* We'll send you buffers.
+	 */
+	if (d.request_count < 0 || d.request_count > dma->buf_count) {
+		DRM_ERROR("Process %d trying to get %d buffers (of %d max)\n",
+			  DRM_CURRENTPID, d.request_count, dma->buf_count);
+		return DRM_ERR(EINVAL);
+	}
+
+	d.granted_count = 0;
+
+	if (d.request_count) {
+		ret = savage_bci_get_buffers(filp, dev, &d);
+	}
+
+	DRM_COPY_TO_USER_IOCTL((drm_dma_t __user *)data, d, sizeof(d));
+
+	return ret;
+}
+
+void savage_reclaim_buffers(drm_device_t *dev, DRMFILE filp) {
+	drm_device_dma_t *dma = dev->dma;
+	drm_savage_private_t *dev_priv = dev->dev_private;
+	int i;
+
+	if (!dma)
+		return;
+	if (!dev_priv)
+		return;
+	if (!dma->buflist)
+		return;
+
+	/*i830_flush_queue(dev);*/
+
+	for (i = 0; i < dma->buf_count; i++) {
+		drm_buf_t *buf = dma->buflist[i];
+		drm_savage_buf_priv_t *buf_priv = buf->dev_private;
+
+		if (buf->filp == filp && buf_priv &&
+		    buf_priv->next == NULL && buf_priv->prev == NULL) {
+			uint16_t event;
+			DRM_DEBUG("reclaimed from client\n");
+			event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);
+			SET_AGE(&buf_priv->age, event, dev_priv->event_wrap);
+			savage_freelist_put(dev, buf);
+		}
+	}
+
+	drm_core_reclaim_buffers(dev, filp);
+}
+
+
+drm_ioctl_desc_t savage_ioctls[] = {
+	[DRM_IOCTL_NR(DRM_SAVAGE_BCI_INIT)] = {savage_bci_init, 1, 1},
+	[DRM_IOCTL_NR(DRM_SAVAGE_BCI_CMDBUF)] = {savage_bci_cmdbuf, 1, 0},
+	[DRM_IOCTL_NR(DRM_SAVAGE_BCI_EVENT_EMIT)] = {savage_bci_event_emit, 1, 0},
+	[DRM_IOCTL_NR(DRM_SAVAGE_BCI_EVENT_WAIT)] = {savage_bci_event_wait, 1, 0},
+};
+
+int savage_max_ioctl = DRM_ARRAY_SIZE(savage_ioctls);
diff --git a/drivers/char/drm/savage_drm.h b/drivers/char/drm/savage_drm.h
new file mode 100644
index 0000000..6526c9a
--- /dev/null
+++ b/drivers/char/drm/savage_drm.h
@@ -0,0 +1,209 @@
+/* savage_drm.h -- Public header for the savage driver
+ *
+ * Copyright 2004  Felix Kuehling
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __SAVAGE_DRM_H__
+#define __SAVAGE_DRM_H__
+
+#ifndef __SAVAGE_SAREA_DEFINES__
+#define __SAVAGE_SAREA_DEFINES__
+
+/* 2 heaps (1 for card, 1 for agp), each divided into upto 128
+ * regions, subject to a minimum region size of (1<<16) == 64k.
+ *
+ * Clients may subdivide regions internally, but when sharing between
+ * clients, the region size is the minimum granularity.
+ */
+
+#define SAVAGE_CARD_HEAP		0
+#define SAVAGE_AGP_HEAP			1
+#define SAVAGE_NR_TEX_HEAPS		2
+#define SAVAGE_NR_TEX_REGIONS		16
+#define SAVAGE_LOG_MIN_TEX_REGION_SIZE	16
+
+#endif /* __SAVAGE_SAREA_DEFINES__ */
+
+typedef struct _drm_savage_sarea {
+	/* LRU lists for texture memory in agp space and on the card.
+	 */
+	drm_tex_region_t texList[SAVAGE_NR_TEX_HEAPS][SAVAGE_NR_TEX_REGIONS+1];
+	unsigned int texAge[SAVAGE_NR_TEX_HEAPS];
+
+	/* Mechanism to validate card state.
+	 */
+	int ctxOwner;
+} drm_savage_sarea_t, *drm_savage_sarea_ptr;
+
+/* Savage-specific ioctls
+ */
+#define DRM_SAVAGE_BCI_INIT		0x00
+#define DRM_SAVAGE_BCI_CMDBUF           0x01
+#define DRM_SAVAGE_BCI_EVENT_EMIT	0x02
+#define DRM_SAVAGE_BCI_EVENT_WAIT	0x03
+
+#define DRM_IOCTL_SAVAGE_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_INIT, drm_savage_init_t)
+#define DRM_IOCTL_SAVAGE_CMDBUF		DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_CMDBUF, drm_savage_cmdbuf_t)
+#define DRM_IOCTL_SAVAGE_EVENT_EMIT	DRM_IOWR(DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_EMIT, drm_savage_event_emit_t)
+#define DRM_IOCTL_SAVAGE_EVENT_WAIT	DRM_IOW( DRM_COMMAND_BASE + DRM_SAVAGE_BCI_EVENT_WAIT, drm_savage_event_wait_t)
+
+#define SAVAGE_DMA_PCI	1
+#define SAVAGE_DMA_AGP	3
+typedef struct drm_savage_init {
+	enum {
+		SAVAGE_INIT_BCI = 1,
+		SAVAGE_CLEANUP_BCI = 2
+	} func;
+	unsigned int sarea_priv_offset;
+
+	/* some parameters */
+	unsigned int cob_size;
+	unsigned int bci_threshold_lo, bci_threshold_hi;
+	unsigned int dma_type;
+
+	/* frame buffer layout */
+	unsigned int fb_bpp;
+	unsigned int front_offset, front_pitch;
+	unsigned int back_offset, back_pitch;
+	unsigned int depth_bpp;
+	unsigned int depth_offset, depth_pitch;
+
+	/* local textures */
+	unsigned int texture_offset;
+	unsigned int texture_size;
+
+	/* physical locations of non-permanent maps */
+	unsigned long status_offset;
+	unsigned long buffers_offset;
+	unsigned long agp_textures_offset;
+	unsigned long cmd_dma_offset;
+} drm_savage_init_t;
+
+typedef union drm_savage_cmd_header drm_savage_cmd_header_t;
+typedef struct drm_savage_cmdbuf {
+				/* command buffer in client's address space */
+	drm_savage_cmd_header_t __user *cmd_addr;
+	unsigned int size;	/* size of the command buffer in 64bit units */
+
+	unsigned int dma_idx;	/* DMA buffer index to use */
+	int discard;		/* discard DMA buffer when done */
+				/* vertex buffer in client's address space */
+	unsigned int __user *vb_addr;
+	unsigned int vb_size;	/* size of client vertex buffer in bytes */
+	unsigned int vb_stride;	/* stride of vertices in 32bit words */
+				/* boxes in client's address space */
+	drm_clip_rect_t __user *box_addr;
+	unsigned int nbox;	/* number of clipping boxes */
+} drm_savage_cmdbuf_t;
+
+#define SAVAGE_WAIT_2D  0x1 /* wait for 2D idle before updating event tag */
+#define SAVAGE_WAIT_3D  0x2 /* wait for 3D idle before updating event tag */
+#define SAVAGE_WAIT_IRQ 0x4 /* emit or wait for IRQ, not implemented yet */
+typedef struct drm_savage_event {
+	unsigned int count;
+	unsigned int flags;
+} drm_savage_event_emit_t, drm_savage_event_wait_t;
+
+/* Commands for the cmdbuf ioctl
+ */
+#define SAVAGE_CMD_STATE	0  /* a range of state registers */
+#define SAVAGE_CMD_DMA_PRIM	1  /* vertices from DMA buffer */
+#define SAVAGE_CMD_VB_PRIM	2  /* vertices from client vertex buffer */
+#define SAVAGE_CMD_DMA_IDX	3  /* indexed vertices from DMA buffer */
+#define SAVAGE_CMD_VB_IDX	4  /* indexed vertices client vertex buffer */
+#define SAVAGE_CMD_CLEAR	5  /* clear buffers */
+#define SAVAGE_CMD_SWAP		6  /* swap buffers */
+
+/* Primitive types
+*/
+#define SAVAGE_PRIM_TRILIST	0  /* triangle list */
+#define SAVAGE_PRIM_TRISTRIP	1  /* triangle strip */
+#define SAVAGE_PRIM_TRIFAN	2  /* triangle fan */
+#define SAVAGE_PRIM_TRILIST_201	3  /* reorder verts for correct flat
+				    * shading on s3d */
+
+/* Skip flags (vertex format)
+ */
+#define SAVAGE_SKIP_Z		0x01
+#define SAVAGE_SKIP_W		0x02
+#define SAVAGE_SKIP_C0		0x04
+#define SAVAGE_SKIP_C1		0x08
+#define SAVAGE_SKIP_S0		0x10
+#define SAVAGE_SKIP_T0		0x20
+#define SAVAGE_SKIP_ST0		0x30
+#define SAVAGE_SKIP_S1		0x40
+#define SAVAGE_SKIP_T1		0x80
+#define SAVAGE_SKIP_ST1		0xc0
+#define SAVAGE_SKIP_ALL_S3D	0x3f
+#define SAVAGE_SKIP_ALL_S4	0xff
+
+/* Buffer names for clear command
+ */
+#define SAVAGE_FRONT		0x1
+#define SAVAGE_BACK		0x2
+#define SAVAGE_DEPTH		0x4
+
+/* 64-bit command header
+ */
+union drm_savage_cmd_header {
+	struct {
+		unsigned char cmd;	/* command */
+		unsigned char pad0;
+		unsigned short pad1;
+		unsigned short pad2;
+		unsigned short pad3;
+	} cmd; /* generic */
+	struct {
+		unsigned char cmd;
+		unsigned char global;	/* need idle engine? */
+		unsigned short count;	/* number of consecutive registers */
+		unsigned short start;	/* first register */
+		unsigned short pad3;
+	} state; /* SAVAGE_CMD_STATE */
+	struct {
+		unsigned char cmd;
+		unsigned char prim;	/* primitive type */
+		unsigned short skip;	/* vertex format (skip flags) */
+		unsigned short count;	/* number of vertices */
+		unsigned short start;	/* first vertex in DMA/vertex buffer */
+	} prim; /* SAVAGE_CMD_DMA_PRIM, SAVAGE_CMD_VB_PRIM */
+	struct {
+		unsigned char cmd;
+		unsigned char prim;
+		unsigned short skip;
+		unsigned short count;	/* number of indices that follow */
+		unsigned short pad3;
+	} idx; /* SAVAGE_CMD_DMA_IDX, SAVAGE_CMD_VB_IDX */
+	struct {
+		unsigned char cmd;
+		unsigned char pad0;
+		unsigned short pad1;
+		unsigned int flags;
+	} clear0; /* SAVAGE_CMD_CLEAR */
+	struct {
+		unsigned int mask;
+		unsigned int value;
+	} clear1; /* SAVAGE_CMD_CLEAR data */
+};
+
+#endif
diff --git a/drivers/char/drm/savage_drv.c b/drivers/char/drm/savage_drv.c
new file mode 100644
index 0000000..ac8d270
--- /dev/null
+++ b/drivers/char/drm/savage_drv.c
@@ -0,0 +1,112 @@
+/* savage_drv.c -- Savage driver for Linux
+ *
+ * Copyright 2004  Felix Kuehling
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include "drmP.h"
+#include "savage_drm.h"
+#include "savage_drv.h"
+
+#include "drm_pciids.h"
+
+static int postinit( struct drm_device *dev, unsigned long flags )
+{
+	DRM_INFO( "Initialized %s %d.%d.%d %s on minor %d: %s\n",
+		DRIVER_NAME,
+		DRIVER_MAJOR,
+		DRIVER_MINOR,
+		DRIVER_PATCHLEVEL,
+		DRIVER_DATE,
+		dev->primary.minor,
+		pci_pretty_name(dev->pdev)
+		);
+	return 0;
+}
+
+static int version( drm_version_t *version )
+{
+	int len;
+
+	version->version_major = DRIVER_MAJOR;
+	version->version_minor = DRIVER_MINOR;
+	version->version_patchlevel = DRIVER_PATCHLEVEL;
+	DRM_COPY( version->name, DRIVER_NAME );
+	DRM_COPY( version->date, DRIVER_DATE );
+	DRM_COPY( version->desc, DRIVER_DESC );
+	return 0;
+}
+
+static struct pci_device_id pciidlist[] = {
+	savage_PCI_IDS
+};
+
+extern drm_ioctl_desc_t savage_ioctls[];
+extern int savage_max_ioctl;
+
+static struct drm_driver driver = {
+	.driver_features =
+	    DRIVER_USE_AGP | DRIVER_USE_MTRR |
+	    DRIVER_HAVE_DMA | DRIVER_PCI_DMA,
+	.dev_priv_size = sizeof(drm_savage_buf_priv_t),
+	.preinit = savage_preinit,
+	.postinit = postinit,
+	.postcleanup = savage_postcleanup,
+	.reclaim_buffers = savage_reclaim_buffers,
+	.get_map_ofs = drm_core_get_map_ofs,
+	.get_reg_ofs = drm_core_get_reg_ofs,
+	.version = version,
+	.ioctls = savage_ioctls,
+	.dma_ioctl = savage_bci_buffers,
+	.fops = {
+		.owner   = THIS_MODULE,
+		.open	 = drm_open,
+		.release = drm_release,
+		.ioctl	 = drm_ioctl,
+		.mmap	 = drm_mmap,
+		.poll = drm_poll,
+		.fasync  = drm_fasync,
+	},
+	.pci_driver = {
+		.name          = DRIVER_NAME,
+		.id_table      = pciidlist,
+	}
+};
+
+static int __init savage_init(void)
+{
+	driver.num_ioctls = savage_max_ioctl;
+	return drm_init(&driver);
+}
+
+static void __exit savage_exit(void)
+{
+	drm_exit(&driver);
+}
+
+module_init(savage_init);
+module_exit(savage_exit);
+
+MODULE_AUTHOR( DRIVER_AUTHOR );
+MODULE_DESCRIPTION( DRIVER_DESC );
+MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/char/drm/savage_drv.h b/drivers/char/drm/savage_drv.h
new file mode 100644
index 0000000..a454349
--- /dev/null
+++ b/drivers/char/drm/savage_drv.h
@@ -0,0 +1,579 @@
+/* savage_drv.h -- Private header for the savage driver
+ *
+ * Copyright 2004  Felix Kuehling
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __SAVAGE_DRV_H__
+#define __SAVAGE_DRV_H__
+
+#define DRIVER_AUTHOR	"Felix Kuehling"
+
+#define DRIVER_NAME	"savage"
+#define DRIVER_DESC	"Savage3D/MX/IX, Savage4, SuperSavage, Twister, ProSavage[DDR]"
+#define DRIVER_DATE	"20050313"
+
+#define DRIVER_MAJOR		2
+#define DRIVER_MINOR		4
+#define DRIVER_PATCHLEVEL	1
+/* Interface history:
+ *
+ * 1.x   The DRM driver from the VIA/S3 code drop, basically a dummy
+ * 2.0   The first real DRM
+ * 2.1   Scissors registers managed by the DRM, 3D operations clipped by
+ *       cliprects of the cmdbuf ioctl
+ * 2.2   Implemented SAVAGE_CMD_DMA_IDX and SAVAGE_CMD_VB_IDX
+ * 2.3   Event counters used by BCI_EVENT_EMIT/WAIT ioctls are now 32 bits
+ *       wide and thus very long lived (unlikely to ever wrap). The size
+ *       in the struct was 32 bits before, but only 16 bits were used
+ * 2.4   Implemented command DMA. Now drm_savage_init_t.cmd_dma_offset is
+ *       actually used
+ */
+
+typedef struct drm_savage_age {
+	uint16_t event;
+	unsigned int wrap;
+} drm_savage_age_t;
+
+typedef struct drm_savage_buf_priv {
+	struct drm_savage_buf_priv *next;
+	struct drm_savage_buf_priv *prev;
+	drm_savage_age_t age;
+	drm_buf_t *buf;
+} drm_savage_buf_priv_t;
+
+typedef struct drm_savage_dma_page {
+	drm_savage_age_t age;
+	unsigned int used, flushed;
+} drm_savage_dma_page_t;
+#define SAVAGE_DMA_PAGE_SIZE 1024 /* in dwords */
+/* Fake DMA buffer size in bytes. 4 pages. Allows a maximum command
+ * size of 16kbytes or 4k entries. Minimum requirement would be
+ * 10kbytes for 255 40-byte vertices in one drawing command. */
+#define SAVAGE_FAKE_DMA_SIZE (SAVAGE_DMA_PAGE_SIZE*4*4)
+
+/* interesting bits of hardware state that are saved in dev_priv */
+typedef union {
+	struct drm_savage_common_state {
+		uint32_t vbaddr;
+	} common;
+	struct {
+		unsigned char pad[sizeof(struct drm_savage_common_state)];
+		uint32_t texctrl, texaddr;
+		uint32_t scstart, new_scstart;
+		uint32_t scend, new_scend;
+	} s3d;
+	struct {
+		unsigned char pad[sizeof(struct drm_savage_common_state)];
+		uint32_t texdescr, texaddr0, texaddr1;
+		uint32_t drawctrl0, new_drawctrl0;
+		uint32_t drawctrl1, new_drawctrl1;
+	} s4;
+} drm_savage_state_t;
+
+/* these chip tags should match the ones in the 2D driver in savage_regs.h. */
+enum savage_family {
+	S3_UNKNOWN = 0,
+	S3_SAVAGE3D,
+	S3_SAVAGE_MX,
+	S3_SAVAGE4,
+	S3_PROSAVAGE,
+	S3_TWISTER,
+	S3_PROSAVAGEDDR,
+	S3_SUPERSAVAGE,
+	S3_SAVAGE2000,
+	S3_LAST
+};
+
+#define S3_SAVAGE3D_SERIES(chip)  ((chip>=S3_SAVAGE3D) && (chip<=S3_SAVAGE_MX))
+
+#define S3_SAVAGE4_SERIES(chip)  ((chip==S3_SAVAGE4)            \
+                                  || (chip==S3_PROSAVAGE)       \
+                                  || (chip==S3_TWISTER)         \
+                                  || (chip==S3_PROSAVAGEDDR))
+
+#define	S3_SAVAGE_MOBILE_SERIES(chip)	((chip==S3_SAVAGE_MX) || (chip==S3_SUPERSAVAGE))
+
+#define S3_SAVAGE_SERIES(chip)    ((chip>=S3_SAVAGE3D) && (chip<=S3_SAVAGE2000))
+
+#define S3_MOBILE_TWISTER_SERIES(chip)   ((chip==S3_TWISTER)    \
+                                          ||(chip==S3_PROSAVAGEDDR))
+
+/* flags */
+#define SAVAGE_IS_AGP 1
+
+typedef struct drm_savage_private {
+	drm_savage_sarea_t *sarea_priv;
+
+	drm_savage_buf_priv_t head, tail;
+
+	/* who am I? */
+	enum savage_family chipset;
+
+	unsigned int cob_size;
+	unsigned int bci_threshold_lo, bci_threshold_hi;
+	unsigned int dma_type;
+
+	/* frame buffer layout */
+	unsigned int fb_bpp;
+	unsigned int front_offset, front_pitch;
+	unsigned int back_offset, back_pitch;
+	unsigned int depth_bpp;
+	unsigned int depth_offset, depth_pitch;
+
+	/* bitmap descriptors for swap and clear */
+	unsigned int front_bd, back_bd, depth_bd;
+
+	/* local textures */
+	unsigned int texture_offset;
+	unsigned int texture_size;
+
+	/* memory regions in physical memory */
+	drm_local_map_t *sarea;
+	drm_local_map_t *mmio;
+	drm_local_map_t *fb;
+	drm_local_map_t *aperture;
+	drm_local_map_t *status;
+	drm_local_map_t *agp_textures;
+	drm_local_map_t *cmd_dma;
+	drm_local_map_t fake_dma;
+
+	struct {
+		int handle;
+		unsigned long base, size;
+	} mtrr[3];
+
+	/* BCI and status-related stuff */
+	volatile uint32_t *status_ptr, *bci_ptr;
+	uint32_t status_used_mask;
+	uint16_t event_counter;
+	unsigned int event_wrap;
+
+	/* Savage4 command DMA */
+	drm_savage_dma_page_t *dma_pages;
+	unsigned int nr_dma_pages, first_dma_page, current_dma_page;
+	drm_savage_age_t last_dma_age;
+
+	/* saved hw state for global/local check on S3D */
+	uint32_t hw_draw_ctrl, hw_zbuf_ctrl;
+	/* and for scissors (global, so don't emit if not changed) */
+	uint32_t hw_scissors_start, hw_scissors_end;
+
+	drm_savage_state_t state;
+
+	/* after emitting a wait cmd Savage3D needs 63 nops before next DMA */
+	unsigned int waiting;
+
+	/* config/hardware-dependent function pointers */
+	int (*wait_fifo)(struct drm_savage_private *dev_priv, unsigned int n);
+	int (*wait_evnt)(struct drm_savage_private *dev_priv, uint16_t e);
+	/* Err, there is a macro wait_event in include/linux/wait.h.
+	 * Avoid unwanted macro expansion. */
+	void (*emit_clip_rect)(struct drm_savage_private *dev_priv,
+			       drm_clip_rect_t *pbox);
+	void (*dma_flush)(struct drm_savage_private *dev_priv);
+} drm_savage_private_t;
+
+/* ioctls */
+extern int savage_bci_cmdbuf(DRM_IOCTL_ARGS);
+extern int savage_bci_buffers(DRM_IOCTL_ARGS);
+
+/* BCI functions */
+extern uint16_t savage_bci_emit_event(drm_savage_private_t *dev_priv,
+				      unsigned int flags);
+extern void savage_freelist_put(drm_device_t *dev, drm_buf_t *buf);
+extern void savage_dma_reset(drm_savage_private_t *dev_priv);
+extern void savage_dma_wait(drm_savage_private_t *dev_priv, unsigned int page);
+extern uint32_t *savage_dma_alloc(drm_savage_private_t *dev_priv,
+				  unsigned int n);
+extern int savage_preinit(drm_device_t *dev, unsigned long chipset);
+extern int savage_postcleanup(drm_device_t *dev);
+extern int savage_do_cleanup_bci(drm_device_t *dev);
+extern void savage_reclaim_buffers(drm_device_t *dev, DRMFILE filp);
+
+/* state functions */
+extern void savage_emit_clip_rect_s3d(drm_savage_private_t *dev_priv,
+				      drm_clip_rect_t *pbox);
+extern void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv,
+				     drm_clip_rect_t *pbox);
+
+#define SAVAGE_FB_SIZE_S3	0x01000000	/*  16MB */
+#define SAVAGE_FB_SIZE_S4	0x02000000	/*  32MB */
+#define SAVAGE_MMIO_SIZE        0x00080000	/* 512kB */
+#define SAVAGE_APERTURE_OFFSET  0x02000000	/*  32MB */
+#define SAVAGE_APERTURE_SIZE    0x05000000	/* 5 tiled surfaces, 16MB each */
+
+#define SAVAGE_BCI_OFFSET       0x00010000      /* offset of the BCI region
+						 * inside the MMIO region */
+#define SAVAGE_BCI_FIFO_SIZE	32		/* number of entries in on-chip
+						 * BCI FIFO */
+
+/*
+ * MMIO registers
+ */
+#define SAVAGE_STATUS_WORD0		0x48C00
+#define SAVAGE_STATUS_WORD1		0x48C04
+#define SAVAGE_ALT_STATUS_WORD0 	0x48C60
+
+#define SAVAGE_FIFO_USED_MASK_S3D	0x0001ffff
+#define SAVAGE_FIFO_USED_MASK_S4	0x001fffff
+
+/* Copied from savage_bci.h in the 2D driver with some renaming. */
+
+/* Bitmap descriptors */
+#define SAVAGE_BD_STRIDE_SHIFT 0
+#define SAVAGE_BD_BPP_SHIFT   16
+#define SAVAGE_BD_TILE_SHIFT  24
+#define SAVAGE_BD_BW_DISABLE  (1<<28)
+/* common: */
+#define	SAVAGE_BD_TILE_LINEAR		0
+/* savage4, MX, IX, 3D */
+#define	SAVAGE_BD_TILE_16BPP		2
+#define	SAVAGE_BD_TILE_32BPP		3
+/* twister, prosavage, DDR, supersavage, 2000 */
+#define	SAVAGE_BD_TILE_DEST		1
+#define	SAVAGE_BD_TILE_TEXTURE		2
+/* GBD - BCI enable */
+/* savage4, MX, IX, 3D */
+#define SAVAGE_GBD_BCI_ENABLE                    8
+/* twister, prosavage, DDR, supersavage, 2000 */
+#define SAVAGE_GBD_BCI_ENABLE_TWISTER            0
+
+#define SAVAGE_GBD_BIG_ENDIAN                    4
+#define SAVAGE_GBD_LITTLE_ENDIAN                 0
+#define SAVAGE_GBD_64                            1
+
+/*  Global Bitmap Descriptor */
+#define SAVAGE_BCI_GLB_BD_LOW             0x8168
+#define SAVAGE_BCI_GLB_BD_HIGH            0x816C
+
+/*
+ * BCI registers
+ */
+/* Savage4/Twister/ProSavage 3D registers */
+#define SAVAGE_DRAWLOCALCTRL_S4		0x1e
+#define SAVAGE_TEXPALADDR_S4		0x1f
+#define SAVAGE_TEXCTRL0_S4		0x20
+#define SAVAGE_TEXCTRL1_S4		0x21
+#define SAVAGE_TEXADDR0_S4		0x22
+#define SAVAGE_TEXADDR1_S4		0x23
+#define SAVAGE_TEXBLEND0_S4		0x24
+#define SAVAGE_TEXBLEND1_S4		0x25
+#define SAVAGE_TEXXPRCLR_S4		0x26 /* never used */
+#define SAVAGE_TEXDESCR_S4		0x27
+#define SAVAGE_FOGTABLE_S4		0x28
+#define SAVAGE_FOGCTRL_S4		0x30
+#define SAVAGE_STENCILCTRL_S4		0x31
+#define SAVAGE_ZBUFCTRL_S4		0x32
+#define SAVAGE_ZBUFOFF_S4		0x33
+#define SAVAGE_DESTCTRL_S4		0x34
+#define SAVAGE_DRAWCTRL0_S4		0x35
+#define SAVAGE_DRAWCTRL1_S4		0x36
+#define SAVAGE_ZWATERMARK_S4		0x37
+#define SAVAGE_DESTTEXRWWATERMARK_S4	0x38
+#define SAVAGE_TEXBLENDCOLOR_S4		0x39
+/* Savage3D/MX/IX 3D registers */
+#define SAVAGE_TEXPALADDR_S3D		0x18
+#define SAVAGE_TEXXPRCLR_S3D		0x19 /* never used */
+#define SAVAGE_TEXADDR_S3D		0x1A
+#define SAVAGE_TEXDESCR_S3D		0x1B
+#define SAVAGE_TEXCTRL_S3D		0x1C
+#define SAVAGE_FOGTABLE_S3D		0x20
+#define SAVAGE_FOGCTRL_S3D		0x30
+#define SAVAGE_DRAWCTRL_S3D		0x31
+#define SAVAGE_ZBUFCTRL_S3D		0x32
+#define SAVAGE_ZBUFOFF_S3D		0x33
+#define SAVAGE_DESTCTRL_S3D		0x34
+#define SAVAGE_SCSTART_S3D		0x35
+#define SAVAGE_SCEND_S3D		0x36
+#define SAVAGE_ZWATERMARK_S3D		0x37 
+#define SAVAGE_DESTTEXRWWATERMARK_S3D	0x38
+/* common stuff */
+#define SAVAGE_VERTBUFADDR		0x3e
+#define SAVAGE_BITPLANEWTMASK		0xd7
+#define SAVAGE_DMABUFADDR		0x51
+
+/* texture enable bits (needed for tex addr checking) */
+#define SAVAGE_TEXCTRL_TEXEN_MASK	0x00010000 /* S3D */
+#define SAVAGE_TEXDESCR_TEX0EN_MASK	0x02000000 /* S4 */
+#define SAVAGE_TEXDESCR_TEX1EN_MASK	0x04000000 /* S4 */
+
+/* Global fields in Savage4/Twister/ProSavage 3D registers:
+ *
+ * All texture registers and DrawLocalCtrl are local. All other
+ * registers are global. */
+
+/* Global fields in Savage3D/MX/IX 3D registers:
+ *
+ * All texture registers are local. DrawCtrl and ZBufCtrl are
+ * partially local. All other registers are global.
+ *
+ * DrawCtrl global fields: cullMode, alphaTestCmpFunc, alphaTestEn, alphaRefVal
+ * ZBufCtrl global fields: zCmpFunc, zBufEn
+ */
+#define SAVAGE_DRAWCTRL_S3D_GLOBAL	0x03f3c00c
+#define SAVAGE_ZBUFCTRL_S3D_GLOBAL	0x00000027
+
+/* Masks for scissor bits (drawCtrl[01] on s4, scissorStart/End on s3d)
+ */
+#define SAVAGE_SCISSOR_MASK_S4		0x00fff7ff
+#define SAVAGE_SCISSOR_MASK_S3D		0x07ff07ff
+
+/*
+ * BCI commands
+ */
+#define BCI_CMD_NOP                  0x40000000
+#define BCI_CMD_RECT                 0x48000000
+#define BCI_CMD_RECT_XP              0x01000000
+#define BCI_CMD_RECT_YP              0x02000000
+#define BCI_CMD_SCANLINE             0x50000000
+#define BCI_CMD_LINE                 0x5C000000
+#define BCI_CMD_LINE_LAST_PIXEL      0x58000000
+#define BCI_CMD_BYTE_TEXT            0x63000000
+#define BCI_CMD_NT_BYTE_TEXT         0x67000000
+#define BCI_CMD_BIT_TEXT             0x6C000000
+#define BCI_CMD_GET_ROP(cmd)         (((cmd) >> 16) & 0xFF)
+#define BCI_CMD_SET_ROP(cmd, rop)    ((cmd) |= ((rop & 0xFF) << 16))
+#define BCI_CMD_SEND_COLOR           0x00008000
+
+#define BCI_CMD_CLIP_NONE            0x00000000
+#define BCI_CMD_CLIP_CURRENT         0x00002000
+#define BCI_CMD_CLIP_LR              0x00004000
+#define BCI_CMD_CLIP_NEW             0x00006000
+
+#define BCI_CMD_DEST_GBD             0x00000000
+#define BCI_CMD_DEST_PBD             0x00000800
+#define BCI_CMD_DEST_PBD_NEW         0x00000C00
+#define BCI_CMD_DEST_SBD             0x00001000
+#define BCI_CMD_DEST_SBD_NEW         0x00001400
+
+#define BCI_CMD_SRC_TRANSPARENT      0x00000200
+#define BCI_CMD_SRC_SOLID            0x00000000
+#define BCI_CMD_SRC_GBD              0x00000020
+#define BCI_CMD_SRC_COLOR            0x00000040
+#define BCI_CMD_SRC_MONO             0x00000060
+#define BCI_CMD_SRC_PBD_COLOR        0x00000080
+#define BCI_CMD_SRC_PBD_MONO         0x000000A0
+#define BCI_CMD_SRC_PBD_COLOR_NEW    0x000000C0
+#define BCI_CMD_SRC_PBD_MONO_NEW     0x000000E0
+#define BCI_CMD_SRC_SBD_COLOR        0x00000100
+#define BCI_CMD_SRC_SBD_MONO         0x00000120
+#define BCI_CMD_SRC_SBD_COLOR_NEW    0x00000140
+#define BCI_CMD_SRC_SBD_MONO_NEW     0x00000160
+
+#define BCI_CMD_PAT_TRANSPARENT      0x00000010
+#define BCI_CMD_PAT_NONE             0x00000000
+#define BCI_CMD_PAT_COLOR            0x00000002
+#define BCI_CMD_PAT_MONO             0x00000003
+#define BCI_CMD_PAT_PBD_COLOR        0x00000004
+#define BCI_CMD_PAT_PBD_MONO         0x00000005
+#define BCI_CMD_PAT_PBD_COLOR_NEW    0x00000006
+#define BCI_CMD_PAT_PBD_MONO_NEW     0x00000007
+#define BCI_CMD_PAT_SBD_COLOR        0x00000008
+#define BCI_CMD_PAT_SBD_MONO         0x00000009
+#define BCI_CMD_PAT_SBD_COLOR_NEW    0x0000000A
+#define BCI_CMD_PAT_SBD_MONO_NEW     0x0000000B
+
+#define BCI_BD_BW_DISABLE            0x10000000
+#define BCI_BD_TILE_MASK             0x03000000
+#define BCI_BD_TILE_NONE             0x00000000
+#define BCI_BD_TILE_16               0x02000000
+#define BCI_BD_TILE_32               0x03000000
+#define BCI_BD_GET_BPP(bd)           (((bd) >> 16) & 0xFF)
+#define BCI_BD_SET_BPP(bd, bpp)      ((bd) |= (((bpp) & 0xFF) << 16))
+#define BCI_BD_GET_STRIDE(bd)        ((bd) & 0xFFFF)
+#define BCI_BD_SET_STRIDE(bd, st)    ((bd) |= ((st) & 0xFFFF))
+
+#define BCI_CMD_SET_REGISTER            0x96000000
+
+#define BCI_CMD_WAIT                    0xC0000000
+#define BCI_CMD_WAIT_3D                 0x00010000
+#define BCI_CMD_WAIT_2D                 0x00020000
+
+#define BCI_CMD_UPDATE_EVENT_TAG        0x98000000
+
+#define BCI_CMD_DRAW_PRIM               0x80000000
+#define BCI_CMD_DRAW_INDEXED_PRIM       0x88000000
+#define BCI_CMD_DRAW_CONT               0x01000000
+#define BCI_CMD_DRAW_TRILIST            0x00000000
+#define BCI_CMD_DRAW_TRISTRIP           0x02000000
+#define BCI_CMD_DRAW_TRIFAN             0x04000000
+#define BCI_CMD_DRAW_SKIPFLAGS          0x000000ff
+#define BCI_CMD_DRAW_NO_Z		0x00000001
+#define BCI_CMD_DRAW_NO_W		0x00000002
+#define BCI_CMD_DRAW_NO_CD		0x00000004
+#define BCI_CMD_DRAW_NO_CS		0x00000008
+#define BCI_CMD_DRAW_NO_U0		0x00000010
+#define BCI_CMD_DRAW_NO_V0		0x00000020
+#define BCI_CMD_DRAW_NO_UV0		0x00000030
+#define BCI_CMD_DRAW_NO_U1		0x00000040
+#define BCI_CMD_DRAW_NO_V1		0x00000080
+#define BCI_CMD_DRAW_NO_UV1		0x000000c0
+
+#define BCI_CMD_DMA			0xa8000000
+
+#define BCI_W_H(w, h)                ((((h) << 16) | (w)) & 0x0FFF0FFF)
+#define BCI_X_Y(x, y)                ((((y) << 16) | (x)) & 0x0FFF0FFF)
+#define BCI_X_W(x, y)                ((((w) << 16) | (x)) & 0x0FFF0FFF)
+#define BCI_CLIP_LR(l, r)            ((((r) << 16) | (l)) & 0x0FFF0FFF)
+#define BCI_CLIP_TL(t, l)            ((((t) << 16) | (l)) & 0x0FFF0FFF)
+#define BCI_CLIP_BR(b, r)            ((((b) << 16) | (r)) & 0x0FFF0FFF)
+
+#define BCI_LINE_X_Y(x, y)           (((y) << 16) | ((x) & 0xFFFF))
+#define BCI_LINE_STEPS(diag, axi)    (((axi) << 16) | ((diag) & 0xFFFF))
+#define BCI_LINE_MISC(maj, ym, xp, yp, err) \
+	(((maj) & 0x1FFF) | \
+	((ym) ? 1<<13 : 0) | \
+	((xp) ? 1<<14 : 0) | \
+	((yp) ? 1<<15 : 0) | \
+	((err) << 16))
+
+/*
+ * common commands
+ */
+#define BCI_SET_REGISTERS( first, n )			\
+	BCI_WRITE(BCI_CMD_SET_REGISTER |		\
+		  ((uint32_t)(n) & 0xff) << 16 |	\
+		  ((uint32_t)(first) & 0xffff))
+#define DMA_SET_REGISTERS( first, n )			\
+	DMA_WRITE(BCI_CMD_SET_REGISTER |		\
+		  ((uint32_t)(n) & 0xff) << 16 |	\
+		  ((uint32_t)(first) & 0xffff))
+
+#define BCI_DRAW_PRIMITIVE(n, type, skip)         \
+        BCI_WRITE(BCI_CMD_DRAW_PRIM | (type) | (skip) | \
+		  ((n) << 16))
+#define DMA_DRAW_PRIMITIVE(n, type, skip)         \
+        DMA_WRITE(BCI_CMD_DRAW_PRIM | (type) | (skip) | \
+		  ((n) << 16))
+
+#define BCI_DRAW_INDICES_S3D(n, type, i0)         \
+        BCI_WRITE(BCI_CMD_DRAW_INDEXED_PRIM | (type) |  \
+		  ((n) << 16) | (i0))
+
+#define BCI_DRAW_INDICES_S4(n, type, skip)        \
+        BCI_WRITE(BCI_CMD_DRAW_INDEXED_PRIM | (type) |  \
+                  (skip) | ((n) << 16))
+
+#define BCI_DMA(n)	\
+	BCI_WRITE(BCI_CMD_DMA | (((n) >> 1) - 1))
+
+/*
+ * access to MMIO
+ */
+#define SAVAGE_READ(reg)	DRM_READ32(  dev_priv->mmio, (reg) )
+#define SAVAGE_WRITE(reg)	DRM_WRITE32( dev_priv->mmio, (reg) )
+
+/*
+ * access to the burst command interface (BCI)
+ */
+#define SAVAGE_BCI_DEBUG 1
+
+#define BCI_LOCALS    volatile uint32_t *bci_ptr;
+
+#define BEGIN_BCI( n ) do {			\
+	dev_priv->wait_fifo(dev_priv, (n));	\
+	bci_ptr = dev_priv->bci_ptr;		\
+} while(0)
+
+#define BCI_WRITE( val ) *bci_ptr++ = (uint32_t)(val)
+
+#define BCI_COPY_FROM_USER(src,n) do {				\
+    unsigned int i;						\
+    for (i = 0; i < n; ++i) {					\
+	uint32_t val;						\
+	DRM_GET_USER_UNCHECKED(val, &((uint32_t*)(src))[i]);	\
+	BCI_WRITE(val);						\
+    }								\
+} while(0)
+
+/*
+ * command DMA support
+ */
+#define SAVAGE_DMA_DEBUG 1
+
+#define DMA_LOCALS   uint32_t *dma_ptr;
+
+#define BEGIN_DMA( n ) do {						\
+	unsigned int cur = dev_priv->current_dma_page;			\
+	unsigned int rest = SAVAGE_DMA_PAGE_SIZE -			\
+		dev_priv->dma_pages[cur].used;				\
+	if ((n) > rest) {						\
+		dma_ptr = savage_dma_alloc(dev_priv, (n));		\
+	} else { /* fast path for small allocations */			\
+		dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle +	\
+			cur * SAVAGE_DMA_PAGE_SIZE +			\
+			dev_priv->dma_pages[cur].used;			\
+		if (dev_priv->dma_pages[cur].used == 0)			\
+			savage_dma_wait(dev_priv, cur);			\
+		dev_priv->dma_pages[cur].used += (n);			\
+	}								\
+} while(0)
+
+#define DMA_WRITE( val ) *dma_ptr++ = (uint32_t)(val)
+
+#define DMA_COPY_FROM_USER(src,n) do {				\
+	DRM_COPY_FROM_USER_UNCHECKED(dma_ptr, (src), (n)*4);	\
+	dma_ptr += n;						\
+} while(0)
+
+#if SAVAGE_DMA_DEBUG
+#define DMA_COMMIT() do {						\
+	unsigned int cur = dev_priv->current_dma_page;			\
+	uint32_t *expected = (uint32_t *)dev_priv->cmd_dma->handle +	\
+			cur * SAVAGE_DMA_PAGE_SIZE +			\
+			dev_priv->dma_pages[cur].used;			\
+	if (dma_ptr != expected) {					\
+		DRM_ERROR("DMA allocation and use don't match: "	\
+			  "%p != %p\n", expected, dma_ptr);		\
+		savage_dma_reset(dev_priv);				\
+	}								\
+} while(0)
+#else
+#define DMA_COMMIT() do {/* nothing */} while(0)
+#endif
+
+#define DMA_FLUSH() dev_priv->dma_flush(dev_priv)
+
+/* Buffer aging via event tag
+ */
+
+#define UPDATE_EVENT_COUNTER( ) do {			\
+	if (dev_priv->status_ptr) {			\
+		uint16_t count;				\
+		/* coordinate with Xserver */		\
+		count = dev_priv->status_ptr[1023];	\
+		if (count < dev_priv->event_counter)	\
+			dev_priv->event_wrap++;		\
+		dev_priv->event_counter = count;	\
+	}						\
+} while(0)
+
+#define SET_AGE( age, e, w ) do {	\
+	(age)->event = e;		\
+	(age)->wrap = w;		\
+} while(0)
+
+#define TEST_AGE( age, e, w )				\
+	( (age)->wrap < (w) || ( (age)->wrap == (w) && (age)->event <= (e) ) )
+
+#endif /* __SAVAGE_DRV_H__ */
diff --git a/drivers/char/drm/savage_state.c b/drivers/char/drm/savage_state.c
new file mode 100644
index 0000000..475695a
--- /dev/null
+++ b/drivers/char/drm/savage_state.c
@@ -0,0 +1,1146 @@
+/* savage_state.c -- State and drawing support for Savage
+ *
+ * Copyright 2004  Felix Kuehling
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "drmP.h"
+#include "savage_drm.h"
+#include "savage_drv.h"
+
+void savage_emit_clip_rect_s3d(drm_savage_private_t *dev_priv,
+			       drm_clip_rect_t *pbox)
+{
+	uint32_t scstart = dev_priv->state.s3d.new_scstart;
+	uint32_t scend   = dev_priv->state.s3d.new_scend;
+	scstart = (scstart & ~SAVAGE_SCISSOR_MASK_S3D) |
+		((uint32_t)pbox->x1 & 0x000007ff) | 
+		(((uint32_t)pbox->y1 << 16) & 0x07ff0000);
+	scend   = (scend   & ~SAVAGE_SCISSOR_MASK_S3D) |
+		(((uint32_t)pbox->x2-1) & 0x000007ff) |
+		((((uint32_t)pbox->y2-1) << 16) & 0x07ff0000);
+	if (scstart != dev_priv->state.s3d.scstart ||
+	    scend   != dev_priv->state.s3d.scend) {
+		DMA_LOCALS;
+		BEGIN_DMA(4);
+		DMA_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D);
+		DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);
+		DMA_WRITE(scstart);
+		DMA_WRITE(scend);
+		dev_priv->state.s3d.scstart = scstart;
+		dev_priv->state.s3d.scend   = scend;
+		dev_priv->waiting = 1;
+		DMA_COMMIT();
+	}
+}
+
+void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv,
+			      drm_clip_rect_t *pbox)
+{
+	uint32_t drawctrl0 = dev_priv->state.s4.new_drawctrl0;
+	uint32_t drawctrl1 = dev_priv->state.s4.new_drawctrl1;
+	drawctrl0 = (drawctrl0 & ~SAVAGE_SCISSOR_MASK_S4) |
+		((uint32_t)pbox->x1 & 0x000007ff) |
+		(((uint32_t)pbox->y1 << 12) & 0x00fff000);
+	drawctrl1 = (drawctrl1 & ~SAVAGE_SCISSOR_MASK_S4) |
+		(((uint32_t)pbox->x2-1) & 0x000007ff) |
+		((((uint32_t)pbox->y2-1) << 12) & 0x00fff000);
+	if (drawctrl0 != dev_priv->state.s4.drawctrl0 ||
+	    drawctrl1 != dev_priv->state.s4.drawctrl1) {
+		DMA_LOCALS;
+		BEGIN_DMA(4);
+		DMA_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D);
+		DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);
+		DMA_WRITE(drawctrl0);
+		DMA_WRITE(drawctrl1);
+		dev_priv->state.s4.drawctrl0 = drawctrl0;
+		dev_priv->state.s4.drawctrl1 = drawctrl1;
+		dev_priv->waiting = 1;
+		DMA_COMMIT();
+	}
+}
+
+static int savage_verify_texaddr(drm_savage_private_t *dev_priv, int unit,
+				 uint32_t addr)
+{
+	if ((addr & 6) != 2) { /* reserved bits */
+		DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr);
+		return DRM_ERR(EINVAL);
+	}
+	if (!(addr & 1)) { /* local */
+		addr &= ~7;
+		if (addr <  dev_priv->texture_offset ||
+		    addr >= dev_priv->texture_offset+dev_priv->texture_size) {
+			DRM_ERROR("bad texAddr%d %08x (local addr out of range)\n",
+				  unit, addr);
+			return DRM_ERR(EINVAL);
+		}
+	} else { /* AGP */
+		if (!dev_priv->agp_textures) {
+			DRM_ERROR("bad texAddr%d %08x (AGP not available)\n",
+				  unit, addr);
+			return DRM_ERR(EINVAL);
+		}
+		addr &= ~7;
+		if (addr < dev_priv->agp_textures->offset ||
+		    addr >= (dev_priv->agp_textures->offset +
+			     dev_priv->agp_textures->size)) {
+			DRM_ERROR("bad texAddr%d %08x (AGP addr out of range)\n",
+				  unit, addr);
+			return DRM_ERR(EINVAL);
+		}
+	}
+	return 0;
+}
+
+#define SAVE_STATE(reg,where)			\
+	if(start <= reg && start+count > reg)	\
+		DRM_GET_USER_UNCHECKED(dev_priv->state.where, &regs[reg-start])
+#define SAVE_STATE_MASK(reg,where,mask) do {			\
+	if(start <= reg && start+count > reg) {			\
+		uint32_t tmp;					\
+		DRM_GET_USER_UNCHECKED(tmp, &regs[reg-start]);	\
+		dev_priv->state.where = (tmp & (mask)) |	\
+			(dev_priv->state.where & ~(mask));	\
+	}							\
+} while (0)
+static int savage_verify_state_s3d(drm_savage_private_t *dev_priv,
+				   unsigned int start, unsigned int count,
+				   const uint32_t __user *regs)
+{
+	if (start < SAVAGE_TEXPALADDR_S3D ||
+	    start+count-1 > SAVAGE_DESTTEXRWWATERMARK_S3D) {
+		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
+			  start, start+count-1);
+		return DRM_ERR(EINVAL);
+	}
+
+	SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart,
+			~SAVAGE_SCISSOR_MASK_S3D);
+	SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend,
+			~SAVAGE_SCISSOR_MASK_S3D);
+
+	/* if any texture regs were changed ... */
+	if (start <= SAVAGE_TEXCTRL_S3D &&
+	    start+count > SAVAGE_TEXPALADDR_S3D) {
+		/* ... check texture state */
+		SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl);
+		SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr);
+		if (dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK)
+			return savage_verify_texaddr(
+				dev_priv, 0, dev_priv->state.s3d.texaddr);
+	}
+
+	return 0;
+}
+
+static int savage_verify_state_s4(drm_savage_private_t *dev_priv,
+				  unsigned int start, unsigned int count,
+				  const uint32_t __user *regs)
+{
+	int ret = 0;
+
+	if (start < SAVAGE_DRAWLOCALCTRL_S4 ||
+	    start+count-1 > SAVAGE_TEXBLENDCOLOR_S4) {
+		DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
+			  start, start+count-1);
+		return DRM_ERR(EINVAL);
+	}
+
+	SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0,
+			~SAVAGE_SCISSOR_MASK_S4);
+	SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1,
+			~SAVAGE_SCISSOR_MASK_S4);
+
+	/* if any texture regs were changed ... */
+	if (start <= SAVAGE_TEXDESCR_S4 &&
+	    start+count > SAVAGE_TEXPALADDR_S4) {
+		/* ... check texture state */
+		SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr);
+		SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0);
+		SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1);
+		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK)
+			ret |= savage_verify_texaddr(
+				dev_priv, 0, dev_priv->state.s4.texaddr0);
+		if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK)
+			ret |= savage_verify_texaddr(
+				dev_priv, 1, dev_priv->state.s4.texaddr1);
+	}
+
+	return ret;
+}
+#undef SAVE_STATE
+#undef SAVE_STATE_MASK
+
+static int savage_dispatch_state(drm_savage_private_t *dev_priv,
+				 const drm_savage_cmd_header_t *cmd_header,
+				 const uint32_t __user *regs)
+{
+	unsigned int count = cmd_header->state.count;
+	unsigned int start = cmd_header->state.start;
+	unsigned int count2 = 0;
+	unsigned int bci_size;
+	int ret;
+	DMA_LOCALS;
+
+	if (!count)
+		return 0;
+
+	if (DRM_VERIFYAREA_READ(regs, count*4))
+		return DRM_ERR(EFAULT);
+
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+		ret = savage_verify_state_s3d(dev_priv, start, count, regs);
+		if (ret != 0)
+			return ret;
+		/* scissor regs are emitted in savage_dispatch_draw */
+		if (start < SAVAGE_SCSTART_S3D) {
+			if (start+count > SAVAGE_SCEND_S3D+1)
+				count2 = count - (SAVAGE_SCEND_S3D+1 - start);
+			if (start+count > SAVAGE_SCSTART_S3D)
+				count = SAVAGE_SCSTART_S3D - start;
+		} else if (start <= SAVAGE_SCEND_S3D) {
+			if (start+count > SAVAGE_SCEND_S3D+1) {
+				count -= SAVAGE_SCEND_S3D+1 - start;
+				start = SAVAGE_SCEND_S3D+1;
+			} else
+				return 0;
+		}
+	} else {
+		ret = savage_verify_state_s4(dev_priv, start, count, regs);
+		if (ret != 0)
+			return ret;
+		/* scissor regs are emitted in savage_dispatch_draw */
+		if (start < SAVAGE_DRAWCTRL0_S4) {
+			if (start+count > SAVAGE_DRAWCTRL1_S4+1)
+				count2 = count - (SAVAGE_DRAWCTRL1_S4+1 - start);
+			if (start+count > SAVAGE_DRAWCTRL0_S4)
+				count = SAVAGE_DRAWCTRL0_S4 - start;
+		} else if (start <= SAVAGE_DRAWCTRL1_S4) {
+			if (start+count > SAVAGE_DRAWCTRL1_S4+1) {
+				count -= SAVAGE_DRAWCTRL1_S4+1 - start;
+				start = SAVAGE_DRAWCTRL1_S4+1;
+			} else
+				return 0;
+		}
+	}
+
+	bci_size = count + (count+254)/255 + count2 + (count2+254)/255;
+
+	if (cmd_header->state.global) {
+		BEGIN_DMA(bci_size+1);
+		DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
+		dev_priv->waiting = 1;
+	} else {
+		BEGIN_DMA(bci_size);
+	}
+
+	do {
+		while (count > 0) {
+			unsigned int n = count < 255 ? count : 255;
+			DMA_SET_REGISTERS(start, n);
+			DMA_COPY_FROM_USER(regs, n);
+			count -= n;
+			start += n;
+			regs += n;
+		}
+		start += 2;
+		regs += 2;
+		count = count2;
+		count2 = 0;
+	} while (count);
+
+	DMA_COMMIT();
+
+	return 0;
+}
+
+static int savage_dispatch_dma_prim(drm_savage_private_t *dev_priv,
+				    const drm_savage_cmd_header_t *cmd_header,
+				    const drm_buf_t *dmabuf)
+{
+	unsigned char reorder = 0;
+	unsigned int prim = cmd_header->prim.prim;
+	unsigned int skip = cmd_header->prim.skip;
+	unsigned int n = cmd_header->prim.count;
+	unsigned int start = cmd_header->prim.start;
+	unsigned int i;
+	BCI_LOCALS;
+
+	if (!dmabuf) {
+	    DRM_ERROR("called without dma buffers!\n");
+	    return DRM_ERR(EINVAL);
+	}
+
+	if (!n)
+		return 0;
+
+	switch (prim) {
+	case SAVAGE_PRIM_TRILIST_201:
+		reorder = 1;
+		prim = SAVAGE_PRIM_TRILIST;
+	case SAVAGE_PRIM_TRILIST:
+		if (n % 3 != 0) {
+			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
+				  n);
+			return DRM_ERR(EINVAL);
+		}
+		break;
+	case SAVAGE_PRIM_TRISTRIP:
+	case SAVAGE_PRIM_TRIFAN:
+		if (n < 3) {
+			DRM_ERROR("wrong number of vertices %u in TRIFAN/STRIP\n",
+				  n);
+			return DRM_ERR(EINVAL);
+		}
+		break;
+	default:
+		DRM_ERROR("invalid primitive type %u\n", prim);
+		return DRM_ERR(EINVAL);
+	}
+
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+		if (skip != 0) {
+			DRM_ERROR("invalid skip flags 0x%04x for DMA\n",
+				  skip);
+			return DRM_ERR(EINVAL);
+		}
+	} else {
+		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
+			(skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
+			(skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
+		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
+			DRM_ERROR("invalid skip flags 0x%04x for DMA\n",
+				  skip);
+			return DRM_ERR(EINVAL);
+		}
+		if (reorder) {
+			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
+			return DRM_ERR(EINVAL);
+		}
+	}
+
+	if (start + n > dmabuf->total/32) {
+		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
+			  start, start + n - 1, dmabuf->total/32);
+		return DRM_ERR(EINVAL);
+	}
+
+	/* Vertex DMA doesn't work with command DMA at the same time,
+	 * so we use BCI_... to submit commands here. Flush buffered
+	 * faked DMA first. */
+	DMA_FLUSH();
+
+	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
+		BEGIN_BCI(2);
+		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
+		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
+		dev_priv->state.common.vbaddr = dmabuf->bus_address;
+	}
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
+		/* Workaround for what looks like a hardware bug. If a
+		 * WAIT_3D_IDLE was emitted some time before the
+		 * indexed drawing command then the engine will lock
+		 * up. There are two known workarounds:
+		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
+		BEGIN_BCI(63);
+		for (i = 0; i < 63; ++i)
+			BCI_WRITE(BCI_CMD_WAIT);
+		dev_priv->waiting = 0;
+	}
+
+	prim <<= 25;
+	while (n != 0) {
+		/* Can emit up to 255 indices (85 triangles) at once. */
+		unsigned int count = n > 255 ? 255 : n;
+		if (reorder) {
+			/* Need to reorder indices for correct flat
+			 * shading while preserving the clock sense
+			 * for correct culling. Only on Savage3D. */
+			int reorder[3] = {-1, -1, -1};
+			reorder[start%3] = 2;
+
+			BEGIN_BCI((count+1+1)/2);
+			BCI_DRAW_INDICES_S3D(count, prim, start+2);
+
+			for (i = start+1; i+1 < start+count; i += 2)
+				BCI_WRITE((i + reorder[i % 3]) |
+					  ((i+1 + reorder[(i+1) % 3]) << 16));
+			if (i < start+count)
+				BCI_WRITE(i + reorder[i%3]);
+		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+			BEGIN_BCI((count+1+1)/2);
+			BCI_DRAW_INDICES_S3D(count, prim, start);
+
+			for (i = start+1; i+1 < start+count; i += 2)
+				BCI_WRITE(i | ((i+1) << 16));
+			if (i < start+count)
+				BCI_WRITE(i);
+		} else {
+			BEGIN_BCI((count+2+1)/2);
+			BCI_DRAW_INDICES_S4(count, prim, skip);
+
+			for (i = start; i+1 < start+count; i += 2)
+				BCI_WRITE(i | ((i+1) << 16));
+			if (i < start+count)
+				BCI_WRITE(i);
+		}
+
+		start += count;
+		n -= count;
+
+		prim |= BCI_CMD_DRAW_CONT;
+	}
+
+	return 0;
+}
+
+static int savage_dispatch_vb_prim(drm_savage_private_t *dev_priv,
+				   const drm_savage_cmd_header_t *cmd_header,
+				   const uint32_t __user *vtxbuf,
+				   unsigned int vb_size,
+				   unsigned int vb_stride)
+{
+	unsigned char reorder = 0;
+	unsigned int prim = cmd_header->prim.prim;
+	unsigned int skip = cmd_header->prim.skip;
+	unsigned int n = cmd_header->prim.count;
+	unsigned int start = cmd_header->prim.start;
+	unsigned int vtx_size;
+	unsigned int i;
+	DMA_LOCALS;
+
+	if (!n)
+		return 0;
+
+	switch (prim) {
+	case SAVAGE_PRIM_TRILIST_201:
+		reorder = 1;
+		prim = SAVAGE_PRIM_TRILIST;
+	case SAVAGE_PRIM_TRILIST:
+		if (n % 3 != 0) {
+			DRM_ERROR("wrong number of vertices %u in TRILIST\n",
+				  n);
+			return DRM_ERR(EINVAL);
+		}
+		break;
+	case SAVAGE_PRIM_TRISTRIP:
+	case SAVAGE_PRIM_TRIFAN:
+		if (n < 3) {
+			DRM_ERROR("wrong number of vertices %u in TRIFAN/STRIP\n",
+				  n);
+			return DRM_ERR(EINVAL);
+		}
+		break;
+	default:
+		DRM_ERROR("invalid primitive type %u\n", prim);
+		return DRM_ERR(EINVAL);
+	}
+
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+		if (skip > SAVAGE_SKIP_ALL_S3D) {
+			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
+			return DRM_ERR(EINVAL);
+		}
+		vtx_size = 8; /* full vertex */
+	} else {
+		if (skip > SAVAGE_SKIP_ALL_S4) {
+			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
+			return DRM_ERR(EINVAL);
+		}
+		vtx_size = 10; /* full vertex */
+	}
+
+	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
+		(skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
+		(skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
+
+	if (vtx_size > vb_stride) {
+		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
+			  vtx_size, vb_stride);
+		return DRM_ERR(EINVAL);
+	}
+
+	if (start + n > vb_size / (vb_stride*4)) {
+		DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
+			  start, start + n - 1, vb_size / (vb_stride*4));
+		return DRM_ERR(EINVAL);
+	}
+
+	prim <<= 25;
+	while (n != 0) {
+		/* Can emit up to 255 vertices (85 triangles) at once. */
+		unsigned int count = n > 255 ? 255 : n;
+		if (reorder) {
+			/* Need to reorder vertices for correct flat
+			 * shading while preserving the clock sense
+			 * for correct culling. Only on Savage3D. */
+			int reorder[3] = {-1, -1, -1};
+			reorder[start%3] = 2;
+
+			BEGIN_DMA(count*vtx_size+1);
+			DMA_DRAW_PRIMITIVE(count, prim, skip);
+
+			for (i = start; i < start+count; ++i) {
+				unsigned int j = i + reorder[i % 3];
+				DMA_COPY_FROM_USER(&vtxbuf[vb_stride*j],
+						   vtx_size);
+			}
+
+			DMA_COMMIT();
+		} else {
+			BEGIN_DMA(count*vtx_size+1);
+			DMA_DRAW_PRIMITIVE(count, prim, skip);
+
+			if (vb_stride == vtx_size) {
+				DMA_COPY_FROM_USER(&vtxbuf[vb_stride*start],
+						   vtx_size*count);
+			} else {
+				for (i = start; i < start+count; ++i) {
+					DMA_COPY_FROM_USER(
+						&vtxbuf[vb_stride*i],
+						vtx_size);
+				}
+			}
+
+			DMA_COMMIT();
+		}
+
+		start += count;
+		n -= count;
+
+		prim |= BCI_CMD_DRAW_CONT;
+	}
+
+	return 0;
+}
+
+static int savage_dispatch_dma_idx(drm_savage_private_t *dev_priv,
+				   const drm_savage_cmd_header_t *cmd_header,
+				   const uint16_t __user *usr_idx,
+				   const drm_buf_t *dmabuf)
+{
+	unsigned char reorder = 0;
+	unsigned int prim = cmd_header->idx.prim;
+	unsigned int skip = cmd_header->idx.skip;
+	unsigned int n = cmd_header->idx.count;
+	unsigned int i;
+	BCI_LOCALS;
+
+	if (!dmabuf) {
+	    DRM_ERROR("called without dma buffers!\n");
+	    return DRM_ERR(EINVAL);
+	}
+
+	if (!n)
+		return 0;
+
+	switch (prim) {
+	case SAVAGE_PRIM_TRILIST_201:
+		reorder = 1;
+		prim = SAVAGE_PRIM_TRILIST;
+	case SAVAGE_PRIM_TRILIST:
+		if (n % 3 != 0) {
+			DRM_ERROR("wrong number of indices %u in TRILIST\n",
+				  n);
+			return DRM_ERR(EINVAL);
+		}
+		break;
+	case SAVAGE_PRIM_TRISTRIP:
+	case SAVAGE_PRIM_TRIFAN:
+		if (n < 3) {
+			DRM_ERROR("wrong number of indices %u in TRIFAN/STRIP\n",
+				  n);
+			return DRM_ERR(EINVAL);
+		}
+		break;
+	default:
+		DRM_ERROR("invalid primitive type %u\n", prim);
+		return DRM_ERR(EINVAL);
+	}
+
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+		if (skip != 0) {
+			DRM_ERROR("invalid skip flags 0x%04x for DMA\n",
+				  skip);
+			return DRM_ERR(EINVAL);
+		}
+	} else {
+		unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
+			(skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
+			(skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
+		if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
+			DRM_ERROR("invalid skip flags 0x%04x for DMA\n",
+				  skip);
+			return DRM_ERR(EINVAL);
+		}
+		if (reorder) {
+			DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
+			return DRM_ERR(EINVAL);
+		}
+	}
+
+	/* Vertex DMA doesn't work with command DMA at the same time,
+	 * so we use BCI_... to submit commands here. Flush buffered
+	 * faked DMA first. */
+	DMA_FLUSH();
+
+	if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
+		BEGIN_BCI(2);
+		BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
+		BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
+		dev_priv->state.common.vbaddr = dmabuf->bus_address;
+	}
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
+		/* Workaround for what looks like a hardware bug. If a
+		 * WAIT_3D_IDLE was emitted some time before the
+		 * indexed drawing command then the engine will lock
+		 * up. There are two known workarounds:
+		 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
+		BEGIN_BCI(63);
+		for (i = 0; i < 63; ++i)
+			BCI_WRITE(BCI_CMD_WAIT);
+		dev_priv->waiting = 0;
+	}
+
+	prim <<= 25;
+	while (n != 0) {
+		/* Can emit up to 255 indices (85 triangles) at once. */
+		unsigned int count = n > 255 ? 255 : n;
+		/* Is it ok to allocate 510 bytes on the stack in an ioctl? */
+		uint16_t idx[255];
+
+		/* Copy and check indices */
+		DRM_COPY_FROM_USER_UNCHECKED(idx, usr_idx, count*2);
+		for (i = 0; i < count; ++i) {
+			if (idx[i] > dmabuf->total/32) {
+				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
+					  i, idx[i], dmabuf->total/32);
+				return DRM_ERR(EINVAL);
+			}
+		}
+
+		if (reorder) {
+			/* Need to reorder indices for correct flat
+			 * shading while preserving the clock sense
+			 * for correct culling. Only on Savage3D. */
+			int reorder[3] = {2, -1, -1};
+
+			BEGIN_BCI((count+1+1)/2);
+			BCI_DRAW_INDICES_S3D(count, prim, idx[2]);
+
+			for (i = 1; i+1 < count; i += 2)
+				BCI_WRITE(idx[i + reorder[i % 3]] |
+					  (idx[i+1 + reorder[(i+1) % 3]] << 16));
+			if (i < count)
+				BCI_WRITE(idx[i + reorder[i%3]]);
+		} else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+			BEGIN_BCI((count+1+1)/2);
+			BCI_DRAW_INDICES_S3D(count, prim, idx[0]);
+
+			for (i = 1; i+1 < count; i += 2)
+				BCI_WRITE(idx[i] | (idx[i+1] << 16));
+			if (i < count)
+				BCI_WRITE(idx[i]);
+		} else {
+			BEGIN_BCI((count+2+1)/2);
+			BCI_DRAW_INDICES_S4(count, prim, skip);
+
+			for (i = 0; i+1 < count; i += 2)
+				BCI_WRITE(idx[i] | (idx[i+1] << 16));
+			if (i < count)
+				BCI_WRITE(idx[i]);
+		}
+
+		usr_idx += count;
+		n -= count;
+
+		prim |= BCI_CMD_DRAW_CONT;
+	}
+
+	return 0;
+}
+
+static int savage_dispatch_vb_idx(drm_savage_private_t *dev_priv,
+				  const drm_savage_cmd_header_t *cmd_header,
+				  const uint16_t __user *usr_idx,
+				  const uint32_t __user *vtxbuf,
+				  unsigned int vb_size,
+				  unsigned int vb_stride)
+{
+	unsigned char reorder = 0;
+	unsigned int prim = cmd_header->idx.prim;
+	unsigned int skip = cmd_header->idx.skip;
+	unsigned int n = cmd_header->idx.count;
+	unsigned int vtx_size;
+	unsigned int i;
+	DMA_LOCALS;
+
+	if (!n)
+		return 0;
+
+	switch (prim) {
+	case SAVAGE_PRIM_TRILIST_201:
+		reorder = 1;
+		prim = SAVAGE_PRIM_TRILIST;
+	case SAVAGE_PRIM_TRILIST:
+		if (n % 3 != 0) {
+			DRM_ERROR("wrong number of indices %u in TRILIST\n",
+				  n);
+			return DRM_ERR(EINVAL);
+		}
+		break;
+	case SAVAGE_PRIM_TRISTRIP:
+	case SAVAGE_PRIM_TRIFAN:
+		if (n < 3) {
+			DRM_ERROR("wrong number of indices %u in TRIFAN/STRIP\n",
+				  n);
+			return DRM_ERR(EINVAL);
+		}
+		break;
+	default:
+		DRM_ERROR("invalid primitive type %u\n", prim);
+		return DRM_ERR(EINVAL);
+	}
+
+	if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
+		if (skip > SAVAGE_SKIP_ALL_S3D) {
+			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
+			return DRM_ERR(EINVAL);
+		}
+		vtx_size = 8; /* full vertex */
+	} else {
+		if (skip > SAVAGE_SKIP_ALL_S4) {
+			DRM_ERROR("invalid skip flags 0x%04x\n", skip);
+			return DRM_ERR(EINVAL);
+		}
+		vtx_size = 10; /* full vertex */
+	}
+
+	vtx_size -= (skip & 1) + (skip >> 1 & 1) +
+		(skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
+		(skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
+
+	if (vtx_size > vb_stride) {
+		DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
+			  vtx_size, vb_stride);
+		return DRM_ERR(EINVAL);
+	}
+
+	prim <<= 25;
+	while (n != 0) {
+		/* Can emit up to 255 vertices (85 triangles) at once. */
+		unsigned int count = n > 255 ? 255 : n;
+		/* Is it ok to allocate 510 bytes on the stack in an ioctl? */
+		uint16_t idx[255];
+
+		/* Copy and check indices */
+		DRM_COPY_FROM_USER_UNCHECKED(idx, usr_idx, count*2);
+		for (i = 0; i < count; ++i) {
+			if (idx[i] > vb_size / (vb_stride*4)) {
+				DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
+					  i, idx[i],  vb_size / (vb_stride*4));
+				return DRM_ERR(EINVAL);
+			}
+		}
+
+		if (reorder) {
+			/* Need to reorder vertices for correct flat
+			 * shading while preserving the clock sense
+			 * for correct culling. Only on Savage3D. */
+			int reorder[3] = {2, -1, -1};
+
+			BEGIN_DMA(count*vtx_size+1);
+			DMA_DRAW_PRIMITIVE(count, prim, skip);
+
+			for (i = 0; i < count; ++i) {
+				unsigned int j = idx[i + reorder[i % 3]];
+				DMA_COPY_FROM_USER(&vtxbuf[vb_stride*j],
+						   vtx_size);
+			}
+
+			DMA_COMMIT();
+		} else {
+			BEGIN_DMA(count*vtx_size+1);
+			DMA_DRAW_PRIMITIVE(count, prim, skip);
+
+			for (i = 0; i < count; ++i) {
+				unsigned int j = idx[i];
+				DMA_COPY_FROM_USER(&vtxbuf[vb_stride*j],
+						   vtx_size);
+			}
+
+			DMA_COMMIT();
+		}
+
+		usr_idx += count;
+		n -= count;
+
+		prim |= BCI_CMD_DRAW_CONT;
+	}
+
+	return 0;
+}
+
+static int savage_dispatch_clear(drm_savage_private_t *dev_priv,
+				 const drm_savage_cmd_header_t *cmd_header,
+				 const drm_savage_cmd_header_t __user *data,
+				 unsigned int nbox,
+				 const drm_clip_rect_t __user *usr_boxes)
+{
+	unsigned int flags = cmd_header->clear0.flags, mask, value;
+	unsigned int clear_cmd;
+	unsigned int i, nbufs;
+	DMA_LOCALS;
+
+	if (nbox == 0)
+		return 0;
+
+	DRM_GET_USER_UNCHECKED(mask, &((const drm_savage_cmd_header_t*)data)
+			       ->clear1.mask);
+	DRM_GET_USER_UNCHECKED(value, &((const drm_savage_cmd_header_t*)data)
+			       ->clear1.value);
+
+	clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
+		BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW;
+	BCI_CMD_SET_ROP(clear_cmd,0xCC);
+
+	nbufs = ((flags & SAVAGE_FRONT) ? 1 : 0) +
+		((flags & SAVAGE_BACK) ? 1 : 0) +
+		((flags & SAVAGE_DEPTH) ? 1 : 0);
+	if (nbufs == 0)
+		return 0;
+
+	if (mask != 0xffffffff) {
+		/* set mask */
+		BEGIN_DMA(2);
+		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
+		DMA_WRITE(mask);
+		DMA_COMMIT();
+	}
+	for (i = 0; i < nbox; ++i) {
+		drm_clip_rect_t box;
+		unsigned int x, y, w, h;
+		unsigned int buf;
+		DRM_COPY_FROM_USER_UNCHECKED(&box, &usr_boxes[i], sizeof(box));
+		x = box.x1, y = box.y1;
+		w = box.x2 - box.x1;
+		h = box.y2 - box.y1;
+		BEGIN_DMA(nbufs*6);
+		for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) {
+			if (!(flags & buf))
+				continue;
+			DMA_WRITE(clear_cmd);
+			switch(buf) {
+			case SAVAGE_FRONT:
+				DMA_WRITE(dev_priv->front_offset);
+				DMA_WRITE(dev_priv->front_bd);
+				break;
+			case SAVAGE_BACK:
+				DMA_WRITE(dev_priv->back_offset);
+				DMA_WRITE(dev_priv->back_bd);
+				break;
+			case SAVAGE_DEPTH:
+				DMA_WRITE(dev_priv->depth_offset);
+				DMA_WRITE(dev_priv->depth_bd);
+				break;
+			}
+			DMA_WRITE(value);
+			DMA_WRITE(BCI_X_Y(x, y));
+			DMA_WRITE(BCI_W_H(w, h));
+		}
+		DMA_COMMIT();
+	}
+	if (mask != 0xffffffff) {
+		/* reset mask */
+		BEGIN_DMA(2);
+		DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
+		DMA_WRITE(0xffffffff);
+		DMA_COMMIT();
+	}
+
+	return 0;
+}
+
+static int savage_dispatch_swap(drm_savage_private_t *dev_priv,
+				unsigned int nbox,
+				const drm_clip_rect_t __user *usr_boxes)
+{
+	unsigned int swap_cmd;
+	unsigned int i;
+	DMA_LOCALS;
+
+	if (nbox == 0)
+		return 0;
+
+	swap_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
+		BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD;
+	BCI_CMD_SET_ROP(swap_cmd,0xCC);
+
+	for (i = 0; i < nbox; ++i) {
+		drm_clip_rect_t box;
+		DRM_COPY_FROM_USER_UNCHECKED(&box, &usr_boxes[i], sizeof(box));
+
+		BEGIN_DMA(6);
+		DMA_WRITE(swap_cmd);
+		DMA_WRITE(dev_priv->back_offset);
+		DMA_WRITE(dev_priv->back_bd);
+		DMA_WRITE(BCI_X_Y(box.x1, box.y1));
+		DMA_WRITE(BCI_X_Y(box.x1, box.y1));
+		DMA_WRITE(BCI_W_H(box.x2-box.x1, box.y2-box.y1));
+		DMA_COMMIT();
+	}
+
+	return 0;
+}
+
+static int savage_dispatch_draw(drm_savage_private_t *dev_priv,
+				const drm_savage_cmd_header_t __user *start,
+				const drm_savage_cmd_header_t __user *end,
+				const drm_buf_t *dmabuf,
+				const unsigned int __user *usr_vtxbuf,
+				unsigned int vb_size, unsigned int vb_stride,
+				unsigned int nbox,
+				const drm_clip_rect_t __user *usr_boxes)
+{
+	unsigned int i, j;
+	int ret;
+
+	for (i = 0; i < nbox; ++i) {
+		drm_clip_rect_t box;
+		const drm_savage_cmd_header_t __user *usr_cmdbuf;
+		DRM_COPY_FROM_USER_UNCHECKED(&box, &usr_boxes[i], sizeof(box));
+		dev_priv->emit_clip_rect(dev_priv, &box);
+
+		usr_cmdbuf = start;
+		while (usr_cmdbuf < end) {
+			drm_savage_cmd_header_t cmd_header;
+			DRM_COPY_FROM_USER_UNCHECKED(&cmd_header, usr_cmdbuf,
+						     sizeof(cmd_header));
+			usr_cmdbuf++;
+			switch (cmd_header.cmd.cmd) {
+			case SAVAGE_CMD_DMA_PRIM:
+				ret = savage_dispatch_dma_prim(
+					dev_priv, &cmd_header, dmabuf);
+				break;
+			case SAVAGE_CMD_VB_PRIM:
+				ret = savage_dispatch_vb_prim(
+					dev_priv, &cmd_header,
+					(const uint32_t __user *)usr_vtxbuf,
+					vb_size, vb_stride);
+				break;
+			case SAVAGE_CMD_DMA_IDX:
+				j = (cmd_header.idx.count + 3) / 4;
+				/* j was check in savage_bci_cmdbuf */
+				ret = savage_dispatch_dma_idx(
+					dev_priv, &cmd_header,
+					(const uint16_t __user *)usr_cmdbuf,
+					dmabuf);
+				usr_cmdbuf += j;
+				break;
+			case SAVAGE_CMD_VB_IDX:
+				j = (cmd_header.idx.count + 3) / 4;
+				/* j was check in savage_bci_cmdbuf */
+				ret = savage_dispatch_vb_idx(
+					dev_priv, &cmd_header,
+					(const uint16_t __user *)usr_cmdbuf,
+					(const uint32_t __user *)usr_vtxbuf,
+					vb_size, vb_stride);
+				usr_cmdbuf += j;
+				break;
+			default:
+				/* What's the best return code? EFAULT? */
+				DRM_ERROR("IMPLEMENTATION ERROR: "
+					  "non-drawing-command %d\n",
+					  cmd_header.cmd.cmd);
+				return DRM_ERR(EINVAL);
+			}
+
+			if (ret != 0)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+int savage_bci_cmdbuf(DRM_IOCTL_ARGS)
+{
+	DRM_DEVICE;
+	drm_savage_private_t *dev_priv = dev->dev_private;
+	drm_device_dma_t *dma = dev->dma;
+	drm_buf_t *dmabuf;
+	drm_savage_cmdbuf_t cmdbuf;
+	drm_savage_cmd_header_t __user *usr_cmdbuf;
+	drm_savage_cmd_header_t __user *first_draw_cmd;
+	unsigned int __user *usr_vtxbuf;
+	drm_clip_rect_t __user *usr_boxes;
+	unsigned int i, j;
+	int ret = 0;
+
+	DRM_DEBUG("\n");
+	
+	LOCK_TEST_WITH_RETURN(dev, filp);
+
+	DRM_COPY_FROM_USER_IOCTL(cmdbuf, (drm_savage_cmdbuf_t __user *)data,
+				 sizeof(cmdbuf));
+
+	if (dma && dma->buflist) {
+		if (cmdbuf.dma_idx > dma->buf_count) {
+			DRM_ERROR("vertex buffer index %u out of range (0-%u)\n",
+				  cmdbuf.dma_idx, dma->buf_count-1);
+			return DRM_ERR(EINVAL);
+		}
+		dmabuf = dma->buflist[cmdbuf.dma_idx];
+	} else {
+		dmabuf = NULL;
+	}
+
+	usr_cmdbuf = (drm_savage_cmd_header_t __user *)cmdbuf.cmd_addr;
+	usr_vtxbuf = (unsigned int __user *)cmdbuf.vb_addr;
+	usr_boxes = (drm_clip_rect_t __user *)cmdbuf.box_addr;
+	if ((cmdbuf.size && DRM_VERIFYAREA_READ(usr_cmdbuf, cmdbuf.size*8)) ||
+	    (cmdbuf.vb_size && DRM_VERIFYAREA_READ(
+		    usr_vtxbuf, cmdbuf.vb_size)) ||
+	    (cmdbuf.nbox && DRM_VERIFYAREA_READ(
+		    usr_boxes, cmdbuf.nbox*sizeof(drm_clip_rect_t))))
+		return DRM_ERR(EFAULT);
+
+	/* Make sure writes to DMA buffers are finished before sending
+	 * DMA commands to the graphics hardware. */
+	DRM_MEMORYBARRIER();
+
+	/* Coming from user space. Don't know if the Xserver has
+	 * emitted wait commands. Assuming the worst. */
+	dev_priv->waiting = 1;
+
+	i = 0;
+	first_draw_cmd = NULL;
+	while (i < cmdbuf.size) {
+		drm_savage_cmd_header_t cmd_header;
+		DRM_COPY_FROM_USER_UNCHECKED(&cmd_header, usr_cmdbuf,
+					     sizeof(cmd_header));
+		usr_cmdbuf++;
+		i++;
+
+		/* Group drawing commands with same state to minimize
+		 * iterations over clip rects. */
+		j = 0;
+		switch (cmd_header.cmd.cmd) {
+		case SAVAGE_CMD_DMA_IDX:
+		case SAVAGE_CMD_VB_IDX:
+			j = (cmd_header.idx.count + 3) / 4;
+			if (i + j > cmdbuf.size) {
+				DRM_ERROR("indexed drawing command extends "
+					  "beyond end of command buffer\n");
+				DMA_FLUSH();
+				return DRM_ERR(EINVAL);
+			}
+			/* fall through */
+		case SAVAGE_CMD_DMA_PRIM:
+		case SAVAGE_CMD_VB_PRIM:
+			if (!first_draw_cmd)
+				first_draw_cmd = usr_cmdbuf-1;
+			usr_cmdbuf += j;
+			i += j;
+			break;
+		default:
+			if (first_draw_cmd) {
+				ret = savage_dispatch_draw (
+					dev_priv, first_draw_cmd, usr_cmdbuf-1,
+					dmabuf, usr_vtxbuf, cmdbuf.vb_size,
+					cmdbuf.vb_stride,
+					cmdbuf.nbox, usr_boxes);
+				if (ret != 0)
+					return ret;
+				first_draw_cmd = NULL;
+			}
+		}
+		if (first_draw_cmd)
+			continue;
+
+		switch (cmd_header.cmd.cmd) {
+		case SAVAGE_CMD_STATE:
+			j = (cmd_header.state.count + 1) / 2;
+			if (i + j > cmdbuf.size) {
+				DRM_ERROR("command SAVAGE_CMD_STATE extends "
+					  "beyond end of command buffer\n");
+				DMA_FLUSH();
+				return DRM_ERR(EINVAL);
+			}
+			ret = savage_dispatch_state(
+				dev_priv, &cmd_header,
+				(uint32_t __user *)usr_cmdbuf);
+			usr_cmdbuf += j;
+			i += j;
+			break;
+		case SAVAGE_CMD_CLEAR:
+			if (i + 1 > cmdbuf.size) {
+				DRM_ERROR("command SAVAGE_CMD_CLEAR extends "
+					  "beyond end of command buffer\n");
+				DMA_FLUSH();
+				return DRM_ERR(EINVAL);
+			}
+			ret = savage_dispatch_clear(dev_priv, &cmd_header,
+						    usr_cmdbuf,
+						    cmdbuf.nbox, usr_boxes);
+			usr_cmdbuf++;
+			i++;
+			break;
+		case SAVAGE_CMD_SWAP:
+			ret = savage_dispatch_swap(dev_priv,
+						   cmdbuf.nbox, usr_boxes);
+			break;
+		default:
+			DRM_ERROR("invalid command 0x%x\n", cmd_header.cmd.cmd);
+			DMA_FLUSH();
+			return DRM_ERR(EINVAL);
+		}
+
+		if (ret != 0) {
+			DMA_FLUSH();
+			return ret;
+		}
+	}
+
+	if (first_draw_cmd) {
+		ret = savage_dispatch_draw (
+			dev_priv, first_draw_cmd, usr_cmdbuf, dmabuf,
+			usr_vtxbuf, cmdbuf.vb_size, cmdbuf.vb_stride,
+			cmdbuf.nbox, usr_boxes);
+		if (ret != 0) {
+			DMA_FLUSH();
+			return ret;
+		}
+	}
+
+	DMA_FLUSH();
+
+	if (dmabuf && cmdbuf.discard) {
+		drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private;
+		uint16_t event;
+		event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);
+		SET_AGE(&buf_priv->age, event, dev_priv->event_wrap);
+		savage_freelist_put(dev, dmabuf);
+	}
+
+	return 0;
+}