Merge remote branch 'korg/drm-radeon-testing' into drm-testing
diff --git a/drivers/gpu/drm/ati_pcigart.c b/drivers/gpu/drm/ati_pcigart.c
index 628eae3..a1fce68 100644
--- a/drivers/gpu/drm/ati_pcigart.c
+++ b/drivers/gpu/drm/ati_pcigart.c
@@ -39,8 +39,7 @@
 				       struct drm_ati_pcigart_info *gart_info)
 {
 	gart_info->table_handle = drm_pci_alloc(dev, gart_info->table_size,
-						PAGE_SIZE,
-						gart_info->table_mask);
+						PAGE_SIZE);
 	if (gart_info->table_handle == NULL)
 		return -ENOMEM;
 
@@ -112,6 +111,13 @@
 	if (gart_info->gart_table_location == DRM_ATI_GART_MAIN) {
 		DRM_DEBUG("PCI: no table in VRAM: using normal RAM\n");
 
+		if (pci_set_dma_mask(dev->pdev, gart_info->table_mask)) {
+			DRM_ERROR("fail to set dma mask to 0x%Lx\n",
+				  gart_info->table_mask);
+			ret = 1;
+			goto done;
+		}
+
 		ret = drm_ati_alloc_pcigart_table(dev, gart_info);
 		if (ret) {
 			DRM_ERROR("cannot allocate PCI GART page!\n");
diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
index 3d09e30..8417cc4 100644
--- a/drivers/gpu/drm/drm_bufs.c
+++ b/drivers/gpu/drm/drm_bufs.c
@@ -326,7 +326,7 @@
 		 * As we're limiting the address to 2^32-1 (or less),
 		 * casting it down to 32 bits is no problem, but we
 		 * need to point to a 64bit variable first. */
-		dmah = drm_pci_alloc(dev, map->size, map->size, 0xffffffffUL);
+		dmah = drm_pci_alloc(dev, map->size, map->size);
 		if (!dmah) {
 			kfree(map);
 			return -ENOMEM;
@@ -885,7 +885,7 @@
 
 	while (entry->buf_count < count) {
 
-		dmah = drm_pci_alloc(dev, PAGE_SIZE << page_order, 0x1000, 0xfffffffful);
+		dmah = drm_pci_alloc(dev, PAGE_SIZE << page_order, 0x1000);
 
 		if (!dmah) {
 			/* Set count correctly so we free the proper amount. */
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index 4231d6d..aba79c4 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -216,7 +216,7 @@
 EXPORT_SYMBOL(drm_helper_crtc_in_use);
 
 /**
- * drm_disable_unused_functions - disable unused objects
+ * drm_helper_disable_unused_functions - disable unused objects
  * @dev: DRM device
  *
  * LOCKING:
@@ -1162,6 +1162,9 @@
 int drm_helper_resume_force_mode(struct drm_device *dev)
 {
 	struct drm_crtc *crtc;
+	struct drm_encoder *encoder;
+	struct drm_encoder_helper_funcs *encoder_funcs;
+	struct drm_crtc_helper_funcs *crtc_funcs;
 	int ret;
 
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@@ -1174,6 +1177,25 @@
 
 		if (ret == false)
 			DRM_ERROR("failed to set mode on crtc %p\n", crtc);
+
+		/* Turn off outputs that were already powered off */
+		if (drm_helper_choose_crtc_dpms(crtc)) {
+			list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+
+				if(encoder->crtc != crtc)
+					continue;
+
+				encoder_funcs = encoder->helper_private;
+				if (encoder_funcs->dpms)
+					(*encoder_funcs->dpms) (encoder,
+								drm_helper_choose_encoder_dpms(encoder));
+
+				crtc_funcs = crtc->helper_private;
+				if (crtc_funcs->dpms)
+					(*crtc_funcs->dpms) (crtc,
+							     drm_helper_choose_crtc_dpms(crtc));
+			}
+		}
 	}
 	/* disable the unused connectors while restoring the modesetting */
 	drm_helper_disable_unused_functions(dev);
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 5c9f798..defcaf1 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -911,23 +911,27 @@
 	struct drm_device *dev = connector->dev;
 	struct cvt_timing *cvt;
 	const int rates[] = { 60, 85, 75, 60, 50 };
+	const u8 empty[3] = { 0, 0, 0 };
 
 	for (i = 0; i < 4; i++) {
 		int uninitialized_var(width), height;
 		cvt = &(timing->data.other_data.data.cvt[i]);
 
-		height = (cvt->code[0] + ((cvt->code[1] & 0xf0) << 8) + 1) * 2;
-		switch (cvt->code[1] & 0xc0) {
+		if (!memcmp(cvt->code, empty, 3))
+			continue;
+
+		height = (cvt->code[0] + ((cvt->code[1] & 0xf0) << 4) + 1) * 2;
+		switch (cvt->code[1] & 0x0c) {
 		case 0x00:
 			width = height * 4 / 3;
 			break;
-		case 0x40:
+		case 0x04:
 			width = height * 16 / 9;
 			break;
-		case 0x80:
+		case 0x08:
 			width = height * 16 / 10;
 			break;
-		case 0xc0:
+		case 0x0c:
 			width = height * 15 / 9;
 			break;
 		}
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 1b49fa0..1c2b7d4 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -156,7 +156,7 @@
 			force = DRM_FORCE_ON;
 			break;
 		case 'D':
-			if ((connector->connector_type != DRM_MODE_CONNECTOR_DVII) ||
+			if ((connector->connector_type != DRM_MODE_CONNECTOR_DVII) &&
 			    (connector->connector_type != DRM_MODE_CONNECTOR_HDMIB))
 				force = DRM_FORCE_ON;
 			else
@@ -606,11 +606,10 @@
 		return -EINVAL;
 
 	/* Need to resize the fb object !!! */
-	if (var->xres > fb->width || var->yres > fb->height) {
-		DRM_ERROR("Requested width/height is greater than current fb "
-			   "object %dx%d > %dx%d\n", var->xres, var->yres,
-			   fb->width, fb->height);
-		DRM_ERROR("Need resizing code.\n");
+	if (var->bits_per_pixel > fb->bits_per_pixel || var->xres > fb->width || var->yres > fb->height) {
+		DRM_DEBUG("fb userspace requested width/height/bpp is greater than current fb "
+			  "object %dx%d-%d > %dx%d-%d\n", var->xres, var->yres, var->bits_per_pixel,
+			  fb->width, fb->height, fb->bits_per_pixel);
 		return -EINVAL;
 	}
 
diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index 577094f..e68ebf9 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -47,8 +47,7 @@
 /**
  * \brief Allocate a PCI consistent memory block, for DMA.
  */
-drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t align,
-				dma_addr_t maxaddr)
+drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t align)
 {
 	drm_dma_handle_t *dmah;
 #if 1
@@ -63,11 +62,6 @@
 	if (align > size)
 		return NULL;
 
-	if (pci_set_dma_mask(dev->pdev, maxaddr) != 0) {
-		DRM_ERROR("Setting pci dma mask failed\n");
-		return NULL;
-	}
-
 	dmah = kmalloc(sizeof(drm_dma_handle_t), GFP_KERNEL);
 	if (!dmah)
 		return NULL;
diff --git a/drivers/gpu/drm/i2c/ch7006_drv.c b/drivers/gpu/drm/i2c/ch7006_drv.c
index 9422a74..81681a0 100644
--- a/drivers/gpu/drm/i2c/ch7006_drv.c
+++ b/drivers/gpu/drm/i2c/ch7006_drv.c
@@ -408,6 +408,11 @@
 
 	ch7006_info(client, "Detected version ID: %x\n", val);
 
+	/* I don't know what this is for, but otherwise I get no
+	 * signal.
+	 */
+	ch7006_write(client, 0x3d, 0x0);
+
 	return 0;
 
 fail:
diff --git a/drivers/gpu/drm/i2c/ch7006_mode.c b/drivers/gpu/drm/i2c/ch7006_mode.c
index 87f5445..e447dfb 100644
--- a/drivers/gpu/drm/i2c/ch7006_mode.c
+++ b/drivers/gpu/drm/i2c/ch7006_mode.c
@@ -427,11 +427,6 @@
 	ch7006_load_reg(client, state, CH7006_SUBC_INC7);
 	ch7006_load_reg(client, state, CH7006_PLL_CONTROL);
 	ch7006_load_reg(client, state, CH7006_CALC_SUBC_INC0);
-
-	/* I don't know what this is for, but otherwise I get no
-	 * signal.
-	 */
-	ch7006_write(client, 0x3d, 0x0);
 }
 
 void ch7006_state_save(struct i2c_client *client,
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 701bfea..02607ed 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -123,7 +123,7 @@
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	/* Program Hardware Status Page */
 	dev_priv->status_page_dmah =
-		drm_pci_alloc(dev, PAGE_SIZE, PAGE_SIZE, 0xffffffff);
+		drm_pci_alloc(dev, PAGE_SIZE, PAGE_SIZE);
 
 	if (!dev_priv->status_page_dmah) {
 		DRM_ERROR("Can not allocate hardware status page\n");
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 917b837..c7f0cbe 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4708,7 +4708,7 @@
 
 	phys_obj->id = id;
 
-	phys_obj->handle = drm_pci_alloc(dev, size, 0, 0xffffffff);
+	phys_obj->handle = drm_pci_alloc(dev, size, 0);
 	if (!phys_obj->handle) {
 		ret = -ENOMEM;
 		goto kfree_obj;
diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile
index 1d90d4d..48c290b 100644
--- a/drivers/gpu/drm/nouveau/Makefile
+++ b/drivers/gpu/drm/nouveau/Makefile
@@ -8,14 +8,15 @@
              nouveau_sgdma.o nouveau_dma.o \
              nouveau_bo.o nouveau_fence.o nouveau_gem.o nouveau_ttm.o \
              nouveau_hw.o nouveau_calc.o nouveau_bios.o nouveau_i2c.o \
-	     nouveau_display.o nouveau_connector.o nouveau_fbcon.o \
-	     nouveau_dp.o \
+             nouveau_display.o nouveau_connector.o nouveau_fbcon.o \
+             nouveau_dp.o nouveau_grctx.o \
              nv04_timer.o \
              nv04_mc.o nv40_mc.o nv50_mc.o \
              nv04_fb.o nv10_fb.o nv40_fb.o \
              nv04_fifo.o nv10_fifo.o nv40_fifo.o nv50_fifo.o \
              nv04_graph.o nv10_graph.o nv20_graph.o \
              nv40_graph.o nv50_graph.o \
+             nv40_grctx.o \
              nv04_instmem.o nv50_instmem.o \
              nv50_crtc.o nv50_dac.o nv50_sor.o \
              nv50_cursor.o nv50_display.o nv50_fbcon.o \
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index 5eec5ed..ba14397 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -181,43 +181,42 @@
 	const char desc[8];
 	void (*loadbios)(struct drm_device *, uint8_t *);
 	const bool rw;
-	int score;
 };
 
 static struct methods nv04_methods[] = {
 	{ "PROM", load_vbios_prom, false },
 	{ "PRAMIN", load_vbios_pramin, true },
 	{ "PCIROM", load_vbios_pci, true },
-	{ }
 };
 
 static struct methods nv50_methods[] = {
 	{ "PRAMIN", load_vbios_pramin, true },
 	{ "PROM", load_vbios_prom, false },
 	{ "PCIROM", load_vbios_pci, true },
-	{ }
 };
 
+#define METHODCNT 3
+
 static bool NVShadowVBIOS(struct drm_device *dev, uint8_t *data)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct methods *methods, *method;
+	struct methods *methods;
+	int i;
 	int testscore = 3;
+	int scores[METHODCNT];
 
 	if (nouveau_vbios) {
-		method = nv04_methods;
-		while (method->loadbios) {
-			if (!strcasecmp(nouveau_vbios, method->desc))
+		methods = nv04_methods;
+		for (i = 0; i < METHODCNT; i++)
+			if (!strcasecmp(nouveau_vbios, methods[i].desc))
 				break;
-			method++;
-		}
 
-		if (method->loadbios) {
+		if (i < METHODCNT) {
 			NV_INFO(dev, "Attempting to use BIOS image from %s\n",
-				method->desc);
+				methods[i].desc);
 
-			method->loadbios(dev, data);
-			if (score_vbios(dev, data, method->rw))
+			methods[i].loadbios(dev, data);
+			if (score_vbios(dev, data, methods[i].rw))
 				return true;
 		}
 
@@ -229,28 +228,24 @@
 	else
 		methods = nv50_methods;
 
-	method = methods;
-	while (method->loadbios) {
+	for (i = 0; i < METHODCNT; i++) {
 		NV_TRACE(dev, "Attempting to load BIOS image from %s\n",
-			 method->desc);
+			 methods[i].desc);
 		data[0] = data[1] = 0;	/* avoid reuse of previous image */
-		method->loadbios(dev, data);
-		method->score = score_vbios(dev, data, method->rw);
-		if (method->score == testscore)
+		methods[i].loadbios(dev, data);
+		scores[i] = score_vbios(dev, data, methods[i].rw);
+		if (scores[i] == testscore)
 			return true;
-		method++;
 	}
 
 	while (--testscore > 0) {
-		method = methods;
-		while (method->loadbios) {
-			if (method->score == testscore) {
+		for (i = 0; i < METHODCNT; i++) {
+			if (scores[i] == testscore) {
 				NV_TRACE(dev, "Using BIOS image from %s\n",
-					 method->desc);
-				method->loadbios(dev, data);
+					 methods[i].desc);
+				methods[i].loadbios(dev, data);
 				return true;
 			}
-			method++;
 		}
 	}
 
@@ -261,10 +256,7 @@
 struct init_tbl_entry {
 	char *name;
 	uint8_t id;
-	int length;
-	int length_offset;
-	int length_multiplier;
-	bool (*handler)(struct nvbios *, uint16_t, struct init_exec *);
+	int (*handler)(struct nvbios *, uint16_t, struct init_exec *);
 };
 
 struct bit_entry {
@@ -820,7 +812,7 @@
 	}
 }
 
-static bool
+static int
 init_io_restrict_prog(struct nvbios *bios, uint16_t offset,
 		      struct init_exec *iexec)
 {
@@ -852,9 +844,10 @@
 	uint32_t reg = ROM32(bios->data[offset + 7]);
 	uint8_t config;
 	uint32_t configval;
+	int len = 11 + count * 4;
 
 	if (!iexec->execute)
-		return true;
+		return len;
 
 	BIOSLOG(bios, "0x%04X: Port: 0x%04X, Index: 0x%02X, Mask: 0x%02X, "
 		      "Shift: 0x%02X, Count: 0x%02X, Reg: 0x%08X\n",
@@ -865,7 +858,7 @@
 		NV_ERROR(bios->dev,
 			 "0x%04X: Config 0x%02X exceeds maximal bound 0x%02X\n",
 			 offset, config, count);
-		return false;
+		return 0;
 	}
 
 	configval = ROM32(bios->data[offset + 11 + config * 4]);
@@ -874,10 +867,10 @@
 
 	bios_wr32(bios, reg, configval);
 
-	return true;
+	return len;
 }
 
-static bool
+static int
 init_repeat(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -912,10 +905,10 @@
 
 	iexec->repeat = false;
 
-	return true;
+	return 2;
 }
 
-static bool
+static int
 init_io_restrict_pll(struct nvbios *bios, uint16_t offset,
 		     struct init_exec *iexec)
 {
@@ -951,9 +944,10 @@
 	uint32_t reg = ROM32(bios->data[offset + 8]);
 	uint8_t config;
 	uint16_t freq;
+	int len = 12 + count * 2;
 
 	if (!iexec->execute)
-		return true;
+		return len;
 
 	BIOSLOG(bios, "0x%04X: Port: 0x%04X, Index: 0x%02X, Mask: 0x%02X, "
 		      "Shift: 0x%02X, IO Flag Condition: 0x%02X, "
@@ -966,7 +960,7 @@
 		NV_ERROR(bios->dev,
 			 "0x%04X: Config 0x%02X exceeds maximal bound 0x%02X\n",
 			 offset, config, count);
-		return false;
+		return 0;
 	}
 
 	freq = ROM16(bios->data[offset + 12 + config * 2]);
@@ -986,10 +980,10 @@
 
 	setPLL(bios, reg, freq * 10);
 
-	return true;
+	return len;
 }
 
-static bool
+static int
 init_end_repeat(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -1007,12 +1001,12 @@
 	 * we're not in repeat mode
 	 */
 	if (iexec->repeat)
-		return false;
+		return 0;
 
-	return true;
+	return 1;
 }
 
-static bool
+static int
 init_copy(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -1041,7 +1035,7 @@
 	uint8_t crtcdata;
 
 	if (!iexec->execute)
-		return true;
+		return 11;
 
 	BIOSLOG(bios, "0x%04X: Reg: 0x%08X, Shift: 0x%02X, SrcMask: 0x%02X, "
 		      "Port: 0x%04X, Index: 0x%02X, Mask: 0x%02X\n",
@@ -1060,10 +1054,10 @@
 	crtcdata |= (uint8_t)data;
 	bios_idxprt_wr(bios, crtcport, crtcindex, crtcdata);
 
-	return true;
+	return 11;
 }
 
-static bool
+static int
 init_not(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -1079,10 +1073,10 @@
 		BIOSLOG(bios, "0x%04X: ------ Executing following commands ------\n", offset);
 
 	iexec->execute = !iexec->execute;
-	return true;
+	return 1;
 }
 
-static bool
+static int
 init_io_flag_condition(struct nvbios *bios, uint16_t offset,
 		       struct init_exec *iexec)
 {
@@ -1100,7 +1094,7 @@
 	uint8_t cond = bios->data[offset + 1];
 
 	if (!iexec->execute)
-		return true;
+		return 2;
 
 	if (io_flag_condition_met(bios, offset, cond))
 		BIOSLOG(bios, "0x%04X: Condition fulfilled -- continuing to execute\n", offset);
@@ -1109,10 +1103,10 @@
 		iexec->execute = false;
 	}
 
-	return true;
+	return 2;
 }
 
-static bool
+static int
 init_idx_addr_latched(struct nvbios *bios, uint16_t offset,
 		      struct init_exec *iexec)
 {
@@ -1140,11 +1134,12 @@
 	uint32_t mask = ROM32(bios->data[offset + 9]);
 	uint32_t data = ROM32(bios->data[offset + 13]);
 	uint8_t count = bios->data[offset + 17];
+	int len = 18 + count * 2;
 	uint32_t value;
 	int i;
 
 	if (!iexec->execute)
-		return true;
+		return len;
 
 	BIOSLOG(bios, "0x%04X: ControlReg: 0x%08X, DataReg: 0x%08X, "
 		      "Mask: 0x%08X, Data: 0x%08X, Count: 0x%02X\n",
@@ -1164,10 +1159,10 @@
 		bios_wr32(bios, controlreg, value);
 	}
 
-	return true;
+	return len;
 }
 
-static bool
+static int
 init_io_restrict_pll2(struct nvbios *bios, uint16_t offset,
 		      struct init_exec *iexec)
 {
@@ -1196,25 +1191,26 @@
 	uint8_t shift = bios->data[offset + 5];
 	uint8_t count = bios->data[offset + 6];
 	uint32_t reg = ROM32(bios->data[offset + 7]);
+	int len = 11 + count * 4;
 	uint8_t config;
 	uint32_t freq;
 
 	if (!iexec->execute)
-		return true;
+		return len;
 
 	BIOSLOG(bios, "0x%04X: Port: 0x%04X, Index: 0x%02X, Mask: 0x%02X, "
 		      "Shift: 0x%02X, Count: 0x%02X, Reg: 0x%08X\n",
 		offset, crtcport, crtcindex, mask, shift, count, reg);
 
 	if (!reg)
-		return true;
+		return len;
 
 	config = (bios_idxprt_rd(bios, crtcport, crtcindex) & mask) >> shift;
 	if (config > count) {
 		NV_ERROR(bios->dev,
 			 "0x%04X: Config 0x%02X exceeds maximal bound 0x%02X\n",
 			 offset, config, count);
-		return false;
+		return 0;
 	}
 
 	freq = ROM32(bios->data[offset + 11 + config * 4]);
@@ -1224,10 +1220,10 @@
 
 	setPLL(bios, reg, freq);
 
-	return true;
+	return len;
 }
 
-static bool
+static int
 init_pll2(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -1244,16 +1240,16 @@
 	uint32_t freq = ROM32(bios->data[offset + 5]);
 
 	if (!iexec->execute)
-		return true;
+		return 9;
 
 	BIOSLOG(bios, "0x%04X: Reg: 0x%04X, Freq: %dkHz\n",
 		offset, reg, freq);
 
 	setPLL(bios, reg, freq);
-	return true;
+	return 9;
 }
 
-static bool
+static int
 init_i2c_byte(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -1277,12 +1273,13 @@
 	uint8_t i2c_index = bios->data[offset + 1];
 	uint8_t i2c_address = bios->data[offset + 2];
 	uint8_t count = bios->data[offset + 3];
+	int len = 4 + count * 3;
 	struct nouveau_i2c_chan *chan;
 	struct i2c_msg msg;
 	int i;
 
 	if (!iexec->execute)
-		return true;
+		return len;
 
 	BIOSLOG(bios, "0x%04X: DCBI2CIndex: 0x%02X, I2CAddress: 0x%02X, "
 		      "Count: 0x%02X\n",
@@ -1290,7 +1287,7 @@
 
 	chan = init_i2c_device_find(bios->dev, i2c_index);
 	if (!chan)
-		return false;
+		return 0;
 
 	for (i = 0; i < count; i++) {
 		uint8_t i2c_reg = bios->data[offset + 4 + i * 3];
@@ -1303,7 +1300,7 @@
 		msg.len = 1;
 		msg.buf = &value;
 		if (i2c_transfer(&chan->adapter, &msg, 1) != 1)
-			return false;
+			return 0;
 
 		BIOSLOG(bios, "0x%04X: I2CReg: 0x%02X, Value: 0x%02X, "
 			      "Mask: 0x%02X, Data: 0x%02X\n",
@@ -1317,14 +1314,14 @@
 			msg.len = 1;
 			msg.buf = &value;
 			if (i2c_transfer(&chan->adapter, &msg, 1) != 1)
-				return false;
+				return 0;
 		}
 	}
 
-	return true;
+	return len;
 }
 
-static bool
+static int
 init_zm_i2c_byte(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -1346,12 +1343,13 @@
 	uint8_t i2c_index = bios->data[offset + 1];
 	uint8_t i2c_address = bios->data[offset + 2];
 	uint8_t count = bios->data[offset + 3];
+	int len = 4 + count * 2;
 	struct nouveau_i2c_chan *chan;
 	struct i2c_msg msg;
 	int i;
 
 	if (!iexec->execute)
-		return true;
+		return len;
 
 	BIOSLOG(bios, "0x%04X: DCBI2CIndex: 0x%02X, I2CAddress: 0x%02X, "
 		      "Count: 0x%02X\n",
@@ -1359,7 +1357,7 @@
 
 	chan = init_i2c_device_find(bios->dev, i2c_index);
 	if (!chan)
-		return false;
+		return 0;
 
 	for (i = 0; i < count; i++) {
 		uint8_t i2c_reg = bios->data[offset + 4 + i * 2];
@@ -1374,14 +1372,14 @@
 			msg.len = 1;
 			msg.buf = &data;
 			if (i2c_transfer(&chan->adapter, &msg, 1) != 1)
-				return false;
+				return 0;
 		}
 	}
 
-	return true;
+	return len;
 }
 
-static bool
+static int
 init_zm_i2c(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -1401,13 +1399,14 @@
 	uint8_t i2c_index = bios->data[offset + 1];
 	uint8_t i2c_address = bios->data[offset + 2];
 	uint8_t count = bios->data[offset + 3];
+	int len = 4 + count;
 	struct nouveau_i2c_chan *chan;
 	struct i2c_msg msg;
 	uint8_t data[256];
 	int i;
 
 	if (!iexec->execute)
-		return true;
+		return len;
 
 	BIOSLOG(bios, "0x%04X: DCBI2CIndex: 0x%02X, I2CAddress: 0x%02X, "
 		      "Count: 0x%02X\n",
@@ -1415,7 +1414,7 @@
 
 	chan = init_i2c_device_find(bios->dev, i2c_index);
 	if (!chan)
-		return false;
+		return 0;
 
 	for (i = 0; i < count; i++) {
 		data[i] = bios->data[offset + 4 + i];
@@ -1429,13 +1428,13 @@
 		msg.len = count;
 		msg.buf = data;
 		if (i2c_transfer(&chan->adapter, &msg, 1) != 1)
-			return false;
+			return 0;
 	}
 
-	return true;
+	return len;
 }
 
-static bool
+static int
 init_tmds(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -1460,7 +1459,7 @@
 	uint32_t reg, value;
 
 	if (!iexec->execute)
-		return true;
+		return 5;
 
 	BIOSLOG(bios, "0x%04X: MagicLookupValue: 0x%02X, TMDSAddr: 0x%02X, "
 		      "Mask: 0x%02X, Data: 0x%02X\n",
@@ -1468,7 +1467,7 @@
 
 	reg = get_tmds_index_reg(bios->dev, mlv);
 	if (!reg)
-		return false;
+		return 0;
 
 	bios_wr32(bios, reg,
 		  tmdsaddr | NV_PRAMDAC_FP_TMDS_CONTROL_WRITE_DISABLE);
@@ -1476,10 +1475,10 @@
 	bios_wr32(bios, reg + 4, value);
 	bios_wr32(bios, reg, tmdsaddr);
 
-	return true;
+	return 5;
 }
 
-static bool
+static int
 init_zm_tmds_group(struct nvbios *bios, uint16_t offset,
 		   struct init_exec *iexec)
 {
@@ -1500,18 +1499,19 @@
 
 	uint8_t mlv = bios->data[offset + 1];
 	uint8_t count = bios->data[offset + 2];
+	int len = 3 + count * 2;
 	uint32_t reg;
 	int i;
 
 	if (!iexec->execute)
-		return true;
+		return len;
 
 	BIOSLOG(bios, "0x%04X: MagicLookupValue: 0x%02X, Count: 0x%02X\n",
 		offset, mlv, count);
 
 	reg = get_tmds_index_reg(bios->dev, mlv);
 	if (!reg)
-		return false;
+		return 0;
 
 	for (i = 0; i < count; i++) {
 		uint8_t tmdsaddr = bios->data[offset + 3 + i * 2];
@@ -1521,10 +1521,10 @@
 		bios_wr32(bios, reg, tmdsaddr);
 	}
 
-	return true;
+	return len;
 }
 
-static bool
+static int
 init_cr_idx_adr_latch(struct nvbios *bios, uint16_t offset,
 		      struct init_exec *iexec)
 {
@@ -1547,11 +1547,12 @@
 	uint8_t crtcindex2 = bios->data[offset + 2];
 	uint8_t baseaddr = bios->data[offset + 3];
 	uint8_t count = bios->data[offset + 4];
+	int len = 5 + count;
 	uint8_t oldaddr, data;
 	int i;
 
 	if (!iexec->execute)
-		return true;
+		return len;
 
 	BIOSLOG(bios, "0x%04X: Index1: 0x%02X, Index2: 0x%02X, "
 		      "BaseAddr: 0x%02X, Count: 0x%02X\n",
@@ -1568,10 +1569,10 @@
 
 	bios_idxprt_wr(bios, NV_CIO_CRX__COLOR, crtcindex1, oldaddr);
 
-	return true;
+	return len;
 }
 
-static bool
+static int
 init_cr(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -1592,7 +1593,7 @@
 	uint8_t value;
 
 	if (!iexec->execute)
-		return true;
+		return 4;
 
 	BIOSLOG(bios, "0x%04X: Index: 0x%02X, Mask: 0x%02X, Data: 0x%02X\n",
 		offset, crtcindex, mask, data);
@@ -1601,10 +1602,10 @@
 	value |= data;
 	bios_idxprt_wr(bios, NV_CIO_CRX__COLOR, crtcindex, value);
 
-	return true;
+	return 4;
 }
 
-static bool
+static int
 init_zm_cr(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -1621,14 +1622,14 @@
 	uint8_t data = bios->data[offset + 2];
 
 	if (!iexec->execute)
-		return true;
+		return 3;
 
 	bios_idxprt_wr(bios, NV_CIO_CRX__COLOR, crtcindex, data);
 
-	return true;
+	return 3;
 }
 
-static bool
+static int
 init_zm_cr_group(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -1645,18 +1646,19 @@
 	 */
 
 	uint8_t count = bios->data[offset + 1];
+	int len = 2 + count * 2;
 	int i;
 
 	if (!iexec->execute)
-		return true;
+		return len;
 
 	for (i = 0; i < count; i++)
 		init_zm_cr(bios, offset + 2 + 2 * i - 1, iexec);
 
-	return true;
+	return len;
 }
 
-static bool
+static int
 init_condition_time(struct nvbios *bios, uint16_t offset,
 		    struct init_exec *iexec)
 {
@@ -1680,7 +1682,7 @@
 	unsigned cnt;
 
 	if (!iexec->execute)
-		return true;
+		return 3;
 
 	if (retries > 100)
 		retries = 100;
@@ -1711,10 +1713,10 @@
 		iexec->execute = false;
 	}
 
-	return true;
+	return 3;
 }
 
-static bool
+static int
 init_zm_reg_sequence(struct nvbios *bios, uint16_t offset,
 		     struct init_exec *iexec)
 {
@@ -1734,10 +1736,11 @@
 
 	uint32_t basereg = ROM32(bios->data[offset + 1]);
 	uint32_t count = bios->data[offset + 5];
+	int len = 6 + count * 4;
 	int i;
 
 	if (!iexec->execute)
-		return true;
+		return len;
 
 	BIOSLOG(bios, "0x%04X: BaseReg: 0x%08X, Count: 0x%02X\n",
 		offset, basereg, count);
@@ -1749,10 +1752,10 @@
 		bios_wr32(bios, reg, data);
 	}
 
-	return true;
+	return len;
 }
 
-static bool
+static int
 init_sub_direct(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -1768,7 +1771,7 @@
 	uint16_t sub_offset = ROM16(bios->data[offset + 1]);
 
 	if (!iexec->execute)
-		return true;
+		return 3;
 
 	BIOSLOG(bios, "0x%04X: Executing subroutine at 0x%04X\n",
 		offset, sub_offset);
@@ -1777,10 +1780,10 @@
 
 	BIOSLOG(bios, "0x%04X: End of 0x%04X subroutine\n", offset, sub_offset);
 
-	return true;
+	return 3;
 }
 
-static bool
+static int
 init_copy_nv_reg(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -1808,7 +1811,7 @@
 	uint32_t srcvalue, dstvalue;
 
 	if (!iexec->execute)
-		return true;
+		return 22;
 
 	BIOSLOG(bios, "0x%04X: SrcReg: 0x%08X, Shift: 0x%02X, SrcMask: 0x%08X, "
 		      "Xor: 0x%08X, DstReg: 0x%08X, DstMask: 0x%08X\n",
@@ -1827,10 +1830,10 @@
 
 	bios_wr32(bios, dstreg, dstvalue | srcvalue);
 
-	return true;
+	return 22;
 }
 
-static bool
+static int
 init_zm_index_io(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -1848,14 +1851,14 @@
 	uint8_t data = bios->data[offset + 4];
 
 	if (!iexec->execute)
-		return true;
+		return 5;
 
 	bios_idxprt_wr(bios, crtcport, crtcindex, data);
 
-	return true;
+	return 5;
 }
 
-static bool
+static int
 init_compute_mem(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -1904,7 +1907,7 @@
 	struct drm_nouveau_private *dev_priv = bios->dev->dev_private;
 
 	if (dev_priv->card_type >= NV_50)
-		return true;
+		return 1;
 
 	/*
 	 * On every card I've seen, this step gets done for us earlier in
@@ -1922,10 +1925,10 @@
 	/* write back the saved configuration value */
 	bios_wr32(bios, NV_PFB_CFG0, bios->state.saved_nv_pfb_cfg0);
 
-	return true;
+	return 1;
 }
 
-static bool
+static int
 init_reset(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -1959,10 +1962,10 @@
 	pci_nv_20 &= ~NV_PBUS_PCI_NV_20_ROM_SHADOW_ENABLED;	/* 0xfffffffe */
 	bios_wr32(bios, NV_PBUS_PCI_NV_20, pci_nv_20);
 
-	return true;
+	return 13;
 }
 
-static bool
+static int
 init_configure_mem(struct nvbios *bios, uint16_t offset,
 		   struct init_exec *iexec)
 {
@@ -1983,7 +1986,7 @@
 	uint32_t reg, data;
 
 	if (bios->major_version > 2)
-		return false;
+		return 0;
 
 	bios_idxprt_wr(bios, NV_VIO_SRX, NV_VIO_SR_CLOCK_INDEX, bios_idxprt_rd(
 		       bios, NV_VIO_SRX, NV_VIO_SR_CLOCK_INDEX) | 0x20);
@@ -2015,10 +2018,10 @@
 		bios_wr32(bios, reg, data);
 	}
 
-	return true;
+	return 1;
 }
 
-static bool
+static int
 init_configure_clk(struct nvbios *bios, uint16_t offset,
 		   struct init_exec *iexec)
 {
@@ -2038,7 +2041,7 @@
 	int clock;
 
 	if (bios->major_version > 2)
-		return false;
+		return 0;
 
 	clock = ROM16(bios->data[meminitoffs + 4]) * 10;
 	setPLL(bios, NV_PRAMDAC_NVPLL_COEFF, clock);
@@ -2048,10 +2051,10 @@
 		clock *= 2;
 	setPLL(bios, NV_PRAMDAC_MPLL_COEFF, clock);
 
-	return true;
+	return 1;
 }
 
-static bool
+static int
 init_configure_preinit(struct nvbios *bios, uint16_t offset,
 		       struct init_exec *iexec)
 {
@@ -2071,15 +2074,15 @@
 	uint8_t cr3c = ((straps << 2) & 0xf0) | (straps & (1 << 6));
 
 	if (bios->major_version > 2)
-		return false;
+		return 0;
 
 	bios_idxprt_wr(bios, NV_CIO_CRX__COLOR,
 			     NV_CIO_CRE_SCRATCH4__INDEX, cr3c);
 
-	return true;
+	return 1;
 }
 
-static bool
+static int
 init_io(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2099,7 +2102,7 @@
 	uint8_t data = bios->data[offset + 4];
 
 	if (!iexec->execute)
-		return true;
+		return 5;
 
 	BIOSLOG(bios, "0x%04X: Port: 0x%04X, Mask: 0x%02X, Data: 0x%02X\n",
 		offset, crtcport, mask, data);
@@ -2158,15 +2161,15 @@
 		for (i = 0; i < 2; i++)
 			bios_wr32(bios, 0x614108 + (i*0x800), bios_rd32(
 				  bios, 0x614108 + (i*0x800)) & 0x0fffffff);
-		return true;
+		return 5;
 	}
 
 	bios_port_wr(bios, crtcport, (bios_port_rd(bios, crtcport) & mask) |
 									data);
-	return true;
+	return 5;
 }
 
-static bool
+static int
 init_sub(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2181,7 +2184,7 @@
 	uint8_t sub = bios->data[offset + 1];
 
 	if (!iexec->execute)
-		return true;
+		return 2;
 
 	BIOSLOG(bios, "0x%04X: Calling script %d\n", offset, sub);
 
@@ -2191,10 +2194,10 @@
 
 	BIOSLOG(bios, "0x%04X: End of script %d\n", offset, sub);
 
-	return true;
+	return 2;
 }
 
-static bool
+static int
 init_ram_condition(struct nvbios *bios, uint16_t offset,
 		   struct init_exec *iexec)
 {
@@ -2215,7 +2218,7 @@
 	uint8_t data;
 
 	if (!iexec->execute)
-		return true;
+		return 3;
 
 	data = bios_rd32(bios, NV_PFB_BOOT_0) & mask;
 
@@ -2229,10 +2232,10 @@
 		iexec->execute = false;
 	}
 
-	return true;
+	return 3;
 }
 
-static bool
+static int
 init_nv_reg(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2251,17 +2254,17 @@
 	uint32_t data = ROM32(bios->data[offset + 9]);
 
 	if (!iexec->execute)
-		return true;
+		return 13;
 
 	BIOSLOG(bios, "0x%04X: Reg: 0x%08X, Mask: 0x%08X, Data: 0x%08X\n",
 		offset, reg, mask, data);
 
 	bios_wr32(bios, reg, (bios_rd32(bios, reg) & mask) | data);
 
-	return true;
+	return 13;
 }
 
-static bool
+static int
 init_macro(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2285,7 +2288,7 @@
 	int i;
 
 	if (!iexec->execute)
-		return true;
+		return 2;
 
 	BIOSLOG(bios, "0x%04X: Macro: 0x%02X, MacroTableIndex: 0x%02X, "
 		      "Count: 0x%02X\n",
@@ -2300,10 +2303,10 @@
 		bios_wr32(bios, reg, data);
 	}
 
-	return true;
+	return 2;
 }
 
-static bool
+static int
 init_done(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2315,10 +2318,10 @@
 	 */
 
 	/* mild retval abuse to stop parsing this table */
-	return false;
+	return 0;
 }
 
-static bool
+static int
 init_resume(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2330,15 +2333,15 @@
 	 */
 
 	if (iexec->execute)
-		return true;
+		return 1;
 
 	iexec->execute = true;
 	BIOSLOG(bios, "0x%04X: ---- Executing following commands ----\n", offset);
 
-	return true;
+	return 1;
 }
 
-static bool
+static int
 init_time(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2353,7 +2356,7 @@
 	unsigned time = ROM16(bios->data[offset + 1]);
 
 	if (!iexec->execute)
-		return true;
+		return 3;
 
 	BIOSLOG(bios, "0x%04X: Sleeping for 0x%04X microseconds\n",
 		offset, time);
@@ -2363,10 +2366,10 @@
 	else
 		msleep((time + 900) / 1000);
 
-	return true;
+	return 3;
 }
 
-static bool
+static int
 init_condition(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2383,7 +2386,7 @@
 	uint8_t cond = bios->data[offset + 1];
 
 	if (!iexec->execute)
-		return true;
+		return 2;
 
 	BIOSLOG(bios, "0x%04X: Condition: 0x%02X\n", offset, cond);
 
@@ -2394,10 +2397,10 @@
 		iexec->execute = false;
 	}
 
-	return true;
+	return 2;
 }
 
-static bool
+static int
 init_io_condition(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2414,7 +2417,7 @@
 	uint8_t cond = bios->data[offset + 1];
 
 	if (!iexec->execute)
-		return true;
+		return 2;
 
 	BIOSLOG(bios, "0x%04X: IO condition: 0x%02X\n", offset, cond);
 
@@ -2425,10 +2428,10 @@
 		iexec->execute = false;
 	}
 
-	return true;
+	return 2;
 }
 
-static bool
+static int
 init_index_io(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2451,7 +2454,7 @@
 	uint8_t value;
 
 	if (!iexec->execute)
-		return true;
+		return 6;
 
 	BIOSLOG(bios, "0x%04X: Port: 0x%04X, Index: 0x%02X, Mask: 0x%02X, "
 		      "Data: 0x%02X\n",
@@ -2460,10 +2463,10 @@
 	value = (bios_idxprt_rd(bios, crtcport, crtcindex) & mask) | data;
 	bios_idxprt_wr(bios, crtcport, crtcindex, value);
 
-	return true;
+	return 6;
 }
 
-static bool
+static int
 init_pll(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2481,16 +2484,16 @@
 	uint16_t freq = ROM16(bios->data[offset + 5]);
 
 	if (!iexec->execute)
-		return true;
+		return 7;
 
 	BIOSLOG(bios, "0x%04X: Reg: 0x%08X, Freq: %d0kHz\n", offset, reg, freq);
 
 	setPLL(bios, reg, freq * 10);
 
-	return true;
+	return 7;
 }
 
-static bool
+static int
 init_zm_reg(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2507,17 +2510,17 @@
 	uint32_t value = ROM32(bios->data[offset + 5]);
 
 	if (!iexec->execute)
-		return true;
+		return 9;
 
 	if (reg == 0x000200)
 		value |= 1;
 
 	bios_wr32(bios, reg, value);
 
-	return true;
+	return 9;
 }
 
-static bool
+static int
 init_ram_restrict_pll(struct nvbios *bios, uint16_t offset,
 		      struct init_exec *iexec)
 {
@@ -2543,14 +2546,15 @@
 	uint8_t type = bios->data[offset + 1];
 	uint32_t freq = ROM32(bios->data[offset + 2 + (index * 4)]);
 	uint8_t *pll_limits = &bios->data[bios->pll_limit_tbl_ptr], *entry;
+	int len = 2 + bios->ram_restrict_group_count * 4;
 	int i;
 
 	if (!iexec->execute)
-		return true;
+		return len;
 
 	if (!bios->pll_limit_tbl_ptr || (pll_limits[0] & 0xf0) != 0x30) {
 		NV_ERROR(dev, "PLL limits table not version 3.x\n");
-		return true; /* deliberate, allow default clocks to remain */
+		return len; /* deliberate, allow default clocks to remain */
 	}
 
 	entry = pll_limits + pll_limits[1];
@@ -2563,15 +2567,15 @@
 				offset, type, reg, freq);
 
 			setPLL(bios, reg, freq);
-			return true;
+			return len;
 		}
 	}
 
 	NV_ERROR(dev, "PLL type 0x%02x not found in PLL limits table", type);
-	return true;
+	return len;
 }
 
-static bool
+static int
 init_8c(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2581,10 +2585,10 @@
 	 *
 	 */
 
-	return true;
+	return 1;
 }
 
-static bool
+static int
 init_8d(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2594,10 +2598,10 @@
 	 *
 	 */
 
-	return true;
+	return 1;
 }
 
-static bool
+static int
 init_gpio(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2615,14 +2619,17 @@
 	const uint8_t *gpio_entry;
 	int i;
 
+	if (!iexec->execute)
+		return 1;
+
 	if (bios->bdcb.version != 0x40) {
 		NV_ERROR(bios->dev, "DCB table not version 4.0\n");
-		return false;
+		return 0;
 	}
 
 	if (!bios->bdcb.gpio_table_ptr) {
 		NV_WARN(bios->dev, "Invalid pointer to INIT_8E table\n");
-		return false;
+		return 0;
 	}
 
 	gpio_entry = gpio_table + gpio_table[1];
@@ -2660,13 +2667,10 @@
 		bios_wr32(bios, r, v);
 	}
 
-	return true;
+	return 1;
 }
 
-/* hack to avoid moving the itbl_entry array before this function */
-int init_ram_restrict_zm_reg_group_blocklen;
-
-static bool
+static int
 init_ram_restrict_zm_reg_group(struct nvbios *bios, uint16_t offset,
 			       struct init_exec *iexec)
 {
@@ -2692,21 +2696,21 @@
 	uint8_t regincrement = bios->data[offset + 5];
 	uint8_t count = bios->data[offset + 6];
 	uint32_t strap_ramcfg, data;
-	uint16_t blocklen;
+	/* previously set by 'M' BIT table */
+	uint16_t blocklen = bios->ram_restrict_group_count * 4;
+	int len = 7 + count * blocklen;
 	uint8_t index;
 	int i;
 
-	/* previously set by 'M' BIT table */
-	blocklen = init_ram_restrict_zm_reg_group_blocklen;
 
 	if (!iexec->execute)
-		return true;
+		return len;
 
 	if (!blocklen) {
 		NV_ERROR(bios->dev,
 			 "0x%04X: Zero block length - has the M table "
 			 "been parsed?\n", offset);
-		return false;
+		return 0;
 	}
 
 	strap_ramcfg = (bios_rd32(bios, NV_PEXTDEV_BOOT_0) >> 2) & 0xf;
@@ -2724,10 +2728,10 @@
 		reg += regincrement;
 	}
 
-	return true;
+	return len;
 }
 
-static bool
+static int
 init_copy_zm_reg(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2744,14 +2748,14 @@
 	uint32_t dstreg = ROM32(bios->data[offset + 5]);
 
 	if (!iexec->execute)
-		return true;
+		return 9;
 
 	bios_wr32(bios, dstreg, bios_rd32(bios, srcreg));
 
-	return true;
+	return 9;
 }
 
-static bool
+static int
 init_zm_reg_group_addr_latched(struct nvbios *bios, uint16_t offset,
 			       struct init_exec *iexec)
 {
@@ -2769,20 +2773,21 @@
 
 	uint32_t reg = ROM32(bios->data[offset + 1]);
 	uint8_t count = bios->data[offset + 5];
+	int len = 6 + count * 4;
 	int i;
 
 	if (!iexec->execute)
-		return true;
+		return len;
 
 	for (i = 0; i < count; i++) {
 		uint32_t data = ROM32(bios->data[offset + 6 + 4 * i]);
 		bios_wr32(bios, reg, data);
 	}
 
-	return true;
+	return len;
 }
 
-static bool
+static int
 init_reserved(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2793,10 +2798,10 @@
 	 * Seemingly does nothing
 	 */
 
-	return true;
+	return 1;
 }
 
-static bool
+static int
 init_96(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2829,13 +2834,13 @@
 	val <<= bios->data[offset + 16];
 
 	if (!iexec->execute)
-		return true;
+		return 17;
 
 	bios_wr32(bios, reg, (bios_rd32(bios, reg) & mask) | val);
-	return true;
+	return 17;
 }
 
-static bool
+static int
 init_97(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2859,13 +2864,13 @@
 	val = (val & mask) | ((val + add) & ~mask);
 
 	if (!iexec->execute)
-		return true;
+		return 13;
 
 	bios_wr32(bios, reg, val);
-	return true;
+	return 13;
 }
 
-static bool
+static int
 init_auxch(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2883,32 +2888,33 @@
 	struct drm_device *dev = bios->dev;
 	struct nouveau_i2c_chan *auxch;
 	uint32_t addr = ROM32(bios->data[offset + 1]);
-	uint8_t len = bios->data[offset + 5];
+	uint8_t count = bios->data[offset + 5];
+	int len = 6 + count * 2;
 	int ret, i;
 
 	if (!bios->display.output) {
 		NV_ERROR(dev, "INIT_AUXCH: no active output\n");
-		return false;
+		return 0;
 	}
 
 	auxch = init_i2c_device_find(dev, bios->display.output->i2c_index);
 	if (!auxch) {
 		NV_ERROR(dev, "INIT_AUXCH: couldn't get auxch %d\n",
 			 bios->display.output->i2c_index);
-		return false;
+		return 0;
 	}
 
 	if (!iexec->execute)
-		return true;
+		return len;
 
 	offset += 6;
-	for (i = 0; i < len; i++, offset += 2) {
+	for (i = 0; i < count; i++, offset += 2) {
 		uint8_t data;
 
 		ret = nouveau_dp_auxch(auxch, 9, addr, &data, 1);
 		if (ret) {
 			NV_ERROR(dev, "INIT_AUXCH: rd auxch fail %d\n", ret);
-			return false;
+			return 0;
 		}
 
 		data &= bios->data[offset + 0];
@@ -2917,14 +2923,14 @@
 		ret = nouveau_dp_auxch(auxch, 8, addr, &data, 1);
 		if (ret) {
 			NV_ERROR(dev, "INIT_AUXCH: wr auxch fail %d\n", ret);
-			return false;
+			return 0;
 		}
 	}
 
-	return true;
+	return len;
 }
 
-static bool
+static int
 init_zm_auxch(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
 	/*
@@ -2941,106 +2947,99 @@
 	struct drm_device *dev = bios->dev;
 	struct nouveau_i2c_chan *auxch;
 	uint32_t addr = ROM32(bios->data[offset + 1]);
-	uint8_t len = bios->data[offset + 5];
+	uint8_t count = bios->data[offset + 5];
+	int len = 6 + count;
 	int ret, i;
 
 	if (!bios->display.output) {
 		NV_ERROR(dev, "INIT_ZM_AUXCH: no active output\n");
-		return false;
+		return 0;
 	}
 
 	auxch = init_i2c_device_find(dev, bios->display.output->i2c_index);
 	if (!auxch) {
 		NV_ERROR(dev, "INIT_ZM_AUXCH: couldn't get auxch %d\n",
 			 bios->display.output->i2c_index);
-		return false;
+		return 0;
 	}
 
 	if (!iexec->execute)
-		return true;
+		return len;
 
 	offset += 6;
-	for (i = 0; i < len; i++, offset++) {
+	for (i = 0; i < count; i++, offset++) {
 		ret = nouveau_dp_auxch(auxch, 8, addr, &bios->data[offset], 1);
 		if (ret) {
 			NV_ERROR(dev, "INIT_ZM_AUXCH: wr auxch fail %d\n", ret);
-			return false;
+			return 0;
 		}
 	}
 
-	return true;
+	return len;
 }
 
 static struct init_tbl_entry itbl_entry[] = {
 	/* command name                       , id  , length  , offset  , mult    , command handler                 */
 	/* INIT_PROG (0x31, 15, 10, 4) removed due to no example of use */
-	{ "INIT_IO_RESTRICT_PROG"             , 0x32, 11      , 6       , 4       , init_io_restrict_prog           },
-	{ "INIT_REPEAT"                       , 0x33, 2       , 0       , 0       , init_repeat                     },
-	{ "INIT_IO_RESTRICT_PLL"              , 0x34, 12      , 7       , 2       , init_io_restrict_pll            },
-	{ "INIT_END_REPEAT"                   , 0x36, 1       , 0       , 0       , init_end_repeat                 },
-	{ "INIT_COPY"                         , 0x37, 11      , 0       , 0       , init_copy                       },
-	{ "INIT_NOT"                          , 0x38, 1       , 0       , 0       , init_not                        },
-	{ "INIT_IO_FLAG_CONDITION"            , 0x39, 2       , 0       , 0       , init_io_flag_condition          },
-	{ "INIT_INDEX_ADDRESS_LATCHED"        , 0x49, 18      , 17      , 2       , init_idx_addr_latched           },
-	{ "INIT_IO_RESTRICT_PLL2"             , 0x4A, 11      , 6       , 4       , init_io_restrict_pll2           },
-	{ "INIT_PLL2"                         , 0x4B, 9       , 0       , 0       , init_pll2                       },
-	{ "INIT_I2C_BYTE"                     , 0x4C, 4       , 3       , 3       , init_i2c_byte                   },
-	{ "INIT_ZM_I2C_BYTE"                  , 0x4D, 4       , 3       , 2       , init_zm_i2c_byte                },
-	{ "INIT_ZM_I2C"                       , 0x4E, 4       , 3       , 1       , init_zm_i2c                     },
-	{ "INIT_TMDS"                         , 0x4F, 5       , 0       , 0       , init_tmds                       },
-	{ "INIT_ZM_TMDS_GROUP"                , 0x50, 3       , 2       , 2       , init_zm_tmds_group              },
-	{ "INIT_CR_INDEX_ADDRESS_LATCHED"     , 0x51, 5       , 4       , 1       , init_cr_idx_adr_latch           },
-	{ "INIT_CR"                           , 0x52, 4       , 0       , 0       , init_cr                         },
-	{ "INIT_ZM_CR"                        , 0x53, 3       , 0       , 0       , init_zm_cr                      },
-	{ "INIT_ZM_CR_GROUP"                  , 0x54, 2       , 1       , 2       , init_zm_cr_group                },
-	{ "INIT_CONDITION_TIME"               , 0x56, 3       , 0       , 0       , init_condition_time             },
-	{ "INIT_ZM_REG_SEQUENCE"              , 0x58, 6       , 5       , 4       , init_zm_reg_sequence            },
+	{ "INIT_IO_RESTRICT_PROG"             , 0x32, init_io_restrict_prog           },
+	{ "INIT_REPEAT"                       , 0x33, init_repeat                     },
+	{ "INIT_IO_RESTRICT_PLL"              , 0x34, init_io_restrict_pll            },
+	{ "INIT_END_REPEAT"                   , 0x36, init_end_repeat                 },
+	{ "INIT_COPY"                         , 0x37, init_copy                       },
+	{ "INIT_NOT"                          , 0x38, init_not                        },
+	{ "INIT_IO_FLAG_CONDITION"            , 0x39, init_io_flag_condition          },
+	{ "INIT_INDEX_ADDRESS_LATCHED"        , 0x49, init_idx_addr_latched           },
+	{ "INIT_IO_RESTRICT_PLL2"             , 0x4A, init_io_restrict_pll2           },
+	{ "INIT_PLL2"                         , 0x4B, init_pll2                       },
+	{ "INIT_I2C_BYTE"                     , 0x4C, init_i2c_byte                   },
+	{ "INIT_ZM_I2C_BYTE"                  , 0x4D, init_zm_i2c_byte                },
+	{ "INIT_ZM_I2C"                       , 0x4E, init_zm_i2c                     },
+	{ "INIT_TMDS"                         , 0x4F, init_tmds                       },
+	{ "INIT_ZM_TMDS_GROUP"                , 0x50, init_zm_tmds_group              },
+	{ "INIT_CR_INDEX_ADDRESS_LATCHED"     , 0x51, init_cr_idx_adr_latch           },
+	{ "INIT_CR"                           , 0x52, init_cr                         },
+	{ "INIT_ZM_CR"                        , 0x53, init_zm_cr                      },
+	{ "INIT_ZM_CR_GROUP"                  , 0x54, init_zm_cr_group                },
+	{ "INIT_CONDITION_TIME"               , 0x56, init_condition_time             },
+	{ "INIT_ZM_REG_SEQUENCE"              , 0x58, init_zm_reg_sequence            },
 	/* INIT_INDIRECT_REG (0x5A, 7, 0, 0) removed due to no example of use */
-	{ "INIT_SUB_DIRECT"                   , 0x5B, 3       , 0       , 0       , init_sub_direct                 },
-	{ "INIT_COPY_NV_REG"                  , 0x5F, 22      , 0       , 0       , init_copy_nv_reg                },
-	{ "INIT_ZM_INDEX_IO"                  , 0x62, 5       , 0       , 0       , init_zm_index_io                },
-	{ "INIT_COMPUTE_MEM"                  , 0x63, 1       , 0       , 0       , init_compute_mem                },
-	{ "INIT_RESET"                        , 0x65, 13      , 0       , 0       , init_reset                      },
-	{ "INIT_CONFIGURE_MEM"                , 0x66, 1       , 0       , 0       , init_configure_mem              },
-	{ "INIT_CONFIGURE_CLK"                , 0x67, 1       , 0       , 0       , init_configure_clk              },
-	{ "INIT_CONFIGURE_PREINIT"            , 0x68, 1       , 0       , 0       , init_configure_preinit          },
-	{ "INIT_IO"                           , 0x69, 5       , 0       , 0       , init_io                         },
-	{ "INIT_SUB"                          , 0x6B, 2       , 0       , 0       , init_sub                        },
-	{ "INIT_RAM_CONDITION"                , 0x6D, 3       , 0       , 0       , init_ram_condition              },
-	{ "INIT_NV_REG"                       , 0x6E, 13      , 0       , 0       , init_nv_reg                     },
-	{ "INIT_MACRO"                        , 0x6F, 2       , 0       , 0       , init_macro                      },
-	{ "INIT_DONE"                         , 0x71, 1       , 0       , 0       , init_done                       },
-	{ "INIT_RESUME"                       , 0x72, 1       , 0       , 0       , init_resume                     },
+	{ "INIT_SUB_DIRECT"                   , 0x5B, init_sub_direct                 },
+	{ "INIT_COPY_NV_REG"                  , 0x5F, init_copy_nv_reg                },
+	{ "INIT_ZM_INDEX_IO"                  , 0x62, init_zm_index_io                },
+	{ "INIT_COMPUTE_MEM"                  , 0x63, init_compute_mem                },
+	{ "INIT_RESET"                        , 0x65, init_reset                      },
+	{ "INIT_CONFIGURE_MEM"                , 0x66, init_configure_mem              },
+	{ "INIT_CONFIGURE_CLK"                , 0x67, init_configure_clk              },
+	{ "INIT_CONFIGURE_PREINIT"            , 0x68, init_configure_preinit          },
+	{ "INIT_IO"                           , 0x69, init_io                         },
+	{ "INIT_SUB"                          , 0x6B, init_sub                        },
+	{ "INIT_RAM_CONDITION"                , 0x6D, init_ram_condition              },
+	{ "INIT_NV_REG"                       , 0x6E, init_nv_reg                     },
+	{ "INIT_MACRO"                        , 0x6F, init_macro                      },
+	{ "INIT_DONE"                         , 0x71, init_done                       },
+	{ "INIT_RESUME"                       , 0x72, init_resume                     },
 	/* INIT_RAM_CONDITION2 (0x73, 9, 0, 0) removed due to no example of use */
-	{ "INIT_TIME"                         , 0x74, 3       , 0       , 0       , init_time                       },
-	{ "INIT_CONDITION"                    , 0x75, 2       , 0       , 0       , init_condition                  },
-	{ "INIT_IO_CONDITION"                 , 0x76, 2       , 0       , 0       , init_io_condition               },
-	{ "INIT_INDEX_IO"                     , 0x78, 6       , 0       , 0       , init_index_io                   },
-	{ "INIT_PLL"                          , 0x79, 7       , 0       , 0       , init_pll                        },
-	{ "INIT_ZM_REG"                       , 0x7A, 9       , 0       , 0       , init_zm_reg                     },
-	/* INIT_RAM_RESTRICT_PLL's length is adjusted by the BIT M table */
-	{ "INIT_RAM_RESTRICT_PLL"             , 0x87, 2       , 0       , 0       , init_ram_restrict_pll           },
-	{ "INIT_8C"                           , 0x8C, 1       , 0       , 0       , init_8c                         },
-	{ "INIT_8D"                           , 0x8D, 1       , 0       , 0       , init_8d                         },
-	{ "INIT_GPIO"                         , 0x8E, 1       , 0       , 0       , init_gpio                       },
-	/* INIT_RAM_RESTRICT_ZM_REG_GROUP's mult is loaded by M table in BIT */
-	{ "INIT_RAM_RESTRICT_ZM_REG_GROUP"    , 0x8F, 7       , 6       , 0       , init_ram_restrict_zm_reg_group  },
-	{ "INIT_COPY_ZM_REG"                  , 0x90, 9       , 0       , 0       , init_copy_zm_reg                },
-	{ "INIT_ZM_REG_GROUP_ADDRESS_LATCHED" , 0x91, 6       , 5       , 4       , init_zm_reg_group_addr_latched  },
-	{ "INIT_RESERVED"                     , 0x92, 1       , 0       , 0       , init_reserved                   },
-	{ "INIT_96"                           , 0x96, 17      , 0       , 0       , init_96                         },
-	{ "INIT_97"                           , 0x97, 13      , 0       , 0       , init_97                         },
-	{ "INIT_AUXCH"                        , 0x98, 6       , 5       , 2       , init_auxch                      },
-	{ "INIT_ZM_AUXCH"                     , 0x99, 6       , 5       , 1       , init_zm_auxch                   },
-	{ NULL                                , 0   , 0       , 0       , 0       , NULL                            }
+	{ "INIT_TIME"                         , 0x74, init_time                       },
+	{ "INIT_CONDITION"                    , 0x75, init_condition                  },
+	{ "INIT_IO_CONDITION"                 , 0x76, init_io_condition               },
+	{ "INIT_INDEX_IO"                     , 0x78, init_index_io                   },
+	{ "INIT_PLL"                          , 0x79, init_pll                        },
+	{ "INIT_ZM_REG"                       , 0x7A, init_zm_reg                     },
+	{ "INIT_RAM_RESTRICT_PLL"             , 0x87, init_ram_restrict_pll           },
+	{ "INIT_8C"                           , 0x8C, init_8c                         },
+	{ "INIT_8D"                           , 0x8D, init_8d                         },
+	{ "INIT_GPIO"                         , 0x8E, init_gpio                       },
+	{ "INIT_RAM_RESTRICT_ZM_REG_GROUP"    , 0x8F, init_ram_restrict_zm_reg_group  },
+	{ "INIT_COPY_ZM_REG"                  , 0x90, init_copy_zm_reg                },
+	{ "INIT_ZM_REG_GROUP_ADDRESS_LATCHED" , 0x91, init_zm_reg_group_addr_latched  },
+	{ "INIT_RESERVED"                     , 0x92, init_reserved                   },
+	{ "INIT_96"                           , 0x96, init_96                         },
+	{ "INIT_97"                           , 0x97, init_97                         },
+	{ "INIT_AUXCH"                        , 0x98, init_auxch                      },
+	{ "INIT_ZM_AUXCH"                     , 0x99, init_zm_auxch                   },
+	{ NULL                                , 0   , NULL                            }
 };
 
-static unsigned int get_init_table_entry_length(struct nvbios *bios, unsigned int offset, int i)
-{
-	/* Calculates the length of a given init table entry. */
-	return itbl_entry[i].length + bios->data[offset + itbl_entry[i].length_offset]*itbl_entry[i].length_multiplier;
-}
-
 #define MAX_TABLE_OPS 1000
 
 static int
@@ -3056,7 +3055,7 @@
 	 * is changed back to EXECUTE.
 	 */
 
-	int count = 0, i;
+	int count = 0, i, res;
 	uint8_t id;
 
 	/*
@@ -3076,22 +3075,21 @@
 				offset, itbl_entry[i].id, itbl_entry[i].name);
 
 			/* execute eventual command handler */
-			if (itbl_entry[i].handler)
-				if (!(*itbl_entry[i].handler)(bios, offset, iexec))
-					break;
+			res = (*itbl_entry[i].handler)(bios, offset, iexec);
+			if (!res)
+				break;
+			/*
+			 * Add the offset of the current command including all data
+			 * of that command. The offset will then be pointing on the
+			 * next op code.
+			 */
+			offset += res;
 		} else {
 			NV_ERROR(bios->dev,
 				 "0x%04X: Init table command not found: "
 				 "0x%02X\n", offset, id);
 			return -ENOENT;
 		}
-
-		/*
-		 * Add the offset of the current command including all data
-		 * of that command. The offset will then be pointing on the
-		 * next op code.
-		 */
-		offset += get_init_table_entry_length(bios, offset, i);
 	}
 
 	if (offset >= bios->length)
@@ -3854,7 +3852,7 @@
 	 * script tables is a pointer to the script to execute.
 	 */
 
-	NV_DEBUG(dev, "Searching for output entry for %d %d %d\n",
+	NV_DEBUG_KMS(dev, "Searching for output entry for %d %d %d\n",
 			dcbent->type, dcbent->location, dcbent->or);
 	otable = bios_output_config_match(dev, dcbent, table[1] +
 					  bios->display.script_table_ptr,
@@ -3884,7 +3882,7 @@
 	if (pxclk == 0) {
 		script = ROM16(otable[6]);
 		if (!script) {
-			NV_DEBUG(dev, "output script 0 not found\n");
+			NV_DEBUG_KMS(dev, "output script 0 not found\n");
 			return 1;
 		}
 
@@ -3894,7 +3892,7 @@
 	if (pxclk == -1) {
 		script = ROM16(otable[8]);
 		if (!script) {
-			NV_DEBUG(dev, "output script 1 not found\n");
+			NV_DEBUG_KMS(dev, "output script 1 not found\n");
 			return 1;
 		}
 
@@ -3907,7 +3905,7 @@
 		else
 			script = 0;
 		if (!script) {
-			NV_DEBUG(dev, "output script 2 not found\n");
+			NV_DEBUG_KMS(dev, "output script 2 not found\n");
 			return 1;
 		}
 
@@ -3931,7 +3929,7 @@
 		if (script)
 			script = clkcmptable(bios, script, -pxclk);
 		if (!script) {
-			NV_DEBUG(dev, "clock script 1 not found\n");
+			NV_DEBUG_KMS(dev, "clock script 1 not found\n");
 			return 1;
 		}
 
@@ -4606,10 +4604,6 @@
 	 * stuff that we don't use - their use currently unknown
 	 */
 
-	uint16_t rr_strap_xlat;
-	uint8_t rr_group_count;
-	int i;
-
 	/*
 	 * Older bios versions don't have a sufficiently long table for
 	 * what we want
@@ -4618,24 +4612,13 @@
 		return 0;
 
 	if (bitentry->id[1] < 2) {
-		rr_group_count = bios->data[bitentry->offset + 2];
-		rr_strap_xlat = ROM16(bios->data[bitentry->offset + 3]);
+		bios->ram_restrict_group_count = bios->data[bitentry->offset + 2];
+		bios->ram_restrict_tbl_ptr = ROM16(bios->data[bitentry->offset + 3]);
 	} else {
-		rr_group_count = bios->data[bitentry->offset + 0];
-		rr_strap_xlat = ROM16(bios->data[bitentry->offset + 1]);
+		bios->ram_restrict_group_count = bios->data[bitentry->offset + 0];
+		bios->ram_restrict_tbl_ptr = ROM16(bios->data[bitentry->offset + 1]);
 	}
 
-	/* adjust length of INIT_87 */
-	for (i = 0; itbl_entry[i].name && (itbl_entry[i].id != 0x87); i++);
-	itbl_entry[i].length += rr_group_count * 4;
-
-	/* set up multiplier for INIT_RAM_RESTRICT_ZM_REG_GROUP */
-	for (; itbl_entry[i].name && (itbl_entry[i].id != 0x8f); i++);
-	itbl_entry[i].length_multiplier = rr_group_count * 4;
-
-	init_ram_restrict_zm_reg_group_blocklen = itbl_entry[i].length_multiplier;
-	bios->ram_restrict_tbl_ptr = rr_strap_xlat;
-
 	return 0;
 }
 
@@ -5234,7 +5217,7 @@
 	int i;
 
 	if (!bios->bdcb.connector_table_ptr) {
-		NV_DEBUG(dev, "No DCB connector table present\n");
+		NV_DEBUG_KMS(dev, "No DCB connector table present\n");
 		return;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.h b/drivers/gpu/drm/nouveau/nouveau_bios.h
index 1d5f10b..058e98c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.h
@@ -227,6 +227,7 @@
 
 	uint16_t pll_limit_tbl_ptr;
 	uint16_t ram_restrict_tbl_ptr;
+	uint8_t ram_restrict_group_count;
 
 	uint16_t some_script_ptr; /* BIT I + 14 */
 	uint16_t init96_tbl_ptr; /* BIT I + 16 */
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index aa2dfbc..0cad6d8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -154,6 +154,11 @@
 	nvbo->placement.busy_placement = nvbo->placements;
 	nvbo->placement.num_placement = n;
 	nvbo->placement.num_busy_placement = n;
+
+	if (nvbo->pin_refcnt) {
+		while (n--)
+			nvbo->placements[n] |= TTM_PL_FLAG_NO_EVICT;
+	}
 }
 
 int
@@ -400,10 +405,16 @@
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
 
 	switch (bo->mem.mem_type) {
+	case TTM_PL_VRAM:
+		nouveau_bo_placement_set(nvbo, TTM_PL_FLAG_TT |
+					 TTM_PL_FLAG_SYSTEM);
+		break;
 	default:
 		nouveau_bo_placement_set(nvbo, TTM_PL_FLAG_SYSTEM);
 		break;
 	}
+
+	*pl = nvbo->placement;
 }
 
 
@@ -455,11 +466,8 @@
 	int ret;
 
 	chan = nvbo->channel;
-	if (!chan || nvbo->tile_flags || nvbo->no_vm) {
+	if (!chan || nvbo->tile_flags || nvbo->no_vm)
 		chan = dev_priv->channel;
-		if (!chan)
-			return -EINVAL;
-	}
 
 	src_offset = old_mem->mm_node->start << PAGE_SHIFT;
 	dst_offset = new_mem->mm_node->start << PAGE_SHIFT;
@@ -625,7 +633,8 @@
 			return ret;
 	}
 
-	if (dev_priv->init_state != NOUVEAU_CARD_INIT_DONE)
+	if (dev_priv->init_state != NOUVEAU_CARD_INIT_DONE ||
+	    !dev_priv->channel)
 		return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
 
 	if (old_mem->mem_type == TTM_PL_SYSTEM && !bo->ttm) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 032cf09..5a10deb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -86,7 +86,7 @@
 	struct nouveau_connector *connector = nouveau_connector(drm_connector);
 	struct drm_device *dev = connector->base.dev;
 
-	NV_DEBUG(dev, "\n");
+	NV_DEBUG_KMS(dev, "\n");
 
 	if (!connector)
 		return;
@@ -420,7 +420,7 @@
 	/* Use preferred mode if there is one.. */
 	list_for_each_entry(mode, &connector->base.probed_modes, head) {
 		if (mode->type & DRM_MODE_TYPE_PREFERRED) {
-			NV_DEBUG(dev, "native mode from preferred\n");
+			NV_DEBUG_KMS(dev, "native mode from preferred\n");
 			return drm_mode_duplicate(dev, mode);
 		}
 	}
@@ -445,7 +445,7 @@
 		largest = mode;
 	}
 
-	NV_DEBUG(dev, "native mode from largest: %dx%d@%d\n",
+	NV_DEBUG_KMS(dev, "native mode from largest: %dx%d@%d\n",
 		      high_w, high_h, high_v);
 	return largest ? drm_mode_duplicate(dev, largest) : NULL;
 }
@@ -725,7 +725,7 @@
 	struct drm_encoder *encoder;
 	int ret;
 
-	NV_DEBUG(dev, "\n");
+	NV_DEBUG_KMS(dev, "\n");
 
 	nv_connector = kzalloc(sizeof(*nv_connector), GFP_KERNEL);
 	if (!nv_connector)
diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c
index de61f46..9e2926c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dp.c
@@ -187,7 +187,7 @@
 	if (ret)
 		return false;
 
-	NV_DEBUG(dev, "\t\tadjust 0x%02x 0x%02x\n", request[0], request[1]);
+	NV_DEBUG_KMS(dev, "\t\tadjust 0x%02x 0x%02x\n", request[0], request[1]);
 
 	/* Keep all lanes at the same level.. */
 	for (i = 0; i < nv_encoder->dp.link_nr; i++) {
@@ -228,7 +228,7 @@
 	int or = nv_encoder->or, link = !(nv_encoder->dcb->sorconf.link & 1);
 	int dpe_headerlen, ret, i;
 
-	NV_DEBUG(dev, "\t\tconfig 0x%02x 0x%02x 0x%02x 0x%02x\n",
+	NV_DEBUG_KMS(dev, "\t\tconfig 0x%02x 0x%02x 0x%02x 0x%02x\n",
 		 config[0], config[1], config[2], config[3]);
 
 	dpe = nouveau_bios_dp_table(dev, nv_encoder->dcb, &dpe_headerlen);
@@ -276,12 +276,12 @@
 	bool cr_done, cr_max_vs, eq_done;
 	int ret = 0, i, tries, voltage;
 
-	NV_DEBUG(dev, "link training!!\n");
+	NV_DEBUG_KMS(dev, "link training!!\n");
 train:
 	cr_done = eq_done = false;
 
 	/* set link configuration */
-	NV_DEBUG(dev, "\tbegin train: bw %d, lanes %d\n",
+	NV_DEBUG_KMS(dev, "\tbegin train: bw %d, lanes %d\n",
 		 nv_encoder->dp.link_bw, nv_encoder->dp.link_nr);
 
 	ret = nouveau_dp_link_bw_set(encoder, nv_encoder->dp.link_bw);
@@ -297,7 +297,7 @@
 		return false;
 
 	/* clock recovery */
-	NV_DEBUG(dev, "\tbegin cr\n");
+	NV_DEBUG_KMS(dev, "\tbegin cr\n");
 	ret = nouveau_dp_link_train_set(encoder, DP_TRAINING_PATTERN_1);
 	if (ret)
 		goto stop;
@@ -314,7 +314,7 @@
 		ret = auxch_rd(encoder, DP_LANE0_1_STATUS, status, 2);
 		if (ret)
 			break;
-		NV_DEBUG(dev, "\t\tstatus: 0x%02x 0x%02x\n",
+		NV_DEBUG_KMS(dev, "\t\tstatus: 0x%02x 0x%02x\n",
 			 status[0], status[1]);
 
 		cr_done = true;
@@ -346,7 +346,7 @@
 		goto stop;
 
 	/* channel equalisation */
-	NV_DEBUG(dev, "\tbegin eq\n");
+	NV_DEBUG_KMS(dev, "\tbegin eq\n");
 	ret = nouveau_dp_link_train_set(encoder, DP_TRAINING_PATTERN_2);
 	if (ret)
 		goto stop;
@@ -357,7 +357,7 @@
 		ret = auxch_rd(encoder, DP_LANE0_1_STATUS, status, 3);
 		if (ret)
 			break;
-		NV_DEBUG(dev, "\t\tstatus: 0x%02x 0x%02x\n",
+		NV_DEBUG_KMS(dev, "\t\tstatus: 0x%02x 0x%02x\n",
 			 status[0], status[1]);
 
 		eq_done = true;
@@ -395,9 +395,9 @@
 
 	/* retry at a lower setting, if possible */
 	if (!ret && !(eq_done && cr_done)) {
-		NV_DEBUG(dev, "\twe failed\n");
+		NV_DEBUG_KMS(dev, "\twe failed\n");
 		if (nv_encoder->dp.link_bw != DP_LINK_BW_1_62) {
-			NV_DEBUG(dev, "retry link training at low rate\n");
+			NV_DEBUG_KMS(dev, "retry link training at low rate\n");
 			nv_encoder->dp.link_bw = DP_LINK_BW_1_62;
 			goto train;
 		}
@@ -418,7 +418,7 @@
 	if (ret)
 		return false;
 
-	NV_DEBUG(dev, "encoder: link_bw %d, link_nr %d\n"
+	NV_DEBUG_KMS(dev, "encoder: link_bw %d, link_nr %d\n"
 		      "display: link_bw %d, link_nr %d version 0x%02x\n",
 		 nv_encoder->dcb->dpconf.link_bw,
 		 nv_encoder->dcb->dpconf.link_nr,
@@ -446,7 +446,7 @@
 	uint32_t tmp, ctrl, stat = 0, data32[4] = {};
 	int ret = 0, i, index = auxch->rd;
 
-	NV_DEBUG(dev, "ch %d cmd %d addr 0x%x len %d\n", index, cmd, addr, data_nr);
+	NV_DEBUG_KMS(dev, "ch %d cmd %d addr 0x%x len %d\n", index, cmd, addr, data_nr);
 
 	tmp = nv_rd32(dev, NV50_AUXCH_CTRL(auxch->rd));
 	nv_wr32(dev, NV50_AUXCH_CTRL(auxch->rd), tmp | 0x00100000);
@@ -472,7 +472,7 @@
 	if (!(cmd & 1)) {
 		memcpy(data32, data, data_nr);
 		for (i = 0; i < 4; i++) {
-			NV_DEBUG(dev, "wr %d: 0x%08x\n", i, data32[i]);
+			NV_DEBUG_KMS(dev, "wr %d: 0x%08x\n", i, data32[i]);
 			nv_wr32(dev, NV50_AUXCH_DATA_OUT(index, i), data32[i]);
 		}
 	}
@@ -504,7 +504,7 @@
 	if (cmd & 1) {
 		for (i = 0; i < 4; i++) {
 			data32[i] = nv_rd32(dev, NV50_AUXCH_DATA_IN(index, i));
-			NV_DEBUG(dev, "rd %d: 0x%08x\n", i, data32[i]);
+			NV_DEBUG_KMS(dev, "rd %d: 0x%08x\n", i, data32[i]);
 		}
 		memcpy(data, data32, data_nr);
 	}
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c
index 3f943c0..06eb993 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.c
@@ -35,6 +35,10 @@
 
 #include "drm_pciids.h"
 
+MODULE_PARM_DESC(ctxfw, "Use external firmware blob for grctx init (NV40)");
+int nouveau_ctxfw = 0;
+module_param_named(ctxfw, nouveau_ctxfw, int, 0400);
+
 MODULE_PARM_DESC(noagp, "Disable AGP");
 int nouveau_noagp;
 module_param_named(noagp, nouveau_noagp, int, 0400);
@@ -273,7 +277,7 @@
 
 		for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
 			chan = dev_priv->fifos[i];
-			if (!chan)
+			if (!chan || !chan->pushbuf_bo)
 				continue;
 
 			for (j = 0; j < NOUVEAU_DMA_SKIPS; j++)
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 88b4c7b..5f8cbb7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -54,6 +54,7 @@
 #include "nouveau_drm.h"
 #include "nouveau_reg.h"
 #include "nouveau_bios.h"
+struct nouveau_grctx;
 
 #define MAX_NUM_DCB_ENTRIES 16
 
@@ -317,6 +318,7 @@
 	bool accel_blocked;
 	void *ctxprog;
 	void *ctxvals;
+	int grctx_size;
 
 	int  (*init)(struct drm_device *);
 	void (*takedown)(struct drm_device *);
@@ -647,6 +649,7 @@
 extern char *nouveau_tv_norm;
 extern int nouveau_reg_debug;
 extern char *nouveau_vbios;
+extern int nouveau_ctxfw;
 
 /* nouveau_state.c */
 extern void nouveau_preclose(struct drm_device *dev, struct drm_file *);
@@ -959,9 +962,7 @@
 extern void nv40_graph_destroy_context(struct nouveau_channel *);
 extern int  nv40_graph_load_context(struct nouveau_channel *);
 extern int  nv40_graph_unload_context(struct drm_device *);
-extern int  nv40_grctx_init(struct drm_device *);
-extern void nv40_grctx_fini(struct drm_device *);
-extern void nv40_grctx_vals_load(struct drm_device *, struct nouveau_gpuobj *);
+extern void nv40_grctx_init(struct nouveau_grctx *);
 
 /* nv50_graph.c */
 extern struct nouveau_pgraph_object_class nv50_graph_grclass[];
@@ -975,6 +976,12 @@
 extern int  nv50_graph_unload_context(struct drm_device *);
 extern void nv50_graph_context_switch(struct drm_device *);
 
+/* nouveau_grctx.c */
+extern int  nouveau_grctx_prog_load(struct drm_device *);
+extern void nouveau_grctx_vals_load(struct drm_device *,
+				    struct nouveau_gpuobj *);
+extern void nouveau_grctx_fini(struct drm_device *);
+
 /* nv04_instmem.c */
 extern int  nv04_instmem_init(struct drm_device *);
 extern void nv04_instmem_takedown(struct drm_device *);
@@ -1207,14 +1214,24 @@
 					pci_name(d->pdev), ##arg)
 #ifndef NV_DEBUG_NOTRACE
 #define NV_DEBUG(d, fmt, arg...) do {                                          \
-	if (drm_debug) {                                                       \
+	if (drm_debug & DRM_UT_DRIVER) {                                       \
+		NV_PRINTK(KERN_DEBUG, d, "%s:%d - " fmt, __func__,             \
+			  __LINE__, ##arg);                                    \
+	}                                                                      \
+} while (0)
+#define NV_DEBUG_KMS(d, fmt, arg...) do {                                      \
+	if (drm_debug & DRM_UT_KMS) {                                          \
 		NV_PRINTK(KERN_DEBUG, d, "%s:%d - " fmt, __func__,             \
 			  __LINE__, ##arg);                                    \
 	}                                                                      \
 } while (0)
 #else
 #define NV_DEBUG(d, fmt, arg...) do {                                          \
-	if (drm_debug)                                                         \
+	if (drm_debug & DRM_UT_DRIVER)                                         \
+		NV_PRINTK(KERN_DEBUG, d, fmt, ##arg);                          \
+} while (0)
+#define NV_DEBUG_KMS(d, fmt, arg...) do {                                      \
+	if (drm_debug & DRM_UT_KMS)                                            \
 		NV_PRINTK(KERN_DEBUG, d, fmt, ##arg);                          \
 } while (0)
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 36e8c5e..84af25c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -58,7 +58,7 @@
 	struct nouveau_channel *chan = dev_priv->channel;
 	int ret, i;
 
-	if (!chan->accel_done ||
+	if (!chan || !chan->accel_done ||
 	    info->state != FBINFO_STATE_RUNNING ||
 	    info->flags & FBINFO_HWACCEL_DISABLED)
 		return 0;
@@ -318,14 +318,16 @@
 	par->nouveau_fb = nouveau_fb;
 	par->dev = dev;
 
-	switch (dev_priv->card_type) {
-	case NV_50:
-		nv50_fbcon_accel_init(info);
-		break;
-	default:
-		nv04_fbcon_accel_init(info);
-		break;
-	};
+	if (dev_priv->channel) {
+		switch (dev_priv->card_type) {
+		case NV_50:
+			nv50_fbcon_accel_init(info);
+			break;
+		default:
+			nv04_fbcon_accel_init(info);
+			break;
+		};
+	}
 
 	nouveau_fbcon_zfill(dev);
 
@@ -347,7 +349,7 @@
 int
 nouveau_fbcon_probe(struct drm_device *dev)
 {
-	NV_DEBUG(dev, "\n");
+	NV_DEBUG_KMS(dev, "\n");
 
 	return drm_fb_helper_single_fb_probe(dev, 32, nouveau_fbcon_create);
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_grctx.c b/drivers/gpu/drm/nouveau/nouveau_grctx.c
new file mode 100644
index 0000000..419f4c2
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nouveau_grctx.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2009 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <linux/firmware.h>
+
+#include "drmP.h"
+#include "nouveau_drv.h"
+
+struct nouveau_ctxprog {
+	uint32_t signature;
+	uint8_t  version;
+	uint16_t length;
+	uint32_t data[];
+} __attribute__ ((packed));
+
+struct nouveau_ctxvals {
+	uint32_t signature;
+	uint8_t  version;
+	uint32_t length;
+	struct {
+		uint32_t offset;
+		uint32_t value;
+	} data[];
+} __attribute__ ((packed));
+
+int
+nouveau_grctx_prog_load(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+	const int chipset = dev_priv->chipset;
+	const struct firmware *fw;
+	const struct nouveau_ctxprog *cp;
+	const struct nouveau_ctxvals *cv;
+	char name[32];
+	int ret, i;
+
+	if (pgraph->accel_blocked)
+		return -ENODEV;
+
+	if (!pgraph->ctxprog) {
+		sprintf(name, "nouveau/nv%02x.ctxprog", chipset);
+		ret = request_firmware(&fw, name, &dev->pdev->dev);
+		if (ret) {
+			NV_ERROR(dev, "No ctxprog for NV%02x\n", chipset);
+			return ret;
+		}
+
+		pgraph->ctxprog = kmalloc(fw->size, GFP_KERNEL);
+		if (!pgraph->ctxprog) {
+			NV_ERROR(dev, "OOM copying ctxprog\n");
+			release_firmware(fw);
+			return -ENOMEM;
+		}
+		memcpy(pgraph->ctxprog, fw->data, fw->size);
+
+		cp = pgraph->ctxprog;
+		if (le32_to_cpu(cp->signature) != 0x5043564e ||
+		    cp->version != 0 ||
+		    le16_to_cpu(cp->length) != ((fw->size - 7) / 4)) {
+			NV_ERROR(dev, "ctxprog invalid\n");
+			release_firmware(fw);
+			nouveau_grctx_fini(dev);
+			return -EINVAL;
+		}
+		release_firmware(fw);
+	}
+
+	if (!pgraph->ctxvals) {
+		sprintf(name, "nouveau/nv%02x.ctxvals", chipset);
+		ret = request_firmware(&fw, name, &dev->pdev->dev);
+		if (ret) {
+			NV_ERROR(dev, "No ctxvals for NV%02x\n", chipset);
+			nouveau_grctx_fini(dev);
+			return ret;
+		}
+
+		pgraph->ctxvals = kmalloc(fw->size, GFP_KERNEL);
+		if (!pgraph->ctxprog) {
+			NV_ERROR(dev, "OOM copying ctxprog\n");
+			release_firmware(fw);
+			nouveau_grctx_fini(dev);
+			return -ENOMEM;
+		}
+		memcpy(pgraph->ctxvals, fw->data, fw->size);
+
+		cv = (void *)pgraph->ctxvals;
+		if (le32_to_cpu(cv->signature) != 0x5643564e ||
+		    cv->version != 0 ||
+		    le32_to_cpu(cv->length) != ((fw->size - 9) / 8)) {
+			NV_ERROR(dev, "ctxvals invalid\n");
+			release_firmware(fw);
+			nouveau_grctx_fini(dev);
+			return -EINVAL;
+		}
+		release_firmware(fw);
+	}
+
+	cp = pgraph->ctxprog;
+
+	nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_INDEX, 0);
+	for (i = 0; i < le16_to_cpu(cp->length); i++)
+		nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_DATA,
+			le32_to_cpu(cp->data[i]));
+
+	return 0;
+}
+
+void
+nouveau_grctx_fini(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+
+	if (pgraph->ctxprog) {
+		kfree(pgraph->ctxprog);
+		pgraph->ctxprog = NULL;
+	}
+
+	if (pgraph->ctxvals) {
+		kfree(pgraph->ctxprog);
+		pgraph->ctxvals = NULL;
+	}
+}
+
+void
+nouveau_grctx_vals_load(struct drm_device *dev, struct nouveau_gpuobj *ctx)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+	struct nouveau_ctxvals *cv = pgraph->ctxvals;
+	int i;
+
+	if (!cv)
+		return;
+
+	for (i = 0; i < le32_to_cpu(cv->length); i++)
+		nv_wo32(dev, ctx, le32_to_cpu(cv->data[i].offset),
+			le32_to_cpu(cv->data[i].value));
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_grctx.h b/drivers/gpu/drm/nouveau/nouveau_grctx.h
new file mode 100644
index 0000000..5d39c4c
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nouveau_grctx.h
@@ -0,0 +1,133 @@
+#ifndef __NOUVEAU_GRCTX_H__
+#define __NOUVEAU_GRCTX_H__
+
+struct nouveau_grctx {
+	struct drm_device *dev;
+
+	enum {
+		NOUVEAU_GRCTX_PROG,
+		NOUVEAU_GRCTX_VALS
+	} mode;
+	void *data;
+
+	uint32_t ctxprog_max;
+	uint32_t ctxprog_len;
+	uint32_t ctxprog_reg;
+	int      ctxprog_label[32];
+	uint32_t ctxvals_pos;
+	uint32_t ctxvals_base;
+};
+
+#ifdef CP_CTX
+static inline void
+cp_out(struct nouveau_grctx *ctx, uint32_t inst)
+{
+	uint32_t *ctxprog = ctx->data;
+
+	if (ctx->mode != NOUVEAU_GRCTX_PROG)
+		return;
+
+	BUG_ON(ctx->ctxprog_len == ctx->ctxprog_max);
+	ctxprog[ctx->ctxprog_len++] = inst;
+}
+
+static inline void
+cp_lsr(struct nouveau_grctx *ctx, uint32_t val)
+{
+	cp_out(ctx, CP_LOAD_SR | val);
+}
+
+static inline void
+cp_ctx(struct nouveau_grctx *ctx, uint32_t reg, uint32_t length)
+{
+	ctx->ctxprog_reg = (reg - 0x00400000) >> 2;
+
+	ctx->ctxvals_base = ctx->ctxvals_pos;
+	ctx->ctxvals_pos = ctx->ctxvals_base + length;
+
+	if (length > (CP_CTX_COUNT >> CP_CTX_COUNT_SHIFT)) {
+		cp_lsr(ctx, length);
+		length = 0;
+	}
+
+	cp_out(ctx, CP_CTX | (length << CP_CTX_COUNT_SHIFT) | ctx->ctxprog_reg);
+}
+
+static inline void
+cp_name(struct nouveau_grctx *ctx, int name)
+{
+	uint32_t *ctxprog = ctx->data;
+	int i;
+
+	if (ctx->mode != NOUVEAU_GRCTX_PROG)
+		return;
+
+	ctx->ctxprog_label[name] = ctx->ctxprog_len;
+	for (i = 0; i < ctx->ctxprog_len; i++) {
+		if ((ctxprog[i] & 0xfff00000) != 0xff400000)
+			continue;
+		if ((ctxprog[i] & CP_BRA_IP) != ((name) << CP_BRA_IP_SHIFT))
+			continue;
+		ctxprog[i] = (ctxprog[i] & 0x00ff00ff) |
+			     (ctx->ctxprog_len << CP_BRA_IP_SHIFT);
+	}
+}
+
+static inline void
+_cp_bra(struct nouveau_grctx *ctx, u32 mod, int flag, int state, int name)
+{
+	int ip = 0;
+
+	if (mod != 2) {
+		ip = ctx->ctxprog_label[name] << CP_BRA_IP_SHIFT;
+		if (ip == 0)
+			ip = 0xff000000 | (name << CP_BRA_IP_SHIFT);
+	}
+
+	cp_out(ctx, CP_BRA | (mod << 18) | ip | flag |
+		    (state ? 0 : CP_BRA_IF_CLEAR));
+}
+#define cp_bra(c,f,s,n) _cp_bra((c), 0, CP_FLAG_##f, CP_FLAG_##f##_##s, n)
+#ifdef CP_BRA_MOD
+#define cp_cal(c,f,s,n) _cp_bra((c), 1, CP_FLAG_##f, CP_FLAG_##f##_##s, n)
+#define cp_ret(c,f,s) _cp_bra((c), 2, CP_FLAG_##f, CP_FLAG_##f##_##s, 0)
+#endif
+
+static inline void
+_cp_wait(struct nouveau_grctx *ctx, int flag, int state)
+{
+	cp_out(ctx, CP_WAIT | flag | (state ? CP_WAIT_SET : 0));
+}
+#define cp_wait(c,f,s) _cp_wait((c), CP_FLAG_##f, CP_FLAG_##f##_##s)
+
+static inline void
+_cp_set(struct nouveau_grctx *ctx, int flag, int state)
+{
+	cp_out(ctx, CP_SET | flag | (state ? CP_SET_1 : 0));
+}
+#define cp_set(c,f,s) _cp_set((c), CP_FLAG_##f, CP_FLAG_##f##_##s)
+
+static inline void
+cp_pos(struct nouveau_grctx *ctx, int offset)
+{
+	ctx->ctxvals_pos = offset;
+	ctx->ctxvals_base = ctx->ctxvals_pos;
+
+	cp_lsr(ctx, ctx->ctxvals_pos);
+	cp_out(ctx, CP_SET_CONTEXT_POINTER);
+}
+
+static inline void
+gr_def(struct nouveau_grctx *ctx, uint32_t reg, uint32_t val)
+{
+	if (ctx->mode != NOUVEAU_GRCTX_VALS)
+		return;
+
+	reg = (reg - 0x00400000) / 4;
+	reg = (reg - ctx->ctxprog_reg) + ctx->ctxvals_base;
+
+	nv_wo32(ctx->dev, ctx->data, reg, val);
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 2ed41d3..e76ec2d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -299,94 +299,13 @@
 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
 }
 
-int
-nouveau_card_init(struct drm_device *dev)
+static int
+nouveau_card_init_channel(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_engine *engine;
 	struct nouveau_gpuobj *gpuobj;
 	int ret;
 
-	NV_DEBUG(dev, "prev state = %d\n", dev_priv->init_state);
-
-	if (dev_priv->init_state == NOUVEAU_CARD_INIT_DONE)
-		return 0;
-
-	vga_client_register(dev->pdev, dev, NULL, nouveau_vga_set_decode);
-
-	/* Initialise internal driver API hooks */
-	ret = nouveau_init_engine_ptrs(dev);
-	if (ret)
-		return ret;
-	engine = &dev_priv->engine;
-	dev_priv->init_state = NOUVEAU_CARD_INIT_FAILED;
-
-	/* Parse BIOS tables / Run init tables if card not POSTed */
-	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
-		ret = nouveau_bios_init(dev);
-		if (ret)
-			return ret;
-	}
-
-	ret = nouveau_gpuobj_early_init(dev);
-	if (ret)
-		return ret;
-
-	/* Initialise instance memory, must happen before mem_init so we
-	 * know exactly how much VRAM we're able to use for "normal"
-	 * purposes.
-	 */
-	ret = engine->instmem.init(dev);
-	if (ret)
-		return ret;
-
-	/* Setup the memory manager */
-	ret = nouveau_mem_init(dev);
-	if (ret)
-		return ret;
-
-	ret = nouveau_gpuobj_init(dev);
-	if (ret)
-		return ret;
-
-	/* PMC */
-	ret = engine->mc.init(dev);
-	if (ret)
-		return ret;
-
-	/* PTIMER */
-	ret = engine->timer.init(dev);
-	if (ret)
-		return ret;
-
-	/* PFB */
-	ret = engine->fb.init(dev);
-	if (ret)
-		return ret;
-
-	/* PGRAPH */
-	ret = engine->graph.init(dev);
-	if (ret)
-		return ret;
-
-	/* PFIFO */
-	ret = engine->fifo.init(dev);
-	if (ret)
-		return ret;
-
-	/* this call irq_preinstall, register irq handler and
-	 * call irq_postinstall
-	 */
-	ret = drm_irq_install(dev);
-	if (ret)
-		return ret;
-
-	ret = drm_vblank_init(dev, 0);
-	if (ret)
-		return ret;
-
-	/* what about PVIDEO/PCRTC/PRAMDAC etc? */
-
 	ret = nouveau_channel_alloc(dev, &dev_priv->channel,
 				    (struct drm_file *)-2,
 				    NvDmaFB, NvDmaTT);
@@ -399,39 +318,133 @@
 				     NV_DMA_ACCESS_RW, NV_DMA_TARGET_VIDMEM,
 				     &gpuobj);
 	if (ret)
-		return ret;
+		goto out_err;
 
 	ret = nouveau_gpuobj_ref_add(dev, dev_priv->channel, NvDmaVRAM,
 				     gpuobj, NULL);
-	if (ret) {
-		nouveau_gpuobj_del(dev, &gpuobj);
-		return ret;
-	}
+	if (ret)
+		goto out_err;
 
 	gpuobj = NULL;
 	ret = nouveau_gpuobj_gart_dma_new(dev_priv->channel, 0,
 					  dev_priv->gart_info.aper_size,
 					  NV_DMA_ACCESS_RW, &gpuobj, NULL);
 	if (ret)
-		return ret;
+		goto out_err;
 
 	ret = nouveau_gpuobj_ref_add(dev, dev_priv->channel, NvDmaGART,
 				     gpuobj, NULL);
-	if (ret) {
-		nouveau_gpuobj_del(dev, &gpuobj);
-		return ret;
+	if (ret)
+		goto out_err;
+
+	return 0;
+out_err:
+	nouveau_gpuobj_del(dev, &gpuobj);
+	nouveau_channel_free(dev_priv->channel);
+	dev_priv->channel = NULL;
+	return ret;
+}
+
+int
+nouveau_card_init(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_engine *engine;
+	int ret;
+
+	NV_DEBUG(dev, "prev state = %d\n", dev_priv->init_state);
+
+	if (dev_priv->init_state == NOUVEAU_CARD_INIT_DONE)
+		return 0;
+
+	vga_client_register(dev->pdev, dev, NULL, nouveau_vga_set_decode);
+
+	/* Initialise internal driver API hooks */
+	ret = nouveau_init_engine_ptrs(dev);
+	if (ret)
+		goto out;
+	engine = &dev_priv->engine;
+	dev_priv->init_state = NOUVEAU_CARD_INIT_FAILED;
+
+	/* Parse BIOS tables / Run init tables if card not POSTed */
+	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
+		ret = nouveau_bios_init(dev);
+		if (ret)
+			goto out;
+	}
+
+	ret = nouveau_gpuobj_early_init(dev);
+	if (ret)
+		goto out_bios;
+
+	/* Initialise instance memory, must happen before mem_init so we
+	 * know exactly how much VRAM we're able to use for "normal"
+	 * purposes.
+	 */
+	ret = engine->instmem.init(dev);
+	if (ret)
+		goto out_gpuobj_early;
+
+	/* Setup the memory manager */
+	ret = nouveau_mem_init(dev);
+	if (ret)
+		goto out_instmem;
+
+	ret = nouveau_gpuobj_init(dev);
+	if (ret)
+		goto out_mem;
+
+	/* PMC */
+	ret = engine->mc.init(dev);
+	if (ret)
+		goto out_gpuobj;
+
+	/* PTIMER */
+	ret = engine->timer.init(dev);
+	if (ret)
+		goto out_mc;
+
+	/* PFB */
+	ret = engine->fb.init(dev);
+	if (ret)
+		goto out_timer;
+
+	/* PGRAPH */
+	ret = engine->graph.init(dev);
+	if (ret)
+		goto out_fb;
+
+	/* PFIFO */
+	ret = engine->fifo.init(dev);
+	if (ret)
+		goto out_graph;
+
+	/* this call irq_preinstall, register irq handler and
+	 * call irq_postinstall
+	 */
+	ret = drm_irq_install(dev);
+	if (ret)
+		goto out_fifo;
+
+	ret = drm_vblank_init(dev, 0);
+	if (ret)
+		goto out_irq;
+
+	/* what about PVIDEO/PCRTC/PRAMDAC etc? */
+
+	if (!engine->graph.accel_blocked) {
+		ret = nouveau_card_init_channel(dev);
+		if (ret)
+			goto out_irq;
 	}
 
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
-		if (dev_priv->card_type >= NV_50) {
+		if (dev_priv->card_type >= NV_50)
 			ret = nv50_display_create(dev);
-			if (ret)
-				return ret;
-		} else {
+		else
 			ret = nv04_display_create(dev);
-			if (ret)
-				return ret;
-		}
+		if (ret)
+			goto out_irq;
 	}
 
 	ret = nouveau_backlight_init(dev);
@@ -444,6 +457,32 @@
 		drm_helper_initial_config(dev);
 
 	return 0;
+
+out_irq:
+	drm_irq_uninstall(dev);
+out_fifo:
+	engine->fifo.takedown(dev);
+out_graph:
+	engine->graph.takedown(dev);
+out_fb:
+	engine->fb.takedown(dev);
+out_timer:
+	engine->timer.takedown(dev);
+out_mc:
+	engine->mc.takedown(dev);
+out_gpuobj:
+	nouveau_gpuobj_takedown(dev);
+out_mem:
+	nouveau_mem_close(dev);
+out_instmem:
+	engine->instmem.takedown(dev);
+out_gpuobj_early:
+	nouveau_gpuobj_late_takedown(dev);
+out_bios:
+	nouveau_bios_takedown(dev);
+out:
+	vga_client_register(dev->pdev, NULL, NULL, NULL);
+	return ret;
 }
 
 static void nouveau_card_takedown(struct drm_device *dev)
diff --git a/drivers/gpu/drm/nouveau/nv04_crtc.c b/drivers/gpu/drm/nouveau/nv04_crtc.c
index b913636..d2f143e 100644
--- a/drivers/gpu/drm/nouveau/nv04_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv04_crtc.c
@@ -143,10 +143,10 @@
 	state->pllsel |= nv_crtc->index ? PLLSEL_VPLL2_MASK : PLLSEL_VPLL1_MASK;
 
 	if (pv->NM2)
-		NV_TRACE(dev, "vpll: n1 %d n2 %d m1 %d m2 %d log2p %d\n",
+		NV_DEBUG_KMS(dev, "vpll: n1 %d n2 %d m1 %d m2 %d log2p %d\n",
 			 pv->N1, pv->N2, pv->M1, pv->M2, pv->log2P);
 	else
-		NV_TRACE(dev, "vpll: n %d m %d log2p %d\n",
+		NV_DEBUG_KMS(dev, "vpll: n %d m %d log2p %d\n",
 			 pv->N1, pv->M1, pv->log2P);
 
 	nv_crtc->cursor.set_offset(nv_crtc, nv_crtc->cursor.offset);
@@ -160,7 +160,7 @@
 	unsigned char seq1 = 0, crtc17 = 0;
 	unsigned char crtc1A;
 
-	NV_TRACE(dev, "Setting dpms mode %d on CRTC %d\n", mode,
+	NV_DEBUG_KMS(dev, "Setting dpms mode %d on CRTC %d\n", mode,
 							nv_crtc->index);
 
 	if (nv_crtc->last_dpms == mode) /* Don't do unnecesary mode changes. */
@@ -603,7 +603,7 @@
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 
-	NV_DEBUG(dev, "CTRC mode on CRTC %d:\n", nv_crtc->index);
+	NV_DEBUG_KMS(dev, "CTRC mode on CRTC %d:\n", nv_crtc->index);
 	drm_mode_debug_printmodeline(adjusted_mode);
 
 	/* unlock must come after turning off FP_TG_CONTROL in output_prepare */
@@ -703,7 +703,7 @@
 {
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 
-	NV_DEBUG(crtc->dev, "\n");
+	NV_DEBUG_KMS(crtc->dev, "\n");
 
 	if (!nv_crtc)
 		return;
diff --git a/drivers/gpu/drm/nouveau/nv04_dac.c b/drivers/gpu/drm/nouveau/nv04_dac.c
index a5fa517..d9f3287 100644
--- a/drivers/gpu/drm/nouveau/nv04_dac.c
+++ b/drivers/gpu/drm/nouveau/nv04_dac.c
@@ -205,7 +205,7 @@
 	NVWriteVgaSeq(dev, 0, NV_VIO_SR_CLOCK_INDEX, saved_seq1);
 
 	if (blue == 0x18) {
-		NV_TRACE(dev, "Load detected on head A\n");
+		NV_INFO(dev, "Load detected on head A\n");
 		return connector_status_connected;
 	}
 
@@ -350,14 +350,10 @@
 			      struct drm_display_mode *mode,
 			      struct drm_display_mode *adjusted_mode)
 {
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct drm_device *dev = encoder->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	int head = nouveau_crtc(encoder->crtc)->index;
 
-	NV_TRACE(dev, "%s called for encoder %d\n", __func__,
-		      nv_encoder->dcb->index);
-
 	if (nv_gf4_disp_arch(dev)) {
 		struct drm_encoder *rebind;
 		uint32_t dac_offset = nv04_dac_output_offset(encoder);
@@ -466,7 +462,7 @@
 {
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 
-	NV_DEBUG(encoder->dev, "\n");
+	NV_DEBUG_KMS(encoder->dev, "\n");
 
 	drm_encoder_cleanup(encoder);
 	kfree(nv_encoder);
diff --git a/drivers/gpu/drm/nouveau/nv04_dfp.c b/drivers/gpu/drm/nouveau/nv04_dfp.c
index e5b3333..483f875 100644
--- a/drivers/gpu/drm/nouveau/nv04_dfp.c
+++ b/drivers/gpu/drm/nouveau/nv04_dfp.c
@@ -261,7 +261,7 @@
 	struct drm_display_mode *output_mode = &nv_encoder->mode;
 	uint32_t mode_ratio, panel_ratio;
 
-	NV_DEBUG(dev, "Output mode on CRTC %d:\n", nv_crtc->index);
+	NV_DEBUG_KMS(dev, "Output mode on CRTC %d:\n", nv_crtc->index);
 	drm_mode_debug_printmodeline(output_mode);
 
 	/* Initialize the FP registers in this CRTC. */
@@ -413,7 +413,9 @@
 	struct dcb_entry *dcbe = nv_encoder->dcb;
 	int head = nouveau_crtc(encoder->crtc)->index;
 
-	NV_TRACE(dev, "%s called for encoder %d\n", __func__, nv_encoder->dcb->index);
+	NV_INFO(dev, "Output %s is running on CRTC %d using output %c\n",
+		drm_get_connector_name(&nouveau_encoder_connector_get(nv_encoder)->base),
+		nv_crtc->index, '@' + ffs(nv_encoder->dcb->or));
 
 	if (dcbe->type == OUTPUT_TMDS)
 		run_tmds_table(dev, dcbe, head, nv_encoder->mode.clock);
@@ -550,7 +552,7 @@
 {
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 
-	NV_DEBUG(encoder->dev, "\n");
+	NV_DEBUG_KMS(encoder->dev, "\n");
 
 	drm_encoder_cleanup(encoder);
 	kfree(nv_encoder);
diff --git a/drivers/gpu/drm/nouveau/nv04_display.c b/drivers/gpu/drm/nouveau/nv04_display.c
index b47c757..ef77215 100644
--- a/drivers/gpu/drm/nouveau/nv04_display.c
+++ b/drivers/gpu/drm/nouveau/nv04_display.c
@@ -99,10 +99,11 @@
 	uint16_t connector[16] = { 0 };
 	int i, ret;
 
-	NV_DEBUG(dev, "\n");
+	NV_DEBUG_KMS(dev, "\n");
 
 	if (nv_two_heads(dev))
 		nv04_display_store_initial_head_owner(dev);
+	nouveau_hw_save_vga_fonts(dev, 1);
 
 	drm_mode_config_init(dev);
 	drm_mode_create_scaling_mode_property(dev);
@@ -203,8 +204,6 @@
 	/* Save previous state */
 	NVLockVgaCrtcs(dev, false);
 
-	nouveau_hw_save_vga_fonts(dev, 1);
-
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
 		crtc->funcs->save(crtc);
 
@@ -223,7 +222,7 @@
 	struct drm_encoder *encoder;
 	struct drm_crtc *crtc;
 
-	NV_DEBUG(dev, "\n");
+	NV_DEBUG_KMS(dev, "\n");
 
 	/* Turn every CRTC off. */
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@@ -246,9 +245,9 @@
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
 		crtc->funcs->restore(crtc);
 
-	nouveau_hw_save_vga_fonts(dev, 0);
-
 	drm_mode_config_cleanup(dev);
+
+	nouveau_hw_save_vga_fonts(dev, 0);
 }
 
 void
diff --git a/drivers/gpu/drm/nouveau/nv04_graph.c b/drivers/gpu/drm/nouveau/nv04_graph.c
index 396ee92..d561d77 100644
--- a/drivers/gpu/drm/nouveau/nv04_graph.c
+++ b/drivers/gpu/drm/nouveau/nv04_graph.c
@@ -543,7 +543,7 @@
 
 	nv_wi32(dev, instance, tmp);
 	nv_wr32(dev, NV04_PGRAPH_CTX_SWITCH1, tmp);
-	nv_wr32(dev, NV04_PGRAPH_CTX_CACHE1 + subc, tmp);
+	nv_wr32(dev, NV04_PGRAPH_CTX_CACHE1 + (subc<<2), tmp);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv10_graph.c b/drivers/gpu/drm/nouveau/nv10_graph.c
index 6bf6804..6870e0e 100644
--- a/drivers/gpu/drm/nouveau/nv10_graph.c
+++ b/drivers/gpu/drm/nouveau/nv10_graph.c
@@ -389,49 +389,50 @@
 	int nv10[ARRAY_SIZE(nv10_graph_ctx_regs)];
 	int nv17[ARRAY_SIZE(nv17_graph_ctx_regs)];
 	struct pipe_state pipe_state;
+	uint32_t lma_window[4];
 };
 
+#define PIPE_SAVE(dev, state, addr)					\
+	do {								\
+		int __i;						\
+		nv_wr32(dev, NV10_PGRAPH_PIPE_ADDRESS, addr);		\
+		for (__i = 0; __i < ARRAY_SIZE(state); __i++)		\
+			state[__i] = nv_rd32(dev, NV10_PGRAPH_PIPE_DATA); \
+	} while (0)
+
+#define PIPE_RESTORE(dev, state, addr)					\
+	do {								\
+		int __i;						\
+		nv_wr32(dev, NV10_PGRAPH_PIPE_ADDRESS, addr);		\
+		for (__i = 0; __i < ARRAY_SIZE(state); __i++)		\
+			nv_wr32(dev, NV10_PGRAPH_PIPE_DATA, state[__i]); \
+	} while (0)
+
 static void nv10_graph_save_pipe(struct nouveau_channel *chan)
 {
 	struct drm_device *dev = chan->dev;
 	struct graph_state *pgraph_ctx = chan->pgraph_ctx;
-	struct pipe_state *fifo_pipe_state = &pgraph_ctx->pipe_state;
-	int i;
-#define PIPE_SAVE(addr) \
-	do { \
-		nv_wr32(dev, NV10_PGRAPH_PIPE_ADDRESS, addr); \
-		for (i = 0; i < ARRAY_SIZE(fifo_pipe_state->pipe_##addr); i++) \
-			fifo_pipe_state->pipe_##addr[i] = nv_rd32(dev, NV10_PGRAPH_PIPE_DATA); \
-	} while (0)
+	struct pipe_state *pipe = &pgraph_ctx->pipe_state;
 
-	PIPE_SAVE(0x4400);
-	PIPE_SAVE(0x0200);
-	PIPE_SAVE(0x6400);
-	PIPE_SAVE(0x6800);
-	PIPE_SAVE(0x6c00);
-	PIPE_SAVE(0x7000);
-	PIPE_SAVE(0x7400);
-	PIPE_SAVE(0x7800);
-	PIPE_SAVE(0x0040);
-	PIPE_SAVE(0x0000);
-
-#undef PIPE_SAVE
+	PIPE_SAVE(dev, pipe->pipe_0x4400, 0x4400);
+	PIPE_SAVE(dev, pipe->pipe_0x0200, 0x0200);
+	PIPE_SAVE(dev, pipe->pipe_0x6400, 0x6400);
+	PIPE_SAVE(dev, pipe->pipe_0x6800, 0x6800);
+	PIPE_SAVE(dev, pipe->pipe_0x6c00, 0x6c00);
+	PIPE_SAVE(dev, pipe->pipe_0x7000, 0x7000);
+	PIPE_SAVE(dev, pipe->pipe_0x7400, 0x7400);
+	PIPE_SAVE(dev, pipe->pipe_0x7800, 0x7800);
+	PIPE_SAVE(dev, pipe->pipe_0x0040, 0x0040);
+	PIPE_SAVE(dev, pipe->pipe_0x0000, 0x0000);
 }
 
 static void nv10_graph_load_pipe(struct nouveau_channel *chan)
 {
 	struct drm_device *dev = chan->dev;
 	struct graph_state *pgraph_ctx = chan->pgraph_ctx;
-	struct pipe_state *fifo_pipe_state = &pgraph_ctx->pipe_state;
-	int i;
+	struct pipe_state *pipe = &pgraph_ctx->pipe_state;
 	uint32_t xfmode0, xfmode1;
-#define PIPE_RESTORE(addr) \
-	do { \
-		nv_wr32(dev, NV10_PGRAPH_PIPE_ADDRESS, addr); \
-		for (i = 0; i < ARRAY_SIZE(fifo_pipe_state->pipe_##addr); i++) \
-			nv_wr32(dev, NV10_PGRAPH_PIPE_DATA, fifo_pipe_state->pipe_##addr[i]); \
-	} while (0)
-
+	int i;
 
 	nouveau_wait_for_idle(dev);
 	/* XXX check haiku comments */
@@ -457,24 +458,22 @@
 	nv_wr32(dev, NV10_PGRAPH_PIPE_DATA, 0x00000008);
 
 
-	PIPE_RESTORE(0x0200);
+	PIPE_RESTORE(dev, pipe->pipe_0x0200, 0x0200);
 	nouveau_wait_for_idle(dev);
 
 	/* restore XFMODE */
 	nv_wr32(dev, NV10_PGRAPH_XFMODE0, xfmode0);
 	nv_wr32(dev, NV10_PGRAPH_XFMODE1, xfmode1);
-	PIPE_RESTORE(0x6400);
-	PIPE_RESTORE(0x6800);
-	PIPE_RESTORE(0x6c00);
-	PIPE_RESTORE(0x7000);
-	PIPE_RESTORE(0x7400);
-	PIPE_RESTORE(0x7800);
-	PIPE_RESTORE(0x4400);
-	PIPE_RESTORE(0x0000);
-	PIPE_RESTORE(0x0040);
+	PIPE_RESTORE(dev, pipe->pipe_0x6400, 0x6400);
+	PIPE_RESTORE(dev, pipe->pipe_0x6800, 0x6800);
+	PIPE_RESTORE(dev, pipe->pipe_0x6c00, 0x6c00);
+	PIPE_RESTORE(dev, pipe->pipe_0x7000, 0x7000);
+	PIPE_RESTORE(dev, pipe->pipe_0x7400, 0x7400);
+	PIPE_RESTORE(dev, pipe->pipe_0x7800, 0x7800);
+	PIPE_RESTORE(dev, pipe->pipe_0x4400, 0x4400);
+	PIPE_RESTORE(dev, pipe->pipe_0x0000, 0x0000);
+	PIPE_RESTORE(dev, pipe->pipe_0x0040, 0x0040);
 	nouveau_wait_for_idle(dev);
-
-#undef PIPE_RESTORE
 }
 
 static void nv10_graph_create_pipe(struct nouveau_channel *chan)
@@ -832,6 +831,9 @@
 				      (1<<31));
 	if (dev_priv->chipset >= 0x17) {
 		nv_wr32(dev, NV10_PGRAPH_DEBUG_4, 0x1f000000);
+		nv_wr32(dev, 0x400a10, 0x3ff3fb6);
+		nv_wr32(dev, 0x400838, 0x2f8684);
+		nv_wr32(dev, 0x40083c, 0x115f3f);
 		nv_wr32(dev, 0x004006b0, 0x40000020);
 	} else
 		nv_wr32(dev, NV10_PGRAPH_DEBUG_4, 0x00000000);
@@ -867,6 +869,115 @@
 {
 }
 
+static int
+nv17_graph_mthd_lma_window(struct nouveau_channel *chan, int grclass,
+			   int mthd, uint32_t data)
+{
+	struct drm_device *dev = chan->dev;
+	struct graph_state *ctx = chan->pgraph_ctx;
+	struct pipe_state *pipe = &ctx->pipe_state;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+	uint32_t pipe_0x0040[1], pipe_0x64c0[8], pipe_0x6a80[3], pipe_0x6ab0[3];
+	uint32_t xfmode0, xfmode1;
+	int i;
+
+	ctx->lma_window[(mthd - 0x1638) / 4] = data;
+
+	if (mthd != 0x1644)
+		return 0;
+
+	nouveau_wait_for_idle(dev);
+
+	PIPE_SAVE(dev, pipe_0x0040, 0x0040);
+	PIPE_SAVE(dev, pipe->pipe_0x0200, 0x0200);
+
+	PIPE_RESTORE(dev, ctx->lma_window, 0x6790);
+
+	nouveau_wait_for_idle(dev);
+
+	xfmode0 = nv_rd32(dev, NV10_PGRAPH_XFMODE0);
+	xfmode1 = nv_rd32(dev, NV10_PGRAPH_XFMODE1);
+
+	PIPE_SAVE(dev, pipe->pipe_0x4400, 0x4400);
+	PIPE_SAVE(dev, pipe_0x64c0, 0x64c0);
+	PIPE_SAVE(dev, pipe_0x6ab0, 0x6ab0);
+	PIPE_SAVE(dev, pipe_0x6a80, 0x6a80);
+
+	nouveau_wait_for_idle(dev);
+
+	nv_wr32(dev, NV10_PGRAPH_XFMODE0, 0x10000000);
+	nv_wr32(dev, NV10_PGRAPH_XFMODE1, 0x00000000);
+	nv_wr32(dev, NV10_PGRAPH_PIPE_ADDRESS, 0x000064c0);
+	for (i = 0; i < 4; i++)
+		nv_wr32(dev, NV10_PGRAPH_PIPE_DATA, 0x3f800000);
+	for (i = 0; i < 4; i++)
+		nv_wr32(dev, NV10_PGRAPH_PIPE_DATA, 0x00000000);
+
+	nv_wr32(dev, NV10_PGRAPH_PIPE_ADDRESS, 0x00006ab0);
+	for (i = 0; i < 3; i++)
+		nv_wr32(dev, NV10_PGRAPH_PIPE_DATA, 0x3f800000);
+
+	nv_wr32(dev, NV10_PGRAPH_PIPE_ADDRESS, 0x00006a80);
+	for (i = 0; i < 3; i++)
+		nv_wr32(dev, NV10_PGRAPH_PIPE_DATA, 0x00000000);
+
+	nv_wr32(dev, NV10_PGRAPH_PIPE_ADDRESS, 0x00000040);
+	nv_wr32(dev, NV10_PGRAPH_PIPE_DATA, 0x00000008);
+
+	PIPE_RESTORE(dev, pipe->pipe_0x0200, 0x0200);
+
+	nouveau_wait_for_idle(dev);
+
+	PIPE_RESTORE(dev, pipe_0x0040, 0x0040);
+
+	nv_wr32(dev, NV10_PGRAPH_XFMODE0, xfmode0);
+	nv_wr32(dev, NV10_PGRAPH_XFMODE1, xfmode1);
+
+	PIPE_RESTORE(dev, pipe_0x64c0, 0x64c0);
+	PIPE_RESTORE(dev, pipe_0x6ab0, 0x6ab0);
+	PIPE_RESTORE(dev, pipe_0x6a80, 0x6a80);
+	PIPE_RESTORE(dev, pipe->pipe_0x4400, 0x4400);
+
+	nv_wr32(dev, NV10_PGRAPH_PIPE_ADDRESS, 0x000000c0);
+	nv_wr32(dev, NV10_PGRAPH_PIPE_DATA, 0x00000000);
+
+	nouveau_wait_for_idle(dev);
+
+	pgraph->fifo_access(dev, true);
+
+	return 0;
+}
+
+static int
+nv17_graph_mthd_lma_enable(struct nouveau_channel *chan, int grclass,
+			   int mthd, uint32_t data)
+{
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+
+	nouveau_wait_for_idle(dev);
+
+	nv_wr32(dev, NV10_PGRAPH_DEBUG_4,
+		nv_rd32(dev, NV10_PGRAPH_DEBUG_4) | 0x1 << 8);
+	nv_wr32(dev, 0x004006b0,
+		nv_rd32(dev, 0x004006b0) | 0x8 << 24);
+
+	pgraph->fifo_access(dev, true);
+
+	return 0;
+}
+
+static struct nouveau_pgraph_object_method nv17_graph_celsius_mthds[] = {
+	{ 0x1638, nv17_graph_mthd_lma_window },
+	{ 0x163c, nv17_graph_mthd_lma_window },
+	{ 0x1640, nv17_graph_mthd_lma_window },
+	{ 0x1644, nv17_graph_mthd_lma_window },
+	{ 0x1658, nv17_graph_mthd_lma_enable },
+	{}
+};
+
 struct nouveau_pgraph_object_class nv10_graph_grclass[] = {
 	{ 0x0030, false, NULL }, /* null */
 	{ 0x0039, false, NULL }, /* m2mf */
@@ -887,6 +998,6 @@
 	{ 0x0095, false, NULL }, /* multitex_tri */
 	{ 0x0056, false, NULL }, /* celcius (nv10) */
 	{ 0x0096, false, NULL }, /* celcius (nv11) */
-	{ 0x0099, false, NULL }, /* celcius (nv17) */
+	{ 0x0099, false, nv17_graph_celsius_mthds }, /* celcius (nv17) */
 	{}
 };
diff --git a/drivers/gpu/drm/nouveau/nv17_tv.c b/drivers/gpu/drm/nouveau/nv17_tv.c
index 46cfd9c..81c0135 100644
--- a/drivers/gpu/drm/nouveau/nv17_tv.c
+++ b/drivers/gpu/drm/nouveau/nv17_tv.c
@@ -219,7 +219,7 @@
 		return;
 	nouveau_encoder(encoder)->last_dpms = mode;
 
-	NV_TRACE(dev, "Setting dpms mode %d on TV encoder (output %d)\n",
+	NV_INFO(dev, "Setting dpms mode %d on TV encoder (output %d)\n",
 		 mode, nouveau_encoder(encoder)->dcb->index);
 
 	regs->ptv_200 &= ~1;
@@ -619,7 +619,7 @@
 {
 	struct nv17_tv_encoder *tv_enc = to_tv_enc(encoder);
 
-	NV_DEBUG(encoder->dev, "\n");
+	NV_DEBUG_KMS(encoder->dev, "\n");
 
 	drm_encoder_cleanup(encoder);
 	kfree(tv_enc);
diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c
index 7e8547c..2b332bb 100644
--- a/drivers/gpu/drm/nouveau/nv40_graph.c
+++ b/drivers/gpu/drm/nouveau/nv40_graph.c
@@ -24,36 +24,10 @@
  *
  */
 
-#include <linux/firmware.h>
-
 #include "drmP.h"
 #include "drm.h"
 #include "nouveau_drv.h"
-
-MODULE_FIRMWARE("nouveau/nv40.ctxprog");
-MODULE_FIRMWARE("nouveau/nv40.ctxvals");
-MODULE_FIRMWARE("nouveau/nv41.ctxprog");
-MODULE_FIRMWARE("nouveau/nv41.ctxvals");
-MODULE_FIRMWARE("nouveau/nv42.ctxprog");
-MODULE_FIRMWARE("nouveau/nv42.ctxvals");
-MODULE_FIRMWARE("nouveau/nv43.ctxprog");
-MODULE_FIRMWARE("nouveau/nv43.ctxvals");
-MODULE_FIRMWARE("nouveau/nv44.ctxprog");
-MODULE_FIRMWARE("nouveau/nv44.ctxvals");
-MODULE_FIRMWARE("nouveau/nv46.ctxprog");
-MODULE_FIRMWARE("nouveau/nv46.ctxvals");
-MODULE_FIRMWARE("nouveau/nv47.ctxprog");
-MODULE_FIRMWARE("nouveau/nv47.ctxvals");
-MODULE_FIRMWARE("nouveau/nv49.ctxprog");
-MODULE_FIRMWARE("nouveau/nv49.ctxvals");
-MODULE_FIRMWARE("nouveau/nv4a.ctxprog");
-MODULE_FIRMWARE("nouveau/nv4a.ctxvals");
-MODULE_FIRMWARE("nouveau/nv4b.ctxprog");
-MODULE_FIRMWARE("nouveau/nv4b.ctxvals");
-MODULE_FIRMWARE("nouveau/nv4c.ctxprog");
-MODULE_FIRMWARE("nouveau/nv4c.ctxvals");
-MODULE_FIRMWARE("nouveau/nv4e.ctxprog");
-MODULE_FIRMWARE("nouveau/nv4e.ctxvals");
+#include "nouveau_grctx.h"
 
 struct nouveau_channel *
 nv40_graph_channel(struct drm_device *dev)
@@ -83,27 +57,30 @@
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_gpuobj *ctx;
+	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
 	int ret;
 
-	/* Allocate a 175KiB block of PRAMIN to store the context.  This
-	 * is massive overkill for a lot of chipsets, but it should be safe
-	 * until we're able to implement this properly (will happen at more
-	 * or less the same time we're able to write our own context programs.
-	 */
-	ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, 175*1024, 16,
-					  NVOBJ_FLAG_ZERO_ALLOC,
-					  &chan->ramin_grctx);
+	ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, pgraph->grctx_size,
+				     16, NVOBJ_FLAG_ZERO_ALLOC,
+				     &chan->ramin_grctx);
 	if (ret)
 		return ret;
-	ctx = chan->ramin_grctx->gpuobj;
 
 	/* Initialise default context values */
 	dev_priv->engine.instmem.prepare_access(dev, true);
-	nv40_grctx_vals_load(dev, ctx);
-	nv_wo32(dev, ctx, 0, ctx->im_pramin->start);
-	dev_priv->engine.instmem.finish_access(dev);
+	if (!pgraph->ctxprog) {
+		struct nouveau_grctx ctx = {};
 
+		ctx.dev = chan->dev;
+		ctx.mode = NOUVEAU_GRCTX_VALS;
+		ctx.data = chan->ramin_grctx->gpuobj;
+		nv40_grctx_init(&ctx);
+	} else {
+		nouveau_grctx_vals_load(dev, chan->ramin_grctx->gpuobj);
+	}
+	nv_wo32(dev, chan->ramin_grctx->gpuobj, 0,
+		     chan->ramin_grctx->gpuobj->im_pramin->start);
+	dev_priv->engine.instmem.finish_access(dev);
 	return 0;
 }
 
@@ -204,139 +181,6 @@
 	return ret;
 }
 
-struct nouveau_ctxprog {
-	uint32_t signature;
-	uint8_t  version;
-	uint16_t length;
-	uint32_t data[];
-} __attribute__ ((packed));
-
-struct nouveau_ctxvals {
-	uint32_t signature;
-	uint8_t  version;
-	uint32_t length;
-	struct {
-		uint32_t offset;
-		uint32_t value;
-	} data[];
-} __attribute__ ((packed));
-
-int
-nv40_grctx_init(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-	const int chipset = dev_priv->chipset;
-	const struct firmware *fw;
-	const struct nouveau_ctxprog *cp;
-	const struct nouveau_ctxvals *cv;
-	char name[32];
-	int ret, i;
-
-	pgraph->accel_blocked = true;
-
-	if (!pgraph->ctxprog) {
-		sprintf(name, "nouveau/nv%02x.ctxprog", chipset);
-		ret = request_firmware(&fw, name, &dev->pdev->dev);
-		if (ret) {
-			NV_ERROR(dev, "No ctxprog for NV%02x\n", chipset);
-			return ret;
-		}
-
-		pgraph->ctxprog = kmalloc(fw->size, GFP_KERNEL);
-		if (!pgraph->ctxprog) {
-			NV_ERROR(dev, "OOM copying ctxprog\n");
-			release_firmware(fw);
-			return -ENOMEM;
-		}
-		memcpy(pgraph->ctxprog, fw->data, fw->size);
-
-		cp = pgraph->ctxprog;
-		if (le32_to_cpu(cp->signature) != 0x5043564e ||
-		    cp->version != 0 ||
-		    le16_to_cpu(cp->length) != ((fw->size - 7) / 4)) {
-			NV_ERROR(dev, "ctxprog invalid\n");
-			release_firmware(fw);
-			nv40_grctx_fini(dev);
-			return -EINVAL;
-		}
-		release_firmware(fw);
-	}
-
-	if (!pgraph->ctxvals) {
-		sprintf(name, "nouveau/nv%02x.ctxvals", chipset);
-		ret = request_firmware(&fw, name, &dev->pdev->dev);
-		if (ret) {
-			NV_ERROR(dev, "No ctxvals for NV%02x\n", chipset);
-			nv40_grctx_fini(dev);
-			return ret;
-		}
-
-		pgraph->ctxvals = kmalloc(fw->size, GFP_KERNEL);
-		if (!pgraph->ctxprog) {
-			NV_ERROR(dev, "OOM copying ctxprog\n");
-			release_firmware(fw);
-			nv40_grctx_fini(dev);
-			return -ENOMEM;
-		}
-		memcpy(pgraph->ctxvals, fw->data, fw->size);
-
-		cv = (void *)pgraph->ctxvals;
-		if (le32_to_cpu(cv->signature) != 0x5643564e ||
-		    cv->version != 0 ||
-		    le32_to_cpu(cv->length) != ((fw->size - 9) / 8)) {
-			NV_ERROR(dev, "ctxvals invalid\n");
-			release_firmware(fw);
-			nv40_grctx_fini(dev);
-			return -EINVAL;
-		}
-		release_firmware(fw);
-	}
-
-	cp = pgraph->ctxprog;
-
-	nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_INDEX, 0);
-	for (i = 0; i < le16_to_cpu(cp->length); i++)
-		nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_DATA,
-			le32_to_cpu(cp->data[i]));
-
-	pgraph->accel_blocked = false;
-	return 0;
-}
-
-void
-nv40_grctx_fini(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-
-	if (pgraph->ctxprog) {
-		kfree(pgraph->ctxprog);
-		pgraph->ctxprog = NULL;
-	}
-
-	if (pgraph->ctxvals) {
-		kfree(pgraph->ctxprog);
-		pgraph->ctxvals = NULL;
-	}
-}
-
-void
-nv40_grctx_vals_load(struct drm_device *dev, struct nouveau_gpuobj *ctx)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-	struct nouveau_ctxvals *cv = pgraph->ctxvals;
-	int i;
-
-	if (!cv)
-		return;
-
-	for (i = 0; i < le32_to_cpu(cv->length); i++)
-		nv_wo32(dev, ctx, le32_to_cpu(cv->data[i].offset),
-			le32_to_cpu(cv->data[i].value));
-}
-
 /*
  * G70		0x47
  * G71		0x49
@@ -359,7 +203,26 @@
 	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) |
 			 NV_PMC_ENABLE_PGRAPH);
 
-	nv40_grctx_init(dev);
+	if (nouveau_ctxfw) {
+		nouveau_grctx_prog_load(dev);
+		dev_priv->engine.graph.grctx_size = 175 * 1024;
+	}
+
+	if (!dev_priv->engine.graph.ctxprog) {
+		struct nouveau_grctx ctx = {};
+		uint32_t cp[256];
+
+		ctx.dev = dev;
+		ctx.mode = NOUVEAU_GRCTX_PROG;
+		ctx.data = cp;
+		ctx.ctxprog_max = 256;
+		nv40_grctx_init(&ctx);
+		dev_priv->engine.graph.grctx_size = ctx.ctxvals_pos * 4;
+
+		nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_INDEX, 0);
+		for (i = 0; i < ctx.ctxprog_len; i++)
+			nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_DATA, cp[i]);
+	}
 
 	/* No context present currently */
 	nv_wr32(dev, NV40_PGRAPH_CTXCTL_CUR, 0x00000000);
@@ -539,6 +402,7 @@
 
 void nv40_graph_takedown(struct drm_device *dev)
 {
+	nouveau_grctx_fini(dev);
 }
 
 struct nouveau_pgraph_object_class nv40_graph_grclass[] = {
diff --git a/drivers/gpu/drm/nouveau/nv40_grctx.c b/drivers/gpu/drm/nouveau/nv40_grctx.c
new file mode 100644
index 0000000..11b11c3
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nv40_grctx.c
@@ -0,0 +1,678 @@
+/*
+ * Copyright 2009 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+/* NVIDIA context programs handle a number of other conditions which are
+ * not implemented in our versions.  It's not clear why NVIDIA context
+ * programs have this code, nor whether it's strictly necessary for
+ * correct operation.  We'll implement additional handling if/when we
+ * discover it's necessary.
+ *
+ * - On context save, NVIDIA set 0x400314 bit 0 to 1 if the "3D state"
+ *   flag is set, this gets saved into the context.
+ * - On context save, the context program for all cards load nsource
+ *   into a flag register and check for ILLEGAL_MTHD.  If it's set,
+ *   opcode 0x60000d is called before resuming normal operation.
+ * - Some context programs check more conditions than the above.  NV44
+ *   checks: ((nsource & 0x0857) || (0x400718 & 0x0100) || (intr & 0x0001))
+ *   and calls 0x60000d before resuming normal operation.
+ * - At the very beginning of NVIDIA's context programs, flag 9 is checked
+ *   and if true 0x800001 is called with count=0, pos=0, the flag is cleared
+ *   and then the ctxprog is aborted.  It looks like a complicated NOP,
+ *   its purpose is unknown.
+ * - In the section of code that loads the per-vs state, NVIDIA check
+ *   flag 10.  If it's set, they only transfer the small 0x300 byte block
+ *   of state + the state for a single vs as opposed to the state for
+ *   all vs units.  It doesn't seem likely that it'll occur in normal
+ *   operation, especially seeing as it appears NVIDIA may have screwed
+ *   up the ctxprogs for some cards and have an invalid instruction
+ *   rather than a cp_lsr(ctx, dwords_for_1_vs_unit) instruction.
+ * - There's a number of places where context offset 0 (where we place
+ *   the PRAMIN offset of the context) is loaded into either 0x408000,
+ *   0x408004 or 0x408008.  Not sure what's up there either.
+ * - The ctxprogs for some cards save 0x400a00 again during the cleanup
+ *   path for auto-loadctx.
+ */
+
+#define CP_FLAG_CLEAR                 0
+#define CP_FLAG_SET                   1
+#define CP_FLAG_SWAP_DIRECTION        ((0 * 32) + 0)
+#define CP_FLAG_SWAP_DIRECTION_LOAD   0
+#define CP_FLAG_SWAP_DIRECTION_SAVE   1
+#define CP_FLAG_USER_SAVE             ((0 * 32) + 5)
+#define CP_FLAG_USER_SAVE_NOT_PENDING 0
+#define CP_FLAG_USER_SAVE_PENDING     1
+#define CP_FLAG_USER_LOAD             ((0 * 32) + 6)
+#define CP_FLAG_USER_LOAD_NOT_PENDING 0
+#define CP_FLAG_USER_LOAD_PENDING     1
+#define CP_FLAG_STATUS                ((3 * 32) + 0)
+#define CP_FLAG_STATUS_IDLE           0
+#define CP_FLAG_STATUS_BUSY           1
+#define CP_FLAG_AUTO_SAVE             ((3 * 32) + 4)
+#define CP_FLAG_AUTO_SAVE_NOT_PENDING 0
+#define CP_FLAG_AUTO_SAVE_PENDING     1
+#define CP_FLAG_AUTO_LOAD             ((3 * 32) + 5)
+#define CP_FLAG_AUTO_LOAD_NOT_PENDING 0
+#define CP_FLAG_AUTO_LOAD_PENDING     1
+#define CP_FLAG_UNK54                 ((3 * 32) + 6)
+#define CP_FLAG_UNK54_CLEAR           0
+#define CP_FLAG_UNK54_SET             1
+#define CP_FLAG_ALWAYS                ((3 * 32) + 8)
+#define CP_FLAG_ALWAYS_FALSE          0
+#define CP_FLAG_ALWAYS_TRUE           1
+#define CP_FLAG_UNK57                 ((3 * 32) + 9)
+#define CP_FLAG_UNK57_CLEAR           0
+#define CP_FLAG_UNK57_SET             1
+
+#define CP_CTX                   0x00100000
+#define CP_CTX_COUNT             0x000fc000
+#define CP_CTX_COUNT_SHIFT               14
+#define CP_CTX_REG               0x00003fff
+#define CP_LOAD_SR               0x00200000
+#define CP_LOAD_SR_VALUE         0x000fffff
+#define CP_BRA                   0x00400000
+#define CP_BRA_IP                0x0000ff00
+#define CP_BRA_IP_SHIFT                   8
+#define CP_BRA_IF_CLEAR          0x00000080
+#define CP_BRA_FLAG              0x0000007f
+#define CP_WAIT                  0x00500000
+#define CP_WAIT_SET              0x00000080
+#define CP_WAIT_FLAG             0x0000007f
+#define CP_SET                   0x00700000
+#define CP_SET_1                 0x00000080
+#define CP_SET_FLAG              0x0000007f
+#define CP_NEXT_TO_SWAP          0x00600007
+#define CP_NEXT_TO_CURRENT       0x00600009
+#define CP_SET_CONTEXT_POINTER   0x0060000a
+#define CP_END                   0x0060000e
+#define CP_LOAD_MAGIC_UNK01      0x00800001 /* unknown */
+#define CP_LOAD_MAGIC_NV44TCL    0x00800029 /* per-vs state (0x4497) */
+#define CP_LOAD_MAGIC_NV40TCL    0x00800041 /* per-vs state (0x4097) */
+
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_grctx.h"
+
+/* TODO:
+ *  - get vs count from 0x1540
+ *  - document unimplemented bits compared to nvidia
+ *    - nsource handling
+ *    - R0 & 0x0200 handling
+ *    - single-vs handling
+ *    - 400314 bit 0
+ */
+
+static int
+nv40_graph_4097(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+	if ((dev_priv->chipset & 0xf0) == 0x60)
+		return 0;
+
+	return !!(0x0baf & (1 << dev_priv->chipset));
+}
+
+static int
+nv40_graph_vs_count(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+	switch (dev_priv->chipset) {
+	case 0x47:
+	case 0x49:
+	case 0x4b:
+		return 8;
+	case 0x40:
+		return 6;
+	case 0x41:
+	case 0x42:
+		return 5;
+	case 0x43:
+	case 0x44:
+	case 0x46:
+	case 0x4a:
+		return 3;
+	case 0x4c:
+	case 0x4e:
+	case 0x67:
+	default:
+		return 1;
+	}
+}
+
+
+enum cp_label {
+	cp_check_load = 1,
+	cp_setup_auto_load,
+	cp_setup_load,
+	cp_setup_save,
+	cp_swap_state,
+	cp_swap_state3d_3_is_save,
+	cp_prepare_exit,
+	cp_exit,
+};
+
+static void
+nv40_graph_construct_general(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	int i;
+
+	cp_ctx(ctx, 0x4000a4, 1);
+	gr_def(ctx, 0x4000a4, 0x00000008);
+	cp_ctx(ctx, 0x400144, 58);
+	gr_def(ctx, 0x400144, 0x00000001);
+	cp_ctx(ctx, 0x400314, 1);
+	gr_def(ctx, 0x400314, 0x00000000);
+	cp_ctx(ctx, 0x400400, 10);
+	cp_ctx(ctx, 0x400480, 10);
+	cp_ctx(ctx, 0x400500, 19);
+	gr_def(ctx, 0x400514, 0x00040000);
+	gr_def(ctx, 0x400524, 0x55555555);
+	gr_def(ctx, 0x400528, 0x55555555);
+	gr_def(ctx, 0x40052c, 0x55555555);
+	gr_def(ctx, 0x400530, 0x55555555);
+	cp_ctx(ctx, 0x400560, 6);
+	gr_def(ctx, 0x400568, 0x0000ffff);
+	gr_def(ctx, 0x40056c, 0x0000ffff);
+	cp_ctx(ctx, 0x40057c, 5);
+	cp_ctx(ctx, 0x400710, 3);
+	gr_def(ctx, 0x400710, 0x20010001);
+	gr_def(ctx, 0x400714, 0x0f73ef00);
+	cp_ctx(ctx, 0x400724, 1);
+	gr_def(ctx, 0x400724, 0x02008821);
+	cp_ctx(ctx, 0x400770, 3);
+	if (dev_priv->chipset == 0x40) {
+		cp_ctx(ctx, 0x400814, 4);
+		cp_ctx(ctx, 0x400828, 5);
+		cp_ctx(ctx, 0x400840, 5);
+		gr_def(ctx, 0x400850, 0x00000040);
+		cp_ctx(ctx, 0x400858, 4);
+		gr_def(ctx, 0x400858, 0x00000040);
+		gr_def(ctx, 0x40085c, 0x00000040);
+		gr_def(ctx, 0x400864, 0x80000000);
+		cp_ctx(ctx, 0x40086c, 9);
+		gr_def(ctx, 0x40086c, 0x80000000);
+		gr_def(ctx, 0x400870, 0x80000000);
+		gr_def(ctx, 0x400874, 0x80000000);
+		gr_def(ctx, 0x400878, 0x80000000);
+		gr_def(ctx, 0x400888, 0x00000040);
+		gr_def(ctx, 0x40088c, 0x80000000);
+		cp_ctx(ctx, 0x4009c0, 8);
+		gr_def(ctx, 0x4009cc, 0x80000000);
+		gr_def(ctx, 0x4009dc, 0x80000000);
+	} else {
+		cp_ctx(ctx, 0x400840, 20);
+		if (!nv40_graph_4097(ctx->dev)) {
+			for (i = 0; i < 8; i++)
+				gr_def(ctx, 0x400860 + (i * 4), 0x00000001);
+		}
+		gr_def(ctx, 0x400880, 0x00000040);
+		gr_def(ctx, 0x400884, 0x00000040);
+		gr_def(ctx, 0x400888, 0x00000040);
+		cp_ctx(ctx, 0x400894, 11);
+		gr_def(ctx, 0x400894, 0x00000040);
+		if (nv40_graph_4097(ctx->dev)) {
+			for (i = 0; i < 8; i++)
+				gr_def(ctx, 0x4008a0 + (i * 4), 0x80000000);
+		}
+		cp_ctx(ctx, 0x4008e0, 2);
+		cp_ctx(ctx, 0x4008f8, 2);
+		if (dev_priv->chipset == 0x4c ||
+		    (dev_priv->chipset & 0xf0) == 0x60)
+			cp_ctx(ctx, 0x4009f8, 1);
+	}
+	cp_ctx(ctx, 0x400a00, 73);
+	gr_def(ctx, 0x400b0c, 0x0b0b0b0c);
+	cp_ctx(ctx, 0x401000, 4);
+	cp_ctx(ctx, 0x405004, 1);
+	switch (dev_priv->chipset) {
+	case 0x47:
+	case 0x49:
+	case 0x4b:
+		cp_ctx(ctx, 0x403448, 1);
+		gr_def(ctx, 0x403448, 0x00001010);
+		break;
+	default:
+		cp_ctx(ctx, 0x403440, 1);
+		switch (dev_priv->chipset) {
+		case 0x40:
+			gr_def(ctx, 0x403440, 0x00000010);
+			break;
+		case 0x44:
+		case 0x46:
+		case 0x4a:
+			gr_def(ctx, 0x403440, 0x00003010);
+			break;
+		case 0x41:
+		case 0x42:
+		case 0x43:
+		case 0x4c:
+		case 0x4e:
+		case 0x67:
+		default:
+			gr_def(ctx, 0x403440, 0x00001010);
+			break;
+		}
+		break;
+	}
+}
+
+static void
+nv40_graph_construct_state3d(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	int i;
+
+	if (dev_priv->chipset == 0x40) {
+		cp_ctx(ctx, 0x401880, 51);
+		gr_def(ctx, 0x401940, 0x00000100);
+	} else
+	if (dev_priv->chipset == 0x46 || dev_priv->chipset == 0x47 ||
+	    dev_priv->chipset == 0x49 || dev_priv->chipset == 0x4b) {
+		cp_ctx(ctx, 0x401880, 32);
+		for (i = 0; i < 16; i++)
+			gr_def(ctx, 0x401880 + (i * 4), 0x00000111);
+		if (dev_priv->chipset == 0x46)
+			cp_ctx(ctx, 0x401900, 16);
+		cp_ctx(ctx, 0x401940, 3);
+	}
+	cp_ctx(ctx, 0x40194c, 18);
+	gr_def(ctx, 0x401954, 0x00000111);
+	gr_def(ctx, 0x401958, 0x00080060);
+	gr_def(ctx, 0x401974, 0x00000080);
+	gr_def(ctx, 0x401978, 0xffff0000);
+	gr_def(ctx, 0x40197c, 0x00000001);
+	gr_def(ctx, 0x401990, 0x46400000);
+	if (dev_priv->chipset == 0x40) {
+		cp_ctx(ctx, 0x4019a0, 2);
+		cp_ctx(ctx, 0x4019ac, 5);
+	} else {
+		cp_ctx(ctx, 0x4019a0, 1);
+		cp_ctx(ctx, 0x4019b4, 3);
+	}
+	gr_def(ctx, 0x4019bc, 0xffff0000);
+	switch (dev_priv->chipset) {
+	case 0x46:
+	case 0x47:
+	case 0x49:
+	case 0x4b:
+		cp_ctx(ctx, 0x4019c0, 18);
+		for (i = 0; i < 16; i++)
+			gr_def(ctx, 0x4019c0 + (i * 4), 0x88888888);
+		break;
+	}
+	cp_ctx(ctx, 0x401a08, 8);
+	gr_def(ctx, 0x401a10, 0x0fff0000);
+	gr_def(ctx, 0x401a14, 0x0fff0000);
+	gr_def(ctx, 0x401a1c, 0x00011100);
+	cp_ctx(ctx, 0x401a2c, 4);
+	cp_ctx(ctx, 0x401a44, 26);
+	for (i = 0; i < 16; i++)
+		gr_def(ctx, 0x401a44 + (i * 4), 0x07ff0000);
+	gr_def(ctx, 0x401a8c, 0x4b7fffff);
+	if (dev_priv->chipset == 0x40) {
+		cp_ctx(ctx, 0x401ab8, 3);
+	} else {
+		cp_ctx(ctx, 0x401ab8, 1);
+		cp_ctx(ctx, 0x401ac0, 1);
+	}
+	cp_ctx(ctx, 0x401ad0, 8);
+	gr_def(ctx, 0x401ad0, 0x30201000);
+	gr_def(ctx, 0x401ad4, 0x70605040);
+	gr_def(ctx, 0x401ad8, 0xb8a89888);
+	gr_def(ctx, 0x401adc, 0xf8e8d8c8);
+	cp_ctx(ctx, 0x401b10, dev_priv->chipset == 0x40 ? 2 : 1);
+	gr_def(ctx, 0x401b10, 0x40100000);
+	cp_ctx(ctx, 0x401b18, dev_priv->chipset == 0x40 ? 6 : 5);
+	gr_def(ctx, 0x401b28, dev_priv->chipset == 0x40 ?
+			      0x00000004 : 0x00000000);
+	cp_ctx(ctx, 0x401b30, 25);
+	gr_def(ctx, 0x401b34, 0x0000ffff);
+	gr_def(ctx, 0x401b68, 0x435185d6);
+	gr_def(ctx, 0x401b6c, 0x2155b699);
+	gr_def(ctx, 0x401b70, 0xfedcba98);
+	gr_def(ctx, 0x401b74, 0x00000098);
+	gr_def(ctx, 0x401b84, 0xffffffff);
+	gr_def(ctx, 0x401b88, 0x00ff7000);
+	gr_def(ctx, 0x401b8c, 0x0000ffff);
+	if (dev_priv->chipset != 0x44 && dev_priv->chipset != 0x4a &&
+	    dev_priv->chipset != 0x4e)
+		cp_ctx(ctx, 0x401b94, 1);
+	cp_ctx(ctx, 0x401b98, 8);
+	gr_def(ctx, 0x401b9c, 0x00ff0000);
+	cp_ctx(ctx, 0x401bc0, 9);
+	gr_def(ctx, 0x401be0, 0x00ffff00);
+	cp_ctx(ctx, 0x401c00, 192);
+	for (i = 0; i < 16; i++) { /* fragment texture units */
+		gr_def(ctx, 0x401c40 + (i * 4), 0x00018488);
+		gr_def(ctx, 0x401c80 + (i * 4), 0x00028202);
+		gr_def(ctx, 0x401d00 + (i * 4), 0x0000aae4);
+		gr_def(ctx, 0x401d40 + (i * 4), 0x01012000);
+		gr_def(ctx, 0x401d80 + (i * 4), 0x00080008);
+		gr_def(ctx, 0x401e00 + (i * 4), 0x00100008);
+	}
+	for (i = 0; i < 4; i++) { /* vertex texture units */
+		gr_def(ctx, 0x401e90 + (i * 4), 0x0001bc80);
+		gr_def(ctx, 0x401ea0 + (i * 4), 0x00000202);
+		gr_def(ctx, 0x401ec0 + (i * 4), 0x00000008);
+		gr_def(ctx, 0x401ee0 + (i * 4), 0x00080008);
+	}
+	cp_ctx(ctx, 0x400f5c, 3);
+	gr_def(ctx, 0x400f5c, 0x00000002);
+	cp_ctx(ctx, 0x400f84, 1);
+}
+
+static void
+nv40_graph_construct_state3d_2(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	int i;
+
+	cp_ctx(ctx, 0x402000, 1);
+	cp_ctx(ctx, 0x402404, dev_priv->chipset == 0x40 ? 1 : 2);
+	switch (dev_priv->chipset) {
+	case 0x40:
+		gr_def(ctx, 0x402404, 0x00000001);
+		break;
+	case 0x4c:
+	case 0x4e:
+	case 0x67:
+		gr_def(ctx, 0x402404, 0x00000020);
+		break;
+	case 0x46:
+	case 0x49:
+	case 0x4b:
+		gr_def(ctx, 0x402404, 0x00000421);
+		break;
+	default:
+		gr_def(ctx, 0x402404, 0x00000021);
+	}
+	if (dev_priv->chipset != 0x40)
+		gr_def(ctx, 0x402408, 0x030c30c3);
+	switch (dev_priv->chipset) {
+	case 0x44:
+	case 0x46:
+	case 0x4a:
+	case 0x4c:
+	case 0x4e:
+	case 0x67:
+		cp_ctx(ctx, 0x402440, 1);
+		gr_def(ctx, 0x402440, 0x00011001);
+		break;
+	default:
+		break;
+	}
+	cp_ctx(ctx, 0x402480, dev_priv->chipset == 0x40 ? 8 : 9);
+	gr_def(ctx, 0x402488, 0x3e020200);
+	gr_def(ctx, 0x40248c, 0x00ffffff);
+	switch (dev_priv->chipset) {
+	case 0x40:
+		gr_def(ctx, 0x402490, 0x60103f00);
+		break;
+	case 0x47:
+		gr_def(ctx, 0x402490, 0x40103f00);
+		break;
+	case 0x41:
+	case 0x42:
+	case 0x49:
+	case 0x4b:
+		gr_def(ctx, 0x402490, 0x20103f00);
+		break;
+	default:
+		gr_def(ctx, 0x402490, 0x0c103f00);
+		break;
+	}
+	gr_def(ctx, 0x40249c, dev_priv->chipset <= 0x43 ?
+			      0x00020000 : 0x00040000);
+	cp_ctx(ctx, 0x402500, 31);
+	gr_def(ctx, 0x402530, 0x00008100);
+	if (dev_priv->chipset == 0x40)
+		cp_ctx(ctx, 0x40257c, 6);
+	cp_ctx(ctx, 0x402594, 16);
+	cp_ctx(ctx, 0x402800, 17);
+	gr_def(ctx, 0x402800, 0x00000001);
+	switch (dev_priv->chipset) {
+	case 0x47:
+	case 0x49:
+	case 0x4b:
+		cp_ctx(ctx, 0x402864, 1);
+		gr_def(ctx, 0x402864, 0x00001001);
+		cp_ctx(ctx, 0x402870, 3);
+		gr_def(ctx, 0x402878, 0x00000003);
+		if (dev_priv->chipset != 0x47) { /* belong at end!! */
+			cp_ctx(ctx, 0x402900, 1);
+			cp_ctx(ctx, 0x402940, 1);
+			cp_ctx(ctx, 0x402980, 1);
+			cp_ctx(ctx, 0x4029c0, 1);
+			cp_ctx(ctx, 0x402a00, 1);
+			cp_ctx(ctx, 0x402a40, 1);
+			cp_ctx(ctx, 0x402a80, 1);
+			cp_ctx(ctx, 0x402ac0, 1);
+		}
+		break;
+	case 0x40:
+		cp_ctx(ctx, 0x402844, 1);
+		gr_def(ctx, 0x402844, 0x00000001);
+		cp_ctx(ctx, 0x402850, 1);
+		break;
+	default:
+		cp_ctx(ctx, 0x402844, 1);
+		gr_def(ctx, 0x402844, 0x00001001);
+		cp_ctx(ctx, 0x402850, 2);
+		gr_def(ctx, 0x402854, 0x00000003);
+		break;
+	}
+
+	cp_ctx(ctx, 0x402c00, 4);
+	gr_def(ctx, 0x402c00, dev_priv->chipset == 0x40 ?
+			      0x80800001 : 0x00888001);
+	switch (dev_priv->chipset) {
+	case 0x47:
+	case 0x49:
+	case 0x4b:
+		cp_ctx(ctx, 0x402c20, 40);
+		for (i = 0; i < 32; i++)
+			gr_def(ctx, 0x402c40 + (i * 4), 0xffffffff);
+		cp_ctx(ctx, 0x4030b8, 13);
+		gr_def(ctx, 0x4030dc, 0x00000005);
+		gr_def(ctx, 0x4030e8, 0x0000ffff);
+		break;
+	default:
+		cp_ctx(ctx, 0x402c10, 4);
+		if (dev_priv->chipset == 0x40)
+			cp_ctx(ctx, 0x402c20, 36);
+		else
+		if (dev_priv->chipset <= 0x42)
+			cp_ctx(ctx, 0x402c20, 24);
+		else
+		if (dev_priv->chipset <= 0x4a)
+			cp_ctx(ctx, 0x402c20, 16);
+		else
+			cp_ctx(ctx, 0x402c20, 8);
+		cp_ctx(ctx, 0x402cb0, dev_priv->chipset == 0x40 ? 12 : 13);
+		gr_def(ctx, 0x402cd4, 0x00000005);
+		if (dev_priv->chipset != 0x40)
+			gr_def(ctx, 0x402ce0, 0x0000ffff);
+		break;
+	}
+
+	cp_ctx(ctx, 0x403400, dev_priv->chipset == 0x40 ? 4 : 3);
+	cp_ctx(ctx, 0x403410, dev_priv->chipset == 0x40 ? 4 : 3);
+	cp_ctx(ctx, 0x403420, nv40_graph_vs_count(ctx->dev));
+	for (i = 0; i < nv40_graph_vs_count(ctx->dev); i++)
+		gr_def(ctx, 0x403420 + (i * 4), 0x00005555);
+
+	if (dev_priv->chipset != 0x40) {
+		cp_ctx(ctx, 0x403600, 1);
+		gr_def(ctx, 0x403600, 0x00000001);
+	}
+	cp_ctx(ctx, 0x403800, 1);
+
+	cp_ctx(ctx, 0x403c18, 1);
+	gr_def(ctx, 0x403c18, 0x00000001);
+	switch (dev_priv->chipset) {
+	case 0x46:
+	case 0x47:
+	case 0x49:
+	case 0x4b:
+		cp_ctx(ctx, 0x405018, 1);
+		gr_def(ctx, 0x405018, 0x08e00001);
+		cp_ctx(ctx, 0x405c24, 1);
+		gr_def(ctx, 0x405c24, 0x000e3000);
+		break;
+	}
+	if (dev_priv->chipset != 0x4e)
+		cp_ctx(ctx, 0x405800, 11);
+	cp_ctx(ctx, 0x407000, 1);
+}
+
+static void
+nv40_graph_construct_state3d_3(struct nouveau_grctx *ctx)
+{
+	int len = nv40_graph_4097(ctx->dev) ? 0x0684 : 0x0084;
+
+	cp_out (ctx, 0x300000);
+	cp_lsr (ctx, len - 4);
+	cp_bra (ctx, SWAP_DIRECTION, SAVE, cp_swap_state3d_3_is_save);
+	cp_lsr (ctx, len);
+	cp_name(ctx, cp_swap_state3d_3_is_save);
+	cp_out (ctx, 0x800001);
+
+	ctx->ctxvals_pos += len;
+}
+
+static void
+nv40_graph_construct_shader(struct nouveau_grctx *ctx)
+{
+	struct drm_device *dev = ctx->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_gpuobj *obj = ctx->data;
+	int vs, vs_nr, vs_len, vs_nr_b0, vs_nr_b1, b0_offset, b1_offset;
+	int offset, i;
+
+	vs_nr    = nv40_graph_vs_count(ctx->dev);
+	vs_nr_b0 = 363;
+	vs_nr_b1 = dev_priv->chipset == 0x40 ? 128 : 64;
+	if (dev_priv->chipset == 0x40) {
+		b0_offset = 0x2200/4; /* 33a0 */
+		b1_offset = 0x55a0/4; /* 1500 */
+		vs_len = 0x6aa0/4;
+	} else
+	if (dev_priv->chipset == 0x41 || dev_priv->chipset == 0x42) {
+		b0_offset = 0x2200/4; /* 2200 */
+		b1_offset = 0x4400/4; /* 0b00 */
+		vs_len = 0x4f00/4;
+	} else {
+		b0_offset = 0x1d40/4; /* 2200 */
+		b1_offset = 0x3f40/4; /* 0b00 : 0a40 */
+		vs_len = nv40_graph_4097(dev) ? 0x4a40/4 : 0x4980/4;
+	}
+
+	cp_lsr(ctx, vs_len * vs_nr + 0x300/4);
+	cp_out(ctx, nv40_graph_4097(dev) ? 0x800041 : 0x800029);
+
+	offset = ctx->ctxvals_pos;
+	ctx->ctxvals_pos += (0x0300/4 + (vs_nr * vs_len));
+
+	if (ctx->mode != NOUVEAU_GRCTX_VALS)
+		return;
+
+	offset += 0x0280/4;
+	for (i = 0; i < 16; i++, offset += 2)
+		nv_wo32(dev, obj, offset, 0x3f800000);
+
+	for (vs = 0; vs < vs_nr; vs++, offset += vs_len) {
+		for (i = 0; i < vs_nr_b0 * 6; i += 6)
+			nv_wo32(dev, obj, offset + b0_offset + i, 0x00000001);
+		for (i = 0; i < vs_nr_b1 * 4; i += 4)
+			nv_wo32(dev, obj, offset + b1_offset + i, 0x3f800000);
+	}
+}
+
+void
+nv40_grctx_init(struct nouveau_grctx *ctx)
+{
+	/* decide whether we're loading/unloading the context */
+	cp_bra (ctx, AUTO_SAVE, PENDING, cp_setup_save);
+	cp_bra (ctx, USER_SAVE, PENDING, cp_setup_save);
+
+	cp_name(ctx, cp_check_load);
+	cp_bra (ctx, AUTO_LOAD, PENDING, cp_setup_auto_load);
+	cp_bra (ctx, USER_LOAD, PENDING, cp_setup_load);
+	cp_bra (ctx, ALWAYS, TRUE, cp_exit);
+
+	/* setup for context load */
+	cp_name(ctx, cp_setup_auto_load);
+	cp_wait(ctx, STATUS, IDLE);
+	cp_out (ctx, CP_NEXT_TO_SWAP);
+	cp_name(ctx, cp_setup_load);
+	cp_wait(ctx, STATUS, IDLE);
+	cp_set (ctx, SWAP_DIRECTION, LOAD);
+	cp_out (ctx, 0x00910880); /* ?? */
+	cp_out (ctx, 0x00901ffe); /* ?? */
+	cp_out (ctx, 0x01940000); /* ?? */
+	cp_lsr (ctx, 0x20);
+	cp_out (ctx, 0x0060000b); /* ?? */
+	cp_wait(ctx, UNK57, CLEAR);
+	cp_out (ctx, 0x0060000c); /* ?? */
+	cp_bra (ctx, ALWAYS, TRUE, cp_swap_state);
+
+	/* setup for context save */
+	cp_name(ctx, cp_setup_save);
+	cp_set (ctx, SWAP_DIRECTION, SAVE);
+
+	/* general PGRAPH state */
+	cp_name(ctx, cp_swap_state);
+	cp_pos (ctx, 0x00020/4);
+	nv40_graph_construct_general(ctx);
+	cp_wait(ctx, STATUS, IDLE);
+
+	/* 3D state, block 1 */
+	cp_bra (ctx, UNK54, CLEAR, cp_prepare_exit);
+	nv40_graph_construct_state3d(ctx);
+	cp_wait(ctx, STATUS, IDLE);
+
+	/* 3D state, block 2 */
+	nv40_graph_construct_state3d_2(ctx);
+
+	/* Some other block of "random" state */
+	nv40_graph_construct_state3d_3(ctx);
+
+	/* Per-vertex shader state */
+	cp_pos (ctx, ctx->ctxvals_pos);
+	nv40_graph_construct_shader(ctx);
+
+	/* pre-exit state updates */
+	cp_name(ctx, cp_prepare_exit);
+	cp_bra (ctx, SWAP_DIRECTION, SAVE, cp_check_load);
+	cp_bra (ctx, USER_SAVE, PENDING, cp_exit);
+	cp_out (ctx, CP_NEXT_TO_CURRENT);
+
+	cp_name(ctx, cp_exit);
+	cp_set (ctx, USER_SAVE, NOT_PENDING);
+	cp_set (ctx, USER_LOAD, NOT_PENDING);
+	cp_out (ctx, CP_END);
+}
+
diff --git a/drivers/gpu/drm/nouveau/nv50_crtc.c b/drivers/gpu/drm/nouveau/nv50_crtc.c
index f8e28a1..118d328 100644
--- a/drivers/gpu/drm/nouveau/nv50_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv50_crtc.c
@@ -45,7 +45,7 @@
 	void __iomem *lut = nvbo_kmap_obj_iovirtual(nv_crtc->lut.nvbo);
 	int i;
 
-	NV_DEBUG(crtc->dev, "\n");
+	NV_DEBUG_KMS(crtc->dev, "\n");
 
 	for (i = 0; i < 256; i++) {
 		writew(nv_crtc->lut.r[i] >> 2, lut + 8*i + 0);
@@ -68,8 +68,8 @@
 	struct nouveau_channel *evo = dev_priv->evo;
 	int index = nv_crtc->index, ret;
 
-	NV_DEBUG(dev, "index %d\n", nv_crtc->index);
-	NV_DEBUG(dev, "%s\n", blanked ? "blanked" : "unblanked");
+	NV_DEBUG_KMS(dev, "index %d\n", nv_crtc->index);
+	NV_DEBUG_KMS(dev, "%s\n", blanked ? "blanked" : "unblanked");
 
 	if (blanked) {
 		nv_crtc->cursor.hide(nv_crtc, false);
@@ -139,7 +139,7 @@
 	struct nouveau_channel *evo = dev_priv->evo;
 	int ret;
 
-	NV_DEBUG(dev, "\n");
+	NV_DEBUG_KMS(dev, "\n");
 
 	ret = RING_SPACE(evo, 2 + (update ? 2 : 0));
 	if (ret) {
@@ -193,7 +193,7 @@
 	uint32_t outX, outY, horiz, vert;
 	int ret;
 
-	NV_DEBUG(dev, "\n");
+	NV_DEBUG_KMS(dev, "\n");
 
 	switch (scaling_mode) {
 	case DRM_MODE_SCALE_NONE:
@@ -301,7 +301,7 @@
 	struct drm_device *dev = crtc->dev;
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 
-	NV_DEBUG(dev, "\n");
+	NV_DEBUG_KMS(dev, "\n");
 
 	if (!crtc)
 		return;
@@ -433,7 +433,7 @@
 	struct drm_device *dev = crtc->dev;
 	struct drm_encoder *encoder;
 
-	NV_DEBUG(dev, "index %d\n", nv_crtc->index);
+	NV_DEBUG_KMS(dev, "index %d\n", nv_crtc->index);
 
 	/* Disconnect all unused encoders. */
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
@@ -458,7 +458,7 @@
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 	int ret;
 
-	NV_DEBUG(dev, "index %d\n", nv_crtc->index);
+	NV_DEBUG_KMS(dev, "index %d\n", nv_crtc->index);
 
 	nv50_crtc_blank(nv_crtc, false);
 
@@ -497,7 +497,7 @@
 	struct nouveau_framebuffer *fb = nouveau_framebuffer(drm_fb);
 	int ret, format;
 
-	NV_DEBUG(dev, "index %d\n", nv_crtc->index);
+	NV_DEBUG_KMS(dev, "index %d\n", nv_crtc->index);
 
 	switch (drm_fb->depth) {
 	case  8:
@@ -612,7 +612,7 @@
 
 	*nv_crtc->mode = *adjusted_mode;
 
-	NV_DEBUG(dev, "index %d\n", nv_crtc->index);
+	NV_DEBUG_KMS(dev, "index %d\n", nv_crtc->index);
 
 	hsync_dur = adjusted_mode->hsync_end - adjusted_mode->hsync_start;
 	vsync_dur = adjusted_mode->vsync_end - adjusted_mode->vsync_start;
@@ -706,7 +706,7 @@
 	struct nouveau_crtc *nv_crtc = NULL;
 	int ret, i;
 
-	NV_DEBUG(dev, "\n");
+	NV_DEBUG_KMS(dev, "\n");
 
 	nv_crtc = kzalloc(sizeof(*nv_crtc), GFP_KERNEL);
 	if (!nv_crtc)
diff --git a/drivers/gpu/drm/nouveau/nv50_cursor.c b/drivers/gpu/drm/nouveau/nv50_cursor.c
index e2e79a8..753e723 100644
--- a/drivers/gpu/drm/nouveau/nv50_cursor.c
+++ b/drivers/gpu/drm/nouveau/nv50_cursor.c
@@ -41,7 +41,7 @@
 	struct drm_device *dev = nv_crtc->base.dev;
 	int ret;
 
-	NV_DEBUG(dev, "\n");
+	NV_DEBUG_KMS(dev, "\n");
 
 	if (update && nv_crtc->cursor.visible)
 		return;
@@ -76,7 +76,7 @@
 	struct drm_device *dev = nv_crtc->base.dev;
 	int ret;
 
-	NV_DEBUG(dev, "\n");
+	NV_DEBUG_KMS(dev, "\n");
 
 	if (update && !nv_crtc->cursor.visible)
 		return;
@@ -116,7 +116,7 @@
 static void
 nv50_cursor_set_offset(struct nouveau_crtc *nv_crtc, uint32_t offset)
 {
-	NV_DEBUG(nv_crtc->base.dev, "\n");
+	NV_DEBUG_KMS(nv_crtc->base.dev, "\n");
 	if (offset == nv_crtc->cursor.offset)
 		return;
 
@@ -143,7 +143,7 @@
 	struct drm_device *dev = nv_crtc->base.dev;
 	int idx = nv_crtc->index;
 
-	NV_DEBUG(dev, "\n");
+	NV_DEBUG_KMS(dev, "\n");
 
 	nv_wr32(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(idx), 0);
 	if (!nv_wait(NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(idx),
diff --git a/drivers/gpu/drm/nouveau/nv50_dac.c b/drivers/gpu/drm/nouveau/nv50_dac.c
index fb5838e..f08f042 100644
--- a/drivers/gpu/drm/nouveau/nv50_dac.c
+++ b/drivers/gpu/drm/nouveau/nv50_dac.c
@@ -44,7 +44,7 @@
 	struct nouveau_channel *evo = dev_priv->evo;
 	int ret;
 
-	NV_DEBUG(dev, "Disconnecting DAC %d\n", nv_encoder->or);
+	NV_DEBUG_KMS(dev, "Disconnecting DAC %d\n", nv_encoder->or);
 
 	ret = RING_SPACE(evo, 2);
 	if (ret) {
@@ -81,11 +81,11 @@
 	/* Use bios provided value if possible. */
 	if (dev_priv->vbios->dactestval) {
 		load_pattern = dev_priv->vbios->dactestval;
-		NV_DEBUG(dev, "Using bios provided load_pattern of %d\n",
+		NV_DEBUG_KMS(dev, "Using bios provided load_pattern of %d\n",
 			  load_pattern);
 	} else {
 		load_pattern = 340;
-		NV_DEBUG(dev, "Using default load_pattern of %d\n",
+		NV_DEBUG_KMS(dev, "Using default load_pattern of %d\n",
 			 load_pattern);
 	}
 
@@ -103,9 +103,9 @@
 		status = connector_status_connected;
 
 	if (status == connector_status_connected)
-		NV_DEBUG(dev, "Load was detected on output with or %d\n", or);
+		NV_DEBUG_KMS(dev, "Load was detected on output with or %d\n", or);
 	else
-		NV_DEBUG(dev, "Load was not detected on output with or %d\n", or);
+		NV_DEBUG_KMS(dev, "Load was not detected on output with or %d\n", or);
 
 	return status;
 }
@@ -118,7 +118,7 @@
 	uint32_t val;
 	int or = nv_encoder->or;
 
-	NV_DEBUG(dev, "or %d mode %d\n", or, mode);
+	NV_DEBUG_KMS(dev, "or %d mode %d\n", or, mode);
 
 	/* wait for it to be done */
 	if (!nv_wait(NV50_PDISPLAY_DAC_DPMS_CTRL(or),
@@ -173,7 +173,7 @@
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct nouveau_connector *connector;
 
-	NV_DEBUG(encoder->dev, "or %d\n", nv_encoder->or);
+	NV_DEBUG_KMS(encoder->dev, "or %d\n", nv_encoder->or);
 
 	connector = nouveau_encoder_connector_get(nv_encoder);
 	if (!connector) {
@@ -213,7 +213,7 @@
 	uint32_t mode_ctl = 0, mode_ctl2 = 0;
 	int ret;
 
-	NV_DEBUG(dev, "or %d\n", nv_encoder->or);
+	NV_DEBUG_KMS(dev, "or %d\n", nv_encoder->or);
 
 	nv50_dac_dpms(encoder, DRM_MODE_DPMS_ON);
 
@@ -264,7 +264,7 @@
 	if (!encoder)
 		return;
 
-	NV_DEBUG(encoder->dev, "\n");
+	NV_DEBUG_KMS(encoder->dev, "\n");
 
 	drm_encoder_cleanup(encoder);
 	kfree(nv_encoder);
@@ -280,7 +280,7 @@
 	struct nouveau_encoder *nv_encoder;
 	struct drm_encoder *encoder;
 
-	NV_DEBUG(dev, "\n");
+	NV_DEBUG_KMS(dev, "\n");
 	NV_INFO(dev, "Detected a DAC output\n");
 
 	nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL);
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 12c5ee6..a9263d9 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -188,7 +188,7 @@
 	uint64_t start;
 	int ret, i;
 
-	NV_DEBUG(dev, "\n");
+	NV_DEBUG_KMS(dev, "\n");
 
 	nv_wr32(dev, 0x00610184, nv_rd32(dev, 0x00614004));
 	/*
@@ -232,7 +232,7 @@
 	nv_wr32(dev, NV50_PDISPLAY_UNK_380, 0);
 	/* RAM is clamped to 256 MiB. */
 	ram_amount = nouveau_mem_fb_amount(dev);
-	NV_DEBUG(dev, "ram_amount %d\n", ram_amount);
+	NV_DEBUG_KMS(dev, "ram_amount %d\n", ram_amount);
 	if (ram_amount > 256*1024*1024)
 		ram_amount = 256*1024*1024;
 	nv_wr32(dev, NV50_PDISPLAY_RAM_AMOUNT, ram_amount - 1);
@@ -398,7 +398,7 @@
 	struct drm_crtc *drm_crtc;
 	int ret, i;
 
-	NV_DEBUG(dev, "\n");
+	NV_DEBUG_KMS(dev, "\n");
 
 	list_for_each_entry(drm_crtc, &dev->mode_config.crtc_list, head) {
 		struct nouveau_crtc *crtc = nouveau_crtc(drm_crtc);
@@ -469,7 +469,7 @@
 	uint32_t connector[16] = {};
 	int ret, i;
 
-	NV_DEBUG(dev, "\n");
+	NV_DEBUG_KMS(dev, "\n");
 
 	/* init basic kernel modesetting */
 	drm_mode_config_init(dev);
@@ -573,7 +573,7 @@
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 
-	NV_DEBUG(dev, "\n");
+	NV_DEBUG_KMS(dev, "\n");
 
 	drm_mode_config_cleanup(dev);
 
@@ -617,7 +617,7 @@
 	 * CRTC separately, and submission will be blocked by the GPU
 	 * until we handle each in turn.
 	 */
-	NV_DEBUG(dev, "0x610030: 0x%08x\n", unk30);
+	NV_DEBUG_KMS(dev, "0x610030: 0x%08x\n", unk30);
 	head = ffs((unk30 >> 9) & 3) - 1;
 	if (head < 0)
 		return -EINVAL;
@@ -661,7 +661,7 @@
 		or = i;
 	}
 
-	NV_DEBUG(dev, "type %d, or %d\n", type, or);
+	NV_DEBUG_KMS(dev, "type %d, or %d\n", type, or);
 	if (type == OUTPUT_ANY) {
 		NV_ERROR(dev, "unknown encoder!!\n");
 		return -1;
@@ -811,7 +811,7 @@
 	pclk = nv_rd32(dev, NV50_PDISPLAY_CRTC_P(head, CLOCK)) & 0x3fffff;
 	script = nv50_display_script_select(dev, dcbent, pclk);
 
-	NV_DEBUG(dev, "head %d pxclk: %dKHz\n", head, pclk);
+	NV_DEBUG_KMS(dev, "head %d pxclk: %dKHz\n", head, pclk);
 
 	if (dcbent->type != OUTPUT_DP)
 		nouveau_bios_run_display_table(dev, dcbent, 0, -2);
@@ -870,7 +870,7 @@
 		uint32_t intr0 = nv_rd32(dev, NV50_PDISPLAY_INTR_0);
 		uint32_t intr1 = nv_rd32(dev, NV50_PDISPLAY_INTR_1);
 
-		NV_DEBUG(dev, "PDISPLAY_INTR_BH 0x%08x 0x%08x\n", intr0, intr1);
+		NV_DEBUG_KMS(dev, "PDISPLAY_INTR_BH 0x%08x 0x%08x\n", intr0, intr1);
 
 		if (intr1 & NV50_PDISPLAY_INTR_1_CLK_UNK10)
 			nv50_display_unk10_handler(dev);
@@ -974,7 +974,7 @@
 		uint32_t intr1 = nv_rd32(dev, NV50_PDISPLAY_INTR_1);
 		uint32_t clock;
 
-		NV_DEBUG(dev, "PDISPLAY_INTR 0x%08x 0x%08x\n", intr0, intr1);
+		NV_DEBUG_KMS(dev, "PDISPLAY_INTR 0x%08x 0x%08x\n", intr0, intr1);
 
 		if (!intr0 && !(intr1 & ~delayed))
 			break;
diff --git a/drivers/gpu/drm/nouveau/nv50_fifo.c b/drivers/gpu/drm/nouveau/nv50_fifo.c
index 77ae1aa..b728228 100644
--- a/drivers/gpu/drm/nouveau/nv50_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv50_fifo.c
@@ -416,7 +416,7 @@
 	NV_DEBUG(dev, "\n");
 
 	chid = pfifo->channel_id(dev);
-	if (chid < 0 || chid >= dev_priv->engine.fifo.channels)
+	if (chid < 1 || chid >= dev_priv->engine.fifo.channels - 1)
 		return 0;
 
 	chan = dev_priv->fifos[chid];
diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c
index 177d822..ca79f32 100644
--- a/drivers/gpu/drm/nouveau/nv50_graph.c
+++ b/drivers/gpu/drm/nouveau/nv50_graph.c
@@ -107,9 +107,13 @@
 static int
 nv50_graph_init_ctxctl(struct drm_device *dev)
 {
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+
 	NV_DEBUG(dev, "\n");
 
-	nv40_grctx_init(dev);
+	nouveau_grctx_prog_load(dev);
+	if (!dev_priv->engine.graph.ctxprog)
+		dev_priv->engine.graph.accel_blocked = true;
 
 	nv_wr32(dev, 0x400320, 4);
 	nv_wr32(dev, NV40_PGRAPH_CTXCTL_CUR, 0);
@@ -140,7 +144,7 @@
 nv50_graph_takedown(struct drm_device *dev)
 {
 	NV_DEBUG(dev, "\n");
-	nv40_grctx_fini(dev);
+	nouveau_grctx_fini(dev);
 }
 
 void
@@ -207,7 +211,7 @@
 	dev_priv->engine.instmem.finish_access(dev);
 
 	dev_priv->engine.instmem.prepare_access(dev, true);
-	nv40_grctx_vals_load(dev, ctx);
+	nouveau_grctx_vals_load(dev, ctx);
 	nv_wo32(dev, ctx, 0x00000/4, chan->ramin->instance >> 12);
 	if ((dev_priv->chipset & 0xf0) == 0xa0)
 		nv_wo32(dev, ctx, 0x00004/4, 0x00000000);
diff --git a/drivers/gpu/drm/nouveau/nv50_sor.c b/drivers/gpu/drm/nouveau/nv50_sor.c
index 8c28046..e395c16 100644
--- a/drivers/gpu/drm/nouveau/nv50_sor.c
+++ b/drivers/gpu/drm/nouveau/nv50_sor.c
@@ -44,7 +44,7 @@
 	struct nouveau_channel *evo = dev_priv->evo;
 	int ret;
 
-	NV_DEBUG(dev, "Disconnecting SOR %d\n", nv_encoder->or);
+	NV_DEBUG_KMS(dev, "Disconnecting SOR %d\n", nv_encoder->or);
 
 	ret = RING_SPACE(evo, 2);
 	if (ret) {
@@ -70,7 +70,7 @@
 	}
 
 	if (dpe->script0) {
-		NV_DEBUG(dev, "SOR-%d: running DP script 0\n", nv_encoder->or);
+		NV_DEBUG_KMS(dev, "SOR-%d: running DP script 0\n", nv_encoder->or);
 		nouveau_bios_run_init_table(dev, le16_to_cpu(dpe->script0),
 					    nv_encoder->dcb);
 	}
@@ -79,7 +79,7 @@
 		NV_ERROR(dev, "SOR-%d: link training failed\n", nv_encoder->or);
 
 	if (dpe->script1) {
-		NV_DEBUG(dev, "SOR-%d: running DP script 1\n", nv_encoder->or);
+		NV_DEBUG_KMS(dev, "SOR-%d: running DP script 1\n", nv_encoder->or);
 		nouveau_bios_run_init_table(dev, le16_to_cpu(dpe->script1),
 					    nv_encoder->dcb);
 	}
@@ -93,7 +93,7 @@
 	uint32_t val;
 	int or = nv_encoder->or;
 
-	NV_DEBUG(dev, "or %d mode %d\n", or, mode);
+	NV_DEBUG_KMS(dev, "or %d mode %d\n", or, mode);
 
 	/* wait for it to be done */
 	if (!nv_wait(NV50_PDISPLAY_SOR_DPMS_CTRL(or),
@@ -142,7 +142,7 @@
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct nouveau_connector *connector;
 
-	NV_DEBUG(encoder->dev, "or %d\n", nv_encoder->or);
+	NV_DEBUG_KMS(encoder->dev, "or %d\n", nv_encoder->or);
 
 	connector = nouveau_encoder_connector_get(nv_encoder);
 	if (!connector) {
@@ -182,7 +182,7 @@
 	uint32_t mode_ctl = 0;
 	int ret;
 
-	NV_DEBUG(dev, "or %d\n", nv_encoder->or);
+	NV_DEBUG_KMS(dev, "or %d\n", nv_encoder->or);
 
 	nv50_sor_dpms(encoder, DRM_MODE_DPMS_ON);
 
@@ -246,7 +246,7 @@
 	if (!encoder)
 		return;
 
-	NV_DEBUG(encoder->dev, "\n");
+	NV_DEBUG_KMS(encoder->dev, "\n");
 
 	drm_encoder_cleanup(encoder);
 
@@ -265,7 +265,7 @@
 	bool dum;
 	int type;
 
-	NV_DEBUG(dev, "\n");
+	NV_DEBUG_KMS(dev, "\n");
 
 	switch (entry->type) {
 	case OUTPUT_TMDS:
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 71dafb6..ffac157 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1408,7 +1408,7 @@
 				   struct drm_ati_pcigart_info * gart_info);
 
 extern drm_dma_handle_t *drm_pci_alloc(struct drm_device *dev, size_t size,
-				       size_t align, dma_addr_t maxaddr);
+				       size_t align);
 extern void __drm_pci_free(struct drm_device *dev, drm_dma_handle_t * dmah);
 extern void drm_pci_free(struct drm_device *dev, drm_dma_handle_t * dmah);