Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux

* 'drm-fixes' of git://people.freedesktop.org/~airlied/linux:
  vmwgfx: Clip cliprects against screen boundaries in present and dirty
  vmwgfx: Resend the cursor after legacy modeset
  vmwgfx: Do better culling of presents
  vmwgfx: Refactor kms code to use vmw_user_lookup_handle helper
  vmwgfx: Add helper function to get surface or dmabuf
  vmwgfx: Refactor cursor update
  vmwgfx: Remove dmabuf check in present ioctl
  vmwgfx: Use the revised fifo hw version register when present
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 8cca91a..dc27970 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -390,6 +390,11 @@
 			     struct ttm_object_file *tfile,
 			     int id,
 			     struct vmw_resource **p_res);
+extern int vmw_user_lookup_handle(struct vmw_private *dev_priv,
+				  struct ttm_object_file *tfile,
+				  uint32_t handle,
+				  struct vmw_surface **out_surf,
+				  struct vmw_dma_buffer **out_buf);
 extern void vmw_surface_res_free(struct vmw_resource *res);
 extern int vmw_surface_init(struct vmw_private *dev_priv,
 			    struct vmw_surface *srf,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index 03bbc2a..a0c2f12 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -33,6 +33,7 @@
 {
 	__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
 	uint32_t fifo_min, hwversion;
+	const struct vmw_fifo_state *fifo = &dev_priv->fifo;
 
 	if (!(dev_priv->capabilities & SVGA_CAP_EXTENDED_FIFO))
 		return false;
@@ -41,7 +42,12 @@
 	if (fifo_min <= SVGA_FIFO_3D_HWVERSION * sizeof(unsigned int))
 		return false;
 
-	hwversion = ioread32(fifo_mem + SVGA_FIFO_3D_HWVERSION);
+	hwversion = ioread32(fifo_mem +
+			     ((fifo->capabilities &
+			       SVGA_FIFO_CAP_3D_HWVERSION_REVISED) ?
+			      SVGA_FIFO_3D_HWVERSION_REVISED :
+			      SVGA_FIFO_3D_HWVERSION));
+
 	if (hwversion == 0)
 		return false;
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index 5ff561d..66917c6 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -58,8 +58,14 @@
 	case DRM_VMW_PARAM_FIFO_HW_VERSION:
 	{
 		__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
+		const struct vmw_fifo_state *fifo = &dev_priv->fifo;
 
-		param->value = ioread32(fifo_mem + SVGA_FIFO_3D_HWVERSION);
+		param->value =
+			ioread32(fifo_mem +
+				 ((fifo->capabilities &
+				   SVGA_FIFO_CAP_3D_HWVERSION_REVISED) ?
+				  SVGA_FIFO_3D_HWVERSION_REVISED :
+				  SVGA_FIFO_3D_HWVERSION));
 		break;
 	}
 	default:
@@ -166,13 +172,7 @@
 		ret = -EINVAL;
 		goto out_no_fb;
 	}
-
 	vfb = vmw_framebuffer_to_vfb(obj_to_fb(obj));
-	if (!vfb->dmabuf) {
-		DRM_ERROR("Framebuffer not dmabuf backed.\n");
-		ret = -EINVAL;
-		goto out_no_fb;
-	}
 
 	ret = ttm_read_lock(&vmaster->lock, true);
 	if (unlikely(ret != 0))
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 37d4054..8aa1dbb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -31,6 +31,44 @@
 /* Might need a hrtimer here? */
 #define VMWGFX_PRESENT_RATE ((HZ / 60 > 0) ? HZ / 60 : 1)
 
+
+struct vmw_clip_rect {
+	int x1, x2, y1, y2;
+};
+
+/**
+ * Clip @num_rects number of @rects against @clip storing the
+ * results in @out_rects and the number of passed rects in @out_num.
+ */
+void vmw_clip_cliprects(struct drm_clip_rect *rects,
+			int num_rects,
+			struct vmw_clip_rect clip,
+			SVGASignedRect *out_rects,
+			int *out_num)
+{
+	int i, k;
+
+	for (i = 0, k = 0; i < num_rects; i++) {
+		int x1 = max_t(int, clip.x1, rects[i].x1);
+		int y1 = max_t(int, clip.y1, rects[i].y1);
+		int x2 = min_t(int, clip.x2, rects[i].x2);
+		int y2 = min_t(int, clip.y2, rects[i].y2);
+
+		if (x1 >= x2)
+			continue;
+		if (y1 >= y2)
+			continue;
+
+		out_rects[k].left   = x1;
+		out_rects[k].top    = y1;
+		out_rects[k].right  = x2;
+		out_rects[k].bottom = y2;
+		k++;
+	}
+
+	*out_num = k;
+}
+
 void vmw_display_unit_cleanup(struct vmw_display_unit *du)
 {
 	if (du->cursor_surface)
@@ -82,6 +120,43 @@
 	return 0;
 }
 
+int vmw_cursor_update_dmabuf(struct vmw_private *dev_priv,
+			     struct vmw_dma_buffer *dmabuf,
+			     u32 width, u32 height,
+			     u32 hotspotX, u32 hotspotY)
+{
+	struct ttm_bo_kmap_obj map;
+	unsigned long kmap_offset;
+	unsigned long kmap_num;
+	void *virtual;
+	bool dummy;
+	int ret;
+
+	kmap_offset = 0;
+	kmap_num = (width*height*4 + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+	ret = ttm_bo_reserve(&dmabuf->base, true, false, false, 0);
+	if (unlikely(ret != 0)) {
+		DRM_ERROR("reserve failed\n");
+		return -EINVAL;
+	}
+
+	ret = ttm_bo_kmap(&dmabuf->base, kmap_offset, kmap_num, &map);
+	if (unlikely(ret != 0))
+		goto err_unreserve;
+
+	virtual = ttm_kmap_obj_virtual(&map, &dummy);
+	ret = vmw_cursor_update_image(dev_priv, virtual, width, height,
+				      hotspotX, hotspotY);
+
+	ttm_bo_kunmap(&map);
+err_unreserve:
+	ttm_bo_unreserve(&dmabuf->base);
+
+	return ret;
+}
+
+
 void vmw_cursor_update_position(struct vmw_private *dev_priv,
 				bool show, int x, int y)
 {
@@ -110,24 +185,21 @@
 		return -EINVAL;
 
 	if (handle) {
-		ret = vmw_user_surface_lookup_handle(dev_priv, tfile,
-						     handle, &surface);
-		if (!ret) {
-			if (!surface->snooper.image) {
-				DRM_ERROR("surface not suitable for cursor\n");
-				vmw_surface_unreference(&surface);
-				return -EINVAL;
-			}
-		} else {
-			ret = vmw_user_dmabuf_lookup(tfile,
-						     handle, &dmabuf);
-			if (ret) {
-				DRM_ERROR("failed to find surface or dmabuf: %i\n", ret);
-				return -EINVAL;
-			}
+		ret = vmw_user_lookup_handle(dev_priv, tfile,
+					     handle, &surface, &dmabuf);
+		if (ret) {
+			DRM_ERROR("failed to find surface or dmabuf: %i\n", ret);
+			return -EINVAL;
 		}
 	}
 
+	/* need to do this before taking down old image */
+	if (surface && !surface->snooper.image) {
+		DRM_ERROR("surface not suitable for cursor\n");
+		vmw_surface_unreference(&surface);
+		return -EINVAL;
+	}
+
 	/* takedown old cursor */
 	if (du->cursor_surface) {
 		du->cursor_surface->snooper.crtc = NULL;
@@ -146,36 +218,11 @@
 		vmw_cursor_update_image(dev_priv, surface->snooper.image,
 					64, 64, du->hotspot_x, du->hotspot_y);
 	} else if (dmabuf) {
-		struct ttm_bo_kmap_obj map;
-		unsigned long kmap_offset;
-		unsigned long kmap_num;
-		void *virtual;
-		bool dummy;
-
 		/* vmw_user_surface_lookup takes one reference */
 		du->cursor_dmabuf = dmabuf;
 
-		kmap_offset = 0;
-		kmap_num = (64*64*4) >> PAGE_SHIFT;
-
-		ret = ttm_bo_reserve(&dmabuf->base, true, false, false, 0);
-		if (unlikely(ret != 0)) {
-			DRM_ERROR("reserve failed\n");
-			return -EINVAL;
-		}
-
-		ret = ttm_bo_kmap(&dmabuf->base, kmap_offset, kmap_num, &map);
-		if (unlikely(ret != 0))
-			goto err_unreserve;
-
-		virtual = ttm_kmap_obj_virtual(&map, &dummy);
-		vmw_cursor_update_image(dev_priv, virtual, 64, 64,
-					du->hotspot_x, du->hotspot_y);
-
-		ttm_bo_kunmap(&map);
-err_unreserve:
-		ttm_bo_unreserve(&dmabuf->base);
-
+		ret = vmw_cursor_update_dmabuf(dev_priv, dmabuf, width, height,
+					       du->hotspot_x, du->hotspot_y);
 	} else {
 		vmw_cursor_update_position(dev_priv, false, 0, 0);
 		return 0;
@@ -377,8 +424,9 @@
 				struct drm_clip_rect *clips,
 				unsigned num_clips, int inc)
 {
-	struct drm_clip_rect *clips_ptr;
 	struct vmw_display_unit *units[VMWGFX_NUM_DISPLAY_UNITS];
+	struct drm_clip_rect *clips_ptr;
+	struct drm_clip_rect *tmp;
 	struct drm_crtc *crtc;
 	size_t fifo_size;
 	int i, num_units;
@@ -391,7 +439,6 @@
 	} *cmd;
 	SVGASignedRect *blits;
 
-
 	num_units = 0;
 	list_for_each_entry(crtc, &dev_priv->dev->mode_config.crtc_list,
 			    head) {
@@ -402,13 +449,24 @@
 
 	BUG_ON(!clips || !num_clips);
 
+	tmp = kzalloc(sizeof(*tmp) * num_clips, GFP_KERNEL);
+	if (unlikely(tmp == NULL)) {
+		DRM_ERROR("Temporary cliprect memory alloc failed.\n");
+		return -ENOMEM;
+	}
+
 	fifo_size = sizeof(*cmd) + sizeof(SVGASignedRect) * num_clips;
 	cmd = kzalloc(fifo_size, GFP_KERNEL);
 	if (unlikely(cmd == NULL)) {
 		DRM_ERROR("Temporary fifo memory alloc failed.\n");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto out_free_tmp;
 	}
 
+	/* setup blits pointer */
+	blits = (SVGASignedRect *)&cmd[1];
+
+	/* initial clip region */
 	left = clips->x1;
 	right = clips->x2;
 	top = clips->y1;
@@ -434,45 +492,60 @@
 	cmd->body.srcRect.bottom = bottom;
 
 	clips_ptr = clips;
-	blits = (SVGASignedRect *)&cmd[1];
 	for (i = 0; i < num_clips; i++, clips_ptr += inc) {
-		blits[i].left   = clips_ptr->x1 - left;
-		blits[i].right  = clips_ptr->x2 - left;
-		blits[i].top    = clips_ptr->y1 - top;
-		blits[i].bottom = clips_ptr->y2 - top;
+		tmp[i].x1 = clips_ptr->x1 - left;
+		tmp[i].x2 = clips_ptr->x2 - left;
+		tmp[i].y1 = clips_ptr->y1 - top;
+		tmp[i].y2 = clips_ptr->y2 - top;
 	}
 
 	/* do per unit writing, reuse fifo for each */
 	for (i = 0; i < num_units; i++) {
 		struct vmw_display_unit *unit = units[i];
-		int clip_x1 = left - unit->crtc.x;
-		int clip_y1 = top - unit->crtc.y;
-		int clip_x2 = right - unit->crtc.x;
-		int clip_y2 = bottom - unit->crtc.y;
+		struct vmw_clip_rect clip;
+		int num;
+
+		clip.x1 = left - unit->crtc.x;
+		clip.y1 = top - unit->crtc.y;
+		clip.x2 = right - unit->crtc.x;
+		clip.y2 = bottom - unit->crtc.y;
 
 		/* skip any crtcs that misses the clip region */
-		if (clip_x1 >= unit->crtc.mode.hdisplay ||
-		    clip_y1 >= unit->crtc.mode.vdisplay ||
-		    clip_x2 <= 0 || clip_y2 <= 0)
+		if (clip.x1 >= unit->crtc.mode.hdisplay ||
+		    clip.y1 >= unit->crtc.mode.vdisplay ||
+		    clip.x2 <= 0 || clip.y2 <= 0)
 			continue;
 
+		/*
+		 * In order for the clip rects to be correctly scaled
+		 * the src and dest rects needs to be the same size.
+		 */
+		cmd->body.destRect.left = clip.x1;
+		cmd->body.destRect.right = clip.x2;
+		cmd->body.destRect.top = clip.y1;
+		cmd->body.destRect.bottom = clip.y2;
+
+		/* create a clip rect of the crtc in dest coords */
+		clip.x2 = unit->crtc.mode.hdisplay - clip.x1;
+		clip.y2 = unit->crtc.mode.vdisplay - clip.y1;
+		clip.x1 = 0 - clip.x1;
+		clip.y1 = 0 - clip.y1;
+
 		/* need to reset sid as it is changed by execbuf */
 		cmd->body.srcImage.sid = cpu_to_le32(framebuffer->user_handle);
-
 		cmd->body.destScreenId = unit->unit;
 
-		/*
-		 * The blit command is a lot more resilient then the
-		 * readback command when it comes to clip rects. So its
-		 * okay to go out of bounds.
-		 */
+		/* clip and write blits to cmd stream */
+		vmw_clip_cliprects(tmp, num_clips, clip, blits, &num);
 
-		cmd->body.destRect.left = clip_x1;
-		cmd->body.destRect.right = clip_x2;
-		cmd->body.destRect.top = clip_y1;
-		cmd->body.destRect.bottom = clip_y2;
+		/* if no cliprects hit skip this */
+		if (num == 0)
+			continue;
 
 
+		/* recalculate package length */
+		fifo_size = sizeof(*cmd) + sizeof(SVGASignedRect) * num;
+		cmd->header.size = cpu_to_le32(fifo_size - sizeof(cmd->header));
 		ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd,
 					  fifo_size, 0, NULL);
 
@@ -480,7 +553,10 @@
 			break;
 	}
 
+
 	kfree(cmd);
+out_free_tmp:
+	kfree(tmp);
 
 	return ret;
 }
@@ -556,6 +632,10 @@
 	 * Sanity checks.
 	 */
 
+	/* Surface must be marked as a scanout. */
+	if (unlikely(!surface->scanout))
+		return -EINVAL;
+
 	if (unlikely(surface->mip_levels[0] != 1 ||
 		     surface->num_sizes != 1 ||
 		     surface->sizes[0].width < mode_cmd->width ||
@@ -782,6 +862,7 @@
 			int clip_y1 = clips_ptr->y1 - unit->crtc.y;
 			int clip_x2 = clips_ptr->x2 - unit->crtc.x;
 			int clip_y2 = clips_ptr->y2 - unit->crtc.y;
+			int move_x, move_y;
 
 			/* skip any crtcs that misses the clip region */
 			if (clip_x1 >= unit->crtc.mode.hdisplay ||
@@ -789,12 +870,21 @@
 			    clip_x2 <= 0 || clip_y2 <= 0)
 				continue;
 
+			/* clip size to crtc size */
+			clip_x2 = min_t(int, clip_x2, unit->crtc.mode.hdisplay);
+			clip_y2 = min_t(int, clip_y2, unit->crtc.mode.vdisplay);
+
+			/* translate both src and dest to bring clip into screen */
+			move_x = min_t(int, clip_x1, 0);
+			move_y = min_t(int, clip_y1, 0);
+
+			/* actual translate done here */
 			blits[hit_num].header = SVGA_CMD_BLIT_GMRFB_TO_SCREEN;
 			blits[hit_num].body.destScreenId = unit->unit;
-			blits[hit_num].body.srcOrigin.x = clips_ptr->x1;
-			blits[hit_num].body.srcOrigin.y = clips_ptr->y1;
-			blits[hit_num].body.destRect.left = clip_x1;
-			blits[hit_num].body.destRect.top = clip_y1;
+			blits[hit_num].body.srcOrigin.x = clips_ptr->x1 - move_x;
+			blits[hit_num].body.srcOrigin.y = clips_ptr->y1 - move_y;
+			blits[hit_num].body.destRect.left = clip_x1 - move_x;
+			blits[hit_num].body.destRect.top = clip_y1 - move_y;
 			blits[hit_num].body.destRect.right = clip_x2;
 			blits[hit_num].body.destRect.bottom = clip_y2;
 			hit_num++;
@@ -1033,46 +1123,29 @@
 		return ERR_PTR(-ENOENT);
 	}
 
-	/**
-	 * End conditioned code.
-	 */
-
-	ret = vmw_user_surface_lookup_handle(dev_priv, tfile,
-					     mode_cmd->handle, &surface);
+	/* returns either a dmabuf or surface */
+	ret = vmw_user_lookup_handle(dev_priv, tfile,
+				     mode_cmd->handle,
+				     &surface, &bo);
 	if (ret)
-		goto try_dmabuf;
+		goto err_out;
 
-	if (!surface->scanout)
-		goto err_not_scanout;
+	/* Create the new framebuffer depending one what we got back */
+	if (bo)
+		ret = vmw_kms_new_framebuffer_dmabuf(dev_priv, bo, &vfb,
+						     mode_cmd);
+	else if (surface)
+		ret = vmw_kms_new_framebuffer_surface(dev_priv, file_priv,
+						      surface, &vfb, mode_cmd);
+	else
+		BUG();
 
-	ret = vmw_kms_new_framebuffer_surface(dev_priv, file_priv, surface,
-					      &vfb, mode_cmd);
-
-	/* vmw_user_surface_lookup takes one ref so does new_fb */
-	vmw_surface_unreference(&surface);
-
-	if (ret) {
-		DRM_ERROR("failed to create vmw_framebuffer: %i\n", ret);
-		ttm_base_object_unref(&user_obj);
-		return ERR_PTR(ret);
-	} else
-		vfb->user_obj = user_obj;
-	return &vfb->base;
-
-try_dmabuf:
-	DRM_INFO("%s: trying buffer\n", __func__);
-
-	ret = vmw_user_dmabuf_lookup(tfile, mode_cmd->handle, &bo);
-	if (ret) {
-		DRM_ERROR("failed to find buffer: %i\n", ret);
-		return ERR_PTR(-ENOENT);
-	}
-
-	ret = vmw_kms_new_framebuffer_dmabuf(dev_priv, bo, &vfb,
-					     mode_cmd);
-
-	/* vmw_user_dmabuf_lookup takes one ref so does new_fb */
-	vmw_dmabuf_unreference(&bo);
+err_out:
+	/* vmw_user_lookup_handle takes one ref so does new_fb */
+	if (bo)
+		vmw_dmabuf_unreference(&bo);
+	if (surface)
+		vmw_surface_unreference(&surface);
 
 	if (ret) {
 		DRM_ERROR("failed to create vmw_framebuffer: %i\n", ret);
@@ -1082,14 +1155,6 @@
 		vfb->user_obj = user_obj;
 
 	return &vfb->base;
-
-err_not_scanout:
-	DRM_ERROR("surface not marked as scanout\n");
-	/* vmw_user_surface_lookup takes one ref */
-	vmw_surface_unreference(&surface);
-	ttm_base_object_unref(&user_obj);
-
-	return ERR_PTR(-EINVAL);
 }
 
 static struct drm_mode_config_funcs vmw_kms_funcs = {
@@ -1106,10 +1171,12 @@
 		    uint32_t num_clips)
 {
 	struct vmw_display_unit *units[VMWGFX_NUM_DISPLAY_UNITS];
+	struct drm_clip_rect *tmp;
 	struct drm_crtc *crtc;
 	size_t fifo_size;
 	int i, k, num_units;
 	int ret = 0; /* silence warning */
+	int left, right, top, bottom;
 
 	struct {
 		SVGA3dCmdHeader header;
@@ -1127,60 +1194,95 @@
 	BUG_ON(surface == NULL);
 	BUG_ON(!clips || !num_clips);
 
+	tmp = kzalloc(sizeof(*tmp) * num_clips, GFP_KERNEL);
+	if (unlikely(tmp == NULL)) {
+		DRM_ERROR("Temporary cliprect memory alloc failed.\n");
+		return -ENOMEM;
+	}
+
 	fifo_size = sizeof(*cmd) + sizeof(SVGASignedRect) * num_clips;
 	cmd = kmalloc(fifo_size, GFP_KERNEL);
 	if (unlikely(cmd == NULL)) {
 		DRM_ERROR("Failed to allocate temporary fifo memory.\n");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto out_free_tmp;
+	}
+
+	left = clips->x;
+	right = clips->x + clips->w;
+	top = clips->y;
+	bottom = clips->y + clips->h;
+
+	for (i = 1; i < num_clips; i++) {
+		left = min_t(int, left, (int)clips[i].x);
+		right = max_t(int, right, (int)clips[i].x + clips[i].w);
+		top = min_t(int, top, (int)clips[i].y);
+		bottom = max_t(int, bottom, (int)clips[i].y + clips[i].h);
 	}
 
 	/* only need to do this once */
 	memset(cmd, 0, fifo_size);
 	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN);
-	cmd->header.size = cpu_to_le32(fifo_size - sizeof(cmd->header));
-
-	cmd->body.srcRect.left = 0;
-	cmd->body.srcRect.right = surface->sizes[0].width;
-	cmd->body.srcRect.top = 0;
-	cmd->body.srcRect.bottom = surface->sizes[0].height;
 
 	blits = (SVGASignedRect *)&cmd[1];
+
+	cmd->body.srcRect.left = left;
+	cmd->body.srcRect.right = right;
+	cmd->body.srcRect.top = top;
+	cmd->body.srcRect.bottom = bottom;
+
 	for (i = 0; i < num_clips; i++) {
-		blits[i].left   = clips[i].x;
-		blits[i].right  = clips[i].x + clips[i].w;
-		blits[i].top    = clips[i].y;
-		blits[i].bottom = clips[i].y + clips[i].h;
+		tmp[i].x1 = clips[i].x - left;
+		tmp[i].x2 = clips[i].x + clips[i].w - left;
+		tmp[i].y1 = clips[i].y - top;
+		tmp[i].y2 = clips[i].y + clips[i].h - top;
 	}
 
 	for (k = 0; k < num_units; k++) {
 		struct vmw_display_unit *unit = units[k];
-		int clip_x1 = destX - unit->crtc.x;
-		int clip_y1 = destY - unit->crtc.y;
-		int clip_x2 = clip_x1 + surface->sizes[0].width;
-		int clip_y2 = clip_y1 + surface->sizes[0].height;
+		struct vmw_clip_rect clip;
+		int num;
+
+		clip.x1 = left + destX - unit->crtc.x;
+		clip.y1 = top + destY - unit->crtc.y;
+		clip.x2 = right + destX - unit->crtc.x;
+		clip.y2 = bottom + destY - unit->crtc.y;
 
 		/* skip any crtcs that misses the clip region */
-		if (clip_x1 >= unit->crtc.mode.hdisplay ||
-		    clip_y1 >= unit->crtc.mode.vdisplay ||
-		    clip_x2 <= 0 || clip_y2 <= 0)
+		if (clip.x1 >= unit->crtc.mode.hdisplay ||
+		    clip.y1 >= unit->crtc.mode.vdisplay ||
+		    clip.x2 <= 0 || clip.y2 <= 0)
 			continue;
 
+		/*
+		 * In order for the clip rects to be correctly scaled
+		 * the src and dest rects needs to be the same size.
+		 */
+		cmd->body.destRect.left = clip.x1;
+		cmd->body.destRect.right = clip.x2;
+		cmd->body.destRect.top = clip.y1;
+		cmd->body.destRect.bottom = clip.y2;
+
+		/* create a clip rect of the crtc in dest coords */
+		clip.x2 = unit->crtc.mode.hdisplay - clip.x1;
+		clip.y2 = unit->crtc.mode.vdisplay - clip.y1;
+		clip.x1 = 0 - clip.x1;
+		clip.y1 = 0 - clip.y1;
+
 		/* need to reset sid as it is changed by execbuf */
 		cmd->body.srcImage.sid = sid;
-
 		cmd->body.destScreenId = unit->unit;
 
-		/*
-		 * The blit command is a lot more resilient then the
-		 * readback command when it comes to clip rects. So its
-		 * okay to go out of bounds.
-		 */
+		/* clip and write blits to cmd stream */
+		vmw_clip_cliprects(tmp, num_clips, clip, blits, &num);
 
-		cmd->body.destRect.left = clip_x1;
-		cmd->body.destRect.right = clip_x2;
-		cmd->body.destRect.top = clip_y1;
-		cmd->body.destRect.bottom = clip_y2;
+		/* if no cliprects hit skip this */
+		if (num == 0)
+			continue;
 
+		/* recalculate package length */
+		fifo_size = sizeof(*cmd) + sizeof(SVGASignedRect) * num;
+		cmd->header.size = cpu_to_le32(fifo_size - sizeof(cmd->header));
 		ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd,
 					  fifo_size, 0, NULL);
 
@@ -1189,6 +1291,8 @@
 	}
 
 	kfree(cmd);
+out_free_tmp:
+	kfree(tmp);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index af8e6e5..e1cb855 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -62,9 +62,14 @@
 int vmw_cursor_update_image(struct vmw_private *dev_priv,
 			    u32 *image, u32 width, u32 height,
 			    u32 hotspotX, u32 hotspotY);
+int vmw_cursor_update_dmabuf(struct vmw_private *dev_priv,
+			     struct vmw_dma_buffer *dmabuf,
+			     u32 width, u32 height,
+			     u32 hotspotX, u32 hotspotY);
 void vmw_cursor_update_position(struct vmw_private *dev_priv,
 				bool show, int x, int y);
 
+
 /**
  * Base class display unit.
  *
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index 90c5e39..8f8dbd4 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -74,9 +74,10 @@
 {
 	struct vmw_legacy_display *lds = dev_priv->ldu_priv;
 	struct vmw_legacy_display_unit *entry;
+	struct vmw_display_unit *du = NULL;
 	struct drm_framebuffer *fb = NULL;
 	struct drm_crtc *crtc = NULL;
-	int i = 0;
+	int i = 0, ret;
 
 	/* If there is no display topology the host just assumes
 	 * that the guest will set the same layout as the host.
@@ -129,6 +130,25 @@
 
 	lds->last_num_active = lds->num_active;
 
+
+	/* Find the first du with a cursor. */
+	list_for_each_entry(entry, &lds->active, active) {
+		du = &entry->base;
+
+		if (!du->cursor_dmabuf)
+			continue;
+
+		ret = vmw_cursor_update_dmabuf(dev_priv,
+					       du->cursor_dmabuf,
+					       64, 64,
+					       du->hotspot_x,
+					       du->hotspot_y);
+		if (ret == 0)
+			break;
+
+		DRM_ERROR("Could not update cursor image\n");
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 86c5e4c..1c7f09e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -1190,6 +1190,29 @@
 		write_unlock(lock);
 }
 
+/**
+ * Helper function that looks either a surface or dmabuf.
+ *
+ * The pointer this pointed at by out_surf and out_buf needs to be null.
+ */
+int vmw_user_lookup_handle(struct vmw_private *dev_priv,
+			   struct ttm_object_file *tfile,
+			   uint32_t handle,
+			   struct vmw_surface **out_surf,
+			   struct vmw_dma_buffer **out_buf)
+{
+	int ret;
+
+	BUG_ON(*out_surf || *out_buf);
+
+	ret = vmw_user_surface_lookup_handle(dev_priv, tfile, handle, out_surf);
+	if (!ret)
+		return 0;
+
+	ret = vmw_user_dmabuf_lookup(tfile, handle, out_buf);
+	return ret;
+}
+
 
 int vmw_user_surface_lookup_handle(struct vmw_private *dev_priv,
 				   struct ttm_object_file *tfile,