[XFS] Provide XFS support for the splice syscall.

Signed-off-by: Nathan Scott <nathans@sgi.com>
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 85997b1..ae4c475 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -69,7 +69,6 @@
 	return rval;
 }
 
-
 STATIC ssize_t
 xfs_file_aio_read(
 	struct kiocb		*iocb,
@@ -90,7 +89,6 @@
 	return __xfs_file_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
 }
 
-
 STATIC inline ssize_t
 __xfs_file_write(
 	struct kiocb	*iocb,
@@ -113,7 +111,6 @@
 	return rval;
 }
 
-
 STATIC ssize_t
 xfs_file_aio_write(
 	struct kiocb		*iocb,
@@ -134,7 +131,6 @@
 	return __xfs_file_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
 }
 
-
 STATIC inline ssize_t
 __xfs_file_readv(
 	struct file		*file,
@@ -179,7 +175,6 @@
 	return __xfs_file_readv(file, iov, IO_INVIS, nr_segs, ppos);
 }
 
-
 STATIC inline ssize_t
 __xfs_file_writev(
 	struct file		*file,
@@ -204,7 +199,6 @@
 	return rval;
 }
 
-
 STATIC ssize_t
 xfs_file_writev(
 	struct file		*file,
@@ -228,7 +222,7 @@
 STATIC ssize_t
 xfs_file_sendfile(
 	struct file		*filp,
-	loff_t			*ppos,
+	loff_t			*pos,
 	size_t			count,
 	read_actor_t		actor,
 	void			*target)
@@ -236,10 +230,80 @@
 	vnode_t			*vp = vn_from_inode(filp->f_dentry->d_inode);
 	ssize_t			rval;
 
-	VOP_SENDFILE(vp, filp, ppos, 0, count, actor, target, NULL, rval);
+	VOP_SENDFILE(vp, filp, pos, 0, count, actor, target, NULL, rval);
 	return rval;
 }
 
+STATIC ssize_t
+xfs_file_sendfile_invis(
+	struct file		*filp,
+	loff_t			*pos,
+	size_t			count,
+	read_actor_t		actor,
+	void			*target)
+{
+	vnode_t			*vp = vn_from_inode(filp->f_dentry->d_inode);
+	ssize_t			rval;
+
+	VOP_SENDFILE(vp, filp, pos, IO_INVIS, count, actor, target, NULL, rval);
+	return rval;
+}
+
+STATIC ssize_t
+xfs_file_splice_read(
+	struct file		*infilp,
+	struct inode		*pipe,
+	size_t			len,
+	unsigned int		flags)
+{
+	vnode_t			*vp = vn_from_inode(infilp->f_dentry->d_inode);
+	ssize_t			rval;
+
+	VOP_SPLICE_READ(vp, infilp, pipe, len, flags, 0, NULL, rval);
+	return rval;
+}
+
+STATIC ssize_t
+xfs_file_splice_read_invis(
+	struct file		*infilp,
+	struct inode		*pipe,
+	size_t			len,
+	unsigned int		flags)
+{
+	vnode_t			*vp = vn_from_inode(infilp->f_dentry->d_inode);
+	ssize_t			rval;
+
+	VOP_SPLICE_READ(vp, infilp, pipe, len, flags, IO_INVIS, NULL, rval);
+	return rval;
+}
+
+STATIC ssize_t
+xfs_file_splice_write(
+	struct inode		*pipe,
+	struct file		*outfilp,
+	size_t			len,
+	unsigned int		flags)
+{
+	vnode_t			*vp = vn_from_inode(outfilp->f_dentry->d_inode);
+	ssize_t			rval;
+
+	VOP_SPLICE_WRITE(vp, pipe, outfilp, len, flags, 0, NULL, rval);
+	return rval;
+}
+
+STATIC ssize_t
+xfs_file_splice_write_invis(
+	struct inode		*pipe,
+	struct file		*outfilp,
+	size_t			len,
+	unsigned int		flags)
+{
+	vnode_t			*vp = vn_from_inode(outfilp->f_dentry->d_inode);
+	ssize_t			rval;
+
+	VOP_SPLICE_WRITE(vp, pipe, outfilp, len, flags, IO_INVIS, NULL, rval);
+	return rval;
+}
 
 STATIC int
 xfs_file_open(
@@ -251,13 +315,10 @@
 
 	if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
 		return -EFBIG;
-
-	ASSERT(vp);
 	VOP_OPEN(vp, NULL, error);
 	return -error;
 }
 
-
 STATIC int
 xfs_file_release(
 	struct inode	*inode,
@@ -271,7 +332,6 @@
 	return -error;
 }
 
-
 STATIC int
 xfs_file_fsync(
 	struct file	*filp,
@@ -285,21 +345,11 @@
 
 	if (datasync)
 		flags |= FSYNC_DATA;
-
-	ASSERT(vp);
 	VOP_FSYNC(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1, error);
 	return -error;
 }
 
-/*
- * xfs_file_readdir maps to VOP_READDIR().
- * We need to build a uio, cred, ...
- */
-
-#define nextdp(dp)      ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen))
-
 #ifdef CONFIG_XFS_DMAPI
-
 STATIC struct page *
 xfs_vm_nopage(
 	struct vm_area_struct	*area,
@@ -319,10 +369,8 @@
 
 	return filemap_nopage(area, address, type);
 }
-
 #endif /* CONFIG_XFS_DMAPI */
 
-
 STATIC int
 xfs_file_readdir(
 	struct file	*filp,
@@ -330,7 +378,7 @@
 	filldir_t	filldir)
 {
 	int		error = 0;
-	vnode_t		*vp;
+	vnode_t		*vp = vn_from_inode(filp->f_dentry->d_inode);
 	uio_t		uio;
 	iovec_t		iov;
 	int		eof = 0;
@@ -340,9 +388,6 @@
 	xfs_off_t	start_offset, curr_offset;
 	xfs_dirent_t	*dbp = NULL;
 
-	vp = vn_from_inode(filp->f_dentry->d_inode);
-	ASSERT(vp);
-
 	/* Try fairly hard to get memory */
 	do {
 		if ((read_buf = (caddr_t)kmalloc(rlen, GFP_KERNEL)))
@@ -387,7 +432,7 @@
 			}
 			size -= dbp->d_reclen;
 			curr_offset = (loff_t)dbp->d_off /* & 0x7fffffff */;
-			dbp = nextdp(dbp);
+			dbp = (xfs_dirent_t *)((char *)dbp + dbp->d_reclen);
 		}
 	}
 done:
@@ -402,7 +447,6 @@
 	return -error;
 }
 
-
 STATIC int
 xfs_file_mmap(
 	struct file	*filp,
@@ -457,11 +501,10 @@
 	unsigned int	cmd,
 	unsigned long	arg)
 {
-	int		error;
 	struct inode	*inode = filp->f_dentry->d_inode;
 	vnode_t		*vp = vn_from_inode(inode);
+	int		error;
 
-	ASSERT(vp);
 	VOP_IOCTL(vp, inode, filp, IO_INVIS, cmd, (void __user *)arg, error);
 	VMODIFY(vp);
 
@@ -537,6 +580,8 @@
 	.aio_read	= xfs_file_aio_read,
 	.aio_write	= xfs_file_aio_write,
 	.sendfile	= xfs_file_sendfile,
+	.splice_read	= xfs_file_splice_read,
+	.splice_write	= xfs_file_splice_write,
 	.unlocked_ioctl	= xfs_file_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= xfs_file_compat_ioctl,
@@ -558,7 +603,9 @@
 	.writev		= xfs_file_writev_invis,
 	.aio_read	= xfs_file_aio_read_invis,
 	.aio_write	= xfs_file_aio_write_invis,
-	.sendfile	= xfs_file_sendfile,
+	.sendfile	= xfs_file_sendfile_invis,
+	.splice_read	= xfs_file_splice_read_invis,
+	.splice_write	= xfs_file_splice_write_invis,
 	.unlocked_ioctl	= xfs_file_ioctl_invis,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= xfs_file_compat_invis_ioctl,
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 1fe09f2..e9fe43d 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -103,6 +103,7 @@
  */
 #undef  HAVE_REFCACHE	/* reference cache not needed for NFS in 2.6 */
 #define HAVE_SENDFILE	/* sendfile(2) exists in 2.6, but not in 2.4 */
+#define HAVE_SPLICE	/* a splice(2) exists in 2.6, but not in 2.4 */
 #ifdef CONFIG_SMP
 #define HAVE_PERCPU_SB	/* per cpu superblock counters are a 2.6 feature */
 #else
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 84ddf18..90cd314 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -301,36 +301,23 @@
 	void			*target,
 	cred_t			*credp)
 {
+	xfs_inode_t		*ip = XFS_BHVTOI(bdp);
+	xfs_mount_t		*mp = ip->i_mount;
 	ssize_t			ret;
-	xfs_fsize_t		n;
-	xfs_inode_t		*ip;
-	xfs_mount_t		*mp;
-	vnode_t			*vp;
-
-	ip = XFS_BHVTOI(bdp);
-	vp = BHV_TO_VNODE(bdp);
-	mp = ip->i_mount;
 
 	XFS_STATS_INC(xs_read_calls);
-
-	n = XFS_MAXIOFFSET(mp) - *offset;
-	if ((n <= 0) || (count == 0))
-		return 0;
-
-	if (n < count)
-		count = n;
-
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 
-	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
+	if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
 	    (!(ioflags & IO_INVIS))) {
 		vrwlock_t locktype = VRWLOCK_READ;
 		int error;
 
-		error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), *offset, count,
+		error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
+				      *offset, count,
 				      FILP_DELAY_FLAG(filp), &locktype);
 		if (error) {
 			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
@@ -340,12 +327,96 @@
 	xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,
 		   (void *)(unsigned long)target, count, *offset, ioflags);
 	ret = generic_file_sendfile(filp, offset, count, actor, target);
-
-	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-
 	if (ret > 0)
 		XFS_STATS_ADD(xs_read_bytes, ret);
 
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+	return ret;
+}
+
+ssize_t
+xfs_splice_read(
+	bhv_desc_t		*bdp,
+	struct file		*infilp,
+	struct inode		*pipe,
+	size_t			count,
+	int			flags,
+	int			ioflags,
+	cred_t			*credp)
+{
+	xfs_inode_t		*ip = XFS_BHVTOI(bdp);
+	xfs_mount_t		*mp = ip->i_mount;
+	ssize_t			ret;
+
+	XFS_STATS_INC(xs_read_calls);
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return -EIO;
+
+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
+
+	if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
+	    (!(ioflags & IO_INVIS))) {
+		vrwlock_t locktype = VRWLOCK_READ;
+		int error;
+
+		error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
+					infilp->f_pos, count,
+					FILP_DELAY_FLAG(infilp), &locktype);
+		if (error) {
+			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+			return -error;
+		}
+	}
+	xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, &ip->i_iocore,
+			   pipe, count, infilp->f_pos, ioflags);
+	ret = generic_file_splice_read(infilp, pipe, count, flags);
+	if (ret > 0)
+		XFS_STATS_ADD(xs_read_bytes, ret);
+
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+	return ret;
+}
+
+ssize_t
+xfs_splice_write(
+	bhv_desc_t		*bdp,
+	struct inode		*pipe,
+	struct file		*outfilp,
+	size_t			count,
+	int			flags,
+	int			ioflags,
+	cred_t			*credp)
+{
+	xfs_inode_t		*ip = XFS_BHVTOI(bdp);
+	xfs_mount_t		*mp = ip->i_mount;
+	ssize_t			ret;
+
+	XFS_STATS_INC(xs_write_calls);
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return -EIO;
+
+	xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+	if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_WRITE) &&
+	    (!(ioflags & IO_INVIS))) {
+		vrwlock_t locktype = VRWLOCK_WRITE;
+		int error;
+
+		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, BHV_TO_VNODE(bdp),
+					outfilp->f_pos, count,
+					FILP_DELAY_FLAG(outfilp), &locktype);
+		if (error) {
+			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+			return -error;
+		}
+	}
+	xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, &ip->i_iocore,
+			   pipe, count, outfilp->f_pos, ioflags);
+	ret = generic_file_splice_write(pipe, outfilp, count, flags);
+	if (ret > 0)
+		XFS_STATS_ADD(xs_write_bytes, ret);
+
+	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 	return ret;
 }
 
@@ -363,7 +434,7 @@
 	xfs_fsize_t	end_size)
 {
 	xfs_fileoff_t	last_fsb;
-	xfs_mount_t	*mp;
+	xfs_mount_t	*mp = io->io_mount;
 	int		nimaps;
 	int		zero_offset;
 	int		zero_len;
@@ -373,8 +444,6 @@
 
 	ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);
 
-	mp = io->io_mount;
-
 	zero_offset = XFS_B_FSB_OFFSET(mp, isize);
 	if (zero_offset == 0) {
 		/*
@@ -405,10 +474,9 @@
 	 * don't deadlock when the buffer cache calls back to us.
 	 */
 	XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
+
 	loff = XFS_FSB_TO_B(mp, last_fsb);
-
 	zero_len = mp->m_sb.sb_blocksize - zero_offset;
-
 	error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size);
 
 	XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
@@ -441,7 +509,7 @@
 	xfs_fileoff_t	zero_count_fsb;
 	xfs_fileoff_t	last_fsb;
 	xfs_extlen_t	buf_len_fsb;
-	xfs_mount_t	*mp;
+	xfs_mount_t	*mp = io->io_mount;
 	int		nimaps;
 	int		error = 0;
 	xfs_bmbt_irec_t	imap;
@@ -450,8 +518,6 @@
 	ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
 	ASSERT(offset > isize);
 
-	mp = io->io_mount;
-
 	/*
 	 * First handle zeroing the block on which isize resides.
 	 * We only zero a part of that block so it is handled specially.
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index 38864a8..eaa5659 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -60,6 +60,8 @@
 #define	XFS_IOMAP_ALLOC_ENTER	25
 #define	XFS_IOMAP_ALLOC_MAP	26
 #define	XFS_IOMAP_UNWRITTEN	27
+#define XFS_SPLICE_READ_ENTER	28
+#define XFS_SPLICE_WRITE_ENTER	29
 extern void xfs_rw_enter_trace(int, struct xfs_iocore *,
 				void *, size_t, loff_t, int);
 extern void xfs_inval_cached_trace(struct xfs_iocore *,
@@ -78,6 +80,7 @@
 			struct xfs_iomap *, int *);
 extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
 extern int xfs_bdstrat_cb(struct xfs_buf *);
+extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
 
 extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t,
 				xfs_fsize_t, xfs_fsize_t);
@@ -90,7 +93,11 @@
 extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
 				loff_t *, int, size_t, read_actor_t,
 				void *, struct cred *);
-
-extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
+extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *,
+				struct inode *, size_t, int, int,
+				struct cred *);
+extern ssize_t xfs_splice_write(struct bhv_desc *, struct inode *,
+				struct file *, size_t, int, int,
+				struct cred *);
 
 #endif	/* __XFS_LRW_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 06f5845..6f1c79a 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -173,6 +173,12 @@
 typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
 				loff_t *, int, size_t, read_actor_t,
 				void *, struct cred *);
+typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *,
+				struct inode *, size_t, int, int,
+				struct cred *);
+typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct inode *,
+				struct file *, size_t, int, int,
+				struct cred *);
 typedef int	(*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *,
 				int, unsigned int, void __user *);
 typedef int	(*vop_getattr_t)(bhv_desc_t *, struct vattr *, int,
@@ -231,6 +237,8 @@
 	vop_read_t		vop_read;
 	vop_write_t		vop_write;
 	vop_sendfile_t		vop_sendfile;
+	vop_splice_read_t	vop_splice_read;
+	vop_splice_write_t	vop_splice_write;
 	vop_ioctl_t		vop_ioctl;
 	vop_getattr_t		vop_getattr;
 	vop_setattr_t		vop_setattr;
@@ -276,6 +284,10 @@
 	rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
 #define VOP_SENDFILE(vp,f,off,ioflags,cnt,act,targ,cr,rv)		\
 	rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,ioflags,cnt,act,targ,cr)
+#define VOP_SPLICE_READ(vp,f,pipe,cnt,fl,iofl,cr,rv)			\
+	rv = _VOP_(vop_splice_read, vp)((vp)->v_fbhv,f,pipe,cnt,fl,iofl,cr)
+#define VOP_SPLICE_WRITE(vp,f,pipe,cnt,fl,iofl,cr,rv)			\
+	rv = _VOP_(vop_splice_write, vp)((vp)->v_fbhv,f,pipe,cnt,fl,iofl,cr)
 #define VOP_BMAP(vp,of,sz,rw,b,n,rv)					\
 	rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n)
 #define VOP_OPEN(vp, cr, rv)						\
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index de49601..fa71b30 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -4649,6 +4649,10 @@
 #ifdef HAVE_SENDFILE
 	.vop_sendfile		= xfs_sendfile,
 #endif
+#ifdef HAVE_SPLICE
+	.vop_splice_read	= xfs_splice_read,
+	.vop_splice_write	= xfs_splice_write,
+#endif
 	.vop_write		= xfs_write,
 	.vop_ioctl		= xfs_ioctl,
 	.vop_getattr		= xfs_getattr,