NFS: Readdir plus in v4

By requsting more attributes during a readdir, we can mimic the readdir plus
operation that was in NFSv3.

To test, I ran the command `ls -lU --color=none` on directories with various
numbers of files.  Without readdir plus, I see this:

n files |    100    |   1,000   |  10,000   |  100,000  | 1,000,000
--------+-----------+-----------+-----------+-----------+----------
real    | 0m00.153s | 0m00.589s | 0m05.601s | 0m56.691s | 9m59.128s
user    | 0m00.007s | 0m00.007s | 0m00.077s | 0m00.703s | 0m06.800s
sys     | 0m00.010s | 0m00.070s | 0m00.633s | 0m06.423s | 1m10.005s
access  | 3         | 1         | 1         | 4         | 31
getattr | 2         | 1         | 1         | 1         | 1
lookup  | 104       | 1,003     | 10,003    | 100,003   | 1,000,003
readdir | 2         | 16        | 158       | 1,575     | 15,749
total   | 111       | 1,021     | 10,163    | 101,583   | 1,015,784

With readdir plus enabled, I see this:

n files |    100    |   1,000   |  10,000   |  100,000  | 1,000,000
--------+-----------+-----------+-----------+-----------+----------
real    | 0m00.115s | 0m00.206s | 0m01.079s | 0m12.521s | 2m07.528s
user    | 0m00.003s | 0m00.003s | 0m00.040s | 0m00.290s | 0m03.296s
sys     | 0m00.007s | 0m00.020s | 0m00.120s | 0m01.357s | 0m17.556s
access  | 3         | 1         | 1         | 1         | 7
getattr | 2         | 1         | 1         | 1         | 1
lookup  | 4         | 3         | 3         | 3         | 3
readdir | 6         | 62        | 630       | 6,300     | 62,993
total   | 15        | 67        | 635       | 6,305     | 63,004

Readdir plus disabled has about a 16x increase in the number of rpc calls and
is 4 - 5 times slower on large directories.

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 876ba85..da2f2f0 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1358,8 +1358,9 @@
 
 	/* Initialise the client representation from the mount data */
 	server->flags = data->flags;
-	server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|
-		NFS_CAP_POSIX_LOCK;
+	server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK;
+	if (!(data->flags & NFS_MOUNT_NORDIRPLUS))
+			server->caps |= NFS_CAP_READDIRPLUS;
 	server->options = data->options;
 
 	/* Get a client record */
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 0cbb714..2a768d0 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -172,7 +172,7 @@
 
 #define MAX_READDIR_ARRAY ((PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry))
 
-typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, int);
+typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
 typedef struct {
 	struct file	*file;
 	struct page	*page;
@@ -360,7 +360,7 @@
 static
 int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct xdr_stream *stream)
 {
-	__be32 *p = desc->decode(stream, entry, desc->plus);
+	__be32 *p = desc->decode(stream, entry, NFS_SERVER(desc->file->f_path.dentry->d_inode), desc->plus);
 	if (IS_ERR(p))
 		return PTR_ERR(p);
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7b0e894..db08ff3 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -187,15 +187,15 @@
 /* nfs2xdr.c */
 extern int nfs_stat_to_errno(int);
 extern struct rpc_procinfo nfs_procedures[];
-extern __be32 *nfs_decode_dirent(struct xdr_stream *, struct nfs_entry *, int);
+extern __be32 *nfs_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
 
 /* nfs3xdr.c */
 extern struct rpc_procinfo nfs3_procedures[];
-extern __be32 *nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, int);
+extern __be32 *nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
 
 /* nfs4xdr.c */
 #ifdef CONFIG_NFS_V4
-extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *entry, int plus);
+extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
 #endif
 #ifdef CONFIG_NFS_V4_1
 extern const u32 nfs41_maxread_overhead;
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 82f0264..e6bf457 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -454,7 +454,7 @@
 }
 
 __be32 *
-nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, int plus)
+nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus)
 {
 	__be32 *p;
 	p = xdr_inline_decode(xdr, 4);
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index dc98eb7..31a44df 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -590,7 +590,7 @@
 }
 
 __be32 *
-nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, int plus)
+nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus)
 {
 	__be32 *p;
 	struct nfs_entry old = *entry;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index c58ea63..9fa4963 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -331,7 +331,7 @@
 extern const nfs4_stateid zero_stateid;
 
 /* nfs4xdr.c */
-extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *entry, int plus);
+extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
 extern struct rpc_procinfo nfs4_procedures[];
 
 struct nfs4_mount_data;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cb33c73..f5ab216 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2832,6 +2832,7 @@
 		.pgbase = 0,
 		.count = count,
 		.bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask,
+		.plus = plus,
 	};
 	struct nfs4_readdir_res res;
 	struct rpc_message msg = {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index b7eff20..ccfb1c92 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1385,12 +1385,20 @@
 
 static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
 {
-	uint32_t attrs[2] = {
-		FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID,
-		FATTR4_WORD1_MOUNTED_ON_FILEID,
-	};
+	uint32_t attrs[2] = {0, 0};
 	__be32 *p;
 
+	if (readdir->plus) {
+		attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE|
+			FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE;
+		attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER|
+			FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV|
+			FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS|
+			FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
+	}
+	attrs[0] |= FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID;
+	attrs[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID;
+
 	p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20);
 	*p++ = cpu_to_be32(OP_READDIR);
 	p = xdr_encode_hyper(p, readdir->cookie);
@@ -1398,11 +1406,15 @@
 	*p++ = cpu_to_be32(readdir->count >> 1);  /* We're not doing readdirplus */
 	*p++ = cpu_to_be32(readdir->count);
 	*p++ = cpu_to_be32(2);
-	/* Switch to mounted_on_fileid if the server supports it */
-	if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
-		attrs[0] &= ~FATTR4_WORD0_FILEID;
-	else
-		attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
+
+	if (!readdir->plus) {
+		/* Switch to mounted_on_fileid if the server supports it */
+		if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
+			attrs[0] &= ~FATTR4_WORD0_FILEID;
+		else
+			attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
+	}
+
 	*p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]);
 	*p = cpu_to_be32(attrs[1] & readdir->bitmask[1]);
 	hdr->nops++;
@@ -5768,7 +5780,8 @@
 }
 #endif /* CONFIG_NFS_V4_1 */
 
-__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, int plus)
+__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
+			   struct nfs_server *server, int plus)
 {
 	uint32_t bitmap[2] = {0};
 	uint32_t len;
@@ -5824,24 +5837,10 @@
 		goto out_overflow;
 	len = XDR_QUADLEN(ntohl(*p++));	/* attribute buffer length */
 	if (len > 0) {
-		if (bitmap[0] & FATTR4_WORD0_RDATTR_ERROR) {
-			bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
-			/* Ignore the return value of rdattr_error for now */
-			p = xdr_inline_decode(xdr, 4);
-			if (unlikely(!p))
-				goto out_overflow;
-		}
-		if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID) {
-			p = xdr_inline_decode(xdr, 8);
-			if (unlikely(!p))
-				goto out_overflow;
-			xdr_decode_hyper(p, &entry->ino);
-		} else if (bitmap[0] == FATTR4_WORD0_FILEID) {
-			p = xdr_inline_decode(xdr, 8);
-			if (unlikely(!p))
-				goto out_overflow;
-			xdr_decode_hyper(p, &entry->ino);
-		}
+		if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, server, 1) < 0)
+			goto out_overflow;
+		if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
+			entry->ino = entry->fattr->fileid;
 	}
 
 	p = xdr_inline_peek(xdr, 8);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 1b9a17a..efe2eab 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -778,6 +778,7 @@
 	struct page **			pages;	/* zero-copy data */
 	unsigned int			pgbase;	/* zero-copy data */
 	const u32 *			bitmask;
+	int				plus;
 	struct nfs4_sequence_args	seq_args;
 };
 
@@ -1036,7 +1037,7 @@
 	int	(*pathconf) (struct nfs_server *, struct nfs_fh *,
 			     struct nfs_pathconf *);
 	int	(*set_capabilities)(struct nfs_server *, struct nfs_fh *);
-	__be32 *(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int plus);
+	__be32 *(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int plus);
 	void	(*read_setup)   (struct nfs_read_data *, struct rpc_message *);
 	int	(*read_done)  (struct rpc_task *, struct nfs_read_data *);
 	void	(*write_setup)  (struct nfs_write_data *, struct rpc_message *);