Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull more vfs updates from Al Viro:
"In this pile:
- autofs-namespace series
- dedupe stuff
- more struct path constification"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (40 commits)
ocfs2: implement the VFS clone_range, copy_range, and dedupe_range features
ocfs2: charge quota for reflinked blocks
ocfs2: fix bad pointer cast
ocfs2: always unlock when completing dio writes
ocfs2: don't eat io errors during _dio_end_io_write
ocfs2: budget for extent tree splits when adding refcount flag
ocfs2: prohibit refcounted swapfiles
ocfs2: add newlines to some error messages
ocfs2: convert inode refcount test to a helper
simple_write_end(): don't zero in short copy into uptodate
exofs: don't mess with simple_write_{begin,end}
9p: saner ->write_end() on failing copy into non-uptodate page
fix gfs2_stuffed_write_end() on short copies
fix ceph_write_end()
nfs_write_end(): fix handling of short copies
vfs: refactor clone/dedupe_file_range common functions
fs: try to clone files first in vfs_copy_file_range
vfs: misc struct path constification
namespace.c: constify struct path passed to a bunch of primitives
quota: constify struct path in quota_on
...
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 69e2387c..ace63cd 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -20,7 +20,7 @@
void (*d_iput)(struct dentry *, struct inode *);
char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen);
struct vfsmount *(*d_automount)(struct path *path);
- int (*d_manage)(struct dentry *, bool);
+ int (*d_manage)(const struct path *, bool);
struct dentry *(*d_real)(struct dentry *, const struct inode *,
unsigned int);
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index b5039a0..3893f4d 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -948,7 +948,7 @@
void (*d_iput)(struct dentry *, struct inode *);
char *(*d_dname)(struct dentry *, char *, int);
struct vfsmount *(*d_automount)(struct path *);
- int (*d_manage)(struct dentry *, bool);
+ int (*d_manage)(const struct path *, bool);
struct dentry *(*d_real)(struct dentry *, const struct inode *,
unsigned int);
};
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index 83d2b4e..44d67b1 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -295,7 +295,7 @@
* dcookie user still being registered (namely, the reader
* of the event buffer).
*/
-static inline unsigned long fast_get_dcookie(struct path *path)
+static inline unsigned long fast_get_dcookie(const struct path *path)
{
unsigned long cookie;
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 82f7000..642478d 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -206,7 +206,7 @@
* because we cannot reach this code without at least one
* dcookie user still being registered (namely, the reader
* of the event buffer). */
-static inline unsigned long fast_get_dcookie(struct path *path)
+static inline unsigned long fast_get_dcookie(const struct path *path)
{
unsigned long cookie;
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 5ca1fb0..adaf6f6 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -310,18 +310,10 @@
p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);
- if (unlikely(copied < len)) {
- /*
- * zero out the rest of the area
- */
- unsigned from = pos & (PAGE_SIZE - 1);
-
- zero_user(page, from + copied, len - copied);
- flush_dcache_page(page);
+ if (unlikely(copied < len && !PageUptodate(page))) {
+ copied = 0;
+ goto out;
}
-
- if (!PageUptodate(page))
- SetPageUptodate(page);
/*
* No need to use i_size_read() here, the i_size
* cannot change under us because we hold the i_mutex.
@@ -331,6 +323,7 @@
i_size_write(inode, last_pos);
}
set_page_dirty(page);
+out:
unlock_page(page);
put_page(page);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index a1fba42..c885daa 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -145,7 +145,7 @@
/* Expiration */
int is_autofs4_dentry(struct dentry *);
-int autofs4_expire_wait(struct dentry *dentry, int rcu_walk);
+int autofs4_expire_wait(const struct path *path, int rcu_walk);
int autofs4_expire_run(struct super_block *, struct vfsmount *,
struct autofs_sb_info *,
struct autofs_packet_expire __user *);
@@ -217,7 +217,8 @@
/* Queue management functions */
-int autofs4_wait(struct autofs_sb_info *, struct dentry *, enum autofs_notify);
+int autofs4_wait(struct autofs_sb_info *,
+ const struct path *, enum autofs_notify);
int autofs4_wait_release(struct autofs_sb_info *, autofs_wqt_t, int);
void autofs4_catatonic_mode(struct autofs_sb_info *);
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index fc09eb7..6f48d67 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -204,7 +204,7 @@
/* Find the topmost mount satisfying test() */
static int find_autofs_mount(const char *pathname,
struct path *res,
- int test(struct path *path, void *data),
+ int test(const struct path *path, void *data),
void *data)
{
struct path path;
@@ -230,12 +230,12 @@
return err;
}
-static int test_by_dev(struct path *path, void *p)
+static int test_by_dev(const struct path *path, void *p)
{
return path->dentry->d_sb->s_dev == *(dev_t *)p;
}
-static int test_by_type(struct path *path, void *p)
+static int test_by_type(const struct path *path, void *p)
{
struct autofs_info *ino = autofs4_dentry_ino(path->dentry);
@@ -468,7 +468,7 @@
ino = autofs4_dentry_ino(path.dentry);
if (ino) {
err = 0;
- autofs4_expire_wait(path.dentry, 0);
+ autofs4_expire_wait(&path, 0);
spin_lock(&sbi->fs_lock);
param->requester.uid =
from_kuid_munged(current_user_ns(), ino->uid);
@@ -575,7 +575,7 @@
devid = new_encode_dev(dev);
- err = have_submounts(path.dentry);
+ err = path_has_submounts(&path);
if (follow_down_one(&path))
magic = path.dentry->d_sb->s_magic;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index d8e6d42..57725d4 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -310,26 +310,29 @@
now = jiffies;
timeout = sbi->exp_timeout;
- spin_lock(&sbi->fs_lock);
- ino = autofs4_dentry_ino(root);
- /* No point expiring a pending mount */
- if (ino->flags & AUTOFS_INF_PENDING)
- goto out;
if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
+ spin_lock(&sbi->fs_lock);
+ ino = autofs4_dentry_ino(root);
+ /* No point expiring a pending mount */
+ if (ino->flags & AUTOFS_INF_PENDING) {
+ spin_unlock(&sbi->fs_lock);
+ goto out;
+ }
ino->flags |= AUTOFS_INF_WANT_EXPIRE;
spin_unlock(&sbi->fs_lock);
synchronize_rcu();
- spin_lock(&sbi->fs_lock);
if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
+ spin_lock(&sbi->fs_lock);
ino->flags |= AUTOFS_INF_EXPIRING;
init_completion(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
return root;
}
+ spin_lock(&sbi->fs_lock);
ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
+ spin_unlock(&sbi->fs_lock);
}
out:
- spin_unlock(&sbi->fs_lock);
dput(root);
return NULL;
@@ -495,8 +498,9 @@
return expired;
}
-int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
+int autofs4_expire_wait(const struct path *path, int rcu_walk)
{
+ struct dentry *dentry = path->dentry;
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
struct autofs_info *ino = autofs4_dentry_ino(dentry);
int status;
@@ -525,7 +529,7 @@
pr_debug("waiting for expire %p name=%pd\n", dentry, dentry);
- status = autofs4_wait(sbi, dentry, NFY_NONE);
+ status = autofs4_wait(sbi, path, NFY_NONE);
wait_for_completion(&ino->expire_complete);
pr_debug("expire done status=%d\n", status);
@@ -592,11 +596,12 @@
if (dentry) {
struct autofs_info *ino = autofs4_dentry_ino(dentry);
+ const struct path path = { .mnt = mnt, .dentry = dentry };
/* This is synchronous because it makes the daemon a
* little easier
*/
- ret = autofs4_wait(sbi, dentry, NFY_EXPIRE);
+ ret = autofs4_wait(sbi, &path, NFY_EXPIRE);
spin_lock(&sbi->fs_lock);
/* avoid rapid-fire expire attempts if expiry fails */
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index a11f731..82e8f6e 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -32,7 +32,7 @@
static struct dentry *autofs4_lookup(struct inode *,
struct dentry *, unsigned int);
static struct vfsmount *autofs4_d_automount(struct path *);
-static int autofs4_d_manage(struct dentry *, bool);
+static int autofs4_d_manage(const struct path *, bool);
static void autofs4_dentry_release(struct dentry *);
const struct file_operations autofs4_root_operations = {
@@ -123,7 +123,7 @@
* it.
*/
spin_lock(&sbi->lookup_lock);
- if (!d_mountpoint(dentry) && simple_empty(dentry)) {
+ if (!path_is_mountpoint(&file->f_path) && simple_empty(dentry)) {
spin_unlock(&sbi->lookup_lock);
return -ENOENT;
}
@@ -269,39 +269,41 @@
return NULL;
}
-static int autofs4_mount_wait(struct dentry *dentry, bool rcu_walk)
+static int autofs4_mount_wait(const struct path *path, bool rcu_walk)
{
- struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
- struct autofs_info *ino = autofs4_dentry_ino(dentry);
+ struct autofs_sb_info *sbi = autofs4_sbi(path->dentry->d_sb);
+ struct autofs_info *ino = autofs4_dentry_ino(path->dentry);
int status = 0;
if (ino->flags & AUTOFS_INF_PENDING) {
if (rcu_walk)
return -ECHILD;
- pr_debug("waiting for mount name=%pd\n", dentry);
- status = autofs4_wait(sbi, dentry, NFY_MOUNT);
+ pr_debug("waiting for mount name=%pd\n", path->dentry);
+ status = autofs4_wait(sbi, path, NFY_MOUNT);
pr_debug("mount wait done status=%d\n", status);
}
ino->last_used = jiffies;
return status;
}
-static int do_expire_wait(struct dentry *dentry, bool rcu_walk)
+static int do_expire_wait(const struct path *path, bool rcu_walk)
{
+ struct dentry *dentry = path->dentry;
struct dentry *expiring;
expiring = autofs4_lookup_expiring(dentry, rcu_walk);
if (IS_ERR(expiring))
return PTR_ERR(expiring);
if (!expiring)
- return autofs4_expire_wait(dentry, rcu_walk);
+ return autofs4_expire_wait(path, rcu_walk);
else {
+ const struct path this = { .mnt = path->mnt, .dentry = expiring };
/*
* If we are racing with expire the request might not
* be quite complete, but the directory has been removed
* so it must have been successful, just wait for it.
*/
- autofs4_expire_wait(expiring, 0);
+ autofs4_expire_wait(&this, 0);
autofs4_del_expiring(expiring);
dput(expiring);
}
@@ -354,7 +356,7 @@
* and the directory was removed, so just go ahead and try
* the mount.
*/
- status = do_expire_wait(dentry, 0);
+ status = do_expire_wait(path, 0);
if (status && status != -EAGAIN)
return NULL;
@@ -362,7 +364,7 @@
spin_lock(&sbi->fs_lock);
if (ino->flags & AUTOFS_INF_PENDING) {
spin_unlock(&sbi->fs_lock);
- status = autofs4_mount_wait(dentry, 0);
+ status = autofs4_mount_wait(path, 0);
if (status)
return ERR_PTR(status);
goto done;
@@ -370,28 +372,28 @@
/*
* If the dentry is a symlink it's equivalent to a directory
- * having d_mountpoint() true, so there's no need to call back
- * to the daemon.
+ * having path_is_mountpoint() true, so there's no need to call
+ * back to the daemon.
*/
if (d_really_is_positive(dentry) && d_is_symlink(dentry)) {
spin_unlock(&sbi->fs_lock);
goto done;
}
- if (!d_mountpoint(dentry)) {
+ if (!path_is_mountpoint(path)) {
/*
* It's possible that user space hasn't removed directories
* after umounting a rootless multi-mount, although it
- * should. For v5 have_submounts() is sufficient to handle
- * this because the leaves of the directory tree under the
- * mount never trigger mounts themselves (they have an autofs
- * trigger mount mounted on them). But v4 pseudo direct mounts
- * do need the leaves to trigger mounts. In this case we
- * have no choice but to use the list_empty() check and
+ * should. For v5 path_has_submounts() is sufficient to
+ * handle this because the leaves of the directory tree under
+ * the mount never trigger mounts themselves (they have an
+ * autofs trigger mount mounted on them). But v4 pseudo direct
+ * mounts do need the leaves to trigger mounts. In this case
+ * we have no choice but to use the list_empty() check and
* require user space behave.
*/
if (sbi->version > 4) {
- if (have_submounts(dentry)) {
+ if (path_has_submounts(path)) {
spin_unlock(&sbi->fs_lock);
goto done;
}
@@ -403,7 +405,7 @@
}
ino->flags |= AUTOFS_INF_PENDING;
spin_unlock(&sbi->fs_lock);
- status = autofs4_mount_wait(dentry, 0);
+ status = autofs4_mount_wait(path, 0);
spin_lock(&sbi->fs_lock);
ino->flags &= ~AUTOFS_INF_PENDING;
if (status) {
@@ -421,8 +423,9 @@
return NULL;
}
-static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
+static int autofs4_d_manage(const struct path *path, bool rcu_walk)
{
+ struct dentry *dentry = path->dentry;
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
struct autofs_info *ino = autofs4_dentry_ino(dentry);
int status;
@@ -431,20 +434,20 @@
/* The daemon never waits. */
if (autofs4_oz_mode(sbi)) {
- if (!d_mountpoint(dentry))
+ if (!path_is_mountpoint(path))
return -EISDIR;
return 0;
}
/* Wait for pending expires */
- if (do_expire_wait(dentry, rcu_walk) == -ECHILD)
+ if (do_expire_wait(path, rcu_walk) == -ECHILD)
return -ECHILD;
/*
* This dentry may be under construction so wait on mount
* completion.
*/
- status = autofs4_mount_wait(dentry, rcu_walk);
+ status = autofs4_mount_wait(path, rcu_walk);
if (status)
return status;
@@ -460,7 +463,7 @@
if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
return 0;
- if (d_mountpoint(dentry))
+ if (path_is_mountpoint(path))
return 0;
inode = d_inode_rcu(dentry);
if (inode && S_ISLNK(inode->i_mode))
@@ -487,7 +490,7 @@
* we can avoid needless calls ->d_automount() and avoid
* an incorrect ELOOP error return.
*/
- if ((!d_mountpoint(dentry) && !simple_empty(dentry)) ||
+ if ((!path_is_mountpoint(path) && !simple_empty(dentry)) ||
(d_really_is_positive(dentry) && d_is_symlink(dentry)))
status = -EISDIR;
}
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index e44271d..1278335 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -250,8 +250,9 @@
static int validate_request(struct autofs_wait_queue **wait,
struct autofs_sb_info *sbi,
const struct qstr *qstr,
- struct dentry *dentry, enum autofs_notify notify)
+ const struct path *path, enum autofs_notify notify)
{
+ struct dentry *dentry = path->dentry;
struct autofs_wait_queue *wq;
struct autofs_info *ino;
@@ -314,6 +315,7 @@
*/
if (notify == NFY_MOUNT) {
struct dentry *new = NULL;
+ struct path this;
int valid = 1;
/*
@@ -333,7 +335,9 @@
dentry = new;
}
}
- if (have_submounts(dentry))
+ this.mnt = path->mnt;
+ this.dentry = dentry;
+ if (path_has_submounts(&this))
valid = 0;
if (new)
@@ -345,8 +349,9 @@
}
int autofs4_wait(struct autofs_sb_info *sbi,
- struct dentry *dentry, enum autofs_notify notify)
+ const struct path *path, enum autofs_notify notify)
{
+ struct dentry *dentry = path->dentry;
struct autofs_wait_queue *wq;
struct qstr qstr;
char *name;
@@ -405,7 +410,7 @@
return -EINTR;
}
- ret = validate_request(&wq, sbi, &qstr, dentry, notify);
+ ret = validate_request(&wq, sbi, &qstr, path, notify);
if (ret <= 0) {
if (ret != -EINTR)
mutex_unlock(&sbi->wq_mutex);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 50bcfb8..6a82371 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3232,9 +3232,6 @@
size_t num_pages, loff_t pos, size_t write_bytes,
struct extent_state **cached);
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
-ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- size_t len, unsigned int flags);
int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, u64 len);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 448f57d..b5c5da2 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3033,7 +3033,6 @@
#ifdef CONFIG_COMPAT
.compat_ioctl = btrfs_compat_ioctl,
#endif
- .copy_file_range = btrfs_copy_file_range,
.clone_file_range = btrfs_clone_file_range,
.dedupe_file_range = btrfs_dedupe_file_range,
};
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0a69025..33f967d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -834,7 +834,7 @@
* sys_mkdirat and vfs_mkdir, but we only do a single component lookup
* inside this filesystem so it's quite a bit simpler.
*/
-static noinline int btrfs_mksubvol(struct path *parent,
+static noinline int btrfs_mksubvol(const struct path *parent,
char *name, int namelen,
struct btrfs_root *snap_src,
u64 *async_transid, bool readonly,
@@ -3987,18 +3987,6 @@
return ret;
}
-ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- size_t len, unsigned int flags)
-{
- ssize_t ret;
-
- ret = btrfs_clone_files(file_out, file_in, pos_in, len, pos_out);
- if (ret == 0)
- ret = len;
- return ret;
-}
-
int btrfs_clone_file_range(struct file *src_file, loff_t off,
struct file *dst_file, loff_t destoff, u64 len)
{
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index a0f1e2b..9cd0c0e 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1317,25 +1317,27 @@
struct page *page, void *fsdata)
{
struct inode *inode = file_inode(file);
- unsigned from = pos & (PAGE_SIZE - 1);
int check_cap = 0;
dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
inode, page, (int)pos, (int)copied, (int)len);
/* zero the stale part of the page if we did a short copy */
- if (copied < len)
- zero_user_segment(page, from+copied, len);
+ if (!PageUptodate(page)) {
+ if (copied < len) {
+ copied = 0;
+ goto out;
+ }
+ SetPageUptodate(page);
+ }
/* did file size increase? */
if (pos+copied > i_size_read(inode))
check_cap = ceph_inode_set_size(inode, pos+copied);
- if (!PageUptodate(page))
- SetPageUptodate(page);
-
set_page_dirty(page);
+out:
unlock_page(page);
put_page(page);
diff --git a/fs/dcache.c b/fs/dcache.c
index 5c7cc95..2523783 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1273,38 +1273,44 @@
goto again;
}
-/*
- * Search for at least 1 mount point in the dentry's subdirs.
- * We descend to the next level whenever the d_subdirs
- * list is non-empty and continue searching.
- */
+struct check_mount {
+ struct vfsmount *mnt;
+ unsigned int mounted;
+};
-static enum d_walk_ret check_mount(void *data, struct dentry *dentry)
+static enum d_walk_ret path_check_mount(void *data, struct dentry *dentry)
{
- int *ret = data;
- if (d_mountpoint(dentry)) {
- *ret = 1;
+ struct check_mount *info = data;
+ struct path path = { .mnt = info->mnt, .dentry = dentry };
+
+ if (likely(!d_mountpoint(dentry)))
+ return D_WALK_CONTINUE;
+ if (__path_is_mountpoint(&path)) {
+ info->mounted = 1;
return D_WALK_QUIT;
}
return D_WALK_CONTINUE;
}
/**
- * have_submounts - check for mounts over a dentry
- * @parent: dentry to check.
+ * path_has_submounts - check for mounts over a dentry in the
+ * current namespace.
+ * @parent: path to check.
*
* Return true if the parent or its subdirectories contain
- * a mount point
+ * a mount point in the current namespace.
*/
-int have_submounts(struct dentry *parent)
+int path_has_submounts(const struct path *parent)
{
- int ret = 0;
+ struct check_mount data = { .mnt = parent->mnt, .mounted = 0 };
- d_walk(parent, &ret, check_mount, NULL);
+ read_seqlock_excl(&mount_lock);
+ d_walk(parent->dentry, &data, path_check_mount, NULL);
+ read_sequnlock_excl(&mount_lock);
- return ret;
+ return data.mounted;
}
-EXPORT_SYMBOL(have_submounts);
+EXPORT_SYMBOL(path_has_submounts);
/*
* Called by mount code to set a mountpoint and check if the mountpoint is
diff --git a/fs/dcookies.c b/fs/dcookies.c
index ac44a69..a26a701 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -90,7 +90,7 @@
}
-static struct dcookie_struct *alloc_dcookie(struct path *path)
+static struct dcookie_struct *alloc_dcookie(const struct path *path)
{
struct dcookie_struct *dcs = kmem_cache_alloc(dcookie_cache,
GFP_KERNEL);
@@ -113,7 +113,7 @@
/* This is the main kernel-side routine that retrieves the cookie
* value for a dentry/vfsmnt pair.
*/
-int get_dcookie(struct path *path, unsigned long *cookie)
+int get_dcookie(const struct path *path, unsigned long *cookie)
{
int err = 0;
struct dcookie_struct * dcs;
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index d8072bc..0ac6281 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -870,46 +870,31 @@
page = *pagep;
if (page == NULL) {
- ret = simple_write_begin(file, mapping, pos, len, flags, pagep,
- fsdata);
- if (ret) {
- EXOFS_DBGMSG("simple_write_begin failed\n");
- goto out;
+ page = grab_cache_page_write_begin(mapping, pos >> PAGE_SHIFT,
+ flags);
+ if (!page) {
+ EXOFS_DBGMSG("grab_cache_page_write_begin failed\n");
+ return -ENOMEM;
}
-
- page = *pagep;
+ *pagep = page;
}
/* read modify write */
if (!PageUptodate(page) && (len != PAGE_SIZE)) {
loff_t i_size = i_size_read(mapping->host);
pgoff_t end_index = i_size >> PAGE_SHIFT;
- size_t rlen;
- if (page->index < end_index)
- rlen = PAGE_SIZE;
- else if (page->index == end_index)
- rlen = i_size & ~PAGE_MASK;
- else
- rlen = 0;
-
- if (!rlen) {
+ if (page->index > end_index) {
clear_highpage(page);
SetPageUptodate(page);
- goto out;
- }
-
- ret = _readpage(page, true);
- if (ret) {
- /*SetPageError was done by _readpage. Is it ok?*/
- unlock_page(page);
- EXOFS_DBGMSG("__readpage failed\n");
+ } else {
+ ret = _readpage(page, true);
+ if (ret) {
+ unlock_page(page);
+ EXOFS_DBGMSG("__readpage failed\n");
+ }
}
}
-out:
- if (unlikely(ret))
- _write_failed(mapping->host, pos + len);
-
return ret;
}
@@ -929,18 +914,25 @@
struct page *page, void *fsdata)
{
struct inode *inode = mapping->host;
- /* According to comment in simple_write_end i_mutex is held */
- loff_t i_size = inode->i_size;
- int ret;
+ loff_t last_pos = pos + copied;
- ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata);
- if (unlikely(ret))
- _write_failed(inode, pos + len);
-
- /* TODO: once simple_write_end marks inode dirty remove */
- if (i_size != inode->i_size)
+ if (!PageUptodate(page)) {
+ if (copied < len) {
+ _write_failed(inode, pos + len);
+ copied = 0;
+ goto out;
+ }
+ SetPageUptodate(page);
+ }
+ if (last_pos > inode->i_size) {
+ i_size_write(inode, last_pos);
mark_inode_dirty(inode);
- return ret;
+ }
+ set_page_dirty(page);
+out:
+ unlock_page(page);
+ put_page(page);
+ return copied;
}
static int exofs_releasepage(struct page *page, gfp_t gfp)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index dfc8309..63a6b63 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1205,7 +1205,7 @@
static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
- struct path *path);
+ const struct path *path);
static int ext4_quota_off(struct super_block *sb, int type);
static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
@@ -5293,7 +5293,7 @@
* Standard function to be called on quota_on
*/
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
- struct path *path)
+ const struct path *path)
{
int err;
diff --git a/fs/file_table.c b/fs/file_table.c
index ad17e05..6d982b5 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -155,7 +155,7 @@
* @mode: the mode with which the new file will be opened
* @fop: the 'struct file_operations' for the new file
*/
-struct file *alloc_file(struct path *path, fmode_t mode,
+struct file *alloc_file(const struct path *path, fmode_t mode,
const struct file_operations *fop)
{
struct file *file;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 5a6f52e..6b039d7 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -839,12 +839,10 @@
BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode)));
kaddr = kmap_atomic(page);
memcpy(buf + pos, kaddr + pos, copied);
- memset(kaddr + pos + copied, 0, len - copied);
flush_dcache_page(page);
kunmap_atomic(kaddr);
- if (!PageUptodate(page))
- SetPageUptodate(page);
+ WARN_ON(!PageUptodate(page));
unlock_page(page);
put_page(page);
diff --git a/fs/internal.h b/fs/internal.h
index 4fcf517..b63cf3a 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -62,7 +62,7 @@
extern void *copy_mount_options(const void __user *);
extern char *copy_mount_string(const void __user *);
-extern struct vfsmount *lookup_mnt(struct path *);
+extern struct vfsmount *lookup_mnt(const struct path *);
extern int finish_automount(struct vfsmount *, struct path *);
extern int sb_prepare_remount_readonly(struct super_block *);
diff --git a/fs/libfs.c b/fs/libfs.c
index 48826d4..7604870 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -465,6 +465,8 @@
* is not called, so a filesystem that actually does store data in .write_inode
* should extend on what's done here with a call to mark_inode_dirty() in the
* case that i_size has changed.
+ *
+ * Use *ONLY* with simple_readpage()
*/
int simple_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
@@ -474,14 +476,14 @@
loff_t last_pos = pos + copied;
/* zero the stale part of the page if we did a short copy */
- if (copied < len) {
- unsigned from = pos & (PAGE_SIZE - 1);
+ if (!PageUptodate(page)) {
+ if (copied < len) {
+ unsigned from = pos & (PAGE_SIZE - 1);
- zero_user(page, from + copied, len - copied);
- }
-
- if (!PageUptodate(page))
+ zero_user(page, from + copied, len - copied);
+ }
SetPageUptodate(page);
+ }
/*
* No need to use i_size_read() here, the i_size
* cannot change under us because we hold the i_mutex.
diff --git a/fs/mount.h b/fs/mount.h
index d2e25d7..2c856fc 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -94,6 +94,12 @@
extern int __legitimize_mnt(struct vfsmount *, unsigned);
extern bool legitimize_mnt(struct vfsmount *, unsigned);
+static inline bool __path_is_mountpoint(const struct path *path)
+{
+ struct mount *m = __lookup_mnt(path->mnt, path->dentry);
+ return m && likely(!(m->mnt.mnt_flags & MNT_SYNC_UMOUNT));
+}
+
extern void __detach_mounts(struct dentry *dentry);
static inline void detach_mounts(struct dentry *dentry)
diff --git a/fs/namei.c b/fs/namei.c
index 2b55ea1..1c372de 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1200,7 +1200,7 @@
if (managed & DCACHE_MANAGE_TRANSIT) {
BUG_ON(!path->dentry->d_op);
BUG_ON(!path->dentry->d_op->d_manage);
- ret = path->dentry->d_op->d_manage(path->dentry, false);
+ ret = path->dentry->d_op->d_manage(path, false);
if (ret < 0)
break;
}
@@ -1263,10 +1263,10 @@
}
EXPORT_SYMBOL(follow_down_one);
-static inline int managed_dentry_rcu(struct dentry *dentry)
+static inline int managed_dentry_rcu(const struct path *path)
{
- return (dentry->d_flags & DCACHE_MANAGE_TRANSIT) ?
- dentry->d_op->d_manage(dentry, true) : 0;
+ return (path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) ?
+ path->dentry->d_op->d_manage(path, true) : 0;
}
/*
@@ -1282,7 +1282,7 @@
* Don't forget we might have a non-mountpoint managed dentry
* that wants to block transit.
*/
- switch (managed_dentry_rcu(path->dentry)) {
+ switch (managed_dentry_rcu(path)) {
case -ECHILD:
default:
return false;
@@ -1392,8 +1392,7 @@
if (managed & DCACHE_MANAGE_TRANSIT) {
BUG_ON(!path->dentry->d_op);
BUG_ON(!path->dentry->d_op->d_manage);
- ret = path->dentry->d_op->d_manage(
- path->dentry, false);
+ ret = path->dentry->d_op->d_manage(path, false);
if (ret < 0)
return ret == -EISDIR ? 0 : ret;
}
@@ -2863,7 +2862,7 @@
!(path->mnt->mnt_sb->s_iflags & SB_I_NODEV);
}
-static int may_open(struct path *path, int acc_mode, int flag)
+static int may_open(const struct path *path, int acc_mode, int flag)
{
struct dentry *dentry = path->dentry;
struct inode *inode = dentry->d_inode;
@@ -2913,7 +2912,7 @@
static int handle_truncate(struct file *filp)
{
- struct path *path = &filp->f_path;
+ const struct path *path = &filp->f_path;
struct inode *inode = path->dentry->d_inode;
int error = get_write_access(inode);
if (error)
diff --git a/fs/namespace.c b/fs/namespace.c
index e6c234b..f7e28f8 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -678,7 +678,7 @@
*
* lookup_mnt takes a reference to the found vfsmount.
*/
-struct vfsmount *lookup_mnt(struct path *path)
+struct vfsmount *lookup_mnt(const struct path *path)
{
struct mount *child_mnt;
struct vfsmount *m;
@@ -1159,7 +1159,36 @@
}
EXPORT_SYMBOL(mntget);
-struct vfsmount *mnt_clone_internal(struct path *path)
+/* path_is_mountpoint() - Check if path is a mount in the current
+ * namespace.
+ *
+ * d_mountpoint() can only be used reliably to establish if a dentry is
+ * not mounted in any namespace and that common case is handled inline.
+ * d_mountpoint() isn't aware of the possibility there may be multiple
+ * mounts using a given dentry in a different namespace. This function
+ * checks if the passed in path is a mountpoint rather than the dentry
+ * alone.
+ */
+bool path_is_mountpoint(const struct path *path)
+{
+ unsigned seq;
+ bool res;
+
+ if (!d_mountpoint(path->dentry))
+ return false;
+
+ rcu_read_lock();
+ do {
+ seq = read_seqbegin(&mount_lock);
+ res = __path_is_mountpoint(path);
+ } while (read_seqretry(&mount_lock, seq));
+ rcu_read_unlock();
+
+ return res;
+}
+EXPORT_SYMBOL(path_is_mountpoint);
+
+struct vfsmount *mnt_clone_internal(const struct path *path)
{
struct mount *p;
p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
@@ -1758,7 +1787,7 @@
/* Caller should check returned pointer for errors */
-struct vfsmount *collect_mounts(struct path *path)
+struct vfsmount *collect_mounts(const struct path *path)
{
struct mount *tree;
namespace_lock();
@@ -1791,7 +1820,7 @@
*
* Release with mntput().
*/
-struct vfsmount *clone_private_mount(struct path *path)
+struct vfsmount *clone_private_mount(const struct path *path)
{
struct mount *old_mnt = real_mount(path->mnt);
struct mount *new_mnt;
@@ -2997,7 +3026,7 @@
return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
}
-bool path_is_under(struct path *path1, struct path *path2)
+bool path_is_under(const struct path *path1, const struct path *path2)
{
bool res;
read_seqlock_excl(&mount_lock);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 64c11f39..55208b9 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -377,7 +377,7 @@
*/
if (!PageUptodate(page)) {
unsigned pglen = nfs_page_length(page);
- unsigned end = offset + len;
+ unsigned end = offset + copied;
if (pglen == 0) {
zero_user_segments(page, 0, offset,
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 6faaf71..5a4ec30 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -85,7 +85,7 @@
struct inode *inode,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
- u32 mask, void *data, int data_type,
+ u32 mask, const void *data, int data_type,
const unsigned char *file_name, u32 cookie)
{
struct dnotify_mark *dn_mark;
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index e0e5f7c..bbc175d 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -90,10 +90,10 @@
static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmnt_mark,
u32 event_mask,
- void *data, int data_type)
+ const void *data, int data_type)
{
__u32 marks_mask, marks_ignored_mask;
- struct path *path = data;
+ const struct path *path = data;
pr_debug("%s: inode_mark=%p vfsmnt_mark=%p mask=%x data=%p"
" data_type=%d\n", __func__, inode_mark, vfsmnt_mark,
@@ -140,7 +140,7 @@
}
struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask,
- struct path *path)
+ const struct path *path)
{
struct fanotify_event_info *event;
@@ -177,7 +177,7 @@
struct inode *inode,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *fanotify_mark,
- u32 mask, void *data, int data_type,
+ u32 mask, const void *data, int data_type,
const unsigned char *file_name, u32 cookie)
{
int ret = 0;
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 2a5fb14..4500a74 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -47,4 +47,4 @@
}
struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask,
- struct path *path);
+ const struct path *path);
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index db39de2..b41515d 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -86,7 +86,7 @@
}
/* Notify this dentry's parent about a child's events. */
-int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
+int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask)
{
struct dentry *parent;
struct inode *p_inode;
@@ -125,7 +125,7 @@
static int send_to_group(struct inode *to_tell,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
- __u32 mask, void *data,
+ __u32 mask, const void *data,
int data_is, u32 cookie,
const unsigned char *file_name)
{
@@ -187,7 +187,7 @@
* out to all of the registered fsnotify_group. Those groups can then use the
* notification event in whatever means they feel necessary.
*/
-int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
+int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
const unsigned char *file_name, u32 cookie)
{
struct hlist_node *inode_node = NULL, *vfsmount_node = NULL;
@@ -199,7 +199,7 @@
__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
if (data_is == FSNOTIFY_EVENT_PATH)
- mnt = real_mount(((struct path *)data)->mnt);
+ mnt = real_mount(((const struct path *)data)->mnt);
else
mnt = NULL;
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index ed855ef..a6f5907 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -26,7 +26,7 @@
struct inode *inode,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
- u32 mask, void *data, int data_type,
+ u32 mask, const void *data, int data_type,
const unsigned char *file_name, u32 cookie);
extern const struct fsnotify_ops inotify_fsnotify_ops;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 2cd900c..19e7ec1 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -66,7 +66,7 @@
struct inode *inode,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
- u32 mask, void *data, int data_type,
+ u32 mask, const void *data, int data_type,
const unsigned char *file_name, u32 cookie)
{
struct inotify_inode_mark *i_mark;
@@ -80,7 +80,7 @@
if ((inode_mark->mask & FS_EXCL_UNLINK) &&
(data_type == FSNOTIFY_EVENT_PATH)) {
- struct path *path = data;
+ const struct path *path = data;
if (d_unlinked(path->dentry))
return 0;
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index f72712f..d4ec0d8 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5194,7 +5194,7 @@
rec = &el->l_recs[index];
if (new_flags && (rec->e_flags & new_flags)) {
mlog(ML_ERROR, "Owner %llu tried to set %d flags on an "
- "extent that already had them",
+ "extent that already had them\n",
(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
new_flags);
goto out;
@@ -5202,7 +5202,7 @@
if (clear_flags && !(rec->e_flags & clear_flags)) {
mlog(ML_ERROR, "Owner %llu tried to clear %d flags on an "
- "extent that didn't have them",
+ "extent that didn't have them\n",
(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
clear_flags);
goto out;
@@ -5713,8 +5713,7 @@
struct ocfs2_refcount_tree *ref_tree = NULL;
if ((flags & OCFS2_EXT_REFCOUNTED) && len) {
- BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
- OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
if (!refcount_tree_locked) {
ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 4d9c6f5..11556b7 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -464,6 +464,15 @@
trace_ocfs2_bmap((unsigned long long)OCFS2_I(inode)->ip_blkno,
(unsigned long long)block);
+ /*
+ * The swap code (ab-)uses ->bmap to get a block mapping and then
+ * bypasses the file system for actual I/O. We really can't allow
+ * that on refcounted inodes, so we have to skip out here. And yes,
+ * 0 is the magic code for a bmap error..
+ */
+ if (ocfs2_is_refcount_inode(inode))
+ return 0;
+
/* We don't need to lock journal system files, since they aren't
* accessed concurrently from multiple nodes.
*/
@@ -2253,10 +2262,10 @@
return ret;
}
-static void ocfs2_dio_end_io_write(struct inode *inode,
- struct ocfs2_dio_write_ctxt *dwc,
- loff_t offset,
- ssize_t bytes)
+static int ocfs2_dio_end_io_write(struct inode *inode,
+ struct ocfs2_dio_write_ctxt *dwc,
+ loff_t offset,
+ ssize_t bytes)
{
struct ocfs2_cached_dealloc_ctxt dealloc;
struct ocfs2_extent_tree et;
@@ -2307,7 +2316,7 @@
mlog_errno(ret);
}
- di = (struct ocfs2_dinode *)di_bh;
+ di = (struct ocfs2_dinode *)di_bh->b_data;
ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
@@ -2364,6 +2373,8 @@
if (locked)
inode_unlock(inode);
ocfs2_dio_free_write_ctx(inode, dwc);
+
+ return ret;
}
/*
@@ -2378,21 +2389,19 @@
{
struct inode *inode = file_inode(iocb->ki_filp);
int level;
-
- if (bytes <= 0)
- return 0;
+ int ret = 0;
/* this io's submitter should not have unlocked this before we could */
BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
- if (private)
- ocfs2_dio_end_io_write(inode, private, offset, bytes);
+ if (bytes > 0 && private)
+ ret = ocfs2_dio_end_io_write(inode, private, offset, bytes);
ocfs2_iocb_clear_rw_locked(iocb);
level = ocfs2_iocb_rw_locked_level(iocb);
ocfs2_rw_unlock(inode, level);
- return 0;
+ return ret;
}
static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 000c234..c488965 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1030,7 +1030,7 @@
* Only quota files call this without a bh, and they can't be
* refcounted.
*/
- BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!di_bh && ocfs2_is_refcount_inode(inode));
BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
@@ -1667,9 +1667,9 @@
*done = ret;
}
-static int ocfs2_remove_inode_range(struct inode *inode,
- struct buffer_head *di_bh, u64 byte_start,
- u64 byte_len)
+int ocfs2_remove_inode_range(struct inode *inode,
+ struct buffer_head *di_bh, u64 byte_start,
+ u64 byte_len)
{
int ret = 0, flags = 0, done = 0, i;
u32 trunc_start, trunc_len, trunc_end, trunc_cpos, phys_cpos;
@@ -1719,8 +1719,7 @@
* within one cluster(means is not exactly aligned to clustersize).
*/
- if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
-
+ if (ocfs2_is_refcount_inode(inode)) {
ret = ocfs2_cow_file_pos(inode, di_bh, byte_start);
if (ret) {
mlog_errno(ret);
@@ -2036,7 +2035,7 @@
struct super_block *sb = inode->i_sb;
if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)) ||
- !(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) ||
+ !ocfs2_is_refcount_inode(inode) ||
OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
return 0;
@@ -2440,6 +2439,31 @@
return offset;
}
+static int ocfs2_file_clone_range(struct file *file_in,
+ loff_t pos_in,
+ struct file *file_out,
+ loff_t pos_out,
+ u64 len)
+{
+ return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
+ len, false);
+}
+
+static ssize_t ocfs2_file_dedupe_range(struct file *src_file,
+ u64 loff,
+ u64 len,
+ struct file *dst_file,
+ u64 dst_loff)
+{
+ int error;
+
+ error = ocfs2_reflink_remap_range(src_file, loff, dst_file, dst_loff,
+ len, true);
+ if (error)
+ return error;
+ return len;
+}
+
const struct inode_operations ocfs2_file_iops = {
.setattr = ocfs2_setattr,
.getattr = ocfs2_getattr,
@@ -2479,6 +2503,8 @@
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
+ .clone_file_range = ocfs2_file_clone_range,
+ .dedupe_file_range = ocfs2_file_dedupe_range,
};
const struct file_operations ocfs2_dops = {
@@ -2524,6 +2550,8 @@
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
+ .clone_file_range = ocfs2_file_clone_range,
+ .dedupe_file_range = ocfs2_file_dedupe_range,
};
const struct file_operations ocfs2_dops_no_plocks = {
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index e8c62f2..897fd9a 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -82,4 +82,7 @@
int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos,
size_t count);
+int ocfs2_remove_inode_range(struct inode *inode,
+ struct buffer_head *di_bh, u64 byte_start,
+ u64 byte_len);
#endif /* OCFS2_FILE_H */
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 5af68fc..9b955f7 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -181,4 +181,10 @@
return container_of(ci, struct ocfs2_inode_info, ip_metadata_cache);
}
+/* Does this inode have the reflink flag set? */
+static inline bool ocfs2_is_refcount_inode(struct inode *inode)
+{
+ return (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
+}
+
#endif /* OCFS2_INODE_H */
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 4e8f32eb..e52a285 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -235,10 +235,7 @@
u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) {
-
- BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
- OCFS2_HAS_REFCOUNT_FL));
-
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
BUG_ON(!context->refcount_loc);
ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1,
@@ -581,10 +578,7 @@
phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) {
-
- BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
- OCFS2_HAS_REFCOUNT_FL));
-
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
BUG_ON(!context->refcount_loc);
ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1,
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 738b4ea..d171d2c 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -34,6 +34,7 @@
#include "xattr.h"
#include "namei.h"
#include "ocfs2_trace.h"
+#include "file.h"
#include <linux/bio.h>
#include <linux/blkdev.h>
@@ -410,7 +411,7 @@
goto out;
}
- BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
di = (struct ocfs2_dinode *)di_bh->b_data;
*ref_blkno = le64_to_cpu(di->i_refcount_loc);
@@ -569,7 +570,7 @@
u32 num_got;
u64 suballoc_loc, first_blkno;
- BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
+ BUG_ON(ocfs2_is_refcount_inode(inode));
trace_ocfs2_create_refcount_tree(
(unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -707,7 +708,7 @@
struct ocfs2_refcount_block *rb;
struct ocfs2_refcount_tree *ref_tree;
- BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
+ BUG_ON(ocfs2_is_refcount_inode(inode));
ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
&ref_tree, &ref_root_bh);
@@ -774,7 +775,7 @@
u64 blk = 0, bg_blkno = 0, ref_blkno = le64_to_cpu(di->i_refcount_loc);
u16 bit = 0;
- if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL))
+ if (!ocfs2_is_refcount_inode(inode))
return 0;
BUG_ON(!ref_blkno);
@@ -2298,11 +2299,10 @@
{
int ret;
u64 ref_blkno;
- struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct buffer_head *ref_root_bh = NULL;
struct ocfs2_refcount_tree *tree;
- BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
ret = ocfs2_get_refcount_block(inode, &ref_blkno);
if (ret) {
@@ -2532,7 +2532,6 @@
int *ref_blocks)
{
int ret;
- struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct buffer_head *ref_root_bh = NULL;
struct ocfs2_refcount_tree *tree;
u64 start_cpos = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno);
@@ -2543,7 +2542,7 @@
goto out;
}
- BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
refcount_loc, &tree);
@@ -3411,14 +3410,13 @@
{
int ret;
u32 cow_start = 0, cow_len = 0;
- struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct buffer_head *ref_root_bh = NULL;
struct ocfs2_refcount_tree *ref_tree;
struct ocfs2_cow_context *context = NULL;
- BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
ret = ocfs2_refcount_cal_cow_clusters(inode, &di->id2.i_list,
cpos, write_len, max_cpos,
@@ -3628,11 +3626,10 @@
{
int ret;
struct ocfs2_xattr_value_root *xv = vb->vb_xv;
- struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_cow_context *context = NULL;
u32 cow_start, cow_len;
- BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
ret = ocfs2_refcount_cal_cow_clusters(inode, &xv->xr_list,
cpos, write_len, UINT_MAX,
@@ -3695,6 +3692,9 @@
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_alloc_context *meta_ac = NULL;
+ /* We need to be able to handle at least an extent tree split. */
+ ref_blocks = ocfs2_extend_meta_needed(data_et->et_root_el);
+
ret = ocfs2_calc_refcount_meta_credits(inode->i_sb,
ref_ci, ref_root_bh,
p_cluster, num_clusters,
@@ -3806,7 +3806,7 @@
ocfs2_init_dealloc_ctxt(&dealloc);
- if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)) {
+ if (!ocfs2_is_refcount_inode(inode)) {
ret = ocfs2_create_refcount_tree(inode, di_bh);
if (ret) {
mlog_errno(ret);
@@ -3933,6 +3933,13 @@
ret = ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
p_cluster, num_clusters,
meta_ac, dealloc);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_commit;
+ }
+
+ ret = dquot_alloc_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(osb->sb, num_clusters));
if (ret)
mlog_errno(ret);
@@ -4441,3 +4448,434 @@
return error;
}
+
+/* Update destination inode size, if necessary. */
+static int ocfs2_reflink_update_dest(struct inode *dest,
+ struct buffer_head *d_bh,
+ loff_t newlen)
+{
+ handle_t *handle;
+ int ret;
+
+ dest->i_blocks = ocfs2_inode_sector_count(dest);
+
+ if (newlen <= i_size_read(dest))
+ return 0;
+
+ handle = ocfs2_start_trans(OCFS2_SB(dest->i_sb),
+ OCFS2_INODE_UPDATE_CREDITS);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ mlog_errno(ret);
+ return ret;
+ }
+
+ /* Extend i_size if needed. */
+ spin_lock(&OCFS2_I(dest)->ip_lock);
+ if (newlen > i_size_read(dest))
+ i_size_write(dest, newlen);
+ spin_unlock(&OCFS2_I(dest)->ip_lock);
+ dest->i_ctime = dest->i_mtime = current_time(dest);
+
+ ret = ocfs2_mark_inode_dirty(handle, dest, d_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_commit;
+ }
+
+out_commit:
+ ocfs2_commit_trans(OCFS2_SB(dest->i_sb), handle);
+ return ret;
+}
+
+/* Remap the range pos_in:len in s_inode to pos_out:len in t_inode. */
+static int ocfs2_reflink_remap_extent(struct inode *s_inode,
+ struct buffer_head *s_bh,
+ loff_t pos_in,
+ struct inode *t_inode,
+ struct buffer_head *t_bh,
+ loff_t pos_out,
+ loff_t len,
+ struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+ struct ocfs2_extent_tree s_et;
+ struct ocfs2_extent_tree t_et;
+ struct ocfs2_dinode *dis;
+ struct buffer_head *ref_root_bh = NULL;
+ struct ocfs2_refcount_tree *ref_tree;
+ struct ocfs2_super *osb;
+ loff_t pstart, plen;
+ u32 p_cluster, num_clusters, slast, spos, tpos;
+ unsigned int ext_flags;
+ int ret = 0;
+
+ osb = OCFS2_SB(s_inode->i_sb);
+ dis = (struct ocfs2_dinode *)s_bh->b_data;
+ ocfs2_init_dinode_extent_tree(&s_et, INODE_CACHE(s_inode), s_bh);
+ ocfs2_init_dinode_extent_tree(&t_et, INODE_CACHE(t_inode), t_bh);
+
+ spos = ocfs2_bytes_to_clusters(s_inode->i_sb, pos_in);
+ tpos = ocfs2_bytes_to_clusters(t_inode->i_sb, pos_out);
+ slast = ocfs2_clusters_for_bytes(s_inode->i_sb, pos_in + len);
+
+ while (spos < slast) {
+ if (fatal_signal_pending(current)) {
+ ret = -EINTR;
+ goto out;
+ }
+
+ /* Look up the extent. */
+ ret = ocfs2_get_clusters(s_inode, spos, &p_cluster,
+ &num_clusters, &ext_flags);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ num_clusters = min_t(u32, num_clusters, slast - spos);
+
+ /* Punch out the dest range. */
+ pstart = ocfs2_clusters_to_bytes(t_inode->i_sb, tpos);
+ plen = ocfs2_clusters_to_bytes(t_inode->i_sb, num_clusters);
+ ret = ocfs2_remove_inode_range(t_inode, t_bh, pstart, plen);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ if (p_cluster == 0)
+ goto next_loop;
+
+ /* Lock the refcount btree... */
+ ret = ocfs2_lock_refcount_tree(osb,
+ le64_to_cpu(dis->i_refcount_loc),
+ 1, &ref_tree, &ref_root_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /* Mark s_inode's extent as refcounted. */
+ if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) {
+ ret = ocfs2_add_refcount_flag(s_inode, &s_et,
+ &ref_tree->rf_ci,
+ ref_root_bh, spos,
+ p_cluster, num_clusters,
+ dealloc, NULL);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_unlock_refcount;
+ }
+ }
+
+ /* Map in the new extent. */
+ ext_flags |= OCFS2_EXT_REFCOUNTED;
+ ret = ocfs2_add_refcounted_extent(t_inode, &t_et,
+ &ref_tree->rf_ci,
+ ref_root_bh,
+ tpos, p_cluster,
+ num_clusters,
+ ext_flags,
+ dealloc);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_unlock_refcount;
+ }
+
+ ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+ brelse(ref_root_bh);
+next_loop:
+ spos += num_clusters;
+ tpos += num_clusters;
+ }
+
+out:
+ return ret;
+out_unlock_refcount:
+ ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+ brelse(ref_root_bh);
+ return ret;
+}
+
+/* Set up refcount tree and remap s_inode to t_inode. */
+static int ocfs2_reflink_remap_blocks(struct inode *s_inode,
+ struct buffer_head *s_bh,
+ loff_t pos_in,
+ struct inode *t_inode,
+ struct buffer_head *t_bh,
+ loff_t pos_out,
+ loff_t len)
+{
+ struct ocfs2_cached_dealloc_ctxt dealloc;
+ struct ocfs2_super *osb;
+ struct ocfs2_dinode *dis;
+ struct ocfs2_dinode *dit;
+ int ret;
+
+ osb = OCFS2_SB(s_inode->i_sb);
+ dis = (struct ocfs2_dinode *)s_bh->b_data;
+ dit = (struct ocfs2_dinode *)t_bh->b_data;
+ ocfs2_init_dealloc_ctxt(&dealloc);
+
+ /*
+ * If we're reflinking the entire file and the source is inline
+ * data, just copy the contents.
+ */
+ if (pos_in == pos_out && pos_in == 0 && len == i_size_read(s_inode) &&
+ i_size_read(t_inode) <= len &&
+ (OCFS2_I(s_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)) {
+ ret = ocfs2_duplicate_inline_data(s_inode, s_bh, t_inode, t_bh);
+ if (ret)
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /*
+ * If both inodes belong to two different refcount groups then
+ * forget it because we don't know how (or want) to go merging
+ * refcount trees.
+ */
+ ret = -EOPNOTSUPP;
+ if (ocfs2_is_refcount_inode(s_inode) &&
+ ocfs2_is_refcount_inode(t_inode) &&
+ le64_to_cpu(dis->i_refcount_loc) !=
+ le64_to_cpu(dit->i_refcount_loc))
+ goto out;
+
+ /* Neither inode has a refcount tree. Add one to s_inode. */
+ if (!ocfs2_is_refcount_inode(s_inode) &&
+ !ocfs2_is_refcount_inode(t_inode)) {
+ ret = ocfs2_create_refcount_tree(s_inode, s_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ /* Ensure that both inodes end up with the same refcount tree. */
+ if (!ocfs2_is_refcount_inode(s_inode)) {
+ ret = ocfs2_set_refcount_tree(s_inode, s_bh,
+ le64_to_cpu(dit->i_refcount_loc));
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+ if (!ocfs2_is_refcount_inode(t_inode)) {
+ ret = ocfs2_set_refcount_tree(t_inode, t_bh,
+ le64_to_cpu(dis->i_refcount_loc));
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ /* Turn off inline data in the dest file. */
+ if (OCFS2_I(t_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
+ ret = ocfs2_convert_inline_data_to_extents(t_inode, t_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ /* Actually remap extents now. */
+ ret = ocfs2_reflink_remap_extent(s_inode, s_bh, pos_in, t_inode, t_bh,
+ pos_out, len, &dealloc);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+out:
+ if (ocfs2_dealloc_has_cluster(&dealloc)) {
+ ocfs2_schedule_truncate_log_flush(osb, 1);
+ ocfs2_run_deallocs(osb, &dealloc);
+ }
+
+ return ret;
+}
+
+/* Lock an inode and grab a bh pointing to the inode. */
+static int ocfs2_reflink_inodes_lock(struct inode *s_inode,
+ struct buffer_head **bh1,
+ struct inode *t_inode,
+ struct buffer_head **bh2)
+{
+ struct inode *inode1;
+ struct inode *inode2;
+ struct ocfs2_inode_info *oi1;
+ struct ocfs2_inode_info *oi2;
+ bool same_inode = (s_inode == t_inode);
+ int status;
+
+ /* First grab the VFS and rw locks. */
+ lock_two_nondirectories(s_inode, t_inode);
+ inode1 = s_inode;
+ inode2 = t_inode;
+ if (inode1->i_ino > inode2->i_ino)
+ swap(inode1, inode2);
+
+ status = ocfs2_rw_lock(inode1, 1);
+ if (status) {
+ mlog_errno(status);
+ goto out_i1;
+ }
+ if (!same_inode) {
+ status = ocfs2_rw_lock(inode2, 1);
+ if (status) {
+ mlog_errno(status);
+ goto out_i2;
+ }
+ }
+
+ /* Now go for the cluster locks */
+ oi1 = OCFS2_I(inode1);
+ oi2 = OCFS2_I(inode2);
+
+ trace_ocfs2_double_lock((unsigned long long)oi1->ip_blkno,
+ (unsigned long long)oi2->ip_blkno);
+
+ if (*bh1)
+ *bh1 = NULL;
+ if (*bh2)
+ *bh2 = NULL;
+
+ /* We always want to lock the one with the lower lockid first. */
+ if (oi1->ip_blkno > oi2->ip_blkno)
+ mlog_errno(-ENOLCK);
+
+ /* lock id1 */
+ status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_REFLINK_TARGET);
+ if (status < 0) {
+ if (status != -ENOENT)
+ mlog_errno(status);
+ goto out_rw2;
+ }
+
+ /* lock id2 */
+ if (!same_inode) {
+ status = ocfs2_inode_lock_nested(inode2, bh2, 1,
+ OI_LS_REFLINK_TARGET);
+ if (status < 0) {
+ if (status != -ENOENT)
+ mlog_errno(status);
+ goto out_cl1;
+ }
+ } else
+ *bh2 = *bh1;
+
+ trace_ocfs2_double_lock_end(
+ (unsigned long long)OCFS2_I(inode1)->ip_blkno,
+ (unsigned long long)OCFS2_I(inode2)->ip_blkno);
+
+ return 0;
+
+out_cl1:
+ ocfs2_inode_unlock(inode1, 1);
+ brelse(*bh1);
+ *bh1 = NULL;
+out_rw2:
+ ocfs2_rw_unlock(inode2, 1);
+out_i2:
+ ocfs2_rw_unlock(inode1, 1);
+out_i1:
+ unlock_two_nondirectories(s_inode, t_inode);
+ return status;
+}
+
+/* Unlock both inodes and release buffers. */
+static void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
+ struct buffer_head *s_bh,
+ struct inode *t_inode,
+ struct buffer_head *t_bh)
+{
+ ocfs2_inode_unlock(s_inode, 1);
+ ocfs2_rw_unlock(s_inode, 1);
+ brelse(s_bh);
+ if (s_inode != t_inode) {
+ ocfs2_inode_unlock(t_inode, 1);
+ ocfs2_rw_unlock(t_inode, 1);
+ brelse(t_bh);
+ }
+ unlock_two_nondirectories(s_inode, t_inode);
+}
+
+/* Link a range of blocks from one file to another. */
+int ocfs2_reflink_remap_range(struct file *file_in,
+ loff_t pos_in,
+ struct file *file_out,
+ loff_t pos_out,
+ u64 len,
+ bool is_dedupe)
+{
+ struct inode *inode_in = file_inode(file_in);
+ struct inode *inode_out = file_inode(file_out);
+ struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb);
+ struct buffer_head *in_bh = NULL, *out_bh = NULL;
+ bool same_inode = (inode_in == inode_out);
+ ssize_t ret;
+
+ if (!ocfs2_refcount_tree(osb))
+ return -EOPNOTSUPP;
+ if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
+ return -EROFS;
+
+ /* Lock both files against IO */
+ ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh);
+ if (ret)
+ return ret;
+
+ /* Check file eligibility and prepare for block sharing. */
+ ret = -EINVAL;
+ if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) ||
+ (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
+ goto out_unlock;
+
+ ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
+ &len, is_dedupe);
+ if (ret || len == 0)
+ goto out_unlock;
+
+ /* Lock out changes to the allocation maps and remap. */
+ down_write(&OCFS2_I(inode_in)->ip_alloc_sem);
+ if (!same_inode)
+ down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem,
+ SINGLE_DEPTH_NESTING);
+
+ ret = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in, inode_out,
+ out_bh, pos_out, len);
+
+ /* Zap any page cache for the destination file's range. */
+ if (!ret)
+ truncate_inode_pages_range(&inode_out->i_data, pos_out,
+ PAGE_ALIGN(pos_out + len) - 1);
+
+ up_write(&OCFS2_I(inode_in)->ip_alloc_sem);
+ if (!same_inode)
+ up_write(&OCFS2_I(inode_out)->ip_alloc_sem);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_unlock;
+ }
+
+ /*
+ * Empty the extent map so that we may get the right extent
+ * record from the disk.
+ */
+ ocfs2_extent_map_trunc(inode_in, 0);
+ ocfs2_extent_map_trunc(inode_out, 0);
+
+ ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_unlock;
+ }
+
+ ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
+ return 0;
+
+out_unlock:
+ ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
+ return ret;
+}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 6422bbc..4af55bf 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -115,4 +115,11 @@
const char __user *oldname,
const char __user *newname,
bool preserve);
+int ocfs2_reflink_remap_range(struct file *file_in,
+ loff_t pos_in,
+ struct file *file_out,
+ loff_t pos_out,
+ u64 len,
+ bool is_dedupe);
+
#endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index cb157a3..3c5384d 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2577,7 +2577,7 @@
if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
return 0;
- if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
+ if (ocfs2_is_refcount_inode(inode)) {
ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
le64_to_cpu(di->i_refcount_loc),
1, &ref_tree, &ref_root_bh);
@@ -3608,7 +3608,7 @@
}
/* Check whether the value is refcounted and do some preparation. */
- if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
+ if (ocfs2_is_refcount_inode(inode) &&
(!xis.not_found || !xbs.not_found)) {
ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
&xis, &xbs, &ref_tree,
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 1bfac28..8738a0d 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2401,7 +2401,7 @@
EXPORT_SYMBOL(dquot_resume);
int dquot_quota_on(struct super_block *sb, int type, int format_id,
- struct path *path)
+ const struct path *path)
{
int error = security_quota_on(path->dentry);
if (error)
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 2d44542..5acd0c47 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -80,7 +80,7 @@
}
static int quota_quotaon(struct super_block *sb, int type, qid_t id,
- struct path *path)
+ const struct path *path)
{
if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_enable)
return -ENOSYS;
@@ -700,7 +700,7 @@
/* Copy parameters and call proper function */
static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
- void __user *addr, struct path *path)
+ void __user *addr, const struct path *path)
{
int ret;
diff --git a/fs/read_write.c b/fs/read_write.c
index 53bccd1..da6de12 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1540,20 +1540,37 @@
sb_start_write(inode_out->i_sb);
- ret = -EOPNOTSUPP;
- if (file_out->f_op->copy_file_range)
+ /*
+ * Try cloning first, this is supported by more file systems, and
+ * more efficient if both clone and copy are supported (e.g. NFS).
+ */
+ if (file_in->f_op->clone_file_range) {
+ ret = file_in->f_op->clone_file_range(file_in, pos_in,
+ file_out, pos_out, len);
+ if (ret == 0) {
+ ret = len;
+ goto done;
+ }
+ }
+
+ if (file_out->f_op->copy_file_range) {
ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out,
pos_out, len, flags);
- if (ret == -EOPNOTSUPP)
- ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
- len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
+ if (ret != -EOPNOTSUPP)
+ goto done;
+ }
+ ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
+ len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
+
+done:
if (ret > 0) {
fsnotify_access(file_in);
add_rchar(current, ret);
fsnotify_modify(file_out);
add_wchar(current, ret);
}
+
inc_syscr(current);
inc_syscw(current);
@@ -1648,6 +1665,114 @@
return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
}
+/*
+ * Check that the two inodes are eligible for cloning, the ranges make
+ * sense, and then flush all dirty data. Caller must ensure that the
+ * inodes have been locked against any other modifications.
+ */
+int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
+ struct inode *inode_out, loff_t pos_out,
+ u64 *len, bool is_dedupe)
+{
+ loff_t bs = inode_out->i_sb->s_blocksize;
+ loff_t blen;
+ loff_t isize;
+ bool same_inode = (inode_in == inode_out);
+ int ret;
+
+ /* Don't touch certain kinds of inodes */
+ if (IS_IMMUTABLE(inode_out))
+ return -EPERM;
+
+ if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
+ return -ETXTBSY;
+
+ /* Don't reflink dirs, pipes, sockets... */
+ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+ return -EISDIR;
+ if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+ return -EINVAL;
+
+ /* Are we going all the way to the end? */
+ isize = i_size_read(inode_in);
+ if (isize == 0) {
+ *len = 0;
+ return 0;
+ }
+
+ /* Zero length dedupe exits immediately; reflink goes to EOF. */
+ if (*len == 0) {
+ if (is_dedupe) {
+ *len = 0;
+ return 0;
+ }
+ *len = isize - pos_in;
+ }
+
+ /* Ensure offsets don't wrap and the input is inside i_size */
+ if (pos_in + *len < pos_in || pos_out + *len < pos_out ||
+ pos_in + *len > isize)
+ return -EINVAL;
+
+ /* Don't allow dedupe past EOF in the dest file */
+ if (is_dedupe) {
+ loff_t disize;
+
+ disize = i_size_read(inode_out);
+ if (pos_out >= disize || pos_out + *len > disize)
+ return -EINVAL;
+ }
+
+ /* If we're linking to EOF, continue to the block boundary. */
+ if (pos_in + *len == isize)
+ blen = ALIGN(isize, bs) - pos_in;
+ else
+ blen = *len;
+
+ /* Only reflink if we're aligned to block boundaries */
+ if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
+ !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
+ return -EINVAL;
+
+ /* Don't allow overlapped reflink within the same file */
+ if (same_inode) {
+ if (pos_out + blen > pos_in && pos_out < pos_in + blen)
+ return -EINVAL;
+ }
+
+ /* Wait for the completion of any pending IOs on both files */
+ inode_dio_wait(inode_in);
+ if (!same_inode)
+ inode_dio_wait(inode_out);
+
+ ret = filemap_write_and_wait_range(inode_in->i_mapping,
+ pos_in, pos_in + *len - 1);
+ if (ret)
+ return ret;
+
+ ret = filemap_write_and_wait_range(inode_out->i_mapping,
+ pos_out, pos_out + *len - 1);
+ if (ret)
+ return ret;
+
+ /*
+ * Check that the extents are the same.
+ */
+ if (is_dedupe) {
+ bool is_same = false;
+
+ ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
+ inode_out, pos_out, *len, &is_same);
+ if (ret)
+ return ret;
+ if (!is_same)
+ return -EBADE;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
+
int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, u64 len)
{
@@ -1698,6 +1823,102 @@
}
EXPORT_SYMBOL(vfs_clone_file_range);
+/*
+ * Read a page's worth of file data into the page cache. Return the page
+ * locked.
+ */
+static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
+{
+ struct address_space *mapping;
+ struct page *page;
+ pgoff_t n;
+
+ n = offset >> PAGE_SHIFT;
+ mapping = inode->i_mapping;
+ page = read_mapping_page(mapping, n, NULL);
+ if (IS_ERR(page))
+ return page;
+ if (!PageUptodate(page)) {
+ put_page(page);
+ return ERR_PTR(-EIO);
+ }
+ lock_page(page);
+ return page;
+}
+
+/*
+ * Compare extents of two files to see if they are the same.
+ * Caller must have locked both inodes to prevent write races.
+ */
+int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
+ struct inode *dest, loff_t destoff,
+ loff_t len, bool *is_same)
+{
+ loff_t src_poff;
+ loff_t dest_poff;
+ void *src_addr;
+ void *dest_addr;
+ struct page *src_page;
+ struct page *dest_page;
+ loff_t cmp_len;
+ bool same;
+ int error;
+
+ error = -EINVAL;
+ same = true;
+ while (len) {
+ src_poff = srcoff & (PAGE_SIZE - 1);
+ dest_poff = destoff & (PAGE_SIZE - 1);
+ cmp_len = min(PAGE_SIZE - src_poff,
+ PAGE_SIZE - dest_poff);
+ cmp_len = min(cmp_len, len);
+ if (cmp_len <= 0)
+ goto out_error;
+
+ src_page = vfs_dedupe_get_page(src, srcoff);
+ if (IS_ERR(src_page)) {
+ error = PTR_ERR(src_page);
+ goto out_error;
+ }
+ dest_page = vfs_dedupe_get_page(dest, destoff);
+ if (IS_ERR(dest_page)) {
+ error = PTR_ERR(dest_page);
+ unlock_page(src_page);
+ put_page(src_page);
+ goto out_error;
+ }
+ src_addr = kmap_atomic(src_page);
+ dest_addr = kmap_atomic(dest_page);
+
+ flush_dcache_page(src_page);
+ flush_dcache_page(dest_page);
+
+ if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
+ same = false;
+
+ kunmap_atomic(dest_addr);
+ kunmap_atomic(src_addr);
+ unlock_page(dest_page);
+ unlock_page(src_page);
+ put_page(dest_page);
+ put_page(src_page);
+
+ if (!same)
+ break;
+
+ srcoff += cmp_len;
+ destoff += cmp_len;
+ len -= cmp_len;
+ }
+
+ *is_same = same;
+ return 0;
+
+out_error:
+ return error;
+}
+EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
+
int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
{
struct file_dedupe_range_info *info;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 0a6ad4e..e314cb3 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -802,7 +802,7 @@
static int reiserfs_release_dquot(struct dquot *);
static int reiserfs_mark_dquot_dirty(struct dquot *);
static int reiserfs_write_info(struct super_block *, int);
-static int reiserfs_quota_on(struct super_block *, int, int, struct path *);
+static int reiserfs_quota_on(struct super_block *, int, int, const struct path *);
static const struct dquot_operations reiserfs_quota_operations = {
.write_dquot = reiserfs_write_dquot,
@@ -2348,7 +2348,7 @@
* Standard function to be called on quota_on
*/
static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
- struct path *path)
+ const struct path *path)
{
int err;
struct inode *inode;
diff --git a/fs/statfs.c b/fs/statfs.c
index 083dc0a..13ae259 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -63,7 +63,7 @@
return retval;
}
-int vfs_statfs(struct path *path, struct kstatfs *buf)
+int vfs_statfs(const struct path *path, struct kstatfs *buf)
{
int error;
diff --git a/fs/utimes.c b/fs/utimes.c
index 22307cd..5fdb505 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -48,7 +48,7 @@
return nsec >= 0 && nsec <= 999999999;
}
-static int utimes_common(struct path *path, struct timespec *times)
+static int utimes_common(const struct path *path, struct timespec *times)
{
int error;
struct iattr newattrs;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 65d27a5..bbb9eb6 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -848,24 +848,6 @@
return error;
}
-STATIC ssize_t
-xfs_file_copy_range(
- struct file *file_in,
- loff_t pos_in,
- struct file *file_out,
- loff_t pos_out,
- size_t len,
- unsigned int flags)
-{
- int error;
-
- error = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
- len, false);
- if (error)
- return error;
- return len;
-}
-
STATIC int
xfs_file_clone_range(
struct file *file_in,
@@ -1549,7 +1531,6 @@
.fsync = xfs_file_fsync,
.get_unmapped_area = thp_get_unmapped_area,
.fallocate = xfs_file_fallocate,
- .copy_file_range = xfs_file_copy_range,
.clone_file_range = xfs_file_clone_range,
.dedupe_file_range = xfs_file_dedupe_range,
};
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 88fd03c..aca2d4b 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1114,111 +1114,6 @@
}
/*
- * Read a page's worth of file data into the page cache. Return the page
- * locked.
- */
-static struct page *
-xfs_get_page(
- struct inode *inode,
- xfs_off_t offset)
-{
- struct address_space *mapping;
- struct page *page;
- pgoff_t n;
-
- n = offset >> PAGE_SHIFT;
- mapping = inode->i_mapping;
- page = read_mapping_page(mapping, n, NULL);
- if (IS_ERR(page))
- return page;
- if (!PageUptodate(page)) {
- put_page(page);
- return ERR_PTR(-EIO);
- }
- lock_page(page);
- return page;
-}
-
-/*
- * Compare extents of two files to see if they are the same.
- */
-static int
-xfs_compare_extents(
- struct inode *src,
- xfs_off_t srcoff,
- struct inode *dest,
- xfs_off_t destoff,
- xfs_off_t len,
- bool *is_same)
-{
- xfs_off_t src_poff;
- xfs_off_t dest_poff;
- void *src_addr;
- void *dest_addr;
- struct page *src_page;
- struct page *dest_page;
- xfs_off_t cmp_len;
- bool same;
- int error;
-
- error = -EINVAL;
- same = true;
- while (len) {
- src_poff = srcoff & (PAGE_SIZE - 1);
- dest_poff = destoff & (PAGE_SIZE - 1);
- cmp_len = min(PAGE_SIZE - src_poff,
- PAGE_SIZE - dest_poff);
- cmp_len = min(cmp_len, len);
- ASSERT(cmp_len > 0);
-
- trace_xfs_reflink_compare_extents(XFS_I(src), srcoff, cmp_len,
- XFS_I(dest), destoff);
-
- src_page = xfs_get_page(src, srcoff);
- if (IS_ERR(src_page)) {
- error = PTR_ERR(src_page);
- goto out_error;
- }
- dest_page = xfs_get_page(dest, destoff);
- if (IS_ERR(dest_page)) {
- error = PTR_ERR(dest_page);
- unlock_page(src_page);
- put_page(src_page);
- goto out_error;
- }
- src_addr = kmap_atomic(src_page);
- dest_addr = kmap_atomic(dest_page);
-
- flush_dcache_page(src_page);
- flush_dcache_page(dest_page);
-
- if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
- same = false;
-
- kunmap_atomic(dest_addr);
- kunmap_atomic(src_addr);
- unlock_page(dest_page);
- unlock_page(src_page);
- put_page(dest_page);
- put_page(src_page);
-
- if (!same)
- break;
-
- srcoff += cmp_len;
- destoff += cmp_len;
- len -= cmp_len;
- }
-
- *is_same = same;
- return 0;
-
-out_error:
- trace_xfs_reflink_compare_extents_error(XFS_I(dest), error, _RET_IP_);
- return error;
-}
-
-/*
* Link a range of blocks from one file to another.
*/
int
@@ -1235,14 +1130,11 @@
struct inode *inode_out = file_inode(file_out);
struct xfs_inode *dest = XFS_I(inode_out);
struct xfs_mount *mp = src->i_mount;
- loff_t bs = inode_out->i_sb->s_blocksize;
bool same_inode = (inode_in == inode_out);
xfs_fileoff_t sfsbno, dfsbno;
xfs_filblks_t fsblen;
xfs_extlen_t cowextsize;
- loff_t isize;
ssize_t ret;
- loff_t blen;
if (!xfs_sb_version_hasreflink(&mp->m_sb))
return -EOPNOTSUPP;
@@ -1257,26 +1149,8 @@
else
xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
- /* Don't touch certain kinds of inodes */
- ret = -EPERM;
- if (IS_IMMUTABLE(inode_out))
- goto out_unlock;
-
- ret = -ETXTBSY;
- if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
- goto out_unlock;
-
-
- /* Don't reflink dirs, pipes, sockets... */
- ret = -EISDIR;
- if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
- goto out_unlock;
+ /* Check file eligibility and prepare for block sharing. */
ret = -EINVAL;
- if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
- goto out_unlock;
- if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
- goto out_unlock;
-
/* Don't reflink realtime inodes */
if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
goto out_unlock;
@@ -1285,97 +1159,18 @@
if (IS_DAX(inode_in) || IS_DAX(inode_out))
goto out_unlock;
- /* Are we going all the way to the end? */
- isize = i_size_read(inode_in);
- if (isize == 0) {
- ret = 0;
- goto out_unlock;
- }
-
- /* Zero length dedupe exits immediately; reflink goes to EOF. */
- if (len == 0) {
- if (is_dedupe) {
- ret = 0;
- goto out_unlock;
- }
- len = isize - pos_in;
- }
-
- /* Ensure offsets don't wrap and the input is inside i_size */
- if (pos_in + len < pos_in || pos_out + len < pos_out ||
- pos_in + len > isize)
- goto out_unlock;
-
- /* Don't allow dedupe past EOF in the dest file */
- if (is_dedupe) {
- loff_t disize;
-
- disize = i_size_read(inode_out);
- if (pos_out >= disize || pos_out + len > disize)
- goto out_unlock;
- }
-
- /* If we're linking to EOF, continue to the block boundary. */
- if (pos_in + len == isize)
- blen = ALIGN(isize, bs) - pos_in;
- else
- blen = len;
-
- /* Only reflink if we're aligned to block boundaries */
- if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
- !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
- goto out_unlock;
-
- /* Don't allow overlapped reflink within the same file */
- if (same_inode) {
- if (pos_out + blen > pos_in && pos_out < pos_in + blen)
- goto out_unlock;
- }
-
- /* Wait for the completion of any pending IOs on both files */
- inode_dio_wait(inode_in);
- if (!same_inode)
- inode_dio_wait(inode_out);
-
- ret = filemap_write_and_wait_range(inode_in->i_mapping,
- pos_in, pos_in + len - 1);
- if (ret)
- goto out_unlock;
-
- ret = filemap_write_and_wait_range(inode_out->i_mapping,
- pos_out, pos_out + len - 1);
- if (ret)
+ ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
+ &len, is_dedupe);
+ if (ret || len == 0)
goto out_unlock;
trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
- /*
- * Check that the extents are the same.
- */
- if (is_dedupe) {
- bool is_same = false;
-
- ret = xfs_compare_extents(inode_in, pos_in, inode_out, pos_out,
- len, &is_same);
- if (ret)
- goto out_unlock;
- if (!is_same) {
- ret = -EBADE;
- goto out_unlock;
- }
- }
-
+ /* Set flags and remap blocks. */
ret = xfs_reflink_set_inode_flag(src, dest);
if (ret)
goto out_unlock;
- /*
- * Invalidate the page cache so that we can clear any CoW mappings
- * in the destination file.
- */
- truncate_inode_pages_range(&inode_out->i_data, pos_out,
- PAGE_ALIGN(pos_out + len) - 1);
-
dfsbno = XFS_B_TO_FSBT(mp, pos_out);
sfsbno = XFS_B_TO_FSBT(mp, pos_in);
fsblen = XFS_B_TO_FSB(mp, len);
@@ -1384,6 +1179,10 @@
if (ret)
goto out_unlock;
+ /* Zap any page cache for the destination file's range. */
+ truncate_inode_pages_range(&inode_out->i_data, pos_out,
+ PAGE_ALIGN(pos_out + len) - 1);
+
/*
* Carry the cowextsize hint from src to dest if we're sharing the
* entire source file to the entire destination file, the source file
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 9d4443f..f51fca8d 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -147,7 +147,7 @@
extern void audit_log_key(struct audit_buffer *ab,
char *key);
extern void audit_log_link_denied(const char *operation,
- struct path *link);
+ const struct path *link);
extern void audit_log_lost(const char *message);
#ifdef CONFIG_SECURITY
extern void audit_log_secctx(struct audit_buffer *ab, u32 secid);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 5beed7b..c965e44 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -139,7 +139,7 @@
void (*d_iput)(struct dentry *, struct inode *);
char *(*d_dname)(struct dentry *, char *, int);
struct vfsmount *(*d_automount)(struct path *);
- int (*d_manage)(struct dentry *, bool);
+ int (*d_manage)(const struct path *, bool);
struct dentry *(*d_real)(struct dentry *, const struct inode *,
unsigned int);
} ____cacheline_aligned;
@@ -254,7 +254,7 @@
extern void d_prune_aliases(struct inode *);
/* test whether we have any submounts in a subdir tree */
-extern int have_submounts(struct dentry *);
+extern int path_has_submounts(const struct path *);
/*
* This adds the entry to the hash queues.
diff --git a/include/linux/dcookies.h b/include/linux/dcookies.h
index 5ac3bdd..699b6c4 100644
--- a/include/linux/dcookies.h
+++ b/include/linux/dcookies.h
@@ -44,7 +44,7 @@
*
* Returns 0 on success, with *cookie filled in
*/
-int get_dcookie(struct path *path, unsigned long *cookie);
+int get_dcookie(const struct path *path, unsigned long *cookie);
#else
@@ -58,7 +58,7 @@
return;
}
-static inline int get_dcookie(struct path *path, unsigned long *cookie)
+static inline int get_dcookie(const struct path *path, unsigned long *cookie)
{
return -ENOSYS;
}
diff --git a/include/linux/file.h b/include/linux/file.h
index 7444f5f..61eb82c 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -17,7 +17,7 @@
struct vfsmount;
struct dentry;
struct path;
-extern struct file *alloc_file(struct path *, fmode_t mode,
+extern struct file *alloc_file(const struct path *, fmode_t mode,
const struct file_operations *fop);
static inline void fput_light(struct file *file, int fput_needed)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 83de8b6..3587896 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1726,8 +1726,14 @@
unsigned long, loff_t *, int);
extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
loff_t, size_t, unsigned int);
+extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
+ struct inode *inode_out, loff_t pos_out,
+ u64 *len, bool is_dedupe);
extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, u64 len);
+extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
+ struct inode *dest, loff_t destoff,
+ loff_t len, bool *is_same);
extern int vfs_dedupe_file_range(struct file *file,
struct file_dedupe_range *same);
@@ -2084,11 +2090,11 @@
extern int may_umount(struct vfsmount *);
extern long do_mount(const char *, const char __user *,
const char *, unsigned long, void *);
-extern struct vfsmount *collect_mounts(struct path *);
+extern struct vfsmount *collect_mounts(const struct path *);
extern void drop_collected_mounts(struct vfsmount *);
extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
struct vfsmount *);
-extern int vfs_statfs(struct path *, struct kstatfs *);
+extern int vfs_statfs(const struct path *, struct kstatfs *);
extern int user_statfs(const char __user *, struct kstatfs *);
extern int fd_statfs(int, struct kstatfs *);
extern int vfs_ustat(dev_t, struct kstatfs *);
@@ -2657,7 +2663,7 @@
/* fs/dcache.c -- generic fs support functions */
extern bool is_subdir(struct dentry *, struct dentry *);
-extern bool path_is_under(struct path *, struct path *);
+extern bool path_is_under(const struct path *, const struct path *);
extern char *file_path(struct file *, char *, int);
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index b8bcc05..b43d3f5 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -17,7 +17,7 @@
#include <linux/bug.h>
/* Notify this dentry's parent about a child's events. */
-static inline int fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
+static inline int fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask)
{
if (!dentry)
dentry = path->dentry;
@@ -28,7 +28,7 @@
/* simple call site for access decisions */
static inline int fsnotify_perm(struct file *file, int mask)
{
- struct path *path = &file->f_path;
+ const struct path *path = &file->f_path;
/*
* Do not use file_inode() here or anywhere in this file to get the
* inode. That would break *notity on overlayfs.
@@ -176,7 +176,7 @@
*/
static inline void fsnotify_access(struct file *file)
{
- struct path *path = &file->f_path;
+ const struct path *path = &file->f_path;
struct inode *inode = path->dentry->d_inode;
__u32 mask = FS_ACCESS;
@@ -194,7 +194,7 @@
*/
static inline void fsnotify_modify(struct file *file)
{
- struct path *path = &file->f_path;
+ const struct path *path = &file->f_path;
struct inode *inode = path->dentry->d_inode;
__u32 mask = FS_MODIFY;
@@ -212,7 +212,7 @@
*/
static inline void fsnotify_open(struct file *file)
{
- struct path *path = &file->f_path;
+ const struct path *path = &file->f_path;
struct inode *inode = path->dentry->d_inode;
__u32 mask = FS_OPEN;
@@ -228,7 +228,7 @@
*/
static inline void fsnotify_close(struct file *file)
{
- struct path *path = &file->f_path;
+ const struct path *path = &file->f_path;
struct inode *inode = path->dentry->d_inode;
fmode_t mode = file->f_mode;
__u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE;
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 79467b2..0cf34d6 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -96,7 +96,7 @@
struct inode *inode,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
- u32 mask, void *data, int data_type,
+ u32 mask, const void *data, int data_type,
const unsigned char *file_name, u32 cookie);
void (*free_group_priv)(struct fsnotify_group *group);
void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group);
@@ -245,9 +245,9 @@
/* called from the vfs helpers */
/* main fsnotify call to send events */
-extern int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
+extern int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
const unsigned char *name, u32 cookie);
-extern int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask);
+extern int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask);
extern void __fsnotify_inode_delete(struct inode *inode);
extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt);
extern u32 fsnotify_get_cookie(void);
@@ -357,13 +357,13 @@
#else
-static inline int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
+static inline int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
const unsigned char *name, u32 cookie)
{
return 0;
}
-static inline int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
+static inline int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask)
{
return 0;
}
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 1172cce..c6f5515 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -79,12 +79,12 @@
extern void mnt_drop_write_file(struct file *file);
extern void mntput(struct vfsmount *mnt);
extern struct vfsmount *mntget(struct vfsmount *mnt);
-extern struct vfsmount *mnt_clone_internal(struct path *path);
+extern struct vfsmount *mnt_clone_internal(const struct path *path);
extern int __mnt_is_readonly(struct vfsmount *mnt);
extern bool mnt_may_suid(struct vfsmount *mnt);
struct path;
-extern struct vfsmount *clone_private_mount(struct path *path);
+extern struct vfsmount *clone_private_mount(const struct path *path);
struct file_system_type;
extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
@@ -98,4 +98,6 @@
extern unsigned int sysctl_mount_max;
+extern bool path_is_mountpoint(const struct path *path);
+
#endif /* _LINUX_MOUNT_H */
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 55107a8..78a9882 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -431,7 +431,7 @@
/* Operations handling requests from userspace */
struct quotactl_ops {
- int (*quota_on)(struct super_block *, int, int, struct path *);
+ int (*quota_on)(struct super_block *, int, int, const struct path *);
int (*quota_off)(struct super_block *, int);
int (*quota_enable)(struct super_block *, unsigned int);
int (*quota_disable)(struct super_block *, unsigned int);
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index f00fa86..799a63d 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -90,7 +90,7 @@
int dquot_enable(struct inode *inode, int type, int format_id,
unsigned int flags);
int dquot_quota_on(struct super_block *sb, int type, int format_id,
- struct path *path);
+ const struct path *path);
int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
int format_id, int type);
int dquot_quota_off(struct super_block *sb, int type);
diff --git a/kernel/audit.c b/kernel/audit.c
index 91bff3c..6e399bb 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1893,7 +1893,7 @@
* @call_panic: optional pointer to int that will be updated if secid fails
*/
void audit_log_name(struct audit_context *context, struct audit_names *n,
- struct path *path, int record_num, int *call_panic)
+ const struct path *path, int record_num, int *call_panic)
{
struct audit_buffer *ab;
ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
@@ -2081,7 +2081,7 @@
* @operation: specific link operation
* @link: the path that triggered the restriction
*/
-void audit_log_link_denied(const char *operation, struct path *link)
+void audit_log_link_denied(const char *operation, const struct path *link)
{
struct audit_buffer *ab;
struct audit_names *name;
diff --git a/kernel/audit.h b/kernel/audit.h
index 431444c..960d49c 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -212,7 +212,7 @@
extern void audit_log_cap(struct audit_buffer *ab, char *prefix,
kernel_cap_t *cap);
extern void audit_log_name(struct audit_context *context,
- struct audit_names *n, struct path *path,
+ struct audit_names *n, const struct path *path,
int record_num, int *call_panic);
extern int audit_pid;
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index f751548..7ea57e5 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -74,7 +74,7 @@
}
static void audit_update_mark(struct audit_fsnotify_mark *audit_mark,
- struct inode *inode)
+ const struct inode *inode)
{
audit_mark->dev = inode ? inode->i_sb->s_dev : AUDIT_DEV_UNSET;
audit_mark->ino = inode ? inode->i_ino : AUDIT_INO_UNSET;
@@ -167,11 +167,11 @@
struct inode *to_tell,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
- u32 mask, void *data, int data_type,
+ u32 mask, const void *data, int data_type,
const unsigned char *dname, u32 cookie)
{
struct audit_fsnotify_mark *audit_mark;
- struct inode *inode = NULL;
+ const struct inode *inode = NULL;
audit_mark = container_of(inode_mark, struct audit_fsnotify_mark, mark);
@@ -179,10 +179,10 @@
switch (data_type) {
case (FSNOTIFY_EVENT_PATH):
- inode = ((struct path *)data)->dentry->d_inode;
+ inode = ((const struct path *)data)->dentry->d_inode;
break;
case (FSNOTIFY_EVENT_INODE):
- inode = (struct inode *)data;
+ inode = (const struct inode *)data;
break;
default:
BUG();
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 055f11b..8b1dde9 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -947,7 +947,7 @@
struct inode *to_tell,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
- u32 mask, void *data, int data_type,
+ u32 mask, const void *data, int data_type,
const unsigned char *file_name, u32 cookie)
{
return 0;
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 2d7bdcb..f79e465 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -471,10 +471,10 @@
struct inode *to_tell,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
- u32 mask, void *data, int data_type,
+ u32 mask, const void *data, int data_type,
const unsigned char *dname, u32 cookie)
{
- struct inode *inode;
+ const struct inode *inode;
struct audit_parent *parent;
parent = container_of(inode_mark, struct audit_parent, mark);
@@ -483,10 +483,10 @@
switch (data_type) {
case (FSNOTIFY_EVENT_PATH):
- inode = d_backing_inode(((struct path *)data)->dentry);
+ inode = d_backing_inode(((const struct path *)data)->dentry);
break;
case (FSNOTIFY_EVENT_INODE):
- inode = (struct inode *)data;
+ inode = (const struct inode *)data;
break;
default:
BUG();