Merge branch 'master' of /home/src/linux-2.6/
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 49eafbd..c7e9237 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -92,6 +92,8 @@
an octal number to conform to how chmod(1) works, too. Thanks to
Giuseppe Bilotta and Horst von Brand for pointing out the errors of
my ways.
+ - Fix various bugs in the runlist merging code. (Based on libntfs
+ changes by Richard Russon.)
2.1.23 - Implement extension of resident files and make writing safe as well as
many bug fixes, cleanups, and enhancements...
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index b6cc8cf..5e80c07 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -59,39 +59,49 @@
unsigned long flags;
struct buffer_head *first, *tmp;
struct page *page;
+ struct inode *vi;
ntfs_inode *ni;
int page_uptodate = 1;
page = bh->b_page;
- ni = NTFS_I(page->mapping->host);
+ vi = page->mapping->host;
+ ni = NTFS_I(vi);
if (likely(uptodate)) {
- s64 file_ofs, initialized_size;
+ loff_t i_size;
+ s64 file_ofs, init_size;
set_buffer_uptodate(bh);
file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
bh_offset(bh);
read_lock_irqsave(&ni->size_lock, flags);
- initialized_size = ni->initialized_size;
+ init_size = ni->initialized_size;
+ i_size = i_size_read(vi);
read_unlock_irqrestore(&ni->size_lock, flags);
+ if (unlikely(init_size > i_size)) {
+ /* Race with shrinking truncate. */
+ init_size = i_size;
+ }
/* Check for the current buffer head overflowing. */
- if (file_ofs + bh->b_size > initialized_size) {
- char *addr;
- int ofs = 0;
+ if (unlikely(file_ofs + bh->b_size > init_size)) {
+ u8 *kaddr;
+ int ofs;
- if (file_ofs < initialized_size)
- ofs = initialized_size - file_ofs;
- addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
- memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
+ ofs = 0;
+ if (file_ofs < init_size)
+ ofs = init_size - file_ofs;
+ kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
+ memset(kaddr + bh_offset(bh) + ofs, 0,
+ bh->b_size - ofs);
+ kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
flush_dcache_page(page);
- kunmap_atomic(addr, KM_BIO_SRC_IRQ);
}
} else {
clear_buffer_uptodate(bh);
SetPageError(page);
- ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
- (unsigned long long)bh->b_blocknr);
+ ntfs_error(ni->vol->sb, "Buffer I/O error, logical block "
+ "0x%llx.", (unsigned long long)bh->b_blocknr);
}
first = page_buffers(page);
local_irq_save(flags);
@@ -124,7 +134,7 @@
if (likely(page_uptodate && !PageError(page)))
SetPageUptodate(page);
} else {
- char *addr;
+ u8 *kaddr;
unsigned int i, recs;
u32 rec_size;
@@ -132,12 +142,12 @@
recs = PAGE_CACHE_SIZE / rec_size;
/* Should have been verified before we got here... */
BUG_ON(!recs);
- addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
+ kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
for (i = 0; i < recs; i++)
- post_read_mst_fixup((NTFS_RECORD*)(addr +
+ post_read_mst_fixup((NTFS_RECORD*)(kaddr +
i * rec_size), rec_size);
+ kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
flush_dcache_page(page);
- kunmap_atomic(addr, KM_BIO_SRC_IRQ);
if (likely(page_uptodate && !PageError(page)))
SetPageUptodate(page);
}
@@ -168,8 +178,11 @@
*/
static int ntfs_read_block(struct page *page)
{
+ loff_t i_size;
VCN vcn;
LCN lcn;
+ s64 init_size;
+ struct inode *vi;
ntfs_inode *ni;
ntfs_volume *vol;
runlist_element *rl;
@@ -180,7 +193,8 @@
int i, nr;
unsigned char blocksize_bits;
- ni = NTFS_I(page->mapping->host);
+ vi = page->mapping->host;
+ ni = NTFS_I(vi);
vol = ni->vol;
/* $MFT/$DATA must have its complete runlist in memory at all times. */
@@ -199,11 +213,28 @@
bh = head = page_buffers(page);
BUG_ON(!bh);
+ /*
+ * We may be racing with truncate. To avoid some of the problems we
+ * now take a snapshot of the various sizes and use those for the whole
+ * of the function. In case of an extending truncate it just means we
+ * may leave some buffers unmapped which are now allocated. This is
+ * not a problem since these buffers will just get mapped when a write
+ * occurs. In case of a shrinking truncate, we will detect this later
+ * on due to the runlist being incomplete and if the page is being
+ * fully truncated, truncate will throw it away as soon as we unlock
+ * it so no need to worry what we do with it.
+ */
iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
read_lock_irqsave(&ni->size_lock, flags);
lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
- zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;
+ init_size = ni->initialized_size;
+ i_size = i_size_read(vi);
read_unlock_irqrestore(&ni->size_lock, flags);
+ if (unlikely(init_size > i_size)) {
+ /* Race with shrinking truncate. */
+ init_size = i_size;
+ }
+ zblock = (init_size + blocksize - 1) >> blocksize_bits;
/* Loop through all the buffers in the page. */
rl = NULL;
@@ -366,6 +397,8 @@
*/
static int ntfs_readpage(struct file *file, struct page *page)
{
+ loff_t i_size;
+ struct inode *vi;
ntfs_inode *ni, *base_ni;
u8 *kaddr;
ntfs_attr_search_ctx *ctx;
@@ -384,14 +417,17 @@
unlock_page(page);
return 0;
}
- ni = NTFS_I(page->mapping->host);
+ vi = page->mapping->host;
+ ni = NTFS_I(vi);
/*
* Only $DATA attributes can be encrypted and only unnamed $DATA
* attributes can be compressed. Index root can have the flags set but
* this means to create compressed/encrypted files, not that the
- * attribute is compressed/encrypted.
+ * attribute is compressed/encrypted. Note we need to check for
+ * AT_INDEX_ALLOCATION since this is the type of both directory and
+ * index inodes.
*/
- if (ni->type != AT_INDEX_ROOT) {
+ if (ni->type != AT_INDEX_ALLOCATION) {
/* If attribute is encrypted, deny access, just like NT4. */
if (NInoEncrypted(ni)) {
BUG_ON(ni->type != AT_DATA);
@@ -456,7 +492,12 @@
read_lock_irqsave(&ni->size_lock, flags);
if (unlikely(attr_len > ni->initialized_size))
attr_len = ni->initialized_size;
+ i_size = i_size_read(vi);
read_unlock_irqrestore(&ni->size_lock, flags);
+ if (unlikely(attr_len > i_size)) {
+ /* Race with shrinking truncate. */
+ attr_len = i_size;
+ }
kaddr = kmap_atomic(page, KM_USER0);
/* Copy the data to the page. */
memcpy(kaddr, (u8*)ctx->attr +
@@ -1341,9 +1382,11 @@
* Only $DATA attributes can be encrypted and only unnamed $DATA
* attributes can be compressed. Index root can have the flags set but
* this means to create compressed/encrypted files, not that the
- * attribute is compressed/encrypted.
+ * attribute is compressed/encrypted. Note we need to check for
+ * AT_INDEX_ALLOCATION since this is the type of both directory and
+ * index inodes.
*/
- if (ni->type != AT_INDEX_ROOT) {
+ if (ni->type != AT_INDEX_ALLOCATION) {
/* If file is encrypted, deny access, just like NT4. */
if (NInoEncrypted(ni)) {
unlock_page(page);
@@ -1379,8 +1422,8 @@
unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs);
- flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
+ flush_dcache_page(page);
}
/* Handle mst protected attributes. */
if (NInoMstProtected(ni))
@@ -1443,34 +1486,33 @@
BUG_ON(PageWriteback(page));
set_page_writeback(page);
unlock_page(page);
- /*
- * Here, we do not need to zero the out of bounds area everytime
- * because the below memcpy() already takes care of the
- * mmap-at-end-of-file requirements. If the file is converted to a
- * non-resident one, then the code path use is switched to the
- * non-resident one where the zeroing happens on each ntfs_writepage()
- * invocation.
- */
attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
i_size = i_size_read(vi);
if (unlikely(attr_len > i_size)) {
+ /* Race with shrinking truncate or a failed truncate. */
attr_len = i_size;
- ctx->attr->data.resident.value_length = cpu_to_le32(attr_len);
+ /*
+ * If the truncate failed, fix it up now. If it is a concurrent
+ * truncate, we do its job, so it does not have to do anything.
+ */
+ err = ntfs_resident_attr_value_resize(ctx->mrec, ctx->attr,
+ attr_len);
+ /* Shrinking cannot fail. */
+ BUG_ON(err);
}
kaddr = kmap_atomic(page, KM_USER0);
/* Copy the data from the page to the mft record. */
memcpy((u8*)ctx->attr +
le16_to_cpu(ctx->attr->data.resident.value_offset),
kaddr, attr_len);
- flush_dcache_mft_record_page(ctx->ntfs_ino);
/* Zero out of bounds area in the page cache page. */
memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
- flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
-
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ flush_dcache_page(page);
+ /* We are done with the page. */
end_page_writeback(page);
-
- /* Mark the mft record dirty, so it gets written back. */
+ /* Finally, mark the mft record dirty, so it gets written back. */
mark_mft_record_dirty(ctx->ntfs_ino);
ntfs_attr_put_search_ctx(ctx);
unmap_mft_record(base_ni);
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index dc4bbe3..7ec0451 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1166,6 +1166,8 @@
*
* Return 0 on success and -errno on error. In the error case, the inode will
* have had make_bad_inode() executed on it.
+ *
+ * Note this cannot be called for AT_INDEX_ALLOCATION.
*/
static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
{
@@ -1242,8 +1244,8 @@
}
}
/*
- * The encryption flag set in an index root just means to
- * compress all files.
+ * The compressed/sparse flag set in an index root just means
+ * to compress all files.
*/
if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
ntfs_error(vi->i_sb, "Found mst protected attribute "
@@ -1319,8 +1321,7 @@
"the mapping pairs array.");
goto unm_err_out;
}
- if ((NInoCompressed(ni) || NInoSparse(ni)) &&
- ni->type != AT_INDEX_ROOT) {
+ if (NInoCompressed(ni) || NInoSparse(ni)) {
if (a->data.non_resident.compression_unit != 4) {
ntfs_error(vi->i_sb, "Found nonstandard "
"compression unit (%u instead "
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
index 3288bcc..006946e 100644
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
@@ -1,7 +1,7 @@
/*
* malloc.h - NTFS kernel memory handling. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index f5b2ac9..e2665d0 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -2,7 +2,7 @@
* runlist.c - NTFS runlist handling code. Part of the Linux-NTFS project.
*
* Copyright (c) 2001-2005 Anton Altaparmakov
- * Copyright (c) 2002 Richard Russon
+ * Copyright (c) 2002-2005 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -214,8 +214,8 @@
static inline runlist_element *ntfs_rl_append(runlist_element *dst,
int dsize, runlist_element *src, int ssize, int loc)
{
- BOOL right;
- int magic;
+ BOOL right; /* Right end of @src needs merging. */
+ int marker; /* End of the inserted runs. */
BUG_ON(!dst);
BUG_ON(!src);
@@ -236,18 +236,19 @@
if (right)
__ntfs_rl_merge(src + ssize - 1, dst + loc + 1);
- magic = loc + ssize;
+ /* First run after the @src runs that have been inserted. */
+ marker = loc + ssize + 1;
/* Move the tail of @dst out of the way, then copy in @src. */
- ntfs_rl_mm(dst, magic + 1, loc + 1 + right, dsize - loc - 1 - right);
+ ntfs_rl_mm(dst, marker, loc + 1 + right, dsize - (loc + 1 + right));
ntfs_rl_mc(dst, loc + 1, src, 0, ssize);
/* Adjust the size of the preceding hole. */
dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn;
/* We may have changed the length of the file, so fix the end marker */
- if (dst[magic + 1].lcn == LCN_ENOENT)
- dst[magic + 1].vcn = dst[magic].vcn + dst[magic].length;
+ if (dst[marker].lcn == LCN_ENOENT)
+ dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
return dst;
}
@@ -279,18 +280,17 @@
static inline runlist_element *ntfs_rl_insert(runlist_element *dst,
int dsize, runlist_element *src, int ssize, int loc)
{
- BOOL left = FALSE;
- BOOL disc = FALSE; /* Discontinuity */
- BOOL hole = FALSE; /* Following a hole */
- int magic;
+ BOOL left = FALSE; /* Left end of @src needs merging. */
+ BOOL disc = FALSE; /* Discontinuity between @dst and @src. */
+ int marker; /* End of the inserted runs. */
BUG_ON(!dst);
BUG_ON(!src);
- /* disc => Discontinuity between the end of @dst and the start of @src.
- * This means we might need to insert a hole.
- * hole => @dst ends with a hole or an unmapped region which we can
- * extend to match the discontinuity. */
+ /*
+ * disc => Discontinuity between the end of @dst and the start of @src.
+ * This means we might need to insert a "not mapped" run.
+ */
if (loc == 0)
disc = (src[0].vcn > 0);
else {
@@ -303,58 +303,49 @@
merged_length += src->length;
disc = (src[0].vcn > dst[loc - 1].vcn + merged_length);
- if (disc)
- hole = (dst[loc - 1].lcn == LCN_HOLE);
}
-
- /* Space required: @dst size + @src size, less one if we merged, plus
- * one if there was a discontinuity, less one for a trailing hole. */
- dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc - hole);
+ /*
+ * Space required: @dst size + @src size, less one if we merged, plus
+ * one if there was a discontinuity.
+ */
+ dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc);
if (IS_ERR(dst))
return dst;
/*
* We are guaranteed to succeed from here so can start modifying the
* original runlist.
*/
-
if (left)
__ntfs_rl_merge(dst + loc - 1, src);
-
- magic = loc + ssize - left + disc - hole;
+ /*
+ * First run after the @src runs that have been inserted.
+ * Nominally, @marker equals @loc + @ssize, i.e. location + number of
+ * runs in @src. However, if @left, then the first run in @src has
+ * been merged with one in @dst. And if @disc, then @dst and @src do
+ * not meet and we need an extra run to fill the gap.
+ */
+ marker = loc + ssize - left + disc;
/* Move the tail of @dst out of the way, then copy in @src. */
- ntfs_rl_mm(dst, magic, loc, dsize - loc);
- ntfs_rl_mc(dst, loc + disc - hole, src, left, ssize - left);
+ ntfs_rl_mm(dst, marker, loc, dsize - loc);
+ ntfs_rl_mc(dst, loc + disc, src, left, ssize - left);
- /* Adjust the VCN of the last run ... */
- if (dst[magic].lcn <= LCN_HOLE)
- dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length;
+ /* Adjust the VCN of the first run after the insertion... */
+ dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
/* ... and the length. */
- if (dst[magic].lcn == LCN_HOLE || dst[magic].lcn == LCN_RL_NOT_MAPPED)
- dst[magic].length = dst[magic + 1].vcn - dst[magic].vcn;
+ if (dst[marker].lcn == LCN_HOLE || dst[marker].lcn == LCN_RL_NOT_MAPPED)
+ dst[marker].length = dst[marker + 1].vcn - dst[marker].vcn;
- /* Writing beyond the end of the file and there's a discontinuity. */
+ /* Writing beyond the end of the file and there is a discontinuity. */
if (disc) {
- if (hole)
- dst[loc - 1].length = dst[loc].vcn - dst[loc - 1].vcn;
- else {
- if (loc > 0) {
- dst[loc].vcn = dst[loc - 1].vcn +
- dst[loc - 1].length;
- dst[loc].length = dst[loc + 1].vcn -
- dst[loc].vcn;
- } else {
- dst[loc].vcn = 0;
- dst[loc].length = dst[loc + 1].vcn;
- }
- dst[loc].lcn = LCN_RL_NOT_MAPPED;
+ if (loc > 0) {
+ dst[loc].vcn = dst[loc - 1].vcn + dst[loc - 1].length;
+ dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn;
+ } else {
+ dst[loc].vcn = 0;
+ dst[loc].length = dst[loc + 1].vcn;
}
-
- magic += hole;
-
- if (dst[magic].lcn == LCN_ENOENT)
- dst[magic].vcn = dst[magic - 1].vcn +
- dst[magic - 1].length;
+ dst[loc].lcn = LCN_RL_NOT_MAPPED;
}
return dst;
}
@@ -385,9 +376,10 @@
static inline runlist_element *ntfs_rl_replace(runlist_element *dst,
int dsize, runlist_element *src, int ssize, int loc)
{
- BOOL left = FALSE;
- BOOL right;
- int magic;
+ BOOL left = FALSE; /* Left end of @src needs merging. */
+ BOOL right; /* Right end of @src needs merging. */
+ int tail; /* Start of tail of @dst. */
+ int marker; /* End of the inserted runs. */
BUG_ON(!dst);
BUG_ON(!src);
@@ -396,9 +388,10 @@
right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
if (loc > 0)
left = ntfs_are_rl_mergeable(dst + loc - 1, src);
-
- /* Allocate some space. We'll need less if the left, right, or both
- * ends were merged. */
+ /*
+ * Allocate some space. We will need less if the left, right, or both
+ * ends were merged.
+ */
dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left - right);
if (IS_ERR(dst))
return dst;
@@ -410,17 +403,28 @@
__ntfs_rl_merge(src + ssize - 1, dst + loc + 1);
if (left)
__ntfs_rl_merge(dst + loc - 1, src);
-
- /* FIXME: What does this mean? (AIA) */
- magic = loc + ssize - left;
+ /*
+ * First run of @dst that needs to be moved out of the way to make
+ * space for the runs to be copied from @src, i.e. the first run of the
+ * tail of @dst.
+ */
+ tail = loc + right + 1;
+ /*
+ * First run after the @src runs that have been inserted, i.e. where
+ * the tail of @dst needs to be moved to.
+ * Nominally, @marker equals @loc + @ssize, i.e. location + number of
+ * runs in @src. However, if @left, then the first run in @src has
+ * been merged with one in @dst.
+ */
+ marker = loc + ssize - left;
/* Move the tail of @dst out of the way, then copy in @src. */
- ntfs_rl_mm(dst, magic, loc + right + 1, dsize - loc - right - 1);
+ ntfs_rl_mm(dst, marker, tail, dsize - tail);
ntfs_rl_mc(dst, loc, src, left, ssize - left);
- /* We may have changed the length of the file, so fix the end marker */
- if (dst[magic].lcn == LCN_ENOENT)
- dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length;
+ /* We may have changed the length of the file, so fix the end marker. */
+ if (dsize - tail > 0 && dst[marker].lcn == LCN_ENOENT)
+ dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
return dst;
}