ext4: more efficient SEEK_DATA implementation
Using SEEK_DATA in a huge sparse file can easily lead to sotflockups as
ext4_seek_data() iterates hole block-by-block. Fix the problem by using
returned hole size from ext4_map_blocks() and thus skip the hole in one
go.
Update also SEEK_HOLE implementation to follow the same pattern as
SEEK_DATA to make future maintenance easier.
Furthermore we add cond_resched() to both ext4_seek_data() and
ext4_seek_hole() to avoid softlockups in case evil user creates huge
fragmented file and we have to go through lots of extents.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 4a11535..e93a7ef 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -426,7 +426,7 @@
*/
static int ext4_find_unwritten_pgoff(struct inode *inode,
int whence,
- struct ext4_map_blocks *map,
+ ext4_lblk_t end_blk,
loff_t *offset)
{
struct pagevec pvec;
@@ -441,7 +441,7 @@
blkbits = inode->i_sb->s_blocksize_bits;
startoff = *offset;
lastoff = startoff;
- endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;
+ endoff = (loff_t)end_blk << blkbits;
index = startoff >> PAGE_CACHE_SHIFT;
end = endoff >> PAGE_CACHE_SHIFT;
@@ -559,12 +559,11 @@
static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
{
struct inode *inode = file->f_mapping->host;
- struct ext4_map_blocks map;
struct extent_status es;
ext4_lblk_t start, last, end;
loff_t dataoff, isize;
int blkbits;
- int ret = 0;
+ int ret;
inode_lock(inode);
@@ -581,41 +580,32 @@
dataoff = offset;
do {
- map.m_lblk = last;
- map.m_len = end - last + 1;
- ret = ext4_map_blocks(NULL, inode, &map, 0);
- if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
- if (last != start)
- dataoff = (loff_t)last << blkbits;
- break;
+ ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
+ if (ret <= 0) {
+ /* No extent found -> no data */
+ if (ret == 0)
+ ret = -ENXIO;
+ inode_unlock(inode);
+ return ret;
}
- /*
- * If there is a delay extent at this offset,
- * it will be as a data.
- */
- ext4_es_find_delayed_extent_range(inode, last, last, &es);
- if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
- if (last != start)
- dataoff = (loff_t)last << blkbits;
+ last = es.es_lblk;
+ if (last != start)
+ dataoff = (loff_t)last << blkbits;
+ if (!ext4_es_is_unwritten(&es))
break;
- }
/*
* If there is a unwritten extent at this offset,
* it will be as a data or a hole according to page
* cache that has data or not.
*/
- if (map.m_flags & EXT4_MAP_UNWRITTEN) {
- int unwritten;
- unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA,
- &map, &dataoff);
- if (unwritten)
- break;
- }
-
- last++;
+ if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
+ es.es_lblk + es.es_len, &dataoff))
+ break;
+ last += es.es_len;
dataoff = (loff_t)last << blkbits;
+ cond_resched();
} while (last <= end);
inode_unlock(inode);
@@ -632,12 +622,11 @@
static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
{
struct inode *inode = file->f_mapping->host;
- struct ext4_map_blocks map;
struct extent_status es;
ext4_lblk_t start, last, end;
loff_t holeoff, isize;
int blkbits;
- int ret = 0;
+ int ret;
inode_lock(inode);
@@ -654,44 +643,30 @@
holeoff = offset;
do {
- map.m_lblk = last;
- map.m_len = end - last + 1;
- ret = ext4_map_blocks(NULL, inode, &map, 0);
- if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
- last += ret;
- holeoff = (loff_t)last << blkbits;
- continue;
+ ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
+ if (ret < 0) {
+ inode_unlock(inode);
+ return ret;
}
-
- /*
- * If there is a delay extent at this offset,
- * we will skip this extent.
- */
- ext4_es_find_delayed_extent_range(inode, last, last, &es);
- if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
- last = es.es_lblk + es.es_len;
- holeoff = (loff_t)last << blkbits;
- continue;
+ /* Found a hole? */
+ if (ret == 0 || es.es_lblk > last) {
+ if (last != start)
+ holeoff = (loff_t)last << blkbits;
+ break;
}
-
/*
* If there is a unwritten extent at this offset,
* it will be as a data or a hole according to page
* cache that has data or not.
*/
- if (map.m_flags & EXT4_MAP_UNWRITTEN) {
- int unwritten;
- unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
- &map, &holeoff);
- if (!unwritten) {
- last += ret;
- holeoff = (loff_t)last << blkbits;
- continue;
- }
- }
+ if (ext4_es_is_unwritten(&es) &&
+ ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
+ last + es.es_len, &holeoff))
+ break;
- /* find a hole */
- break;
+ last += es.es_len;
+ holeoff = (loff_t)last << blkbits;
+ cond_resched();
} while (last <= end);
inode_unlock(inode);