Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
cfq-iosched: fix RCU problem in cfq_cic_lookup()
block: make blktrace use per-cpu buffers for message notes
Added in elevator switch message to blktrace stream
Added in MESSAGE notes for blktraces
block: reorder cfq_queue to save space on 64bit builds
block: Move the second call to get_request to the end of the loop
splice: handle try_to_release_page() failure
splice: fix sendfile() issue with relay
diff --git a/block/blk-core.c b/block/blk-core.c
index 6a9cc0d..1905aab 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -806,35 +806,32 @@
rq = get_request(q, rw_flags, bio, GFP_NOIO);
while (!rq) {
DEFINE_WAIT(wait);
+ struct io_context *ioc;
struct request_list *rl = &q->rq;
prepare_to_wait_exclusive(&rl->wait[rw], &wait,
TASK_UNINTERRUPTIBLE);
- rq = get_request(q, rw_flags, bio, GFP_NOIO);
+ blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
- if (!rq) {
- struct io_context *ioc;
+ __generic_unplug_device(q);
+ spin_unlock_irq(q->queue_lock);
+ io_schedule();
- blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
+ /*
+ * After sleeping, we become a "batching" process and
+ * will be able to allocate at least one request, and
+ * up to a big batch of them for a small period time.
+ * See ioc_batching, ioc_set_batching
+ */
+ ioc = current_io_context(GFP_NOIO, q->node);
+ ioc_set_batching(q, ioc);
- __generic_unplug_device(q);
- spin_unlock_irq(q->queue_lock);
- io_schedule();
-
- /*
- * After sleeping, we become a "batching" process and
- * will be able to allocate at least one request, and
- * up to a big batch of them for a small period time.
- * See ioc_batching, ioc_set_batching
- */
- ioc = current_io_context(GFP_NOIO, q->node);
- ioc_set_batching(q, ioc);
-
- spin_lock_irq(q->queue_lock);
- }
+ spin_lock_irq(q->queue_lock);
finish_wait(&rl->wait[rw], &wait);
- }
+
+ rq = get_request(q, rw_flags, bio, GFP_NOIO);
+ };
return rq;
}
diff --git a/block/blktrace.c b/block/blktrace.c
index b2cbb4e..7ae87cc 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -75,6 +75,23 @@
local_irq_restore(flags);
}
+void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
+{
+ int n;
+ va_list args;
+ char *buf;
+
+ preempt_disable();
+ buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
+ va_start(args, fmt);
+ n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
+ va_end(args);
+
+ trace_note(bt, 0, BLK_TN_MESSAGE, buf, n);
+ preempt_enable();
+}
+EXPORT_SYMBOL_GPL(__trace_note_message);
+
static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
pid_t pid)
{
@@ -232,6 +249,7 @@
debugfs_remove(bt->dropped_file);
blk_remove_tree(bt->dir);
free_percpu(bt->sequence);
+ free_percpu(bt->msg_data);
kfree(bt);
}
@@ -346,6 +364,10 @@
if (!bt->sequence)
goto err;
+ bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG);
+ if (!bt->msg_data)
+ goto err;
+
ret = -ENOENT;
dir = blk_create_tree(buts->name);
if (!dir)
@@ -392,6 +414,7 @@
if (bt->dropped_file)
debugfs_remove(bt->dropped_file);
free_percpu(bt->sequence);
+ free_percpu(bt->msg_data);
if (bt->rchan)
relay_close(bt->rchan);
kfree(bt);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index b399c62..d01b411 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -124,6 +124,8 @@
struct cfq_queue {
/* reference count */
atomic_t ref;
+ /* various state flags, see below */
+ unsigned int flags;
/* parent cfq_data */
struct cfq_data *cfqd;
/* service_tree member */
@@ -138,14 +140,14 @@
int queued[2];
/* currently allocated requests */
int allocated[2];
- /* pending metadata requests */
- int meta_pending;
/* fifo list of requests in sort_list */
struct list_head fifo;
unsigned long slice_end;
long slice_resid;
+ /* pending metadata requests */
+ int meta_pending;
/* number of requests that are on the dispatch list or inside driver */
int dispatched;
@@ -153,8 +155,6 @@
unsigned short ioprio, org_ioprio;
unsigned short ioprio_class, org_ioprio_class;
- /* various state flags, see below */
- unsigned int flags;
};
enum cfqq_state_flags {
@@ -1142,6 +1142,9 @@
kmem_cache_free(cfq_pool, cfqq);
}
+/*
+ * Must always be called with the rcu_read_lock() held
+ */
static void
__call_for_each_cic(struct io_context *ioc,
void (*func)(struct io_context *, struct cfq_io_context *))
@@ -1197,6 +1200,11 @@
cfq_cic_free(cic);
}
+/*
+ * Must be called with rcu_read_lock() held or preemption otherwise disabled.
+ * Only two callers of this - ->dtor() which is called with the rcu_read_lock(),
+ * and ->trim() which is called with the task lock held
+ */
static void cfq_free_io_context(struct io_context *ioc)
{
/*
@@ -1502,20 +1510,24 @@
cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
{
struct cfq_io_context *cic;
+ unsigned long flags;
void *k;
if (unlikely(!ioc))
return NULL;
+ rcu_read_lock();
+
/*
* we maintain a last-hit cache, to avoid browsing over the tree
*/
cic = rcu_dereference(ioc->ioc_data);
- if (cic && cic->key == cfqd)
+ if (cic && cic->key == cfqd) {
+ rcu_read_unlock();
return cic;
+ }
do {
- rcu_read_lock();
cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd);
rcu_read_unlock();
if (!cic)
@@ -1524,10 +1536,13 @@
k = cic->key;
if (unlikely(!k)) {
cfq_drop_dead_cic(cfqd, ioc, cic);
+ rcu_read_lock();
continue;
}
+ spin_lock_irqsave(&ioc->lock, flags);
rcu_assign_pointer(ioc->ioc_data, cic);
+ spin_unlock_irqrestore(&ioc->lock, flags);
break;
} while (1);
@@ -2134,6 +2149,10 @@
static void cfq_slab_kill(void)
{
+ /*
+ * Caller already ensured that pending RCU callbacks are completed,
+ * so we should have no busy allocations at this point.
+ */
if (cfq_pool)
kmem_cache_destroy(cfq_pool);
if (cfq_ioc_pool)
@@ -2292,6 +2311,11 @@
ioc_gone = &all_gone;
/* ioc_gone's update must be visible before reading ioc_count */
smp_wmb();
+
+ /*
+ * this also protects us from entering cfq_slab_kill() with
+ * pending RCU callbacks
+ */
if (elv_ioc_count_read(ioc_count))
wait_for_completion(ioc_gone);
cfq_slab_kill();
diff --git a/block/elevator.c b/block/elevator.c
index 980f8ae..902dd13 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -1110,6 +1110,8 @@
queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
spin_unlock_irq(q->queue_lock);
+ blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);
+
return 1;
fail_register:
diff --git a/fs/splice.c b/fs/splice.c
index 7815003..aa5f6f6 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -58,8 +58,8 @@
*/
wait_on_page_writeback(page);
- if (PagePrivate(page))
- try_to_release_page(page, GFP_KERNEL);
+ if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL))
+ goto out_unlock;
/*
* If we succeeded in removing the mapping, set LRU flag
@@ -75,6 +75,7 @@
* Raced with truncate or failed to remove page from current
* address space, unlock and return failure.
*/
+out_unlock:
unlock_page(page);
return 1;
}
@@ -983,7 +984,7 @@
while (len) {
size_t read_len;
- loff_t pos = sd->pos;
+ loff_t pos = sd->pos, prev_pos = pos;
ret = do_splice_to(in, &pos, pipe, len, flags);
if (unlikely(ret <= 0))
@@ -998,15 +999,19 @@
* could get stuck data in the internal pipe:
*/
ret = actor(pipe, sd);
- if (unlikely(ret <= 0))
+ if (unlikely(ret <= 0)) {
+ sd->pos = prev_pos;
goto out_release;
+ }
bytes += ret;
len -= ret;
sd->pos = pos;
- if (ret < read_len)
+ if (ret < read_len) {
+ sd->pos = prev_pos + ret;
goto out_release;
+ }
}
done:
@@ -1072,7 +1077,7 @@
ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
if (ret > 0)
- *ppos += ret;
+ *ppos = sd.pos;
return ret;
}
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index cfc3147..e3ef903 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -55,6 +55,7 @@
enum blktrace_notify {
__BLK_TN_PROCESS = 0, /* establish pid/name mapping */
__BLK_TN_TIMESTAMP, /* include system clock */
+ __BLK_TN_MESSAGE, /* Character string message */
};
@@ -79,6 +80,7 @@
#define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
#define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
+#define BLK_TN_MESSAGE (__BLK_TN_MESSAGE | BLK_TC_ACT(BLK_TC_NOTIFY))
#define BLK_IO_TRACE_MAGIC 0x65617400
#define BLK_IO_TRACE_VERSION 0x07
@@ -119,6 +121,7 @@
int trace_state;
struct rchan *rchan;
unsigned long *sequence;
+ unsigned char *msg_data;
u16 act_mask;
u64 start_lba;
u64 end_lba;
@@ -149,7 +152,28 @@
extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *);
extern int do_blk_trace_setup(struct request_queue *q,
char *name, dev_t dev, struct blk_user_trace_setup *buts);
+extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
+/**
+ * blk_add_trace_msg - Add a (simple) message to the blktrace stream
+ * @q: queue the io is for
+ * @fmt: format to print message in
+ * args... Variable argument list for format
+ *
+ * Description:
+ * Records a (simple) message onto the blktrace stream.
+ *
+ * NOTE: BLK_TN_MAX_MSG characters are output at most.
+ * NOTE: Can not use 'static inline' due to presence of var args...
+ *
+ **/
+#define blk_add_trace_msg(q, fmt, ...) \
+ do { \
+ struct blk_trace *bt = (q)->blk_trace; \
+ if (unlikely(bt)) \
+ __trace_note_message(bt, fmt, ##__VA_ARGS__); \
+ } while (0)
+#define BLK_TN_MAX_MSG 128
/**
* blk_add_trace_rq - Add a trace for a request oriented action
@@ -299,6 +323,8 @@
#define blk_trace_setup(q, name, dev, arg) (-ENOTTY)
#define blk_trace_startstop(q, start) (-ENOTTY)
#define blk_trace_remove(q) (-ENOTTY)
+#define blk_add_trace_msg(q, fmt, ...) do { } while (0)
+
#endif /* CONFIG_BLK_DEV_IO_TRACE */
#endif /* __KERNEL__ */
#endif
diff --git a/kernel/relay.c b/kernel/relay.c
index bc24dcd..7de644c 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1191,7 +1191,7 @@
ret = 0;
spliced = 0;
- while (len) {
+ while (len && !spliced) {
ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret);
if (ret < 0)
break;