blob: 41cfb09442013325f7511949f8d6171e2bbac487 [file] [log] [blame]
Jeff Dike75e55842005-09-03 15:57:45 -07001/*
2 * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#include <stdlib.h>
7#include <unistd.h>
8#include <signal.h>
9#include <errno.h>
10#include <sched.h>
11#include <sys/syscall.h>
12#include "os.h"
13#include "helper.h"
14#include "aio.h"
15#include "init.h"
16#include "user.h"
17#include "mode.h"
18
Jeff Dike91acb212005-10-10 23:10:32 -040019struct aio_thread_req {
20 enum aio_type type;
21 int io_fd;
22 unsigned long long offset;
23 char *buf;
24 int len;
25 struct aio_context *aio;
26};
27
/*
 * Descriptors used to pass requests to the 2.4-style I/O thread:
 * submit_aio_24() writes requests to aio_req_fd_w and not_aio_thread()
 * reads them from aio_req_fd_r.  Both stay -1 until init_aio_24()
 * assigns them.
 */
static int aio_req_fd_r = -1;
static int aio_req_fd_w = -1;
30
31#if defined(HAVE_AIO_ABI)
32#include <linux/aio_abi.h>
33
34/* If we have the headers, we are going to build with AIO enabled.
35 * If we don't have aio in libc, we define the necessary stubs here.
36 */
37
38#if !defined(HAVE_AIO_LIBC)
39
/*
 * Stand-in for the libc io_setup() wrapper: issues the io_setup system
 * call directly.  The return value is whatever syscall() returns.
 */
static long io_setup(int n, aio_context_t *ctxp)
{
	long ret;

	ret = syscall(__NR_io_setup, n, ctxp);
	return ret;
}
44
/*
 * Stand-in for the libc io_submit() wrapper: issues the io_submit system
 * call directly.  The return value is whatever syscall() returns.
 */
static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	long ret;

	ret = syscall(__NR_io_submit, ctx, nr, iocbpp);
	return ret;
}
49
/*
 * Stand-in for the libc io_getevents() wrapper: issues the io_getevents
 * system call directly.  The return value is whatever syscall() returns.
 */
static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	long ret;

	ret = syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);
	return ret;
}
55
56#endif
57
58/* The AIO_MMAP cases force the mmapped page into memory here
59 * rather than in whatever place first touches the data. I used
60 * to do this by touching the page, but that's delicate because
61 * gcc is prone to optimizing that away. So, what's done here
62 * is we read from the descriptor from which the page was
63 * mapped. The caller is required to pass an offset which is
64 * inside the page that was mapped. Thus, when the read
65 * returns, we know that the page is in the page cache, and
66 * that it now backs the mmapped area.
67 */
68
Jeff Dike91acb212005-10-10 23:10:32 -040069static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
70 int len, unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -070071{
72 struct iocb iocb, *iocbp = &iocb;
73 char c;
74 int err;
75
76 iocb = ((struct iocb) { .aio_data = (unsigned long) aio,
77 .aio_reqprio = 0,
Jeff Dike91acb212005-10-10 23:10:32 -040078 .aio_fildes = fd,
79 .aio_buf = (unsigned long) buf,
80 .aio_nbytes = len,
81 .aio_offset = offset,
Jeff Dike75e55842005-09-03 15:57:45 -070082 .aio_reserved1 = 0,
83 .aio_reserved2 = 0,
84 .aio_reserved3 = 0 });
85
Jeff Dike91acb212005-10-10 23:10:32 -040086 switch(type){
Jeff Dike75e55842005-09-03 15:57:45 -070087 case AIO_READ:
88 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
Jeff Dike91acb212005-10-10 23:10:32 -040089 err = io_submit(ctx, 1, &iocbp);
Jeff Dike75e55842005-09-03 15:57:45 -070090 break;
91 case AIO_WRITE:
92 iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
Jeff Dike91acb212005-10-10 23:10:32 -040093 err = io_submit(ctx, 1, &iocbp);
Jeff Dike75e55842005-09-03 15:57:45 -070094 break;
95 case AIO_MMAP:
96 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
97 iocb.aio_buf = (unsigned long) &c;
98 iocb.aio_nbytes = sizeof(c);
Jeff Dike91acb212005-10-10 23:10:32 -040099 err = io_submit(ctx, 1, &iocbp);
Jeff Dike75e55842005-09-03 15:57:45 -0700100 break;
101 default:
Jeff Dike91acb212005-10-10 23:10:32 -0400102 printk("Bogus op in do_aio - %d\n", type);
Jeff Dike75e55842005-09-03 15:57:45 -0700103 err = -EINVAL;
Jeff Dike91acb212005-10-10 23:10:32 -0400104 break;
Jeff Dike75e55842005-09-03 15:57:45 -0700105 }
Jeff Dike09ace812005-09-03 15:57:46 -0700106
Jeff Dike75e55842005-09-03 15:57:45 -0700107 if(err > 0)
108 err = 0;
Jeff Dike2867ace2005-09-16 19:27:51 -0700109 else
110 err = -errno;
Jeff Dike75e55842005-09-03 15:57:45 -0700111
112 return err;
113}
114
115static aio_context_t ctx = 0;
116
117static int aio_thread(void *arg)
118{
119 struct aio_thread_reply reply;
120 struct io_event event;
Jeff Dike91acb212005-10-10 23:10:32 -0400121 int err, n, reply_fd;
Jeff Dike75e55842005-09-03 15:57:45 -0700122
123 signal(SIGWINCH, SIG_IGN);
124
125 while(1){
126 n = io_getevents(ctx, 1, 1, &event, NULL);
127 if(n < 0){
128 if(errno == EINTR)
129 continue;
130 printk("aio_thread - io_getevents failed, "
131 "errno = %d\n", errno);
132 }
133 else {
134 reply = ((struct aio_thread_reply)
Jeff Dike91acb212005-10-10 23:10:32 -0400135 { .data = (void *) (long) event.data,
136 .err = event.res });
137 reply_fd = ((struct aio_context *) reply.data)->reply_fd;
138 err = os_write_file(reply_fd, &reply, sizeof(reply));
Jeff Dike75e55842005-09-03 15:57:45 -0700139 if(err != sizeof(reply))
Jeff Dike91acb212005-10-10 23:10:32 -0400140 printk("aio_thread - write failed, fd = %d, "
141 "err = %d\n", aio_req_fd_r, -err);
Jeff Dike75e55842005-09-03 15:57:45 -0700142 }
143 }
144 return 0;
145}
146
147#endif
148
Jeff Dike91acb212005-10-10 23:10:32 -0400149static int do_not_aio(struct aio_thread_req *req)
Jeff Dike75e55842005-09-03 15:57:45 -0700150{
151 char c;
152 int err;
153
Jeff Dike91acb212005-10-10 23:10:32 -0400154 switch(req->type){
Jeff Dike75e55842005-09-03 15:57:45 -0700155 case AIO_READ:
Jeff Dike91acb212005-10-10 23:10:32 -0400156 err = os_seek_file(req->io_fd, req->offset);
Jeff Dike75e55842005-09-03 15:57:45 -0700157 if(err)
158 goto out;
159
Jeff Dike91acb212005-10-10 23:10:32 -0400160 err = os_read_file(req->io_fd, req->buf, req->len);
Jeff Dike75e55842005-09-03 15:57:45 -0700161 break;
162 case AIO_WRITE:
Jeff Dike91acb212005-10-10 23:10:32 -0400163 err = os_seek_file(req->io_fd, req->offset);
Jeff Dike75e55842005-09-03 15:57:45 -0700164 if(err)
165 goto out;
166
Jeff Dike91acb212005-10-10 23:10:32 -0400167 err = os_write_file(req->io_fd, req->buf, req->len);
Jeff Dike75e55842005-09-03 15:57:45 -0700168 break;
169 case AIO_MMAP:
Jeff Dike91acb212005-10-10 23:10:32 -0400170 err = os_seek_file(req->io_fd, req->offset);
Jeff Dike75e55842005-09-03 15:57:45 -0700171 if(err)
172 goto out;
173
Jeff Dike91acb212005-10-10 23:10:32 -0400174 err = os_read_file(req->io_fd, &c, sizeof(c));
Jeff Dike75e55842005-09-03 15:57:45 -0700175 break;
176 default:
Jeff Dike91acb212005-10-10 23:10:32 -0400177 printk("do_not_aio - bad request type : %d\n", req->type);
Jeff Dike75e55842005-09-03 15:57:45 -0700178 err = -EINVAL;
179 break;
180 }
181
182 out:
183 return err;
184}
185
186static int not_aio_thread(void *arg)
187{
Jeff Dike91acb212005-10-10 23:10:32 -0400188 struct aio_thread_req req;
Jeff Dike75e55842005-09-03 15:57:45 -0700189 struct aio_thread_reply reply;
190 int err;
191
192 signal(SIGWINCH, SIG_IGN);
193 while(1){
Jeff Dike91acb212005-10-10 23:10:32 -0400194 err = os_read_file(aio_req_fd_r, &req, sizeof(req));
195 if(err != sizeof(req)){
Jeff Dike75e55842005-09-03 15:57:45 -0700196 if(err < 0)
197 printk("not_aio_thread - read failed, "
198 "fd = %d, err = %d\n", aio_req_fd_r,
199 -err);
200 else {
201 printk("not_aio_thread - short read, fd = %d, "
202 "length = %d\n", aio_req_fd_r, err);
203 }
204 continue;
205 }
Jeff Dike91acb212005-10-10 23:10:32 -0400206 err = do_not_aio(&req);
207 reply = ((struct aio_thread_reply) { .data = req.aio,
208 .err = err });
209 err = os_write_file(req.aio->reply_fd, &reply, sizeof(reply));
Jeff Dike75e55842005-09-03 15:57:45 -0700210 if(err != sizeof(reply))
211 printk("not_aio_thread - write failed, fd = %d, "
212 "err = %d\n", aio_req_fd_r, -err);
213 }
214}
215
216static int aio_pid = -1;
217
218static int init_aio_24(void)
219{
220 unsigned long stack;
221 int fds[2], err;
222
223 err = os_pipe(fds, 1, 1);
224 if(err)
225 goto out;
226
227 aio_req_fd_w = fds[0];
228 aio_req_fd_r = fds[1];
229 err = run_helper_thread(not_aio_thread, NULL,
230 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
231 if(err < 0)
232 goto out_close_pipe;
233
234 aio_pid = err;
235 goto out;
236
237 out_close_pipe:
238 os_close_file(fds[0]);
239 os_close_file(fds[1]);
240 aio_req_fd_w = -1;
241 aio_req_fd_r = -1;
242 out:
243#ifndef HAVE_AIO_ABI
244 printk("/usr/include/linux/aio_abi.h not present during build\n");
245#endif
246 printk("2.6 host AIO support not used - falling back to I/O "
247 "thread\n");
248 return 0;
249}
250
251#ifdef HAVE_AIO_ABI
252#define DEFAULT_24_AIO 0
253static int init_aio_26(void)
254{
255 unsigned long stack;
256 int err;
257
258 if(io_setup(256, &ctx)){
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700259 err = -errno;
Jeff Dike75e55842005-09-03 15:57:45 -0700260 printk("aio_thread failed to initialize context, err = %d\n",
261 errno);
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700262 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700263 }
264
265 err = run_helper_thread(aio_thread, NULL,
266 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
267 if(err < 0)
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700268 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700269
270 aio_pid = err;
271
272 printk("Using 2.6 host AIO\n");
273 return 0;
274}
275
Jeff Dike91acb212005-10-10 23:10:32 -0400276static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
277 unsigned long long offset, struct aio_context *aio)
278{
279 struct aio_thread_reply reply;
280 int err;
281
282 err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
283 if(err){
284 reply = ((struct aio_thread_reply) { .data = aio,
285 .err = err });
286 err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
287 if(err != sizeof(reply))
288 printk("submit_aio_26 - write failed, "
289 "fd = %d, err = %d\n", aio->reply_fd, -err);
290 else err = 0;
291 }
292
293 return err;
294}
295
Jeff Dike75e55842005-09-03 15:57:45 -0700296#else
297#define DEFAULT_24_AIO 1
/*
 * Stub used when the build has no aio_abi.h: 2.6 host AIO is not
 * compiled in, so initialization always fails with -ENOSYS.
 */
static int init_aio_26(void)
{
	const int not_built = -ENOSYS;

	return not_built;
}
302
Jeff Dike91acb212005-10-10 23:10:32 -0400303static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
304 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700305{
306 return -ENOSYS;
307}
308#endif
309
310static int aio_24 = DEFAULT_24_AIO;
311
312static int __init set_aio_24(char *name, int *add)
313{
314 aio_24 = 1;
315 return 0;
316}
317
318__uml_setup("aio=2.4", set_aio_24,
319"aio=2.4\n"
320" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
321" available. 2.4 AIO is a single thread that handles one request at a\n"
322" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
323" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
324" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
325" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
326" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
327" your /usr/include/linux in order to build an AIO-capable UML\n\n"
328);
329
330static int init_aio(void)
331{
332 int err;
333
334 CHOOSE_MODE(({
335 if(!aio_24){
336 printk("Disabling 2.6 AIO in tt mode\n");
337 aio_24 = 1;
338 } }), (void) 0);
339
340 if(!aio_24){
341 err = init_aio_26();
342 if(err && (errno == ENOSYS)){
343 printk("2.6 AIO not supported on the host - "
344 "reverting to 2.4 AIO\n");
345 aio_24 = 1;
346 }
347 else return err;
348 }
349
350 if(aio_24)
351 return init_aio_24();
352
353 return 0;
354}
355
/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
 * needs to be called when the kernel is running because it calls
 * run_helper_thread, which needs get_free_page.  exit_aio is a
 * __uml_exitcall because the generic kernel does not run __exitcalls on
 * shutdown, and can't because many of them break when called outside of
 * module unloading.
 */
362__initcall(init_aio);
363
364static void exit_aio(void)
365{
366 if(aio_pid != -1)
367 os_kill_process(aio_pid, 1);
368}
369
370__uml_exitcall(exit_aio);
371
Jeff Dike91acb212005-10-10 23:10:32 -0400372static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
373 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700374{
Jeff Dike91acb212005-10-10 23:10:32 -0400375 struct aio_thread_req req = { .type = type,
376 .io_fd = io_fd,
377 .offset = offset,
378 .buf = buf,
379 .len = len,
380 .aio = aio,
381 };
382 int err;
383
384 err = os_write_file(aio_req_fd_w, &req, sizeof(req));
385 if(err == sizeof(req))
386 err = 0;
387
388 return err;
389}
390
391int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
392 unsigned long long offset, int reply_fd,
393 struct aio_context *aio)
394{
395 aio->reply_fd = reply_fd;
396 if(aio_24)
397 return submit_aio_24(type, io_fd, buf, len, offset, aio);
398 else {
399 return submit_aio_26(type, io_fd, buf, len, offset, aio);
400 }
Jeff Dike75e55842005-09-03 15:57:45 -0700401}