blob: 040de6bd74dd8cb527d1ef1544cb3d9b45eacbb8 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * sysctl.c: General linux system control interface
3 *
4 * Begun 24 March 1995, Stephen Tweedie
5 * Added /proc support, Dec 1995
6 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9 * Dynamic registration fixes, Stephen Tweedie.
10 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12 * Horn.
13 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16 * Wendling.
17 * The list_for_each() macro wasn't appropriate for the sysctl loop.
18 * Removed it and replaced it with older style, 03/23/00, Bill Wendling
19 */
20
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/module.h>
22#include <linux/mm.h>
23#include <linux/swap.h>
24#include <linux/slab.h>
25#include <linux/sysctl.h>
26#include <linux/proc_fs.h>
Randy.Dunlapc59ede72006-01-11 12:17:46 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/ctype.h>
29#include <linux/utsname.h>
30#include <linux/capability.h>
31#include <linux/smp_lock.h>
32#include <linux/init.h>
33#include <linux/kernel.h>
Kay Sievers0296b222005-11-11 05:33:52 +010034#include <linux/kobject.h>
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -030035#include <linux/net.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <linux/sysrq.h>
37#include <linux/highuid.h>
38#include <linux/writeback.h>
39#include <linux/hugetlb.h>
40#include <linux/security.h>
41#include <linux/initrd.h>
42#include <linux/times.h>
43#include <linux/limits.h>
44#include <linux/dcache.h>
45#include <linux/syscalls.h>
Pavel Machekc255d842006-02-20 18:27:58 -080046#include <linux/nfs_fs.h>
47#include <linux/acpi.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070048
49#include <asm/uaccess.h>
50#include <asm/processor.h>
51
Dipankar Sarma529bf6b2006-03-07 21:55:35 -080052extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
53 void __user *buffer, size_t *lenp, loff_t *ppos);
54
Linus Torvalds1da177e2005-04-16 15:20:36 -070055#if defined(CONFIG_SYSCTL)
56
57/* External variables not in a header file. */
58extern int C_A_D;
59extern int sysctl_overcommit_memory;
60extern int sysctl_overcommit_ratio;
KAMEZAWA Hiroyukifadd8fb2006-06-23 02:03:13 -070061extern int sysctl_panic_on_oom;
Linus Torvalds1da177e2005-04-16 15:20:36 -070062extern int max_threads;
63extern int sysrq_enabled;
64extern int core_uses_pid;
Alan Coxd6e71142005-06-23 00:09:43 -070065extern int suid_dumpable;
Linus Torvalds1da177e2005-04-16 15:20:36 -070066extern char core_pattern[];
67extern int cad_pid;
68extern int pid_max;
69extern int min_free_kbytes;
70extern int printk_ratelimit_jiffies;
71extern int printk_ratelimit_burst;
72extern int pid_max_min, pid_max_max;
Andrew Morton9d0243b2006-01-08 01:00:39 -080073extern int sysctl_drop_caches;
Rohit Seth8ad4b1f2006-01-08 01:00:40 -080074extern int percpu_pagelist_fraction;
Andi Kleenbebfa102006-06-26 13:56:52 +020075extern int compat_log;
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
77#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
78int unknown_nmi_panic;
Don Zickus407984f2006-09-26 10:52:27 +020079int nmi_watchdog_enabled;
80extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
81 void __user *, size_t *, loff_t *);
Linus Torvalds1da177e2005-04-16 15:20:36 -070082#endif
83
84/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
85static int maxolduid = 65535;
86static int minolduid;
Rohit Seth8ad4b1f2006-01-08 01:00:40 -080087static int min_percpu_pagelist_fract = 8;
Linus Torvalds1da177e2005-04-16 15:20:36 -070088
89static int ngroups_max = NGROUPS_MAX;
90
91#ifdef CONFIG_KMOD
92extern char modprobe_path[];
93#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -070094#ifdef CONFIG_CHR_DEV_SG
95extern int sg_big_buff;
96#endif
97#ifdef CONFIG_SYSVIPC
98extern size_t shm_ctlmax;
99extern size_t shm_ctlall;
100extern int shm_ctlmni;
101extern int msg_ctlmax;
102extern int msg_ctlmnb;
103extern int msg_ctlmni;
104extern int sem_ctls[];
105#endif
106
107#ifdef __sparc__
108extern char reboot_command [];
109extern int stop_a_enabled;
110extern int scons_pwroff;
111#endif
112
113#ifdef __hppa__
114extern int pwrsw_enabled;
115extern int unaligned_enabled;
116#endif
117
Martin Schwidefsky347a8dc2006-01-06 00:19:28 -0800118#ifdef CONFIG_S390
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119#ifdef CONFIG_MATHEMU
120extern int sysctl_ieee_emulation_warnings;
121#endif
122extern int sysctl_userprocess_debug;
Martin Schwidefsky951f22d2005-07-27 11:44:57 -0700123extern int spin_retry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124#endif
125
126extern int sysctl_hz_timer;
127
128#ifdef CONFIG_BSD_PROCESS_ACCT
129extern int acct_parm[];
130#endif
131
Jes Sorensend2b176e2006-02-28 09:42:23 -0800132#ifdef CONFIG_IA64
133extern int no_unaligned_warning;
134#endif
135
Ingo Molnar23f78d4a2006-06-27 02:54:53 -0700136#ifdef CONFIG_RT_MUTEXES
137extern int max_lock_depth;
138#endif
139
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
141 ctl_table *, void **);
142static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
143 void __user *buffer, size_t *lenp, loff_t *ppos);
144
145static ctl_table root_table[];
146static struct ctl_table_header root_table_header =
147 { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
148
149static ctl_table kern_table[];
150static ctl_table vm_table[];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151static ctl_table fs_table[];
152static ctl_table debug_table[];
153static ctl_table dev_table[];
154extern ctl_table random_table[];
155#ifdef CONFIG_UNIX98_PTYS
156extern ctl_table pty_table[];
157#endif
Amy Griffis2d9048e2006-06-01 13:10:59 -0700158#ifdef CONFIG_INOTIFY_USER
Robert Love0399cb02005-07-13 12:38:18 -0400159extern ctl_table inotify_table[];
160#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161
162#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
163int sysctl_legacy_va_layout;
164#endif
165
166/* /proc declarations: */
167
168#ifdef CONFIG_PROC_FS
169
170static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *);
171static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *);
172static int proc_opensys(struct inode *, struct file *);
173
174struct file_operations proc_sys_file_operations = {
175 .open = proc_opensys,
176 .read = proc_readsys,
177 .write = proc_writesys,
178};
179
180extern struct proc_dir_entry *proc_sys_root;
181
Al Viro330d57f2005-11-04 10:18:40 +0000182static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
184#endif
185
186/* The default sysctl tables: */
187
188static ctl_table root_table[] = {
189 {
190 .ctl_name = CTL_KERN,
191 .procname = "kernel",
192 .mode = 0555,
193 .child = kern_table,
194 },
195 {
196 .ctl_name = CTL_VM,
197 .procname = "vm",
198 .mode = 0555,
199 .child = vm_table,
200 },
201#ifdef CONFIG_NET
202 {
203 .ctl_name = CTL_NET,
204 .procname = "net",
205 .mode = 0555,
206 .child = net_table,
207 },
208#endif
209 {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210 .ctl_name = CTL_FS,
211 .procname = "fs",
212 .mode = 0555,
213 .child = fs_table,
214 },
215 {
216 .ctl_name = CTL_DEBUG,
217 .procname = "debug",
218 .mode = 0555,
219 .child = debug_table,
220 },
221 {
222 .ctl_name = CTL_DEV,
223 .procname = "dev",
224 .mode = 0555,
225 .child = dev_table,
226 },
Robert Love0eeca282005-07-12 17:06:03 -0400227
Linus Torvalds1da177e2005-04-16 15:20:36 -0700228 { .ctl_name = 0 }
229};
230
231static ctl_table kern_table[] = {
232 {
233 .ctl_name = KERN_OSTYPE,
234 .procname = "ostype",
235 .data = system_utsname.sysname,
236 .maxlen = sizeof(system_utsname.sysname),
237 .mode = 0444,
238 .proc_handler = &proc_doutsstring,
239 .strategy = &sysctl_string,
240 },
241 {
242 .ctl_name = KERN_OSRELEASE,
243 .procname = "osrelease",
244 .data = system_utsname.release,
245 .maxlen = sizeof(system_utsname.release),
246 .mode = 0444,
247 .proc_handler = &proc_doutsstring,
248 .strategy = &sysctl_string,
249 },
250 {
251 .ctl_name = KERN_VERSION,
252 .procname = "version",
253 .data = system_utsname.version,
254 .maxlen = sizeof(system_utsname.version),
255 .mode = 0444,
256 .proc_handler = &proc_doutsstring,
257 .strategy = &sysctl_string,
258 },
259 {
260 .ctl_name = KERN_NODENAME,
261 .procname = "hostname",
262 .data = system_utsname.nodename,
263 .maxlen = sizeof(system_utsname.nodename),
264 .mode = 0644,
265 .proc_handler = &proc_doutsstring,
266 .strategy = &sysctl_string,
267 },
268 {
269 .ctl_name = KERN_DOMAINNAME,
270 .procname = "domainname",
271 .data = system_utsname.domainname,
272 .maxlen = sizeof(system_utsname.domainname),
273 .mode = 0644,
274 .proc_handler = &proc_doutsstring,
275 .strategy = &sysctl_string,
276 },
277 {
278 .ctl_name = KERN_PANIC,
279 .procname = "panic",
280 .data = &panic_timeout,
281 .maxlen = sizeof(int),
282 .mode = 0644,
283 .proc_handler = &proc_dointvec,
284 },
285 {
286 .ctl_name = KERN_CORE_USES_PID,
287 .procname = "core_uses_pid",
288 .data = &core_uses_pid,
289 .maxlen = sizeof(int),
290 .mode = 0644,
291 .proc_handler = &proc_dointvec,
292 },
293 {
294 .ctl_name = KERN_CORE_PATTERN,
295 .procname = "core_pattern",
296 .data = core_pattern,
297 .maxlen = 64,
298 .mode = 0644,
299 .proc_handler = &proc_dostring,
300 .strategy = &sysctl_string,
301 },
302 {
303 .ctl_name = KERN_TAINTED,
304 .procname = "tainted",
305 .data = &tainted,
306 .maxlen = sizeof(int),
307 .mode = 0444,
308 .proc_handler = &proc_dointvec,
309 },
310 {
311 .ctl_name = KERN_CAP_BSET,
312 .procname = "cap-bound",
313 .data = &cap_bset,
314 .maxlen = sizeof(kernel_cap_t),
315 .mode = 0600,
316 .proc_handler = &proc_dointvec_bset,
317 },
318#ifdef CONFIG_BLK_DEV_INITRD
319 {
320 .ctl_name = KERN_REALROOTDEV,
321 .procname = "real-root-dev",
322 .data = &real_root_dev,
323 .maxlen = sizeof(int),
324 .mode = 0644,
325 .proc_handler = &proc_dointvec,
326 },
327#endif
328#ifdef __sparc__
329 {
330 .ctl_name = KERN_SPARC_REBOOT,
331 .procname = "reboot-cmd",
332 .data = reboot_command,
333 .maxlen = 256,
334 .mode = 0644,
335 .proc_handler = &proc_dostring,
336 .strategy = &sysctl_string,
337 },
338 {
339 .ctl_name = KERN_SPARC_STOP_A,
340 .procname = "stop-a",
341 .data = &stop_a_enabled,
342 .maxlen = sizeof (int),
343 .mode = 0644,
344 .proc_handler = &proc_dointvec,
345 },
346 {
347 .ctl_name = KERN_SPARC_SCONS_PWROFF,
348 .procname = "scons-poweroff",
349 .data = &scons_pwroff,
350 .maxlen = sizeof (int),
351 .mode = 0644,
352 .proc_handler = &proc_dointvec,
353 },
354#endif
355#ifdef __hppa__
356 {
357 .ctl_name = KERN_HPPA_PWRSW,
358 .procname = "soft-power",
359 .data = &pwrsw_enabled,
360 .maxlen = sizeof (int),
361 .mode = 0644,
362 .proc_handler = &proc_dointvec,
363 },
364 {
365 .ctl_name = KERN_HPPA_UNALIGNED,
366 .procname = "unaligned-trap",
367 .data = &unaligned_enabled,
368 .maxlen = sizeof (int),
369 .mode = 0644,
370 .proc_handler = &proc_dointvec,
371 },
372#endif
373 {
374 .ctl_name = KERN_CTLALTDEL,
375 .procname = "ctrl-alt-del",
376 .data = &C_A_D,
377 .maxlen = sizeof(int),
378 .mode = 0644,
379 .proc_handler = &proc_dointvec,
380 },
381 {
382 .ctl_name = KERN_PRINTK,
383 .procname = "printk",
384 .data = &console_loglevel,
385 .maxlen = 4*sizeof(int),
386 .mode = 0644,
387 .proc_handler = &proc_dointvec,
388 },
389#ifdef CONFIG_KMOD
390 {
391 .ctl_name = KERN_MODPROBE,
392 .procname = "modprobe",
393 .data = &modprobe_path,
394 .maxlen = KMOD_PATH_LEN,
395 .mode = 0644,
396 .proc_handler = &proc_dostring,
397 .strategy = &sysctl_string,
398 },
399#endif
Andrew Morton57ae2502006-06-23 02:05:47 -0700400#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401 {
402 .ctl_name = KERN_HOTPLUG,
403 .procname = "hotplug",
Kay Sievers312c0042005-11-16 09:00:00 +0100404 .data = &uevent_helper,
405 .maxlen = UEVENT_HELPER_PATH_LEN,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406 .mode = 0644,
407 .proc_handler = &proc_dostring,
408 .strategy = &sysctl_string,
409 },
410#endif
411#ifdef CONFIG_CHR_DEV_SG
412 {
413 .ctl_name = KERN_SG_BIG_BUFF,
414 .procname = "sg-big-buff",
415 .data = &sg_big_buff,
416 .maxlen = sizeof (int),
417 .mode = 0444,
418 .proc_handler = &proc_dointvec,
419 },
420#endif
421#ifdef CONFIG_BSD_PROCESS_ACCT
422 {
423 .ctl_name = KERN_ACCT,
424 .procname = "acct",
425 .data = &acct_parm,
426 .maxlen = 3*sizeof(int),
427 .mode = 0644,
428 .proc_handler = &proc_dointvec,
429 },
430#endif
431#ifdef CONFIG_SYSVIPC
432 {
433 .ctl_name = KERN_SHMMAX,
434 .procname = "shmmax",
435 .data = &shm_ctlmax,
436 .maxlen = sizeof (size_t),
437 .mode = 0644,
438 .proc_handler = &proc_doulongvec_minmax,
439 },
440 {
441 .ctl_name = KERN_SHMALL,
442 .procname = "shmall",
443 .data = &shm_ctlall,
444 .maxlen = sizeof (size_t),
445 .mode = 0644,
446 .proc_handler = &proc_doulongvec_minmax,
447 },
448 {
449 .ctl_name = KERN_SHMMNI,
450 .procname = "shmmni",
451 .data = &shm_ctlmni,
452 .maxlen = sizeof (int),
453 .mode = 0644,
454 .proc_handler = &proc_dointvec,
455 },
456 {
457 .ctl_name = KERN_MSGMAX,
458 .procname = "msgmax",
459 .data = &msg_ctlmax,
460 .maxlen = sizeof (int),
461 .mode = 0644,
462 .proc_handler = &proc_dointvec,
463 },
464 {
465 .ctl_name = KERN_MSGMNI,
466 .procname = "msgmni",
467 .data = &msg_ctlmni,
468 .maxlen = sizeof (int),
469 .mode = 0644,
470 .proc_handler = &proc_dointvec,
471 },
472 {
473 .ctl_name = KERN_MSGMNB,
474 .procname = "msgmnb",
475 .data = &msg_ctlmnb,
476 .maxlen = sizeof (int),
477 .mode = 0644,
478 .proc_handler = &proc_dointvec,
479 },
480 {
481 .ctl_name = KERN_SEM,
482 .procname = "sem",
483 .data = &sem_ctls,
484 .maxlen = 4*sizeof (int),
485 .mode = 0644,
486 .proc_handler = &proc_dointvec,
487 },
488#endif
489#ifdef CONFIG_MAGIC_SYSRQ
490 {
491 .ctl_name = KERN_SYSRQ,
492 .procname = "sysrq",
493 .data = &sysrq_enabled,
494 .maxlen = sizeof (int),
495 .mode = 0644,
496 .proc_handler = &proc_dointvec,
497 },
498#endif
499 {
500 .ctl_name = KERN_CADPID,
501 .procname = "cad_pid",
502 .data = &cad_pid,
503 .maxlen = sizeof (int),
504 .mode = 0600,
505 .proc_handler = &proc_dointvec,
506 },
507 {
508 .ctl_name = KERN_MAX_THREADS,
509 .procname = "threads-max",
510 .data = &max_threads,
511 .maxlen = sizeof(int),
512 .mode = 0644,
513 .proc_handler = &proc_dointvec,
514 },
515 {
516 .ctl_name = KERN_RANDOM,
517 .procname = "random",
518 .mode = 0555,
519 .child = random_table,
520 },
521#ifdef CONFIG_UNIX98_PTYS
522 {
523 .ctl_name = KERN_PTY,
524 .procname = "pty",
525 .mode = 0555,
526 .child = pty_table,
527 },
528#endif
529 {
530 .ctl_name = KERN_OVERFLOWUID,
531 .procname = "overflowuid",
532 .data = &overflowuid,
533 .maxlen = sizeof(int),
534 .mode = 0644,
535 .proc_handler = &proc_dointvec_minmax,
536 .strategy = &sysctl_intvec,
537 .extra1 = &minolduid,
538 .extra2 = &maxolduid,
539 },
540 {
541 .ctl_name = KERN_OVERFLOWGID,
542 .procname = "overflowgid",
543 .data = &overflowgid,
544 .maxlen = sizeof(int),
545 .mode = 0644,
546 .proc_handler = &proc_dointvec_minmax,
547 .strategy = &sysctl_intvec,
548 .extra1 = &minolduid,
549 .extra2 = &maxolduid,
550 },
Martin Schwidefsky347a8dc2006-01-06 00:19:28 -0800551#ifdef CONFIG_S390
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552#ifdef CONFIG_MATHEMU
553 {
554 .ctl_name = KERN_IEEE_EMULATION_WARNINGS,
555 .procname = "ieee_emulation_warnings",
556 .data = &sysctl_ieee_emulation_warnings,
557 .maxlen = sizeof(int),
558 .mode = 0644,
559 .proc_handler = &proc_dointvec,
560 },
561#endif
562#ifdef CONFIG_NO_IDLE_HZ
563 {
564 .ctl_name = KERN_HZ_TIMER,
565 .procname = "hz_timer",
566 .data = &sysctl_hz_timer,
567 .maxlen = sizeof(int),
568 .mode = 0644,
569 .proc_handler = &proc_dointvec,
570 },
571#endif
572 {
573 .ctl_name = KERN_S390_USER_DEBUG_LOGGING,
574 .procname = "userprocess_debug",
575 .data = &sysctl_userprocess_debug,
576 .maxlen = sizeof(int),
577 .mode = 0644,
578 .proc_handler = &proc_dointvec,
579 },
580#endif
581 {
582 .ctl_name = KERN_PIDMAX,
583 .procname = "pid_max",
584 .data = &pid_max,
585 .maxlen = sizeof (int),
586 .mode = 0644,
587 .proc_handler = &proc_dointvec_minmax,
588 .strategy = sysctl_intvec,
589 .extra1 = &pid_max_min,
590 .extra2 = &pid_max_max,
591 },
592 {
593 .ctl_name = KERN_PANIC_ON_OOPS,
594 .procname = "panic_on_oops",
595 .data = &panic_on_oops,
596 .maxlen = sizeof(int),
597 .mode = 0644,
598 .proc_handler = &proc_dointvec,
599 },
600 {
601 .ctl_name = KERN_PRINTK_RATELIMIT,
602 .procname = "printk_ratelimit",
603 .data = &printk_ratelimit_jiffies,
604 .maxlen = sizeof(int),
605 .mode = 0644,
606 .proc_handler = &proc_dointvec_jiffies,
607 .strategy = &sysctl_jiffies,
608 },
609 {
610 .ctl_name = KERN_PRINTK_RATELIMIT_BURST,
611 .procname = "printk_ratelimit_burst",
612 .data = &printk_ratelimit_burst,
613 .maxlen = sizeof(int),
614 .mode = 0644,
615 .proc_handler = &proc_dointvec,
616 },
617 {
618 .ctl_name = KERN_NGROUPS_MAX,
619 .procname = "ngroups_max",
620 .data = &ngroups_max,
621 .maxlen = sizeof (int),
622 .mode = 0444,
623 .proc_handler = &proc_dointvec,
624 },
625#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
626 {
627 .ctl_name = KERN_UNKNOWN_NMI_PANIC,
628 .procname = "unknown_nmi_panic",
629 .data = &unknown_nmi_panic,
630 .maxlen = sizeof (int),
631 .mode = 0644,
Don Zickus2fbe7b22006-09-26 10:52:27 +0200632 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633 },
Don Zickus407984f2006-09-26 10:52:27 +0200634 {
635 .ctl_name = KERN_NMI_WATCHDOG,
636 .procname = "nmi_watchdog",
637 .data = &nmi_watchdog_enabled,
638 .maxlen = sizeof (int),
639 .mode = 0644,
640 .proc_handler = &proc_nmi_enabled,
641 },
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642#endif
643#if defined(CONFIG_X86)
644 {
645 .ctl_name = KERN_BOOTLOADER_TYPE,
646 .procname = "bootloader_type",
647 .data = &bootloader_type,
648 .maxlen = sizeof (int),
649 .mode = 0444,
650 .proc_handler = &proc_dointvec,
651 },
652#endif
Luke Yang7a9166e2006-02-20 18:28:07 -0800653#if defined(CONFIG_MMU)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 {
655 .ctl_name = KERN_RANDOMIZE,
656 .procname = "randomize_va_space",
657 .data = &randomize_va_space,
658 .maxlen = sizeof(int),
659 .mode = 0644,
660 .proc_handler = &proc_dointvec,
661 },
Luke Yang7a9166e2006-02-20 18:28:07 -0800662#endif
Martin Schwidefsky0152fb32006-01-14 13:21:00 -0800663#if defined(CONFIG_S390) && defined(CONFIG_SMP)
Martin Schwidefsky951f22d2005-07-27 11:44:57 -0700664 {
665 .ctl_name = KERN_SPIN_RETRY,
666 .procname = "spin_retry",
667 .data = &spin_retry,
668 .maxlen = sizeof (int),
669 .mode = 0644,
670 .proc_handler = &proc_dointvec,
671 },
672#endif
Pavel Machekc255d842006-02-20 18:27:58 -0800673#ifdef CONFIG_ACPI_SLEEP
674 {
675 .ctl_name = KERN_ACPI_VIDEO_FLAGS,
676 .procname = "acpi_video_flags",
677 .data = &acpi_video_flags,
678 .maxlen = sizeof (unsigned long),
679 .mode = 0644,
Stefan Seyfried7f99f062006-03-02 02:54:34 -0800680 .proc_handler = &proc_doulongvec_minmax,
Pavel Machekc255d842006-02-20 18:27:58 -0800681 },
682#endif
Jes Sorensend2b176e2006-02-28 09:42:23 -0800683#ifdef CONFIG_IA64
684 {
685 .ctl_name = KERN_IA64_UNALIGNED,
686 .procname = "ignore-unaligned-usertrap",
687 .data = &no_unaligned_warning,
688 .maxlen = sizeof (int),
689 .mode = 0644,
690 .proc_handler = &proc_dointvec,
691 },
692#endif
Andi Kleenbebfa102006-06-26 13:56:52 +0200693#ifdef CONFIG_COMPAT
694 {
695 .ctl_name = KERN_COMPAT_LOG,
696 .procname = "compat-log",
697 .data = &compat_log,
698 .maxlen = sizeof (int),
699 .mode = 0644,
700 .proc_handler = &proc_dointvec,
701 },
702#endif
Ingo Molnar23f78d4a2006-06-27 02:54:53 -0700703#ifdef CONFIG_RT_MUTEXES
704 {
705 .ctl_name = KERN_MAX_LOCK_DEPTH,
706 .procname = "max_lock_depth",
707 .data = &max_lock_depth,
708 .maxlen = sizeof(int),
709 .mode = 0644,
710 .proc_handler = &proc_dointvec,
711 },
712#endif
713
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714 { .ctl_name = 0 }
715};
716
717/* Constants for minimum and maximum testing in vm_table.
718 We use these as one-element integer vectors. */
719static int zero;
720static int one_hundred = 100;
721
722
723static ctl_table vm_table[] = {
724 {
725 .ctl_name = VM_OVERCOMMIT_MEMORY,
726 .procname = "overcommit_memory",
727 .data = &sysctl_overcommit_memory,
728 .maxlen = sizeof(sysctl_overcommit_memory),
729 .mode = 0644,
730 .proc_handler = &proc_dointvec,
731 },
732 {
KAMEZAWA Hiroyukifadd8fb2006-06-23 02:03:13 -0700733 .ctl_name = VM_PANIC_ON_OOM,
734 .procname = "panic_on_oom",
735 .data = &sysctl_panic_on_oom,
736 .maxlen = sizeof(sysctl_panic_on_oom),
737 .mode = 0644,
738 .proc_handler = &proc_dointvec,
739 },
740 {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700741 .ctl_name = VM_OVERCOMMIT_RATIO,
742 .procname = "overcommit_ratio",
743 .data = &sysctl_overcommit_ratio,
744 .maxlen = sizeof(sysctl_overcommit_ratio),
745 .mode = 0644,
746 .proc_handler = &proc_dointvec,
747 },
748 {
749 .ctl_name = VM_PAGE_CLUSTER,
750 .procname = "page-cluster",
751 .data = &page_cluster,
752 .maxlen = sizeof(int),
753 .mode = 0644,
754 .proc_handler = &proc_dointvec,
755 },
756 {
757 .ctl_name = VM_DIRTY_BACKGROUND,
758 .procname = "dirty_background_ratio",
759 .data = &dirty_background_ratio,
760 .maxlen = sizeof(dirty_background_ratio),
761 .mode = 0644,
762 .proc_handler = &proc_dointvec_minmax,
763 .strategy = &sysctl_intvec,
764 .extra1 = &zero,
765 .extra2 = &one_hundred,
766 },
767 {
768 .ctl_name = VM_DIRTY_RATIO,
769 .procname = "dirty_ratio",
770 .data = &vm_dirty_ratio,
771 .maxlen = sizeof(vm_dirty_ratio),
772 .mode = 0644,
773 .proc_handler = &proc_dointvec_minmax,
774 .strategy = &sysctl_intvec,
775 .extra1 = &zero,
776 .extra2 = &one_hundred,
777 },
778 {
779 .ctl_name = VM_DIRTY_WB_CS,
780 .procname = "dirty_writeback_centisecs",
Bart Samwelf6ef9432006-03-24 03:15:48 -0800781 .data = &dirty_writeback_interval,
782 .maxlen = sizeof(dirty_writeback_interval),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783 .mode = 0644,
784 .proc_handler = &dirty_writeback_centisecs_handler,
785 },
786 {
787 .ctl_name = VM_DIRTY_EXPIRE_CS,
788 .procname = "dirty_expire_centisecs",
Bart Samwelf6ef9432006-03-24 03:15:48 -0800789 .data = &dirty_expire_interval,
790 .maxlen = sizeof(dirty_expire_interval),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700791 .mode = 0644,
Bart Samwelf6ef9432006-03-24 03:15:48 -0800792 .proc_handler = &proc_dointvec_userhz_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793 },
794 {
795 .ctl_name = VM_NR_PDFLUSH_THREADS,
796 .procname = "nr_pdflush_threads",
797 .data = &nr_pdflush_threads,
798 .maxlen = sizeof nr_pdflush_threads,
799 .mode = 0444 /* read-only*/,
800 .proc_handler = &proc_dointvec,
801 },
802 {
803 .ctl_name = VM_SWAPPINESS,
804 .procname = "swappiness",
805 .data = &vm_swappiness,
806 .maxlen = sizeof(vm_swappiness),
807 .mode = 0644,
808 .proc_handler = &proc_dointvec_minmax,
809 .strategy = &sysctl_intvec,
810 .extra1 = &zero,
811 .extra2 = &one_hundred,
812 },
813#ifdef CONFIG_HUGETLB_PAGE
814 {
815 .ctl_name = VM_HUGETLB_PAGES,
816 .procname = "nr_hugepages",
817 .data = &max_huge_pages,
818 .maxlen = sizeof(unsigned long),
819 .mode = 0644,
820 .proc_handler = &hugetlb_sysctl_handler,
821 .extra1 = (void *)&hugetlb_zero,
822 .extra2 = (void *)&hugetlb_infinity,
823 },
824 {
825 .ctl_name = VM_HUGETLB_GROUP,
826 .procname = "hugetlb_shm_group",
827 .data = &sysctl_hugetlb_shm_group,
828 .maxlen = sizeof(gid_t),
829 .mode = 0644,
830 .proc_handler = &proc_dointvec,
831 },
832#endif
833 {
834 .ctl_name = VM_LOWMEM_RESERVE_RATIO,
835 .procname = "lowmem_reserve_ratio",
836 .data = &sysctl_lowmem_reserve_ratio,
837 .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
838 .mode = 0644,
839 .proc_handler = &lowmem_reserve_ratio_sysctl_handler,
840 .strategy = &sysctl_intvec,
841 },
842 {
Andrew Morton9d0243b2006-01-08 01:00:39 -0800843 .ctl_name = VM_DROP_PAGECACHE,
844 .procname = "drop_caches",
845 .data = &sysctl_drop_caches,
846 .maxlen = sizeof(int),
847 .mode = 0644,
848 .proc_handler = drop_caches_sysctl_handler,
849 .strategy = &sysctl_intvec,
850 },
851 {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700852 .ctl_name = VM_MIN_FREE_KBYTES,
853 .procname = "min_free_kbytes",
854 .data = &min_free_kbytes,
855 .maxlen = sizeof(min_free_kbytes),
856 .mode = 0644,
857 .proc_handler = &min_free_kbytes_sysctl_handler,
858 .strategy = &sysctl_intvec,
859 .extra1 = &zero,
860 },
Rohit Seth8ad4b1f2006-01-08 01:00:40 -0800861 {
862 .ctl_name = VM_PERCPU_PAGELIST_FRACTION,
863 .procname = "percpu_pagelist_fraction",
864 .data = &percpu_pagelist_fraction,
865 .maxlen = sizeof(percpu_pagelist_fraction),
866 .mode = 0644,
867 .proc_handler = &percpu_pagelist_fraction_sysctl_handler,
868 .strategy = &sysctl_intvec,
869 .extra1 = &min_percpu_pagelist_fract,
870 },
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871#ifdef CONFIG_MMU
872 {
873 .ctl_name = VM_MAX_MAP_COUNT,
874 .procname = "max_map_count",
875 .data = &sysctl_max_map_count,
876 .maxlen = sizeof(sysctl_max_map_count),
877 .mode = 0644,
878 .proc_handler = &proc_dointvec
879 },
880#endif
881 {
882 .ctl_name = VM_LAPTOP_MODE,
883 .procname = "laptop_mode",
884 .data = &laptop_mode,
885 .maxlen = sizeof(laptop_mode),
886 .mode = 0644,
Bart Samweled5b43f2006-03-24 03:15:49 -0800887 .proc_handler = &proc_dointvec_jiffies,
888 .strategy = &sysctl_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700889 },
890 {
891 .ctl_name = VM_BLOCK_DUMP,
892 .procname = "block_dump",
893 .data = &block_dump,
894 .maxlen = sizeof(block_dump),
895 .mode = 0644,
896 .proc_handler = &proc_dointvec,
897 .strategy = &sysctl_intvec,
898 .extra1 = &zero,
899 },
900 {
901 .ctl_name = VM_VFS_CACHE_PRESSURE,
902 .procname = "vfs_cache_pressure",
903 .data = &sysctl_vfs_cache_pressure,
904 .maxlen = sizeof(sysctl_vfs_cache_pressure),
905 .mode = 0644,
906 .proc_handler = &proc_dointvec,
907 .strategy = &sysctl_intvec,
908 .extra1 = &zero,
909 },
910#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
911 {
912 .ctl_name = VM_LEGACY_VA_LAYOUT,
913 .procname = "legacy_va_layout",
914 .data = &sysctl_legacy_va_layout,
915 .maxlen = sizeof(sysctl_legacy_va_layout),
916 .mode = 0644,
917 .proc_handler = &proc_dointvec,
918 .strategy = &sysctl_intvec,
919 .extra1 = &zero,
920 },
921#endif
922#ifdef CONFIG_SWAP
923 {
924 .ctl_name = VM_SWAP_TOKEN_TIMEOUT,
925 .procname = "swap_token_timeout",
926 .data = &swap_token_default_timeout,
927 .maxlen = sizeof(swap_token_default_timeout),
928 .mode = 0644,
929 .proc_handler = &proc_dointvec_jiffies,
930 .strategy = &sysctl_jiffies,
931 },
932#endif
Christoph Lameter17436602006-01-18 17:42:32 -0800933#ifdef CONFIG_NUMA
934 {
935 .ctl_name = VM_ZONE_RECLAIM_MODE,
936 .procname = "zone_reclaim_mode",
937 .data = &zone_reclaim_mode,
938 .maxlen = sizeof(zone_reclaim_mode),
939 .mode = 0644,
940 .proc_handler = &proc_dointvec,
Christoph Lameterc84db232006-02-01 03:05:29 -0800941 .strategy = &sysctl_intvec,
942 .extra1 = &zero,
Christoph Lameter17436602006-01-18 17:42:32 -0800943 },
Christoph Lameter96146342006-07-03 00:24:13 -0700944 {
945 .ctl_name = VM_MIN_UNMAPPED,
946 .procname = "min_unmapped_ratio",
947 .data = &sysctl_min_unmapped_ratio,
948 .maxlen = sizeof(sysctl_min_unmapped_ratio),
949 .mode = 0644,
950 .proc_handler = &sysctl_min_unmapped_ratio_sysctl_handler,
951 .strategy = &sysctl_intvec,
952 .extra1 = &zero,
953 .extra2 = &one_hundred,
954 },
Christoph Lameter17436602006-01-18 17:42:32 -0800955#endif
Ingo Molnare6e54942006-06-27 02:53:50 -0700956#ifdef CONFIG_X86_32
957 {
958 .ctl_name = VM_VDSO_ENABLED,
959 .procname = "vdso_enabled",
960 .data = &vdso_enabled,
961 .maxlen = sizeof(vdso_enabled),
962 .mode = 0644,
963 .proc_handler = &proc_dointvec,
964 .strategy = &sysctl_intvec,
965 .extra1 = &zero,
966 },
967#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968 { .ctl_name = 0 }
969};
970
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971static ctl_table fs_table[] = {
972 {
973 .ctl_name = FS_NRINODE,
974 .procname = "inode-nr",
975 .data = &inodes_stat,
976 .maxlen = 2*sizeof(int),
977 .mode = 0444,
978 .proc_handler = &proc_dointvec,
979 },
980 {
981 .ctl_name = FS_STATINODE,
982 .procname = "inode-state",
983 .data = &inodes_stat,
984 .maxlen = 7*sizeof(int),
985 .mode = 0444,
986 .proc_handler = &proc_dointvec,
987 },
988 {
989 .ctl_name = FS_NRFILE,
990 .procname = "file-nr",
991 .data = &files_stat,
992 .maxlen = 3*sizeof(int),
993 .mode = 0444,
Dipankar Sarma529bf6b2006-03-07 21:55:35 -0800994 .proc_handler = &proc_nr_files,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995 },
996 {
997 .ctl_name = FS_MAXFILE,
998 .procname = "file-max",
999 .data = &files_stat.max_files,
1000 .maxlen = sizeof(int),
1001 .mode = 0644,
1002 .proc_handler = &proc_dointvec,
1003 },
1004 {
1005 .ctl_name = FS_DENTRY,
1006 .procname = "dentry-state",
1007 .data = &dentry_stat,
1008 .maxlen = 6*sizeof(int),
1009 .mode = 0444,
1010 .proc_handler = &proc_dointvec,
1011 },
1012 {
1013 .ctl_name = FS_OVERFLOWUID,
1014 .procname = "overflowuid",
1015 .data = &fs_overflowuid,
1016 .maxlen = sizeof(int),
1017 .mode = 0644,
1018 .proc_handler = &proc_dointvec_minmax,
1019 .strategy = &sysctl_intvec,
1020 .extra1 = &minolduid,
1021 .extra2 = &maxolduid,
1022 },
1023 {
1024 .ctl_name = FS_OVERFLOWGID,
1025 .procname = "overflowgid",
1026 .data = &fs_overflowgid,
1027 .maxlen = sizeof(int),
1028 .mode = 0644,
1029 .proc_handler = &proc_dointvec_minmax,
1030 .strategy = &sysctl_intvec,
1031 .extra1 = &minolduid,
1032 .extra2 = &maxolduid,
1033 },
1034 {
1035 .ctl_name = FS_LEASES,
1036 .procname = "leases-enable",
1037 .data = &leases_enable,
1038 .maxlen = sizeof(int),
1039 .mode = 0644,
1040 .proc_handler = &proc_dointvec,
1041 },
1042#ifdef CONFIG_DNOTIFY
1043 {
1044 .ctl_name = FS_DIR_NOTIFY,
1045 .procname = "dir-notify-enable",
1046 .data = &dir_notify_enable,
1047 .maxlen = sizeof(int),
1048 .mode = 0644,
1049 .proc_handler = &proc_dointvec,
1050 },
1051#endif
1052#ifdef CONFIG_MMU
1053 {
1054 .ctl_name = FS_LEASE_TIME,
1055 .procname = "lease-break-time",
1056 .data = &lease_break_time,
1057 .maxlen = sizeof(int),
1058 .mode = 0644,
1059 .proc_handler = &proc_dointvec,
1060 },
1061 {
1062 .ctl_name = FS_AIO_NR,
1063 .procname = "aio-nr",
1064 .data = &aio_nr,
1065 .maxlen = sizeof(aio_nr),
1066 .mode = 0444,
Zach Brownd55b5fd2005-11-07 00:59:31 -08001067 .proc_handler = &proc_doulongvec_minmax,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068 },
1069 {
1070 .ctl_name = FS_AIO_MAX_NR,
1071 .procname = "aio-max-nr",
1072 .data = &aio_max_nr,
1073 .maxlen = sizeof(aio_max_nr),
1074 .mode = 0644,
Zach Brownd55b5fd2005-11-07 00:59:31 -08001075 .proc_handler = &proc_doulongvec_minmax,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001076 },
Amy Griffis2d9048e2006-06-01 13:10:59 -07001077#ifdef CONFIG_INOTIFY_USER
Robert Love0399cb02005-07-13 12:38:18 -04001078 {
1079 .ctl_name = FS_INOTIFY,
1080 .procname = "inotify",
1081 .mode = 0555,
1082 .child = inotify_table,
1083 },
1084#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001085#endif
Alan Coxd6e71142005-06-23 00:09:43 -07001086 {
1087 .ctl_name = KERN_SETUID_DUMPABLE,
1088 .procname = "suid_dumpable",
1089 .data = &suid_dumpable,
1090 .maxlen = sizeof(int),
1091 .mode = 0644,
1092 .proc_handler = &proc_dointvec,
1093 },
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094 { .ctl_name = 0 }
1095};
1096
1097static ctl_table debug_table[] = {
1098 { .ctl_name = 0 }
1099};
1100
1101static ctl_table dev_table[] = {
1102 { .ctl_name = 0 }
Robert Love0eeca282005-07-12 17:06:03 -04001103};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104
/* Called from sysctl_init() when CONFIG_PROC_FS is enabled. */
extern void init_irq_proc(void);

/*
 * Guards the list of registered table headers together with each
 * header's ->used count and ->unregistering pointer.
 */
static DEFINE_SPINLOCK(sysctl_lock);
1108
1109/* called under sysctl_lock */
1110static int use_table(struct ctl_table_header *p)
1111{
1112 if (unlikely(p->unregistering))
1113 return 0;
1114 p->used++;
1115 return 1;
1116}
1117
1118/* called under sysctl_lock */
1119static void unuse_table(struct ctl_table_header *p)
1120{
1121 if (!--p->used)
1122 if (unlikely(p->unregistering))
1123 complete(p->unregistering);
1124}
1125
1126/* called under sysctl_lock, will reacquire if has to wait */
1127static void start_unregistering(struct ctl_table_header *p)
1128{
1129 /*
1130 * if p->used is 0, nobody will ever touch that entry again;
1131 * we'll eliminate all paths to it before dropping sysctl_lock
1132 */
1133 if (unlikely(p->used)) {
1134 struct completion wait;
1135 init_completion(&wait);
1136 p->unregistering = &wait;
1137 spin_unlock(&sysctl_lock);
1138 wait_for_completion(&wait);
1139 spin_lock(&sysctl_lock);
1140 }
1141 /*
1142 * do not remove from the list until nobody holds it; walking the
1143 * list in do_sysctl() relies on that.
1144 */
1145 list_del_init(&p->ctl_entry);
1146}
1147
Linus Torvalds1da177e2005-04-16 15:20:36 -07001148void __init sysctl_init(void)
1149{
1150#ifdef CONFIG_PROC_FS
Al Viro330d57f2005-11-04 10:18:40 +00001151 register_proc_table(root_table, proc_sys_root, &root_table_header);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001152 init_irq_proc();
1153#endif
1154}
1155
1156int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1157 void __user *newval, size_t newlen)
1158{
1159 struct list_head *tmp;
Al Viro330d57f2005-11-04 10:18:40 +00001160 int error = -ENOTDIR;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001161
1162 if (nlen <= 0 || nlen >= CTL_MAXNAME)
1163 return -ENOTDIR;
1164 if (oldval) {
1165 int old_len;
1166 if (!oldlenp || get_user(old_len, oldlenp))
1167 return -EFAULT;
1168 }
Al Viro330d57f2005-11-04 10:18:40 +00001169 spin_lock(&sysctl_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170 tmp = &root_table_header.ctl_entry;
1171 do {
1172 struct ctl_table_header *head =
1173 list_entry(tmp, struct ctl_table_header, ctl_entry);
1174 void *context = NULL;
Al Viro330d57f2005-11-04 10:18:40 +00001175
1176 if (!use_table(head))
1177 continue;
1178
1179 spin_unlock(&sysctl_lock);
1180
1181 error = parse_table(name, nlen, oldval, oldlenp,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001182 newval, newlen, head->ctl_table,
1183 &context);
Jesper Juhl5a6b4542005-06-25 14:58:48 -07001184 kfree(context);
Al Viro330d57f2005-11-04 10:18:40 +00001185
1186 spin_lock(&sysctl_lock);
1187 unuse_table(head);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001188 if (error != -ENOTDIR)
Al Viro330d57f2005-11-04 10:18:40 +00001189 break;
1190 } while ((tmp = tmp->next) != &root_table_header.ctl_entry);
1191 spin_unlock(&sysctl_lock);
1192 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001193}
1194
1195asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1196{
1197 struct __sysctl_args tmp;
1198 int error;
1199
1200 if (copy_from_user(&tmp, args, sizeof(tmp)))
1201 return -EFAULT;
1202
1203 lock_kernel();
1204 error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1205 tmp.newval, tmp.newlen);
1206 unlock_kernel();
1207 return error;
1208}
1209
1210/*
1211 * ctl_perm does NOT grant the superuser all rights automatically, because
1212 * some sysctl variables are readonly even to root.
1213 */
1214
1215static int test_perm(int mode, int op)
1216{
1217 if (!current->euid)
1218 mode >>= 6;
1219 else if (in_egroup_p(0))
1220 mode >>= 3;
1221 if ((mode & op & 0007) == op)
1222 return 0;
1223 return -EACCES;
1224}
1225
1226static inline int ctl_perm(ctl_table *table, int op)
1227{
1228 int error;
1229 error = security_sysctl(table, op);
1230 if (error)
1231 return error;
1232 return test_perm(table->mode, op);
1233}
1234
1235static int parse_table(int __user *name, int nlen,
1236 void __user *oldval, size_t __user *oldlenp,
1237 void __user *newval, size_t newlen,
1238 ctl_table *table, void **context)
1239{
1240 int n;
1241repeat:
1242 if (!nlen)
1243 return -ENOTDIR;
1244 if (get_user(n, name))
1245 return -EFAULT;
1246 for ( ; table->ctl_name; table++) {
1247 if (n == table->ctl_name || table->ctl_name == CTL_ANY) {
1248 int error;
1249 if (table->child) {
1250 if (ctl_perm(table, 001))
1251 return -EPERM;
1252 if (table->strategy) {
1253 error = table->strategy(
1254 table, name, nlen,
1255 oldval, oldlenp,
1256 newval, newlen, context);
1257 if (error)
1258 return error;
1259 }
1260 name++;
1261 nlen--;
1262 table = table->child;
1263 goto repeat;
1264 }
1265 error = do_sysctl_strategy(table, name, nlen,
1266 oldval, oldlenp,
1267 newval, newlen, context);
1268 return error;
1269 }
1270 }
1271 return -ENOTDIR;
1272}
1273
1274/* Perform the actual read/write of a sysctl table entry. */
1275int do_sysctl_strategy (ctl_table *table,
1276 int __user *name, int nlen,
1277 void __user *oldval, size_t __user *oldlenp,
1278 void __user *newval, size_t newlen, void **context)
1279{
1280 int op = 0, rc;
1281 size_t len;
1282
1283 if (oldval)
1284 op |= 004;
1285 if (newval)
1286 op |= 002;
1287 if (ctl_perm(table, op))
1288 return -EPERM;
1289
1290 if (table->strategy) {
1291 rc = table->strategy(table, name, nlen, oldval, oldlenp,
1292 newval, newlen, context);
1293 if (rc < 0)
1294 return rc;
1295 if (rc > 0)
1296 return 0;
1297 }
1298
1299 /* If there is no strategy routine, or if the strategy returns
1300 * zero, proceed with automatic r/w */
1301 if (table->data && table->maxlen) {
1302 if (oldval && oldlenp) {
1303 if (get_user(len, oldlenp))
1304 return -EFAULT;
1305 if (len) {
1306 if (len > table->maxlen)
1307 len = table->maxlen;
1308 if(copy_to_user(oldval, table->data, len))
1309 return -EFAULT;
1310 if(put_user(len, oldlenp))
1311 return -EFAULT;
1312 }
1313 }
1314 if (newval && newlen) {
1315 len = newlen;
1316 if (len > table->maxlen)
1317 len = table->maxlen;
1318 if(copy_from_user(table->data, newval, len))
1319 return -EFAULT;
1320 }
1321 }
1322 return 0;
1323}
1324
1325/**
1326 * register_sysctl_table - register a sysctl hierarchy
1327 * @table: the top-level table structure
1328 * @insert_at_head: whether the entry should be inserted in front or at the end
1329 *
1330 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1331 * array. An entry with a ctl_name of 0 terminates the table.
1332 *
1333 * The members of the &ctl_table structure are used as follows:
1334 *
1335 * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1336 * must be unique within that level of sysctl
1337 *
1338 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1339 * enter a sysctl file
1340 *
1341 * data - a pointer to data for use by proc_handler
1342 *
1343 * maxlen - the maximum size in bytes of the data
1344 *
1345 * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1346 *
1347 * child - a pointer to the child sysctl table if this entry is a directory, or
1348 * %NULL.
1349 *
1350 * proc_handler - the text handler routine (described below)
1351 *
1352 * strategy - the strategy routine (described below)
1353 *
1354 * de - for internal use by the sysctl routines
1355 *
1356 * extra1, extra2 - extra pointers usable by the proc handler routines
1357 *
1358 * Leaf nodes in the sysctl tree will be represented by a single file
1359 * under /proc; non-leaf nodes will be represented by directories.
1360 *
1361 * sysctl(2) can automatically manage read and write requests through
1362 * the sysctl table. The data and maxlen fields of the ctl_table
1363 * struct enable minimal validation of the values being written to be
1364 * performed, and the mode field allows minimal authentication.
1365 *
1366 * More sophisticated management can be enabled by the provision of a
1367 * strategy routine with the table entry. This will be called before
1368 * any automatic read or write of the data is performed.
1369 *
1370 * The strategy routine may return
1371 *
1372 * < 0 - Error occurred (error is passed to user process)
1373 *
1374 * 0 - OK - proceed with automatic read or write.
1375 *
1376 * > 0 - OK - read or write has been done by the strategy routine, so
1377 * return immediately.
1378 *
1379 * There must be a proc_handler routine for any terminal nodes
1380 * mirrored under /proc/sys (non-terminals are handled by a built-in
1381 * directory handler). Several default handlers are available to
1382 * cover common cases -
1383 *
1384 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1385 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1386 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1387 *
1388 * It is the handler's job to read the input buffer from user memory
1389 * and process it. The handler should return 0 on success.
1390 *
1391 * This routine returns %NULL on a failure to register, and a pointer
1392 * to the table header on success.
1393 */
1394struct ctl_table_header *register_sysctl_table(ctl_table * table,
1395 int insert_at_head)
1396{
1397 struct ctl_table_header *tmp;
1398 tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1399 if (!tmp)
1400 return NULL;
1401 tmp->ctl_table = table;
1402 INIT_LIST_HEAD(&tmp->ctl_entry);
Al Viro330d57f2005-11-04 10:18:40 +00001403 tmp->used = 0;
1404 tmp->unregistering = NULL;
1405 spin_lock(&sysctl_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001406 if (insert_at_head)
1407 list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
1408 else
1409 list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
Al Viro330d57f2005-11-04 10:18:40 +00001410 spin_unlock(&sysctl_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001411#ifdef CONFIG_PROC_FS
Al Viro330d57f2005-11-04 10:18:40 +00001412 register_proc_table(table, proc_sys_root, tmp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001413#endif
1414 return tmp;
1415}
1416
1417/**
1418 * unregister_sysctl_table - unregister a sysctl table hierarchy
1419 * @header: the header returned from register_sysctl_table
1420 *
1421 * Unregisters the sysctl table and all children. proc entries may not
1422 * actually be removed until they are no longer used by anyone.
1423 */
1424void unregister_sysctl_table(struct ctl_table_header * header)
1425{
Al Viro330d57f2005-11-04 10:18:40 +00001426 might_sleep();
1427 spin_lock(&sysctl_lock);
1428 start_unregistering(header);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001429#ifdef CONFIG_PROC_FS
1430 unregister_proc_table(header->ctl_table, proc_sys_root);
1431#endif
Al Viro330d57f2005-11-04 10:18:40 +00001432 spin_unlock(&sysctl_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001433 kfree(header);
1434}
1435
1436/*
1437 * /proc/sys support
1438 */
1439
1440#ifdef CONFIG_PROC_FS
1441
1442/* Scan the sysctl entries in table and add them all into /proc */
Al Viro330d57f2005-11-04 10:18:40 +00001443static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001444{
1445 struct proc_dir_entry *de;
1446 int len;
1447 mode_t mode;
1448
1449 for (; table->ctl_name; table++) {
1450 /* Can't do anything without a proc name. */
1451 if (!table->procname)
1452 continue;
1453 /* Maybe we can't do anything with it... */
1454 if (!table->proc_handler && !table->child) {
1455 printk(KERN_WARNING "SYSCTL: Can't register %s\n",
1456 table->procname);
1457 continue;
1458 }
1459
1460 len = strlen(table->procname);
1461 mode = table->mode;
1462
1463 de = NULL;
1464 if (table->proc_handler)
1465 mode |= S_IFREG;
1466 else {
1467 mode |= S_IFDIR;
1468 for (de = root->subdir; de; de = de->next) {
1469 if (proc_match(len, table->procname, de))
1470 break;
1471 }
1472 /* If the subdir exists already, de is non-NULL */
1473 }
1474
1475 if (!de) {
1476 de = create_proc_entry(table->procname, mode, root);
1477 if (!de)
1478 continue;
Al Viro330d57f2005-11-04 10:18:40 +00001479 de->set = set;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001480 de->data = (void *) table;
1481 if (table->proc_handler)
1482 de->proc_fops = &proc_sys_file_operations;
1483 }
1484 table->de = de;
1485 if (de->mode & S_IFDIR)
Al Viro330d57f2005-11-04 10:18:40 +00001486 register_proc_table(table->child, de, set);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487 }
1488}
1489
1490/*
1491 * Unregister a /proc sysctl table and any subdirectories.
1492 */
1493static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root)
1494{
1495 struct proc_dir_entry *de;
1496 for (; table->ctl_name; table++) {
1497 if (!(de = table->de))
1498 continue;
1499 if (de->mode & S_IFDIR) {
1500 if (!table->child) {
1501 printk (KERN_ALERT "Help - malformed sysctl tree on free\n");
1502 continue;
1503 }
1504 unregister_proc_table(table->child, de);
1505
1506 /* Don't unregister directories which still have entries.. */
1507 if (de->subdir)
1508 continue;
1509 }
1510
Al Viro330d57f2005-11-04 10:18:40 +00001511 /*
1512 * In any case, mark the entry as goner; we'll keep it
1513 * around if it's busy, but we'll know to do nothing with
1514 * its fields. We are under sysctl_lock here.
1515 */
1516 de->data = NULL;
1517
Linus Torvalds1da177e2005-04-16 15:20:36 -07001518 /* Don't unregister proc entries that are still being used.. */
1519 if (atomic_read(&de->count))
1520 continue;
1521
1522 table->de = NULL;
1523 remove_proc_entry(table->procname, root);
1524 }
1525}
1526
1527static ssize_t do_rw_proc(int write, struct file * file, char __user * buf,
1528 size_t count, loff_t *ppos)
1529{
1530 int op;
Al Viro330d57f2005-11-04 10:18:40 +00001531 struct proc_dir_entry *de = PDE(file->f_dentry->d_inode);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532 struct ctl_table *table;
1533 size_t res;
Al Viro330d57f2005-11-04 10:18:40 +00001534 ssize_t error = -ENOTDIR;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001535
Al Viro330d57f2005-11-04 10:18:40 +00001536 spin_lock(&sysctl_lock);
1537 if (de && de->data && use_table(de->set)) {
1538 /*
1539 * at that point we know that sysctl was not unregistered
1540 * and won't be until we finish
1541 */
1542 spin_unlock(&sysctl_lock);
1543 table = (struct ctl_table *) de->data;
1544 if (!table || !table->proc_handler)
1545 goto out;
1546 error = -EPERM;
1547 op = (write ? 002 : 004);
1548 if (ctl_perm(table, op))
1549 goto out;
1550
1551 /* careful: calling conventions are nasty here */
1552 res = count;
1553 error = (*table->proc_handler)(table, write, file,
1554 buf, &res, ppos);
1555 if (!error)
1556 error = res;
1557 out:
1558 spin_lock(&sysctl_lock);
1559 unuse_table(de->set);
1560 }
1561 spin_unlock(&sysctl_lock);
1562 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001563}
1564
1565static int proc_opensys(struct inode *inode, struct file *file)
1566{
1567 if (file->f_mode & FMODE_WRITE) {
1568 /*
1569 * sysctl entries that are not writable,
1570 * are _NOT_ writable, capabilities or not.
1571 */
1572 if (!(inode->i_mode & S_IWUSR))
1573 return -EPERM;
1574 }
1575
1576 return 0;
1577}
1578
1579static ssize_t proc_readsys(struct file * file, char __user * buf,
1580 size_t count, loff_t *ppos)
1581{
1582 return do_rw_proc(0, file, buf, count, ppos);
1583}
1584
1585static ssize_t proc_writesys(struct file * file, const char __user * buf,
1586 size_t count, loff_t *ppos)
1587{
1588 return do_rw_proc(1, file, (char __user *) buf, count, ppos);
1589}
1590
1591/**
1592 * proc_dostring - read a string sysctl
1593 * @table: the sysctl table
1594 * @write: %TRUE if this is a write to the sysctl file
1595 * @filp: the file structure
1596 * @buffer: the user buffer
1597 * @lenp: the size of the user buffer
1598 * @ppos: file position
1599 *
1600 * Reads/writes a string from/to the user buffer. If the kernel
1601 * buffer provided is not large enough to hold the string, the
1602 * string is truncated. The copied string is %NULL-terminated.
1603 * If the string is being read by the user process, it is copied
1604 * and a newline '\n' is added. It is truncated if the buffer is
1605 * not large enough.
1606 *
1607 * Returns 0 on success.
1608 */
1609int proc_dostring(ctl_table *table, int write, struct file *filp,
1610 void __user *buffer, size_t *lenp, loff_t *ppos)
1611{
1612 size_t len;
1613 char __user *p;
1614 char c;
1615
1616 if (!table->data || !table->maxlen || !*lenp ||
1617 (*ppos && !write)) {
1618 *lenp = 0;
1619 return 0;
1620 }
1621
1622 if (write) {
1623 len = 0;
1624 p = buffer;
1625 while (len < *lenp) {
1626 if (get_user(c, p++))
1627 return -EFAULT;
1628 if (c == 0 || c == '\n')
1629 break;
1630 len++;
1631 }
1632 if (len >= table->maxlen)
1633 len = table->maxlen-1;
1634 if(copy_from_user(table->data, buffer, len))
1635 return -EFAULT;
1636 ((char *) table->data)[len] = 0;
1637 *ppos += *lenp;
1638 } else {
1639 len = strlen(table->data);
1640 if (len > table->maxlen)
1641 len = table->maxlen;
1642 if (len > *lenp)
1643 len = *lenp;
1644 if (len)
1645 if(copy_to_user(buffer, table->data, len))
1646 return -EFAULT;
1647 if (len < *lenp) {
1648 if(put_user('\n', ((char __user *) buffer) + len))
1649 return -EFAULT;
1650 len++;
1651 }
1652 *lenp = len;
1653 *ppos += len;
1654 }
1655 return 0;
1656}
1657
1658/*
1659 * Special case of dostring for the UTS structure. This has locks
1660 * to observe. Should this be in kernel/sys.c ????
1661 */
1662
1663static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
1664 void __user *buffer, size_t *lenp, loff_t *ppos)
1665{
1666 int r;
1667
1668 if (!write) {
1669 down_read(&uts_sem);
1670 r=proc_dostring(table,0,filp,buffer,lenp, ppos);
1671 up_read(&uts_sem);
1672 } else {
1673 down_write(&uts_sem);
1674 r=proc_dostring(table,1,filp,buffer,lenp, ppos);
1675 up_write(&uts_sem);
1676 }
1677 return r;
1678}
1679
/*
 * Default conversion used by do_proc_dointvec().
 *
 * @negp:  sign flag (non-zero means negative)
 * @lvalp: magnitude
 * @valp:  the kernel int being read or written
 * @write: non-zero to store sign/magnitude into *valp, zero to split
 *         *valp into sign/magnitude
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		*valp = *negp ? -*lvalp : *lvalp;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening: "-val" in int overflows
			 * for INT_MIN and yields a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1698
1699static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1700 void __user *buffer, size_t *lenp, loff_t *ppos,
1701 int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1702 int write, void *data),
1703 void *data)
1704{
1705#define TMPBUFLEN 21
1706 int *i, vleft, first=1, neg, val;
1707 unsigned long lval;
1708 size_t left, len;
1709
1710 char buf[TMPBUFLEN], *p;
1711 char __user *s = buffer;
1712
1713 if (!table->data || !table->maxlen || !*lenp ||
1714 (*ppos && !write)) {
1715 *lenp = 0;
1716 return 0;
1717 }
1718
1719 i = (int *) table->data;
1720 vleft = table->maxlen / sizeof(*i);
1721 left = *lenp;
1722
1723 if (!conv)
1724 conv = do_proc_dointvec_conv;
1725
1726 for (; left && vleft--; i++, first=0) {
1727 if (write) {
1728 while (left) {
1729 char c;
1730 if (get_user(c, s))
1731 return -EFAULT;
1732 if (!isspace(c))
1733 break;
1734 left--;
1735 s++;
1736 }
1737 if (!left)
1738 break;
1739 neg = 0;
1740 len = left;
1741 if (len > sizeof(buf) - 1)
1742 len = sizeof(buf) - 1;
1743 if (copy_from_user(buf, s, len))
1744 return -EFAULT;
1745 buf[len] = 0;
1746 p = buf;
1747 if (*p == '-' && left > 1) {
1748 neg = 1;
1749 left--, p++;
1750 }
1751 if (*p < '0' || *p > '9')
1752 break;
1753
1754 lval = simple_strtoul(p, &p, 0);
1755
1756 len = p-buf;
1757 if ((len < left) && *p && !isspace(*p))
1758 break;
1759 if (neg)
1760 val = -val;
1761 s += len;
1762 left -= len;
1763
1764 if (conv(&neg, &lval, i, 1, data))
1765 break;
1766 } else {
1767 p = buf;
1768 if (!first)
1769 *p++ = '\t';
1770
1771 if (conv(&neg, &lval, i, 0, data))
1772 break;
1773
1774 sprintf(p, "%s%lu", neg ? "-" : "", lval);
1775 len = strlen(buf);
1776 if (len > left)
1777 len = left;
1778 if(copy_to_user(s, buf, len))
1779 return -EFAULT;
1780 left -= len;
1781 s += len;
1782 }
1783 }
1784
1785 if (!write && !first && left) {
1786 if(put_user('\n', s))
1787 return -EFAULT;
1788 left--, s++;
1789 }
1790 if (write) {
1791 while (left) {
1792 char c;
1793 if (get_user(c, s++))
1794 return -EFAULT;
1795 if (!isspace(c))
1796 break;
1797 left--;
1798 }
1799 }
1800 if (write && first)
1801 return -EINVAL;
1802 *lenp -= left;
1803 *ppos += *lenp;
1804 return 0;
1805#undef TMPBUFLEN
1806}
1807
1808/**
1809 * proc_dointvec - read a vector of integers
1810 * @table: the sysctl table
1811 * @write: %TRUE if this is a write to the sysctl file
1812 * @filp: the file structure
1813 * @buffer: the user buffer
1814 * @lenp: the size of the user buffer
1815 * @ppos: file position
1816 *
1817 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1818 * values from/to the user buffer, treated as an ASCII string.
1819 *
1820 * Returns 0 on success.
1821 */
1822int proc_dointvec(ctl_table *table, int write, struct file *filp,
1823 void __user *buffer, size_t *lenp, loff_t *ppos)
1824{
1825 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1826 NULL,NULL);
1827}
1828
/* Operations selectable through the @data argument of the bset converter. */
#define OP_SET	0
#define OP_AND	1
#define OP_OR	2
#define OP_MAX	3
#define OP_MIN	4

/*
 * Converter for proc_dointvec_bset().
 *
 * @data points to an int holding one of the OP_* codes.  On write the
 * parsed value is combined with *valp according to that code (assign,
 * bitwise and/or, or keep the larger/smaller); any other code leaves
 * *valp untouched.  On read *valp is split into sign and magnitude.
 *
 * Always returns 0.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
				      int *valp,
				      int write, void *data)
{
	int op = *(int *)data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		switch(op) {
		case OP_SET:	*valp = val; break;
		case OP_AND:	*valp &= val; break;
		case OP_OR:	*valp |= val; break;
		case OP_MAX:	if(*valp < val)
					*valp = val;
				break;
		case OP_MIN:	if(*valp > val)
				*valp = val;
				break;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening: "-val" in int overflows
			 * for INT_MIN and yields a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1865
1866/*
1867 * init may raise the set.
1868 */
1869
1870int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
1871 void __user *buffer, size_t *lenp, loff_t *ppos)
1872{
1873 int op;
1874
1875 if (!capable(CAP_SYS_MODULE)) {
1876 return -EPERM;
1877 }
1878
1879 op = (current->pid == 1) ? OP_SET : OP_AND;
1880 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1881 do_proc_dointvec_bset_conv,&op);
1882}
1883
/*
 * Bounds handed to do_proc_dointvec_minmax_conv() through its @data
 * argument.  Either pointer may be NULL to leave that side unbounded.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Converter for proc_dointvec_minmax().
 *
 * On write the parsed value is stored in *valp only if it lies within
 * [*param->min, *param->max]; otherwise -EINVAL is returned and *valp
 * is left alone.  On read *valp is split into sign and magnitude.
 *
 * Returns 0 on success.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening: "-val" in int overflows
			 * for INT_MIN and yields a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1912
1913/**
1914 * proc_dointvec_minmax - read a vector of integers with min/max values
1915 * @table: the sysctl table
1916 * @write: %TRUE if this is a write to the sysctl file
1917 * @filp: the file structure
1918 * @buffer: the user buffer
1919 * @lenp: the size of the user buffer
1920 * @ppos: file position
1921 *
1922 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1923 * values from/to the user buffer, treated as an ASCII string.
1924 *
1925 * This routine will ensure the values are within the range specified by
1926 * table->extra1 (min) and table->extra2 (max).
1927 *
1928 * Returns 0 on success.
1929 */
1930int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
1931 void __user *buffer, size_t *lenp, loff_t *ppos)
1932{
1933 struct do_proc_dointvec_minmax_conv_param param = {
1934 .min = (int *) table->extra1,
1935 .max = (int *) table->extra2,
1936 };
1937 return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
1938 do_proc_dointvec_minmax_conv, &param);
1939}
1940
1941static int do_proc_doulongvec_minmax(ctl_table *table, int write,
1942 struct file *filp,
1943 void __user *buffer,
1944 size_t *lenp, loff_t *ppos,
1945 unsigned long convmul,
1946 unsigned long convdiv)
1947{
1948#define TMPBUFLEN 21
1949 unsigned long *i, *min, *max, val;
1950 int vleft, first=1, neg;
1951 size_t len, left;
1952 char buf[TMPBUFLEN], *p;
1953 char __user *s = buffer;
1954
1955 if (!table->data || !table->maxlen || !*lenp ||
1956 (*ppos && !write)) {
1957 *lenp = 0;
1958 return 0;
1959 }
1960
1961 i = (unsigned long *) table->data;
1962 min = (unsigned long *) table->extra1;
1963 max = (unsigned long *) table->extra2;
1964 vleft = table->maxlen / sizeof(unsigned long);
1965 left = *lenp;
1966
1967 for (; left && vleft--; i++, min++, max++, first=0) {
1968 if (write) {
1969 while (left) {
1970 char c;
1971 if (get_user(c, s))
1972 return -EFAULT;
1973 if (!isspace(c))
1974 break;
1975 left--;
1976 s++;
1977 }
1978 if (!left)
1979 break;
1980 neg = 0;
1981 len = left;
1982 if (len > TMPBUFLEN-1)
1983 len = TMPBUFLEN-1;
1984 if (copy_from_user(buf, s, len))
1985 return -EFAULT;
1986 buf[len] = 0;
1987 p = buf;
1988 if (*p == '-' && left > 1) {
1989 neg = 1;
1990 left--, p++;
1991 }
1992 if (*p < '0' || *p > '9')
1993 break;
1994 val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
1995 len = p-buf;
1996 if ((len < left) && *p && !isspace(*p))
1997 break;
1998 if (neg)
1999 val = -val;
2000 s += len;
2001 left -= len;
2002
2003 if(neg)
2004 continue;
2005 if ((min && val < *min) || (max && val > *max))
2006 continue;
2007 *i = val;
2008 } else {
2009 p = buf;
2010 if (!first)
2011 *p++ = '\t';
2012 sprintf(p, "%lu", convdiv * (*i) / convmul);
2013 len = strlen(buf);
2014 if (len > left)
2015 len = left;
2016 if(copy_to_user(s, buf, len))
2017 return -EFAULT;
2018 left -= len;
2019 s += len;
2020 }
2021 }
2022
2023 if (!write && !first && left) {
2024 if(put_user('\n', s))
2025 return -EFAULT;
2026 left--, s++;
2027 }
2028 if (write) {
2029 while (left) {
2030 char c;
2031 if (get_user(c, s++))
2032 return -EFAULT;
2033 if (!isspace(c))
2034 break;
2035 left--;
2036 }
2037 }
2038 if (write && first)
2039 return -EINVAL;
2040 *lenp -= left;
2041 *ppos += *lenp;
2042 return 0;
2043#undef TMPBUFLEN
2044}
2045
2046/**
2047 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2048 * @table: the sysctl table
2049 * @write: %TRUE if this is a write to the sysctl file
2050 * @filp: the file structure
2051 * @buffer: the user buffer
2052 * @lenp: the size of the user buffer
2053 * @ppos: file position
2054 *
2055 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2056 * values from/to the user buffer, treated as an ASCII string.
2057 *
2058 * This routine will ensure the values are within the range specified by
2059 * table->extra1 (min) and table->extra2 (max).
2060 *
2061 * Returns 0 on success.
2062 */
2063int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2064 void __user *buffer, size_t *lenp, loff_t *ppos)
2065{
2066 return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
2067}
2068
2069/**
2070 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2071 * @table: the sysctl table
2072 * @write: %TRUE if this is a write to the sysctl file
2073 * @filp: the file structure
2074 * @buffer: the user buffer
2075 * @lenp: the size of the user buffer
2076 * @ppos: file position
2077 *
2078 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2079 * values from/to the user buffer, treated as an ASCII string. The values
2080 * are treated as milliseconds, and converted to jiffies when they are stored.
2081 *
2082 * This routine will ensure the values are within the range specified by
2083 * table->extra1 (min) and table->extra2 (max).
2084 *
2085 * Returns 0 on success.
2086 */
2087int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2088 struct file *filp,
2089 void __user *buffer,
2090 size_t *lenp, loff_t *ppos)
2091{
2092 return do_proc_doulongvec_minmax(table, write, filp, buffer,
2093 lenp, ppos, HZ, 1000l);
2094}
2095
2096
2097static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2098 int *valp,
2099 int write, void *data)
2100{
2101 if (write) {
Bart Samwelcba9f332006-03-24 03:15:50 -08002102 if (*lvalp > LONG_MAX / HZ)
2103 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2105 } else {
2106 int val = *valp;
2107 unsigned long lval;
2108 if (val < 0) {
2109 *negp = -1;
2110 lval = (unsigned long)-val;
2111 } else {
2112 *negp = 0;
2113 lval = (unsigned long)val;
2114 }
2115 *lvalp = lval / HZ;
2116 }
2117 return 0;
2118}
2119
2120static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2121 int *valp,
2122 int write, void *data)
2123{
2124 if (write) {
Bart Samwelcba9f332006-03-24 03:15:50 -08002125 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2126 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002127 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2128 } else {
2129 int val = *valp;
2130 unsigned long lval;
2131 if (val < 0) {
2132 *negp = -1;
2133 lval = (unsigned long)-val;
2134 } else {
2135 *negp = 0;
2136 lval = (unsigned long)val;
2137 }
2138 *lvalp = jiffies_to_clock_t(lval);
2139 }
2140 return 0;
2141}
2142
/*
 * Converter for proc_dointvec_ms_jiffies(): milliseconds on the user
 * side, jiffies in *valp on the kernel side.
 *
 * On write the signed value is converted with msecs_to_jiffies() and
 * stored.  On read, *valp is split into sign and magnitude and the
 * magnitude is converted with jiffies_to_msecs().  @data is unused.
 *
 * Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening: "-val" in int overflows
			 * for INT_MIN and yields a bogus magnitude.
			 */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2163
2164/**
2165 * proc_dointvec_jiffies - read a vector of integers as seconds
2166 * @table: the sysctl table
2167 * @write: %TRUE if this is a write to the sysctl file
2168 * @filp: the file structure
2169 * @buffer: the user buffer
2170 * @lenp: the size of the user buffer
2171 * @ppos: file position
2172 *
2173 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2174 * values from/to the user buffer, treated as an ASCII string.
2175 * The values read are assumed to be in seconds, and are converted into
2176 * jiffies.
2177 *
2178 * Returns 0 on success.
2179 */
2180int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2181 void __user *buffer, size_t *lenp, loff_t *ppos)
2182{
2183 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2184 do_proc_dointvec_jiffies_conv,NULL);
2185}
2186
2187/**
2188 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2189 * @table: the sysctl table
2190 * @write: %TRUE if this is a write to the sysctl file
2191 * @filp: the file structure
2192 * @buffer: the user buffer
2193 * @lenp: the size of the user buffer
Randy Dunlap1e5d5332005-11-07 01:01:06 -08002194 * @ppos: pointer to the file position
Linus Torvalds1da177e2005-04-16 15:20:36 -07002195 *
2196 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2197 * values from/to the user buffer, treated as an ASCII string.
2198 * The values read are assumed to be in 1/USER_HZ seconds, and
2199 * are converted into jiffies.
2200 *
2201 * Returns 0 on success.
2202 */
2203int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2204 void __user *buffer, size_t *lenp, loff_t *ppos)
2205{
2206 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2207 do_proc_dointvec_userhz_jiffies_conv,NULL);
2208}
2209
2210/**
2211 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2212 * @table: the sysctl table
2213 * @write: %TRUE if this is a write to the sysctl file
2214 * @filp: the file structure
2215 * @buffer: the user buffer
2216 * @lenp: the size of the user buffer
Martin Waitz67be2dd2005-05-01 08:59:26 -07002217 * @ppos: file position
2218 * @ppos: the current position in the file
Linus Torvalds1da177e2005-04-16 15:20:36 -07002219 *
2220 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2221 * values from/to the user buffer, treated as an ASCII string.
2222 * The values read are assumed to be in 1/1000 seconds, and
2223 * are converted into jiffies.
2224 *
2225 * Returns 0 on success.
2226 */
2227int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2228 void __user *buffer, size_t *lenp, loff_t *ppos)
2229{
2230 return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2231 do_proc_dointvec_ms_jiffies_conv, NULL);
2232}
2233
2234#else /* CONFIG_PROC_FS */
2235
2236int proc_dostring(ctl_table *table, int write, struct file *filp,
2237 void __user *buffer, size_t *lenp, loff_t *ppos)
2238{
2239 return -ENOSYS;
2240}
2241
2242static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
2243 void __user *buffer, size_t *lenp, loff_t *ppos)
2244{
2245 return -ENOSYS;
2246}
2247
2248int proc_dointvec(ctl_table *table, int write, struct file *filp,
2249 void __user *buffer, size_t *lenp, loff_t *ppos)
2250{
2251 return -ENOSYS;
2252}
2253
2254int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2255 void __user *buffer, size_t *lenp, loff_t *ppos)
2256{
2257 return -ENOSYS;
2258}
2259
2260int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2261 void __user *buffer, size_t *lenp, loff_t *ppos)
2262{
2263 return -ENOSYS;
2264}
2265
2266int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2267 void __user *buffer, size_t *lenp, loff_t *ppos)
2268{
2269 return -ENOSYS;
2270}
2271
2272int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2273 void __user *buffer, size_t *lenp, loff_t *ppos)
2274{
2275 return -ENOSYS;
2276}
2277
2278int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2279 void __user *buffer, size_t *lenp, loff_t *ppos)
2280{
2281 return -ENOSYS;
2282}
2283
2284int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2285 void __user *buffer, size_t *lenp, loff_t *ppos)
2286{
2287 return -ENOSYS;
2288}
2289
2290int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2291 struct file *filp,
2292 void __user *buffer,
2293 size_t *lenp, loff_t *ppos)
2294{
2295 return -ENOSYS;
2296}
2297
2298
2299#endif /* CONFIG_PROC_FS */
2300
2301
2302/*
2303 * General sysctl support routines
2304 */
2305
2306/* The generic string strategy routine: */
2307int sysctl_string(ctl_table *table, int __user *name, int nlen,
2308 void __user *oldval, size_t __user *oldlenp,
2309 void __user *newval, size_t newlen, void **context)
2310{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002311 if (!table->data || !table->maxlen)
2312 return -ENOTDIR;
2313
2314 if (oldval && oldlenp) {
Linus Torvaldsde9e0072005-12-31 17:00:29 -08002315 size_t bufsize;
2316 if (get_user(bufsize, oldlenp))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002317 return -EFAULT;
Linus Torvaldsde9e0072005-12-31 17:00:29 -08002318 if (bufsize) {
2319 size_t len = strlen(table->data), copied;
2320
2321 /* This shouldn't trigger for a well-formed sysctl */
2322 if (len > table->maxlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002323 len = table->maxlen;
Linus Torvaldsde9e0072005-12-31 17:00:29 -08002324
2325 /* Copy up to a max of bufsize-1 bytes of the string */
2326 copied = (len >= bufsize) ? bufsize - 1 : len;
2327
2328 if (copy_to_user(oldval, table->data, copied) ||
2329 put_user(0, (char __user *)(oldval + copied)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002330 return -EFAULT;
Linus Torvaldsde9e0072005-12-31 17:00:29 -08002331 if (put_user(len, oldlenp))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002332 return -EFAULT;
2333 }
2334 }
2335 if (newval && newlen) {
Linus Torvaldsde9e0072005-12-31 17:00:29 -08002336 size_t len = newlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002337 if (len > table->maxlen)
2338 len = table->maxlen;
2339 if(copy_from_user(table->data, newval, len))
2340 return -EFAULT;
2341 if (len == table->maxlen)
2342 len--;
2343 ((char *) table->data)[len] = 0;
2344 }
Yi Yang82c9df82005-12-30 16:37:10 +08002345 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002346}
2347
2348/*
2349 * This function makes sure that all of the integers in the vector
2350 * are between the minimum and maximum values given in the arrays
2351 * table->extra1 and table->extra2, respectively.
2352 */
2353int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2354 void __user *oldval, size_t __user *oldlenp,
2355 void __user *newval, size_t newlen, void **context)
2356{
2357
2358 if (newval && newlen) {
2359 int __user *vec = (int __user *) newval;
2360 int *min = (int *) table->extra1;
2361 int *max = (int *) table->extra2;
2362 size_t length;
2363 int i;
2364
2365 if (newlen % sizeof(int) != 0)
2366 return -EINVAL;
2367
2368 if (!table->extra1 && !table->extra2)
2369 return 0;
2370
2371 if (newlen > table->maxlen)
2372 newlen = table->maxlen;
2373 length = newlen / sizeof(int);
2374
2375 for (i = 0; i < length; i++) {
2376 int value;
2377 if (get_user(value, vec + i))
2378 return -EFAULT;
2379 if (min && value < min[i])
2380 return -EINVAL;
2381 if (max && value > max[i])
2382 return -EINVAL;
2383 }
2384 }
2385 return 0;
2386}
2387
2388/* Strategy function to convert jiffies to seconds */
2389int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2390 void __user *oldval, size_t __user *oldlenp,
2391 void __user *newval, size_t newlen, void **context)
2392{
2393 if (oldval) {
2394 size_t olen;
2395 if (oldlenp) {
2396 if (get_user(olen, oldlenp))
2397 return -EFAULT;
2398 if (olen!=sizeof(int))
2399 return -EINVAL;
2400 }
2401 if (put_user(*(int *)(table->data)/HZ, (int __user *)oldval) ||
2402 (oldlenp && put_user(sizeof(int),oldlenp)))
2403 return -EFAULT;
2404 }
2405 if (newval && newlen) {
2406 int new;
2407 if (newlen != sizeof(int))
2408 return -EINVAL;
2409 if (get_user(new, (int __user *)newval))
2410 return -EFAULT;
2411 *(int *)(table->data) = new*HZ;
2412 }
2413 return 1;
2414}
2415
2416/* Strategy function to convert jiffies to seconds */
2417int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2418 void __user *oldval, size_t __user *oldlenp,
2419 void __user *newval, size_t newlen, void **context)
2420{
2421 if (oldval) {
2422 size_t olen;
2423 if (oldlenp) {
2424 if (get_user(olen, oldlenp))
2425 return -EFAULT;
2426 if (olen!=sizeof(int))
2427 return -EINVAL;
2428 }
2429 if (put_user(jiffies_to_msecs(*(int *)(table->data)), (int __user *)oldval) ||
2430 (oldlenp && put_user(sizeof(int),oldlenp)))
2431 return -EFAULT;
2432 }
2433 if (newval && newlen) {
2434 int new;
2435 if (newlen != sizeof(int))
2436 return -EINVAL;
2437 if (get_user(new, (int __user *)newval))
2438 return -EFAULT;
2439 *(int *)(table->data) = msecs_to_jiffies(new);
2440 }
2441 return 1;
2442}
2443
2444#else /* CONFIG_SYSCTL */
2445
2446
2447asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2448{
2449 return -ENOSYS;
2450}
2451
2452int sysctl_string(ctl_table *table, int __user *name, int nlen,
2453 void __user *oldval, size_t __user *oldlenp,
2454 void __user *newval, size_t newlen, void **context)
2455{
2456 return -ENOSYS;
2457}
2458
2459int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2460 void __user *oldval, size_t __user *oldlenp,
2461 void __user *newval, size_t newlen, void **context)
2462{
2463 return -ENOSYS;
2464}
2465
2466int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2467 void __user *oldval, size_t __user *oldlenp,
2468 void __user *newval, size_t newlen, void **context)
2469{
2470 return -ENOSYS;
2471}
2472
2473int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2474 void __user *oldval, size_t __user *oldlenp,
2475 void __user *newval, size_t newlen, void **context)
2476{
2477 return -ENOSYS;
2478}
2479
2480int proc_dostring(ctl_table *table, int write, struct file *filp,
2481 void __user *buffer, size_t *lenp, loff_t *ppos)
2482{
2483 return -ENOSYS;
2484}
2485
2486int proc_dointvec(ctl_table *table, int write, struct file *filp,
2487 void __user *buffer, size_t *lenp, loff_t *ppos)
2488{
2489 return -ENOSYS;
2490}
2491
2492int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2493 void __user *buffer, size_t *lenp, loff_t *ppos)
2494{
2495 return -ENOSYS;
2496}
2497
2498int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2499 void __user *buffer, size_t *lenp, loff_t *ppos)
2500{
2501 return -ENOSYS;
2502}
2503
2504int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2505 void __user *buffer, size_t *lenp, loff_t *ppos)
2506{
2507 return -ENOSYS;
2508}
2509
2510int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2511 void __user *buffer, size_t *lenp, loff_t *ppos)
2512{
2513 return -ENOSYS;
2514}
2515
2516int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2517 void __user *buffer, size_t *lenp, loff_t *ppos)
2518{
2519 return -ENOSYS;
2520}
2521
2522int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2523 void __user *buffer, size_t *lenp, loff_t *ppos)
2524{
2525 return -ENOSYS;
2526}
2527
2528int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2529 struct file *filp,
2530 void __user *buffer,
2531 size_t *lenp, loff_t *ppos)
2532{
2533 return -ENOSYS;
2534}
2535
2536struct ctl_table_header * register_sysctl_table(ctl_table * table,
2537 int insert_at_head)
2538{
2539 return NULL;
2540}
2541
/* Stub from the CONFIG_SYSCTL #else branch: intentionally does nothing. */
void unregister_sysctl_table(struct ctl_table_header *table)
{
}
2545
2546#endif /* CONFIG_SYSCTL */
2547
/*
 * The exports are gathered here rather than placed after each symbol
 * definition: doing that would mean writing every one of them twice
 * (exception granted :-)
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(unregister_sysctl_table);