Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
* 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (34 commits)
powerpc: Wireup new syscalls
Move update_mmu_cache() declaration from tlbflush.h to pgtable.h
powerpc/pseries: Remove kmalloc call in handling writes to lparcfg
powerpc/pseries: Update arch vector to indicate support for CMO
ibmvfc: Add support for collaborative memory overcommit
ibmvscsi: driver enablement for CMO
ibmveth: enable driver for CMO
ibmveth: Automatically enable larger rx buffer pools for larger mtu
powerpc/pseries: Verify CMO memory entitlement updates with virtual I/O
powerpc/pseries: vio bus support for CMO
powerpc/pseries: iommu enablement for CMO
powerpc/pseries: Add CMO paging statistics
powerpc/pseries: Add collaborative memory manager
powerpc/pseries: Utilities to set firmware page state
powerpc/pseries: Enable CMO feature during platform setup
powerpc/pseries: Split retrieval of processor entitlement data into a helper routine
powerpc/pseries: Add memory entitlement capabilities to /proc/ppc64/lparcfg
powerpc/pseries: Split processor entitlement retrieval and gathering to helper routines
powerpc/pseries: Remove extraneous error reporting for hcall failures in lparcfg
powerpc: Fix compile error with binutils 2.15
...
Fixed up conflict in arch/powerpc/platforms/52xx/Kconfig manually.
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 1977fab..6de7130 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -361,8 +361,6 @@
- directory with info on telephony (e.g. voice over IP) support.
time_interpolators.txt
- info on time interpolators.
-tipar.txt
- - information about Parallel link cable for Texas Instruments handhelds.
tty.txt
- guide to the locking policies of the tty layer.
uml/
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
index 6caa146..1875e50 100644
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -474,25 +474,29 @@
So, you can either get rid of GNU emacs, or change it to use saner
values. To do the latter, you can stick the following in your .emacs file:
-(defun linux-c-mode ()
- "C mode with adjusted defaults for use with the Linux kernel."
- (interactive)
- (c-mode)
- (c-set-style "K&R")
- (setq tab-width 8)
- (setq indent-tabs-mode t)
- (setq c-basic-offset 8))
+(defun c-lineup-arglist-tabs-only (ignored)
+ "Line up argument lists by tabs, not spaces"
+ (let* ((anchor (c-langelem-pos c-syntactic-element))
+ (column (c-langelem-2nd-pos c-syntactic-element))
+ (offset (- (1+ column) anchor))
+ (steps (floor offset c-basic-offset)))
+ (* (max steps 1)
+ c-basic-offset)))
-This will define the M-x linux-c-mode command. When hacking on a
-module, if you put the string -*- linux-c -*- somewhere on the first
-two lines, this mode will be automatically invoked. Also, you may want
-to add
+(add-hook 'c-mode-hook
+ (lambda ()
+ (let ((filename (buffer-file-name)))
+ ;; Enable kernel mode for the appropriate files
+ (when (and filename
+ (string-match "~/src/linux-trees" filename))
+ (setq indent-tabs-mode t)
+ (c-set-style "linux")
+ (c-set-offset 'arglist-cont-nonempty
+ '(c-lineup-gcc-asm-reg
+ c-lineup-arglist-tabs-only))))))
-(setq auto-mode-alist (cons '("/usr/src/linux.*/.*\\.[ch]$" . linux-c-mode)
- auto-mode-alist))
-
-to your .emacs file if you want to have linux-c-mode switched on
-automagically when you edit source files under /usr/src/linux.
+This will make emacs go better with the kernel coding style for C
+files below ~/src/linux-trees.
But even if you fail in getting emacs to do sane formatting, not
everything is lost: use "indent".
diff --git a/Documentation/DocBook/procfs-guide.tmpl b/Documentation/DocBook/procfs-guide.tmpl
index 1fd6a1e..8a5dc6e 100644
--- a/Documentation/DocBook/procfs-guide.tmpl
+++ b/Documentation/DocBook/procfs-guide.tmpl
@@ -29,12 +29,12 @@
<revhistory>
<revision>
- <revnumber>1.0 </revnumber>
+ <revnumber>1.0</revnumber>
<date>May 30, 2001</date>
<revremark>Initial revision posted to linux-kernel</revremark>
</revision>
<revision>
- <revnumber>1.1 </revnumber>
+ <revnumber>1.1</revnumber>
<date>June 3, 2001</date>
<revremark>Revised after comments from linux-kernel</revremark>
</revision>
diff --git a/Documentation/accounting/delay-accounting.txt b/Documentation/accounting/delay-accounting.txt
index 1443cd7..8a12f07 100644
--- a/Documentation/accounting/delay-accounting.txt
+++ b/Documentation/accounting/delay-accounting.txt
@@ -11,6 +11,7 @@
a) waiting for a CPU (while being runnable)
b) completion of synchronous block I/O initiated by the task
c) swapping in pages
+d) memory reclaim
and makes these statistics available to userspace through
the taskstats interface.
@@ -41,7 +42,7 @@
include/linux/taskstats.h
for a description of the fields pertaining to delay accounting.
It will generally be in the form of counters returning the cumulative
-delay seen for cpu, sync block I/O, swapin etc.
+delay seen for cpu, sync block I/O, swapin, memory reclaim etc.
Taking the difference of two successive readings of a given
counter (say cpu_delay_total) for a task will give the delay
@@ -94,7 +95,9 @@
7876 92005750 100000000 24001500
IO count delay total
0 0
-MEM count delay total
+SWAP count delay total
+ 0 0
+RECLAIM count delay total
0 0
Get delays seen in executing a given simple command
@@ -108,5 +111,7 @@
6 4000250 4000000 0
IO count delay total
0 0
-MEM count delay total
+SWAP count delay total
+ 0 0
+RECLAIM count delay total
0 0
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index 40121b5..3f7755f 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -196,14 +196,18 @@
" %15llu%15llu%15llu%15llu\n"
"IO %15s%15s\n"
" %15llu%15llu\n"
- "MEM %15s%15s\n"
+ "SWAP %15s%15s\n"
+ " %15llu%15llu\n"
+ "RECLAIM %12s%15s\n"
" %15llu%15llu\n",
"count", "real total", "virtual total", "delay total",
t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
t->cpu_delay_total,
"count", "delay total",
t->blkio_count, t->blkio_delay_total,
- "count", "delay total", t->swapin_count, t->swapin_delay_total);
+ "count", "delay total", t->swapin_count, t->swapin_delay_total,
+ "count", "delay total",
+ t->freepages_count, t->freepages_delay_total);
}
void task_context_switch_counts(struct taskstats *t)
diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt
index cd784f4..b988d11 100644
--- a/Documentation/accounting/taskstats-struct.txt
+++ b/Documentation/accounting/taskstats-struct.txt
@@ -26,6 +26,8 @@
5) Time accounting for SMT machines
+6) Extended delay accounting fields for memory reclaim
+
Future extension should add fields to the end of the taskstats struct, and
should not change the relative position of each field within the struct.
@@ -170,4 +172,9 @@
__u64 ac_utimescaled; /* utime scaled on frequency etc */
__u64 ac_stimescaled; /* stime scaled on frequency etc */
__u64 cpu_scaled_run_real_total; /* scaled cpu_run_real_total */
+
+6) Extended delay accounting fields for memory reclaim
+ /* Delay waiting for memory reclaim */
+ __u64 freepages_count;
+ __u64 freepages_delay_total;
}
diff --git a/Documentation/bt8xxgpio.txt b/Documentation/bt8xxgpio.txt
new file mode 100644
index 0000000..d8297e4
--- /dev/null
+++ b/Documentation/bt8xxgpio.txt
@@ -0,0 +1,67 @@
+===============================================================
+== BT8XXGPIO driver ==
+== ==
+== A driver for a selfmade cheap BT8xx based PCI GPIO-card ==
+== ==
+== For advanced documentation, see ==
+== http://www.bu3sch.de/btgpio.php ==
+===============================================================
+
+
+A generic digital 24-port PCI GPIO card can be built out of an ordinary
+Brooktree bt848, bt849, bt878 or bt879 based analog TV tuner card. The
+Brooktree chip is used in old analog Hauppauge WinTV PCI cards. You can easily
+find them used for low prices on the net.
+
+The bt8xx chip does have 24 digital GPIO ports.
+These ports are accessible via 24 pins on the SMD chip package.
+
+
+==============================================
+== How to physically access the GPIO pins ==
+==============================================
+
+There are several ways to access these pins. One might unsolder the whole chip
+and put it on a custom PCI board, or one might only unsolder each individual
+GPIO pin and solder that to some tiny wire. As the chip package really is tiny
+there are some advanced soldering skills needed in any case.
+
+The physical pinouts are drawn in the following ASCII art.
+The GPIO pins are marked with G00-G23
+
+ G G G G G G G G G G G G G G G G G G
+ 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7
+ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+ ---------------------------------------------------------------------------
+ --| ^ ^ |--
+ --| pin 86 pin 67 |--
+ --| |--
+ --| pin 61 > |-- G18
+ --| |-- G19
+ --| |-- G20
+ --| |-- G21
+ --| |-- G22
+ --| pin 56 > |-- G23
+ --| |--
+ --| Brooktree 878/879 |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| O |--
+ --| |--
+ ---------------------------------------------------------------------------
+ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+ ^
+ This is pin 1
+
diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt
index 866b9cd..9b53d58 100644
--- a/Documentation/controllers/memory.txt
+++ b/Documentation/controllers/memory.txt
@@ -242,8 +242,7 @@
1. Add support for accounting huge pages (as a separate controller)
2. Make per-cgroup scanner reclaim not-shared pages first
3. Teach controller to account for shared-pages
-4. Start reclamation when the limit is lowered
-5. Start reclamation in the background when the limit is
+4. Start reclamation in the background when the limit is
not yet hit but the usage is getting closer
Summary
diff --git a/Documentation/edac.txt b/Documentation/edac.txt
index a5c3684..ced5273 100644
--- a/Documentation/edac.txt
+++ b/Documentation/edac.txt
@@ -222,74 +222,9 @@
set of DIMMs for channels 0 and 1.
-Within each of the 'mc','mcX' and 'csrowX' directories are several
+Within each of the 'mcX' and 'csrowX' directories are several
EDAC control and attribute files.
-
-============================================================================
-DIRECTORY 'mc'
-
-In directory 'mc' are EDAC system overall control and attribute files:
-
-
-Panic on UE control file:
-
- 'edac_mc_panic_on_ue'
-
- An uncorrectable error will cause a machine panic. This is usually
- desirable. It is a bad idea to continue when an uncorrectable error
- occurs - it is indeterminate what was uncorrected and the operating
- system context might be so mangled that continuing will lead to further
- corruption. If the kernel has MCE configured, then EDAC will never
- notice the UE.
-
- LOAD TIME: module/kernel parameter: panic_on_ue=[0|1]
-
- RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_panic_on_ue
-
-
-Log UE control file:
-
- 'edac_mc_log_ue'
-
- Generate kernel messages describing uncorrectable errors. These errors
- are reported through the system message log system. UE statistics
- will be accumulated even when UE logging is disabled.
-
- LOAD TIME: module/kernel parameter: log_ue=[0|1]
-
- RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ue
-
-
-Log CE control file:
-
- 'edac_mc_log_ce'
-
- Generate kernel messages describing correctable errors. These
- errors are reported through the system message log system.
- CE statistics will be accumulated even when CE logging is disabled.
-
- LOAD TIME: module/kernel parameter: log_ce=[0|1]
-
- RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ce
-
-
-Polling period control file:
-
- 'edac_mc_poll_msec'
-
- The time period, in milliseconds, for polling for error information.
- Too small a value wastes resources. Too large a value might delay
- necessary handling of errors and might loose valuable information for
- locating the error. 1000 milliseconds (once each second) is the current
- default. Systems which require all the bandwidth they can get, may
- increase this.
-
- LOAD TIME: module/kernel parameter: poll_msec=[0|1]
-
- RUN TIME: echo "1000" >/sys/devices/system/edac/mc/edac_mc_poll_msec
-
-
============================================================================
'mcX' DIRECTORIES
@@ -537,7 +472,6 @@
motherboard specific and determination of this information
must occur in userland at this time.
-
============================================================================
SYSTEM LOGGING
@@ -570,7 +504,6 @@
driver-specific error message.
-
============================================================================
PCI Bus Parity Detection
@@ -604,6 +537,74 @@
echo "0" >/sys/devices/system/edac/pci/check_pci_parity
+Parity Count:
+
+ 'pci_parity_count'
+
+ This attribute file will display the number of parity errors that
+ have been detected.
+
+
+============================================================================
+MODULE PARAMETERS
+
+Panic on UE control file:
+
+ 'edac_mc_panic_on_ue'
+
+ An uncorrectable error will cause a machine panic. This is usually
+ desirable. It is a bad idea to continue when an uncorrectable error
+ occurs - it is indeterminate what was uncorrected and the operating
+ system context might be so mangled that continuing will lead to further
+ corruption. If the kernel has MCE configured, then EDAC will never
+ notice the UE.
+
+ LOAD TIME: module/kernel parameter: edac_mc_panic_on_ue=[0|1]
+
+ RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_panic_on_ue
+
+
+Log UE control file:
+
+ 'edac_mc_log_ue'
+
+ Generate kernel messages describing uncorrectable errors. These errors
+ are reported through the system message log system. UE statistics
+ will be accumulated even when UE logging is disabled.
+
+ LOAD TIME: module/kernel parameter: edac_mc_log_ue=[0|1]
+
+ RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ue
+
+
+Log CE control file:
+
+ 'edac_mc_log_ce'
+
+ Generate kernel messages describing correctable errors. These
+ errors are reported through the system message log system.
+ CE statistics will be accumulated even when CE logging is disabled.
+
+ LOAD TIME: module/kernel parameter: edac_mc_log_ce=[0|1]
+
+ RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ce
+
+
+Polling period control file:
+
+ 'edac_mc_poll_msec'
+
+ The time period, in milliseconds, for polling for error information.
+ Too small a value wastes resources. Too large a value might delay
+ necessary handling of errors and might lose valuable information for
+ locating the error. 1000 milliseconds (once each second) is the current
+ default. Systems which require all the bandwidth they can get, may
+ increase this.
+
+ LOAD TIME: module/kernel parameter: edac_mc_poll_msec=[0|1]
+
+ RUN TIME: echo "1000" > /sys/module/edac_core/parameters/edac_mc_poll_msec
+
Panic on PCI PARITY Error:
@@ -614,21 +615,13 @@
error has been detected.
- module/kernel parameter: panic_on_pci_parity=[0|1]
+ module/kernel parameter: edac_panic_on_pci_pe=[0|1]
Enable:
- echo "1" >/sys/devices/system/edac/pci/panic_on_pci_parity
+ echo "1" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
Disable:
- echo "0" >/sys/devices/system/edac/pci/panic_on_pci_parity
-
-
-Parity Count:
-
- 'pci_parity_count'
-
- This attribute file will display the number of parity errors that
- have been detected.
+ echo "0" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 09c4a1e..721c71b 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -138,24 +138,6 @@
---------------------------
-What: find_task_by_pid
-When: 2.6.26
-Why: With pid namespaces, calling this funciton will return the
- wrong task when called from inside a namespace.
-
- The best way to save a task pid and find a task by this
- pid later, is to find this task's struct pid pointer (or get
- it directly from the task) and call pid_task() later.
-
- If someone really needs to get a task by its pid_t, then
- he most likely needs the find_task_by_vpid() to get the
- task from the same namespace as the current task is in, but
- this may be not so in general.
-
-Who: Pavel Emelyanov <xemul@openvz.org>
-
----------------------------
-
What: ACPI procfs interface
When: July 2008
Why: ACPI sysfs conversion should be finished by January 2008.
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index 2d5e1e5..bbac4f1 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -96,6 +96,14 @@
emulate the Windows 95 rule for create.
Default setting is `lower'.
+tz=UTC -- Interpret timestamps as UTC rather than local time.
+ This option disables the conversion of timestamps
+ between local time (as used by Windows on FAT) and UTC
+ (which Linux uses internally). This is particularly
+ useful when mounting devices (like digital cameras)
+ that are set to UTC in order to avoid the pitfalls of
+ local time.
+
<bool>: 0,1,yes,no,true,false
TODO
diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt
index c35ca9e..18022e2 100644
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -347,15 +347,12 @@
Dynamic definition of GPIOs is not currently standard; for example, as
a side effect of configuring an add-on board with some GPIO expanders.
-These calls are purely for kernel space, but a userspace API could be built
-on top of them.
-
GPIO implementor's framework (OPTIONAL)
=======================================
As noted earlier, there is an optional implementation framework making it
easier for platforms to support different kinds of GPIO controller using
-the same programming interface.
+the same programming interface. This framework is called "gpiolib".
As a debugging aid, if debugfs is available a /sys/kernel/debug/gpio file
will be found there. That will list all the controllers registered through
@@ -392,11 +389,21 @@
Platform Support
----------------
-To support this framework, a platform's Kconfig will "select HAVE_GPIO_LIB"
+To support this framework, a platform's Kconfig will "select" either
+ARCH_REQUIRE_GPIOLIB or ARCH_WANT_OPTIONAL_GPIOLIB
and arrange that its <asm/gpio.h> includes <asm-generic/gpio.h> and defines
three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep().
They may also want to provide a custom value for ARCH_NR_GPIOS.
+ARCH_REQUIRE_GPIOLIB means that the gpio-lib code will always get compiled
+into the kernel on that architecture.
+
+ARCH_WANT_OPTIONAL_GPIOLIB means the gpio-lib code defaults to off and the user
+can enable it and build it into the kernel optionally.
+
+If neither of these options are selected, the platform does not support
+GPIOs through GPIO-lib and the code cannot be enabled by the user.
+
Trivial implementations of those functions can directly use framework
code, which always dispatches through the gpio_chip:
@@ -439,4 +446,120 @@
calls for that GPIO can work. One way to address such dependencies is for
such gpio_chip controllers to provide setup() and teardown() callbacks to
board specific code; those board specific callbacks would register devices
-once all the necessary resources are available.
+once all the necessary resources are available, and remove them later when
+the GPIO controller device becomes unavailable.
+
+
+Sysfs Interface for Userspace (OPTIONAL)
+========================================
+Platforms which use the "gpiolib" implementors framework may choose to
+configure a sysfs user interface to GPIOs. This is different from the
+debugfs interface, since it provides control over GPIO direction and
+value instead of just showing a gpio state summary. Plus, it could be
+present on production systems without debugging support.
+
+Given appropriate hardware documentation for the system, userspace could
+know for example that GPIO #23 controls the write protect line used to
+protect boot loader segments in flash memory. System upgrade procedures
+may need to temporarily remove that protection, first importing a GPIO,
+then changing its output state, then updating the code before re-enabling
+the write protection. In normal use, GPIO #23 would never be touched,
+and the kernel would have no need to know about it.
+
+Again depending on appropriate hardware documentation, on some systems
+userspace GPIO can be used to determine system configuration data that
+standard kernels won't know about. And for some tasks, simple userspace
+GPIO drivers could be all that the system really needs.
+
+Note that standard kernel drivers exist for common "LEDs and Buttons"
+GPIO tasks: "leds-gpio" and "gpio_keys", respectively. Use those
+instead of talking directly to the GPIOs; they integrate with kernel
+frameworks better than your userspace code could.
+
+
+Paths in Sysfs
+--------------
+There are three kinds of entry in /sys/class/gpio:
+
+ - Control interfaces used to get userspace control over GPIOs;
+
+ - GPIOs themselves; and
+
+ - GPIO controllers ("gpio_chip" instances).
+
+That's in addition to standard files including the "device" symlink.
+
+The control interfaces are write-only:
+
+ /sys/class/gpio/
+
+ "export" ... Userspace may ask the kernel to export control of
+ a GPIO to userspace by writing its number to this file.
+
+ Example: "echo 19 > export" will create a "gpio19" node
+ for GPIO #19, if that's not requested by kernel code.
+
+ "unexport" ... Reverses the effect of exporting to userspace.
+
+ Example: "echo 19 > unexport" will remove a "gpio19"
+ node exported using the "export" file.
+
+GPIO signals have paths like /sys/class/gpio/gpio42/ (for GPIO #42)
+and have the following read/write attributes:
+
+ /sys/class/gpio/gpioN/
+
+ "direction" ... reads as either "in" or "out". This value may
+ normally be written. Writing as "out" defaults to
+ initializing the value as low. To ensure glitch free
+ operation, values "low" and "high" may be written to
+ configure the GPIO as an output with that initial value.
+
+ Note that this attribute *will not exist* if the kernel
+ doesn't support changing the direction of a GPIO, or
+ it was exported by kernel code that didn't explicitly
+ allow userspace to reconfigure this GPIO's direction.
+
+ "value" ... reads as either 0 (low) or 1 (high). If the GPIO
+ is configured as an output, this value may be written;
+ any nonzero value is treated as high.
+
+GPIO controllers have paths like /sys/class/gpio/gpiochip42/ (for the
+controller implementing GPIOs starting at #42) and have the following
+read-only attributes:
+
+ /sys/class/gpio/gpiochipN/
+
+ "base" ... same as N, the first GPIO managed by this chip
+
+ "label" ... provided for diagnostics (not always unique)
+
+ "ngpio" ... how many GPIOs this manages (N to N + ngpio - 1)
+
+Board documentation should in most cases cover what GPIOs are used for
+what purposes. However, those numbers are not always stable; GPIOs on
+a daughtercard might be different depending on the base board being used,
+or other cards in the stack. In such cases, you may need to use the
+gpiochip nodes (possibly in conjunction with schematics) to determine
+the correct GPIO number to use for a given signal.
+
+
+Exporting from Kernel code
+--------------------------
+Kernel code can explicitly manage exports of GPIOs which have already been
+requested using gpio_request():
+
+ /* export the GPIO to userspace */
+ int gpio_export(unsigned gpio, bool direction_may_change);
+
+ /* reverse gpio_export() */
+ void gpio_unexport();
+
+After a kernel driver requests a GPIO, it may only be made available in
+the sysfs interface by gpio_export(). The driver can control whether the
+signal direction may change. This helps drivers prevent userspace code
+from accidentally clobbering important system state.
+
+This explicit exporting can help with debugging (by making some kinds
+of experiments easier), or can provide an always-there interface that's
+suitable for documenting as part of a board support package.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 497a98d..e7bea3e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2159,13 +2159,6 @@
<deci-seconds>: poll all this frequency
0: no polling (default)
- tipar.timeout= [HW,PPT]
- Set communications timeout in tenths of a second
- (default 15).
-
- tipar.delay= [HW,PPT]
- Set inter-bit delay in microseconds (default 10).
-
tmscsim= [HW,SCSI]
See comment before function dc390_setup() in
drivers/scsi/tmscsim.c.
diff --git a/Documentation/moxa-smartio b/Documentation/moxa-smartio
index fe24ecc..5337e80 100644
--- a/Documentation/moxa-smartio
+++ b/Documentation/moxa-smartio
@@ -1,14 +1,22 @@
=============================================================================
-
- MOXA Smartio Family Device Driver Ver 1.1 Installation Guide
- for Linux Kernel 2.2.x and 2.0.3x
- Copyright (C) 1999, Moxa Technologies Co, Ltd.
+ MOXA Smartio/Industio Family Device Driver Installation Guide
+ for Linux Kernel 2.4.x, 2.6.x
+ Copyright (C) 2008, Moxa Inc.
=============================================================================
+Date: 01/21/2008
+
Content
1. Introduction
2. System Requirement
3. Installation
+ 3.1 Hardware installation
+ 3.2 Driver files
+ 3.3 Device naming convention
+ 3.4 Module driver configuration
+ 3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x.
+ 3.6 Custom configuration
+ 3.7 Verify driver installation
4. Utilities
5. Setserial
6. Troubleshooting
@@ -16,27 +24,48 @@
-----------------------------------------------------------------------------
1. Introduction
- The Smartio family Linux driver, Ver. 1.1, supports following multiport
+ The Smartio/Industio/UPCI family Linux driver supports following multiport
boards.
- -C104P/H/HS, C104H/PCI, C104HS/PCI, CI-104J 4 port multiport board.
- -C168P/H/HS, C168H/PCI 8 port multiport board.
+ - 2 ports multiport board
+ CP-102U, CP-102UL, CP-102UF
+ CP-132U-I, CP-132UL,
+ CP-132, CP-132I, CP132S, CP-132IS,
+ CI-132, CI-132I, CI-132IS,
+ (C102H, C102HI, C102HIS, C102P, CP-102, CP-102S)
- This driver has been modified a little and cleaned up from the Moxa
- contributed driver code and merged into Linux 2.2.14pre. In particular
- official major/minor numbers have been assigned which are different to
- those the original Moxa supplied driver used.
+ - 4 ports multiport board
+ CP-104EL,
+ CP-104UL, CP-104JU,
+ CP-134U, CP-134U-I,
+ C104H/PCI, C104HS/PCI,
+ CP-114, CP-114I, CP-114S, CP-114IS, CP-114UL,
+ C104H, C104HS,
+ CI-104J, CI-104JS,
+ CI-134, CI-134I, CI-134IS,
+ (C114HI, CT-114I, C104P)
+ POS-104UL,
+ CB-114,
+ CB-134I
+
+ - 8 ports multiport board
+ CP-118EL, CP-168EL,
+ CP-118U, CP-168U,
+ C168H/PCI,
+ C168H, C168HS,
+ (C168P),
+ CB-108
This driver and installation procedure have been developed upon Linux Kernel
- 2.2.5 and backward compatible to 2.0.3x. This driver supports Intel x86 and
- Alpha hardware platform. In order to maintain compatibility, this version
- has also been properly tested with RedHat, OpenLinux, TurboLinux and
- S.u.S.E Linux. However, if compatibility problem occurs, please contact
- Moxa at support@moxa.com.tw.
+ 2.4.x and 2.6.x. This driver supports Intel x86 hardware platform. In order
+ to maintain compatibility, this version has also been properly tested with
+ RedHat, Mandrake, Fedora and S.u.S.E Linux. However, if compatibility problem
+ occurs, please contact Moxa at support@moxa.com.tw.
In addition to device driver, useful utilities are also provided in this
version. They are
- - msdiag Diagnostic program for detecting installed Moxa Smartio boards.
+ - msdiag Diagnostic program for displaying installed Moxa
+ Smartio/Industio boards.
- msmon Monitor program to observe data count and line status signals.
- msterm A simple terminal program which is useful in testing serial
ports.
@@ -47,8 +76,7 @@
GNU General Public License in this version. Please refer to GNU General
Public License announcement in each source code file for more detail.
- In Moxa's ftp sites, you may always find latest driver at
- ftp://ftp.moxa.com or ftp://ftp.moxa.com.tw.
+ In Moxa's Web sites, you may always find latest driver at http://web.moxa.com.
This version of driver can be installed as Loadable Module (Module driver)
or built-in into kernel (Static driver). You may refer to following
@@ -61,8 +89,8 @@
-----------------------------------------------------------------------------
2. System Requirement
- - Hardware platform: Intel x86 or Alpha machine
- - Kernel version: 2.0.3x or 2.2.x
+ - Hardware platform: Intel x86 machine
+ - Kernel version: 2.4.x or 2.6.x
- gcc version 2.72 or later
- Maximum 4 boards can be installed in combination
@@ -70,9 +98,18 @@
3. Installation
3.1 Hardware installation
+ 3.2 Driver files
+ 3.3 Device naming convention
+ 3.4 Module driver configuration
+ 3.5 Static driver configuration for Linux kernel 2.4.x, 2.6.x.
+ 3.6 Custom configuration
+ 3.7 Verify driver installation
- There are two types of buses, ISA and PCI, for Smartio family multiport
- board.
+
+ 3.1 Hardware installation
+
+ There are two types of buses, ISA and PCI, for Smartio/Industio
+ family multiport board.
ISA board
---------
@@ -81,47 +118,57 @@
installation procedure in User's Manual before proceed any further.
Please make sure the JP1 is open after the ISA board is set properly.
- PCI board
- ---------
+ PCI/UPCI board
+ --------------
You may need to adjust IRQ usage in BIOS to avoid from IRQ conflict
with other ISA devices. Please refer to hardware installation
procedure in User's Manual in advance.
- IRQ Sharing
+ PCI IRQ Sharing
-----------
Each port within the same multiport board shares the same IRQ. Up to
- 4 Moxa Smartio Family multiport boards can be installed together on
- one system and they can share the same IRQ.
+ 4 Moxa Smartio/Industio PCI Family multiport boards can be installed
+ together on one system and they can share the same IRQ.
- 3.2 Driver files and device naming convention
+
+ 3.2 Driver files
The driver file may be obtained from ftp, CD-ROM or floppy disk. The
first step, anyway, is to copy driver file "mxser.tgz" into specified
directory. e.g. /moxa. The execute commands as below.
+ # cd /
+ # mkdir moxa
# cd /moxa
- # tar xvf /dev/fd0
+ # tar xvf /dev/fd0
+
or
+
+ # cd /
+ # mkdir moxa
# cd /moxa
# cp /mnt/cdrom/<driver directory>/mxser.tgz .
# tar xvfz mxser.tgz
+
+ 3.3 Device naming convention
+
You may find all the driver and utilities files in /moxa/mxser.
Following installation procedure depends on the model you'd like to
- run the driver. If you prefer module driver, please refer to 3.3.
- If static driver is required, please refer to 3.4.
+ run the driver. If you prefer module driver, please refer to 3.4.
+ If static driver is required, please refer to 3.5.
Dialin and callout port
-----------------------
- This driver remains traditional serial device properties. There're
+ This driver remains traditional serial device properties. There are
two special file name for each serial port. One is dial-in port
which is named "ttyMxx". For callout port, the naming convention
is "cumxx".
Device naming when more than 2 boards installed
-----------------------------------------------
- Naming convention for each Smartio multiport board is pre-defined
- as below.
+ Naming convention for each Smartio/Industio multiport board is
+ pre-defined as below.
Board Num. Dial-in Port Callout port
1st board ttyM0 - ttyM7 cum0 - cum7
@@ -129,6 +176,12 @@
3rd board ttyM16 - ttyM23 cum16 - cum23
4th board ttyM24 - ttym31 cum24 - cum31
+
+ !!!!!!!!!!!!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ Under Kernel 2.6 the cum Device is Obsolete. So use ttyM*
+ device instead.
+ !!!!!!!!!!!!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
Board sequence
--------------
This driver will activate ISA boards according to the parameter set
@@ -138,69 +191,131 @@
For PCI boards, their sequence will be after ISA boards and C168H/PCI
has higher priority than C104H/PCI boards.
- 3.3 Module driver configuration
+ 3.4 Module driver configuration
Module driver is easiest way to install. If you prefer static driver
installation, please skip this paragraph.
- 1. Find "Makefile" in /moxa/mxser, then run
- # make install
- The driver files "mxser.o" and utilities will be properly compiled
- and copied to system directories respectively.Then run
+ ------------- Prepare to use the MOXA driver--------------------
+ 3.4.1 Create tty device with correct major number
+ Before using MOXA driver, your system must have the tty devices
+ which are created with driver's major number. We offer one shell
+ script "msmknod" to simplify the procedure.
+ This step only needs to be executed once. However, you still
+ need to repeat this procedure when:
+ a. You change the driver's major number. Please refer to
+ section "3.6".
+ b. Your total installed MOXA boards number is changed. Maybe you
+ add/delete one MOXA board.
+ c. You want to change the tty name. This requires modifying the
+ shell script "msmknod".
- # insmod mxser
-
- to activate the modular driver. You may run "lsmod" to check
- if "mxser.o" is activated.
-
- 2. Create special files by executing "msmknod".
+ The procedure is:
# cd /moxa/mxser/driver
# ./msmknod
- Default major numbers for dial-in device and callout device are
- 174, 175. Msmknod will delete any special files occupying the same
- device naming.
+ This shell script will ask for the major numbers of the dial-in
+ device and callout device in order to create the tty devices. You
+ also need to specify the total number of installed MOXA boards.
+ Default major numbers for dial-in device and callout device are
+ 30, 35. If you need to change them to other numbers, please refer
+ to section "3.6" for a more detailed procedure.
+ Msmknod will delete any special files occupying the same device
+ naming.
- 3. Up to now, you may manually execute "insmod mxser" to activate
- this driver and run "rmmod mxser" to remove it. However, it's
- better to have a boot time configuration to eliminate manual
- operation.
- Boot time configuration can be achieved by rc file. Run following
- command for setting rc files.
+ 3.4.2 Build the MOXA driver and utilities
+ Before using the MOXA driver and utilities, you need to compile
+ all the source code. This step only needs to be executed once.
+ However, you must re-compile the source code if you modify it.
+ For example, if you change the driver's major number (see
+ section "3.6"), then you need to do this step again.
+
+ Find "Makefile" in /moxa/mxser, then run
+
+ # make clean; make install
+
+ !!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!
+ For Red Hat 9, Red Hat Enterprise Linux AS3/ES3/WS3 & Fedora Core1:
+ # make clean; make installsp1
+
+ For Red Hat Enterprise Linux AS4/ES4/WS4:
+ # make clean; make installsp2
+ !!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!
+
+ The driver files "mxser.o" and utilities will be properly compiled
+ and copied to system directories respectively.
+
+ ------------- Load MOXA driver--------------------
+ 3.4.3 Load the MOXA driver
+
+ # modprobe mxser <argument>
+
+ will activate the module driver. You may run "lsmod" to check
+ if "mxser" is activated. If the MOXA board is ISA board, the
+ <argument> is needed. Please refer to section "3.4.5" for more
+ information.
+
+
+ ------------- Load MOXA driver on boot --------------------
+ 3.4.4 For the above description, you may manually execute
+ "modprobe mxser" to activate this driver and run
+ "rmmod mxser" to remove it.
+ However, it's better to have a boot time configuration to
+ eliminate manual operation. Boot time configuration can be
+ achieved by rc file. We offer one "rc.mxser" file to simplify
+ the procedure under "moxa/mxser/driver".
+
+ But if you use ISA board, please modify the "modprobe ..." command
+ to add the argument (see "3.4.5" section). After modifying the
+ rc.mxser, please try to execute "/moxa/mxser/driver/rc.mxser"
+ manually to make sure the modification is ok. If any error is
+ encountered, please modify it again. Once the modification is
+ complete, follow the steps below.
+
+ Run following command for setting rc files.
# cd /moxa/mxser/driver
# cp ./rc.mxser /etc/rc.d
# cd /etc/rc.d
- You may have to modify part of the content in rc.mxser to specify
- parameters for ISA board. Please refer to rc.mxser for more detail.
- Find "rc.serial". If "rc.serial" doesn't exist, create it by vi.
- Add "rc.mxser" in last line. Next, open rc.local by vi
- and append following content.
+ Check whether "rc.serial" exists. If "rc.serial" doesn't exist,
+ create it by vi, and run "chmod 755 rc.serial" to change the permission.
+ Add "/etc/rc.d/rc.mxser" in the last line.
- if [ -f /etc/rc.d/rc.serial ]; then
- sh /etc/rc.d/rc.serial
- fi
+ Reboot and check if mxser is activated by "lsmod" command.
- 4. Reboot and check if mxser.o activated by "lsmod" command.
- 5. If you'd like to drive Smartio ISA boards in the system, you'll
- have to add parameter to specify CAP address of given board while
- activating "mxser.o". The format for parameters are as follows.
+ 3.4.5. If you'd like to drive Smartio/Industio ISA boards in the system,
+ you'll have to add parameter to specify CAP address of given
+ board while activating "mxser.o". The format for parameters are
+ as follows.
- insmod mxser ioaddr=0x???,0x???,0x???,0x???
+ modprobe mxser ioaddr=0x???,0x???,0x???,0x???
| | | |
| | | +- 4th ISA board
| | +------ 3rd ISA board
| +------------ 2nd ISA board
+------------------- 1st ISA board
- 3.4 Static driver configuration
+ 3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x
- 1. Create link
+ Note: To use static driver, you must install the linux kernel
+ source package.
+
+ 3.5.1 Backup the built-in driver in the kernel.
+ # cd /usr/src/linux/drivers/char
+ # mv mxser.c mxser.c.old
+
+ For Red Hat 7.x user, you need to create link:
+ # cd /usr/src
+ # ln -s linux-2.4 linux
+
+ 3.5.2 Create link
# cd /usr/src/linux/drivers/char
# ln -s /moxa/mxser/driver/mxser.c mxser.c
- 2. Add CAP address list for ISA boards
+ 3.5.3 Add CAP address list for ISA boards. For PCI boards user,
+ please skip this step.
+
In module mode, the CAP address for ISA board is given by
parameter. In static driver configuration, you'll have to
assign it within driver's source code. If you will not
@@ -222,73 +337,55 @@
static int mxserBoardCAP[]
= {0x280, 0x180, 0x00, 0x00};
- 3. Modify tty_io.c
- # cd /usr/src/linux/drivers/char/
- # vi tty_io.c
- Find pty_init(), insert "mxser_init()" as
+ 3.5.4 Setup kernel configuration
- pty_init();
- mxser_init();
+ Configure the kernel:
- 4. Modify tty.h
- # cd /usr/src/linux/include/linux
- # vi tty.h
- Find extern int tty_init(void), insert "mxser_init()" as
+ # cd /usr/src/linux
+ # make menuconfig
- extern int tty_init(void);
- extern int mxser_init(void);
-
- 5. Modify Makefile
- # cd /usr/src/linux/drivers/char
- # vi Makefile
- Find L_OBJS := tty_io.o ...... random.o, add
- "mxser.o" at last of this line as
- L_OBJS := tty_io.o ....... mxser.o
+ You will go into a menu-driven system. Please select [Character
+ devices][Non-standard serial port support], enable the [Moxa
+ SmartIO support] driver with "[*]" for built-in (not "[M]"), then
+ select [Exit] to exit this program.
- 6. Rebuild kernel
- The following are for Linux kernel rebuilding,for your reference only.
+ 3.5.5 Rebuild kernel
+ The following are for Linux kernel rebuilding, for your
+ reference only.
For appropriate details, please refer to the Linux document.
- If 'lilo' utility is installed, please use 'make zlilo' to rebuild
- kernel. If 'lilo' is not installed, please follow the following steps.
-
a. cd /usr/src/linux
- b. make clean /* take a few minutes */
- c. make bzImage /* take probably 10-20 minutes */
- d. Backup original boot kernel. /* optional step */
- e. cp /usr/src/linux/arch/i386/boot/bzImage /boot/vmlinuz
+ b. make clean /* take a few minutes */
+ c. make dep /* take a few minutes */
+ d. make bzImage /* take probably 10-20 minutes */
+ e. make install /* copy boot image to correct position */
f. Please make sure the boot kernel (vmlinuz) is in the
- correct position. If you use 'lilo' utility, you should
- check /etc/lilo.conf 'image' item specified the path
- which is the 'vmlinuz' path, or you will load wrong
- (or old) boot kernel image (vmlinuz).
- g. chmod 400 /vmlinuz
- h. lilo
- i. rdev -R /vmlinuz 1
- j. sync
+ correct position.
+ g. If you use 'lilo' utility, you should check /etc/lilo.conf
+ 'image' item specified the path which is the 'vmlinuz' path,
+ or you will load wrong (or old) boot kernel image (vmlinuz).
+ After checking /etc/lilo.conf, please run "lilo".
- Note that if the result of "make zImage" is ERROR, then you have to
- go back to Linux configuration Setup. Type "make config" in directory
- /usr/src/linux or "setup".
+ Note that if the result of "make bzImage" is ERROR, then you have to
+ go back to Linux configuration Setup. Type "make menuconfig" in
+ directory /usr/src/linux.
- Since system include file, /usr/src/linux/include/linux/interrupt.h,
- is modified each time the MOXA driver is installed, kernel rebuilding
- is inevitable. And it takes about 10 to 20 minutes depends on the
- machine.
- 7. Make utility
- # cd /moxa/mxser/utility
- # make install
-
- 8. Make special file
+ 3.5.6 Make tty device and special file
# cd /moxa/mxser/driver
# ./msmknod
- 9. Reboot
+ 3.5.7 Make utility
+ # cd /moxa/mxser/utility
+ # make clean; make install
- 3.5 Custom configuration
+ 3.5.8 Reboot
+
+
+
+ 3.6 Custom configuration
Although this driver already provides you default configuration, you
- still can change the device name and major number.The instruction to
+ still can change the device name and major number. The instruction to
change these parameters are shown as below.
Change Device name
@@ -306,33 +403,37 @@
2 free major numbers for this driver. There are 3 steps to change
major numbers.
- 1. Find free major numbers
+ 3.6.1 Find free major numbers
In /proc/devices, you may find all the major numbers occupied
in the system. Please select 2 major numbers that are available.
e.g. 40, 45.
- 2. Create special files
+ 3.6.2 Create special files
Run /moxa/mxser/driver/msmknod to create special files with
specified major numbers.
- 3. Modify driver with new major number
+ 3.6.3 Modify driver with new major number
Run vi to open /moxa/mxser/driver/mxser.c. Locate the line
contains "MXSERMAJOR". Change the content as below.
#define MXSERMAJOR 40
#define MXSERCUMAJOR 45
- 4. Run # make install in /moxa/mxser/driver.
+ 3.6.4 Run "make clean; make install" in /moxa/mxser/driver.
- 3.6 Verify driver installation
+ 3.7 Verify driver installation
You may refer to /var/log/messages to check the latest status
log reported by this driver whenever it's activated.
+
-----------------------------------------------------------------------------
4. Utilities
There are 3 utilities contained in this driver. They are msdiag, msmon and
msterm. These 3 utilities are released in form of source code. They should
be compiled into executable file and copied into /usr/bin.
+ Before using these utilities, please load the driver (refer to 3.4 & 3.5)
+ and make sure you have run the "msmknod" utility.
+
msdiag - Diagnostic
--------------------
- This utility provides the function to detect what Moxa Smartio multiport
- board exists in the system.
+ This utility provides the function to display which Moxa Smartio/Industio
+ boards were found by the driver in the system.
msmon - Port Monitoring
-----------------------
@@ -353,12 +454,13 @@
application, for example, sending AT command to a modem connected to the
port or used as a terminal for login purpose. Note that this is only a
dumb terminal emulation without handling full screen operation.
+
-----------------------------------------------------------------------------
5. Setserial
Supported Setserial parameters are listed as below.
- uart set UART type(16450-->disable FIFO, 16550A-->enable FIFO)
+ uart set UART type(16450-->disable FIFO, 16550A-->enable FIFO)
close_delay set the amount of time(in 1/100 of a second) that DTR
should be kept low while being closed.
closing_wait set the amount of time(in 1/100 of a second) that the
@@ -366,7 +468,13 @@
being closed, before the receiver is disable.
spd_hi Use 57.6kb when the application requests 38.4kb.
spd_vhi Use 115.2kb when the application requests 38.4kb.
+ spd_shi Use 230.4kb when the application requests 38.4kb.
+ spd_warp Use 460.8kb when the application requests 38.4kb.
spd_normal Use 38.4kb when the application requests 38.4kb.
+ spd_cust Use the custom divisor to set the speed when the
+ application requests 38.4kb.
+ divisor This option sets the custom divisor.
+ baud_base This option sets the base baud rate.
-----------------------------------------------------------------------------
6. Troubleshooting
@@ -375,8 +483,9 @@
possible. If all the possible solutions fail, please contact our technical
support team to get more help.
- Error msg: More than 4 Moxa Smartio family boards found. Fifth board and
- after are ignored.
+
+ Error msg: More than 4 Moxa Smartio/Industio family boards found. Fifth board
+ and after are ignored.
Solution:
To avoid this problem, please unplug fifth and after board, because Moxa
driver supports up to 4 boards.
@@ -384,7 +493,7 @@
Error msg: Request_irq fail, IRQ(?) may be conflict with another device.
Solution:
Other PCI or ISA devices occupy the assigned IRQ. If you are not sure
- which device causes the situation,please check /proc/interrupts to find
+ which device causes the situation, please check /proc/interrupts to find
free IRQ and simply change another free IRQ for Moxa board.
Error msg: Board #: C1xx Series(CAP=xxx) interrupt number invalid.
@@ -397,15 +506,18 @@
Moxa ISA board needs an interrupt vector.Please refer to user's manual
"Hardware Installation" chapter to set interrupt vector.
- Error msg: Couldn't install MOXA Smartio family driver!
+ Error msg: Couldn't install MOXA Smartio/Industio family driver!
Solution:
Load Moxa driver fail, the major number may conflict with other devices.
- Please refer to previous section 3.5 to change a free major number for
+ Please refer to previous section 3.6 to change a free major number for
Moxa driver.
- Error msg: Couldn't install MOXA Smartio family callout driver!
+ Error msg: Couldn't install MOXA Smartio/Industio family callout driver!
Solution:
Load Moxa callout driver fail, the callout device major number may
- conflict with other devices. Please refer to previous section 3.5 to
+ conflict with other devices. Please refer to previous section 3.6 to
change a free callout device major number for Moxa driver.
+
+
-----------------------------------------------------------------------------
+
diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/unaligned-memory-access.txt
index b0472ac..f866c72 100644
--- a/Documentation/unaligned-memory-access.txt
+++ b/Documentation/unaligned-memory-access.txt
@@ -218,9 +218,35 @@
where the source or destination (or both) are of type u8* or unsigned char*.
Due to the byte-wise nature of this operation, unaligned accesses are avoided.
+
+Alignment vs. Networking
+========================
+
+On architectures that require aligned loads, networking requires that the IP
+header is aligned on a four-byte boundary to optimise the IP stack. For
+regular ethernet hardware, the constant NET_IP_ALIGN is used. On most
+architectures this constant has the value 2 because the normal ethernet
+header is 14 bytes long, so in order to get proper alignment one needs to
+DMA to an address which can be expressed as 4*n + 2. One notable exception
+here is powerpc which defines NET_IP_ALIGN to 0 because DMA to unaligned
+addresses can be very expensive and dwarf the cost of unaligned loads.
+
+For some ethernet hardware that cannot DMA to unaligned addresses like
+4*n+2 or non-ethernet hardware, this can be a problem, and it is then
+required to copy the incoming frame into an aligned buffer. Because this is
+unnecessary on architectures that can do unaligned accesses, the code can be
+made dependent on CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS like so:
+
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ skb = original skb
+#else
+ skb = copy skb
+#endif
+
--
-Author: Daniel Drake <dsd@gentoo.org>
+Authors: Daniel Drake <dsd@gentoo.org>,
+ Johannes Berg <johannes@sipsolutions.net>
With help from: Alan Cox, Avuton Olrich, Heikki Orsila, Jan Engelhardt,
-Johannes Berg, Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock,
-Uli Kunitz, Vadim Lobanov
+Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock, Uli Kunitz,
+Vadim Lobanov
diff --git a/MAINTAINERS b/MAINTAINERS
index be05ef9..4cbf601 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1043,6 +1043,12 @@
L: linux-scsi@vger.kernel.org
S: Supported
+BT8XXGPIO DRIVER
+P: Michael Buesch
+M: mb@bu3sch.de
+W: http://bu3sch.de/btgpio.php
+S: Maintained
+
BTTV VIDEO4LINUX DRIVER
P: Mauro Carvalho Chehab
M: mchehab@infradead.org
diff --git a/arch/Kconfig b/arch/Kconfig
index 6093c0b..b0fabfa 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -27,6 +27,25 @@
for kernel debugging, non-intrusive instrumentation and testing.
If in doubt, say "N".
+config HAVE_EFFICIENT_UNALIGNED_ACCESS
+ def_bool n
+ help
+ Some architectures are unable to perform unaligned accesses
+ without the use of get_unaligned/put_unaligned. Others are
+ unable to perform such accesses efficiently (e.g. trap on
+ unaligned access and require fixing it up in the exception
+ handler.)
+
+ This symbol should be selected by an architecture if it can
+ perform unaligned accesses efficiently to allow different
+ code paths to be selected for these cases. Some network
+ drivers, for example, could opt to not fix up alignment
+ problems with received packets if doing so would not help
+ much.
+
+ See Documentation/unaligned-memory-access.txt for more
+ information on the topic of unaligned memory accesses.
+
config KRETPROBES
def_bool y
depends on KPROBES && HAVE_KRETPROBES
diff --git a/arch/alpha/boot/misc.c b/arch/alpha/boot/misc.c
index c00646b..3047a1b 100644
--- a/arch/alpha/boot/misc.c
+++ b/arch/alpha/boot/misc.c
@@ -78,8 +78,6 @@
static int fill_inbuf(void);
static void flush_window(void);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
static char *input_data;
static int input_data_size;
@@ -88,51 +86,18 @@
static ulg output_ptr;
static ulg bytes_out;
-static void *malloc(int size);
-static void free(void *where);
static void error(char *m);
static void gzip_mark(void **);
static void gzip_release(void **);
extern int end;
static ulg free_mem_ptr;
-static ulg free_mem_ptr_end;
+static ulg free_mem_end_ptr;
#define HEAP_SIZE 0x3000
#include "../../../lib/inflate.c"
-static void *malloc(int size)
-{
- void *p;
-
- if (size <0) error("Malloc error");
- if (free_mem_ptr <= 0) error("Memory error");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *)free_mem_ptr;
- free_mem_ptr += size;
-
- if (free_mem_ptr >= free_mem_ptr_end)
- error("Out of memory");
- return p;
-}
-
-static void free(void *where)
-{ /* gzip_mark & gzip_release do the free */
-}
-
-static void gzip_mark(void **ptr)
-{
- *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
- free_mem_ptr = (long) *ptr;
-}
-
/* ===========================================================================
* Fill the input buffer. This is called only when the buffer is empty
* and at least one byte is really needed.
@@ -193,7 +158,7 @@
/* FIXME FIXME FIXME */
free_mem_ptr = (ulg)output_start + ksize;
- free_mem_ptr_end = (ulg)output_start + ksize + 0x200000;
+ free_mem_end_ptr = (ulg)output_start + ksize + 0x200000;
/* FIXME FIXME FIXME */
/* put in temp area to reduce initial footprint */
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 6fb4f03..dabb015 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -268,7 +268,7 @@
select GENERIC_GPIO
select HAVE_CLK
select HAVE_CLK
- select HAVE_GPIO_LIB
+ select ARCH_REQUIRE_GPIOLIB
help
This enables support for the Cirrus EP93xx series of CPUs.
@@ -447,7 +447,7 @@
select ARCH_MTD_XIP
select GENERIC_GPIO
select HAVE_CLK
- select HAVE_GPIO_LIB
+ select ARCH_REQUIRE_GPIOLIB
select GENERIC_TIME
select GENERIC_CLOCKEVENTS
select TICK_ONESHOT
@@ -479,7 +479,7 @@
select GENERIC_CLOCKEVENTS
select HAVE_CLK
select TICK_ONESHOT
- select HAVE_GPIO_LIB
+ select ARCH_REQUIRE_GPIOLIB
help
Support for StrongARM 11x0 based boards.
@@ -522,7 +522,7 @@
bool "TI OMAP"
select GENERIC_GPIO
select HAVE_CLK
- select HAVE_GPIO_LIB
+ select ARCH_REQUIRE_GPIOLIB
select GENERIC_TIME
select GENERIC_CLOCKEVENTS
help
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 9b44402..7145cc7 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -217,8 +217,6 @@
static int fill_inbuf(void);
static void flush_window(void);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
extern char input_data[];
extern char input_data_end[];
@@ -227,65 +225,22 @@
static ulg output_ptr;
static ulg bytes_out;
-static void *malloc(int size);
-static void free(void *where);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
static void putstr(const char *);
extern int end;
static ulg free_mem_ptr;
-static ulg free_mem_ptr_end;
+static ulg free_mem_end_ptr;
-#define HEAP_SIZE 0x3000
+#ifdef STANDALONE_DEBUG
+#define NO_INFLATE_MALLOC
+#endif
+
+#define ARCH_HAS_DECOMP_WDOG
#include "../../../../lib/inflate.c"
-#ifndef STANDALONE_DEBUG
-static void *malloc(int size)
-{
- void *p;
-
- if (size <0) error("Malloc error");
- if (free_mem_ptr <= 0) error("Memory error");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *)free_mem_ptr;
- free_mem_ptr += size;
-
- if (free_mem_ptr >= free_mem_ptr_end)
- error("Out of memory");
- return p;
-}
-
-static void free(void *where)
-{ /* gzip_mark & gzip_release do the free */
-}
-
-static void gzip_mark(void **ptr)
-{
- arch_decomp_wdog();
- *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
- arch_decomp_wdog();
- free_mem_ptr = (long) *ptr;
-}
-#else
-static void gzip_mark(void **ptr)
-{
-}
-
-static void gzip_release(void **ptr)
-{
-}
-#endif
-
/* ===========================================================================
* Fill the input buffer. This is called only when the buffer is empty
* and at least one byte is really needed.
@@ -348,7 +303,7 @@
{
output_data = (uch *)output_start; /* Points to kernel start */
free_mem_ptr = free_mem_ptr_p;
- free_mem_ptr_end = free_mem_ptr_end_p;
+ free_mem_end_ptr = free_mem_ptr_end_p;
__machine_arch_type = arch_id;
arch_decomp_setup();
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index 5ee39e1..d28513f1 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -296,8 +296,7 @@
unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ kretprobe_hash_lock(current, &head, &flags);
/*
* It is possible to have multiple instances associated with a given
@@ -337,7 +336,7 @@
}
kretprobe_assert(ri, orig_ret_address, trampoline_address);
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ kretprobe_hash_unlock(current, &flags);
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
@@ -347,7 +346,6 @@
return (void *)orig_ret_address;
}
-/* Called with kretprobe_lock held. */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c
index 1903a34..d8e9c2c 100644
--- a/arch/arm/plat-omap/gpio.c
+++ b/arch/arm/plat-omap/gpio.c
@@ -1488,6 +1488,9 @@
bank->chip.set = gpio_set;
if (bank_is_mpuio(bank)) {
bank->chip.label = "mpuio";
+#ifdef CONFIG_ARCH_OMAP1
+ bank->chip.dev = &omap_mpuio_device.dev;
+#endif
bank->chip.base = OMAP_MPUIO(0);
} else {
bank->chip.label = "gpio";
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index df4adef..7c239a9 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -88,7 +88,7 @@
select SUBARCH_AVR32B
select MMU
select PERFORMANCE_COUNTERS
- select HAVE_GPIO_LIB
+ select ARCH_REQUIRE_GPIOLIB
select GENERIC_ALLOCATOR
#
diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c
index 60da03b..296294f 100644
--- a/arch/avr32/mach-at32ap/pio.c
+++ b/arch/avr32/mach-at32ap/pio.c
@@ -360,6 +360,8 @@
pio->chip.label = pio->name;
pio->chip.base = pdev->id * 32;
pio->chip.ngpio = 32;
+ pio->chip.dev = &pdev->dev;
+ pio->chip.owner = THIS_MODULE;
pio->chip.direction_input = direction_input;
pio->chip.get = gpio_get;
diff --git a/arch/cris/arch-v10/boot/compressed/misc.c b/arch/cris/arch-v10/boot/compressed/misc.c
index 18e13bc..d933c89 100644
--- a/arch/cris/arch-v10/boot/compressed/misc.c
+++ b/arch/cris/arch-v10/boot/compressed/misc.c
@@ -102,50 +102,16 @@
static long bytes_out = 0;
static uch *output_data;
static unsigned long output_ptr = 0;
-
-static void *malloc(int size);
-static void free(void *where);
-static void gzip_mark(void **);
-static void gzip_release(void **);
-
static void puts(const char *);
/* the "heap" is put directly after the BSS ends, at end */
extern int _end;
static long free_mem_ptr = (long)&_end;
+static long free_mem_end_ptr;
#include "../../../../../lib/inflate.c"
-static void *malloc(int size)
-{
- void *p;
-
- if (size < 0)
- error("Malloc error");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *)free_mem_ptr;
- free_mem_ptr += size;
-
- return p;
-}
-
-static void free(void *where)
-{ /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
- *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
- free_mem_ptr = (long) *ptr;
-}
-
/* decompressor info and error messages to serial console */
static void
diff --git a/arch/cris/arch-v32/boot/compressed/misc.c b/arch/cris/arch-v32/boot/compressed/misc.c
index 55b2695..3595e16 100644
--- a/arch/cris/arch-v32/boot/compressed/misc.c
+++ b/arch/cris/arch-v32/boot/compressed/misc.c
@@ -89,20 +89,14 @@
static void flush_window(void);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
extern char *input_data; /* lives in head.S */
-static long bytes_out = 0;
+static long bytes_out;
static uch *output_data;
-static unsigned long output_ptr = 0;
+static unsigned long output_ptr;
-static void *malloc(int size);
-static void free(void *where);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
static void puts(const char *);
@@ -110,37 +104,10 @@
extern int _end;
static long free_mem_ptr = (long)&_end;
+static long free_mem_end_ptr;
#include "../../../../../lib/inflate.c"
-static void *malloc(int size)
-{
- void *p;
-
- if (size <0) error("Malloc error");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *)free_mem_ptr;
- free_mem_ptr += size;
-
- return p;
-}
-
-static void free(void *where)
-{ /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
- *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
- free_mem_ptr = (long) *ptr;
-}
-
/* decompressor info and error messages to serial console */
static inline void
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 085dc6e..396ab05 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -203,20 +203,6 @@
Read the instructions in <file:Documentation/Changes> pertaining to
pseudo terminals. It's safe to say N.
-config UNIX98_PTY_COUNT
- int "Maximum number of Unix98 PTYs in use (0-2048)"
- depends on UNIX98_PTYS
- default "256"
- help
- The maximum number of Unix98 PTYs that can be used at any one time.
- The default is 256, and should be enough for desktop systems. Server
- machines which support incoming telnet/rlogin/ssh connections and/or
- serve several X terminals may want to increase this: every incoming
- connection and every xterm uses up one PTY.
-
- When not in use, each additional set of 256 PTYs occupy
- approximately 8 KB of kernel memory on 32-bit architectures.
-
source "drivers/char/pcmcia/Kconfig"
source "drivers/serial/Kconfig"
diff --git a/arch/h8300/boot/compressed/misc.c b/arch/h8300/boot/compressed/misc.c
index 8450745..51ab6cb 100644
--- a/arch/h8300/boot/compressed/misc.c
+++ b/arch/h8300/boot/compressed/misc.c
@@ -67,8 +67,6 @@
static int fill_inbuf(void);
static void flush_window(void);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
extern char input_data[];
extern int input_len;
@@ -77,11 +75,7 @@
static uch *output_data;
static unsigned long output_ptr = 0;
-static void *malloc(int size);
-static void free(void *where);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
int puts(const char *);
@@ -98,38 +92,6 @@
#define TDR *((volatile unsigned char *)0xffff8b)
#define SSR *((volatile unsigned char *)0xffff8c)
-static void *malloc(int size)
-{
- void *p;
-
- if (size <0) error("Malloc error");
- if (free_mem_ptr == 0) error("Memory error");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *)free_mem_ptr;
- free_mem_ptr += size;
-
- if (free_mem_ptr >= free_mem_end_ptr)
- error("Out of memory");
-
- return p;
-}
-
-static void free(void *where)
-{ /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
- *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
- free_mem_ptr = (long) *ptr;
-}
-
int puts(const char *s)
{
return 0;
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 233434f..f07688d 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -429,8 +429,7 @@
((struct fnptr *)kretprobe_trampoline)->ip;
INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ kretprobe_hash_lock(current, &head, &flags);
/*
* It is possible to have multiple instances associated with a given
@@ -485,7 +484,7 @@
kretprobe_assert(ri, orig_ret_address, trampoline_address);
reset_current_kprobe();
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ kretprobe_hash_unlock(current, &flags);
preempt_enable_no_resched();
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
@@ -500,7 +499,6 @@
return 1;
}
-/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c
index 600d40e..d394292 100644
--- a/arch/m32r/boot/compressed/misc.c
+++ b/arch/m32r/boot/compressed/misc.c
@@ -70,8 +70,6 @@
static int fill_inbuf(void);
static void flush_window(void);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
static unsigned char *input_data;
static int input_len;
@@ -82,9 +80,6 @@
#include "m32r_sio.c"
-static void *malloc(int size);
-static void free(void *where);
-
static unsigned long free_mem_ptr;
static unsigned long free_mem_end_ptr;
@@ -92,38 +87,6 @@
#include "../../../../lib/inflate.c"
-static void *malloc(int size)
-{
- void *p;
-
- if (size <0) error("Malloc error");
- if (free_mem_ptr == 0) error("Memory error");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *)free_mem_ptr;
- free_mem_ptr += size;
-
- if (free_mem_ptr >= free_mem_end_ptr)
- error("Out of memory");
-
- return p;
-}
-
-static void free(void *where)
-{ /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
- *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
- free_mem_ptr = (long) *ptr;
-}
-
void* memset(void* s, int c, size_t n)
{
int i;
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index b9c754f..b4c4eaa 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -713,7 +713,7 @@
config GPIO_TXX9
select GENERIC_GPIO
- select HAVE_GPIO_LIB
+ select ARCH_REQUIRE_GPIOLIB
bool
config CFE
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index c266211..2fefb14 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -11,7 +11,6 @@
#include <linux/file.h>
#include <linux/smp_lock.h>
#include <linux/highuid.h>
-#include <linux/dirent.h>
#include <linux/resource.h>
#include <linux/highmem.h>
#include <linux/time.h>
diff --git a/arch/mn10300/boot/compressed/misc.c b/arch/mn10300/boot/compressed/misc.c
index ded207e..f673383 100644
--- a/arch/mn10300/boot/compressed/misc.c
+++ b/arch/mn10300/boot/compressed/misc.c
@@ -153,26 +153,9 @@
static unsigned long output_ptr;
-static void *malloc(int size);
-
-static inline void free(void *where)
-{ /* Don't care */
-}
-
static unsigned long free_mem_ptr = (unsigned long) &end;
static unsigned long free_mem_end_ptr = (unsigned long) &end + 0x90000;
-static inline void gzip_mark(void **ptr)
-{
- kputs(".");
- *ptr = (void *) free_mem_ptr;
-}
-
-static inline void gzip_release(void **ptr)
-{
- free_mem_ptr = (unsigned long) *ptr;
-}
-
#define INPLACE_MOVE_ROUTINE 0x1000
#define LOW_BUFFER_START 0x2000
#define LOW_BUFFER_END 0x90000
@@ -186,26 +169,6 @@
#include "../../../../lib/inflate.c"
-static void *malloc(int size)
-{
- void *p;
-
- if (size < 0)
- error("Malloc error\n");
- if (!free_mem_ptr)
- error("Memory error\n");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *) free_mem_ptr;
- free_mem_ptr += size;
-
- if (free_mem_ptr >= free_mem_end_ptr)
- error("\nOut of memory\n");
-
- return p;
-}
-
static inline void scroll(void)
{
int i;
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a487671..fe88418 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -110,8 +110,10 @@
default y
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE
+ select ARCH_WANT_OPTIONAL_GPIOLIB
select HAVE_IDE
select HAVE_IOREMAP_PROT
+ select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_KPROBES
select HAVE_ARCH_KGDB
select HAVE_KRETPROBES
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 4ba2af1..de79915 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -144,7 +144,6 @@
kcb->kprobe_saved_msr = regs->msr;
}
-/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
@@ -312,8 +311,7 @@
unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ kretprobe_hash_lock(current, &head, &flags);
/*
* It is possible to have multiple instances associated with a given
@@ -352,7 +350,7 @@
regs->nip = orig_ret_address;
reset_current_kprobe();
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ kretprobe_hash_unlock(current, &flags);
preempt_enable_no_resched();
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig
index 9a51675..696a5ee 100644
--- a/arch/powerpc/platforms/52xx/Kconfig
+++ b/arch/powerpc/platforms/52xx/Kconfig
@@ -47,7 +47,7 @@
config PPC_MPC5200_GPIO
bool "MPC5200 GPIO support"
depends on PPC_MPC52xx
+ select ARCH_REQUIRE_GPIOLIB
select GENERIC_GPIO
- select HAVE_GPIO_LIB
help
Enable gpiolib support for mpc5200 based boards
diff --git a/arch/powerpc/sysdev/qe_lib/Kconfig b/arch/powerpc/sysdev/qe_lib/Kconfig
index 4bb18f5..1ce5464 100644
--- a/arch/powerpc/sysdev/qe_lib/Kconfig
+++ b/arch/powerpc/sysdev/qe_lib/Kconfig
@@ -29,7 +29,7 @@
bool "QE GPIO support"
depends on QUICC_ENGINE
select GENERIC_GPIO
- select HAVE_GPIO_LIB
+ select ARCH_REQUIRE_GPIOLIB
help
Say Y here if you're going to use hardware that connects to the
QE GPIOs.
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index eb530b4..2ed8812 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -565,6 +565,7 @@
depends on 64BIT && EXPERIMENTAL
select VIRTIO
select VIRTIO_RING
+ select VIRTIO_CONSOLE
help
Select this option if you want to run the kernel under s390 linux
endmenu
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 288ad49..4f82e5b 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -270,7 +270,6 @@
__ctl_store(kcb->kprobe_saved_ctl, 9, 11);
}
-/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
@@ -377,8 +376,7 @@
unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ kretprobe_hash_lock(current, &head, &flags);
/*
* It is possible to have multiple instances associated with a given
@@ -417,7 +415,7 @@
regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
reset_current_kprobe();
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ kretprobe_hash_unlock(current, &flags);
preempt_enable_no_resched();
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index b358e18..62122ba 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -54,6 +54,7 @@
#include <asm/sections.h>
#include <asm/ebcdic.h>
#include <asm/compat.h>
+#include <asm/kvm_virtio.h>
long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY |
PSW_MASK_MCHECK | PSW_DEFAULT_KEY);
@@ -766,7 +767,8 @@
printk("We are running under VM (64 bit mode)\n");
else if (MACHINE_IS_KVM) {
printk("We are running under KVM (64 bit mode)\n");
- add_preferred_console("ttyS", 1, NULL);
+ add_preferred_console("hvc", 0, NULL);
+ s390_virtio_console_init();
} else
printk("We are running native (64 bit mode)\n");
#endif /* CONFIG_64BIT */
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 212d618..632b13e 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -9,7 +9,6 @@
#include <linux/device.h>
#include <linux/bootmem.h>
#include <linux/sched.h>
-#include <linux/kthread.h>
#include <linux/workqueue.h>
#include <linux/cpu.h>
#include <linux/smp.h>
@@ -230,20 +229,9 @@
}
}
-static int topology_kthread(void *data)
-{
- arch_reinit_sched_domains();
- return 0;
-}
-
static void topology_work_fn(struct work_struct *work)
{
- /* We can't call arch_reinit_sched_domains() from a multi-threaded
- * workqueue context since it may deadlock in case of cpu hotplug.
- * So we have to create a kernel thread in order to call
- * arch_reinit_sched_domains().
- */
- kthread_run(topology_kthread, NULL, "topology_update");
+ arch_reinit_sched_domains();
}
void topology_schedule_update(void)
diff --git a/arch/sh/boot/compressed/misc_32.c b/arch/sh/boot/compressed/misc_32.c
index adcea31..f386997 100644
--- a/arch/sh/boot/compressed/misc_32.c
+++ b/arch/sh/boot/compressed/misc_32.c
@@ -74,8 +74,6 @@
static int fill_inbuf(void);
static void flush_window(void);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
extern char input_data[];
extern int input_len;
@@ -84,11 +82,7 @@
static uch *output_data;
static unsigned long output_ptr = 0;
-static void *malloc(int size);
-static void free(void *where);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
int puts(const char *);
@@ -101,38 +95,6 @@
#include "../../../../lib/inflate.c"
-static void *malloc(int size)
-{
- void *p;
-
- if (size <0) error("Malloc error");
- if (free_mem_ptr == 0) error("Memory error");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *)free_mem_ptr;
- free_mem_ptr += size;
-
- if (free_mem_ptr >= free_mem_end_ptr)
- error("Out of memory");
-
- return p;
-}
-
-static void free(void *where)
-{ /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
- *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
- free_mem_ptr = (long) *ptr;
-}
-
#ifdef CONFIG_SH_STANDARD_BIOS
size_t strlen(const char *s)
{
diff --git a/arch/sh/boot/compressed/misc_64.c b/arch/sh/boot/compressed/misc_64.c
index a006ef8..2941657 100644
--- a/arch/sh/boot/compressed/misc_64.c
+++ b/arch/sh/boot/compressed/misc_64.c
@@ -72,8 +72,6 @@
static int fill_inbuf(void);
static void flush_window(void);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
extern char input_data[];
extern int input_len;
@@ -82,11 +80,7 @@
static uch *output_data;
static unsigned long output_ptr = 0;
-static void *malloc(int size);
-static void free(void *where);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
static void puts(const char *);
@@ -99,40 +93,6 @@
#include "../../../../lib/inflate.c"
-static void *malloc(int size)
-{
- void *p;
-
- if (size < 0)
- error("Malloc error\n");
- if (free_mem_ptr == 0)
- error("Memory error\n");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *) free_mem_ptr;
- free_mem_ptr += size;
-
- if (free_mem_ptr >= free_mem_end_ptr)
- error("\nOut of memory\n");
-
- return p;
-}
-
-static void free(void *where)
-{ /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
- *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
- free_mem_ptr = (long) *ptr;
-}
-
void puts(const char *s)
{
}
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 789724e..375de7c 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -298,20 +298,6 @@
Read the instructions in <file:Documentation/Changes> pertaining to
pseudo terminals. It's safe to say N.
-config UNIX98_PTY_COUNT
- int "Maximum number of Unix98 PTYs in use (0-2048)"
- depends on UNIX98_PTYS
- default "256"
- help
- The maximum number of Unix98 PTYs that can be used at any one time.
- The default is 256, and should be enough for desktop systems. Server
- machines which support incoming telnet/rlogin/ssh connections and/or
- serve several X terminals may want to increase this: every incoming
- connection and every xterm uses up one PTY.
-
- When not in use, each additional set of 256 PTYs occupy
- approximately 8 KB of kernel memory on 32-bit architectures.
-
endmenu
source "fs/Kconfig"
diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c
index f43b5d7..201a6e5 100644
--- a/arch/sparc64/kernel/kprobes.c
+++ b/arch/sparc64/kernel/kprobes.c
@@ -478,9 +478,9 @@
return 0;
}
-/* Called with kretprobe_lock held. The value stored in the return
- * address register is actually 2 instructions before where the
- * callee will return to. Sequences usually look something like this
+/* The value stored in the return address register is actually 2
+ * instructions before where the callee will return to.
+ * Sequences usually look something like this
*
* call some_function <--- return register points here
* nop <--- call delay slot
@@ -512,8 +512,7 @@
unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ kretprobe_hash_lock(current, &head, &flags);
/*
* It is possible to have multiple instances associated with a given
@@ -553,7 +552,7 @@
regs->tnpc = orig_ret_address + 4;
reset_current_kprobe();
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ kretprobe_hash_unlock(current, &flags);
preempt_enable_no_resched();
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b2ddfcf..e3cba0b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,11 +23,13 @@
select HAVE_OPROFILE
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
+ select ARCH_WANT_OPTIONAL_GPIOLIB if !X86_RDC321X
select HAVE_KRETPROBES
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE
select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
select HAVE_ARCH_KGDB if !X86_VOYAGER
+ select HAVE_EFFICIENT_UNALIGNED_ACCESS
config ARCH_DEFCONFIG
string
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index bc5553b..9fea737 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -182,8 +182,6 @@
static int fill_inbuf(void);
static void flush_window(void);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
/*
* This is set up by the setup-routine at boot-time
@@ -196,9 +194,6 @@
static long bytes_out;
-static void *malloc(int size);
-static void free(void *where);
-
static void *memset(void *s, int c, unsigned n);
static void *memcpy(void *dest, const void *src, unsigned n);
@@ -220,40 +215,6 @@
#include "../../../../lib/inflate.c"
-static void *malloc(int size)
-{
- void *p;
-
- if (size < 0)
- error("Malloc error");
- if (free_mem_ptr <= 0)
- error("Memory error");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *)free_mem_ptr;
- free_mem_ptr += size;
-
- if (free_mem_ptr >= free_mem_end_ptr)
- error("Out of memory");
-
- return p;
-}
-
-static void free(void *where)
-{ /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
- *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
- free_mem_ptr = (memptr) *ptr;
-}
-
static void scroll(void)
{
int i;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 43c019f..6c27679 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -431,7 +431,6 @@
regs->ip = (unsigned long)p->ainsn.insn;
}
-/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
@@ -682,8 +681,7 @@
unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ kretprobe_hash_lock(current, &head, &flags);
/* fixup registers */
#ifdef CONFIG_X86_64
regs->cs = __KERNEL_CS;
@@ -732,7 +730,7 @@
kretprobe_assert(ri, orig_ret_address, trampoline_address);
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ kretprobe_hash_unlock(current, &flags);
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 151f2d1..19e7fc7 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -29,6 +29,7 @@
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
+#include <linux/crash_dump.h>
#include <linux/dma-mapping.h>
#include <linux/bitops.h>
#include <linux/pci_ids.h>
@@ -167,6 +168,8 @@
static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev);
static void calioc2_tce_cache_blast(struct iommu_table *tbl);
static void calioc2_dump_error_regs(struct iommu_table *tbl);
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl);
+static void get_tce_space_from_tar(void);
static struct cal_chipset_ops calgary_chip_ops = {
.handle_quirks = calgary_handle_quirks,
@@ -830,7 +833,11 @@
tbl = pci_iommu(dev->bus);
tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
- tce_free(tbl, 0, tbl->it_size);
+
+ if (is_kdump_kernel())
+ calgary_init_bitmap_from_tce_table(tbl);
+ else
+ tce_free(tbl, 0, tbl->it_size);
if (is_calgary(dev->device))
tbl->chip_ops = &calgary_chip_ops;
@@ -1209,6 +1216,10 @@
if (ret)
return ret;
+ /* Purely for kdump kernel case */
+ if (is_kdump_kernel())
+ get_tce_space_from_tar();
+
do {
dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
if (!dev)
@@ -1339,6 +1350,61 @@
return (val != 0xffffffff);
}
+/*
+ * calgary_init_bitmap_from_tce_table():
+ * Function for kdump case. In the second/kdump kernel initialize
+ * the bitmap based on the tce table entries obtained from first kernel
+ */
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
+{
+ u64 *tp;
+ unsigned int index;
+ tp = ((u64 *)tbl->it_base);
+ for (index = 0 ; index < tbl->it_size; index++) {
+ if (*tp != 0x0)
+ set_bit(index, tbl->it_map);
+ tp++;
+ }
+}
+
+/*
+ * get_tce_space_from_tar():
+ * Function for kdump case. Get the tce tables from first kernel
+ * by reading the contents of the base address register of calgary iommu
+ */
+static void get_tce_space_from_tar()
+{
+ int bus;
+ void __iomem *target;
+ unsigned long tce_space;
+
+ for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
+ struct calgary_bus_info *info = &bus_info[bus];
+ unsigned short pci_device;
+ u32 val;
+
+ val = read_pci_config(bus, 0, 0, 0);
+ pci_device = (val & 0xFFFF0000) >> 16;
+
+ if (!is_cal_pci_dev(pci_device))
+ continue;
+ if (info->translation_disabled)
+ continue;
+
+ if (calgary_bus_has_devices(bus, pci_device) ||
+ translate_empty_slots) {
+ target = calgary_reg(bus_info[bus].bbar,
+ tar_offset(bus));
+ tce_space = be64_to_cpu(readq(target));
+ tce_space = tce_space & TAR_SW_BITS;
+
+ tce_space = tce_space & (~specified_table_size);
+ info->tce_space = (u64 *)__va(tce_space);
+ }
+ }
+ return;
+}
+
void __init detect_calgary(void)
{
int bus;
@@ -1394,7 +1460,8 @@
return;
}
- specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE);
+ specified_table_size = determine_tce_table_size((is_kdump_kernel() ?
+ saved_max_pfn : max_pfn) * PAGE_SIZE);
for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
struct calgary_bus_info *info = &bus_info[bus];
@@ -1412,10 +1479,16 @@
if (calgary_bus_has_devices(bus, pci_device) ||
translate_empty_slots) {
- tbl = alloc_tce_table();
- if (!tbl)
- goto cleanup;
- info->tce_space = tbl;
+ /*
+ * If it is kdump kernel, find and use tce tables
+ * from first kernel, else allocate tce tables here
+ */
+ if (!is_kdump_kernel()) {
+ tbl = alloc_tce_table();
+ if (!tbl)
+ goto cleanup;
+ info->tce_space = tbl;
+ }
calgary_found = 1;
}
}
diff --git a/block/ioctl.c b/block/ioctl.c
index 52d6385..77185e5 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -17,6 +17,7 @@
long long start, length;
int part;
int i;
+ int err;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
@@ -61,9 +62,9 @@
}
}
/* all seems OK */
- add_partition(disk, part, start, length, ADDPART_FLAG_NONE);
+ err = add_partition(disk, part, start, length, ADDPART_FLAG_NONE);
mutex_unlock(&bdev->bd_mutex);
- return 0;
+ return err;
case BLKPG_DEL_PARTITION:
if (!disk->part[part-1])
return -ENXIO;
diff --git a/drivers/Makefile b/drivers/Makefile
index 808e0ae..54ec5e7 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -5,7 +5,7 @@
# Rewritten to use lists instead of if-statements.
#
-obj-$(CONFIG_HAVE_GPIO_LIB) += gpio/
+obj-y += gpio/
obj-$(CONFIG_PCI) += pci/
obj-$(CONFIG_PARISC) += parisc/
obj-$(CONFIG_RAPIDIO) += rapidio/
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index dc7596f..ef3e552 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1273,7 +1273,7 @@
void __iomem *mmio = ap->host->iomap[AHCI_PCI_BAR];
u32 em_ctl;
u32 message[] = {0, 0};
- unsigned int flags;
+ unsigned long flags;
int pmp;
struct ahci_em_priv *emp;
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index b0be1d1..c9c92b0 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -184,7 +184,7 @@
struct device *dev = to_dev(kobj);
struct firmware_priv *fw_priv = dev_get_drvdata(dev);
struct firmware *fw;
- ssize_t ret_count = count;
+ ssize_t ret_count;
mutex_lock(&fw_lock);
fw = fw_priv->fw;
@@ -192,14 +192,8 @@
ret_count = -ENODEV;
goto out;
}
- if (offset > fw->size) {
- ret_count = 0;
- goto out;
- }
- if (offset + ret_count > fw->size)
- ret_count = fw->size - offset;
-
- memcpy(buffer, fw->data + offset, ret_count);
+ ret_count = memory_read_from_buffer(buffer, count, &offset,
+ fw->data, fw->size);
out:
mutex_unlock(&fw_lock);
return ret_count;
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index c04440c..181ebb8 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -6,6 +6,7 @@
#include <linux/hdreg.h>
#include <linux/blkdev.h>
+#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/smp_lock.h>
#include "aoe.h"
@@ -36,7 +37,7 @@
static struct ErrMsg emsgs[NMSG];
static int emsgs_head_idx, emsgs_tail_idx;
-static struct semaphore emsgs_sema;
+static struct completion emsgs_comp;
static spinlock_t emsgs_lock;
static int nblocked_emsgs_readers;
static struct class *aoe_class;
@@ -141,7 +142,7 @@
spin_unlock_irqrestore(&emsgs_lock, flags);
if (nblocked_emsgs_readers)
- up(&emsgs_sema);
+ complete(&emsgs_comp);
}
static ssize_t
@@ -221,7 +222,7 @@
spin_unlock_irqrestore(&emsgs_lock, flags);
- n = down_interruptible(&emsgs_sema);
+ n = wait_for_completion_interruptible(&emsgs_comp);
spin_lock_irqsave(&emsgs_lock, flags);
@@ -269,7 +270,7 @@
printk(KERN_ERR "aoe: can't register char device\n");
return n;
}
- sema_init(&emsgs_sema, 0);
+ init_completion(&emsgs_comp);
spin_lock_init(&emsgs_lock);
aoe_class = class_create(THIS_MODULE, "aoe");
if (IS_ERR(aoe_class)) {
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index dd7ea20..4225109 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -196,6 +196,7 @@
int err;
u64 cap;
u32 v;
+ u32 blk_size;
if (index_to_minor(index) >= 1 << MINORBITS)
return -ENOSPC;
@@ -290,6 +291,13 @@
if (!err)
blk_queue_max_hw_segments(vblk->disk->queue, v);
+ /* Host can optionally specify the block size of the device */
+ err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
+ offsetof(struct virtio_blk_config, blk_size),
+ &blk_size);
+ if (!err)
+ blk_queue_hardsect_size(vblk->disk->queue, blk_size);
+
add_disk(vblk->disk);
return 0;
@@ -330,7 +338,7 @@
static unsigned int features[] = {
VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
- VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO,
+ VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
};
static struct virtio_driver virtio_blk = {
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 67b0757..6c070dc 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -578,11 +578,14 @@
It will automatically be selected if one of the back-end console drivers
is selected.
+config HVC_IRQ
+ bool
config HVC_CONSOLE
bool "pSeries Hypervisor Virtual Console support"
depends on PPC_PSERIES
select HVC_DRIVER
+ select HVC_IRQ
help
pSeries machines when partitioned support a hypervisor virtual
console. This driver allows each pSeries partition to have a console
@@ -593,6 +596,7 @@
depends on PPC_ISERIES
default y
select HVC_DRIVER
+ select HVC_IRQ
help
iSeries machines support a hypervisor virtual console.
@@ -614,13 +618,18 @@
bool "Xen Hypervisor Console support"
depends on XEN
select HVC_DRIVER
+ select HVC_IRQ
default y
help
Xen virtual console device driver
config VIRTIO_CONSOLE
- bool
+ tristate "Virtio console"
+ depends on VIRTIO
select HVC_DRIVER
+ help
+ Virtio console for use with lguest and other hypervisors.
+
config HVCS
tristate "IBM Hypervisor Virtual Console Server support"
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 4b6e736..f7a0d1a 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -48,6 +48,7 @@
obj-$(CONFIG_HVC_RTAS) += hvc_rtas.o
obj-$(CONFIG_HVC_BEAT) += hvc_beat.o
obj-$(CONFIG_HVC_DRIVER) += hvc_console.o
+obj-$(CONFIG_HVC_IRQ) += hvc_irq.o
obj-$(CONFIG_HVC_XEN) += hvc_xen.o
obj-$(CONFIG_VIRTIO_CONSOLE) += virtio_console.o
obj-$(CONFIG_RAW_DRIVER) += raw.o
@@ -63,7 +64,6 @@
obj-$(CONFIG_BFIN_OTP) += bfin-otp.o
obj-$(CONFIG_PRINTER) += lp.o
-obj-$(CONFIG_TIPAR) += tipar.o
obj-$(CONFIG_APM_EMULATION) += apm-emulation.o
diff --git a/drivers/char/ds1302.c b/drivers/char/ds1302.c
index fada6dd..c5e67a6 100644
--- a/drivers/char/ds1302.c
+++ b/drivers/char/ds1302.c
@@ -20,10 +20,11 @@
#include <linux/miscdevice.h>
#include <linux/delay.h>
#include <linux/bcd.h>
+#include <linux/smp_lock.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
-#include <asm/uaccess.h>
#include <asm/system.h>
-#include <asm/io.h>
#include <asm/rtc.h>
#if defined(CONFIG_M32R)
#include <asm/m32r.h>
@@ -153,9 +154,7 @@
/* ioctl that supports RTC_RD_TIME and RTC_SET_TIME (read and set time/date). */
-static int
-rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
- unsigned long arg)
+static long rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
unsigned long flags;
@@ -165,7 +164,9 @@
struct rtc_time rtc_tm;
memset(&rtc_tm, 0, sizeof (struct rtc_time));
+ lock_kernel();
get_rtc_time(&rtc_tm);
+ unlock_kernel();
if (copy_to_user((struct rtc_time*)arg, &rtc_tm, sizeof(struct rtc_time)))
return -EFAULT;
return 0;
@@ -217,6 +218,7 @@
BIN_TO_BCD(mon);
BIN_TO_BCD(yrs);
+ lock_kernel();
local_irq_save(flags);
CMOS_WRITE(yrs, RTC_YEAR);
CMOS_WRITE(mon, RTC_MONTH);
@@ -225,6 +227,7 @@
CMOS_WRITE(min, RTC_MINUTES);
CMOS_WRITE(sec, RTC_SECONDS);
local_irq_restore(flags);
+ unlock_kernel();
/* Notice that at this point, the RTC is updated but
* the kernel is still running with the old time.
@@ -244,8 +247,10 @@
if(copy_from_user(&tcs_val, (int*)arg, sizeof(int)))
return -EFAULT;
+ lock_kernel();
tcs_val = RTC_TCR_PATTERN | (tcs_val & 0x0F);
ds1302_writereg(RTC_TRICKLECHARGER, tcs_val);
+ unlock_kernel();
return 0;
}
default:
@@ -282,7 +287,7 @@
static const struct file_operations rtc_fops = {
.owner = THIS_MODULE,
- .ioctl = rtc_ioctl,
+ .unlocked_ioctl = rtc_ioctl,
};
/* Probe for the chip by writing something to its RAM and try reading it back. */
diff --git a/drivers/char/dsp56k.c b/drivers/char/dsp56k.c
index 33c466a..19b8850 100644
--- a/drivers/char/dsp56k.c
+++ b/drivers/char/dsp56k.c
@@ -36,10 +36,10 @@
#include <linux/smp_lock.h>
#include <linux/firmware.h>
#include <linux/platform_device.h>
+#include <linux/uaccess.h> /* For put_user and get_user */
#include <asm/atarihw.h>
#include <asm/traps.h>
-#include <asm/uaccess.h> /* For put_user and get_user */
#include <asm/dsp56k.h>
@@ -303,8 +303,8 @@
}
}
-static int dsp56k_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
+static long dsp56k_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
{
int dev = iminor(inode) & 0x0f;
void __user *argp = (void __user *)arg;
@@ -331,8 +331,9 @@
if (len > DSP56K_MAX_BINARY_LENGTH) {
return -EINVAL;
}
-
+ lock_kernel();
r = dsp56k_upload(bin, len);
+ unlock_kernel();
if (r < 0) {
return r;
}
@@ -342,12 +343,16 @@
case DSP56K_SET_TX_WSIZE:
if (arg > 4 || arg < 1)
return -EINVAL;
+ lock_kernel();
dsp56k.tx_wsize = (int) arg;
+ unlock_kernel();
break;
case DSP56K_SET_RX_WSIZE:
if (arg > 4 || arg < 1)
return -EINVAL;
+ lock_kernel();
dsp56k.rx_wsize = (int) arg;
+ unlock_kernel();
break;
case DSP56K_HOST_FLAGS:
{
@@ -359,6 +364,7 @@
if(get_user(out, &hf->out) < 0)
return -EFAULT;
+ lock_kernel();
if ((dir & 0x1) && (out & 0x1))
dsp56k_host_interface.icr |= DSP56K_ICR_HF0;
else if (dir & 0x1)
@@ -373,14 +379,16 @@
if (dsp56k_host_interface.icr & DSP56K_ICR_HF1) status |= 0x2;
if (dsp56k_host_interface.isr & DSP56K_ISR_HF2) status |= 0x4;
if (dsp56k_host_interface.isr & DSP56K_ISR_HF3) status |= 0x8;
-
+ unlock_kernel();
return put_user(status, &hf->status);
}
case DSP56K_HOST_CMD:
if (arg > 31 || arg < 0)
return -EINVAL;
+ lock_kernel();
dsp56k_host_interface.cvr = (u_char)((arg & DSP56K_CVR_HV_MASK) |
DSP56K_CVR_HC);
+ unlock_kernel();
break;
default:
return -EINVAL;
@@ -472,7 +480,7 @@
.owner = THIS_MODULE,
.read = dsp56k_read,
.write = dsp56k_write,
- .ioctl = dsp56k_ioctl,
+ .unlocked_ioctl = dsp56k_ioctl,
.open = dsp56k_open,
.release = dsp56k_release,
};
diff --git a/drivers/char/efirtc.c b/drivers/char/efirtc.c
index d57ca3e..67fbd7a 100644
--- a/drivers/char/efirtc.c
+++ b/drivers/char/efirtc.c
@@ -37,8 +37,9 @@
#include <linux/rtc.h>
#include <linux/proc_fs.h>
#include <linux/efi.h>
+#include <linux/smp_lock.h>
+#include <linux/uaccess.h>
-#include <asm/uaccess.h>
#include <asm/system.h>
#define EFI_RTC_VERSION "0.4"
@@ -51,8 +52,8 @@
static DEFINE_SPINLOCK(efi_rtc_lock);
-static int efi_rtc_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg);
+static long efi_rtc_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg);
#define is_leap(year) \
((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
@@ -146,9 +147,8 @@
}
}
-static int
-efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
- unsigned long arg)
+static long efi_rtc_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
{
efi_status_t status;
@@ -175,13 +175,13 @@
return -EINVAL;
case RTC_RD_TIME:
-
+ lock_kernel();
spin_lock_irqsave(&efi_rtc_lock, flags);
status = efi.get_time(&eft, &cap);
spin_unlock_irqrestore(&efi_rtc_lock,flags);
-
+ unlock_kernel();
if (status != EFI_SUCCESS) {
/* should never happen */
printk(KERN_ERR "efitime: can't read time\n");
@@ -203,11 +203,13 @@
convert_to_efi_time(&wtime, &eft);
+ lock_kernel();
spin_lock_irqsave(&efi_rtc_lock, flags);
status = efi.set_time(&eft);
spin_unlock_irqrestore(&efi_rtc_lock,flags);
+ unlock_kernel();
return status == EFI_SUCCESS ? 0 : -EINVAL;
@@ -223,6 +225,7 @@
convert_to_efi_time(&wtime, &eft);
+ lock_kernel();
spin_lock_irqsave(&efi_rtc_lock, flags);
/*
* XXX Fixme:
@@ -233,16 +236,19 @@
status = efi.set_wakeup_time((efi_bool_t)enabled, &eft);
spin_unlock_irqrestore(&efi_rtc_lock,flags);
+ unlock_kernel();
return status == EFI_SUCCESS ? 0 : -EINVAL;
case RTC_WKALM_RD:
+ lock_kernel();
spin_lock_irqsave(&efi_rtc_lock, flags);
status = efi.get_wakeup_time((efi_bool_t *)&enabled, (efi_bool_t *)&pending, &eft);
spin_unlock_irqrestore(&efi_rtc_lock,flags);
+ unlock_kernel();
if (status != EFI_SUCCESS) return -EINVAL;
@@ -256,7 +262,7 @@
return copy_to_user(&ewp->time, &wtime,
sizeof(struct rtc_time)) ? -EFAULT : 0;
}
- return -EINVAL;
+ return -ENOTTY;
}
/*
@@ -265,8 +271,7 @@
* up things on a close.
*/
-static int
-efi_rtc_open(struct inode *inode, struct file *file)
+static int efi_rtc_open(struct inode *inode, struct file *file)
{
/*
* nothing special to do here
@@ -277,8 +282,7 @@
return 0;
}
-static int
-efi_rtc_close(struct inode *inode, struct file *file)
+static int efi_rtc_close(struct inode *inode, struct file *file)
{
return 0;
}
@@ -289,13 +293,12 @@
static const struct file_operations efi_rtc_fops = {
.owner = THIS_MODULE,
- .ioctl = efi_rtc_ioctl,
+ .unlocked_ioctl = efi_rtc_ioctl,
.open = efi_rtc_open,
.release = efi_rtc_close,
};
-static struct miscdevice efi_rtc_dev=
-{
+static struct miscdevice efi_rtc_dev= {
EFI_RTC_MINOR,
"efirtc",
&efi_rtc_fops
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index fb0a85a..b3f5dbc 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -623,6 +623,7 @@
return -ENXIO;
}
+#if 0
int hpet_unregister(struct hpet_task *tp)
{
struct hpet_dev *devp;
@@ -652,6 +653,7 @@
return 0;
}
+#endif /* 0 */
static ctl_table hpet_table[] = {
{
diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c
index 2f9759d..02aac10 100644
--- a/drivers/char/hvc_console.c
+++ b/drivers/char/hvc_console.c
@@ -27,7 +27,6 @@
#include <linux/init.h>
#include <linux/kbd_kern.h>
#include <linux/kernel.h>
-#include <linux/kref.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/module.h>
@@ -75,23 +74,6 @@
static int sysrq_pressed;
#endif
-struct hvc_struct {
- spinlock_t lock;
- int index;
- struct tty_struct *tty;
- unsigned int count;
- int do_wakeup;
- char *outbuf;
- int outbuf_size;
- int n_outbuf;
- uint32_t vtermno;
- struct hv_ops *ops;
- int irq_requested;
- int irq;
- struct list_head next;
- struct kref kref; /* ref count & hvc_struct lifetime */
-};
-
/* dynamic list of hvc_struct instances */
static LIST_HEAD(hvc_structs);
@@ -298,27 +280,15 @@
return 0;
}
+EXPORT_SYMBOL_GPL(hvc_instantiate);
/* Wake the sleeping khvcd */
-static void hvc_kick(void)
+void hvc_kick(void)
{
hvc_kicked = 1;
wake_up_process(hvc_task);
}
-
-static int hvc_poll(struct hvc_struct *hp);
-
-/*
- * NOTE: This API isn't used if the console adapter doesn't support interrupts.
- * In this case the console is poll driven.
- */
-static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance)
-{
- /* if hvc_poll request a repoll, then kick the hvcd thread */
- if (hvc_poll(dev_instance))
- hvc_kick();
- return IRQ_HANDLED;
-}
+EXPORT_SYMBOL_GPL(hvc_kick);
static void hvc_unthrottle(struct tty_struct *tty)
{
@@ -333,7 +303,6 @@
{
struct hvc_struct *hp;
unsigned long flags;
- int irq = 0;
int rc = 0;
/* Auto increments kref reference if found. */
@@ -352,18 +321,15 @@
tty->low_latency = 1; /* Makes flushes to ldisc synchronous. */
hp->tty = tty;
- /* Save for request_irq outside of spin_lock. */
- irq = hp->irq;
- if (irq)
- hp->irq_requested = 1;
spin_unlock_irqrestore(&hp->lock, flags);
- /* check error, fallback to non-irq */
- if (irq)
- rc = request_irq(irq, hvc_handle_interrupt, IRQF_DISABLED, "hvc_console", hp);
+
+ /* notifier_add may sleep (the IRQ variant calls request_irq): run it unlocked. */
+ if (hp->ops->notifier_add)
+ rc = hp->ops->notifier_add(hp, hp->data);
/*
- * If the request_irq() fails and we return an error. The tty layer
+ * If the notifier fails we return an error. The tty layer
* will call hvc_close() after a failed open but we don't want to clean
* up there so we'll clean up here and clear out the previously set
* tty fields and return the kref reference.
@@ -371,7 +337,6 @@
if (rc) {
spin_lock_irqsave(&hp->lock, flags);
hp->tty = NULL;
- hp->irq_requested = 0;
spin_unlock_irqrestore(&hp->lock, flags);
tty->driver_data = NULL;
kref_put(&hp->kref, destroy_hvc_struct);
@@ -386,7 +351,6 @@
static void hvc_close(struct tty_struct *tty, struct file * filp)
{
struct hvc_struct *hp;
- int irq = 0;
unsigned long flags;
if (tty_hung_up_p(filp))
@@ -404,9 +368,6 @@
spin_lock_irqsave(&hp->lock, flags);
if (--hp->count == 0) {
- if (hp->irq_requested)
- irq = hp->irq;
- hp->irq_requested = 0;
/* We are done with the tty pointer now. */
hp->tty = NULL;
@@ -418,10 +379,8 @@
* waking periodically to check chars_in_buffer().
*/
tty_wait_until_sent(tty, HVC_CLOSE_WAIT);
-
- if (irq)
- free_irq(irq, hp);
-
+ if (hp->ops->notifier_del) /* may sleep (free_irq): run where free_irq used to be */
+ hp->ops->notifier_del(hp, hp->data);
} else {
if (hp->count < 0)
printk(KERN_ERR "hvc_close %X: oops, count is %d\n",
@@ -436,7 +395,6 @@
{
struct hvc_struct *hp = tty->driver_data;
unsigned long flags;
- int irq = 0;
int temp_open_count;
if (!hp)
@@ -458,13 +416,12 @@
hp->count = 0;
hp->n_outbuf = 0;
hp->tty = NULL;
- if (hp->irq_requested)
- /* Saved for use outside of spin_lock. */
- irq = hp->irq;
- hp->irq_requested = 0;
spin_unlock_irqrestore(&hp->lock, flags);
- if (irq)
- free_irq(irq, hp);
+
+ /* notifier_del may sleep (the IRQ variant calls free_irq): run it unlocked. */
+ if (hp->ops->notifier_del)
+ hp->ops->notifier_del(hp, hp->data);
+
while(temp_open_count) {
--temp_open_count;
kref_put(&hp->kref, destroy_hvc_struct);
@@ -575,7 +532,7 @@
#define HVC_POLL_READ 0x00000001
#define HVC_POLL_WRITE 0x00000002
-static int hvc_poll(struct hvc_struct *hp)
+int hvc_poll(struct hvc_struct *hp)
{
struct tty_struct *tty;
int i, n, poll_mask = 0;
@@ -602,10 +559,10 @@
if (test_bit(TTY_THROTTLED, &tty->flags))
goto throttled;
- /* If we aren't interrupt driven and aren't throttled, we always
+ /* If we aren't notifier driven and aren't throttled, we always
* request a reschedule
*/
- if (hp->irq == 0)
+ if (!hp->irq_requested)
poll_mask |= HVC_POLL_READ;
/* Read data if any */
@@ -674,6 +631,7 @@
return poll_mask;
}
+EXPORT_SYMBOL_GPL(hvc_poll);
/*
* This kthread is either polling or interrupt driven. This is determined by
@@ -733,7 +691,7 @@
.chars_in_buffer = hvc_chars_in_buffer,
};
-struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq,
+struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int data,
struct hv_ops *ops, int outbuf_size)
{
struct hvc_struct *hp;
@@ -754,7 +712,7 @@
memset(hp, 0x00, sizeof(*hp));
hp->vtermno = vtermno;
- hp->irq = irq;
+ hp->data = data;
hp->ops = ops;
hp->outbuf_size = outbuf_size;
hp->outbuf = &((char *)hp)[ALIGN(sizeof(*hp), sizeof(long))];
@@ -784,6 +742,7 @@
return hp;
}
+EXPORT_SYMBOL_GPL(hvc_alloc);
int __devexit hvc_remove(struct hvc_struct *hp)
{
diff --git a/drivers/char/hvc_console.h b/drivers/char/hvc_console.h
index 42ffb17..d9ce109 100644
--- a/drivers/char/hvc_console.h
+++ b/drivers/char/hvc_console.h
@@ -26,6 +26,7 @@
#ifndef HVC_CONSOLE_H
#define HVC_CONSOLE_H
+#include <linux/kref.h>
/*
* This is the max number of console adapters that can/will be found as
@@ -42,24 +43,50 @@
*/
#define HVC_ALLOC_TTY_ADAPTERS 8
+struct hvc_struct {
+ spinlock_t lock; /* taken with spin_lock_irqsave around tty/count updates */
+ int index;
+ struct tty_struct *tty; /* set in open, NULL once closed or hung up */
+ unsigned int count; /* open count; last close does the teardown */
+ int do_wakeup;
+ char *outbuf; /* points just past this struct (see hvc_alloc) */
+ int outbuf_size; /* as passed to hvc_alloc */
+ int n_outbuf;
+ uint32_t vtermno; /* as passed to hvc_alloc */
+ struct hv_ops *ops; /* low level driver callbacks */
+ int irq_requested; /* set by notifier_add_irq; 0 makes hvc_poll ask for a repoll */
+ int data; /* backend value from hvc_alloc, handed to notifier_add/del */
+ struct list_head next;
+ struct kref kref; /* ref count & hvc_struct lifetime */
+};
/* implemented by a low level driver */
struct hv_ops {
int (*get_chars)(uint32_t vtermno, char *buf, int count);
int (*put_chars)(uint32_t vtermno, const char *buf, int count);
-};
-struct hvc_struct;
+ /* Optional hooks, called with hp->data: notifier_add while opening, notifier_del once the last opener is gone */
+ int (*notifier_add)(struct hvc_struct *hp, int irq);
+ void (*notifier_del)(struct hvc_struct *hp, int irq);
+};
/* Register a vterm and a slot index for use as a console (console_init) */
extern int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops);
/* register a vterm for hvc tty operation (module_init or hotplug add) */
-extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int irq,
+extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int data,
struct hv_ops *ops, int outbuf_size);
-/* remove a vterm from hvc tty operation (modele_exit or hotplug remove) */
+/* remove a vterm from hvc tty operation (module_exit or hotplug remove) */
extern int __devexit hvc_remove(struct hvc_struct *hp);
+/* data available */
+int hvc_poll(struct hvc_struct *hp);
+void hvc_kick(void);
+
+/* default notifier for irq based notification */
+extern int notifier_add_irq(struct hvc_struct *hp, int data);
+extern void notifier_del_irq(struct hvc_struct *hp, int data);
+
#if defined(CONFIG_XMON) && defined(CONFIG_SMP)
#include <asm/xmon.h>
diff --git a/drivers/char/hvc_irq.c b/drivers/char/hvc_irq.c
new file mode 100644
index 0000000..73a59cd
--- /dev/null
+++ b/drivers/char/hvc_irq.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright IBM Corp. 2001,2008
+ *
+ * This file contains the IRQ specific code for hvc_console
+ *
+ */
+
+#include <linux/interrupt.h>
+
+#include "hvc_console.h"
+
+static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance)
+{
+ /* dev_instance is the hvc_struct given to request_irq; a nonzero hvc_poll result asks for a repoll, so wake khvcd */
+ if (hvc_poll(dev_instance))
+ hvc_kick();
+ return IRQ_HANDLED;
+}
+
+/*
+ * Default hv_ops notifier callbacks for IRQ based backends; irq == 0 means no interrupt, so the console stays poll driven.
+ */
+int notifier_add_irq(struct hvc_struct *hp, int irq)
+{
+ int rc;
+
+ if (!irq) {
+ hp->irq_requested = 0; /* hvc_poll will then always request a repoll */
+ return 0;
+ }
+ rc = request_irq(irq, hvc_handle_interrupt, IRQF_DISABLED,
+ "hvc_console", hp);
+ if (!rc)
+ hp->irq_requested = 1; /* only mark the IRQ as ours once request_irq succeeded */
+ return rc; /* 0 on success, otherwise the request_irq error */
+}
+
+void notifier_del_irq(struct hvc_struct *hp, int irq)
+{
+ if (!hp->irq_requested) /* only free an IRQ that notifier_add_irq actually obtained */
+ return;
+ free_irq(irq, hp);
+ hp->irq_requested = 0;
+}
diff --git a/drivers/char/hvc_iseries.c b/drivers/char/hvc_iseries.c
index a08f8f9..b71c610 100644
--- a/drivers/char/hvc_iseries.c
+++ b/drivers/char/hvc_iseries.c
@@ -200,6 +200,8 @@
static struct hv_ops hvc_get_put_ops = {
.get_chars = get_chars,
.put_chars = put_chars,
+ .notifier_add = notifier_add_irq,
+ .notifier_del = notifier_del_irq,
};
static int __devinit hvc_vio_probe(struct vio_dev *vdev,
diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c
index 79711aa..93f3840 100644
--- a/drivers/char/hvc_vio.c
+++ b/drivers/char/hvc_vio.c
@@ -80,6 +80,8 @@
static struct hv_ops hvc_get_put_ops = {
.get_chars = filtered_get_chars,
.put_chars = hvc_put_chars,
+ .notifier_add = notifier_add_irq,
+ .notifier_del = notifier_del_irq,
};
static int __devinit hvc_vio_probe(struct vio_dev *vdev,
diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c
index db2ae42..6b70aa6 100644
--- a/drivers/char/hvc_xen.c
+++ b/drivers/char/hvc_xen.c
@@ -100,6 +100,8 @@
static struct hv_ops hvc_ops = {
.get_chars = read_console,
.put_chars = write_console,
+ .notifier_add = notifier_add_irq,
+ .notifier_del = notifier_del_irq,
};
static int __init xen_init(void)
diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c
index 9cb48fc..689f9dc 100644
--- a/drivers/char/ip2/ip2main.c
+++ b/drivers/char/ip2/ip2main.c
@@ -203,7 +203,7 @@
static ssize_t ip2_ipl_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t ip2_ipl_write(struct file *, const char __user *, size_t, loff_t *);
-static int ip2_ipl_ioctl(struct inode *, struct file *, UINT, ULONG);
+static long ip2_ipl_ioctl(struct file *, UINT, ULONG);
static int ip2_ipl_open(struct inode *, struct file *);
static int DumpTraceBuffer(char __user *, int);
@@ -236,7 +236,7 @@
.owner = THIS_MODULE,
.read = ip2_ipl_read,
.write = ip2_ipl_write,
- .ioctl = ip2_ipl_ioctl,
+ .unlocked_ioctl = ip2_ipl_ioctl,
.open = ip2_ipl_open,
};
@@ -2845,10 +2845,10 @@
/* */
/* */
/******************************************************************************/
-static int
-ip2_ipl_ioctl ( struct inode *pInode, struct file *pFile, UINT cmd, ULONG arg )
+static long
+ip2_ipl_ioctl (struct file *pFile, UINT cmd, ULONG arg )
{
- unsigned int iplminor = iminor(pInode);
+ unsigned int iplminor = iminor(pFile->f_path.dentry->d_inode);
int rc = 0;
void __user *argp = (void __user *)arg;
ULONG __user *pIndex = argp;
@@ -2859,6 +2859,8 @@
printk (KERN_DEBUG "IP2IPL: ioctl cmd %d, arg %ld\n", cmd, arg );
#endif
+ lock_kernel();
+
switch ( iplminor ) {
case 0: // IPL device
rc = -EINVAL;
@@ -2919,6 +2921,7 @@
rc = -ENODEV;
break;
}
+ unlock_kernel();
return rc;
}
diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c
index 50243fc..4f8d67f 100644
--- a/drivers/char/mwave/mwavedd.c
+++ b/drivers/char/mwave/mwavedd.c
@@ -86,8 +86,8 @@
static int mwave_open(struct inode *inode, struct file *file);
static int mwave_close(struct inode *inode, struct file *file);
-static int mwave_ioctl(struct inode *inode, struct file *filp,
- unsigned int iocmd, unsigned long ioarg);
+static long mwave_ioctl(struct file *filp, unsigned int iocmd,
+ unsigned long ioarg);
MWAVE_DEVICE_DATA mwave_s_mdd;
@@ -119,16 +119,16 @@
return retval;
}
-static int mwave_ioctl(struct inode *inode, struct file *file,
- unsigned int iocmd, unsigned long ioarg)
+static long mwave_ioctl(struct file *file, unsigned int iocmd,
+ unsigned long ioarg)
{
unsigned int retval = 0;
pMWAVE_DEVICE_DATA pDrvData = &mwave_s_mdd;
void __user *arg = (void __user *)ioarg;
- PRINTK_5(TRACE_MWAVE,
- "mwavedd::mwave_ioctl, entry inode %p file %p cmd %x arg %x\n",
- inode, file, iocmd, (int) ioarg);
+ PRINTK_4(TRACE_MWAVE,
+ "mwavedd::mwave_ioctl, entry file %p cmd %x arg %x\n",
+ file, iocmd, (int) ioarg);
switch (iocmd) {
@@ -136,7 +136,9 @@
PRINTK_1(TRACE_MWAVE,
"mwavedd::mwave_ioctl, IOCTL_MW_RESET"
" calling tp3780I_ResetDSP\n");
+ lock_kernel();
retval = tp3780I_ResetDSP(&pDrvData->rBDData);
+ unlock_kernel();
PRINTK_2(TRACE_MWAVE,
"mwavedd::mwave_ioctl, IOCTL_MW_RESET"
" retval %x from tp3780I_ResetDSP\n",
@@ -147,7 +149,9 @@
PRINTK_1(TRACE_MWAVE,
"mwavedd::mwave_ioctl, IOCTL_MW_RUN"
" calling tp3780I_StartDSP\n");
+ lock_kernel();
retval = tp3780I_StartDSP(&pDrvData->rBDData);
+ unlock_kernel();
PRINTK_2(TRACE_MWAVE,
"mwavedd::mwave_ioctl, IOCTL_MW_RUN"
" retval %x from tp3780I_StartDSP\n",
@@ -161,8 +165,10 @@
"mwavedd::mwave_ioctl,"
" IOCTL_MW_DSP_ABILITIES calling"
" tp3780I_QueryAbilities\n");
+ lock_kernel();
retval = tp3780I_QueryAbilities(&pDrvData->rBDData,
&rAbilities);
+ unlock_kernel();
PRINTK_2(TRACE_MWAVE,
"mwavedd::mwave_ioctl, IOCTL_MW_DSP_ABILITIES"
" retval %x from tp3780I_QueryAbilities\n",
@@ -193,11 +199,13 @@
"mwavedd::mwave_ioctl IOCTL_MW_READ_DATA,"
" size %lx, ioarg %lx pusBuffer %p\n",
rReadData.ulDataLength, ioarg, pusBuffer);
+ lock_kernel();
retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData,
iocmd,
pusBuffer,
rReadData.ulDataLength,
rReadData.usDspAddress);
+ unlock_kernel();
}
break;
@@ -215,10 +223,12 @@
" size %lx, ioarg %lx pusBuffer %p\n",
rReadData.ulDataLength / 2, ioarg,
pusBuffer);
+ lock_kernel();
retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData,
iocmd, pusBuffer,
rReadData.ulDataLength / 2,
rReadData.usDspAddress);
+ unlock_kernel();
}
break;
@@ -236,10 +246,12 @@
" size %lx, ioarg %lx pusBuffer %p\n",
rWriteData.ulDataLength, ioarg,
pusBuffer);
+ lock_kernel();
retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData,
iocmd, pusBuffer,
rWriteData.ulDataLength,
rWriteData.usDspAddress);
+ unlock_kernel();
}
break;
@@ -257,10 +269,12 @@
" size %lx, ioarg %lx pusBuffer %p\n",
rWriteData.ulDataLength, ioarg,
pusBuffer);
+ lock_kernel();
retval = tp3780I_ReadWriteDspIStore(&pDrvData->rBDData,
iocmd, pusBuffer,
rWriteData.ulDataLength,
rWriteData.usDspAddress);
+ unlock_kernel();
}
break;
@@ -281,8 +295,10 @@
ipcnum);
return -EINVAL;
}
+ lock_kernel();
pDrvData->IPCs[ipcnum].bIsHere = FALSE;
pDrvData->IPCs[ipcnum].bIsEnabled = TRUE;
+ unlock_kernel();
PRINTK_2(TRACE_MWAVE,
"mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC"
@@ -307,6 +323,7 @@
return -EINVAL;
}
+ lock_kernel();
if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) {
DECLARE_WAITQUEUE(wait, current);
@@ -347,6 +364,7 @@
" processing\n",
ipcnum);
}
+ unlock_kernel();
}
break;
@@ -365,19 +383,18 @@
ipcnum);
return -EINVAL;
}
+ lock_kernel();
if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) {
pDrvData->IPCs[ipcnum].bIsEnabled = FALSE;
if (pDrvData->IPCs[ipcnum].bIsHere == TRUE) {
wake_up_interruptible(&pDrvData->IPCs[ipcnum].ipc_wait_queue);
}
}
+ unlock_kernel();
}
break;
default:
- PRINTK_ERROR(KERN_ERR_MWAVE "mwavedd::mwave_ioctl:"
- " Error: Unrecognized iocmd %x\n",
- iocmd);
return -ENOTTY;
break;
} /* switch */
@@ -460,7 +477,7 @@
.owner = THIS_MODULE,
.read = mwave_read,
.write = mwave_write,
- .ioctl = mwave_ioctl,
+ .unlocked_ioctl = mwave_ioctl,
.open = mwave_open,
.release = mwave_close
};
diff --git a/drivers/char/mwave/mwavedd.h b/drivers/char/mwave/mwavedd.h
index 8eca61e..7e0d530 100644
--- a/drivers/char/mwave/mwavedd.h
+++ b/drivers/char/mwave/mwavedd.h
@@ -147,4 +147,6 @@
} MWAVE_DEVICE_DATA, *pMWAVE_DEVICE_DATA;
+extern MWAVE_DEVICE_DATA mwave_s_mdd;
+
#endif
diff --git a/drivers/char/mwave/tp3780i.c b/drivers/char/mwave/tp3780i.c
index f282976..c689697 100644
--- a/drivers/char/mwave/tp3780i.c
+++ b/drivers/char/mwave/tp3780i.c
@@ -57,8 +57,6 @@
#include "3780i.h"
#include "mwavepub.h"
-extern MWAVE_DEVICE_DATA mwave_s_mdd;
-
static unsigned short s_ausThinkpadIrqToField[16] =
{ 0xFFFF, 0xFFFF, 0xFFFF, 0x0001, 0x0002, 0x0003, 0xFFFF, 0x0004,
0xFFFF, 0xFFFF, 0x0005, 0x0006, 0xFFFF, 0xFFFF, 0xFFFF, 0x0007 };
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 4c756bb..e30575e 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -16,7 +16,6 @@
* Fed through a cleanup, indent and remove of non 2.6 code by Alan Cox
* <alan@redhat.com>. The original 1.8 code is available on www.moxa.com.
* - Fixed x86_64 cleanness
- * - Fixed sleep with spinlock held in mxser_send_break
*/
#include <linux/module.h>
@@ -49,18 +48,12 @@
#define MXSER_VERSION "2.0.4" /* 1.12 */
#define MXSERMAJOR 174
-#define MXSERCUMAJOR 175
#define MXSER_BOARDS 4 /* Max. boards */
#define MXSER_PORTS_PER_BOARD 8 /* Max. ports per board */
#define MXSER_PORTS (MXSER_BOARDS * MXSER_PORTS_PER_BOARD)
#define MXSER_ISR_PASS_LIMIT 100
-#define MXSER_ERR_IOADDR -1
-#define MXSER_ERR_IRQ -2
-#define MXSER_ERR_IRQ_CONFLIT -3
-#define MXSER_ERR_VECTOR -4
-
/*CheckIsMoxaMust return value*/
#define MOXA_OTHER_UART 0x00
#define MOXA_MUST_MU150_HWID 0x01
@@ -179,14 +172,15 @@
};
MODULE_DEVICE_TABLE(pci, mxser_pcibrds);
-static int ioaddr[MXSER_BOARDS] = { 0, 0, 0, 0 };
+static unsigned long ioaddr[MXSER_BOARDS];
static int ttymajor = MXSERMAJOR;
/* Variables for insmod */
MODULE_AUTHOR("Casper Yang");
MODULE_DESCRIPTION("MOXA Smartio/Industio Family Multiport Board Device Driver");
-module_param_array(ioaddr, int, NULL, 0);
+module_param_array(ioaddr, ulong, NULL, 0);
+MODULE_PARM_DESC(ioaddr, "ISA io addresses to look for a moxa board");
module_param(ttymajor, int, 0);
MODULE_LICENSE("GPL");
@@ -196,7 +190,6 @@
unsigned long txcnt[MXSER_PORTS];
};
-
struct mxser_mon {
unsigned long rxcnt;
unsigned long txcnt;
@@ -287,19 +280,9 @@
int dcd;
};
-static struct mxser_mstatus GMStatus[MXSER_PORTS];
-
-static int mxserBoardCAP[MXSER_BOARDS] = {
- 0, 0, 0, 0
- /* 0x180, 0x280, 0x200, 0x320 */
-};
-
static struct mxser_board mxser_boards[MXSER_BOARDS];
static struct tty_driver *mxvar_sdriver;
static struct mxser_log mxvar_log;
-static int mxvar_diagflag;
-static unsigned char mxser_msr[MXSER_PORTS + 1];
-static struct mxser_mon_ext mon_data_ext;
static int mxser_set_baud_method[MXSER_PORTS + 1];
static void mxser_enable_must_enchance_mode(unsigned long baseio)
@@ -543,6 +526,7 @@
static unsigned char mxser_get_msr(int baseaddr, int mode, int port)
{
+ static unsigned char mxser_msr[MXSER_PORTS + 1];
unsigned char status = 0;
status = inb(baseaddr + UART_MSR);
@@ -1319,13 +1303,9 @@
struct mxser_port *info = tty->driver_data;
unsigned long flags;
- if (info->xmit_cnt <= 0 ||
- tty->stopped ||
- !info->port.xmit_buf ||
- (tty->hw_stopped &&
- (info->type != PORT_16550A) &&
- (!info->board->chip_flag)
- ))
+ if (info->xmit_cnt <= 0 || tty->stopped || !info->port.xmit_buf ||
+ (tty->hw_stopped && info->type != PORT_16550A &&
+ !info->board->chip_flag))
return;
spin_lock_irqsave(&info->slock, flags);
@@ -1343,9 +1323,7 @@
int ret;
ret = SERIAL_XMIT_SIZE - info->xmit_cnt - 1;
- if (ret < 0)
- ret = 0;
- return ret;
+ return ret < 0 ? 0 : ret;
}
static int mxser_chars_in_buffer(struct tty_struct *tty)
@@ -1634,6 +1612,8 @@
switch (cmd) {
case MOXA_GET_MAJOR:
+ printk(KERN_WARNING "mxser: '%s' uses deprecated ioctl %x, fix "
+ "your userspace\n", current->comm, cmd);
return put_user(ttymajor, (int __user *)argp);
case MOXA_CHKPORTENABLE:
@@ -1651,62 +1631,60 @@
ret = -EFAULT;
unlock_kernel();
return ret;
- case MOXA_GETMSTATUS:
+ case MOXA_GETMSTATUS: {
+ struct mxser_mstatus ms, __user *msu = argp;
lock_kernel();
for (i = 0; i < MXSER_BOARDS; i++)
for (j = 0; j < MXSER_PORTS_PER_BOARD; j++) {
port = &mxser_boards[i].ports[j];
+ memset(&ms, 0, sizeof(ms));
- GMStatus[i].ri = 0;
- if (!port->ioaddr) {
- GMStatus[i].dcd = 0;
- GMStatus[i].dsr = 0;
- GMStatus[i].cts = 0;
- continue;
- }
+ if (!port->ioaddr)
+ goto copy;
if (!port->port.tty || !port->port.tty->termios)
- GMStatus[i].cflag =
- port->normal_termios.c_cflag;
+ ms.cflag = port->normal_termios.c_cflag;
else
- GMStatus[i].cflag =
- port->port.tty->termios->c_cflag;
+ ms.cflag = port->port.tty->termios->c_cflag;
status = inb(port->ioaddr + UART_MSR);
- if (status & 0x80 /*UART_MSR_DCD */ )
- GMStatus[i].dcd = 1;
- else
- GMStatus[i].dcd = 0;
-
- if (status & 0x20 /*UART_MSR_DSR */ )
- GMStatus[i].dsr = 1;
- else
- GMStatus[i].dsr = 0;
-
-
- if (status & 0x10 /*UART_MSR_CTS */ )
- GMStatus[i].cts = 1;
- else
- GMStatus[i].cts = 0;
+ if (status & UART_MSR_DCD)
+ ms.dcd = 1;
+ if (status & UART_MSR_DSR)
+ ms.dsr = 1;
+ if (status & UART_MSR_CTS)
+ ms.cts = 1;
+ copy:
+ if (copy_to_user(msu, &ms, sizeof(ms))) {
+ unlock_kernel();
+ return -EFAULT;
+ }
+ msu++;
}
unlock_kernel();
- if (copy_to_user(argp, GMStatus,
- sizeof(struct mxser_mstatus) * MXSER_PORTS))
- return -EFAULT;
return 0;
+ }
case MOXA_ASPP_MON_EXT: {
- int p, shiftbit;
- unsigned long opmode;
- unsigned cflag, iflag;
+ struct mxser_mon_ext *me; /* it's 2k, stack unfriendly */
+ unsigned int cflag, iflag, p;
+ u8 opmode;
+
+ me = kzalloc(sizeof(*me), GFP_KERNEL);
+ if (!me)
+ return -ENOMEM;
lock_kernel();
- for (i = 0; i < MXSER_BOARDS; i++) {
- for (j = 0; j < MXSER_PORTS_PER_BOARD; j++) {
+ for (i = 0, p = 0; i < MXSER_BOARDS; i++) {
+ for (j = 0; j < MXSER_PORTS_PER_BOARD; j++, p++) {
+ if (p >= ARRAY_SIZE(me->rx_cnt)) {
+ i = MXSER_BOARDS;
+ break;
+ }
port = &mxser_boards[i].ports[j];
if (!port->ioaddr)
continue;
- status = mxser_get_msr(port->ioaddr, 0, i);
+ status = mxser_get_msr(port->ioaddr, 0, p);
if (status & UART_MSR_TERI)
port->icount.rng++;
@@ -1718,16 +1696,13 @@
port->icount.cts++;
port->mon_data.modem_status = status;
- mon_data_ext.rx_cnt[i] = port->mon_data.rxcnt;
- mon_data_ext.tx_cnt[i] = port->mon_data.txcnt;
- mon_data_ext.up_rxcnt[i] =
- port->mon_data.up_rxcnt;
- mon_data_ext.up_txcnt[i] =
- port->mon_data.up_txcnt;
- mon_data_ext.modem_status[i] =
+ me->rx_cnt[p] = port->mon_data.rxcnt;
+ me->tx_cnt[p] = port->mon_data.txcnt;
+ me->up_rxcnt[p] = port->mon_data.up_rxcnt;
+ me->up_txcnt[p] = port->mon_data.up_txcnt;
+ me->modem_status[p] =
port->mon_data.modem_status;
- mon_data_ext.baudrate[i] =
- tty_get_baud_rate(port->port.tty);
+ me->baudrate[p] = tty_get_baud_rate(port->port.tty);
if (!port->port.tty || !port->port.tty->termios) {
cflag = port->normal_termios.c_cflag;
@@ -1737,40 +1712,31 @@
iflag = port->port.tty->termios->c_iflag;
}
- mon_data_ext.databits[i] = cflag & CSIZE;
-
- mon_data_ext.stopbits[i] = cflag & CSTOPB;
-
- mon_data_ext.parity[i] =
- cflag & (PARENB | PARODD | CMSPAR);
-
- mon_data_ext.flowctrl[i] = 0x00;
+ me->databits[p] = cflag & CSIZE;
+ me->stopbits[p] = cflag & CSTOPB;
+ me->parity[p] = cflag & (PARENB | PARODD |
+ CMSPAR);
if (cflag & CRTSCTS)
- mon_data_ext.flowctrl[i] |= 0x03;
+ me->flowctrl[p] |= 0x03;
if (iflag & (IXON | IXOFF))
- mon_data_ext.flowctrl[i] |= 0x0C;
+ me->flowctrl[p] |= 0x0C;
if (port->type == PORT_16550A)
- mon_data_ext.fifo[i] = 1;
- else
- mon_data_ext.fifo[i] = 0;
+ me->fifo[p] = 1;
- p = i % 4;
- shiftbit = p * 2;
- opmode = inb(port->opmode_ioaddr) >> shiftbit;
+ opmode = inb(port->opmode_ioaddr) >>
+ ((p % 4) * 2);
opmode &= OP_MODE_MASK;
-
- mon_data_ext.iftype[i] = opmode;
-
+ me->iftype[p] = opmode;
}
}
unlock_kernel();
- if (copy_to_user(argp, &mon_data_ext,
- sizeof(mon_data_ext)))
- return -EFAULT;
- return 0;
+ if (copy_to_user(argp, me, sizeof(*me)))
+ ret = -EFAULT;
+ kfree(me);
+ return ret;
}
default:
return -ENOIOCTLCMD;
@@ -1804,7 +1770,6 @@
{
struct mxser_port *info = tty->driver_data;
struct async_icount cnow;
- struct serial_icounter_struct __user *p_cuser;
unsigned long flags;
void __user *argp = (void __user *)arg;
int retval;
@@ -1884,30 +1849,26 @@
* NB: both 1->0 and 0->1 transitions are counted except for
* RI where only 0->1 is counted.
*/
- case TIOCGICOUNT:
+ case TIOCGICOUNT: {
+ struct serial_icounter_struct icnt = { 0 };
spin_lock_irqsave(&info->slock, flags);
cnow = info->icount;
spin_unlock_irqrestore(&info->slock, flags);
- p_cuser = argp;
- if (put_user(cnow.frame, &p_cuser->frame))
- return -EFAULT;
- if (put_user(cnow.brk, &p_cuser->brk))
- return -EFAULT;
- if (put_user(cnow.overrun, &p_cuser->overrun))
- return -EFAULT;
- if (put_user(cnow.buf_overrun, &p_cuser->buf_overrun))
- return -EFAULT;
- if (put_user(cnow.parity, &p_cuser->parity))
- return -EFAULT;
- if (put_user(cnow.rx, &p_cuser->rx))
- return -EFAULT;
- if (put_user(cnow.tx, &p_cuser->tx))
- return -EFAULT;
- put_user(cnow.cts, &p_cuser->cts);
- put_user(cnow.dsr, &p_cuser->dsr);
- put_user(cnow.rng, &p_cuser->rng);
- put_user(cnow.dcd, &p_cuser->dcd);
- return 0;
+
+ icnt.frame = cnow.frame;
+ icnt.brk = cnow.brk;
+ icnt.overrun = cnow.overrun;
+ icnt.buf_overrun = cnow.buf_overrun;
+ icnt.parity = cnow.parity;
+ icnt.rx = cnow.rx;
+ icnt.tx = cnow.tx;
+ icnt.cts = cnow.cts;
+ icnt.dsr = cnow.dsr;
+ icnt.rng = cnow.rng;
+ icnt.dcd = cnow.dcd;
+
+ return copy_to_user(argp, &icnt, sizeof(icnt)) ? -EFAULT : 0;
+ }
case MOXA_HighSpeedOn:
return put_user(info->baud_base != 115200 ? 1 : 0, (int __user *)argp);
case MOXA_SDS_RSTICOUNTER:
@@ -2503,7 +2464,8 @@
unsigned int i;
int retval;
- printk(KERN_INFO "max. baud rate = %d bps.\n", brd->ports[0].max_baud);
+ printk(KERN_INFO "mxser: max. baud rate = %d bps\n",
+ brd->ports[0].max_baud);
for (i = 0; i < brd->info->nports; i++) {
info = &brd->ports[i];
@@ -2586,28 +2548,32 @@
irq = regs[9] & 0xF000;
irq = irq | (irq >> 4);
if (irq != (regs[9] & 0xFF00))
- return MXSER_ERR_IRQ_CONFLIT;
+ goto err_irqconflict;
} else if (brd->info->nports == 4) {
irq = regs[9] & 0xF000;
irq = irq | (irq >> 4);
irq = irq | (irq >> 8);
if (irq != regs[9])
- return MXSER_ERR_IRQ_CONFLIT;
+ goto err_irqconflict;
} else if (brd->info->nports == 8) {
irq = regs[9] & 0xF000;
irq = irq | (irq >> 4);
irq = irq | (irq >> 8);
if ((irq != regs[9]) || (irq != regs[10]))
- return MXSER_ERR_IRQ_CONFLIT;
+ goto err_irqconflict;
}
- if (!irq)
- return MXSER_ERR_IRQ;
+ if (!irq) {
+ printk(KERN_ERR "mxser: interrupt number unset\n");
+ return -EIO;
+ }
brd->irq = ((int)(irq & 0xF000) >> 12);
for (i = 0; i < 8; i++)
brd->ports[i].ioaddr = (int) regs[i + 1] & 0xFFF8;
- if ((regs[12] & 0x80) == 0)
- return MXSER_ERR_VECTOR;
+ if ((regs[12] & 0x80) == 0) {
+ printk(KERN_ERR "mxser: invalid interrupt vector\n");
+ return -EIO;
+ }
brd->vector = (int)regs[11]; /* interrupt vector */
if (id == 1)
brd->vector_mask = 0x00FF;
@@ -2634,13 +2600,26 @@
else
brd->uart_type = PORT_16450;
if (!request_region(brd->ports[0].ioaddr, 8 * brd->info->nports,
- "mxser(IO)"))
- return MXSER_ERR_IOADDR;
+ "mxser(IO)")) {
+ printk(KERN_ERR "mxser: can't request ports I/O region: "
+ "0x%.8lx-0x%.8lx\n",
+ brd->ports[0].ioaddr, brd->ports[0].ioaddr +
+ 8 * brd->info->nports - 1);
+ return -EIO;
+ }
if (!request_region(brd->vector, 1, "mxser(vector)")) {
release_region(brd->ports[0].ioaddr, 8 * brd->info->nports);
- return MXSER_ERR_VECTOR;
+ printk(KERN_ERR "mxser: can't request interrupt vector region: "
+ "0x%.8lx-0x%.8lx\n",
+ (unsigned long)brd->vector, /* report the vector region that failed, */
+ (unsigned long)brd->vector); /* which is one byte long: start == end */
+ return -EIO;
}
return brd->info->nports;
+
+err_irqconflict:
+ printk(KERN_ERR "mxser: invalid interrupt number\n");
+ return -EIO;
}
static int __devinit mxser_probe(struct pci_dev *pdev,
@@ -2657,20 +2636,20 @@
break;
if (i >= MXSER_BOARDS) {
- printk(KERN_ERR "Too many Smartio/Industio family boards found "
- "(maximum %d), board not configured\n", MXSER_BOARDS);
+ dev_err(&pdev->dev, "too many boards found (maximum %d), board "
+ "not configured\n", MXSER_BOARDS);
goto err;
}
brd = &mxser_boards[i];
brd->idx = i * MXSER_PORTS_PER_BOARD;
- printk(KERN_INFO "Found MOXA %s board (BusNo=%d, DevNo=%d)\n",
+ dev_info(&pdev->dev, "found MOXA %s board (BusNo=%d, DevNo=%d)\n",
mxser_cards[ent->driver_data].name,
pdev->bus->number, PCI_SLOT(pdev->devfn));
retval = pci_enable_device(pdev);
if (retval) {
- printk(KERN_ERR "Moxa SmartI/O PCI enable fail !\n");
+ dev_err(&pdev->dev, "PCI enable failed\n");
goto err;
}
@@ -2772,11 +2751,8 @@
static int __init mxser_module_init(void)
{
struct mxser_board *brd;
- unsigned long cap;
- unsigned int i, m, isaloop;
- int retval, b;
-
- pr_debug("Loading module mxser ...\n");
+ unsigned int b, i, m;
+ int retval;
mxvar_sdriver = alloc_tty_driver(MXSER_PORTS + 1);
if (!mxvar_sdriver)
@@ -2806,74 +2782,43 @@
goto err_put;
}
- mxvar_diagflag = 0;
-
- m = 0;
/* Start finding ISA boards here */
- for (isaloop = 0; isaloop < 2; isaloop++)
- for (b = 0; b < MXSER_BOARDS && m < MXSER_BOARDS; b++) {
- if (!isaloop)
- cap = mxserBoardCAP[b]; /* predefined */
- else
- cap = ioaddr[b]; /* module param */
+ for (m = 0, b = 0; b < MXSER_BOARDS; b++) {
+ if (!ioaddr[b])
+ continue;
- if (!cap)
- continue;
-
- brd = &mxser_boards[m];
- retval = mxser_get_ISA_conf(cap, brd);
-
- if (retval != 0)
- printk(KERN_INFO "Found MOXA %s board "
- "(CAP=0x%x)\n",
- brd->info->name, ioaddr[b]);
-
- if (retval <= 0) {
- if (retval == MXSER_ERR_IRQ)
- printk(KERN_ERR "Invalid interrupt "
- "number, board not "
- "configured\n");
- else if (retval == MXSER_ERR_IRQ_CONFLIT)
- printk(KERN_ERR "Invalid interrupt "
- "number, board not "
- "configured\n");
- else if (retval == MXSER_ERR_VECTOR)
- printk(KERN_ERR "Invalid interrupt "
- "vector, board not "
- "configured\n");
- else if (retval == MXSER_ERR_IOADDR)
- printk(KERN_ERR "Invalid I/O address, "
- "board not configured\n");
-
- brd->info = NULL;
- continue;
- }
-
- /* mxser_initbrd will hook ISR. */
- if (mxser_initbrd(brd, NULL) < 0) {
- brd->info = NULL;
- continue;
- }
-
- brd->idx = m * MXSER_PORTS_PER_BOARD;
- for (i = 0; i < brd->info->nports; i++)
- tty_register_device(mxvar_sdriver, brd->idx + i,
- NULL);
-
- m++;
+ brd = &mxser_boards[m];
+ retval = mxser_get_ISA_conf(ioaddr[b], brd); /* pass the I/O address itself; !ioaddr[b] was always 0 here */
+ if (retval <= 0) {
+ brd->info = NULL;
+ continue;
}
+ printk(KERN_INFO "mxser: found MOXA %s board (CAP=0x%lx)\n",
+ brd->info->name, ioaddr[b]);
+
+ /* mxser_initbrd will hook ISR. */
+ if (mxser_initbrd(brd, NULL) < 0) {
+ brd->info = NULL;
+ continue;
+ }
+
+ brd->idx = m * MXSER_PORTS_PER_BOARD;
+ for (i = 0; i < brd->info->nports; i++)
+ tty_register_device(mxvar_sdriver, brd->idx + i, NULL);
+
+ m++;
+ }
+
retval = pci_register_driver(&mxser_driver);
if (retval) {
- printk(KERN_ERR "Can't register pci driver\n");
+ printk(KERN_ERR "mxser: can't register pci driver\n");
if (!m) {
retval = -ENODEV;
goto err_unr;
} /* else: we have some ISA cards under control */
}
- pr_debug("Done.\n");
-
return 0;
err_unr:
tty_unregister_driver(mxvar_sdriver);
@@ -2886,8 +2831,6 @@
{
unsigned int i, j;
- pr_debug("Unloading module mxser ...\n");
-
pci_unregister_driver(&mxser_driver);
for (i = 0; i < MXSER_BOARDS; i++) /* ISA remains */
@@ -2901,8 +2844,6 @@
for (i = 0; i < MXSER_BOARDS; i++)
if (mxser_boards[i].info != NULL)
mxser_release_res(&mxser_boards[i], NULL, 1);
-
- pr_debug("Done.\n");
}
module_init(mxser_module_init);
diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c
index ba012c2..f9f72a2 100644
--- a/drivers/char/nwflash.c
+++ b/drivers/char/nwflash.c
@@ -122,35 +122,20 @@
static ssize_t flash_read(struct file *file, char __user *buf, size_t size,
loff_t *ppos)
{
- unsigned long p = *ppos;
- unsigned int count = size;
- int ret = 0;
+ ssize_t ret;
if (flashdebug)
printk(KERN_DEBUG "flash_read: flash_read: offset=0x%lX, "
- "buffer=%p, count=0x%X.\n", p, buf, count);
+ "buffer=%p, count=0x%X.\n", (unsigned long)*ppos, buf, (unsigned int)size);
+ /*
+ * We now lock against reads and writes. --rmk
+ */
+ if (mutex_lock_interruptible(&nwflash_mutex))
+ return -ERESTARTSYS;
- if (count)
- ret = -ENXIO;
+ ret = simple_read_from_buffer(buf, size, ppos, (void *)FLASH_BASE, gbFlashSize);
+ mutex_unlock(&nwflash_mutex);
- if (p < gbFlashSize) {
- if (count > gbFlashSize - p)
- count = gbFlashSize - p;
-
- /*
- * We now lock against reads and writes. --rmk
- */
- if (mutex_lock_interruptible(&nwflash_mutex))
- return -ERESTARTSYS;
-
- ret = copy_to_user(buf, (void *)(FLASH_BASE + p), count);
- if (ret == 0) {
- ret = count;
- *ppos += count;
- } else
- ret = -EFAULT;
- mutex_unlock(&nwflash_mutex);
- }
return ret;
}
diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c
index 7af7a7e..bee39fd 100644
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -67,7 +67,7 @@
#include <linux/major.h>
#include <linux/ppdev.h>
#include <linux/smp_lock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#define PP_VERSION "ppdev: user-space parallel port driver"
#define CHRDEV "ppdev"
@@ -328,10 +328,9 @@
return IEEE1284_PH_FWD_IDLE;
}
-static int pp_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
+static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
- unsigned int minor = iminor(inode);
+ unsigned int minor = iminor(file->f_path.dentry->d_inode);
struct pp_struct *pp = file->private_data;
struct parport * port;
void __user *argp = (void __user *)arg;
@@ -634,6 +633,15 @@
return 0;
}
+static long pp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ long ret;
+ lock_kernel();
+ ret = pp_do_ioctl(file, cmd, arg);
+ unlock_kernel();
+ return ret;
+}
+
static int pp_open (struct inode * inode, struct file * file)
{
unsigned int minor = iminor(inode);
@@ -745,7 +753,7 @@
.read = pp_read,
.write = pp_write,
.poll = pp_poll,
- .ioctl = pp_ioctl,
+ .unlocked_ioctl = pp_ioctl,
.open = pp_open,
.release = pp_release,
};
diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c
index 0cdfee1..a8f68a3 100644
--- a/drivers/char/rio/rio_linux.c
+++ b/drivers/char/rio/rio_linux.c
@@ -179,7 +179,7 @@
static void rio_hungup(void *ptr);
static void rio_close(void *ptr);
static int rio_chars_in_buffer(void *ptr);
-static int rio_fw_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+static long rio_fw_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
static int rio_init_drivers(void);
static void my_hd(void *addr, int len);
@@ -240,7 +240,7 @@
static const struct file_operations rio_fw_fops = {
.owner = THIS_MODULE,
- .ioctl = rio_fw_ioctl,
+ .unlocked_ioctl = rio_fw_ioctl,
};
static struct miscdevice rio_fw_device = {
@@ -560,13 +560,15 @@
-static int rio_fw_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+static long rio_fw_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
int rc = 0;
func_enter();
/* The "dev" argument isn't used. */
+ lock_kernel();
rc = riocontrol(p, 0, cmd, arg, capable(CAP_SYS_ADMIN));
+ unlock_kernel();
func_exit();
return rc;
diff --git a/drivers/char/sx.c b/drivers/char/sx.c
index 2162439b..c385206 100644
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -286,8 +286,8 @@
static int sx_chars_in_buffer(void *ptr);
static int sx_init_board(struct sx_board *board);
static int sx_init_portstructs(int nboards, int nports);
-static int sx_fw_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg);
+static long sx_fw_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg);
static int sx_init_drivers(void);
static struct tty_driver *sx_driver;
@@ -396,7 +396,7 @@
static const struct file_operations sx_fw_fops = {
.owner = THIS_MODULE,
- .ioctl = sx_fw_ioctl,
+ .unlocked_ioctl = sx_fw_ioctl,
};
static struct miscdevice sx_fw_device = {
@@ -1686,10 +1686,10 @@
}
#endif
-static int sx_fw_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg)
+static long sx_fw_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
{
- int rc = 0;
+ long rc = 0;
int __user *descr = (int __user *)arg;
int i;
static struct sx_board *board = NULL;
@@ -1699,13 +1699,10 @@
func_enter();
-#if 0
- /* Removed superuser check: Sysops can use the permissions on the device
- file to restrict access. Recommendation: Root only. (root.root 600) */
- if (!capable(CAP_SYS_ADMIN)) {
+ if (!capable(CAP_SYS_RAWIO))
return -EPERM;
- }
-#endif
+
+ lock_kernel();
sx_dprintk(SX_DEBUG_FIRMWARE, "IOCTL %x: %lx\n", cmd, arg);
@@ -1720,19 +1717,23 @@
for (i = 0; i < SX_NBOARDS; i++)
sx_dprintk(SX_DEBUG_FIRMWARE, "<%x> ", boards[i].flags);
sx_dprintk(SX_DEBUG_FIRMWARE, "\n");
+ unlock_kernel();
return -EIO;
}
switch (cmd) {
case SXIO_SET_BOARD:
sx_dprintk(SX_DEBUG_FIRMWARE, "set board to %ld\n", arg);
+ rc = -EIO;
if (arg >= SX_NBOARDS)
- return -EIO;
+ break;
sx_dprintk(SX_DEBUG_FIRMWARE, "not out of range\n");
if (!(boards[arg].flags & SX_BOARD_PRESENT))
- return -EIO;
+ break;
sx_dprintk(SX_DEBUG_FIRMWARE, ".. and present!\n");
board = &boards[arg];
+ rc = 0;
+ /* FIXME: And this does ... nothing?? */
break;
case SXIO_GET_TYPE:
rc = -ENOENT; /* If we manage to miss one, return error. */
@@ -1746,7 +1747,7 @@
rc = SX_TYPE_SI;
if (IS_EISA_BOARD(board))
rc = SX_TYPE_SI;
- sx_dprintk(SX_DEBUG_FIRMWARE, "returning type= %d\n", rc);
+ sx_dprintk(SX_DEBUG_FIRMWARE, "returning type= %ld\n", rc);
break;
case SXIO_DO_RAMTEST:
if (sx_initialized) /* Already initialized: better not ramtest the board. */
@@ -1760,19 +1761,26 @@
rc = do_memtest(board, 0, 0x7ff8);
/* if (!rc) rc = do_memtest_w (board, 0, 0x7ff8); */
}
- sx_dprintk(SX_DEBUG_FIRMWARE, "returning memtest result= %d\n",
- rc);
+ sx_dprintk(SX_DEBUG_FIRMWARE,
+ "returning memtest result= %ld\n", rc);
break;
case SXIO_DOWNLOAD:
- if (sx_initialized) /* Already initialized */
- return -EEXIST;
- if (!sx_reset(board))
- return -EIO;
+ if (sx_initialized) {/* Already initialized */
+ rc = -EEXIST;
+ break;
+ }
+ if (!sx_reset(board)) {
+ rc = -EIO;
+ break;
+ }
sx_dprintk(SX_DEBUG_INIT, "reset the board...\n");
tmp = kmalloc(SX_CHUNK_SIZE, GFP_USER);
- if (!tmp)
- return -ENOMEM;
+ if (!tmp) {
+ rc = -ENOMEM;
+ break;
+ }
+ /* FIXME: check returns */
get_user(nbytes, descr++);
get_user(offset, descr++);
get_user(data, descr++);
@@ -1782,7 +1790,8 @@
(i + SX_CHUNK_SIZE > nbytes) ?
nbytes - i : SX_CHUNK_SIZE)) {
kfree(tmp);
- return -EFAULT;
+ rc = -EFAULT;
+ goto out_unlock; /* tmp is freed: leave the ioctl, as the old return did */
}
memcpy_toio(board->base2 + offset + i, tmp,
(i + SX_CHUNK_SIZE > nbytes) ?
@@ -1798,13 +1807,17 @@
rc = sx_nports;
break;
case SXIO_INIT:
- if (sx_initialized) /* Already initialized */
- return -EEXIST;
+ if (sx_initialized) { /* Already initialized */
+ rc = -EEXIST;
+ break;
+ }
/* This is not allowed until all boards are initialized... */
for (i = 0; i < SX_NBOARDS; i++) {
if ((boards[i].flags & SX_BOARD_PRESENT) &&
- !(boards[i].flags & SX_BOARD_INITIALIZED))
- return -EIO;
+ !(boards[i].flags & SX_BOARD_INITIALIZED)) {
+ rc = -EIO;
+ goto out_unlock; /* a break would only leave this for loop */
+ }
}
for (i = 0; i < SX_NBOARDS; i++)
if (!(boards[i].flags & SX_BOARD_PRESENT))
@@ -1832,10 +1845,11 @@
rc = sx_nports;
break;
default:
- printk(KERN_WARNING "Unknown ioctl on firmware device (%x).\n",
- cmd);
+ rc = -ENOTTY;
break;
}
+out_unlock:
+ unlock_kernel();
func_exit();
return rc;
}
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 6f4d856..e1b46bc 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -3580,7 +3580,6 @@
p->signal->tty = NULL;
spin_unlock_irq(&p->sighand->siglock);
}
-EXPORT_SYMBOL(proc_clear_tty);
/* Called under the sighand lock */
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index dc17fe3..d0f4eb6 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -46,6 +46,9 @@
/* The operations for our console. */
static struct hv_ops virtio_cons;
+/* The hvc device */
+static struct hvc_struct *hvc;
+
/*D:310 The put_chars() callback is pretty straightforward.
*
* We turn the characters into a scatter-gather list, add it to the output
@@ -134,6 +137,27 @@
return hvc_instantiate(0, 0, &virtio_cons);
}
+/*
+ * We support only one console, so the hvc struct is a global variable.
+ * There is nothing to register here; just record the request in hp.
+ */
+static int notifier_add_vio(struct hvc_struct *hp, int data)
+{
+ hp->irq_requested = 1;
+ return 0;
+}
+
+static void notifier_del_vio(struct hvc_struct *hp, int data)
+{
+ hp->irq_requested = 0;
+}
+
+static void hvc_handle_input(struct virtqueue *vq)
+{
+ if (hvc_poll(hvc))
+ hvc_kick();
+}
+
/*D:370 Once we're further in boot, we get probed like any other virtio device.
* At this stage we set up the output virtqueue.
*
@@ -144,7 +168,6 @@
static int __devinit virtcons_probe(struct virtio_device *dev)
{
int err;
- struct hvc_struct *hvc;
vdev = dev;
@@ -158,7 +181,7 @@
/* Find the input queue. */
/* FIXME: This is why we want to wean off hvc: we do nothing
* when input comes in. */
- in_vq = vdev->config->find_vq(vdev, 0, NULL);
+ in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input);
if (IS_ERR(in_vq)) {
err = PTR_ERR(in_vq);
goto free;
@@ -173,15 +196,18 @@
/* Start using the new console output. */
virtio_cons.get_chars = get_chars;
virtio_cons.put_chars = put_chars;
+ virtio_cons.notifier_add = notifier_add_vio;
+ virtio_cons.notifier_del = notifier_del_vio;
/* The first argument of hvc_alloc() is the virtual console number, so
- * we use zero. The second argument is the interrupt number; we
- * currently leave this as zero: it would be better not to use the
- * hvc mechanism and fix this (FIXME!).
+ * we use zero. The second argument is the parameter for the
+ * notification mechanism (like irq number). We currently leave this
+ * as zero, virtqueues have implicit notifications.
*
* The third argument is a "struct hv_ops" containing the put_chars()
- * and get_chars() pointers. The final argument is the output buffer
- * size: we can do any size, so we put PAGE_SIZE here. */
+ * get_chars(), notifier_add() and notifier_del() pointers.
+ * The final argument is the output buffer size: we can do any size,
+ * so we put PAGE_SIZE here. */
hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE);
if (IS_ERR(hvc)) {
err = PTR_ERR(hvc);
diff --git a/drivers/char/xilinx_hwicap/xilinx_hwicap.c b/drivers/char/xilinx_hwicap/xilinx_hwicap.c
index 51966cc..8bfee5f 100644
--- a/drivers/char/xilinx_hwicap/xilinx_hwicap.c
+++ b/drivers/char/xilinx_hwicap/xilinx_hwicap.c
@@ -87,7 +87,6 @@
#include <linux/mutex.h>
#include <linux/smp_lock.h>
#include <linux/sysctl.h>
-#include <linux/version.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/platform_device.h>
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 6e6c3c4..5a11e3c 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -123,6 +123,13 @@
Support for error detection and correction the Intel
Greekcreek/Blackford chipsets.
+config EDAC_I5100
+ tristate "Intel San Clemente MCH"
+ depends on EDAC_MM_EDAC && X86 && PCI
+ help
+ Support for error detection and correction the Intel
+ San Clemente MCH.
+
config EDAC_MPC85XX
tristate "Freescale MPC85xx"
depends on EDAC_MM_EDAC && FSL_SOC && MPC85xx
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 8380773..e5e9104 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -19,6 +19,7 @@
obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o
obj-$(CONFIG_EDAC_I5000) += i5000_edac.o
+obj-$(CONFIG_EDAC_I5100) += i5100_edac.o
obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o
obj-$(CONFIG_EDAC_E752X) += e752x_edac.o
obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o
diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c
index c94a0eb..facfdb1 100644
--- a/drivers/edac/e752x_edac.c
+++ b/drivers/edac/e752x_edac.c
@@ -28,6 +28,7 @@
#define E752X_REVISION " Ver: 2.0.2 " __DATE__
#define EDAC_MOD_STR "e752x_edac"
+static int report_non_memory_errors;
static int force_function_unhide;
static int sysbus_parity = -1;
@@ -117,7 +118,7 @@
#define E752X_BUF_FERR 0x70 /* Memory buffer first error reg (8b) */
#define E752X_BUF_NERR 0x72 /* Memory buffer next error reg (8b) */
#define E752X_BUF_ERRMASK 0x74 /* Memory buffer error mask reg (8b) */
-#define E752X_BUF_SMICMD 0x7A /* Memory buffer SMI command reg (8b) */
+#define E752X_BUF_SMICMD 0x7A /* Memory buffer SMI cmd reg (8b) */
#define E752X_DRAM_FERR 0x80 /* DRAM first error register (16b) */
#define E752X_DRAM_NERR 0x82 /* DRAM next error register (16b) */
#define E752X_DRAM_ERRMASK 0x84 /* DRAM error mask register (8b) */
@@ -127,7 +128,7 @@
/* error address register (32b) */
/*
* 31 Reserved
- * 30:2 CE address (64 byte block 34:6)
+ * 30:2 CE address (64 byte block 34:6)
* 1 Reserved
* 0 HiLoCS
*/
@@ -147,11 +148,11 @@
* 1 Reserved
* 0 HiLoCS
*/
-#define E752X_DRAM_SCRB_ADD 0xA8 /* DRAM first uncorrectable scrub memory */
+#define E752X_DRAM_SCRB_ADD 0xA8 /* DRAM 1st uncorrectable scrub mem */
/* error address register (32b) */
/*
* 31 Reserved
- * 30:2 CE address (64 byte block 34:6)
+ * 30:2 CE address (64 byte block 34:6)
* 1 Reserved
* 0 HiLoCS
*/
@@ -394,9 +395,12 @@
struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info;
error_1b = retry_add;
- page = error_1b >> (PAGE_SHIFT - 4); /* convert the addr to 4k page */
- row = pvt->mc_symmetric ? ((page >> 1) & 3) : /* chip select are bits 14 & 13 */
+ page = error_1b >> (PAGE_SHIFT - 4); /* convert the addr to 4k page */
+
+ /* chip select are bits 14 & 13 */
+ row = pvt->mc_symmetric ? ((page >> 1) & 3) :
edac_mc_find_csrow_by_page(mci, page);
+
e752x_mc_printk(mci, KERN_WARNING,
"CE page 0x%lx, row %d : Memory read retry\n",
(long unsigned int)page, row);
@@ -422,12 +426,21 @@
}
static char *global_message[11] = {
- "PCI Express C1", "PCI Express C", "PCI Express B1",
- "PCI Express B", "PCI Express A1", "PCI Express A",
- "DMA Controler", "HUB or NS Interface", "System Bus",
- "DRAM Controler", "Internal Buffer"
+ "PCI Express C1",
+ "PCI Express C",
+ "PCI Express B1",
+ "PCI Express B",
+ "PCI Express A1",
+ "PCI Express A",
+ "DMA Controller",
+ "HUB or NS Interface",
+ "System Bus",
+ "DRAM Controller", /* index 9: matched against DRAM_ENTRY */
+ "Internal Buffer"
};
+#define DRAM_ENTRY 9
+
static char *fatal_message[2] = { "Non-Fatal ", "Fatal " };
static void do_global_error(int fatal, u32 errors)
@@ -435,9 +448,16 @@
int i;
for (i = 0; i < 11; i++) {
- if (errors & (1 << i))
- e752x_printk(KERN_WARNING, "%sError %s\n",
- fatal_message[fatal], global_message[i]);
+ if (errors & (1 << i)) {
+ /* If the error is from DRAM Controller OR
+ * we are to report ALL errors, then
+ * report the error
+ */
+ if ((i == DRAM_ENTRY) || report_non_memory_errors)
+ e752x_printk(KERN_WARNING, "%sError %s\n",
+ fatal_message[fatal],
+ global_message[i]);
+ }
}
}
@@ -1021,7 +1041,7 @@
struct pci_dev *dev;
pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL,
- pvt->dev_info->err_dev, pvt->bridge_ck);
+ pvt->dev_info->err_dev, pvt->bridge_ck);
if (pvt->bridge_ck == NULL)
pvt->bridge_ck = pci_scan_single_device(pdev->bus,
@@ -1034,8 +1054,9 @@
return 1;
}
- dev = pci_get_device(PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].ctl_dev,
- NULL);
+ dev = pci_get_device(PCI_VENDOR_ID_INTEL,
+ e752x_devs[dev_idx].ctl_dev,
+ NULL);
if (dev == NULL)
goto fail;
@@ -1316,7 +1337,8 @@
module_param(force_function_unhide, int, 0444);
MODULE_PARM_DESC(force_function_unhide, "if BIOS sets Dev0:Fun1 up as hidden:"
- " 1=force unhide and hope BIOS doesn't fight driver for Dev0:Fun1 access");
+ " 1=force unhide and hope BIOS doesn't fight driver for "
+ "Dev0:Fun1 access");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
@@ -1324,3 +1346,6 @@
module_param(sysbus_parity, int, 0444);
MODULE_PARM_DESC(sysbus_parity, "0=disable system bus parity checking,"
" 1=enable system bus parity checking, default=auto-detect");
+module_param(report_non_memory_errors, int, 0644);
+MODULE_PARM_DESC(report_non_memory_errors, "0=disable non-memory error "
+ "reporting, 1=enable non-memory error reporting");
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 021d187..ad218fe 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -44,6 +44,25 @@
return edac_mc_poll_msec;
}
+static int edac_set_poll_msec(const char *val, struct kernel_param *kp)
+{
+ long l;
+ int ret;
+
+ if (!val)
+ return -EINVAL;
+
+ ret = strict_strtol(val, 0, &l);
+ if (ret == -EINVAL || ((int)l != l))
+ return -EINVAL;
+ *((int *)kp->arg) = l;
+
+ /* notify edac_mc engine to reset the poll period */
+ edac_mc_reset_delay_period(l);
+
+ return 0;
+}
+
/* Parameter declarations for above */
module_param(edac_mc_panic_on_ue, int, 0644);
MODULE_PARM_DESC(edac_mc_panic_on_ue, "Panic on uncorrected error: 0=off 1=on");
@@ -53,7 +72,8 @@
module_param(edac_mc_log_ce, int, 0644);
MODULE_PARM_DESC(edac_mc_log_ce,
"Log correctable error to console: 0=off 1=on");
-module_param(edac_mc_poll_msec, int, 0644);
+module_param_call(edac_mc_poll_msec, edac_set_poll_msec, param_get_int,
+ &edac_mc_poll_msec, 0644);
MODULE_PARM_DESC(edac_mc_poll_msec, "Polling period in milliseconds");
/*
@@ -103,16 +123,6 @@
-/*
- * /sys/devices/system/edac/mc;
- * data structures and methods
- */
-static ssize_t memctrl_int_show(void *ptr, char *buffer)
-{
- int *value = (int *)ptr;
- return sprintf(buffer, "%u\n", *value);
-}
-
static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count)
{
int *value = (int *)ptr;
@@ -123,23 +133,6 @@
return count;
}
-/*
- * mc poll_msec time value
- */
-static ssize_t poll_msec_int_store(void *ptr, const char *buffer, size_t count)
-{
- int *value = (int *)ptr;
-
- if (isdigit(*buffer)) {
- *value = simple_strtoul(buffer, NULL, 0);
-
- /* notify edac_mc engine to reset the poll period */
- edac_mc_reset_delay_period(*value);
- }
-
- return count;
-}
-
/* EDAC sysfs CSROW data structures and methods
*/
@@ -185,7 +178,11 @@
static ssize_t channel_dimm_label_show(struct csrow_info *csrow,
char *data, int channel)
{
- return snprintf(data, EDAC_MC_LABEL_LEN, "%s",
+ /* if field has not been initialized, there is nothing to send */
+ if (!csrow->channels[channel].label[0])
+ return 0;
+
+ return snprintf(data, EDAC_MC_LABEL_LEN, "%s\n",
csrow->channels[channel].label);
}
@@ -649,98 +646,10 @@
.default_attrs = (struct attribute **)mci_attr,
};
-/* show/store, tables, etc for the MC kset */
-
-
-struct memctrl_dev_attribute {
- struct attribute attr;
- void *value;
- ssize_t(*show) (void *, char *);
- ssize_t(*store) (void *, const char *, size_t);
-};
-
-/* Set of show/store abstract level functions for memory control object */
-static ssize_t memctrl_dev_show(struct kobject *kobj,
- struct attribute *attr, char *buffer)
-{
- struct memctrl_dev_attribute *memctrl_dev;
- memctrl_dev = (struct memctrl_dev_attribute *)attr;
-
- if (memctrl_dev->show)
- return memctrl_dev->show(memctrl_dev->value, buffer);
-
- return -EIO;
-}
-
-static ssize_t memctrl_dev_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer, size_t count)
-{
- struct memctrl_dev_attribute *memctrl_dev;
- memctrl_dev = (struct memctrl_dev_attribute *)attr;
-
- if (memctrl_dev->store)
- return memctrl_dev->store(memctrl_dev->value, buffer, count);
-
- return -EIO;
-}
-
-static struct sysfs_ops memctrlfs_ops = {
- .show = memctrl_dev_show,
- .store = memctrl_dev_store
-};
-
-#define MEMCTRL_ATTR(_name, _mode, _show, _store) \
-static struct memctrl_dev_attribute attr_##_name = { \
- .attr = {.name = __stringify(_name), .mode = _mode }, \
- .value = &_name, \
- .show = _show, \
- .store = _store, \
-};
-
-#define MEMCTRL_STRING_ATTR(_name, _data, _mode, _show, _store) \
-static struct memctrl_dev_attribute attr_##_name = { \
- .attr = {.name = __stringify(_name), .mode = _mode }, \
- .value = _data, \
- .show = _show, \
- .store = _store, \
-};
-
-/* csrow<id> control files */
-MEMCTRL_ATTR(edac_mc_panic_on_ue,
- S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store);
-
-MEMCTRL_ATTR(edac_mc_log_ue,
- S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store);
-
-MEMCTRL_ATTR(edac_mc_log_ce,
- S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store);
-
-MEMCTRL_ATTR(edac_mc_poll_msec,
- S_IRUGO | S_IWUSR, memctrl_int_show, poll_msec_int_store);
-
-/* Base Attributes of the memory ECC object */
-static struct memctrl_dev_attribute *memctrl_attr[] = {
- &attr_edac_mc_panic_on_ue,
- &attr_edac_mc_log_ue,
- &attr_edac_mc_log_ce,
- &attr_edac_mc_poll_msec,
- NULL,
-};
-
-
-/* the ktype for the mc_kset internal kobj */
-static struct kobj_type ktype_mc_set_attribs = {
- .sysfs_ops = &memctrlfs_ops,
- .default_attrs = (struct attribute **)memctrl_attr,
-};
-
/* EDAC memory controller sysfs kset:
* /sys/devices/system/edac/mc
*/
-static struct kset mc_kset = {
- .kobj = {.ktype = &ktype_mc_set_attribs },
-};
-
+static struct kset *mc_kset;
/*
* edac_mc_register_sysfs_main_kobj
@@ -771,7 +680,7 @@
}
/* this instance become part of the mc_kset */
- kobj_mci->kset = &mc_kset;
+ kobj_mci->kset = mc_kset;
/* register the mc<id> kobject to the mc_kset */
err = kobject_init_and_add(kobj_mci, &ktype_mci, NULL,
@@ -1001,12 +910,9 @@
}
/* Init the MC's kobject */
- kobject_set_name(&mc_kset.kobj, "mc");
- mc_kset.kobj.parent = &edac_class->kset.kobj;
-
- /* register the mc_kset */
- err = kset_register(&mc_kset);
- if (err) {
+ mc_kset = kset_create_and_add("mc", NULL, &edac_class->kset.kobj);
+ if (!mc_kset) {
+ err = -ENOMEM;
debugf1("%s() Failed to register '.../edac/mc'\n", __func__);
goto fail_out;
}
@@ -1028,6 +934,6 @@
*/
void edac_sysfs_teardown_mc_kset(void)
{
- kset_unregister(&mc_kset);
+ kset_unregister(mc_kset);
}
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index 2c1fa1b..5c153dc 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -28,7 +28,7 @@
static atomic_t pci_parity_count = ATOMIC_INIT(0);
static atomic_t pci_nonparity_count = ATOMIC_INIT(0);
-static struct kobject edac_pci_top_main_kobj;
+static struct kobject *edac_pci_top_main_kobj;
static atomic_t edac_pci_sysfs_refcount = ATOMIC_INIT(0);
/* getter functions for the data variables */
@@ -83,7 +83,7 @@
pci = to_instance(kobj);
/* decrement reference count on top main kobj */
- kobject_put(&edac_pci_top_main_kobj);
+ kobject_put(edac_pci_top_main_kobj);
kfree(pci); /* Free the control struct */
}
@@ -166,7 +166,7 @@
* track the number of PCI instances we have, and thus nest
* properly on keeping the module loaded
*/
- main_kobj = kobject_get(&edac_pci_top_main_kobj);
+ main_kobj = kobject_get(edac_pci_top_main_kobj);
if (!main_kobj) {
err = -ENODEV;
goto error_out;
@@ -174,11 +174,11 @@
/* And now register this new kobject under the main kobj */
err = kobject_init_and_add(&pci->kobj, &ktype_pci_instance,
- &edac_pci_top_main_kobj, "pci%d", idx);
+ edac_pci_top_main_kobj, "pci%d", idx);
if (err != 0) {
debugf2("%s() failed to register instance pci%d\n",
__func__, idx);
- kobject_put(&edac_pci_top_main_kobj);
+ kobject_put(edac_pci_top_main_kobj);
goto error_out;
}
@@ -316,9 +316,10 @@
*/
static void edac_pci_release_main_kobj(struct kobject *kobj)
{
-
debugf0("%s() here to module_put(THIS_MODULE)\n", __func__);
+ kfree(kobj);
+
/* last reference to top EDAC PCI kobject has been removed,
* NOW release our ref count on the core module
*/
@@ -369,8 +370,16 @@
goto decrement_count_fail;
}
+ edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+ if (!edac_pci_top_main_kobj) {
+ debugf1("Failed to allocate\n");
+ err = -ENOMEM;
+ goto kzalloc_fail;
+ }
+
/* Instanstiate the pci object */
- err = kobject_init_and_add(&edac_pci_top_main_kobj, &ktype_edac_pci_main_kobj,
+ err = kobject_init_and_add(edac_pci_top_main_kobj,
+ &ktype_edac_pci_main_kobj,
&edac_class->kset.kobj, "pci");
if (err) {
debugf1("Failed to register '.../edac/pci'\n");
@@ -381,13 +390,16 @@
* for EDAC PCI, then edac_pci_main_kobj_teardown()
* must be used, for resources to be cleaned up properly
*/
- kobject_uevent(&edac_pci_top_main_kobj, KOBJ_ADD);
+ kobject_uevent(edac_pci_top_main_kobj, KOBJ_ADD);
debugf1("Registered '.../edac/pci' kobject\n");
return 0;
/* Error unwind statck */
kobject_init_and_add_fail:
+ kfree(edac_pci_top_main_kobj);
+
+kzalloc_fail:
module_put(THIS_MODULE);
decrement_count_fail:
@@ -414,7 +426,7 @@
if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) {
debugf0("%s() called kobject_put on main kobj\n",
__func__);
- kobject_put(&edac_pci_top_main_kobj);
+ kobject_put(edac_pci_top_main_kobj);
}
}
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
new file mode 100644
index 0000000..22db05a
--- /dev/null
+++ b/drivers/edac/i5100_edac.c
@@ -0,0 +1,981 @@
+/*
+ * Intel 5100 Memory Controllers kernel module
+ *
+ * This file may be distributed under the terms of the
+ * GNU General Public License.
+ *
+ * This module is based on the following document:
+ *
+ * Intel 5100X Chipset Memory Controller Hub (MCH) - Datasheet
+ * http://download.intel.com/design/chipsets/datashts/318378.pdf
+ *
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/edac.h>
+#include <linux/delay.h>
+#include <linux/mmzone.h>
+
+#include "edac_core.h"
+
+/* register addresses */
+
+/* device 16, func 1 */
+#define I5100_MC 0x40 /* Memory Control Register */
+#define I5100_MS 0x44 /* Memory Status Register */
+#define I5100_SPDDATA 0x48 /* Serial Presence Detect Status Reg */
+#define I5100_SPDCMD 0x4c /* Serial Presence Detect Command Reg */
+#define I5100_TOLM 0x6c /* Top of Low Memory */
+#define I5100_MIR0 0x80 /* Memory Interleave Range 0 */
+#define I5100_MIR1 0x84 /* Memory Interleave Range 1 */
+#define I5100_AMIR_0 0x8c /* Adjusted Memory Interleave Range 0 */
+#define I5100_AMIR_1 0x90 /* Adjusted Memory Interleave Range 1 */
+#define I5100_FERR_NF_MEM 0xa0 /* MC First Non Fatal Errors */
+#define I5100_FERR_NF_MEM_M16ERR_MASK (1 << 16)
+#define I5100_FERR_NF_MEM_M15ERR_MASK (1 << 15)
+#define I5100_FERR_NF_MEM_M14ERR_MASK (1 << 14)
+#define I5100_FERR_NF_MEM_M12ERR_MASK (1 << 12)
+#define I5100_FERR_NF_MEM_M11ERR_MASK (1 << 11)
+#define I5100_FERR_NF_MEM_M10ERR_MASK (1 << 10)
+#define I5100_FERR_NF_MEM_M6ERR_MASK (1 << 6)
+#define I5100_FERR_NF_MEM_M5ERR_MASK (1 << 5)
+#define I5100_FERR_NF_MEM_M4ERR_MASK (1 << 4)
+#define I5100_FERR_NF_MEM_M1ERR_MASK 1
+#define I5100_FERR_NF_MEM_ANY_MASK \
+ (I5100_FERR_NF_MEM_M16ERR_MASK | \
+ I5100_FERR_NF_MEM_M15ERR_MASK | \
+ I5100_FERR_NF_MEM_M14ERR_MASK | \
+ I5100_FERR_NF_MEM_M12ERR_MASK | \
+ I5100_FERR_NF_MEM_M11ERR_MASK | \
+ I5100_FERR_NF_MEM_M10ERR_MASK | \
+ I5100_FERR_NF_MEM_M6ERR_MASK | \
+ I5100_FERR_NF_MEM_M5ERR_MASK | \
+ I5100_FERR_NF_MEM_M4ERR_MASK | \
+ I5100_FERR_NF_MEM_M1ERR_MASK)
+#define I5100_NERR_NF_MEM 0xa4 /* MC Next Non-Fatal Errors */
+#define I5100_EMASK_MEM 0xa8 /* MC Error Mask Register */
+
+/* device 21 and 22, func 0 */
+#define I5100_MTR_0 0x154 /* Memory Technology Registers 0-3 */
+#define I5100_DMIR 0x15c /* DIMM Interleave Range */
+#define I5100_VALIDLOG 0x18c /* Valid Log Markers */
+#define I5100_NRECMEMA 0x190 /* Non-Recoverable Memory Error Log Reg A */
+#define I5100_NRECMEMB 0x194 /* Non-Recoverable Memory Error Log Reg B */
+#define I5100_REDMEMA 0x198 /* Recoverable Memory Data Error Log Reg A */
+#define I5100_REDMEMB 0x19c /* Recoverable Memory Data Error Log Reg B */
+#define I5100_RECMEMA 0x1a0 /* Recoverable Memory Error Log Reg A */
+#define I5100_RECMEMB 0x1a4 /* Recoverable Memory Error Log Reg B */
+#define I5100_MTR_4 0x1b0 /* Memory Technology Registers 4,5 */
+
+/* bit field accessors */
+
+static inline u32 i5100_mc_errdeten(u32 mc)
+{
+ return mc >> 5 & 1;
+}
+
+static inline u16 i5100_spddata_rdo(u16 a)
+{
+ return a >> 15 & 1;
+}
+
+static inline u16 i5100_spddata_sbe(u16 a)
+{
+ return a >> 13 & 1;
+}
+
+static inline u16 i5100_spddata_busy(u16 a)
+{
+ return a >> 12 & 1;
+}
+
+static inline u16 i5100_spddata_data(u16 a)
+{
+ return a & ((1 << 8) - 1);
+}
+
+static inline u32 i5100_spdcmd_create(u32 dti, u32 ckovrd, u32 sa, u32 ba,
+ u32 data, u32 cmd)
+{
+ return ((dti & ((1 << 4) - 1)) << 28) |
+ ((ckovrd & 1) << 27) |
+ ((sa & ((1 << 3) - 1)) << 24) |
+ ((ba & ((1 << 8) - 1)) << 16) |
+ ((data & ((1 << 8) - 1)) << 8) |
+ (cmd & 1);
+}
+
+static inline u16 i5100_tolm_tolm(u16 a)
+{
+ return a >> 12 & ((1 << 4) - 1);
+}
+
+static inline u16 i5100_mir_limit(u16 a)
+{
+ return a >> 4 & ((1 << 12) - 1);
+}
+
+static inline u16 i5100_mir_way1(u16 a)
+{
+ return a >> 1 & 1;
+}
+
+static inline u16 i5100_mir_way0(u16 a)
+{
+ return a & 1;
+}
+
+static inline u32 i5100_ferr_nf_mem_chan_indx(u32 a)
+{
+ return a >> 28 & 1;
+}
+
+static inline u32 i5100_ferr_nf_mem_any(u32 a)
+{
+ return a & I5100_FERR_NF_MEM_ANY_MASK;
+}
+
+static inline u32 i5100_nerr_nf_mem_any(u32 a)
+{
+ return i5100_ferr_nf_mem_any(a);
+}
+
+static inline u32 i5100_dmir_limit(u32 a)
+{
+ return a >> 16 & ((1 << 11) - 1);
+}
+
+static inline u32 i5100_dmir_rank(u32 a, u32 i)
+{
+ return a >> (4 * i) & ((1 << 2) - 1);
+}
+
+static inline u16 i5100_mtr_present(u16 a)
+{
+ return a >> 10 & 1;
+}
+
+static inline u16 i5100_mtr_ethrottle(u16 a)
+{
+ return a >> 9 & 1;
+}
+
+static inline u16 i5100_mtr_width(u16 a)
+{
+ return a >> 8 & 1;
+}
+
+static inline u16 i5100_mtr_numbank(u16 a)
+{
+ return a >> 6 & 1;
+}
+
+static inline u16 i5100_mtr_numrow(u16 a)
+{
+ return a >> 2 & ((1 << 2) - 1);
+}
+
+static inline u16 i5100_mtr_numcol(u16 a)
+{
+ return a & ((1 << 2) - 1);
+}
+
+
+static inline u32 i5100_validlog_redmemvalid(u32 a)
+{
+ return a >> 2 & 1;
+}
+
+static inline u32 i5100_validlog_recmemvalid(u32 a)
+{
+ return a >> 1 & 1;
+}
+
+static inline u32 i5100_validlog_nrecmemvalid(u32 a)
+{
+ return a & 1;
+}
+
+static inline u32 i5100_nrecmema_merr(u32 a)
+{
+ return a >> 15 & ((1 << 5) - 1);
+}
+
+static inline u32 i5100_nrecmema_bank(u32 a)
+{
+ return a >> 12 & ((1 << 3) - 1);
+}
+
+static inline u32 i5100_nrecmema_rank(u32 a)
+{
+ return a >> 8 & ((1 << 3) - 1);
+}
+
+static inline u32 i5100_nrecmema_dm_buf_id(u32 a)
+{
+ return a & ((1 << 8) - 1);
+}
+
+static inline u32 i5100_nrecmemb_cas(u32 a)
+{
+ return a >> 16 & ((1 << 13) - 1);
+}
+
+static inline u32 i5100_nrecmemb_ras(u32 a)
+{
+ return a & ((1 << 16) - 1);
+}
+
+static inline u32 i5100_redmemb_ecc_locator(u32 a)
+{
+ return a & ((1 << 18) - 1);
+}
+
+static inline u32 i5100_recmema_merr(u32 a)
+{
+ return i5100_nrecmema_merr(a);
+}
+
+static inline u32 i5100_recmema_bank(u32 a)
+{
+ return i5100_nrecmema_bank(a);
+}
+
+static inline u32 i5100_recmema_rank(u32 a)
+{
+ return i5100_nrecmema_rank(a);
+}
+
+static inline u32 i5100_recmema_dm_buf_id(u32 a)
+{
+ return i5100_nrecmema_dm_buf_id(a);
+}
+
+static inline u32 i5100_recmemb_cas(u32 a)
+{
+ return i5100_nrecmemb_cas(a);
+}
+
+static inline u32 i5100_recmemb_ras(u32 a)
+{
+ return i5100_nrecmemb_ras(a);
+}
+
+/* some generic limits */
+#define I5100_MAX_RANKS_PER_CTLR 6
+#define I5100_MAX_CTLRS 2
+#define I5100_MAX_RANKS_PER_DIMM 4
+#define I5100_DIMM_ADDR_LINES (6 - 3) /* 64 bits / 8 bits per byte */
+#define I5100_MAX_DIMM_SLOTS_PER_CTLR 4
+#define I5100_MAX_RANK_INTERLEAVE 4
+#define I5100_MAX_DMIRS 5
+
+struct i5100_priv {
+ /* ranks on each dimm -- 0 maps to not present -- obtained via SPD */
+ int dimm_numrank[I5100_MAX_CTLRS][I5100_MAX_DIMM_SLOTS_PER_CTLR];
+
+ /*
+ * mainboard chip select map -- maps i5100 chip selects to
+ * DIMM slot chip selects. In the case of only 4 ranks per
+ * controller, the mapping is fairly obvious but not unique.
+ * we map -1 -> NC and assume both controllers use the same
+ * map...
+ *
+ */
+ int dimm_csmap[I5100_MAX_DIMM_SLOTS_PER_CTLR][I5100_MAX_RANKS_PER_DIMM];
+
+ /* memory interleave range */
+ struct {
+ u64 limit;
+ unsigned way[2];
+ } mir[I5100_MAX_CTLRS];
+
+ /* adjusted memory interleave range register */
+ unsigned amir[I5100_MAX_CTLRS];
+
+ /* dimm interleave range */
+ struct {
+ unsigned rank[I5100_MAX_RANK_INTERLEAVE];
+ u64 limit;
+ } dmir[I5100_MAX_CTLRS][I5100_MAX_DMIRS];
+
+ /* memory technology registers... */
+ struct {
+ unsigned present; /* 0 or 1 */
+ unsigned ethrottle; /* 0 or 1 */
+ unsigned width; /* 4 or 8 bits */
+ unsigned numbank; /* 2 or 3 lines */
+ unsigned numrow; /* 13 .. 16 lines */
+ unsigned numcol; /* 11 .. 12 lines */
+ } mtr[I5100_MAX_CTLRS][I5100_MAX_RANKS_PER_CTLR];
+
+ u64 tolm; /* top of low memory in bytes */
+ unsigned ranksperctlr; /* number of ranks per controller */
+
+ struct pci_dev *mc; /* device 16 func 1 */
+ struct pci_dev *ch0mm; /* device 21 func 0 */
+ struct pci_dev *ch1mm; /* device 22 func 0 */
+};
+
+/* map a rank/ctlr to a slot number on the mainboard */
+static int i5100_rank_to_slot(const struct mem_ctl_info *mci,
+ int ctlr, int rank)
+{
+ const struct i5100_priv *priv = mci->pvt_info;
+ int i;
+
+ for (i = 0; i < I5100_MAX_DIMM_SLOTS_PER_CTLR; i++) {
+ int j;
+ const int numrank = priv->dimm_numrank[ctlr][i];
+
+ for (j = 0; j < numrank; j++)
+ if (priv->dimm_csmap[i][j] == rank)
+ return i * 2 + ctlr;
+ }
+
+ return -1;
+}
+
+/*
+ * Return a description for the lowest-numbered bit set in @err.
+ *
+ * Table index N describes error bit N.  "none" is returned when none of
+ * the bits covered by the table is set.
+ */
+static const char *i5100_err_msg(unsigned err)
+{
+	static const char *merrs[] = {
+		"unknown", /* 0 */
+		"uncorrectable data ECC on replay", /* 1 */
+		"unknown", /* 2 */
+		"unknown", /* 3 */
+		"aliased uncorrectable demand data ECC", /* 4 */
+		"aliased uncorrectable spare-copy data ECC", /* 5 */
+		"aliased uncorrectable patrol data ECC", /* 6 */
+		"unknown", /* 7 */
+		"unknown", /* 8 */
+		"unknown", /* 9 */
+		"non-aliased uncorrectable demand data ECC", /* 10 */
+		"non-aliased uncorrectable spare-copy data ECC", /* 11 */
+		"non-aliased uncorrectable patrol data ECC", /* 12 */
+		"unknown", /* 13 */
+		"correctable demand data ECC", /* 14 */
+		"correctable spare-copy data ECC", /* 15 */
+		"correctable patrol data ECC", /* 16 */
+		"unknown", /* 17 */
+		"SPD protocol error", /* 18 */
+		"unknown", /* 19 */
+		"spare copy initiated", /* 20 */
+		"spare copy completed", /* 21 */
+	};
+	unsigned bit = 0;
+
+	/* scan upwards so the lowest set bit decides the message */
+	while (bit < ARRAY_SIZE(merrs)) {
+		if (err & (1 << bit))
+			return merrs[bit];
+		bit++;
+	}
+
+	return "none";
+}
+
+/*
+ * Rank within its controller for a given csrow index: the remainder of
+ * the csrow index divided by the number of ranks per controller.
+ */
+static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow)
+{
+	const struct i5100_priv *priv = mci->pvt_info;
+	const unsigned per_ctlr = priv->ranksperctlr;
+
+	return csrow % per_ctlr;
+}
+
+/*
+ * Controller owning a given csrow index: the csrow index divided by the
+ * number of ranks per controller.
+ */
+static int i5100_csrow_to_cntlr(const struct mem_ctl_info *mci, int csrow)
+{
+	const struct i5100_priv *priv = mci->pvt_info;
+	const unsigned per_ctlr = priv->ranksperctlr;
+
+	return csrow / per_ctlr;
+}
+
+/*
+ * Inverse of the two csrow helpers: combine a controller number and a
+ * per-controller rank into a flat csrow index.
+ */
+static unsigned i5100_rank_to_csrow(const struct mem_ctl_info *mci,
+				    int ctlr, int rank)
+{
+	const struct i5100_priv *priv = mci->pvt_info;
+	const unsigned base = ctlr * priv->ranksperctlr;
+
+	return base + rank;
+}
+
+/*
+ * Report a correctable error: log its location and bump the CE counters
+ * of the controller, the affected csrow and that csrow's first channel.
+ */
+static void i5100_handle_ce(struct mem_ctl_info *mci,
+			    int ctlr,
+			    unsigned bank,
+			    unsigned rank,
+			    unsigned long syndrome,
+			    unsigned cas,
+			    unsigned ras,
+			    const char *msg)
+{
+	const int row = i5100_rank_to_csrow(mci, ctlr, rank);
+	const char *label = mci->csrows[row].channels[0].label;
+
+	printk(KERN_ERR
+	       "CE ctlr %d, bank %u, rank %u, syndrome 0x%lx, "
+	       "cas %u, ras %u, csrow %u, label \"%s\": %s\n",
+	       ctlr, bank, rank, syndrome, cas, ras,
+	       row, label, msg);
+
+	/* account for the error from the innermost level outwards */
+	mci->csrows[row].channels[0].ce_count++;
+	mci->csrows[row].ce_count++;
+	mci->ce_count++;
+}
+
+/*
+ * Report an uncorrectable error: log its location and bump the UE
+ * counters of the controller and of the affected csrow.
+ */
+static void i5100_handle_ue(struct mem_ctl_info *mci,
+			    int ctlr,
+			    unsigned bank,
+			    unsigned rank,
+			    unsigned long syndrome,
+			    unsigned cas,
+			    unsigned ras,
+			    const char *msg)
+{
+	const int row = i5100_rank_to_csrow(mci, ctlr, rank);
+	const char *label = mci->csrows[row].channels[0].label;
+
+	printk(KERN_ERR
+	       "UE ctlr %d, bank %u, rank %u, syndrome 0x%lx, "
+	       "cas %u, ras %u, csrow %u, label \"%s\": %s\n",
+	       ctlr, bank, rank, syndrome, cas, ras,
+	       row, label, msg);
+
+	/* account for the error from the innermost level outwards */
+	mci->csrows[row].ue_count++;
+	mci->ue_count++;
+}
+
+/*
+ * Decode the error log registers of one channel and report what they hold.
+ *
+ * @ctlr selects the per-channel PCI device (ch1mm when non-zero, ch0mm
+ * otherwise).  VALIDLOG tells which of the log register groups hold data:
+ *  - REDMEM*:  supplies the syndrome (and an ECC locator, which is read
+ *              but not used any further in this function)
+ *  - RECMEM*:  reported through i5100_handle_ce()
+ *  - NRECMEM*: reported through i5100_handle_ue()
+ *
+ * @ferr and @nerr are passed to i5100_err_msg() to pick the message text;
+ * which of the two is used depends on the merr field of the log entry.
+ */
+static void i5100_read_log(struct mem_ctl_info *mci, int ctlr,
+ u32 ferr, u32 nerr)
+{
+ struct i5100_priv *priv = mci->pvt_info;
+ struct pci_dev *pdev = (ctlr) ? priv->ch1mm : priv->ch0mm;
+ u32 dw;
+ u32 dw2;
+ unsigned syndrome = 0;
+ unsigned ecc_loc = 0;
+ unsigned merr;
+ unsigned bank;
+ unsigned rank;
+ unsigned cas;
+ unsigned ras;
+
+ pci_read_config_dword(pdev, I5100_VALIDLOG, &dw);
+
+ /* syndrome stays 0 when the REDMEM log is not flagged valid */
+ if (i5100_validlog_redmemvalid(dw)) {
+ pci_read_config_dword(pdev, I5100_REDMEMA, &dw2);
+ syndrome = dw2;
+ pci_read_config_dword(pdev, I5100_REDMEMB, &dw2);
+ ecc_loc = i5100_redmemb_ecc_locator(dw2);
+ }
+
+ /* RECMEM log valid: report as a correctable error */
+ if (i5100_validlog_recmemvalid(dw)) {
+ const char *msg;
+
+ pci_read_config_dword(pdev, I5100_RECMEMA, &dw2);
+ merr = i5100_recmema_merr(dw2);
+ bank = i5100_recmema_bank(dw2);
+ rank = i5100_recmema_rank(dw2);
+
+ pci_read_config_dword(pdev, I5100_RECMEMB, &dw2);
+ cas = i5100_recmemb_cas(dw2);
+ ras = i5100_recmemb_ras(dw2);
+
+ /* FIXME: not really sure if this is what merr is...
+ */
+ if (!merr)
+ msg = i5100_err_msg(ferr);
+ else
+ msg = i5100_err_msg(nerr);
+
+ i5100_handle_ce(mci, ctlr, bank, rank, syndrome, cas, ras, msg);
+ }
+
+ /* NRECMEM log valid: report as an uncorrectable error */
+ if (i5100_validlog_nrecmemvalid(dw)) {
+ const char *msg;
+
+ pci_read_config_dword(pdev, I5100_NRECMEMA, &dw2);
+ merr = i5100_nrecmema_merr(dw2);
+ bank = i5100_nrecmema_bank(dw2);
+ rank = i5100_nrecmema_rank(dw2);
+
+ pci_read_config_dword(pdev, I5100_NRECMEMB, &dw2);
+ cas = i5100_nrecmemb_cas(dw2);
+ ras = i5100_nrecmemb_ras(dw2);
+
+ /* FIXME: not really sure if this is what merr is...
+ */
+ if (!merr)
+ msg = i5100_err_msg(ferr);
+ else
+ msg = i5100_err_msg(nerr);
+
+ i5100_handle_ue(mci, ctlr, bank, rank, syndrome, cas, ras, msg);
+ }
+
+ /*
+ * Write back the value that was read -- presumably the valid bits are
+ * write-one-to-clear; NOTE(review): confirm against the datasheet.
+ */
+ pci_write_config_dword(pdev, I5100_VALIDLOG, dw);
+}
+
+/*
+ * EDAC check callback: look for a pending memory error and, if one is
+ * flagged, write the error registers back and decode the channel's log.
+ */
+static void i5100_check_error(struct mem_ctl_info *mci)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	u32 ferr;
+	u32 nerr;
+
+	pci_read_config_dword(priv->mc, I5100_FERR_NF_MEM, &ferr);
+	if (!i5100_ferr_nf_mem_any(ferr))
+		return;		/* nothing flagged */
+
+	/* the "next error" register is written back before the first one */
+	pci_read_config_dword(priv->mc, I5100_NERR_NF_MEM, &nerr);
+	if (nerr)
+		pci_write_config_dword(priv->mc, I5100_NERR_NF_MEM, nerr);
+	pci_write_config_dword(priv->mc, I5100_FERR_NF_MEM, ferr);
+
+	i5100_read_log(mci, i5100_ferr_nf_mem_chan_indx(ferr),
+		       i5100_ferr_nf_mem_any(ferr),
+		       i5100_nerr_nf_mem_any(nerr));
+}
+
+/*
+ * Find the first PCI device with the given vendor/device ID whose
+ * function number equals @func.  Returns NULL when there is none.
+ * Callers in this file release the result with pci_dev_put().
+ */
+static struct pci_dev *pci_get_device_func(unsigned vendor,
+					   unsigned device,
+					   unsigned func)
+{
+	struct pci_dev *dev = NULL;
+
+	/* keep iterating from the previous hit until the function matches */
+	do {
+		dev = pci_get_device(vendor, device, dev);
+	} while (dev && PCI_FUNC(dev->devfn) != func);
+
+	return dev;
+}
+
+/*
+ * Number of pages backed by the rank behind @csrow, or 0 if the memory
+ * technology register says no DIMM is present there.
+ *
+ * The size is 2^(address lines) bytes, where the address line count is
+ * I5100_DIMM_ADDR_LINES plus the column, row and bank line counts.
+ */
+static unsigned long __devinit i5100_npages(struct mem_ctl_info *mci,
+					    int csrow)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	const unsigned ctlr = i5100_csrow_to_cntlr(mci, csrow);
+	const unsigned ctlr_rank = i5100_csrow_to_rank(mci, csrow);
+	unsigned bits;
+
+	/* dimm present? */
+	if (!priv->mtr[ctlr][ctlr_rank].present)
+		return 0;
+
+	bits = I5100_DIMM_ADDR_LINES;
+	bits += priv->mtr[ctlr][ctlr_rank].numcol;
+	bits += priv->mtr[ctlr][ctlr_rank].numrow;
+	bits += priv->mtr[ctlr][ctlr_rank].numbank;
+
+	return (unsigned long) ((1ULL << bits) / PAGE_SIZE);
+}
+
+/*
+ * Read the memory technology register of every rank on both channels
+ * and cache the decoded fields in priv->mtr[][].
+ *
+ * Ranks 0..3 live at I5100_MTR_0 + 2 * rank, higher ranks at
+ * I5100_MTR_4 + 2 * (rank - 4).
+ */
+static void __devinit i5100_init_mtr(struct mem_ctl_info *mci)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	struct pci_dev *mms[2] = { priv->ch0mm, priv->ch1mm };
+	int ctlr;
+
+	for (ctlr = 0; ctlr < I5100_MAX_CTLRS; ctlr++) {
+		struct pci_dev *pdev = mms[ctlr];
+		int rank;
+
+		for (rank = 0; rank < I5100_MAX_RANKS_PER_CTLR; rank++) {
+			unsigned addr;
+			u16 w;
+
+			if (rank < 4)
+				addr = I5100_MTR_0 + rank * 2;
+			else
+				addr = I5100_MTR_4 + (rank - 4) * 2;
+
+			pci_read_config_word(pdev, addr, &w);
+
+			/* the raw fields are offsets from a minimum value */
+			priv->mtr[ctlr][rank].present = i5100_mtr_present(w);
+			priv->mtr[ctlr][rank].ethrottle =
+				i5100_mtr_ethrottle(w);
+			priv->mtr[ctlr][rank].width =
+				4 + 4 * i5100_mtr_width(w);
+			priv->mtr[ctlr][rank].numbank =
+				2 + i5100_mtr_numbank(w);
+			priv->mtr[ctlr][rank].numrow =
+				13 + i5100_mtr_numrow(w);
+			priv->mtr[ctlr][rank].numcol =
+				10 + i5100_mtr_numcol(w);
+		}
+	}
+}
+
+/*
+ * Read one byte from the SPD EEPROM of a DIMM through the memory
+ * controller's SPD command/data registers.
+ *
+ * @ch and @slot select the DIMM (encoded as ch * 4 + slot), @addr is the
+ * byte offset and the result is stored in *@byte.
+ *
+ * Returns 0 on success.  Returns -1 if the SPD engine is busy on entry,
+ * if it is still busy once the 100ms deadline has passed, or if the
+ * transfer did not complete cleanly (read-data-valid clear or error bit
+ * set).
+ *
+ * FIXME: make this into a real i2c adapter (so that dimm-decode
+ * will work)?
+ */
+static int i5100_read_spd_byte(const struct mem_ctl_info *mci,
+			       u8 ch, u8 slot, u8 addr, u8 *byte)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	u16 w;
+	unsigned long et;
+
+	pci_read_config_word(priv->mc, I5100_SPDDATA, &w);
+	if (i5100_spddata_busy(w))
+		return -1;
+
+	pci_write_config_dword(priv->mc, I5100_SPDCMD,
+			       i5100_spdcmd_create(0xa, 1, ch * 4 + slot, addr,
+						   0, 0));
+
+	/* wait up to 100ms */
+	et = jiffies + HZ / 10;
+	udelay(100);
+	while (1) {
+		pci_read_config_word(priv->mc, I5100_SPDDATA, &w);
+		if (!i5100_spddata_busy(w))
+			break;
+		/*
+		 * Give up once the deadline has passed; without this
+		 * check a stuck busy bit would hang the probe forever.
+		 */
+		if (time_after(jiffies, et))
+			return -1;
+		udelay(100);
+	}
+
+	if (!i5100_spddata_rdo(w) || i5100_spddata_sbe(w))
+		return -1;
+
+	*byte = i5100_spddata_data(w);
+
+	return 0;
+}
+
+/*
+ * Populate the DIMM chip select map with a fixed layout.
+ *
+ * FIXME:
+ *  o only valid for 4 ranks per controller
+ *  o not the only way to map chip selects to dimm slots
+ *  o investigate if there is some way to obtain this map from the bios
+ */
+static void __devinit i5100_init_dimm_csmap(struct mem_ctl_info *mci)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	int slot;
+	int cs;
+
+	WARN_ON(priv->ranksperctlr != 4);
+
+	/* start with every chip select marked as not connected (-1) */
+	for (slot = 0; slot < I5100_MAX_DIMM_SLOTS_PER_CTLR; slot++)
+		for (cs = 0; cs < I5100_MAX_RANKS_PER_DIMM; cs++)
+			priv->dimm_csmap[slot][cs] = -1;
+
+	/* only 2 chip selects per slot... */
+	priv->dimm_csmap[0][0] = 0;
+	priv->dimm_csmap[0][1] = 3;
+
+	priv->dimm_csmap[1][0] = 1;
+	priv->dimm_csmap[1][1] = 2;
+
+	priv->dimm_csmap[2][0] = 2;
+
+	priv->dimm_csmap[3][0] = 3;
+}
+
+/*
+ * Work out how many ranks each DIMM slot carries by reading SPD byte 5
+ * of every slot on both controllers, then fill in the chip select map.
+ * A slot whose SPD cannot be read is recorded as having 0 ranks.
+ */
+static void __devinit i5100_init_dimm_layout(struct pci_dev *pdev,
+					     struct mem_ctl_info *mci)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	int ctlr;
+	int slot;
+
+	for (ctlr = 0; ctlr < I5100_MAX_CTLRS; ctlr++) {
+		for (slot = 0; slot < I5100_MAX_DIMM_SLOTS_PER_CTLR; slot++) {
+			u8 spd;
+			int nranks = 0;
+
+			/* rank count is the low two bits of the byte, plus one */
+			if (i5100_read_spd_byte(mci, ctlr, slot, 5, &spd) >= 0)
+				nranks = (spd & 3) + 1;
+
+			priv->dimm_numrank[ctlr][slot] = nranks;
+		}
+	}
+
+	i5100_init_dimm_csmap(mci);
+}
+
+/*
+ * Cache the interleaving configuration in the private data:
+ *  - top of low memory (TOLM), stored in bytes
+ *  - the two memory interleave ranges (MIR0/MIR1) with their way bits
+ *  - the two adjusted memory interleave range registers (AMIR)
+ *  - the DIMM interleave ranges (DMIR) of both channel devices
+ * and finally the memory technology registers via i5100_init_mtr().
+ *
+ * TOLM/MIR/AMIR are read from @pdev, DMIR from ch0mm/ch1mm.
+ */
+static void __devinit i5100_init_interleaving(struct pci_dev *pdev,
+ struct mem_ctl_info *mci)
+{
+ u16 w;
+ u32 dw;
+ struct i5100_priv *priv = mci->pvt_info;
+ struct pci_dev *mms[2] = { priv->ch0mm, priv->ch1mm };
+ int i;
+
+ /* the register field counts in units of 256MB */
+ pci_read_config_word(pdev, I5100_TOLM, &w);
+ priv->tolm = (u64) i5100_tolm_tolm(w) * 256 * 1024 * 1024;
+
+ /* limits are shifted left by 28, i.e. they also count 256MB units */
+ pci_read_config_word(pdev, I5100_MIR0, &w);
+ priv->mir[0].limit = (u64) i5100_mir_limit(w) << 28;
+ priv->mir[0].way[1] = i5100_mir_way1(w);
+ priv->mir[0].way[0] = i5100_mir_way0(w);
+
+ pci_read_config_word(pdev, I5100_MIR1, &w);
+ priv->mir[1].limit = (u64) i5100_mir_limit(w) << 28;
+ priv->mir[1].way[1] = i5100_mir_way1(w);
+ priv->mir[1].way[0] = i5100_mir_way0(w);
+
+ pci_read_config_word(pdev, I5100_AMIR_0, &w);
+ priv->amir[0] = w;
+ pci_read_config_word(pdev, I5100_AMIR_1, &w);
+ priv->amir[1] = w;
+
+ for (i = 0; i < I5100_MAX_CTLRS; i++) {
+ int j;
+
+ /*
+ * NOTE(review): the bound 5 is hard-coded although dmir[][] is
+ * declared with I5100_MAX_DMIRS entries, and the inner loop runs to
+ * I5100_MAX_RANKS_PER_DIMM although rank[] is declared with
+ * I5100_MAX_RANK_INTERLEAVE entries -- confirm the constants agree.
+ */
+ for (j = 0; j < 5; j++) {
+ int k;
+
+ /* consecutive DMIR registers are 4 bytes apart */
+ pci_read_config_dword(mms[i], I5100_DMIR + j * 4, &dw);
+
+ priv->dmir[i][j].limit =
+ (u64) i5100_dmir_limit(dw) << 28;
+ for (k = 0; k < I5100_MAX_RANKS_PER_DIMM; k++)
+ priv->dmir[i][j].rank[k] =
+ i5100_dmir_rank(dw, k);
+ }
+ }
+
+ i5100_init_mtr(mci);
+}
+
+/*
+ * Fill in the EDAC csrow table, one csrow per (controller, rank).
+ *
+ * Ranks without a DIMM (i5100_npages() returns 0) are left untouched.
+ * Populated ranks are laid out back to back in page numbers, get a
+ * single channel and are labelled "DIMM<slot>" using the slot number
+ * from i5100_rank_to_slot().
+ */
+static void __devinit i5100_init_csrows(struct mem_ctl_info *mci)
+{
+	int i;
+	unsigned long total_pages = 0UL;
+	struct i5100_priv *priv = mci->pvt_info;
+
+	for (i = 0; i < mci->nr_csrows; i++) {
+		const unsigned long npages = i5100_npages(mci, i);
+		const unsigned cntlr = i5100_csrow_to_cntlr(mci, i);
+		const unsigned rank = i5100_csrow_to_rank(mci, i);
+
+		if (!npages)
+			continue;
+
+		/*
+		 * FIXME: these two are totally bogus -- I don't see how to
+		 * map them correctly to this structure...
+		 */
+		mci->csrows[i].first_page = total_pages;
+		mci->csrows[i].last_page = total_pages + npages - 1;
+		mci->csrows[i].page_mask = 0UL;
+
+		mci->csrows[i].nr_pages = npages;
+		mci->csrows[i].grain = 32;
+		mci->csrows[i].csrow_idx = i;
+		mci->csrows[i].dtype =
+			(priv->mtr[cntlr][rank].width == 4) ? DEV_X4 : DEV_X8;
+		mci->csrows[i].ue_count = 0;
+		mci->csrows[i].ce_count = 0;
+		mci->csrows[i].mtype = MEM_RDDR2;
+		mci->csrows[i].edac_mode = EDAC_SECDED;
+		mci->csrows[i].mci = mci;
+		mci->csrows[i].nr_channels = 1;
+		mci->csrows[i].channels[0].chan_idx = 0;
+		mci->csrows[i].channels[0].ce_count = 0;
+		mci->csrows[i].channels[0].csrow = mci->csrows + i;
+		/*
+		 * i5100_rank_to_slot() returns a signed int and yields -1
+		 * for a rank no slot claims, so print it with %d; %u would
+		 * turn that into the label "DIMM4294967295".
+		 */
+		snprintf(mci->csrows[i].channels[0].label,
+			 sizeof(mci->csrows[i].channels[0].label),
+			 "DIMM%d", i5100_rank_to_slot(mci, cntlr, rank));
+
+		total_pages += npages;
+	}
+}
+
+/*
+ * PCI probe routine.
+ *
+ * Only function 1 of the matched device is accepted.  The probe bails
+ * out with -ENODEV when ECC error detection is disabled or when the
+ * controller is strapped for anything other than 4 ranks per channel.
+ * Otherwise it unmasks memory error reporting, looks up and enables the
+ * two per-channel devices (device IDs ..._5100_21 and ..._5100_22,
+ * function 0), allocates the EDAC memory controller with two csrows per
+ * rank-per-channel count, caches the hardware layout and registers the
+ * controller with the EDAC core.
+ *
+ * Returns 0 on success or a negative errno.  The labels at the end undo
+ * the acquisitions in reverse order; each failure jumps to the label
+ * matching what has been acquired so far.
+ */
+static int __devinit i5100_init_one(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ int rc;
+ struct mem_ctl_info *mci;
+ struct i5100_priv *priv;
+ struct pci_dev *ch0mm, *ch1mm;
+ int ret = 0;
+ u32 dw;
+ int ranksperch;
+
+ if (PCI_FUNC(pdev->devfn) != 1)
+ return -ENODEV;
+
+ rc = pci_enable_device(pdev);
+ if (rc < 0) {
+ ret = rc;
+ goto bail;
+ }
+
+ /* ECC enabled? */
+ pci_read_config_dword(pdev, I5100_MC, &dw);
+ if (!i5100_mc_errdeten(dw)) {
+ printk(KERN_INFO "i5100_edac: ECC not enabled.\n");
+ ret = -ENODEV;
+ goto bail_pdev;
+ }
+
+ /* figure out how many ranks, from strapped state of 48GB_Mode input */
+ pci_read_config_dword(pdev, I5100_MS, &dw);
+ /* bit 8 set -> 6 ranks per channel, clear -> 4 */
+ ranksperch = !!(dw & (1 << 8)) * 2 + 4;
+
+ if (ranksperch != 4) {
+ /* FIXME: get 6 ranks / controller to work - need hw... */
+ printk(KERN_INFO "i5100_edac: unsupported configuration.\n");
+ ret = -ENODEV;
+ goto bail_pdev;
+ }
+
+ /* enable error reporting... */
+ pci_read_config_dword(pdev, I5100_EMASK_MEM, &dw);
+ dw &= ~I5100_FERR_NF_MEM_ANY_MASK;
+ pci_write_config_dword(pdev, I5100_EMASK_MEM, dw);
+
+ /* device 21, func 0, Channel 0 Memory Map, Error Flag/Mask, etc... */
+ ch0mm = pci_get_device_func(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_5100_21, 0);
+ if (!ch0mm) {
+ ret = -ENODEV;
+ goto bail_pdev;
+ }
+
+ rc = pci_enable_device(ch0mm);
+ if (rc < 0) {
+ ret = rc;
+ goto bail_ch0;
+ }
+
+ /* device 22, func 0, Channel 1 Memory Map, Error Flag/Mask, etc... */
+ ch1mm = pci_get_device_func(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_5100_22, 0);
+ if (!ch1mm) {
+ ret = -ENODEV;
+ goto bail_disable_ch0;
+ }
+
+ rc = pci_enable_device(ch1mm);
+ if (rc < 0) {
+ ret = rc;
+ goto bail_ch1;
+ }
+
+ /* one csrow per rank on each of the two channels, one channel each */
+ mci = edac_mc_alloc(sizeof(*priv), ranksperch * 2, 1, 0);
+ if (!mci) {
+ ret = -ENOMEM;
+ goto bail_disable_ch1;
+ }
+
+ mci->dev = &pdev->dev;
+
+ priv = mci->pvt_info;
+ priv->ranksperctlr = ranksperch;
+ priv->mc = pdev;
+ priv->ch0mm = ch0mm;
+ priv->ch1mm = ch1mm;
+
+ /* both helpers only cache hardware state in priv */
+ i5100_init_dimm_layout(pdev, mci);
+ i5100_init_interleaving(pdev, mci);
+
+ mci->mtype_cap = MEM_FLAG_FB_DDR2;
+ mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+ mci->edac_cap = EDAC_FLAG_SECDED;
+ mci->mod_name = "i5100_edac.c";
+ mci->mod_ver = "not versioned";
+ mci->ctl_name = "i5100";
+ mci->dev_name = pci_name(pdev);
+ mci->ctl_page_to_phys = NULL;
+
+ mci->edac_check = i5100_check_error;
+
+ /* needs the dimm layout and the mtr cache filled in above */
+ i5100_init_csrows(mci);
+
+ /* this strange construction seems to be in every driver, dunno why */
+ switch (edac_op_state) {
+ case EDAC_OPSTATE_POLL:
+ case EDAC_OPSTATE_NMI:
+ break;
+ default:
+ edac_op_state = EDAC_OPSTATE_POLL;
+ break;
+ }
+
+ if (edac_mc_add_mc(mci)) {
+ ret = -ENODEV;
+ goto bail_mc;
+ }
+
+ return ret;
+
+/* error unwinding: each label falls through into the ones below it */
+bail_mc:
+ edac_mc_free(mci);
+
+bail_disable_ch1:
+ pci_disable_device(ch1mm);
+
+bail_ch1:
+ pci_dev_put(ch1mm);
+
+bail_disable_ch0:
+ pci_disable_device(ch0mm);
+
+bail_ch0:
+ pci_dev_put(ch0mm);
+
+bail_pdev:
+ pci_disable_device(pdev);
+
+bail:
+ return ret;
+}
+
+/*
+ * PCI remove routine: unregister the memory controller from the EDAC
+ * core, then disable the three PCI devices, drop the references on the
+ * two channel devices and free the controller structure.  Does nothing
+ * if no controller was registered for this device.
+ */
+static void __devexit i5100_remove_one(struct pci_dev *pdev)
+{
+	struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
+	struct i5100_priv *priv;
+
+	if (mci) {
+		priv = mci->pvt_info;
+
+		pci_disable_device(pdev);
+		pci_disable_device(priv->ch0mm);
+		pci_disable_device(priv->ch1mm);
+
+		pci_dev_put(priv->ch0mm);
+		pci_dev_put(priv->ch1mm);
+
+		edac_mc_free(mci);
+	}
+}
+
+/*
+ * PCI IDs this driver binds to.  The table matches on vendor/device ID
+ * only; i5100_init_one() then rejects every function except function 1.
+ */
+static const struct pci_device_id i5100_pci_tbl[] __devinitdata = {
+ /* Device 16 -- the probe routine only accepts Function 1 of it */
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5100_16) },
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, i5100_pci_tbl);
+
+/* PCI driver glue: ties the ID table to the probe and remove routines. */
+static struct pci_driver i5100_driver = {
+ .name = KBUILD_BASENAME,
+ .probe = i5100_init_one,
+ .remove = __devexit_p(i5100_remove_one),
+ .id_table = i5100_pci_tbl,
+};
+
+/*
+ * Module entry point: register the PCI driver.  A negative result from
+ * the registration is passed on; anything else is reported as 0.
+ */
+static int __init i5100_init(void)
+{
+	const int err = pci_register_driver(&i5100_driver);
+
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+/* Module exit point: unregister the PCI driver registered by i5100_init(). */
+static void __exit i5100_exit(void)
+{
+ pci_unregister_driver(&i5100_driver);
+}
+
+/* hook the entry/exit points into the module loader and describe the module */
+module_init(i5100_init);
+module_exit(i5100_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR
+ ("Arthur Jones <ajones@riverbed.com>");
+MODULE_DESCRIPTION("MC Driver for Intel I5100 memory controllers");
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index d49361bf..2265d9c 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -195,14 +195,15 @@
return IRQ_HANDLED;
}
-static int __devinit mpc85xx_pci_err_probe(struct platform_device *pdev)
+static int __devinit mpc85xx_pci_err_probe(struct of_device *op,
+ const struct of_device_id *match)
{
struct edac_pci_ctl_info *pci;
struct mpc85xx_pci_pdata *pdata;
- struct resource *r;
+ struct resource r;
int res = 0;
- if (!devres_open_group(&pdev->dev, mpc85xx_pci_err_probe, GFP_KERNEL))
+ if (!devres_open_group(&op->dev, mpc85xx_pci_err_probe, GFP_KERNEL))
return -ENOMEM;
pci = edac_pci_alloc_ctl_info(sizeof(*pdata), "mpc85xx_pci_err");
@@ -212,34 +213,37 @@
pdata = pci->pvt_info;
pdata->name = "mpc85xx_pci_err";
pdata->irq = NO_IRQ;
- platform_set_drvdata(pdev, pci);
- pci->dev = &pdev->dev;
+ dev_set_drvdata(&op->dev, pci);
+ pci->dev = &op->dev;
pci->mod_name = EDAC_MOD_STR;
pci->ctl_name = pdata->name;
- pci->dev_name = pdev->dev.bus_id;
+ pci->dev_name = op->dev.bus_id;
if (edac_op_state == EDAC_OPSTATE_POLL)
pci->edac_check = mpc85xx_pci_check;
pdata->edac_idx = edac_pci_idx++;
- r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!r) {
+ res = of_address_to_resource(op->node, 0, &r);
+ if (res) {
printk(KERN_ERR "%s: Unable to get resource for "
"PCI err regs\n", __func__);
goto err;
}
- if (!devm_request_mem_region(&pdev->dev, r->start,
- r->end - r->start + 1, pdata->name)) {
+ /* we only need the error registers */
+ r.start += 0xe00;
+
+ if (!devm_request_mem_region(&op->dev, r.start,
+ r.end - r.start + 1, pdata->name)) {
printk(KERN_ERR "%s: Error while requesting mem region\n",
__func__);
res = -EBUSY;
goto err;
}
- pdata->pci_vbase = devm_ioremap(&pdev->dev, r->start,
- r->end - r->start + 1);
+ pdata->pci_vbase = devm_ioremap(&op->dev, r.start,
+ r.end - r.start + 1);
if (!pdata->pci_vbase) {
printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__);
res = -ENOMEM;
@@ -266,14 +270,15 @@
}
if (edac_op_state == EDAC_OPSTATE_INT) {
- pdata->irq = platform_get_irq(pdev, 0);
- res = devm_request_irq(&pdev->dev, pdata->irq,
+ pdata->irq = irq_of_parse_and_map(op->node, 0);
+ res = devm_request_irq(&op->dev, pdata->irq,
mpc85xx_pci_isr, IRQF_DISABLED,
"[EDAC] PCI err", pci);
if (res < 0) {
printk(KERN_ERR
"%s: Unable to requiest irq %d for "
"MPC85xx PCI err\n", __func__, pdata->irq);
+ irq_dispose_mapping(pdata->irq);
res = -ENODEV;
goto err2;
}
@@ -282,23 +287,23 @@
pdata->irq);
}
- devres_remove_group(&pdev->dev, mpc85xx_pci_err_probe);
+ devres_remove_group(&op->dev, mpc85xx_pci_err_probe);
debugf3("%s(): success\n", __func__);
printk(KERN_INFO EDAC_MOD_STR " PCI err registered\n");
return 0;
err2:
- edac_pci_del_device(&pdev->dev);
+ edac_pci_del_device(&op->dev);
err:
edac_pci_free_ctl_info(pci);
- devres_release_group(&pdev->dev, mpc85xx_pci_err_probe);
+ devres_release_group(&op->dev, mpc85xx_pci_err_probe);
return res;
}
-static int mpc85xx_pci_err_remove(struct platform_device *pdev)
+static int mpc85xx_pci_err_remove(struct of_device *op)
{
- struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev);
+ struct edac_pci_ctl_info *pci = dev_get_drvdata(&op->dev);
struct mpc85xx_pci_pdata *pdata = pci->pvt_info;
debugf0("%s()\n", __func__);
@@ -318,12 +323,26 @@
return 0;
}
-static struct platform_driver mpc85xx_pci_err_driver = {
+/*
+ * Device tree "compatible" strings handled by the PCI error driver; used
+ * as the match table of mpc85xx_pci_err_driver.  The empty entry ends the
+ * list.
+ */
+static struct of_device_id mpc85xx_pci_err_of_match[] = {
+ {
+ .compatible = "fsl,mpc8540-pcix",
+ },
+ {
+ .compatible = "fsl,mpc8540-pci",
+ },
+ {},
+};
+
+static struct of_platform_driver mpc85xx_pci_err_driver = {
+ .owner = THIS_MODULE,
+ .name = "mpc85xx_pci_err",
+ .match_table = mpc85xx_pci_err_of_match,
.probe = mpc85xx_pci_err_probe,
.remove = __devexit_p(mpc85xx_pci_err_remove),
.driver = {
- .name = "mpc85xx_pci_err",
- }
+ .name = "mpc85xx_pci_err",
+ .owner = THIS_MODULE,
+ },
};
#endif /* CONFIG_PCI */
@@ -1002,7 +1021,7 @@
printk(KERN_WARNING EDAC_MOD_STR "L2 fails to register\n");
#ifdef CONFIG_PCI
- res = platform_driver_register(&mpc85xx_pci_err_driver);
+ res = of_register_platform_driver(&mpc85xx_pci_err_driver);
if (res)
printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n");
#endif
@@ -1025,7 +1044,7 @@
{
mtspr(SPRN_HID1, orig_hid1);
#ifdef CONFIG_PCI
- platform_driver_unregister(&mpc85xx_pci_err_driver);
+ of_unregister_platform_driver(&mpc85xx_pci_err_driver);
#endif
of_unregister_platform_driver(&mpc85xx_l2_err_driver);
of_unregister_platform_driver(&mpc85xx_mc_err_driver);
diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c
index bf071f1..083ce8d 100644
--- a/drivers/edac/mv64x60_edac.c
+++ b/drivers/edac/mv64x60_edac.c
@@ -71,6 +71,35 @@
return IRQ_HANDLED;
}
+/*
+ * Bit 0 of MV64x60_PCIx_ERR_MASK does not exist on the 64360 and because of
+ * errata FEr-#11 and FEr-#16 for the 64460, it should be 0 on that chip as
+ * well. IOW, don't set bit 0.
+ */
+
+/*
+ * Erratum FEr PCI-#16: clear bit 0 of PCI SERRn Mask reg.
+ *
+ * The register is taken from the platform device's memory resource with
+ * index 1; it is mapped, bit 0 is cleared with a read-modify-write, and
+ * the mapping is dropped again.
+ *
+ * This is __devinit rather than __init because it is called from the
+ * __devinit probe path of this driver; an __init function may already
+ * have been discarded by the time a probe runs.
+ *
+ * Returns 0 on success, -ENOENT if the resource is missing and -ENOMEM
+ * if it cannot be mapped.
+ */
+static int __devinit mv64x60_pci_fixup(struct platform_device *pdev)
+{
+	struct resource *r;
+	void __iomem *pci_serr;
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!r) {
+		printk(KERN_ERR "%s: Unable to get resource for "
+		       "PCI err regs\n", __func__);
+		return -ENOENT;
+	}
+
+	pci_serr = ioremap(r->start, r->end - r->start + 1);
+	if (!pci_serr)
+		return -ENOMEM;
+
+	out_le32(pci_serr, in_le32(pci_serr) & ~0x1);
+	iounmap(pci_serr);
+
+	return 0;
+}
+
static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev)
{
struct edac_pci_ctl_info *pci;
@@ -128,6 +157,12 @@
goto err;
}
+ res = mv64x60_pci_fixup(pdev);
+ if (res < 0) {
+ printk(KERN_ERR "%s: PCI fixup failed\n", __func__);
+ goto err;
+ }
+
out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE, 0);
out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK, 0);
out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK,
@@ -612,7 +647,7 @@
if (!np)
return;
- reg = get_property(np, "reg", NULL);
+ reg = of_get_property(np, "reg", NULL);
pdata->total_mem = reg[1];
}
diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c
index 0b624e9..c66817e 100644
--- a/drivers/firmware/dcdbas.c
+++ b/drivers/firmware/dcdbas.c
@@ -152,20 +152,11 @@
struct bin_attribute *bin_attr,
char *buf, loff_t pos, size_t count)
{
- size_t max_read;
ssize_t ret;
mutex_lock(&smi_data_lock);
-
- if (pos >= smi_data_buf_size) {
- ret = 0;
- goto out;
- }
-
- max_read = smi_data_buf_size - pos;
- ret = min(max_read, count);
- memcpy(buf, smi_data_buf + pos, ret);
-out:
+ ret = memory_read_from_buffer(buf, count, &pos, smi_data_buf,
+ smi_data_buf_size);
mutex_unlock(&smi_data_lock);
return ret;
}
diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c
index 7430e21..13946eb 100644
--- a/drivers/firmware/dell_rbu.c
+++ b/drivers/firmware/dell_rbu.c
@@ -507,11 +507,6 @@
static ssize_t read_rbu_mono_data(char *buffer, loff_t pos, size_t count)
{
- unsigned char *ptemp = NULL;
- size_t bytes_left = 0;
- size_t data_length = 0;
- ssize_t ret_count = 0;
-
/* check to see if we have something to return */
if ((rbu_data.image_update_buffer == NULL) ||
(rbu_data.bios_image_size == 0)) {
@@ -519,28 +514,11 @@
"bios_image_size %lu\n",
rbu_data.image_update_buffer,
rbu_data.bios_image_size);
- ret_count = -ENOMEM;
- goto read_rbu_data_exit;
+ return -ENOMEM;
}
- if (pos > rbu_data.bios_image_size) {
- ret_count = 0;
- goto read_rbu_data_exit;
- }
-
- bytes_left = rbu_data.bios_image_size - pos;
- data_length = min(bytes_left, count);
-
- ptemp = rbu_data.image_update_buffer;
- memcpy(buffer, (ptemp + pos), data_length);
-
- if ((pos + count) > rbu_data.bios_image_size)
- /* this was the last copy */
- ret_count = bytes_left;
- else
- ret_count = count;
- read_rbu_data_exit:
- return ret_count;
+ return memory_read_from_buffer(buffer, count, &pos,
+ rbu_data.image_update_buffer, rbu_data.bios_image_size);
}
static ssize_t read_rbu_data(struct kobject *kobj,
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index fced190..dbd42d6 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -2,15 +2,40 @@
# GPIO infrastructure and expanders
#
-config HAVE_GPIO_LIB
+config ARCH_WANT_OPTIONAL_GPIOLIB
bool
help
+ Select this config option from the architecture Kconfig, if
+ it is possible to use gpiolib on the architecture, but let the
+ user decide whether to actually build it or not.
+ Select this instead of ARCH_REQUIRE_GPIOLIB, if your architecture does
+ not depend on GPIOs being available, but rather let the user
+ decide whether he needs it or not.
+
+config ARCH_REQUIRE_GPIOLIB
+ bool
+ select GPIOLIB
+ help
Platforms select gpiolib if they use this infrastructure
for all their GPIOs, usually starting with ones integrated
into SOC processors.
+ Selecting this from the architecture code will cause the gpiolib
+ code to always get built in.
-menu "GPIO Support"
- depends on HAVE_GPIO_LIB
+
+
+menuconfig GPIOLIB
+ bool "GPIO Support"
+ depends on ARCH_WANT_OPTIONAL_GPIOLIB || ARCH_REQUIRE_GPIOLIB
+ select GENERIC_GPIO
+ help
+ This enables GPIO support through the generic GPIO library.
+ You only need to enable this, if you also want to enable
+ one or more of the GPIO expansion card drivers below.
+
+ If unsure, say N.
+
+if GPIOLIB
config DEBUG_GPIO
bool "Debug GPIO calls"
@@ -23,10 +48,44 @@
slower. The diagnostics help catch the type of setup errors
that are most common when setting up new platforms or boards.
+config GPIO_SYSFS
+ bool "/sys/class/gpio/... (sysfs interface)"
+ depends on SYSFS && EXPERIMENTAL
+ help
+ Say Y here to add a sysfs interface for GPIOs.
+
+ This is mostly useful to work around omissions in a system's
+ kernel support. Those are common in custom and semicustom
+ hardware assembled using standard kernels with a minimum of
+ custom patches. In those cases, userspace code may import
+ a given GPIO from the kernel, if no kernel driver requested it.
+
+ Kernel drivers may also request that a particular GPIO be
+ exported to userspace; this can be useful when debugging.
+
# put expanders in the right section, in alphabetical order
comment "I2C GPIO expanders:"
+config GPIO_MAX732X
+ tristate "MAX7319, MAX7320-7327 I2C Port Expanders"
+ depends on I2C
+ help
+ Say yes here to support the MAX7319, MAX7320-7327 series of I2C
+ Port Expanders. Each IO port on these chips has a fixed role of
+ Input (designated by 'I'), Push-Pull Output ('O'), or Open-Drain
+ Input and Output (designated by 'P'). The combinations are listed
+ below:
+
+ 8 bits: max7319 (8I), max7320 (8O), max7321 (8P),
+ max7322 (4I4O), max7323 (4P4O)
+
+ 16 bits: max7324 (8I8O), max7325 (8P8O),
+ max7326 (4I12O), max7327 (4P12O)
+
+ Board setup code must specify the model to use, and the start
+ number for these GPIOs.
+
config GPIO_PCA953X
tristate "PCA953x, PCA955x, and MAX7310 I/O ports"
depends on I2C
@@ -68,6 +127,24 @@
This driver provides an in-kernel interface to those GPIOs using
platform-neutral GPIO calls.
+comment "PCI GPIO expanders:"
+
+config GPIO_BT8XX
+ tristate "BT8XX GPIO abuser"
+ depends on PCI && VIDEO_BT848=n
+ help
+ The BT8xx frame grabber chip has 24 GPIO pins that can be abused
+ as a cheap PCI GPIO card.
+
+ This chip can be found on Miro, Hauppauge and STB TV-cards.
+
+ The card needs to be physically altered for using it as a
+ GPIO card. For more information on how to build a GPIO card
+ from a BT8xx TV card, see the documentation file at
+ Documentation/bt8xxgpio.txt
+
+ If unsure, say N.
+
comment "SPI GPIO expanders:"
config GPIO_MAX7301
@@ -83,4 +160,4 @@
SPI driver for Microchip MCP23S08 I/O expander. This provides
a GPIO interface supporting inputs and outputs.
-endmenu
+endif
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 16e796d..01b4bbd 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -2,9 +2,11 @@
ccflags-$(CONFIG_DEBUG_GPIO) += -DDEBUG
-obj-$(CONFIG_HAVE_GPIO_LIB) += gpiolib.o
+obj-$(CONFIG_GPIOLIB) += gpiolib.o
obj-$(CONFIG_GPIO_MAX7301) += max7301.o
+obj-$(CONFIG_GPIO_MAX732X) += max732x.o
obj-$(CONFIG_GPIO_MCP23S08) += mcp23s08.o
obj-$(CONFIG_GPIO_PCA953X) += pca953x.o
obj-$(CONFIG_GPIO_PCF857X) += pcf857x.o
+obj-$(CONFIG_GPIO_BT8XX) += bt8xxgpio.o
diff --git a/drivers/gpio/bt8xxgpio.c b/drivers/gpio/bt8xxgpio.c
new file mode 100644
index 0000000..7a11682
--- /dev/null
+++ b/drivers/gpio/bt8xxgpio.c
@@ -0,0 +1,348 @@
+/*
+
+ bt8xx GPIO abuser
+
+ Copyright (C) 2008 Michael Buesch <mb@bu3sch.de>
+
+ Please do _only_ contact the people listed _above_ with issues related to this driver.
+ All the other people listed below are not related to this driver. Their names
+ are only here, because this driver is derived from the bt848 driver.
+
+
+ Derived from the bt848 driver:
+
+ Copyright (C) 1996,97,98 Ralph Metzler
+ & Marcus Metzler
+ (c) 1999-2002 Gerd Knorr
+
+ some v4l2 code lines are taken from Justin's bttv2 driver which is
+ (c) 2000 Justin Schoeman
+
+ V4L1 removal from:
+ (c) 2005-2006 Nickolay V. Shmyrev
+
+ Fixes to be fully V4L2 compliant by
+ (c) 2006 Mauro Carvalho Chehab
+
+ Cropping and overscan support
+ Copyright (C) 2005, 2006 Michael H. Schimek
+ Sponsored by OPQ Systems AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+
+#include <asm/gpio.h>
+
+/* Steal the hardware definitions from the bttv driver. */
+#include "../media/video/bt8xx/bt848.h"
+
+
+#define BT8XXGPIO_NR_GPIOS 24 /* We have 24 GPIO pins */
+
+
+/* Per-card driver state; allocated in bt8xxgpio_probe(), freed in bt8xxgpio_remove(). */
+struct bt8xxgpio {
+ /* Taken (irqsave) around every GPIO register access in this driver. */
+ spinlock_t lock;
+
+ /* ioremap()ed window onto PCI resource 0; base for bgread()/bgwrite(). */
+ void __iomem *mmio;
+ struct pci_dev *pdev;
+ /* Embedded so the gpio_chip callbacks can container_of() back to us. */
+ struct gpio_chip gpio;
+
+#ifdef CONFIG_PM
+ /* GPIO_OUT_EN / GPIO_DATA values captured on suspend, rewritten on resume. */
+ u32 saved_outen;
+ u32 saved_data;
+#endif
+};
+
+/*
+ * Register accessors. Both expand to an access relative to bg->mmio, so
+ * they require a `struct bt8xxgpio *bg` to be in scope at the call site.
+ */
+#define bgwrite(dat, adr) writel((dat), bg->mmio+(adr))
+#define bgread(adr) readl(bg->mmio+(adr))
+
+
+/*
+ * Value copied into gpio_chip.base by bt8xxgpio_gpio_setup(). Exposed as
+ * the read-only (0444) module parameter "gpiobase".
+ */
+static int modparam_gpiobase = -1/* dynamic */;
+module_param_named(gpiobase, modparam_gpiobase, int, 0444);
+MODULE_PARM_DESC(gpiobase, "The GPIO number base. -1 means dynamic, which is the default.");
+
+
+/*
+ * gpio_chip.direction_input callback.
+ *
+ * Clears the data bit of pin @nr and then removes the pin from the
+ * output-enable register, both under the device lock. Always returns 0.
+ */
+static int bt8xxgpio_gpio_direction_input(struct gpio_chip *gpio, unsigned nr)
+{
+	struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
+	const u32 pin = 1 << nr;
+	unsigned long irqflags;
+	u32 reg;
+
+	spin_lock_irqsave(&bg->lock, irqflags);
+
+	/* Data bit first, output-enable second: same order as before. */
+	reg = bgread(BT848_GPIO_DATA);
+	bgwrite(reg & ~pin, BT848_GPIO_DATA);
+
+	reg = bgread(BT848_GPIO_OUT_EN);
+	bgwrite(reg & ~pin, BT848_GPIO_OUT_EN);
+
+	spin_unlock_irqrestore(&bg->lock, irqflags);
+
+	return 0;
+}
+
+/*
+ * gpio_chip.get callback: sample the GPIO data register under the device
+ * lock and report the state of pin @nr as 0 or 1.
+ */
+static int bt8xxgpio_gpio_get(struct gpio_chip *gpio, unsigned nr)
+{
+	struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
+	unsigned long irqflags;
+	u32 data;
+
+	spin_lock_irqsave(&bg->lock, irqflags);
+	data = bgread(BT848_GPIO_DATA);
+	spin_unlock_irqrestore(&bg->lock, irqflags);
+
+	return (data & (1 << nr)) ? 1 : 0;
+}
+
+/*
+ * gpio_chip.direction_output callback.
+ *
+ * Adds pin @nr to the output-enable register and then drives it to @val
+ * (any non-zero value means high), both under the device lock.
+ * Always returns 0.
+ */
+static int bt8xxgpio_gpio_direction_output(struct gpio_chip *gpio,
+					unsigned nr, int val)
+{
+	struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
+	const u32 pin = 1 << nr;
+	unsigned long irqflags;
+	u32 reg;
+
+	spin_lock_irqsave(&bg->lock, irqflags);
+
+	/* Output-enable first, level second: same order as before. */
+	reg = bgread(BT848_GPIO_OUT_EN);
+	bgwrite(reg | pin, BT848_GPIO_OUT_EN);
+
+	reg = bgread(BT848_GPIO_DATA);
+	reg = val ? (reg | pin) : (reg & ~pin);
+	bgwrite(reg, BT848_GPIO_DATA);
+
+	spin_unlock_irqrestore(&bg->lock, irqflags);
+
+	return 0;
+}
+
+/*
+ * gpio_chip.set callback: read-modify-write the GPIO data register under
+ * the device lock so that pin @nr is high when @val is non-zero and low
+ * otherwise.
+ */
+static void bt8xxgpio_gpio_set(struct gpio_chip *gpio,
+			       unsigned nr, int val)
+{
+	struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
+	const u32 pin = 1 << nr;
+	unsigned long irqflags;
+	u32 reg;
+
+	spin_lock_irqsave(&bg->lock, irqflags);
+
+	reg = bgread(BT848_GPIO_DATA);
+	reg = val ? (reg | pin) : (reg & ~pin);
+	bgwrite(reg, BT848_GPIO_DATA);
+
+	spin_unlock_irqrestore(&bg->lock, irqflags);
+}
+
+/*
+ * Fill in the embedded gpio_chip before it is handed to gpiochip_add():
+ * identity, numbering (base comes from the "gpiobase" module parameter)
+ * and the four pin callbacks. No debugfs hook is provided.
+ */
+static void bt8xxgpio_gpio_setup(struct bt8xxgpio *bg)
+{
+	struct gpio_chip *gc = &bg->gpio;
+
+	/* identity */
+	gc->label = bg->pdev->dev.bus_id;
+	gc->owner = THIS_MODULE;
+
+	/* numbering */
+	gc->base = modparam_gpiobase;
+	gc->ngpio = BT8XXGPIO_NR_GPIOS;
+	gc->can_sleep = 0;
+
+	/* operations */
+	gc->direction_input = bt8xxgpio_gpio_direction_input;
+	gc->direction_output = bt8xxgpio_gpio_direction_output;
+	gc->get = bt8xxgpio_gpio_get;
+	gc->set = bt8xxgpio_gpio_set;
+	gc->dbg_show = NULL;
+}
+
+/*
+ * PCI probe: allocate the driver state, enable the device, claim and map
+ * BAR 0, quiesce the chip (interrupts masked, all pins switched to input)
+ * and register the 24 pins with gpiolib.
+ *
+ * Returns 0 on success or a negative errno. Every resource acquired
+ * before a failure is released again on the way out, including the MMIO
+ * mapping when gpiochip_add() fails.
+ */
+static int bt8xxgpio_probe(struct pci_dev *dev,
+			const struct pci_device_id *pci_id)
+{
+	struct bt8xxgpio *bg;
+	int err;
+
+	bg = kzalloc(sizeof(*bg), GFP_KERNEL);
+	if (!bg)
+		return -ENOMEM;
+
+	bg->pdev = dev;
+	spin_lock_init(&bg->lock);
+
+	err = pci_enable_device(dev);
+	if (err) {
+		printk(KERN_ERR "bt8xxgpio: Can't enable device.\n");
+		goto err_freebg;
+	}
+	if (!request_mem_region(pci_resource_start(dev, 0),
+				pci_resource_len(dev, 0),
+				"bt8xxgpio")) {
+		printk(KERN_WARNING "bt8xxgpio: Can't request iomem (0x%llx).\n",
+		       (unsigned long long)pci_resource_start(dev, 0));
+		err = -EBUSY;
+		goto err_disable;
+	}
+	pci_set_master(dev);
+	pci_set_drvdata(dev, bg);
+
+	bg->mmio = ioremap(pci_resource_start(dev, 0), 0x1000);
+	if (!bg->mmio) {
+		printk(KERN_ERR "bt8xxgpio: ioremap() failed\n");
+		err = -EIO;
+		goto err_release_mem;
+	}
+
+	/* Disable interrupts */
+	bgwrite(0, BT848_INT_MASK);
+
+	/* gpio init */
+	bgwrite(0, BT848_GPIO_DMA_CTL);
+	bgwrite(0, BT848_GPIO_REG_INP);
+	bgwrite(0, BT848_GPIO_OUT_EN);
+
+	bt8xxgpio_gpio_setup(bg);
+	err = gpiochip_add(&bg->gpio);
+	if (err) {
+		printk(KERN_ERR "bt8xxgpio: Failed to register GPIOs\n");
+		/* The mapping exists by now, so it must be torn down too. */
+		goto err_unmap;
+	}
+
+	printk(KERN_INFO "bt8xxgpio: Abusing BT8xx card for GPIOs %d to %d\n",
+	       bg->gpio.base, bg->gpio.base + BT8XXGPIO_NR_GPIOS - 1);
+
+	return 0;
+
+err_unmap:
+	iounmap(bg->mmio);
+err_release_mem:
+	release_mem_region(pci_resource_start(dev, 0),
+			   pci_resource_len(dev, 0));
+	pci_set_drvdata(dev, NULL);
+err_disable:
+	pci_disable_device(dev);
+err_freebg:
+	kfree(bg);
+
+	return err;
+}
+
+/*
+ * PCI remove: undo bt8xxgpio_probe(). The GPIO chip is unregistered
+ * first, then the hardware is quiesced, and finally the mapping, the
+ * memory region, the PCI device and the driver state are released.
+ */
+static void bt8xxgpio_remove(struct pci_dev *pdev)
+{
+ struct bt8xxgpio *bg = pci_get_drvdata(pdev);
+
+ /* NOTE(review): the return value of gpiochip_remove() is not checked. */
+ gpiochip_remove(&bg->gpio);
+
+ /* Mask interrupts, write all-ones to INT_STAT (presumably acking
+  * anything pending -- confirm against the bt848 datasheet) and
+  * disable every output driver.
+  */
+ bgwrite(0, BT848_INT_MASK);
+ bgwrite(~0x0, BT848_INT_STAT);
+ bgwrite(0x0, BT848_GPIO_OUT_EN);
+
+ iounmap(bg->mmio);
+ release_mem_region(pci_resource_start(pdev, 0),
+ pci_resource_len(pdev, 0));
+ pci_disable_device(pdev);
+
+ pci_set_drvdata(pdev, NULL);
+ kfree(bg);
+}
+
+#ifdef CONFIG_PM
+/*
+ * PCI suspend: snapshot the output-enable and data registers, quiesce the
+ * chip under the device lock, then save PCI state and power the device
+ * down to the state chosen for @state. Always returns 0.
+ */
+static int bt8xxgpio_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ struct bt8xxgpio *bg = pci_get_drvdata(pdev);
+ unsigned long flags;
+
+ spin_lock_irqsave(&bg->lock, flags);
+
+ /* Captured before the registers are cleared; replayed by resume. */
+ bg->saved_outen = bgread(BT848_GPIO_OUT_EN);
+ bg->saved_data = bgread(BT848_GPIO_DATA);
+
+ /* Same quiesce sequence as bt8xxgpio_remove(). */
+ bgwrite(0, BT848_INT_MASK);
+ bgwrite(~0x0, BT848_INT_STAT);
+ bgwrite(0x0, BT848_GPIO_OUT_EN);
+
+ spin_unlock_irqrestore(&bg->lock, flags);
+
+ pci_save_state(pdev);
+ pci_disable_device(pdev);
+ pci_set_power_state(pdev, pci_choose_state(pdev, state));
+
+ return 0;
+}
+
+/*
+ * PCI resume: power the device back up, re-enable it, restore PCI state
+ * and reprogram the GPIO block from the values saved at suspend time.
+ *
+ * Returns 0 on success, or the error from pci_enable_device().
+ */
+static int bt8xxgpio_resume(struct pci_dev *pdev)
+{
+ struct bt8xxgpio *bg = pci_get_drvdata(pdev);
+ unsigned long flags;
+ int err;
+
+ pci_set_power_state(pdev, 0);
+ err = pci_enable_device(pdev);
+ if (err)
+ return err;
+ pci_restore_state(pdev);
+
+ spin_lock_irqsave(&bg->lock, flags);
+
+ /* Same init writes as probe, except OUT_EN gets the saved mask. */
+ bgwrite(0, BT848_INT_MASK);
+ bgwrite(0, BT848_GPIO_DMA_CTL);
+ bgwrite(0, BT848_GPIO_REG_INP);
+ bgwrite(bg->saved_outen, BT848_GPIO_OUT_EN);
+ /* Only bits belonging to output-enabled pins are written back. */
+ bgwrite(bg->saved_data & bg->saved_outen,
+ BT848_GPIO_DATA);
+
+ spin_unlock_irqrestore(&bg->lock, flags);
+
+ return 0;
+}
+#else
+#define bt8xxgpio_suspend NULL
+#define bt8xxgpio_resume NULL
+#endif /* CONFIG_PM */
+
+/* Brooktree PCI device IDs this driver binds to; the zero entry ends the list. */
+static struct pci_device_id bt8xxgpio_pci_tbl[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT848) },
+ { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT849) },
+ { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT878) },
+ { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT879) },
+ { 0, },
+};
+MODULE_DEVICE_TABLE(pci, bt8xxgpio_pci_tbl);
+
+/*
+ * PCI driver glue. The suspend/resume members are defined to NULL when
+ * CONFIG_PM is not set.
+ */
+static struct pci_driver bt8xxgpio_pci_driver = {
+ .name = "bt8xxgpio",
+ .id_table = bt8xxgpio_pci_tbl,
+ .probe = bt8xxgpio_probe,
+ .remove = bt8xxgpio_remove,
+ .suspend = bt8xxgpio_suspend,
+ .resume = bt8xxgpio_resume,
+};
+
+/* Module entry point: register the PCI driver and return its status. */
+static int bt8xxgpio_init(void)
+{
+ return pci_register_driver(&bt8xxgpio_pci_driver);
+}
+module_init(bt8xxgpio_init)
+
+/* Module exit point: unregister the PCI driver. */
+static void bt8xxgpio_exit(void)
+{
+ pci_unregister_driver(&bt8xxgpio_pci_driver);
+}
+module_exit(bt8xxgpio_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michael Buesch");
+MODULE_DESCRIPTION("Abuse a BT8xx framegrabber card as generic GPIO card");
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index beaf6b3..8d29405 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2,8 +2,11 @@
#include <linux/module.h>
#include <linux/irq.h>
#include <linux/spinlock.h>
-
-#include <asm/gpio.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/gpio.h>
/* Optional implementation infrastructure for GPIO interfaces.
@@ -44,6 +47,8 @@
#define FLAG_REQUESTED 0
#define FLAG_IS_OUT 1
#define FLAG_RESERVED 2
+#define FLAG_EXPORT 3 /* protected by sysfs_lock */
+#define FLAG_SYSFS 4 /* exported via /sys/class/gpio/control */
#ifdef CONFIG_DEBUG_FS
const char *label;
@@ -151,6 +156,482 @@
return ret;
}
+#ifdef CONFIG_GPIO_SYSFS
+
+/* lock protects against unexport_gpio() being called while
+ * sysfs files are active.
+ */
+static DEFINE_MUTEX(sysfs_lock);
+
+/*
+ * /sys/class/gpio/gpioN... only for GPIOs that are exported
+ * /direction
+ * * MAY BE OMITTED if kernel won't allow direction changes
+ * * is read/write as "in" or "out"
+ * * may also be written as "high" or "low", initializing
+ * output value as specified ("out" implies "low")
+ * /value
+ * * always readable, subject to hardware behavior
+ * * may be writable, as zero/nonzero
+ *
+ * REVISIT there will likely be an attribute for configuring async
+ * notifications, e.g. to specify polling interval or IRQ trigger type
+ * that would for example trigger a poll() on the "value".
+ */
+
+/*
+ * sysfs "direction" read handler.
+ *
+ * Under sysfs_lock, emits "out" or "in" according to FLAG_IS_OUT.
+ * Returns the number of bytes written, or -EIO if the GPIO is no longer
+ * flagged as exported.
+ */
+static ssize_t gpio_direction_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	const struct gpio_desc *desc = dev_get_drvdata(dev);
+	ssize_t status = -EIO;
+
+	mutex_lock(&sysfs_lock);
+
+	if (test_bit(FLAG_EXPORT, &desc->flags)) {
+		const char *dir;
+
+		dir = test_bit(FLAG_IS_OUT, &desc->flags) ? "out" : "in";
+		status = sprintf(buf, "%s\n", dir);
+	}
+
+	mutex_unlock(&sysfs_lock);
+	return status;
+}
+
+/*
+ * sysfs "direction" write handler.
+ *
+ * Accepts "in", "out", "low" (both: output driven low) or "high" (output
+ * driven high). Returns @size on success, -EIO if the GPIO is no longer
+ * flagged as exported, -EINVAL for any other string, or the error from
+ * the direction call.
+ */
+static ssize_t gpio_direction_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t size)
+{
+	const struct gpio_desc *desc = dev_get_drvdata(dev);
+	unsigned gpio = desc - gpio_desc;
+	ssize_t status;
+
+	mutex_lock(&sysfs_lock);
+
+	if (!test_bit(FLAG_EXPORT, &desc->flags)) {
+		status = -EIO;
+	} else if (sysfs_streq(buf, "in")) {
+		status = gpio_direction_input(gpio);
+	} else if (sysfs_streq(buf, "high")) {
+		status = gpio_direction_output(gpio, 1);
+	} else if (sysfs_streq(buf, "low") || sysfs_streq(buf, "out")) {
+		status = gpio_direction_output(gpio, 0);
+	} else {
+		status = -EINVAL;
+	}
+
+	mutex_unlock(&sysfs_lock);
+
+	if (status)
+		return status;
+	return size;
+}
+
+static const DEVICE_ATTR(direction, 0644,
+ gpio_direction_show, gpio_direction_store);
+
+/*
+ * sysfs "value" read handler.
+ *
+ * Under sysfs_lock, prints the result of gpio_get_value_cansleep() as a
+ * decimal followed by a newline. Returns the byte count, or -EIO if the
+ * GPIO is no longer flagged as exported.
+ */
+static ssize_t gpio_value_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	const struct gpio_desc *desc = dev_get_drvdata(dev);
+	unsigned gpio = desc - gpio_desc;
+	ssize_t status = -EIO;
+
+	mutex_lock(&sysfs_lock);
+
+	if (test_bit(FLAG_EXPORT, &desc->flags)) {
+		int level = gpio_get_value_cansleep(gpio);
+
+		status = sprintf(buf, "%d\n", level);
+	}
+
+	mutex_unlock(&sysfs_lock);
+	return status;
+}
+
+/*
+ * sysfs "value" write handler.
+ *
+ * Parses @buf as an integer and drives the GPIO high for any non-zero
+ * value, low for zero. Returns @size on success, -EIO if the GPIO is no
+ * longer flagged as exported, -EPERM if it is not flagged as an output,
+ * or the parse error from strict_strtol().
+ */
+static ssize_t gpio_value_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t size)
+{
+	const struct gpio_desc *desc = dev_get_drvdata(dev);
+	unsigned gpio = desc - gpio_desc;
+	ssize_t status;
+	long value;
+
+	mutex_lock(&sysfs_lock);
+
+	if (!test_bit(FLAG_EXPORT, &desc->flags)) {
+		status = -EIO;
+		goto unlock;
+	}
+	if (!test_bit(FLAG_IS_OUT, &desc->flags)) {
+		status = -EPERM;
+		goto unlock;
+	}
+
+	status = strict_strtol(buf, 0, &value);
+	if (status != 0)
+		goto unlock;
+
+	gpio_set_value_cansleep(gpio, value != 0);
+	status = size;
+
+unlock:
+	mutex_unlock(&sysfs_lock);
+	return status;
+}
+
+static /*const*/ DEVICE_ATTR(value, 0644,
+ gpio_value_show, gpio_value_store);
+
+/*
+ * Attributes of an exported GPIO whose direction may be changed:
+ * "direction" plus "value". gpio_export() installs this group in that
+ * case and only the "value" file otherwise.
+ */
+static const struct attribute *gpio_attrs[] = {
+ &dev_attr_direction.attr,
+ &dev_attr_value.attr,
+ NULL,
+};
+
+/* The cast drops the const qualifier of gpio_attrs for the .attrs member. */
+static const struct attribute_group gpio_attr_group = {
+ .attrs = (struct attribute **) gpio_attrs,
+};
+
+/*
+ * /sys/class/gpio/gpiochipN/
+ * /base ... matching gpio_chip.base (N)
+ * /label ... matching gpio_chip.label
+ * /ngpio ... matching gpio_chip.ngpio
+ */
+
+/* sysfs gpiochipN/base: print the chip's first GPIO number. */
+static ssize_t chip_base_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	const struct gpio_chip *gc = dev_get_drvdata(dev);
+	int base = gc->base;
+
+	return sprintf(buf, "%d\n", base);
+}
+static DEVICE_ATTR(base, 0444, chip_base_show, NULL);
+
+/* sysfs gpiochipN/label: print the chip's label, or an empty line if unset. */
+static ssize_t chip_label_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	const struct gpio_chip *gc = dev_get_drvdata(dev);
+	const char *label = gc->label;
+
+	if (!label)
+		label = "";
+
+	return sprintf(buf, "%s\n", label);
+}
+static DEVICE_ATTR(label, 0444, chip_label_show, NULL);
+
+/* sysfs gpiochipN/ngpio: print how many GPIOs the chip provides. */
+static ssize_t chip_ngpio_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	const struct gpio_chip *gc = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%u\n", gc->ngpio);
+}
+static DEVICE_ATTR(ngpio, 0444, chip_ngpio_show, NULL);
+
+/* Read-only attributes attached to every gpiochipN device by gpiochip_export(). */
+static const struct attribute *gpiochip_attrs[] = {
+ &dev_attr_base.attr,
+ &dev_attr_label.attr,
+ &dev_attr_ngpio.attr,
+ NULL,
+};
+
+/* The cast drops the const qualifier of gpiochip_attrs for the .attrs member. */
+static const struct attribute_group gpiochip_attr_group = {
+ .attrs = (struct attribute **) gpiochip_attrs,
+};
+
+/*
+ * /sys/class/gpio/export ... write-only
+ * integer N ... number of GPIO to export (full access)
+ * /sys/class/gpio/unexport ... write-only
+ * integer N ... number of GPIO to unexport
+ */
+/*
+ * Write handler for /sys/class/gpio/export.
+ *
+ * Parses a GPIO number from @buf, requests it under the label "sysfs",
+ * exports it with direction changes allowed, and marks it FLAG_SYSFS so
+ * that a later write to "unexport" may release it again.
+ *
+ * Returns @len on success or a negative errno.
+ */
+static ssize_t export_store(struct class *class, const char *buf, size_t len)
+{
+	long gpio;
+	int status;
+
+	status = strict_strtol(buf, 0, &gpio);
+	if (status < 0)
+		goto done;
+
+	/* Range-check the full-width value here. The calls below take
+	 * narrower integer types, so a large number could otherwise pass
+	 * their validation in truncated form and then be used, untruncated,
+	 * as the gpio_desc[] index further down.
+	 */
+	if (gpio < 0 || gpio >= ARCH_NR_GPIOS) {
+		status = -EINVAL;
+		goto done;
+	}
+
+	/* No extra locking here; FLAG_SYSFS just signifies that the
+	 * request and export were done on behalf of userspace, so
+	 * they may be undone on its behalf too.
+	 */
+
+	status = gpio_request(gpio, "sysfs");
+	if (status < 0)
+		goto done;
+
+	status = gpio_export(gpio, true);
+	if (status < 0)
+		gpio_free(gpio);
+	else
+		set_bit(FLAG_SYSFS, &gpio_desc[gpio].flags);
+
+done:
+	if (status)
+		pr_debug("%s: status %d\n", __func__, status);
+	return status ? : len;
+}
+
+/*
+ * Write handler for /sys/class/gpio/unexport.
+ *
+ * Parses a GPIO number from @buf and, if that GPIO was exported through
+ * the "export" file (FLAG_SYSFS set), frees it again.
+ *
+ * Returns @len on success, the parse error, or -EINVAL when the number
+ * is out of range or the GPIO was not exported from userspace.
+ */
+static ssize_t unexport_store(struct class *class, const char *buf, size_t len)
+{
+	long gpio;
+	int status;
+
+	status = strict_strtol(buf, 0, &gpio);
+	if (status < 0)
+		goto done;
+
+	status = -EINVAL;
+
+	/* reject bogus commands (gpio_unexport ignores them). The range
+	 * is checked on the full-width value because it is used as the
+	 * gpio_desc[] index below; a helper taking a narrower integer
+	 * type would only see a truncated number.
+	 */
+	if (gpio < 0 || gpio >= ARCH_NR_GPIOS)
+		goto done;
+
+	/* No extra locking here; FLAG_SYSFS just signifies that the
+	 * request and export were done on behalf of userspace, so
+	 * they may be undone on its behalf too.
+	 */
+	if (test_and_clear_bit(FLAG_SYSFS, &gpio_desc[gpio].flags)) {
+		status = 0;
+		gpio_free(gpio);
+	}
+done:
+	if (status)
+		pr_debug("%s: status %d\n", __func__, status);
+	return status ? : len;
+}
+
+/* Class-level control files: both are write-only (mode 0200, no show hook). */
+static struct class_attribute gpio_class_attrs[] = {
+ __ATTR(export, 0200, NULL, export_store),
+ __ATTR(unexport, 0200, NULL, unexport_store),
+ __ATTR_NULL,
+};
+
+/*
+ * The "gpio" device class. It is registered by gpiolib_sysfs_init();
+ * gpio_export() and gpiochip_export() test gpio_class.p to tell whether
+ * that has happened yet.
+ */
+static struct class gpio_class = {
+ .name = "gpio",
+ .owner = THIS_MODULE,
+
+ .class_attrs = gpio_class_attrs,
+};
+
+
+/**
+ * gpio_export - export a GPIO through sysfs
+ * @gpio: gpio to make available, already requested
+ * @direction_may_change: true if userspace may change gpio direction
+ * Context: arch_initcall or later
+ *
+ * When drivers want to make a GPIO accessible to userspace after they
+ * have requested it -- perhaps while debugging, or as part of their
+ * public interface -- they may use this routine. If the GPIO can
+ * change direction (some can't) and the caller allows it, userspace
+ * will see "direction" sysfs attribute which may be used to change
+ * the gpio's direction. A "value" attribute will always be provided.
+ *
+ * Returns zero on success, else an error: -ENOENT if the gpio class is
+ * not registered yet, -EINVAL if @gpio is invalid, not requested or
+ * already exported, or the error from creating the device or its
+ * attribute files.
+ */
+int gpio_export(unsigned gpio, bool direction_may_change)
+{
+	unsigned long flags;
+	struct gpio_desc *desc;
+	int status = -EINVAL;
+
+	/* can't export until sysfs is available ... */
+	if (!gpio_class.p) {
+		pr_debug("%s: called too early!\n", __func__);
+		return -ENOENT;
+	}
+
+	if (!gpio_is_valid(gpio))
+		goto done;
+
+	mutex_lock(&sysfs_lock);
+
+	spin_lock_irqsave(&gpio_lock, flags);
+	desc = &gpio_desc[gpio];
+	if (test_bit(FLAG_REQUESTED, &desc->flags)
+			&& !test_bit(FLAG_EXPORT, &desc->flags)) {
+		status = 0;
+		/* a chip lacking either direction hook can't switch */
+		if (!desc->chip->direction_input
+				|| !desc->chip->direction_output)
+			direction_may_change = false;
+	}
+	spin_unlock_irqrestore(&gpio_lock, flags);
+
+	if (status == 0) {
+		struct device *dev;
+
+		dev = device_create(&gpio_class, desc->chip->dev, MKDEV(0, 0),
+				desc, "gpio%d", gpio);
+		/* device_create() reports failure as ERR_PTR(), never as
+		 * NULL, so a plain non-NULL test would treat an error
+		 * pointer as a valid device.
+		 */
+		if (!IS_ERR(dev)) {
+			if (direction_may_change)
+				status = sysfs_create_group(&dev->kobj,
+						&gpio_attr_group);
+			else
+				status = device_create_file(dev,
+						&dev_attr_value);
+			if (status != 0)
+				device_unregister(dev);
+		} else
+			status = PTR_ERR(dev);
+		if (status == 0)
+			set_bit(FLAG_EXPORT, &desc->flags);
+	}
+
+	mutex_unlock(&sysfs_lock);
+
+done:
+	if (status)
+		pr_debug("%s: gpio%d status %d\n", __func__, gpio, status);
+
+	return status;
+}
+EXPORT_SYMBOL_GPL(gpio_export);
+
+/* class_find_device() predicate: non-zero when @dev's drvdata is @data. */
+static int match_export(struct device *dev, void *data)
+{
+	void *drvdata = dev_get_drvdata(dev);
+
+	return drvdata == data;
+}
+
+/**
+ * gpio_unexport - reverse effect of gpio_export()
+ * @gpio: gpio to make unavailable
+ *
+ * This is implicit on gpio_free().
+ *
+ * Invalid or not-exported GPIOs are ignored apart from a debug message.
+ */
+void gpio_unexport(unsigned gpio)
+{
+	struct gpio_desc *desc;
+	struct device *dev;
+	int status = -EINVAL;
+
+	if (!gpio_is_valid(gpio))
+		goto done;
+
+	mutex_lock(&sysfs_lock);
+
+	desc = &gpio_desc[gpio];
+	if (!test_bit(FLAG_EXPORT, &desc->flags))
+		goto unlock;
+
+	dev = class_find_device(&gpio_class, NULL, desc, match_export);
+	if (!dev) {
+		status = -ENODEV;
+		goto unlock;
+	}
+
+	clear_bit(FLAG_EXPORT, &desc->flags);
+	/* drop the reference taken by the lookup, then remove the device */
+	put_device(dev);
+	device_unregister(dev);
+	status = 0;
+
+unlock:
+	mutex_unlock(&sysfs_lock);
+done:
+	if (status)
+		pr_debug("%s: gpio%d status %d\n", __func__, gpio, status);
+}
+EXPORT_SYMBOL_GPL(gpio_unexport);
+
+/*
+ * Create the /sys/class/gpio/gpiochipN device for @chip and attach its
+ * base/label/ngpio attributes.
+ *
+ * Returns 0 on success, and also when the gpio class is not registered
+ * yet (the export is then done later by gpiolib_sysfs_init()). On
+ * failure the chip is detached from gpio_desc[] again and a negative
+ * errno is returned.
+ */
+static int gpiochip_export(struct gpio_chip *chip)
+{
+	int status;
+	struct device *dev;
+
+	/* Many systems register gpio chips for SOC support very early,
+	 * before driver model support is available. In those cases we
+	 * export this later, in gpiolib_sysfs_init() ... here we just
+	 * verify that _some_ field of gpio_class got initialized.
+	 */
+	if (!gpio_class.p)
+		return 0;
+
+	/* use chip->base for the ID; it's already known to be unique */
+	mutex_lock(&sysfs_lock);
+	dev = device_create(&gpio_class, chip->dev, MKDEV(0, 0), chip,
+				"gpiochip%d", chip->base);
+	/* device_create() reports failure as ERR_PTR(), never as NULL */
+	if (!IS_ERR(dev)) {
+		status = sysfs_create_group(&dev->kobj,
+				&gpiochip_attr_group);
+	} else
+		status = PTR_ERR(dev);
+	chip->exported = (status == 0);
+	mutex_unlock(&sysfs_lock);
+
+	if (status) {
+		unsigned long flags;
+		unsigned gpio;
+
+		spin_lock_irqsave(&gpio_lock, flags);
+		gpio = chip->base;
+		/* stop at the end of gpio_desc[]: a chip may occupy the
+		 * very last descriptors, and the walk must not read past
+		 * them.
+		 */
+		while (gpio < ARCH_NR_GPIOS && gpio_desc[gpio].chip == chip)
+			gpio_desc[gpio++].chip = NULL;
+		spin_unlock_irqrestore(&gpio_lock, flags);
+
+		pr_debug("%s: chip %s status %d\n", __func__,
+				chip->label, status);
+	}
+
+	return status;
+}
+
+/*
+ * Remove the gpiochipN class device whose drvdata is @chip and clear
+ * chip->exported. A missing device is only reported via pr_debug().
+ */
+static void gpiochip_unexport(struct gpio_chip *chip)
+{
+	struct device *dev;
+	int status = -ENODEV;
+
+	mutex_lock(&sysfs_lock);
+
+	dev = class_find_device(&gpio_class, NULL, chip, match_export);
+	if (dev) {
+		/* drop the lookup reference, then remove the device */
+		put_device(dev);
+		device_unregister(dev);
+		chip->exported = 0;
+		status = 0;
+	}
+
+	mutex_unlock(&sysfs_lock);
+
+	if (status)
+		pr_debug("%s: chip %s status %d\n", __func__,
+				chip->label, status);
+}
+
+/*
+ * postcore initcall: register the "gpio" class, then export every chip
+ * that is already present in gpio_desc[] but not yet marked exported.
+ *
+ * Returns the class_register() error if that fails; otherwise the status
+ * of the last gpiochip_export() call made (or the class_register()
+ * result when no chip needed exporting).
+ */
+static int __init gpiolib_sysfs_init(void)
+{
+ int status;
+ unsigned long flags;
+ unsigned gpio;
+
+ status = class_register(&gpio_class);
+ if (status < 0)
+ return status;
+
+ /* Scan and register the gpio_chips which registered very
+ * early (e.g. before the class_register above was called).
+ *
+ * We run before arch_initcall() so chip->dev nodes can have
+ * registered, and so arch_initcall() can always gpio_export().
+ */
+ spin_lock_irqsave(&gpio_lock, flags);
+ for (gpio = 0; gpio < ARCH_NR_GPIOS; gpio++) {
+ struct gpio_chip *chip;
+
+ chip = gpio_desc[gpio].chip;
+ if (!chip || chip->exported)
+ continue;
+
+ /* gpiochip_export() takes the sysfs_lock mutex, so the
+  * spinlock is dropped around the call and re-taken after.
+  */
+ spin_unlock_irqrestore(&gpio_lock, flags);
+ status = gpiochip_export(chip);
+ spin_lock_irqsave(&gpio_lock, flags);
+ }
+ spin_unlock_irqrestore(&gpio_lock, flags);
+
+
+ return status;
+}
+postcore_initcall(gpiolib_sysfs_init);
+
+#else
+/* Stub used when CONFIG_GPIO_SYSFS is off: nothing to export, report success. */
+static inline int gpiochip_export(struct gpio_chip *chip)
+{
+ return 0;
+}
+
+/* Stub used when CONFIG_GPIO_SYSFS is off: nothing was exported, do nothing. */
+static inline void gpiochip_unexport(struct gpio_chip *chip)
+{
+}
+
+#endif /* CONFIG_GPIO_SYSFS */
+
/**
* gpiochip_add() - register a gpio_chip
* @chip: the chip to register, with chip->base initialized
@@ -160,6 +641,11 @@
* because the chip->base is invalid or already associated with a
* different chip. Otherwise it returns zero as a success code.
*
+ * When gpiochip_add() is called very early during boot, so that GPIOs
+ * can be freely used, the chip->dev device must be registered before
+ * the gpio framework's arch_initcall(). Otherwise sysfs initialization
+ * for GPIOs will fail rudely.
+ *
* If chip->base is negative, this requests dynamic assignment of
* a range of valid GPIOs.
*/
@@ -182,7 +668,7 @@
base = gpiochip_find_base(chip->ngpio);
if (base < 0) {
status = base;
- goto fail_unlock;
+ goto unlock;
}
chip->base = base;
}
@@ -197,12 +683,23 @@
if (status == 0) {
for (id = base; id < base + chip->ngpio; id++) {
gpio_desc[id].chip = chip;
- gpio_desc[id].flags = 0;
+
+ /* REVISIT: most hardware initializes GPIOs as
+ * inputs (often with pullups enabled) so power
+ * usage is minimized. Linux code should set the
+ * gpio direction first thing; but until it does,
+ * we may expose the wrong direction in sysfs.
+ */
+ gpio_desc[id].flags = !chip->direction_input
+ ? (1 << FLAG_IS_OUT)
+ : 0;
}
}
-fail_unlock:
+unlock:
spin_unlock_irqrestore(&gpio_lock, flags);
+ if (status == 0)
+ status = gpiochip_export(chip);
fail:
/* failures here can mean systems won't boot... */
if (status)
@@ -239,6 +736,10 @@
}
spin_unlock_irqrestore(&gpio_lock, flags);
+
+ if (status == 0)
+ gpiochip_unexport(chip);
+
return status;
}
EXPORT_SYMBOL_GPL(gpiochip_remove);
@@ -296,6 +797,8 @@
return;
}
+ gpio_unexport(gpio);
+
spin_lock_irqsave(&gpio_lock, flags);
desc = &gpio_desc[gpio];
@@ -534,10 +1037,6 @@
#ifdef CONFIG_DEBUG_FS
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-
static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip)
{
unsigned i;
@@ -614,17 +1113,28 @@
/* REVISIT this isn't locked against gpio_chip removal ... */
for (gpio = 0; gpio_is_valid(gpio); gpio++) {
+ struct device *dev;
+
if (chip == gpio_desc[gpio].chip)
continue;
chip = gpio_desc[gpio].chip;
if (!chip)
continue;
- seq_printf(s, "%sGPIOs %d-%d, %s%s:\n",
+ seq_printf(s, "%sGPIOs %d-%d",
started ? "\n" : "",
- chip->base, chip->base + chip->ngpio - 1,
- chip->label ? : "generic",
- chip->can_sleep ? ", can sleep" : "");
+ chip->base, chip->base + chip->ngpio - 1);
+ dev = chip->dev;
+ if (dev)
+ seq_printf(s, ", %s/%s",
+ dev->bus ? dev->bus->name : "no-bus",
+ dev->bus_id);
+ if (chip->label)
+ seq_printf(s, ", %s", chip->label);
+ if (chip->can_sleep)
+ seq_printf(s, ", can sleep");
+ seq_printf(s, ":\n");
+
started = 1;
if (chip->dbg_show)
chip->dbg_show(s, chip);
diff --git a/drivers/gpio/max732x.c b/drivers/gpio/max732x.c
new file mode 100644
index 0000000..b51c813
--- /dev/null
+++ b/drivers/gpio/max732x.c
@@ -0,0 +1,385 @@
+/*
+ * max732x.c - I2C Port Expander with 8/16 I/O
+ *
+ * Copyright (C) 2007 Marvell International Ltd.
+ * Copyright (C) 2008 Jack Ren <jack.ren@marvell.com>
+ * Copyright (C) 2008 Eric Miao <eric.miao@marvell.com>
+ *
+ * Derived from drivers/gpio/pca953x.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/gpio.h>
+
+#include <linux/i2c.h>
+#include <linux/i2c/max732x.h>
+
+
+/*
+ * Each port of MAX732x (including MAX7319) falls into one of the
+ * following three types:
+ *
+ * - Push Pull Output
+ * - Input
+ * - Open Drain I/O
+ *
+ * designated by 'O', 'I' and 'P' individually according to MAXIM's
+ * datasheets.
+ *
+ * There are two groups of I/O ports, each group usually includes
+ * up to 8 I/O ports, and is accessed by a specific I2C address:
+ *
+ * - Group A : by I2C address 0b'110xxxx
+ * - Group B : by I2C address 0b'101xxxx
+ *
+ * where 'xxxx' is decided by the connections of pin AD2/AD0. The
+ * address used also affects the initial state of output signals.
+ *
+ * Within each group of ports, there are five known combinations of
+ * I/O ports: 4I4O, 4P4O, 8I, 8P, 8O, see the definitions below for
+ * the detailed organization of these ports.
+ *
+ * GPIO numbers run from 'gpio_base + 0' to 'gpio_base + 7/15',
+ * and GPIOs from GROUP_A are numbered before those from GROUP_B
+ * (if there are two groups).
+ *
+ * NOTE: MAX7328/MAX7329 are drop-in replacements for PCF8574/a, so
+ * they are not supported by this driver.
+ */
+
+/*
+ * Port layout encoding. Each port is described by a 2-bit PORT_* code;
+ * an IO_* word packs eight such codes with port 0 in the lowest two
+ * bits. GROUP_A() keeps a word in bits 0-15 of the driver data and
+ * GROUP_B() moves it to bits 16-31.
+ */
+#define PORT_NONE 0x0 /* '/' No Port */
+#define PORT_OUTPUT 0x1 /* 'O' Push-Pull, Output Only */
+#define PORT_INPUT 0x2 /* 'I' Input Only */
+#define PORT_OPENDRAIN 0x3 /* 'P' Open-Drain, I/O */
+
+#define IO_4I4O 0x5AA5 /* O7 O6 I5 I4 I3 I2 O1 O0 */
+#define IO_4P4O 0x5FF5 /* O7 O6 P5 P4 P3 P2 O1 O0 */
+#define IO_8I 0xAAAA /* I7 I6 I5 I4 I3 I2 I1 I0 */
+#define IO_8P 0xFFFF /* P7 P6 P5 P4 P3 P2 P1 P0 */
+#define IO_8O 0x5555 /* O7 O6 O5 O4 O3 O2 O1 O0 */
+
+#define GROUP_A(x) ((x) & 0xffff) /* I2C Addr: 0b'110xxxx */
+#define GROUP_B(x) ((x) << 16) /* I2C Addr: 0b'101xxxx */
+
+/*
+ * Supported chips. The driver data of each entry is the packed port
+ * layout (GROUP_A/GROUP_B words) that max732x_setup_gpio() decodes.
+ * The empty entry terminates the table.
+ */
+static const struct i2c_device_id max732x_id[] = {
+ { "max7319", GROUP_A(IO_8I) },
+ { "max7320", GROUP_B(IO_8O) },
+ { "max7321", GROUP_A(IO_8P) },
+ { "max7322", GROUP_A(IO_4I4O) },
+ { "max7323", GROUP_A(IO_4P4O) },
+ { "max7324", GROUP_A(IO_8I) | GROUP_B(IO_8O) },
+ { "max7325", GROUP_A(IO_8P) | GROUP_B(IO_8O) },
+ { "max7326", GROUP_A(IO_4I4O) | GROUP_B(IO_8O) },
+ { "max7327", GROUP_A(IO_4P4O) | GROUP_B(IO_8O) },
+ { },
+};
+MODULE_DEVICE_TABLE(i2c, max732x_id);
+
+/* Per-device driver state; allocated in max732x_probe(), freed in max732x_remove(). */
+struct max732x_chip {
+ /* Embedded so the gpio_chip callbacks can container_of() back to us. */
+ struct gpio_chip gpio_chip;
+
+ struct i2c_client *client; /* "main" client */
+ /* Extra client created by probe for the other group's address, if any. */
+ struct i2c_client *client_dummy;
+ /* Clients used to reach group A / group B ports respectively. */
+ struct i2c_client *client_group_a;
+ struct i2c_client *client_group_b;
+
+ /* Bit per GPIO offset: set when that offset belongs to group A. */
+ unsigned int mask_group_a;
+ /* Bit per GPIO offset: set when the port can be used as input / output. */
+ unsigned int dir_input;
+ unsigned int dir_output;
+
+ /* Held while the output shadow registers are read, written and updated. */
+ struct mutex lock;
+ /* Shadow of the last byte written: [0] for offsets 0-7, [1] for 8-15. */
+ uint8_t reg_out[2];
+};
+
+/*
+ * Write one byte to the group A client when @group_a is non-zero, to the
+ * group B client otherwise. Returns 0 on success or the negative SMBus
+ * error, which is also logged.
+ */
+static int max732x_write(struct max732x_chip *chip, int group_a, uint8_t val)
+{
+	struct i2c_client *target;
+	int err;
+
+	if (group_a)
+		target = chip->client_group_a;
+	else
+		target = chip->client_group_b;
+
+	err = i2c_smbus_write_byte(target, val);
+	if (err >= 0)
+		return 0;
+
+	dev_err(&target->dev, "failed writing\n");
+	return err;
+}
+
+/*
+ * Read one byte from the group A client when @group_a is non-zero, from
+ * the group B client otherwise, and store it in *@val. Returns 0 on
+ * success; on failure the negative SMBus error is logged and returned
+ * and *@val is left untouched.
+ */
+static int max732x_read(struct max732x_chip *chip, int group_a, uint8_t *val)
+{
+	struct i2c_client *target;
+	int byte;
+
+	if (group_a)
+		target = chip->client_group_a;
+	else
+		target = chip->client_group_b;
+
+	byte = i2c_smbus_read_byte(target);
+	if (byte < 0) {
+		dev_err(&target->dev, "failed reading\n");
+		return byte;
+	}
+
+	*val = (uint8_t)byte;
+	return 0;
+}
+
+/* Non-zero when GPIO offset @off is flagged in the chip's group A mask. */
+static inline int is_group_a(struct max732x_chip *chip, unsigned off)
+{
+	const unsigned int bit = 1u << off;
+
+	return bit & chip->mask_group_a;
+}
+
+/*
+ * gpio_chip.get callback.
+ *
+ * Reads the byte of the group that GPIO offset @off belongs to and
+ * returns the masked bit for that port (zero, or a non-zero value).
+ * A failed bus read is reported as 0.
+ */
+static int max732x_gpio_get_value(struct gpio_chip *gc, unsigned off)
+{
+	struct max732x_chip *chip;
+	uint8_t reg_val;
+	int ret;
+
+	chip = container_of(gc, struct max732x_chip, gpio_chip);
+
+	ret = max732x_read(chip, is_group_a(chip, off), &reg_val);
+	if (ret < 0)
+		return 0;
+
+	/* each group byte holds eight ports: bit index is off modulo 8 */
+	return reg_val & (1u << (off & 0x7));
+}
+
+/*
+ * gpio_chip.set callback.
+ *
+ * Under chip->lock, computes the new output byte from the shadow
+ * register, writes it to the group that owns offset @off and, only if
+ * the write succeeded, stores it back in the shadow register.
+ */
+static void max732x_gpio_set_value(struct gpio_chip *gc, unsigned off, int val)
+{
+	struct max732x_chip *chip;
+	const uint8_t mask = 1u << (off & 0x7);
+	uint8_t *shadow;
+	uint8_t reg_out;
+
+	chip = container_of(gc, struct max732x_chip, gpio_chip);
+	/* offsets 0-7 shadow in reg_out[0], offsets 8-15 in reg_out[1] */
+	shadow = &chip->reg_out[(off > 7) ? 1 : 0];
+
+	mutex_lock(&chip->lock);
+
+	if (val)
+		reg_out = *shadow | mask;
+	else
+		reg_out = *shadow & ~mask;
+
+	/* update the shadow register only after a successful write */
+	if (max732x_write(chip, is_group_a(chip, off), reg_out) >= 0)
+		*shadow = reg_out;
+
+	mutex_unlock(&chip->lock);
+}
+
+/*
+ * gpio_chip.direction_input callback.
+ *
+ * No bus access is made: returns 0 when offset @off is flagged in
+ * dir_input, -EACCES otherwise.
+ */
+static int max732x_gpio_direction_input(struct gpio_chip *gc, unsigned off)
+{
+	struct max732x_chip *chip;
+	unsigned int mask = 1u << off;
+
+	chip = container_of(gc, struct max732x_chip, gpio_chip);
+
+	if (mask & chip->dir_input)
+		return 0;
+
+	dev_dbg(&chip->client->dev, "%s port %d is output only\n",
+		chip->client->name, off);
+	return -EACCES;
+}
+
+/*
+ * gpio_chip.direction_output callback.
+ *
+ * Returns -EACCES when offset @off is not flagged in dir_output;
+ * otherwise drives the port to @val through max732x_gpio_set_value()
+ * and returns 0.
+ */
+static int max732x_gpio_direction_output(struct gpio_chip *gc,
+		unsigned off, int val)
+{
+	struct max732x_chip *chip;
+	unsigned int mask = 1u << off;
+
+	chip = container_of(gc, struct max732x_chip, gpio_chip);
+
+	if (!(mask & chip->dir_output)) {
+		dev_dbg(&chip->client->dev, "%s port %d is input only\n",
+			chip->client->name, off);
+		return -EACCES;
+	}
+
+	max732x_gpio_set_value(gc, off, val);
+	return 0;
+}
+
+/*
+ * Decode the packed port layout in @id->driver_data into the chip's
+ * dir_input / dir_output / mask_group_a bitmaps and fill in the
+ * gpio_chip (callbacks, base = @gpio_start, ngpio, label, owner).
+ *
+ * Slots coded PORT_NONE are skipped and do not consume a GPIO offset.
+ * Returns the number of ports found.
+ */
+static int __devinit max732x_setup_gpio(struct max732x_chip *chip,
+					const struct i2c_device_id *id,
+					unsigned gpio_start)
+{
+	struct gpio_chip *gc = &chip->gpio_chip;
+	uint32_t layout = id->driver_data;
+	int slot, port = 0;
+
+	for (slot = 0; slot < 16; slot++) {
+		unsigned int type = (layout >> (2 * slot)) & 0x3;
+		unsigned int mask = 1 << port;
+
+		if (type == PORT_NONE)
+			continue;
+
+		/* 'O' and 'P' ports can drive, 'I' and 'P' ports can read */
+		if (type != PORT_INPUT)
+			chip->dir_output |= mask;
+		if (type != PORT_OUTPUT)
+			chip->dir_input |= mask;
+
+		/* the first eight slots describe group A */
+		if (slot < 8)
+			chip->mask_group_a |= mask;
+		port++;
+	}
+
+	gc->label = chip->client->name;
+	gc->owner = THIS_MODULE;
+	gc->base = gpio_start;
+	gc->ngpio = port;
+	gc->can_sleep = 1;
+
+	gc->get = max732x_gpio_get_value;
+	if (chip->dir_input)
+		gc->direction_input = max732x_gpio_direction_input;
+	if (chip->dir_output) {
+		gc->direction_output = max732x_gpio_direction_output;
+		gc->set = max732x_gpio_set_value;
+	}
+
+	return port;
+}
+
+/*
+ * I2C probe.
+ *
+ * Decodes the port layout for the matched chip, works out which I2C
+ * address serves which port group, creates a dummy client for the second
+ * group when the chip has more than eight ports, primes the output
+ * shadow registers from the hardware and registers the gpio_chip.
+ *
+ * Returns 0 on success; -ENODEV without platform data or when the dummy
+ * client cannot be created; -ENOMEM; -EINVAL for an address outside the
+ * two group ranges; or the gpiochip_add() error. A failing
+ * pdata->setup() hook is only warned about.
+ */
+static int __devinit max732x_probe(struct i2c_client *client,
+				   const struct i2c_device_id *id)
+{
+	struct max732x_platform_data *pdata;
+	struct max732x_chip *chip;
+	struct i2c_client *c;
+	uint16_t addr_a, addr_b;
+	int ret, nr_port;
+
+	pdata = client->dev.platform_data;
+	if (pdata == NULL)
+		return -ENODEV;
+
+	chip = kzalloc(sizeof(struct max732x_chip), GFP_KERNEL);
+	if (chip == NULL)
+		return -ENOMEM;
+	chip->client = client;
+
+	nr_port = max732x_setup_gpio(chip, id, pdata->gpio_base);
+
+	addr_a = (client->addr & 0x0f) | 0x60;
+	addr_b = (client->addr & 0x0f) | 0x50;
+
+	/* A second client is only needed when the chip has a second
+	 * group, i.e. more than eight ports. Eight-port chips report
+	 * nr_port == 8 and must not get a dummy client.
+	 */
+	switch (client->addr & 0x70) {
+	case 0x60:
+		chip->client_group_a = client;
+		if (nr_port > 8) {
+			c = i2c_new_dummy(client->adapter, addr_b);
+			chip->client_group_b = chip->client_dummy = c;
+		}
+		break;
+	case 0x50:
+		chip->client_group_b = client;
+		if (nr_port > 8) {
+			c = i2c_new_dummy(client->adapter, addr_a);
+			chip->client_group_a = chip->client_dummy = c;
+		}
+		break;
+	default:
+		dev_err(&client->dev, "invalid I2C address specified %02x\n",
+				client->addr);
+		ret = -EINVAL;
+		goto out_failed;
+	}
+
+	/* i2c_new_dummy() returns NULL on failure */
+	if (nr_port > 8 && !chip->client_dummy) {
+		dev_err(&client->dev,
+			"failed to allocate second group I2C device\n");
+		ret = -ENODEV;
+		goto out_failed;
+	}
+
+	mutex_init(&chip->lock);
+
+	/* prime the output shadow registers from the hardware */
+	max732x_read(chip, is_group_a(chip, 0), &chip->reg_out[0]);
+	if (nr_port > 8)
+		max732x_read(chip, is_group_a(chip, 8), &chip->reg_out[1]);
+
+	ret = gpiochip_add(&chip->gpio_chip);
+	if (ret)
+		goto out_failed;
+
+	if (pdata->setup) {
+		ret = pdata->setup(client, chip->gpio_chip.base,
+				chip->gpio_chip.ngpio, pdata->context);
+		if (ret < 0)
+			dev_warn(&client->dev, "setup failed, %d\n", ret);
+	}
+
+	i2c_set_clientdata(client, chip);
+	return 0;
+
+out_failed:
+	/* don't leak the dummy client created above */
+	if (chip->client_dummy)
+		i2c_unregister_device(chip->client_dummy);
+	kfree(chip);
+	return ret;
+}
+
+/*
+ * I2C remove.
+ *
+ * Runs the optional pdata->teardown() hook, unregisters the gpio_chip,
+ * drops the dummy client if one was created and frees the driver state.
+ * A failure of the hook or of gpiochip_remove() is logged and returned,
+ * leaving everything after that step in place.
+ */
+static int __devexit max732x_remove(struct i2c_client *client)
+{
+	struct max732x_chip *chip = i2c_get_clientdata(client);
+	struct max732x_platform_data *pdata = client->dev.platform_data;
+	struct gpio_chip *gc = &chip->gpio_chip;
+	int err;
+
+	if (pdata->teardown) {
+		err = pdata->teardown(client, gc->base, gc->ngpio,
+				pdata->context);
+		if (err < 0) {
+			dev_err(&client->dev, "%s failed, %d\n",
+					"teardown", err);
+			return err;
+		}
+	}
+
+	err = gpiochip_remove(gc);
+	if (err) {
+		dev_err(&client->dev, "%s failed, %d\n",
+				"gpiochip_remove()", err);
+		return err;
+	}
+
+	/* unregister any dummy i2c_client */
+	if (chip->client_dummy)
+		i2c_unregister_device(chip->client_dummy);
+
+	kfree(chip);
+	return 0;
+}
+
+/* I2C driver glue: binds the probe/remove callbacks to the IDs in max732x_id. */
+static struct i2c_driver max732x_driver = {
+ .driver = {
+ .name = "max732x",
+ .owner = THIS_MODULE,
+ },
+ .probe = max732x_probe,
+ .remove = __devexit_p(max732x_remove),
+ .id_table = max732x_id,
+};
+
+/* Module entry point: register the I2C driver and return its status. */
+static int __init max732x_init(void)
+{
+ return i2c_add_driver(&max732x_driver);
+}
+module_init(max732x_init);
+
+/* Module exit point: unregister the I2C driver. */
+static void __exit max732x_exit(void)
+{
+ i2c_del_driver(&max732x_driver);
+}
+module_exit(max732x_exit);
+
+MODULE_AUTHOR("Eric Miao <eric.miao@marvell.com>");
+MODULE_DESCRIPTION("GPIO expander driver for MAX732X");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpio/mcp23s08.c b/drivers/gpio/mcp23s08.c
index 7f92fdd..8a1b405 100644
--- a/drivers/gpio/mcp23s08.c
+++ b/drivers/gpio/mcp23s08.c
@@ -40,15 +40,26 @@
struct spi_device *spi;
u8 addr;
+ u8 cache[11];
/* lock protects the cached values */
struct mutex lock;
- u8 cache[11];
struct gpio_chip chip;
struct work_struct work;
};
+/* A given spi_device can represent up to four mcp23s08 chips
+ * sharing the same chipselect but using different addresses
+ * (e.g. chips #0 and #3 might be populated, but not #1 or #2).
+ * Driver data holds all the per-chip data.
+ */
+struct mcp23s08_driver_data {
+ unsigned ngpio;
+ struct mcp23s08 *mcp[4];
+ struct mcp23s08 chip[];
+};
+
static int mcp23s08_read(struct mcp23s08 *mcp, unsigned reg)
{
u8 tx[2], rx[1];
@@ -208,25 +219,18 @@
/*----------------------------------------------------------------------*/
-static int mcp23s08_probe(struct spi_device *spi)
+static int mcp23s08_probe_one(struct spi_device *spi, unsigned addr,
+ unsigned base, unsigned pullups)
{
- struct mcp23s08 *mcp;
- struct mcp23s08_platform_data *pdata;
+ struct mcp23s08_driver_data *data = spi_get_drvdata(spi);
+ struct mcp23s08 *mcp = data->mcp[addr];
int status;
int do_update = 0;
- pdata = spi->dev.platform_data;
- if (!pdata || pdata->slave > 3 || !pdata->base)
- return -ENODEV;
-
- mcp = kzalloc(sizeof *mcp, GFP_KERNEL);
- if (!mcp)
- return -ENOMEM;
-
mutex_init(&mcp->lock);
mcp->spi = spi;
- mcp->addr = 0x40 | (pdata->slave << 1);
+ mcp->addr = 0x40 | (addr << 1);
mcp->chip.label = "mcp23s08",
@@ -236,26 +240,28 @@
mcp->chip.set = mcp23s08_set;
mcp->chip.dbg_show = mcp23s08_dbg_show;
- mcp->chip.base = pdata->base;
+ mcp->chip.base = base;
mcp->chip.ngpio = 8;
mcp->chip.can_sleep = 1;
+ mcp->chip.dev = &spi->dev;
mcp->chip.owner = THIS_MODULE;
- spi_set_drvdata(spi, mcp);
-
- /* verify MCP_IOCON.SEQOP = 0, so sequential reads work */
+ /* verify MCP_IOCON.SEQOP = 0, so sequential reads work,
+ * and MCP_IOCON.HAEN = 1, so we work with all chips.
+ */
status = mcp23s08_read(mcp, MCP_IOCON);
if (status < 0)
goto fail;
- if (status & IOCON_SEQOP) {
+ if ((status & IOCON_SEQOP) || !(status & IOCON_HAEN)) {
status &= ~IOCON_SEQOP;
+ status |= IOCON_HAEN;
status = mcp23s08_write(mcp, MCP_IOCON, (u8) status);
if (status < 0)
goto fail;
}
/* configure ~100K pullups */
- status = mcp23s08_write(mcp, MCP_GPPU, pdata->pullups);
+ status = mcp23s08_write(mcp, MCP_GPPU, pullups);
if (status < 0)
goto fail;
@@ -282,11 +288,58 @@
tx[1] = MCP_IPOL;
memcpy(&tx[2], &mcp->cache[MCP_IPOL], sizeof(tx) - 2);
status = spi_write_then_read(mcp->spi, tx, sizeof tx, NULL, 0);
-
- /* FIXME check status... */
+ if (status < 0)
+ goto fail;
}
status = gpiochip_add(&mcp->chip);
+fail:
+ if (status < 0)
+ dev_dbg(&spi->dev, "can't setup chip %d, --> %d\n",
+ addr, status);
+ return status;
+}
+
+static int mcp23s08_probe(struct spi_device *spi)
+{
+ struct mcp23s08_platform_data *pdata;
+ unsigned addr;
+ unsigned chips = 0;
+ struct mcp23s08_driver_data *data;
+ int status;
+ unsigned base;
+
+ pdata = spi->dev.platform_data;
+ if (!pdata || !gpio_is_valid(pdata->base))
+ return -ENODEV;
+
+ for (addr = 0; addr < 4; addr++) {
+ if (!pdata->chip[addr].is_present)
+ continue;
+ chips++;
+ }
+ if (!chips)
+ return -ENODEV;
+
+ data = kzalloc(sizeof *data + chips * sizeof(struct mcp23s08),
+ GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ spi_set_drvdata(spi, data);
+
+ base = pdata->base;
+ for (addr = 0; addr < 4; addr++) {
+ if (!pdata->chip[addr].is_present)
+ continue;
+ chips--;
+ data->mcp[addr] = &data->chip[chips];
+ status = mcp23s08_probe_one(spi, addr, base,
+ pdata->chip[addr].pullups);
+ if (status < 0)
+ goto fail;
+ base += 8;
+ }
+ data->ngpio = base - pdata->base;
/* NOTE: these chips have a relatively sane IRQ framework, with
* per-signal masking and level/edge triggering. It's not yet
@@ -294,8 +347,9 @@
*/
if (pdata->setup) {
- status = pdata->setup(spi, mcp->chip.base,
- mcp->chip.ngpio, pdata->context);
+ status = pdata->setup(spi,
+ pdata->base, data->ngpio,
+ pdata->context);
if (status < 0)
dev_dbg(&spi->dev, "setup --> %d\n", status);
}
@@ -303,19 +357,29 @@
return 0;
fail:
- kfree(mcp);
+ for (addr = 0; addr < 4; addr++) {
+ int tmp;
+
+ if (!data->mcp[addr])
+ continue;
+ tmp = gpiochip_remove(&data->mcp[addr]->chip);
+ if (tmp < 0)
+ dev_err(&spi->dev, "%s --> %d\n", "remove", tmp);
+ }
+ kfree(data);
return status;
}
static int mcp23s08_remove(struct spi_device *spi)
{
- struct mcp23s08 *mcp = spi_get_drvdata(spi);
+ struct mcp23s08_driver_data *data = spi_get_drvdata(spi);
struct mcp23s08_platform_data *pdata = spi->dev.platform_data;
+ unsigned addr;
int status = 0;
if (pdata->teardown) {
status = pdata->teardown(spi,
- mcp->chip.base, mcp->chip.ngpio,
+ pdata->base, data->ngpio,
pdata->context);
if (status < 0) {
dev_err(&spi->dev, "%s --> %d\n", "teardown", status);
@@ -323,11 +387,20 @@
}
}
- status = gpiochip_remove(&mcp->chip);
+ for (addr = 0; addr < 4; addr++) {
+ int tmp;
+
+ if (!data->mcp[addr])
+ continue;
+
+ tmp = gpiochip_remove(&data->mcp[addr]->chip);
+ if (tmp < 0) {
+ dev_err(&spi->dev, "%s --> %d\n", "remove", tmp);
+ status = tmp;
+ }
+ }
if (status == 0)
- kfree(mcp);
- else
- dev_err(&spi->dev, "%s --> %d\n", "remove", status);
+ kfree(data);
return status;
}
@@ -355,4 +428,3 @@
module_exit(mcp23s08_exit);
MODULE_LICENSE("GPL");
-
diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c
index a380730..cc84686 100644
--- a/drivers/gpio/pca953x.c
+++ b/drivers/gpio/pca953x.c
@@ -188,6 +188,7 @@
gc->base = chip->gpio_start;
gc->ngpio = gpios;
gc->label = chip->client->name;
+ gc->dev = &chip->client->dev;
gc->owner = THIS_MODULE;
}
diff --git a/drivers/gpio/pcf857x.c b/drivers/gpio/pcf857x.c
index d25d356..fc9c6ae 100644
--- a/drivers/gpio/pcf857x.c
+++ b/drivers/gpio/pcf857x.c
@@ -200,6 +200,7 @@
gpio->chip.base = pdata->gpio_base;
gpio->chip.can_sleep = 1;
+ gpio->chip.dev = &client->dev;
gpio->chip.owner = THIS_MODULE;
/* NOTE: the OnSemi jlc1562b is also largely compatible with
diff --git a/drivers/i2c/chips/Kconfig b/drivers/i2c/chips/Kconfig
index 50e0a46..a95cb94 100644
--- a/drivers/i2c/chips/Kconfig
+++ b/drivers/i2c/chips/Kconfig
@@ -126,7 +126,7 @@
config TPS65010
tristate "TPS6501x Power Management chips"
- depends on HAVE_GPIO_LIB
+ depends on GPIOLIB
default y if MACH_OMAP_H2 || MACH_OMAP_H3 || MACH_OMAP_OSK
help
If you say yes here you get support for the TPS6501x series of
diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c
index 8594968..cf02e8f 100644
--- a/drivers/i2c/chips/tps65010.c
+++ b/drivers/i2c/chips/tps65010.c
@@ -636,6 +636,8 @@
tps->outmask = board->outmask;
tps->chip.label = client->name;
+ tps->chip.dev = &client->dev;
+ tps->chip.owner = THIS_MODULE;
tps->chip.set = tps65010_gpio_set;
tps->chip.direction_output = tps65010_output;
diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c
index aad664d..0d39597 100644
--- a/drivers/input/serio/hp_sdc.c
+++ b/drivers/input/serio/hp_sdc.c
@@ -70,7 +70,6 @@
#include <linux/semaphore.h>
#include <linux/slab.h>
#include <linux/hil.h>
-#include <linux/semaphore.h>
#include <asm/io.h>
#include <asm/system.h>
diff --git a/drivers/isdn/hisax/st5481.h b/drivers/isdn/hisax/st5481.h
index 2044e71..cff7a63 100644
--- a/drivers/isdn/hisax/st5481.h
+++ b/drivers/isdn/hisax/st5481.h
@@ -220,7 +220,7 @@
#define ERR(format, arg...) \
printk(KERN_ERR "%s:%s: " format "\n" , __FILE__, __func__ , ## arg)
-#define WARN(format, arg...) \
+#define WARNING(format, arg...) \
printk(KERN_WARNING "%s:%s: " format "\n" , __FILE__, __func__ , ## arg)
#define INFO(format, arg...) \
@@ -412,7 +412,7 @@
({ \
int status; \
if ((status = usb_submit_urb(urb, mem_flags)) < 0) { \
- WARN("usb_submit_urb failed,status=%d", status); \
+ WARNING("usb_submit_urb failed,status=%d", status); \
} \
status; \
})
diff --git a/drivers/isdn/hisax/st5481_b.c b/drivers/isdn/hisax/st5481_b.c
index fa64115..0074b60 100644
--- a/drivers/isdn/hisax/st5481_b.c
+++ b/drivers/isdn/hisax/st5481_b.c
@@ -180,7 +180,7 @@
DBG(4,"urb killed status %d", urb->status);
return; // Give up
default:
- WARN("urb status %d",urb->status);
+ WARNING("urb status %d",urb->status);
if (b_out->busy == 0) {
st5481_usb_pipe_reset(adapter, (bcs->channel+1)*2 | USB_DIR_OUT, NULL, NULL);
}
@@ -372,6 +372,6 @@
B_L1L2(bcs, PH_DEACTIVATE | INDICATION, NULL);
break;
default:
- WARN("pr %#x\n", pr);
+ WARNING("pr %#x\n", pr);
}
}
diff --git a/drivers/isdn/hisax/st5481_d.c b/drivers/isdn/hisax/st5481_d.c
index b8c4855..077991c 100644
--- a/drivers/isdn/hisax/st5481_d.c
+++ b/drivers/isdn/hisax/st5481_d.c
@@ -389,7 +389,7 @@
DBG(1,"urb killed status %d", urb->status);
break;
default:
- WARN("urb status %d",urb->status);
+ WARNING("urb status %d",urb->status);
if (d_out->busy == 0) {
st5481_usb_pipe_reset(adapter, EP_D_OUT | USB_DIR_OUT, fifo_reseted, adapter);
}
@@ -420,7 +420,7 @@
isdnhdlc_out_init(&d_out->hdlc_state, 1, 0);
if (test_and_set_bit(buf_nr, &d_out->busy)) {
- WARN("ep %d urb %d busy %#lx", EP_D_OUT, buf_nr, d_out->busy);
+ WARNING("ep %d urb %d busy %#lx", EP_D_OUT, buf_nr, d_out->busy);
return;
}
urb = d_out->urb[buf_nr];
@@ -601,7 +601,7 @@
FsmEvent(&adapter->d_out.fsm, EV_DOUT_START_XMIT, NULL);
break;
default:
- WARN("pr %#x\n", pr);
+ WARNING("pr %#x\n", pr);
break;
}
}
diff --git a/drivers/isdn/hisax/st5481_usb.c b/drivers/isdn/hisax/st5481_usb.c
index 427a8b0..ec3c0e5 100644
--- a/drivers/isdn/hisax/st5481_usb.c
+++ b/drivers/isdn/hisax/st5481_usb.c
@@ -66,7 +66,7 @@
struct ctrl_msg *ctrl_msg;
if ((w_index = fifo_add(&ctrl->msg_fifo.f)) < 0) {
- WARN("control msg FIFO full");
+ WARNING("control msg FIFO full");
return;
}
ctrl_msg = &ctrl->msg_fifo.data[w_index];
@@ -139,7 +139,7 @@
DBG(1,"urb killed status %d", urb->status);
return; // Give up
default:
- WARN("urb status %d",urb->status);
+ WARNING("urb status %d",urb->status);
break;
}
}
@@ -198,7 +198,7 @@
DBG(2, "urb shutting down with status: %d", urb->status);
return;
default:
- WARN("nonzero urb status received: %d", urb->status);
+ WARNING("nonzero urb status received: %d", urb->status);
goto exit;
}
@@ -235,7 +235,7 @@
exit:
status = usb_submit_urb (urb, GFP_ATOMIC);
if (status)
- WARN("usb_submit_urb failed with result %d", status);
+ WARNING("usb_submit_urb failed with result %d", status);
}
/* ======================================================================
@@ -257,7 +257,7 @@
DBG(2,"");
if ((status = usb_reset_configuration (dev)) < 0) {
- WARN("reset_configuration failed,status=%d",status);
+ WARNING("reset_configuration failed,status=%d",status);
return status;
}
@@ -269,7 +269,7 @@
// Check if the config is sane
if ( altsetting->desc.bNumEndpoints != 7 ) {
- WARN("expecting 7 got %d endpoints!", altsetting->desc.bNumEndpoints);
+ WARNING("expecting 7 got %d endpoints!", altsetting->desc.bNumEndpoints);
return -EINVAL;
}
@@ -279,7 +279,7 @@
// Use alternative setting 3 on interface 0 to have 2B+D
if ((status = usb_set_interface (dev, 0, 3)) < 0) {
- WARN("usb_set_interface failed,status=%d",status);
+ WARNING("usb_set_interface failed,status=%d",status);
return status;
}
@@ -497,7 +497,7 @@
DBG(1,"urb killed status %d", urb->status);
return; // Give up
default:
- WARN("urb status %d",urb->status);
+ WARNING("urb status %d",urb->status);
break;
}
}
@@ -523,7 +523,7 @@
DBG(4,"count=%d",status);
DBG_PACKET(0x400, in->rcvbuf, status);
if (!(skb = dev_alloc_skb(status))) {
- WARN("receive out of memory\n");
+ WARNING("receive out of memory\n");
break;
}
memcpy(skb_put(skb, status), in->rcvbuf, status);
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index 1a8de57..37344aa 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -98,16 +98,20 @@
return features;
}
-static void lg_set_features(struct virtio_device *vdev, u32 features)
+static void lg_finalize_features(struct virtio_device *vdev)
{
- unsigned int i;
+ unsigned int i, bits;
struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
/* Second half of bitmap is features we accept. */
u8 *out_features = lg_features(desc) + desc->feature_len;
+ /* Give virtio_ring a chance to accept features. */
+ vring_transport_features(vdev);
+
memset(out_features, 0, desc->feature_len);
- for (i = 0; i < min(desc->feature_len * 8, 32); i++) {
- if (features & (1 << i))
+ bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
+ for (i = 0; i < bits; i++) {
+ if (test_bit(i, vdev->features))
out_features[i / 8] |= (1 << (i % 8));
}
}
@@ -297,7 +301,7 @@
/* The ops structure which hooks everything together. */
static struct virtio_config_ops lguest_config_ops = {
.get_features = lg_get_features,
- .set_features = lg_set_features,
+ .finalize_features = lg_finalize_features,
.get = lg_get,
.set = lg_set,
.get_status = lg_get_status,
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 9f93c29..1f57a99 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -19,6 +19,14 @@
interface. The device may be connected by PCI or local bus with
varying functions enabled.
+config MFD_SM501_GPIO
+	bool "Export GPIO via GPIO layer"
+	depends on MFD_SM501 && GPIOLIB
+	 ---help---
+	 This option uses the gpio library layer to export the 64 GPIO
+	 lines on the SM501. The platform data is used to supply the
+	 base number for the first GPIO line to register.
+
config MFD_ASIC3
bool "Support for Compaq ASIC3"
depends on GENERIC_HARDIRQS && HAVE_GPIO_LIB && ARM
@@ -28,7 +36,7 @@
config HTC_EGPIO
bool "HTC EGPIO support"
- depends on GENERIC_HARDIRQS && HAVE_GPIO_LIB && ARM
+ depends on GENERIC_HARDIRQS && GPIOLIB && ARM
help
This driver supports the CPLD egpio chip present on
several HTC phones. It provides basic support for input
@@ -44,7 +52,7 @@
config MFD_TC6393XB
bool "Support Toshiba TC6393XB"
- depends on HAVE_GPIO_LIB
+ depends on GPIOLIB
select MFD_CORE
help
Support for Toshiba Mobile IO Controller TC6393XB
diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c
index 8872cc0..6be4317 100644
--- a/drivers/mfd/htc-egpio.c
+++ b/drivers/mfd/htc-egpio.c
@@ -318,6 +318,8 @@
ei->chip[i].dev = &(pdev->dev);
chip = &(ei->chip[i].chip);
chip->label = "htc-egpio";
+ chip->dev = &pdev->dev;
+ chip->owner = THIS_MODULE;
chip->get = egpio_get;
chip->set = egpio_set;
chip->direction_input = egpio_direction_input;
diff --git a/drivers/mfd/htc-pasic3.c b/drivers/mfd/htc-pasic3.c
index 633cbba..91b294d 100644
--- a/drivers/mfd/htc-pasic3.c
+++ b/drivers/mfd/htc-pasic3.c
@@ -238,6 +238,8 @@
return 0;
}
+MODULE_ALIAS("platform:pasic3");
+
static struct platform_driver pasic3_driver = {
.driver = {
.name = "pasic3",
diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c
index 1eab7cf..b5272b5c 100644
--- a/drivers/mfd/mcp-sa11x0.c
+++ b/drivers/mfd/mcp-sa11x0.c
@@ -242,6 +242,8 @@
/*
* The driver for the SA11x0 MCP port.
*/
+MODULE_ALIAS("platform:sa11x0-mcp");
+
static struct platform_driver mcp_sa11x0_driver = {
.probe = mcp_sa11x0_probe,
.remove = mcp_sa11x0_remove,
diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
index d7d88ce..0454be4 100644
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c
@@ -36,7 +36,7 @@
if (ret)
goto fail_device;
- memzero(res, sizeof(res));
+ memset(res, 0, sizeof(res));
for (r = 0; r < cell->num_resources; r++) {
res[r].name = cell->resources[r].name;
res[r].flags = cell->resources[r].flags;
diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index 2fe6473..7aebad4 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -19,6 +19,7 @@
#include <linux/device.h>
#include <linux/platform_device.h>
#include <linux/pci.h>
+#include <linux/i2c-gpio.h>
#include <linux/sm501.h>
#include <linux/sm501-regs.h>
@@ -31,10 +32,37 @@
struct platform_device pdev;
};
+struct sm501_gpio;
+
+#ifdef CONFIG_MFD_SM501_GPIO
+#include <linux/gpio.h>
+
+struct sm501_gpio_chip {
+ struct gpio_chip gpio;
+ struct sm501_gpio *ourgpio; /* to get back to parent. */
+ void __iomem *regbase;
+};
+
+struct sm501_gpio {
+ struct sm501_gpio_chip low;
+ struct sm501_gpio_chip high;
+ spinlock_t lock;
+
+ unsigned int registered : 1;
+ void __iomem *regs;
+ struct resource *regs_res;
+};
+#else
+struct sm501_gpio {
+ /* no gpio support, empty definition for sm501_devdata. */
+};
+#endif
+
struct sm501_devdata {
spinlock_t reg_lock;
struct mutex clock_lock;
struct list_head devices;
+ struct sm501_gpio gpio;
struct device *dev;
struct resource *io_res;
@@ -42,6 +70,7 @@
struct resource *regs_claim;
struct sm501_platdata *platdata;
+
unsigned int in_suspend;
unsigned long pm_misc;
@@ -52,6 +81,7 @@
unsigned int rev;
};
+
#define MHZ (1000 * 1000)
#ifdef DEBUG
@@ -276,58 +306,6 @@
EXPORT_SYMBOL_GPL(sm501_modify_reg);
-unsigned long sm501_gpio_get(struct device *dev,
- unsigned long gpio)
-{
- struct sm501_devdata *sm = dev_get_drvdata(dev);
- unsigned long result;
- unsigned long reg;
-
- reg = (gpio > 32) ? SM501_GPIO_DATA_HIGH : SM501_GPIO_DATA_LOW;
- result = readl(sm->regs + reg);
-
- result >>= (gpio & 31);
- return result & 1UL;
-}
-
-EXPORT_SYMBOL_GPL(sm501_gpio_get);
-
-void sm501_gpio_set(struct device *dev,
- unsigned long gpio,
- unsigned int to,
- unsigned int dir)
-{
- struct sm501_devdata *sm = dev_get_drvdata(dev);
-
- unsigned long bit = 1 << (gpio & 31);
- unsigned long base;
- unsigned long save;
- unsigned long val;
-
- base = (gpio > 32) ? SM501_GPIO_DATA_HIGH : SM501_GPIO_DATA_LOW;
- base += SM501_GPIO;
-
- spin_lock_irqsave(&sm->reg_lock, save);
-
- val = readl(sm->regs + base) & ~bit;
- if (to)
- val |= bit;
- writel(val, sm->regs + base);
-
- val = readl(sm->regs + SM501_GPIO_DDR_LOW) & ~bit;
- if (dir)
- val |= bit;
-
- writel(val, sm->regs + SM501_GPIO_DDR_LOW);
- sm501_sync_regs(sm);
-
- spin_unlock_irqrestore(&sm->reg_lock, save);
-
-}
-
-EXPORT_SYMBOL_GPL(sm501_gpio_set);
-
-
/* sm501_unit_power
*
* alters the power active gate to set specific units on or off
@@ -906,6 +884,313 @@
return sm501_register_device(sm, pdev);
}
+#ifdef CONFIG_MFD_SM501_GPIO
+
+static inline struct sm501_gpio_chip *to_sm501_gpio(struct gpio_chip *gc)
+{
+ return container_of(gc, struct sm501_gpio_chip, gpio);
+}
+
+static inline struct sm501_devdata *sm501_gpio_to_dev(struct sm501_gpio *gpio)
+{
+ return container_of(gpio, struct sm501_devdata, gpio);
+}
+
+static int sm501_gpio_get(struct gpio_chip *chip, unsigned offset)
+
+{
+ struct sm501_gpio_chip *smgpio = to_sm501_gpio(chip);
+ unsigned long result;
+
+ result = readl(smgpio->regbase + SM501_GPIO_DATA_LOW);
+ result >>= offset;
+
+ return result & 1UL;
+}
+
+/* Set (@value non-zero) or clear line @offset in this bank's data register. */
+static void sm501_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
+	struct sm501_gpio *smgpio = smchip->ourgpio;
+	unsigned long bit = 1UL << offset;	/* 1 << 31 overflows int */
+	void __iomem *regs = smchip->regbase;
+	unsigned long save;
+	unsigned long val;
+
+	dev_dbg(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d)\n",
+		__func__, chip, offset);
+
+	spin_lock_irqsave(&smgpio->lock, save);
+	/* read-modify-write of the same data register, done under the lock */
+	val = readl(regs + SM501_GPIO_DATA_LOW) & ~bit;
+	if (value)
+		val |= bit;
+	writel(val, regs + SM501_GPIO_DATA_LOW);
+
+	sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+	spin_unlock_irqrestore(&smgpio->lock, save);
+}
+
+/* Clear line @offset's bit in the bank's direction register; returns 0. */
+static int sm501_gpio_input(struct gpio_chip *chip, unsigned offset)
+{
+	struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
+	struct sm501_gpio *smgpio = smchip->ourgpio;
+	void __iomem *regs = smchip->regbase;
+	unsigned long bit = 1UL << offset;	/* 1 << 31 overflows int */
+	unsigned long save;
+	unsigned long ddr;
+
+	dev_dbg(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d)\n",
+		__func__, chip, offset);
+
+	spin_lock_irqsave(&smgpio->lock, save);
+	ddr = readl(regs + SM501_GPIO_DDR_LOW);
+	writel(ddr & ~bit, regs + SM501_GPIO_DDR_LOW);
+
+	sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+	spin_unlock_irqrestore(&smgpio->lock, save);
+
+	return 0;
+}
+
+/* Write @value to line @offset, then set its direction-register bit. */
+static int sm501_gpio_output(struct gpio_chip *chip,
+			     unsigned offset, int value)
+{
+	struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
+	struct sm501_gpio *smgpio = smchip->ourgpio;
+	unsigned long bit = 1UL << offset;	/* 1 << 31 overflows int */
+	void __iomem *regs = smchip->regbase;
+	unsigned long save;
+	unsigned long val;
+	unsigned long ddr;
+
+	dev_dbg(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d,%d)\n",
+		__func__, chip, offset, value);
+
+	spin_lock_irqsave(&smgpio->lock, save);
+	val = readl(regs + SM501_GPIO_DATA_LOW);
+	if (value)
+		val |= bit;
+	else
+		val &= ~bit;
+	writel(val, regs + SM501_GPIO_DATA_LOW);
+
+	ddr = readl(regs + SM501_GPIO_DDR_LOW);
+	writel(ddr | bit, regs + SM501_GPIO_DDR_LOW);
+	sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+	/* data is written again after the direction change; TODO confirm why */
+	writel(val, regs + SM501_GPIO_DATA_LOW);
+
+	sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+	spin_unlock_irqrestore(&smgpio->lock, save);
+
+	return 0;
+}
+
+static struct gpio_chip gpio_chip_template = {
+ .ngpio = 32,
+ .direction_input = sm501_gpio_input,
+ .direction_output = sm501_gpio_output,
+ .set = sm501_gpio_set,
+ .get = sm501_gpio_get,
+};
+
+/* Set up one 32-line bank from the template and hand it to gpiochip_add(). */
+static int __devinit sm501_gpio_register_chip(struct sm501_devdata *sm,
+					      struct sm501_gpio *gpio,
+					      struct sm501_gpio_chip *chip)
+{
+	struct sm501_platdata *pdata = sm->platdata;
+	struct gpio_chip *gchip = &chip->gpio;
+	int base = pdata->gpio_base;
+
+	chip->gpio = gpio_chip_template;
+	if (chip == &gpio->high) {
+		/* fixed base (0 included): high bank follows the low one */
+		if (base >= 0)
+			base += 32;
+		chip->regbase = gpio->regs + SM501_GPIO_DATA_HIGH;
+		gchip->label = "SM501-HIGH";
+	} else {
+		chip->regbase = gpio->regs + SM501_GPIO_DATA_LOW;
+		gchip->label = "SM501-LOW";
+	}
+
+	gchip->base = base;
+	chip->ourgpio = gpio;
+	return gpiochip_add(gchip);
+}
+
+static int sm501_register_gpio(struct sm501_devdata *sm)
+{
+ struct sm501_gpio *gpio = &sm->gpio;
+ resource_size_t iobase = sm->io_res->start + SM501_GPIO;
+ int ret;
+ int tmp;
+
+ dev_dbg(sm->dev, "registering gpio block %08llx\n",
+ (unsigned long long)iobase);
+
+ spin_lock_init(&gpio->lock);
+
+ gpio->regs_res = request_mem_region(iobase, 0x20, "sm501-gpio");
+ if (gpio->regs_res == NULL) {
+ dev_err(sm->dev, "gpio: failed to request region\n");
+ return -ENXIO;
+ }
+
+ gpio->regs = ioremap(iobase, 0x20);
+ if (gpio->regs == NULL) {
+ dev_err(sm->dev, "gpio: failed to remap registers\n");
+ ret = -ENXIO;
+ goto err_claimed;
+ }
+
+ /* Register both our chips. */
+
+ ret = sm501_gpio_register_chip(sm, gpio, &gpio->low);
+ if (ret) {
+ dev_err(sm->dev, "failed to add low chip\n");
+ goto err_mapped;
+ }
+
+ ret = sm501_gpio_register_chip(sm, gpio, &gpio->high);
+ if (ret) {
+ dev_err(sm->dev, "failed to add high chip\n");
+ goto err_low_chip;
+ }
+
+ gpio->registered = 1;
+
+ return 0;
+
+ err_low_chip:
+ tmp = gpiochip_remove(&gpio->low.gpio);
+ if (tmp) {
+ dev_err(sm->dev, "cannot remove low chip, cannot tidy up\n");
+ return ret;
+ }
+
+ err_mapped:
+ iounmap(gpio->regs);
+
+ err_claimed:
+ release_resource(gpio->regs_res);
+ kfree(gpio->regs_res);
+
+ return ret;
+}
+
+static void sm501_gpio_remove(struct sm501_devdata *sm)
+{
+ struct sm501_gpio *gpio = &sm->gpio;
+ int ret;
+
+ if (!sm->gpio.registered)
+ return;
+
+ ret = gpiochip_remove(&gpio->low.gpio);
+ if (ret)
+ dev_err(sm->dev, "cannot remove low chip, cannot tidy up\n");
+
+ ret = gpiochip_remove(&gpio->high.gpio);
+ if (ret)
+ dev_err(sm->dev, "cannot remove high chip, cannot tidy up\n");
+
+ iounmap(gpio->regs);
+ release_resource(gpio->regs_res);
+ kfree(gpio->regs_res);
+}
+
+static inline int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin)
+{
+ struct sm501_gpio *gpio = &sm->gpio;
+ int base = (pin < 32) ? gpio->low.gpio.base : gpio->high.gpio.base;
+
+ return (pin % 32) + base;
+}
+
+static inline int sm501_gpio_isregistered(struct sm501_devdata *sm)
+{
+ return sm->gpio.registered;
+}
+#else
+static inline int sm501_register_gpio(struct sm501_devdata *sm)
+{
+ return 0;
+}
+
+static inline void sm501_gpio_remove(struct sm501_devdata *sm)
+{
+}
+
+static inline int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin)
+{
+ return -1;
+}
+
+static inline int sm501_gpio_isregistered(struct sm501_devdata *sm)
+{
+ return 0;
+}
+#endif
+
+static int sm501_register_gpio_i2c_instance(struct sm501_devdata *sm,
+ struct sm501_platdata_gpio_i2c *iic)
+{
+ struct i2c_gpio_platform_data *icd;
+ struct platform_device *pdev;
+
+ pdev = sm501_create_subdev(sm, "i2c-gpio", 0,
+ sizeof(struct i2c_gpio_platform_data));
+ if (!pdev)
+ return -ENOMEM;
+
+ icd = pdev->dev.platform_data;
+
+ /* We keep the pin_sda and pin_scl fields relative in case the
+ * same platform data is passed to >1 SM501.
+ */
+
+ icd->sda_pin = sm501_gpio_pin2nr(sm, iic->pin_sda);
+ icd->scl_pin = sm501_gpio_pin2nr(sm, iic->pin_scl);
+ icd->timeout = iic->timeout;
+ icd->udelay = iic->udelay;
+
+ /* note, we can't use either of the pin numbers, as the i2c-gpio
+ * driver uses the platform.id field to generate the bus number
+ * to register with the i2c core; The i2c core doesn't have enough
+ * entries to deal with anything we currently use.
+ */
+
+ pdev->id = iic->bus_num;
+
+ dev_info(sm->dev, "registering i2c-%d: sda=%d (%d), scl=%d (%d)\n",
+ iic->bus_num,
+ icd->sda_pin, iic->pin_sda, icd->scl_pin, iic->pin_scl);
+
+ return sm501_register_device(sm, pdev);
+}
+
+static int sm501_register_gpio_i2c(struct sm501_devdata *sm,
+ struct sm501_platdata *pdata)
+{
+ struct sm501_platdata_gpio_i2c *iic = pdata->gpio_i2c;
+ int index;
+ int ret;
+
+ for (index = 0; index < pdata->gpio_i2c_nr; index++, iic++) {
+ ret = sm501_register_gpio_i2c_instance(sm, iic);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
/* sm501_dbg_regs
*
* Debug attribute to attach to parent device to show core registers
@@ -1013,6 +1298,7 @@
static int sm501_init_dev(struct sm501_devdata *sm)
{
struct sm501_initdata *idata;
+ struct sm501_platdata *pdata;
resource_size_t mem_avail;
unsigned long dramctrl;
unsigned long devid;
@@ -1051,7 +1337,9 @@
/* check to see if we have some device initialisation */
- idata = sm->platdata ? sm->platdata->init : NULL;
+ pdata = sm->platdata;
+ idata = pdata ? pdata->init : NULL;
+
if (idata) {
sm501_init_regs(sm, idata);
@@ -1059,6 +1347,15 @@
sm501_register_usbhost(sm, &mem_avail);
if (idata->devices & (SM501_USE_UART0 | SM501_USE_UART1))
sm501_register_uart(sm, idata->devices);
+ if (idata->devices & SM501_USE_GPIO)
+ sm501_register_gpio(sm);
+ }
+
+	if (pdata && pdata->gpio_i2c != NULL && pdata->gpio_i2c_nr > 0) {
+ if (!sm501_gpio_isregistered(sm))
+ dev_err(sm->dev, "no gpio available for i2c gpio.\n");
+ else
+ sm501_register_gpio_i2c(sm, pdata);
}
ret = sm501_check_clocks(sm);
@@ -1138,8 +1435,31 @@
}
#ifdef CONFIG_PM
+
/* power management support */
+static void sm501_set_power(struct sm501_devdata *sm, int on)
+{
+ struct sm501_platdata *pd = sm->platdata;
+
+ if (pd == NULL)
+ return;
+
+ if (pd->get_power) {
+ if (pd->get_power(sm->dev) == on) {
+ dev_dbg(sm->dev, "is already %d\n", on);
+ return;
+ }
+ }
+
+ if (pd->set_power) {
+ dev_dbg(sm->dev, "setting power to %d\n", on);
+
+ pd->set_power(sm->dev, on);
+ sm501_mdelay(sm, 10);
+ }
+}
+
static int sm501_plat_suspend(struct platform_device *pdev, pm_message_t state)
{
struct sm501_devdata *sm = platform_get_drvdata(pdev);
@@ -1148,6 +1468,12 @@
sm->pm_misc = readl(sm->regs + SM501_MISC_CONTROL);
sm501_dump_regs(sm);
+
+ if (sm->platdata) {
+ if (sm->platdata->flags & SM501_FLAG_SUSPEND_OFF)
+ sm501_set_power(sm, 0);
+ }
+
return 0;
}
@@ -1155,6 +1481,8 @@
{
struct sm501_devdata *sm = platform_get_drvdata(pdev);
+ sm501_set_power(sm, 1);
+
sm501_dump_regs(sm);
sm501_dump_gate(sm);
sm501_dump_clk(sm);
@@ -1229,6 +1557,7 @@
static struct sm501_platdata sm501_pci_platdata = {
.init = &sm501_pci_initdata,
.fb = &sm501_fb_pdata,
+ .gpio_base = -1,
};
static int sm501_pci_probe(struct pci_dev *dev,
@@ -1335,6 +1664,8 @@
sm501_remove_sub(sm, smdev);
device_remove_file(sm->dev, &dev_attr_dbg_regs);
+
+ sm501_gpio_remove(sm);
}
static void sm501_pci_remove(struct pci_dev *dev)
@@ -1378,6 +1709,8 @@
.remove = sm501_pci_remove,
};
+MODULE_ALIAS("platform:sm501");
+
static struct platform_driver sm501_plat_drv = {
.driver = {
.name = "sm501",
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index d5bc288b..321eb91 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -77,11 +77,13 @@
for your IBM server.
config PHANTOM
- tristate "Sensable PHANToM"
+ tristate "Sensable PHANToM (PCI)"
depends on PCI
help
Say Y here if you want to build a driver for Sensable PHANToM device.
+ This driver is only for PCI PHANToMs.
+
If you choose to build module, its name will be phantom. If unsure,
say N here.
@@ -212,6 +214,18 @@
This is a driver for the WMI extensions (wireless and bluetooth power
control) of the HP Compaq TC1100 tablet.
+config HP_WMI
+ tristate "HP WMI extras"
+ depends on ACPI_WMI
+ depends on INPUT
+ depends on RFKILL
+ help
+ Say Y here if you want to support WMI-based hotkeys on HP laptops and
+ to read data from WMI such as docking or ambient light sensor state.
+
+ To compile this driver as a module, choose M here: the module will
+ be called hp-wmi.
+
config MSI_LAPTOP
tristate "MSI Laptop Extras"
depends on X86
@@ -424,6 +438,7 @@
config HP_ILO
tristate "Channel interface driver for HP iLO/iLO2 processor"
+ depends on PCI
default n
help
The channel interface driver allows applications to communicate
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 688fe76..f5e2734 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -13,6 +13,7 @@
obj-$(CONFIG_ATMEL_PWM) += atmel_pwm.o
obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o
obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o
+obj-$(CONFIG_HP_WMI) += hp-wmi.o
obj-$(CONFIG_TC1100_WMI) += tc1100-wmi.o
obj-$(CONFIG_LKDTM) += lkdtm.o
obj-$(CONFIG_TIFM_CORE) += tifm_core.o
diff --git a/drivers/misc/hp-wmi.c b/drivers/misc/hp-wmi.c
new file mode 100644
index 0000000..1dbcbcb
--- /dev/null
+++ b/drivers/misc/hp-wmi.c
@@ -0,0 +1,494 @@
+/*
+ * HP WMI hotkeys
+ *
+ * Copyright (C) 2008 Red Hat <mjg@redhat.com>
+ *
+ * Portions based on wistron_btns.c:
+ * Copyright (C) 2005 Miloslav Trmac <mitr@volny.cz>
+ * Copyright (C) 2005 Bernhard Rosenkraenzer <bero@arklinux.org>
+ * Copyright (C) 2005 Dmitry Torokhov <dtor@mail.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/input.h>
+#include <linux/slab.h>
+#include <acpi/acpi_drivers.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+#include <linux/rfkill.h>
+#include <linux/string.h>
+
+MODULE_AUTHOR("Matthew Garrett <mjg59@srcf.ucam.org>");
+MODULE_DESCRIPTION("HP laptop WMI hotkeys driver");
+MODULE_LICENSE("GPL");
+
+MODULE_ALIAS("wmi:95F24279-4D7B-4334-9387-ACCDC67EF61C");
+MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4");
+
+#define HPWMI_EVENT_GUID "95F24279-4D7B-4334-9387-ACCDC67EF61C"
+#define HPWMI_BIOS_GUID "5FB7F034-2C63-45e9-BE91-3D44E2C707E4"
+
+#define HPWMI_DISPLAY_QUERY 0x1
+#define HPWMI_HDDTEMP_QUERY 0x2
+#define HPWMI_ALS_QUERY 0x3
+#define HPWMI_DOCK_QUERY 0x4
+#define HPWMI_WIRELESS_QUERY 0x5
+
+static int __init hp_wmi_bios_setup(struct platform_device *device);
+static int __exit hp_wmi_bios_remove(struct platform_device *device);
+
+struct bios_args {
+ u32 signature;
+ u32 command;
+ u32 commandtype;
+ u32 datasize;
+ u32 data;
+};
+
+struct bios_return {
+ u32 sigpass;
+ u32 return_code;
+ u32 value;
+};
+
+struct key_entry {
+ char type; /* See KE_* below */
+ u8 code;
+ u16 keycode;
+};
+
+enum { KE_KEY, KE_SW, KE_END };
+
+static struct key_entry hp_wmi_keymap[] = {
+ {KE_SW, 0x01, SW_DOCK},
+ {KE_KEY, 0x02, KEY_BRIGHTNESSUP},
+ {KE_KEY, 0x03, KEY_BRIGHTNESSDOWN},
+ {KE_KEY, 0x04, KEY_HELP},
+ {KE_END, 0}
+};
+
+static struct input_dev *hp_wmi_input_dev;
+static struct platform_device *hp_wmi_platform_dev;
+
+static struct rfkill *wifi_rfkill;
+static struct rfkill *bluetooth_rfkill;
+static struct rfkill *wwan_rfkill;
+
+static struct platform_driver hp_wmi_driver = {
+ .driver = {
+ .name = "hp-wmi",
+ .owner = THIS_MODULE,
+ },
+ .probe = hp_wmi_bios_setup,
+ .remove = hp_wmi_bios_remove,
+};
+
+/*
+ * hp_wmi_perform_query - send a read or write command to the BIOS over WMI.
+ * @query: command type (one of the HPWMI_*_QUERY values)
+ * @write: non-zero to issue a write carrying @value, zero to issue a read
+ * @value: payload placed in the request's data field
+ *
+ * Returns the value field of the BIOS reply, the negated BIOS return code if
+ * the BIOS reported a non-zero return code, or -EINVAL if the WMI call failed
+ * or the reply is not a buffer large enough to hold a struct bios_return.
+ */
+static int hp_wmi_perform_query(int query, int write, int value)
+{
+	struct bios_return bios_return;
+	acpi_status status;
+	union acpi_object *obj;
+	struct bios_args args = {
+		.signature = 0x55434553,
+		.command = write ? 0x2 : 0x1,
+		.commandtype = query,
+		.datasize = write ? 0x4 : 0,
+		.data = value,
+	};
+	struct acpi_buffer input = { sizeof(struct bios_args), &args };
+	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+	int ret;
+
+	status = wmi_evaluate_method(HPWMI_BIOS_GUID, 0, 0x3, &input, &output);
+
+	obj = output.pointer;
+
+	/* Never read a reply that is missing, mistyped or too short */
+	if (ACPI_FAILURE(status) || !obj || obj->type != ACPI_TYPE_BUFFER ||
+	    obj->buffer.length < sizeof(bios_return)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	memcpy(&bios_return, obj->buffer.pointer, sizeof(bios_return));
+	if (bios_return.return_code > 0)
+		ret = -(int)bios_return.return_code;
+	else
+		ret = bios_return.value;
+
+out:
+	/* ACPI_ALLOCATE_BUFFER output belongs to the caller; kfree(NULL) is ok */
+	kfree(obj);
+	return ret;
+}
+
+/*
+ * Read HPWMI_DISPLAY_QUERY from the BIOS. Returns whatever
+ * hp_wmi_perform_query() returns: the BIOS value or a negative code.
+ */
+static int hp_wmi_display_state(void)
+{
+ return hp_wmi_perform_query(HPWMI_DISPLAY_QUERY, 0, 0);
+}
+
+/*
+ * Read HPWMI_HDDTEMP_QUERY from the BIOS. Returns whatever
+ * hp_wmi_perform_query() returns: the BIOS value or a negative code.
+ */
+static int hp_wmi_hddtemp_state(void)
+{
+ return hp_wmi_perform_query(HPWMI_HDDTEMP_QUERY, 0, 0);
+}
+
+/*
+ * Read HPWMI_ALS_QUERY from the BIOS. Returns whatever
+ * hp_wmi_perform_query() returns: the BIOS value or a negative code.
+ */
+static int hp_wmi_als_state(void)
+{
+ return hp_wmi_perform_query(HPWMI_ALS_QUERY, 0, 0);
+}
+
+/*
+ * Read HPWMI_DOCK_QUERY from the BIOS. Returns whatever
+ * hp_wmi_perform_query() returns: the BIOS value or a negative code.
+ */
+static int hp_wmi_dock_state(void)
+{
+ return hp_wmi_perform_query(HPWMI_DOCK_QUERY, 0, 0);
+}
+
+/*
+ * rfkill toggle callback for the wifi switch: writes 0x101 to the wireless
+ * query when @state is non-zero and 0x100 otherwise. @data is unused.
+ */
+static int hp_wmi_wifi_set(void *data, enum rfkill_state state)
+{
+	int request = state ? 0x101 : 0x100;
+
+	return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, request);
+}
+
+/*
+ * rfkill toggle callback for the bluetooth switch: writes 0x202 to the
+ * wireless query when @state is non-zero and 0x200 otherwise. @data is unused.
+ */
+static int hp_wmi_bluetooth_set(void *data, enum rfkill_state state)
+{
+	int request = state ? 0x202 : 0x200;
+
+	return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, request);
+}
+
+/*
+ * rfkill toggle callback for the wwan switch: writes 0x404 to the wireless
+ * query when @state is non-zero and 0x400 otherwise. @data is unused.
+ */
+static int hp_wmi_wwan_set(void *data, enum rfkill_state state)
+{
+	int request = state ? 0x404 : 0x400;
+
+	return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, request);
+}
+
+/*
+ * Returns 1 if bit 0x100 is set in the wireless query result, 0 otherwise.
+ * NOTE(review): a negative error from the query is masked like a real value.
+ */
+static int hp_wmi_wifi_state(void)
+{
+	int status = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
+
+	return (status & 0x100) ? 1 : 0;
+}
+
+/*
+ * Returns 1 if bit 0x10000 is set in the wireless query result, 0 otherwise.
+ * NOTE(review): a negative error from the query is masked like a real value.
+ */
+static int hp_wmi_bluetooth_state(void)
+{
+	int status = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
+
+	return (status & 0x10000) ? 1 : 0;
+}
+
+/*
+ * Returns 1 if bit 0x1000000 is set in the wireless query result, 0 otherwise.
+ * NOTE(review): a negative error from the query is masked like a real value.
+ */
+static int hp_wmi_wwan_state(void)
+{
+	int status = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
+
+	return (status & 0x1000000) ? 1 : 0;
+}
+
+/*
+ * sysfs read handler for "display": prints the display query result as a
+ * decimal line. Any negative query result is reported as -EINVAL.
+ */
+static ssize_t show_display(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	int state = hp_wmi_display_state();
+
+	if (state >= 0)
+		return sprintf(buf, "%d\n", state);
+
+	return -EINVAL;
+}
+
+/*
+ * sysfs read handler for "hddtemp": prints the hddtemp query result as a
+ * decimal line. Any negative query result is reported as -EINVAL.
+ */
+static ssize_t show_hddtemp(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	int state = hp_wmi_hddtemp_state();
+
+	if (state >= 0)
+		return sprintf(buf, "%d\n", state);
+
+	return -EINVAL;
+}
+
+/*
+ * sysfs read handler for "als": prints the ALS query result as a decimal
+ * line. Any negative query result is reported as -EINVAL.
+ */
+static ssize_t show_als(struct device *dev, struct device_attribute *attr,
+			char *buf)
+{
+	int state = hp_wmi_als_state();
+
+	if (state >= 0)
+		return sprintf(buf, "%d\n", state);
+
+	return -EINVAL;
+}
+
+/*
+ * sysfs read handler for "dock": prints the dock query result as a decimal
+ * line. Any negative query result is reported as -EINVAL.
+ */
+static ssize_t show_dock(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	int state = hp_wmi_dock_state();
+
+	if (state >= 0)
+		return sprintf(buf, "%d\n", state);
+
+	return -EINVAL;
+}
+
+/*
+ * sysfs write handler for "als": parses @buf as a base-10 number and writes
+ * it to the BIOS with HPWMI_ALS_QUERY. Always reports @count bytes consumed;
+ * the result of the BIOS write is not checked.
+ */
+static ssize_t set_als(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ u32 tmp = simple_strtoul(buf, NULL, 10);
+ hp_wmi_perform_query(HPWMI_ALS_QUERY, 1, tmp);
+ return count;
+}
+
+static DEVICE_ATTR(display, S_IRUGO, show_display, NULL);
+static DEVICE_ATTR(hddtemp, S_IRUGO, show_hddtemp, NULL);
+static DEVICE_ATTR(als, S_IRUGO | S_IWUSR, show_als, set_als);
+static DEVICE_ATTR(dock, S_IRUGO, show_dock, NULL);
+
+/*
+ * Look up the keymap entry whose event code equals @code. Returns NULL if
+ * no entry before the KE_END terminator matches.
+ */
+static struct key_entry *hp_wmi_get_entry_by_scancode(int code)
+{
+	struct key_entry *entry = hp_wmi_keymap;
+
+	while (entry->type != KE_END) {
+		if (entry->code == code)
+			return entry;
+		entry++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Look up the first KE_KEY keymap entry that maps to @keycode. Switch entries
+ * are ignored. Returns NULL if nothing matches.
+ */
+static struct key_entry *hp_wmi_get_entry_by_keycode(int keycode)
+{
+	struct key_entry *entry = hp_wmi_keymap;
+
+	while (entry->type != KE_END) {
+		if (entry->type == KE_KEY && entry->keycode == keycode)
+			return entry;
+		entry++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Input-layer getkeycode callback: stores the keycode mapped to @scancode in
+ * @keycode. Returns 0 on success, -EINVAL if @scancode is unknown or does not
+ * refer to a key entry.
+ */
+static int hp_wmi_getkeycode(struct input_dev *dev, int scancode, int *keycode)
+{
+	struct key_entry *entry = hp_wmi_get_entry_by_scancode(scancode);
+
+	if (!entry || entry->type != KE_KEY)
+		return -EINVAL;
+
+	*keycode = entry->keycode;
+	return 0;
+}
+
+/*
+ * Input-layer setkeycode callback: remaps @scancode to @keycode and updates
+ * the device's key bitmap. Returns 0 on success, -EINVAL if @keycode is out
+ * of range or @scancode does not refer to a key entry.
+ */
+static int hp_wmi_setkeycode(struct input_dev *dev, int scancode, int keycode)
+{
+	struct key_entry *entry;
+	int previous;
+
+	if (keycode < 0 || keycode > KEY_MAX)
+		return -EINVAL;
+
+	entry = hp_wmi_get_entry_by_scancode(scancode);
+	if (!entry || entry->type != KE_KEY)
+		return -EINVAL;
+
+	previous = entry->keycode;
+	entry->keycode = keycode;
+	set_bit(keycode, dev->keybit);
+
+	/* Drop the old keycode bit only if no other entry still emits it */
+	if (!hp_wmi_get_entry_by_keycode(previous))
+		clear_bit(previous, dev->keybit);
+
+	return 0;
+}
+
+/*
+ * hp_wmi_notify - WMI event handler.
+ * @value: event identifier handed to wmi_get_event_data()
+ * @context: unused
+ *
+ * Fetches the event data and, for an 8-byte buffer, takes its first byte as
+ * the event code. Known codes are reported as a key press/release pair or as
+ * a switch update; code 0x5 refreshes the cached rfkill states; anything else
+ * is logged.
+ */
+void hp_wmi_notify(u32 value, void *context)
+{
+	struct acpi_buffer response = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct key_entry *key;
+	union acpi_object *obj;
+
+	wmi_get_event_data(value, &response);
+
+	obj = (union acpi_object *)response.pointer;
+
+	if (obj && obj->type == ACPI_TYPE_BUFFER && obj->buffer.length == 8) {
+		int eventcode = *((u8 *) obj->buffer.pointer);
+		key = hp_wmi_get_entry_by_scancode(eventcode);
+		if (key) {
+			switch (key->type) {
+			case KE_KEY:
+				/* Hotkeys only signal a press; fake the release */
+				input_report_key(hp_wmi_input_dev,
+						 key->keycode, 1);
+				input_sync(hp_wmi_input_dev);
+				input_report_key(hp_wmi_input_dev,
+						 key->keycode, 0);
+				input_sync(hp_wmi_input_dev);
+				break;
+			case KE_SW:
+				input_report_switch(hp_wmi_input_dev,
+						    key->keycode,
+						    hp_wmi_dock_state());
+				input_sync(hp_wmi_input_dev);
+				break;
+			default:
+				break;
+			}
+		} else if (eventcode == 0x5) {
+			if (wifi_rfkill)
+				wifi_rfkill->state = hp_wmi_wifi_state();
+			if (bluetooth_rfkill)
+				bluetooth_rfkill->state =
+					hp_wmi_bluetooth_state();
+			if (wwan_rfkill)
+				wwan_rfkill->state = hp_wmi_wwan_state();
+		} else
+			printk(KERN_INFO "HP WMI: Unknown key pressed - %x\n",
+			       eventcode);
+	} else
+		printk(KERN_INFO "HP WMI: Unknown response received\n");
+
+	/* ACPI_ALLOCATE_BUFFER data belongs to the caller; kfree(NULL) is ok */
+	kfree(obj);
+}
+
+/*
+ * hp_wmi_input_setup - allocate and register the hotkey input device.
+ *
+ * Declares every key and switch listed in hp_wmi_keymap on the device.
+ * Returns 0 on success, -ENOMEM if the device cannot be allocated, or the
+ * error from input_register_device(). On failure hp_wmi_input_dev is NULL.
+ */
+static int __init hp_wmi_input_setup(void)
+{
+	struct key_entry *key;
+	int err;
+
+	hp_wmi_input_dev = input_allocate_device();
+	if (!hp_wmi_input_dev)
+		return -ENOMEM;
+
+	hp_wmi_input_dev->name = "HP WMI hotkeys";
+	hp_wmi_input_dev->phys = "wmi/input0";
+	hp_wmi_input_dev->id.bustype = BUS_HOST;
+	hp_wmi_input_dev->getkeycode = hp_wmi_getkeycode;
+	hp_wmi_input_dev->setkeycode = hp_wmi_setkeycode;
+
+	for (key = hp_wmi_keymap; key->type != KE_END; key++) {
+		switch (key->type) {
+		case KE_KEY:
+			set_bit(EV_KEY, hp_wmi_input_dev->evbit);
+			set_bit(key->keycode, hp_wmi_input_dev->keybit);
+			break;
+		case KE_SW:
+			set_bit(EV_SW, hp_wmi_input_dev->evbit);
+			set_bit(key->keycode, hp_wmi_input_dev->swbit);
+			break;
+		}
+	}
+
+	err = input_register_device(hp_wmi_input_dev);
+	if (err) {
+		input_free_device(hp_wmi_input_dev);
+		/* Do not leave a dangling pointer in the global */
+		hp_wmi_input_dev = NULL;
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Remove the display, hddtemp, als and dock attribute files from @device.
+ * Called from the probe error path and from the remove callback.
+ */
+static void cleanup_sysfs(struct platform_device *device)
+{
+ device_remove_file(&device->dev, &dev_attr_display);
+ device_remove_file(&device->dev, &dev_attr_hddtemp);
+ device_remove_file(&device->dev, &dev_attr_als);
+ device_remove_file(&device->dev, &dev_attr_dock);
+}
+
+/*
+ * hp_wmi_bios_setup - platform probe: create sysfs files and rfkill switches.
+ * @device: the hp-wmi platform device
+ *
+ * Returns 0 on success or a negative error code. On failure everything set
+ * up so far is undone and the rfkill globals are reset to NULL, so that
+ * hp_wmi_notify() never touches a switch that does not exist.
+ */
+static int __init hp_wmi_bios_setup(struct platform_device *device)
+{
+	int err;
+
+	err = device_create_file(&device->dev, &dev_attr_display);
+	if (err)
+		goto add_sysfs_error;
+	err = device_create_file(&device->dev, &dev_attr_hddtemp);
+	if (err)
+		goto add_sysfs_error;
+	err = device_create_file(&device->dev, &dev_attr_als);
+	if (err)
+		goto add_sysfs_error;
+	err = device_create_file(&device->dev, &dev_attr_dock);
+	if (err)
+		goto add_sysfs_error;
+
+	wifi_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WLAN);
+	bluetooth_rfkill = rfkill_allocate(&device->dev,
+					   RFKILL_TYPE_BLUETOOTH);
+	wwan_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WIMAX);
+	if (!wifi_rfkill || !bluetooth_rfkill || !wwan_rfkill) {
+		err = -ENOMEM;
+		goto free_rfkill;
+	}
+
+	wifi_rfkill->name = "hp-wifi";
+	wifi_rfkill->state = hp_wmi_wifi_state();
+	wifi_rfkill->toggle_radio = hp_wmi_wifi_set;
+	wifi_rfkill->user_claim_unsupported = 1;
+
+	bluetooth_rfkill->name = "hp-bluetooth";
+	bluetooth_rfkill->state = hp_wmi_bluetooth_state();
+	bluetooth_rfkill->toggle_radio = hp_wmi_bluetooth_set;
+	bluetooth_rfkill->user_claim_unsupported = 1;
+
+	wwan_rfkill->name = "hp-wwan";
+	wwan_rfkill->state = hp_wmi_wwan_state();
+	wwan_rfkill->toggle_radio = hp_wmi_wwan_set;
+	wwan_rfkill->user_claim_unsupported = 1;
+
+	err = rfkill_register(wifi_rfkill);
+	if (err)
+		goto free_rfkill;
+	err = rfkill_register(bluetooth_rfkill);
+	if (err)
+		goto unregister_wifi;
+	err = rfkill_register(wwan_rfkill);
+	if (err)
+		goto unregister_bluetooth;
+
+	return 0;
+
+	/*
+	 * A registered switch is released by rfkill_unregister(); one that was
+	 * only allocated must be released with rfkill_free(). Clearing each
+	 * pointer after unregistering keeps the two paths from overlapping.
+	 */
+unregister_bluetooth:
+	rfkill_unregister(bluetooth_rfkill);
+	bluetooth_rfkill = NULL;
+unregister_wifi:
+	rfkill_unregister(wifi_rfkill);
+	wifi_rfkill = NULL;
+free_rfkill:
+	if (wwan_rfkill)
+		rfkill_free(wwan_rfkill);
+	if (bluetooth_rfkill)
+		rfkill_free(bluetooth_rfkill);
+	if (wifi_rfkill)
+		rfkill_free(wifi_rfkill);
+	wwan_rfkill = NULL;
+	bluetooth_rfkill = NULL;
+	wifi_rfkill = NULL;
+add_sysfs_error:
+	cleanup_sysfs(device);
+	return err;
+}
+
+/*
+ * Platform remove callback: removes the sysfs attribute files and
+ * unregisters the three rfkill switches created by hp_wmi_bios_setup().
+ * Always returns 0.
+ */
+static int __exit hp_wmi_bios_remove(struct platform_device *device)
+{
+ cleanup_sysfs(device);
+
+ rfkill_unregister(wifi_rfkill);
+ rfkill_unregister(bluetooth_rfkill);
+ rfkill_unregister(wwan_rfkill);
+
+ return 0;
+}
+
+/*
+ * hp_wmi_init - module entry point.
+ *
+ * If the event GUID is present, installs the notify handler and sets up the
+ * hotkey input device. If the BIOS GUID is present, registers the platform
+ * driver and creates the "hp-wmi" platform device. Failures on the BIOS side
+ * are tolerated (the module still loads); a failure to create the input
+ * device after the handler was installed aborts the load.
+ */
+static int __init hp_wmi_init(void)
+{
+	int err;
+
+	if (wmi_has_guid(HPWMI_EVENT_GUID)) {
+		err = wmi_install_notify_handler(HPWMI_EVENT_GUID,
+						 hp_wmi_notify, NULL);
+		if (!err) {
+			err = hp_wmi_input_setup();
+			if (err) {
+				/*
+				 * Without an input device the handler would
+				 * report events to an invalid pointer.
+				 */
+				wmi_remove_notify_handler(HPWMI_EVENT_GUID);
+				return err;
+			}
+		}
+	}
+
+	if (wmi_has_guid(HPWMI_BIOS_GUID)) {
+		err = platform_driver_register(&hp_wmi_driver);
+		if (err)
+			return 0;
+		hp_wmi_platform_dev = platform_device_alloc("hp-wmi", -1);
+		if (!hp_wmi_platform_dev) {
+			platform_driver_unregister(&hp_wmi_driver);
+			return 0;
+		}
+		err = platform_device_add(hp_wmi_platform_dev);
+		if (err) {
+			/* Drop the reference taken by platform_device_alloc() */
+			platform_device_put(hp_wmi_platform_dev);
+			hp_wmi_platform_dev = NULL;
+			platform_driver_unregister(&hp_wmi_driver);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * hp_wmi_exit - module exit: tear down whatever hp_wmi_init() set up.
+ */
+static void __exit hp_wmi_exit(void)
+{
+	if (wmi_has_guid(HPWMI_EVENT_GUID)) {
+		wmi_remove_notify_handler(HPWMI_EVENT_GUID);
+		/* The input device only exists if the handler was installed */
+		if (hp_wmi_input_dev)
+			input_unregister_device(hp_wmi_input_dev);
+	}
+	if (hp_wmi_platform_dev) {
+		/* Unregister = del + put; a bare del would leak the device */
+		platform_device_unregister(hp_wmi_platform_dev);
+		platform_driver_unregister(&hp_wmi_driver);
+	}
+}
+
+module_init(hp_wmi_init);
+module_exit(hp_wmi_exit);
diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c
index 4ce3bdc..daf5856 100644
--- a/drivers/misc/phantom.c
+++ b/drivers/misc/phantom.c
@@ -563,6 +563,6 @@
module_exit(phantom_exit);
MODULE_AUTHOR("Jiri Slaby <jirislaby@gmail.com>");
-MODULE_DESCRIPTION("Sensable Phantom driver");
+MODULE_DESCRIPTION("Sensable Phantom driver (PCI devices)");
MODULE_LICENSE("GPL");
MODULE_VERSION(PHANTOM_VERSION);
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 961416a..c7630a2 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -51,14 +51,13 @@
* @name: MTD device name or number string
* @vid_hdr_offs: VID header offset
*/
-struct mtd_dev_param
-{
+struct mtd_dev_param {
char name[MTD_PARAM_LEN_MAX];
int vid_hdr_offs;
};
/* Numbers of elements set in the @mtd_dev_param array */
-static int mtd_devs = 0;
+static int mtd_devs;
/* MTD devices specification parameters */
static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES];
@@ -160,8 +159,7 @@
}
/**
- * ubi_get_by_major - get UBI device description object by character device
- * major number.
+ * ubi_get_by_major - get UBI device by character device major number.
* @major: major number
*
* This function is similar to 'ubi_get_device()', but it searches the device
@@ -355,15 +353,34 @@
}
/**
+ * free_user_volumes - free all user volumes.
+ * @ubi: UBI device description object
+ *
+ * Normally the volumes are freed at the release function of the volume device
+ * objects. However, on error paths the volumes have to be freed before the
+ * device objects have been initialized.
+ */
+static void free_user_volumes(struct ubi_device *ubi)
+{
+	int slot;
+
+	/* Walk the user volume slots only; empty slots are skipped */
+	for (slot = 0; slot < ubi->vtbl_slots; slot++) {
+		if (!ubi->volumes[slot])
+			continue;
+
+		kfree(ubi->volumes[slot]->eba_tbl);
+		kfree(ubi->volumes[slot]);
+	}
+}
+
+/**
* uif_init - initialize user interfaces for an UBI device.
* @ubi: UBI device description object
*
* This function returns zero in case of success and a negative error code in
- * case of failure.
+ * case of failure. Note, this function destroys all volumes if it fails.
*/
static int uif_init(struct ubi_device *ubi)
{
- int i, err;
+ int i, err, do_free = 0;
dev_t dev;
sprintf(ubi->ubi_name, UBI_NAME_STR "%d", ubi->ubi_num);
@@ -384,7 +401,7 @@
ubi_assert(MINOR(dev) == 0);
cdev_init(&ubi->cdev, &ubi_cdev_operations);
- dbg_msg("%s major is %u", ubi->ubi_name, MAJOR(dev));
+ dbg_gen("%s major is %u", ubi->ubi_name, MAJOR(dev));
ubi->cdev.owner = THIS_MODULE;
err = cdev_add(&ubi->cdev, dev, 1);
@@ -410,10 +427,13 @@
out_volumes:
kill_volumes(ubi);
+ do_free = 0;
out_sysfs:
ubi_sysfs_close(ubi);
cdev_del(&ubi->cdev);
out_unreg:
+ if (do_free)
+ free_user_volumes(ubi);
unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1);
ubi_err("cannot initialize UBI %s, error %d", ubi->ubi_name, err);
return err;
@@ -422,6 +442,10 @@
/**
* uif_close - close user interfaces for an UBI device.
* @ubi: UBI device description object
+ *
+ * Note, since this function un-registers UBI volume device objects (@vol->dev),
+ * the memory allocated for the volumes is freed as well (in the release
+ * function).
*/
static void uif_close(struct ubi_device *ubi)
{
@@ -432,6 +456,21 @@
}
/**
+ * free_internal_volumes - free internal volumes.
+ * @ubi: UBI device description object
+ */
+static void free_internal_volumes(struct ubi_device *ubi)
+{
+	int first = ubi->vtbl_slots;
+	int slot;
+
+	/* Internal volumes occupy the slots right after the user volumes */
+	for (slot = first; slot < first + UBI_INT_VOL_COUNT; slot++) {
+		kfree(ubi->volumes[slot]->eba_tbl);
+		kfree(ubi->volumes[slot]);
+	}
+}
+
+/**
* attach_by_scanning - attach an MTD device using scanning method.
* @ubi: UBI device descriptor
*
@@ -475,6 +514,7 @@
out_wl:
ubi_wl_close(ubi);
out_vtbl:
+ free_internal_volumes(ubi);
vfree(ubi->vtbl);
out_si:
ubi_scan_destroy_si(si);
@@ -482,7 +522,7 @@
}
/**
- * io_init - initialize I/O unit for a given UBI device.
+ * io_init - initialize I/O sub-system for a given UBI device.
* @ubi: UBI device description object
*
* If @ubi->vid_hdr_offset or @ubi->leb_start is zero, default offsets are
@@ -530,7 +570,11 @@
ubi->min_io_size = ubi->mtd->writesize;
ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft;
- /* Make sure minimal I/O unit is power of 2 */
+ /*
+ * Make sure minimal I/O unit is power of 2. Note, there is no
+ * fundamental reason for this assumption. It is just an optimization
+ * which allows us to avoid costly division operations.
+ */
if (!is_power_of_2(ubi->min_io_size)) {
ubi_err("min. I/O unit (%d) is not power of 2",
ubi->min_io_size);
@@ -581,7 +625,7 @@
if (ubi->vid_hdr_offset < UBI_EC_HDR_SIZE ||
ubi->leb_start < ubi->vid_hdr_offset + UBI_VID_HDR_SIZE ||
ubi->leb_start > ubi->peb_size - UBI_VID_HDR_SIZE ||
- ubi->leb_start % ubi->min_io_size) {
+ ubi->leb_start & (ubi->min_io_size - 1)) {
ubi_err("bad VID header (%d) or data offsets (%d)",
ubi->vid_hdr_offset, ubi->leb_start);
return -EINVAL;
@@ -646,7 +690,7 @@
/*
* Clear the auto-resize flag in the volume in-memory copy of the
- * volume table, and 'ubi_resize_volume()' will propogate this change
+ * volume table, and 'ubi_resize_volume()' will propagate this change
* to the flash.
*/
ubi->vtbl[vol_id].flags &= ~UBI_VTBL_AUTORESIZE_FLG;
@@ -655,7 +699,7 @@
struct ubi_vtbl_record vtbl_rec;
/*
- * No avalilable PEBs to re-size the volume, clear the flag on
+ * No available PEBs to re-size the volume, clear the flag on
* flash and exit.
*/
memcpy(&vtbl_rec, &ubi->vtbl[vol_id],
@@ -682,13 +726,13 @@
/**
* ubi_attach_mtd_dev - attach an MTD device.
- * @mtd_dev: MTD device description object
+ * @mtd: MTD device description object
* @ubi_num: number to assign to the new UBI device
* @vid_hdr_offset: VID header offset
*
* This function attaches MTD device @mtd_dev to UBI and assign @ubi_num number
* to the newly created UBI device, unless @ubi_num is %UBI_DEV_NUM_AUTO, in
- * which case this function finds a vacant device nubert and assings it
+ * which case this function finds a vacant device number and assigns it
* automatically. Returns the new UBI device number in case of success and a
* negative error code in case of failure.
*
@@ -698,7 +742,7 @@
int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
{
struct ubi_device *ubi;
- int i, err;
+ int i, err, do_free = 1;
/*
* Check if we already have the same MTD device attached.
@@ -735,7 +779,8 @@
if (!ubi_devices[ubi_num])
break;
if (ubi_num == UBI_MAX_DEVICES) {
- dbg_err("only %d UBI devices may be created", UBI_MAX_DEVICES);
+ dbg_err("only %d UBI devices may be created",
+ UBI_MAX_DEVICES);
return -ENFILE;
}
} else {
@@ -760,6 +805,7 @@
mutex_init(&ubi->buf_mutex);
mutex_init(&ubi->ckvol_mutex);
+ mutex_init(&ubi->mult_mutex);
mutex_init(&ubi->volumes_mutex);
spin_lock_init(&ubi->volumes_lock);
@@ -798,7 +844,7 @@
err = uif_init(ubi);
if (err)
- goto out_detach;
+ goto out_nofree;
ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name);
if (IS_ERR(ubi->bgt_thread)) {
@@ -824,20 +870,22 @@
ubi->beb_rsvd_pebs);
ubi_msg("max/mean erase counter: %d/%d", ubi->max_ec, ubi->mean_ec);
- /* Enable the background thread */
- if (!DBG_DISABLE_BGT) {
+ if (!DBG_DISABLE_BGT)
ubi->thread_enabled = 1;
- wake_up_process(ubi->bgt_thread);
- }
+ wake_up_process(ubi->bgt_thread);
ubi_devices[ubi_num] = ubi;
return ubi_num;
out_uif:
uif_close(ubi);
+out_nofree:
+ do_free = 0;
out_detach:
- ubi_eba_close(ubi);
ubi_wl_close(ubi);
+ if (do_free)
+ free_user_volumes(ubi);
+ free_internal_volumes(ubi);
vfree(ubi->vtbl);
out_free:
vfree(ubi->peb_buf1);
@@ -899,8 +947,8 @@
kthread_stop(ubi->bgt_thread);
uif_close(ubi);
- ubi_eba_close(ubi);
ubi_wl_close(ubi);
+ free_internal_volumes(ubi);
vfree(ubi->vtbl);
put_mtd_device(ubi->mtd);
vfree(ubi->peb_buf1);
@@ -1044,8 +1092,7 @@
module_exit(ubi_exit);
/**
- * bytes_str_to_int - convert a string representing number of bytes to an
- * integer.
+ * bytes_str_to_int - convert a number of bytes string into an integer.
* @str: the string to convert
*
* This function returns positive resulting integer in case of success and a
diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 89193ba..03c759b 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -39,9 +39,9 @@
#include <linux/stat.h>
#include <linux/ioctl.h>
#include <linux/capability.h>
+#include <linux/uaccess.h>
#include <linux/smp_lock.h>
#include <mtd/ubi-user.h>
-#include <asm/uaccess.h>
#include <asm/div64.h>
#include "ubi.h"
@@ -116,7 +116,7 @@
else
mode = UBI_READONLY;
- dbg_msg("open volume %d, mode %d", vol_id, mode);
+ dbg_gen("open volume %d, mode %d", vol_id, mode);
desc = ubi_open_volume(ubi_num, vol_id, mode);
unlock_kernel();
@@ -132,7 +132,7 @@
struct ubi_volume_desc *desc = file->private_data;
struct ubi_volume *vol = desc->vol;
- dbg_msg("release volume %d, mode %d", vol->vol_id, desc->mode);
+ dbg_gen("release volume %d, mode %d", vol->vol_id, desc->mode);
if (vol->updating) {
ubi_warn("update of volume %d not finished, volume is damaged",
@@ -141,7 +141,7 @@
vol->updating = 0;
vfree(vol->upd_buf);
} else if (vol->changing_leb) {
- dbg_msg("only %lld of %lld bytes received for atomic LEB change"
+ dbg_gen("only %lld of %lld bytes received for atomic LEB change"
" for volume %d:%d, cancel", vol->upd_received,
vol->upd_bytes, vol->ubi->ubi_num, vol->vol_id);
vol->changing_leb = 0;
@@ -183,7 +183,7 @@
return -EINVAL;
}
- dbg_msg("seek volume %d, offset %lld, origin %d, new offset %lld",
+ dbg_gen("seek volume %d, offset %lld, origin %d, new offset %lld",
vol->vol_id, offset, origin, new_offset);
file->f_pos = new_offset;
@@ -201,7 +201,7 @@
void *tbuf;
uint64_t tmp;
- dbg_msg("read %zd bytes from offset %lld of volume %d",
+ dbg_gen("read %zd bytes from offset %lld of volume %d",
count, *offp, vol->vol_id);
if (vol->updating) {
@@ -216,7 +216,7 @@
return 0;
if (vol->corrupted)
- dbg_msg("read from corrupted volume %d", vol->vol_id);
+ dbg_gen("read from corrupted volume %d", vol->vol_id);
if (*offp + count > vol->used_bytes)
count_save = count = vol->used_bytes - *offp;
@@ -285,7 +285,7 @@
char *tbuf;
uint64_t tmp;
- dbg_msg("requested: write %zd bytes to offset %lld of volume %u",
+ dbg_gen("requested: write %zd bytes to offset %lld of volume %u",
count, *offp, vol->vol_id);
if (vol->vol_type == UBI_STATIC_VOLUME)
@@ -295,7 +295,7 @@
off = do_div(tmp, vol->usable_leb_size);
lnum = tmp;
- if (off % ubi->min_io_size) {
+ if (off & (ubi->min_io_size - 1)) {
dbg_err("unaligned position");
return -EINVAL;
}
@@ -304,7 +304,7 @@
count_save = count = vol->used_bytes - *offp;
/* We can write only in fractions of the minimum I/O unit */
- if (count % ubi->min_io_size) {
+ if (count & (ubi->min_io_size - 1)) {
dbg_err("unaligned write length");
return -EINVAL;
}
@@ -352,7 +352,7 @@
}
#else
-#define vol_cdev_direct_write(file, buf, count, offp) -EPERM
+#define vol_cdev_direct_write(file, buf, count, offp) (-EPERM)
#endif /* CONFIG_MTD_UBI_DEBUG_USERSPACE_IO */
static ssize_t vol_cdev_write(struct file *file, const char __user *buf,
@@ -437,7 +437,8 @@
break;
}
- rsvd_bytes = vol->reserved_pebs * (ubi->leb_size-vol->data_pad);
+		/* Widen before multiplying; data_pad is subtracted per LEB */
+		rsvd_bytes = (long long)vol->reserved_pebs *
+					(ubi->leb_size - vol->data_pad);
if (bytes < 0 || bytes > rsvd_bytes) {
err = -EINVAL;
break;
@@ -513,7 +514,7 @@
break;
}
- dbg_msg("erase LEB %d:%d", vol->vol_id, lnum);
+ dbg_gen("erase LEB %d:%d", vol->vol_id, lnum);
err = ubi_eba_unmap_leb(ubi, vol, lnum);
if (err)
break;
@@ -564,7 +565,7 @@
if (req->alignment > ubi->leb_size)
goto bad;
- n = req->alignment % ubi->min_io_size;
+ n = req->alignment & (ubi->min_io_size - 1);
if (req->alignment != 1 && n)
goto bad;
@@ -573,6 +574,10 @@
goto bad;
}
+ n = strnlen(req->name, req->name_len + 1);
+ if (n != req->name_len)
+ goto bad;
+
return 0;
bad:
@@ -600,6 +605,166 @@
return 0;
}
+/**
+ * rename_volumes - rename UBI volumes.
+ * @ubi: UBI device description object
+ * @req: volumes re-name request
+ *
+ * This is a helper function for the volume re-name IOCTL which validates the
+ * request, opens the volumes and calls the corresponding volumes management
+ * function. Returns zero in case of success and a negative error code in case
+ * of failure.
+ */
+static int rename_volumes(struct ubi_device *ubi,
+			  struct ubi_rnvol_req *req)
+{
+	int i, n, err;
+	struct list_head rename_list;
+	struct ubi_rename_entry *re, *re1;
+
+	if (req->count < 0 || req->count > UBI_MAX_RNVOL)
+		return -EINVAL;
+
+	if (req->count == 0)
+		return 0;
+
+	/* Validate volume IDs and names in the request */
+	for (i = 0; i < req->count; i++) {
+		if (req->ents[i].vol_id < 0 ||
+		    req->ents[i].vol_id >= ubi->vtbl_slots)
+			return -EINVAL;
+		if (req->ents[i].name_len < 0)
+			return -EINVAL;
+		if (req->ents[i].name_len > UBI_VOL_NAME_MAX)
+			return -ENAMETOOLONG;
+		req->ents[i].name[req->ents[i].name_len] = '\0';
+		n = strlen(req->ents[i].name);
+		/* Reject names with an embedded NUL before name_len */
+		if (n != req->ents[i].name_len)
+			return -EINVAL;
+	}
+
+	/* Make sure volume IDs and names are unique */
+	for (i = 0; i < req->count - 1; i++) {
+		for (n = i + 1; n < req->count; n++) {
+			if (req->ents[i].vol_id == req->ents[n].vol_id) {
+				dbg_err("duplicated volume id %d",
+					req->ents[i].vol_id);
+				return -EINVAL;
+			}
+			if (!strcmp(req->ents[i].name, req->ents[n].name)) {
+				dbg_err("duplicated volume name \"%s\"",
+					req->ents[i].name);
+				return -EINVAL;
+			}
+		}
+	}
+
+	/* Create the re-name list */
+	INIT_LIST_HEAD(&rename_list);
+	for (i = 0; i < req->count; i++) {
+		int vol_id = req->ents[i].vol_id;
+		int name_len = req->ents[i].name_len;
+		const char *name = req->ents[i].name;
+
+		re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL);
+		if (!re) {
+			err = -ENOMEM;
+			goto out_free;
+		}
+
+		re->desc = ubi_open_volume(ubi->ubi_num, vol_id, UBI_EXCLUSIVE);
+		if (IS_ERR(re->desc)) {
+			err = PTR_ERR(re->desc);
+			dbg_err("cannot open volume %d, error %d", vol_id, err);
+			kfree(re);
+			goto out_free;
+		}
+
+		/* Skip this re-naming if the name does not really change */
+		if (re->desc->vol->name_len == name_len &&
+		    !memcmp(re->desc->vol->name, name, name_len)) {
+			ubi_close_volume(re->desc);
+			kfree(re);
+			continue;
+		}
+
+		re->new_name_len = name_len;
+		memcpy(re->new_name, name, name_len);
+		list_add_tail(&re->list, &rename_list);
+		dbg_msg("will rename volume %d from \"%s\" to \"%s\"",
+			vol_id, re->desc->vol->name, name);
+	}
+
+	if (list_empty(&rename_list))
+		return 0;
+
+	/* Find out the volumes which have to be removed */
+	list_for_each_entry(re, &rename_list, list) {
+		struct ubi_volume_desc *desc;
+		int no_remove_needed = 0;
+
+		/*
+		 * Volume @re->vol_id is going to be re-named to
+		 * @re->new_name, while its current name is @name. If a volume
+		 * with name @re->new_name currently exists, it has to be
+		 * removed, unless it is also re-named in the request (@req).
+		 */
+		list_for_each_entry(re1, &rename_list, list) {
+			if (re->new_name_len == re1->desc->vol->name_len &&
+			    !memcmp(re->new_name, re1->desc->vol->name,
+				    re1->desc->vol->name_len)) {
+				no_remove_needed = 1;
+				break;
+			}
+		}
+
+		if (no_remove_needed)
+			continue;
+
+		/*
+		 * It seems we need to remove volume with name @re->new_name,
+		 * if it exists.
+		 */
+		desc = ubi_open_volume_nm(ubi->ubi_num, re->new_name,
+					  UBI_EXCLUSIVE);
+		if (IS_ERR(desc)) {
+			err = PTR_ERR(desc);
+			if (err == -ENODEV)
+				/* Re-naming into a non-existing volume name */
+				continue;
+
+			/* The volume exists but busy, or an error occurred */
+			dbg_err("cannot open volume \"%s\", error %d",
+				re->new_name, err);
+			goto out_free;
+		}
+
+		/*
+		 * Allocate into @re1, not @re: @re is the cursor of the
+		 * enclosing list walk and must keep pointing at the current
+		 * entry. The new entry goes to the list head, behind the
+		 * cursor, so this walk does not visit it.
+		 */
+		re1 = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL);
+		if (!re1) {
+			err = -ENOMEM;
+			ubi_close_volume(desc);
+			goto out_free;
+		}
+
+		re1->remove = 1;
+		re1->desc = desc;
+		list_add(&re1->list, &rename_list);
+		dbg_msg("will remove volume %d, name \"%s\"",
+			re1->desc->vol->vol_id, re1->desc->vol->name);
+	}
+
+	mutex_lock(&ubi->volumes_mutex);
+	err = ubi_rename_volumes(ubi, &rename_list);
+	mutex_unlock(&ubi->volumes_mutex);
+
+out_free:
+	list_for_each_entry_safe(re, re1, &rename_list, list) {
+		ubi_close_volume(re->desc);
+		list_del(&re->list);
+		kfree(re);
+	}
+	return err;
+}
+
static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg)
{
@@ -621,19 +786,18 @@
{
struct ubi_mkvol_req req;
- dbg_msg("create volume");
+ dbg_gen("create volume");
err = copy_from_user(&req, argp, sizeof(struct ubi_mkvol_req));
if (err) {
err = -EFAULT;
break;
}
+ req.name[req.name_len] = '\0';
err = verify_mkvol_req(ubi, &req);
if (err)
break;
- req.name[req.name_len] = '\0';
-
mutex_lock(&ubi->volumes_mutex);
err = ubi_create_volume(ubi, &req);
mutex_unlock(&ubi->volumes_mutex);
@@ -652,7 +816,7 @@
{
int vol_id;
- dbg_msg("remove volume");
+ dbg_gen("remove volume");
err = get_user(vol_id, (__user int32_t *)argp);
if (err) {
err = -EFAULT;
@@ -666,7 +830,7 @@
}
mutex_lock(&ubi->volumes_mutex);
- err = ubi_remove_volume(desc);
+ err = ubi_remove_volume(desc, 0);
mutex_unlock(&ubi->volumes_mutex);
/*
@@ -685,7 +849,7 @@
uint64_t tmp;
struct ubi_rsvol_req req;
- dbg_msg("re-size volume");
+ dbg_gen("re-size volume");
err = copy_from_user(&req, argp, sizeof(struct ubi_rsvol_req));
if (err) {
err = -EFAULT;
@@ -713,6 +877,32 @@
break;
}
+ /* Re-name volumes command */
+ case UBI_IOCRNVOL:
+ {
+ struct ubi_rnvol_req *req;
+
+ dbg_msg("re-name volumes");
+ req = kmalloc(sizeof(struct ubi_rnvol_req), GFP_KERNEL);
+ if (!req) {
+ err = -ENOMEM;
+ break;
+ };
+
+ err = copy_from_user(req, argp, sizeof(struct ubi_rnvol_req));
+ if (err) {
+ err = -EFAULT;
+ kfree(req);
+ break;
+ }
+
+ mutex_lock(&ubi->mult_mutex);
+ err = rename_volumes(ubi, req);
+ mutex_unlock(&ubi->mult_mutex);
+ kfree(req);
+ break;
+ }
+
default:
err = -ENOTTY;
break;
@@ -738,7 +928,7 @@
struct ubi_attach_req req;
struct mtd_info *mtd;
- dbg_msg("attach MTD device");
+ dbg_gen("attach MTD device");
err = copy_from_user(&req, argp, sizeof(struct ubi_attach_req));
if (err) {
err = -EFAULT;
@@ -778,7 +968,7 @@
{
int ubi_num;
- dbg_msg("dettach MTD device");
+ dbg_gen("dettach MTD device");
err = get_user(ubi_num, (__user int32_t *)argp);
if (err) {
err = -EFAULT;
diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c
index 56956ec..c0ed60e 100644
--- a/drivers/mtd/ubi/debug.c
+++ b/drivers/mtd/ubi/debug.c
@@ -24,7 +24,7 @@
* changes.
*/
-#ifdef CONFIG_MTD_UBI_DEBUG_MSG
+#ifdef CONFIG_MTD_UBI_DEBUG
#include "ubi.h"
@@ -34,14 +34,19 @@
*/
void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr)
{
- dbg_msg("erase counter header dump:");
- dbg_msg("magic %#08x", be32_to_cpu(ec_hdr->magic));
- dbg_msg("version %d", (int)ec_hdr->version);
- dbg_msg("ec %llu", (long long)be64_to_cpu(ec_hdr->ec));
- dbg_msg("vid_hdr_offset %d", be32_to_cpu(ec_hdr->vid_hdr_offset));
- dbg_msg("data_offset %d", be32_to_cpu(ec_hdr->data_offset));
- dbg_msg("hdr_crc %#08x", be32_to_cpu(ec_hdr->hdr_crc));
- dbg_msg("erase counter header hexdump:");
+ printk(KERN_DEBUG "Erase counter header dump:\n");
+ printk(KERN_DEBUG "\tmagic %#08x\n",
+ be32_to_cpu(ec_hdr->magic));
+ printk(KERN_DEBUG "\tversion %d\n", (int)ec_hdr->version);
+ printk(KERN_DEBUG "\tec %llu\n",
+ (long long)be64_to_cpu(ec_hdr->ec));
+ printk(KERN_DEBUG "\tvid_hdr_offset %d\n",
+ be32_to_cpu(ec_hdr->vid_hdr_offset));
+ printk(KERN_DEBUG "\tdata_offset %d\n",
+ be32_to_cpu(ec_hdr->data_offset));
+ printk(KERN_DEBUG "\thdr_crc %#08x\n",
+ be32_to_cpu(ec_hdr->hdr_crc));
+ printk(KERN_DEBUG "erase counter header hexdump:\n");
print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
ec_hdr, UBI_EC_HDR_SIZE, 1);
}
@@ -52,22 +57,23 @@
*/
void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr)
{
- dbg_msg("volume identifier header dump:");
- dbg_msg("magic %08x", be32_to_cpu(vid_hdr->magic));
- dbg_msg("version %d", (int)vid_hdr->version);
- dbg_msg("vol_type %d", (int)vid_hdr->vol_type);
- dbg_msg("copy_flag %d", (int)vid_hdr->copy_flag);
- dbg_msg("compat %d", (int)vid_hdr->compat);
- dbg_msg("vol_id %d", be32_to_cpu(vid_hdr->vol_id));
- dbg_msg("lnum %d", be32_to_cpu(vid_hdr->lnum));
- dbg_msg("leb_ver %u", be32_to_cpu(vid_hdr->leb_ver));
- dbg_msg("data_size %d", be32_to_cpu(vid_hdr->data_size));
- dbg_msg("used_ebs %d", be32_to_cpu(vid_hdr->used_ebs));
- dbg_msg("data_pad %d", be32_to_cpu(vid_hdr->data_pad));
- dbg_msg("sqnum %llu",
+ printk(KERN_DEBUG "Volume identifier header dump:\n");
+ printk(KERN_DEBUG "\tmagic %08x\n", be32_to_cpu(vid_hdr->magic));
+ printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version);
+ printk(KERN_DEBUG "\tvol_type %d\n", (int)vid_hdr->vol_type);
+ printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag);
+ printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat);
+ printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id));
+ printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum));
+ printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size));
+ printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs));
+ printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad));
+ printk(KERN_DEBUG "\tsqnum %llu\n",
(unsigned long long)be64_to_cpu(vid_hdr->sqnum));
- dbg_msg("hdr_crc %08x", be32_to_cpu(vid_hdr->hdr_crc));
- dbg_msg("volume identifier header hexdump:");
+ printk(KERN_DEBUG "\thdr_crc %08x\n", be32_to_cpu(vid_hdr->hdr_crc));
+ printk(KERN_DEBUG "Volume identifier header hexdump:\n");
+ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
+ vid_hdr, UBI_VID_HDR_SIZE, 1);
}
/**
@@ -76,27 +82,27 @@
*/
void ubi_dbg_dump_vol_info(const struct ubi_volume *vol)
{
- dbg_msg("volume information dump:");
- dbg_msg("vol_id %d", vol->vol_id);
- dbg_msg("reserved_pebs %d", vol->reserved_pebs);
- dbg_msg("alignment %d", vol->alignment);
- dbg_msg("data_pad %d", vol->data_pad);
- dbg_msg("vol_type %d", vol->vol_type);
- dbg_msg("name_len %d", vol->name_len);
- dbg_msg("usable_leb_size %d", vol->usable_leb_size);
- dbg_msg("used_ebs %d", vol->used_ebs);
- dbg_msg("used_bytes %lld", vol->used_bytes);
- dbg_msg("last_eb_bytes %d", vol->last_eb_bytes);
- dbg_msg("corrupted %d", vol->corrupted);
- dbg_msg("upd_marker %d", vol->upd_marker);
+ printk(KERN_DEBUG "Volume information dump:\n");
+ printk(KERN_DEBUG "\tvol_id %d\n", vol->vol_id);
+ printk(KERN_DEBUG "\treserved_pebs %d\n", vol->reserved_pebs);
+ printk(KERN_DEBUG "\talignment %d\n", vol->alignment);
+ printk(KERN_DEBUG "\tdata_pad %d\n", vol->data_pad);
+ printk(KERN_DEBUG "\tvol_type %d\n", vol->vol_type);
+ printk(KERN_DEBUG "\tname_len %d\n", vol->name_len);
+ printk(KERN_DEBUG "\tusable_leb_size %d\n", vol->usable_leb_size);
+ printk(KERN_DEBUG "\tused_ebs %d\n", vol->used_ebs);
+ printk(KERN_DEBUG "\tused_bytes %lld\n", vol->used_bytes);
+ printk(KERN_DEBUG "\tlast_eb_bytes %d\n", vol->last_eb_bytes);
+ printk(KERN_DEBUG "\tcorrupted %d\n", vol->corrupted);
+ printk(KERN_DEBUG "\tupd_marker %d\n", vol->upd_marker);
if (vol->name_len <= UBI_VOL_NAME_MAX &&
strnlen(vol->name, vol->name_len + 1) == vol->name_len) {
- dbg_msg("name %s", vol->name);
+ printk(KERN_DEBUG "\tname %s\n", vol->name);
} else {
- dbg_msg("the 1st 5 characters of the name: %c%c%c%c%c",
- vol->name[0], vol->name[1], vol->name[2],
- vol->name[3], vol->name[4]);
+ printk(KERN_DEBUG "\t1st 5 characters of name: %c%c%c%c%c\n",
+ vol->name[0], vol->name[1], vol->name[2],
+ vol->name[3], vol->name[4]);
}
}
@@ -109,28 +115,29 @@
{
int name_len = be16_to_cpu(r->name_len);
- dbg_msg("volume table record %d dump:", idx);
- dbg_msg("reserved_pebs %d", be32_to_cpu(r->reserved_pebs));
- dbg_msg("alignment %d", be32_to_cpu(r->alignment));
- dbg_msg("data_pad %d", be32_to_cpu(r->data_pad));
- dbg_msg("vol_type %d", (int)r->vol_type);
- dbg_msg("upd_marker %d", (int)r->upd_marker);
- dbg_msg("name_len %d", name_len);
+ printk(KERN_DEBUG "Volume table record %d dump:\n", idx);
+ printk(KERN_DEBUG "\treserved_pebs %d\n",
+ be32_to_cpu(r->reserved_pebs));
+ printk(KERN_DEBUG "\talignment %d\n", be32_to_cpu(r->alignment));
+ printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(r->data_pad));
+ printk(KERN_DEBUG "\tvol_type %d\n", (int)r->vol_type);
+ printk(KERN_DEBUG "\tupd_marker %d\n", (int)r->upd_marker);
+ printk(KERN_DEBUG "\tname_len %d\n", name_len);
if (r->name[0] == '\0') {
- dbg_msg("name NULL");
+ printk(KERN_DEBUG "\tname NULL\n");
return;
}
if (name_len <= UBI_VOL_NAME_MAX &&
strnlen(&r->name[0], name_len + 1) == name_len) {
- dbg_msg("name %s", &r->name[0]);
+ printk(KERN_DEBUG "\tname %s\n", &r->name[0]);
} else {
- dbg_msg("1st 5 characters of the name: %c%c%c%c%c",
+ printk(KERN_DEBUG "\t1st 5 characters of name: %c%c%c%c%c\n",
r->name[0], r->name[1], r->name[2], r->name[3],
r->name[4]);
}
- dbg_msg("crc %#08x", be32_to_cpu(r->crc));
+ printk(KERN_DEBUG "\tcrc %#08x\n", be32_to_cpu(r->crc));
}
/**
@@ -139,15 +146,15 @@
*/
void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv)
{
- dbg_msg("volume scanning information dump:");
- dbg_msg("vol_id %d", sv->vol_id);
- dbg_msg("highest_lnum %d", sv->highest_lnum);
- dbg_msg("leb_count %d", sv->leb_count);
- dbg_msg("compat %d", sv->compat);
- dbg_msg("vol_type %d", sv->vol_type);
- dbg_msg("used_ebs %d", sv->used_ebs);
- dbg_msg("last_data_size %d", sv->last_data_size);
- dbg_msg("data_pad %d", sv->data_pad);
+ printk(KERN_DEBUG "Volume scanning information dump:\n");
+ printk(KERN_DEBUG "\tvol_id %d\n", sv->vol_id);
+ printk(KERN_DEBUG "\thighest_lnum %d\n", sv->highest_lnum);
+ printk(KERN_DEBUG "\tleb_count %d\n", sv->leb_count);
+ printk(KERN_DEBUG "\tcompat %d\n", sv->compat);
+ printk(KERN_DEBUG "\tvol_type %d\n", sv->vol_type);
+ printk(KERN_DEBUG "\tused_ebs %d\n", sv->used_ebs);
+ printk(KERN_DEBUG "\tlast_data_size %d\n", sv->last_data_size);
+ printk(KERN_DEBUG "\tdata_pad %d\n", sv->data_pad);
}
/**
@@ -157,14 +164,13 @@
*/
void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type)
{
- dbg_msg("eraseblock scanning information dump:");
- dbg_msg("ec %d", seb->ec);
- dbg_msg("pnum %d", seb->pnum);
+ printk(KERN_DEBUG "eraseblock scanning information dump:\n");
+ printk(KERN_DEBUG "\tec %d\n", seb->ec);
+ printk(KERN_DEBUG "\tpnum %d\n", seb->pnum);
if (type == 0) {
- dbg_msg("lnum %d", seb->lnum);
- dbg_msg("scrub %d", seb->scrub);
- dbg_msg("sqnum %llu", seb->sqnum);
- dbg_msg("leb_ver %u", seb->leb_ver);
+ printk(KERN_DEBUG "\tlnum %d\n", seb->lnum);
+ printk(KERN_DEBUG "\tscrub %d\n", seb->scrub);
+ printk(KERN_DEBUG "\tsqnum %llu\n", seb->sqnum);
}
}
@@ -176,16 +182,16 @@
{
char nm[17];
- dbg_msg("volume creation request dump:");
- dbg_msg("vol_id %d", req->vol_id);
- dbg_msg("alignment %d", req->alignment);
- dbg_msg("bytes %lld", (long long)req->bytes);
- dbg_msg("vol_type %d", req->vol_type);
- dbg_msg("name_len %d", req->name_len);
+ printk(KERN_DEBUG "Volume creation request dump:\n");
+ printk(KERN_DEBUG "\tvol_id %d\n", req->vol_id);
+ printk(KERN_DEBUG "\talignment %d\n", req->alignment);
+ printk(KERN_DEBUG "\tbytes %lld\n", (long long)req->bytes);
+ printk(KERN_DEBUG "\tvol_type %d\n", req->vol_type);
+ printk(KERN_DEBUG "\tname_len %d\n", req->name_len);
memcpy(nm, req->name, 16);
nm[16] = 0;
- dbg_msg("the 1st 16 characters of the name: %s", nm);
+ printk(KERN_DEBUG "\t1st 16 characters of name: %s\n", nm);
}
-#endif /* CONFIG_MTD_UBI_DEBUG_MSG */
+#endif /* CONFIG_MTD_UBI_DEBUG */
diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h
index 8ea99d8..78e914d 100644
--- a/drivers/mtd/ubi/debug.h
+++ b/drivers/mtd/ubi/debug.h
@@ -24,21 +24,16 @@
#ifdef CONFIG_MTD_UBI_DEBUG
#include <linux/random.h>
-#define ubi_assert(expr) BUG_ON(!(expr))
#define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__)
-#else
-#define ubi_assert(expr) ({})
-#define dbg_err(fmt, ...) ({})
-#endif
-#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT
-#define DBG_DISABLE_BGT 1
-#else
-#define DBG_DISABLE_BGT 0
-#endif
+#define ubi_assert(expr) do { \
+ if (unlikely(!(expr))) { \
+ printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \
+ __func__, __LINE__, current->pid); \
+ ubi_dbg_dump_stack(); \
+ } \
+} while (0)
-#ifdef CONFIG_MTD_UBI_DEBUG_MSG
-/* Generic debugging message */
#define dbg_msg(fmt, ...) \
printk(KERN_DEBUG "UBI DBG (pid %d): %s: " fmt "\n", \
current->pid, __func__, ##__VA_ARGS__)
@@ -61,36 +56,29 @@
void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type);
void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req);
+#ifdef CONFIG_MTD_UBI_DEBUG_MSG
+/* General debugging messages */
+#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#else
-
-#define dbg_msg(fmt, ...) ({})
-#define ubi_dbg_dump_stack() ({})
-#define ubi_dbg_dump_ec_hdr(ec_hdr) ({})
-#define ubi_dbg_dump_vid_hdr(vid_hdr) ({})
-#define ubi_dbg_dump_vol_info(vol) ({})
-#define ubi_dbg_dump_vtbl_record(r, idx) ({})
-#define ubi_dbg_dump_sv(sv) ({})
-#define ubi_dbg_dump_seb(seb, type) ({})
-#define ubi_dbg_dump_mkvol_req(req) ({})
-
-#endif /* CONFIG_MTD_UBI_DEBUG_MSG */
+#define dbg_gen(fmt, ...) ({})
+#endif
#ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA
-/* Messages from the eraseblock association unit */
+/* Messages from the eraseblock association sub-system */
#define dbg_eba(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#else
#define dbg_eba(fmt, ...) ({})
#endif
#ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL
-/* Messages from the wear-leveling unit */
+/* Messages from the wear-leveling sub-system */
#define dbg_wl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#else
#define dbg_wl(fmt, ...) ({})
#endif
#ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO
-/* Messages from the input/output unit */
+/* Messages from the input/output sub-system */
#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#else
#define dbg_io(fmt, ...) ({})
@@ -105,6 +93,12 @@
#define UBI_IO_DEBUG 0
#endif
+#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT
+#define DBG_DISABLE_BGT 1
+#else
+#define DBG_DISABLE_BGT 0
+#endif
+
#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_BITFLIPS
/**
* ubi_dbg_is_bitflip - if it is time to emulate a bit-flip.
@@ -149,4 +143,30 @@
#define ubi_dbg_is_erase_failure() 0
#endif
+#else
+
+#define ubi_assert(expr) ({})
+#define dbg_err(fmt, ...) ({})
+#define dbg_msg(fmt, ...) ({})
+#define dbg_gen(fmt, ...) ({})
+#define dbg_eba(fmt, ...) ({})
+#define dbg_wl(fmt, ...) ({})
+#define dbg_io(fmt, ...) ({})
+#define dbg_bld(fmt, ...) ({})
+#define ubi_dbg_dump_stack() ({})
+#define ubi_dbg_dump_ec_hdr(ec_hdr) ({})
+#define ubi_dbg_dump_vid_hdr(vid_hdr) ({})
+#define ubi_dbg_dump_vol_info(vol) ({})
+#define ubi_dbg_dump_vtbl_record(r, idx) ({})
+#define ubi_dbg_dump_sv(sv) ({})
+#define ubi_dbg_dump_seb(seb, type) ({})
+#define ubi_dbg_dump_mkvol_req(req) ({})
+
+#define UBI_IO_DEBUG 0
+#define DBG_DISABLE_BGT 0
+#define ubi_dbg_is_bitflip() 0
+#define ubi_dbg_is_write_failure() 0
+#define ubi_dbg_is_erase_failure() 0
+
+#endif /* !CONFIG_MTD_UBI_DEBUG */
#endif /* !__UBI_DEBUG_H__ */
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 7ce91ca..e04bcf1 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -19,20 +19,20 @@
*/
/*
- * The UBI Eraseblock Association (EBA) unit.
+ * The UBI Eraseblock Association (EBA) sub-system.
*
- * This unit is responsible for I/O to/from logical eraseblock.
+ * This sub-system is responsible for I/O to/from logical eraseblock.
*
* Although in this implementation the EBA table is fully kept and managed in
* RAM, which assumes poor scalability, it might be (partially) maintained on
* flash in future implementations.
*
- * The EBA unit implements per-logical eraseblock locking. Before accessing a
- * logical eraseblock it is locked for reading or writing. The per-logical
- * eraseblock locking is implemented by means of the lock tree. The lock tree
- * is an RB-tree which refers all the currently locked logical eraseblocks. The
- * lock tree elements are &struct ubi_ltree_entry objects. They are indexed by
- * (@vol_id, @lnum) pairs.
+ * The EBA sub-system implements per-logical eraseblock locking. Before
+ * accessing a logical eraseblock it is locked for reading or writing. The
+ * per-logical eraseblock locking is implemented by means of the lock tree. The
+ * lock tree is an RB-tree which refers to all the currently locked logical
+ * eraseblocks. The lock tree elements are &struct ubi_ltree_entry objects.
+ * They are indexed by (@vol_id, @lnum) pairs.
*
* EBA also maintains the global sequence counter which is incremented each
* time a logical eraseblock is mapped to a physical eraseblock and it is
@@ -189,9 +189,7 @@
le->users += 1;
spin_unlock(&ubi->ltree_lock);
- if (le_free)
- kfree(le_free);
-
+ kfree(le_free);
return le;
}
@@ -223,22 +221,18 @@
*/
static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum)
{
- int free = 0;
struct ubi_ltree_entry *le;
spin_lock(&ubi->ltree_lock);
le = ltree_lookup(ubi, vol_id, lnum);
le->users -= 1;
ubi_assert(le->users >= 0);
+ up_read(&le->mutex);
if (le->users == 0) {
rb_erase(&le->rb, &ubi->ltree);
- free = 1;
+ kfree(le);
}
spin_unlock(&ubi->ltree_lock);
-
- up_read(&le->mutex);
- if (free)
- kfree(le);
}
/**
@@ -274,7 +268,6 @@
*/
static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum)
{
- int free;
struct ubi_ltree_entry *le;
le = ltree_add_entry(ubi, vol_id, lnum);
@@ -289,12 +282,9 @@
ubi_assert(le->users >= 0);
if (le->users == 0) {
rb_erase(&le->rb, &ubi->ltree);
- free = 1;
- } else
- free = 0;
- spin_unlock(&ubi->ltree_lock);
- if (free)
kfree(le);
+ }
+ spin_unlock(&ubi->ltree_lock);
return 1;
}
@@ -307,23 +297,18 @@
*/
static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum)
{
- int free;
struct ubi_ltree_entry *le;
spin_lock(&ubi->ltree_lock);
le = ltree_lookup(ubi, vol_id, lnum);
le->users -= 1;
ubi_assert(le->users >= 0);
+ up_write(&le->mutex);
if (le->users == 0) {
rb_erase(&le->rb, &ubi->ltree);
- free = 1;
- } else
- free = 0;
- spin_unlock(&ubi->ltree_lock);
-
- up_write(&le->mutex);
- if (free)
kfree(le);
+ }
+ spin_unlock(&ubi->ltree_lock);
}
/**
@@ -516,9 +501,8 @@
struct ubi_vid_hdr *vid_hdr;
vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
- if (!vid_hdr) {
+ if (!vid_hdr)
return -ENOMEM;
- }
mutex_lock(&ubi->buf_mutex);
@@ -752,7 +736,7 @@
/* If this is the last LEB @len may be unaligned */
len = ALIGN(data_size, ubi->min_io_size);
else
- ubi_assert(len % ubi->min_io_size == 0);
+ ubi_assert(!(len & (ubi->min_io_size - 1)));
vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
if (!vid_hdr)
@@ -919,7 +903,7 @@
}
if (vol->eba_tbl[lnum] >= 0) {
- err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 1);
+ err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 0);
if (err)
goto out_leb_unlock;
}
@@ -1141,7 +1125,7 @@
}
/**
- * ubi_eba_init_scan - initialize the EBA unit using scanning information.
+ * ubi_eba_init_scan - initialize the EBA sub-system using scanning information.
* @ubi: UBI device description object
* @si: scanning information
*
@@ -1156,7 +1140,7 @@
struct ubi_scan_leb *seb;
struct rb_node *rb;
- dbg_eba("initialize EBA unit");
+ dbg_eba("initialize EBA sub-system");
spin_lock_init(&ubi->ltree_lock);
mutex_init(&ubi->alc_mutex);
@@ -1222,7 +1206,7 @@
ubi->rsvd_pebs += ubi->beb_rsvd_pebs;
}
- dbg_eba("EBA unit is initialized");
+ dbg_eba("EBA sub-system is initialized");
return 0;
out_free:
@@ -1233,20 +1217,3 @@
}
return err;
}
-
-/**
- * ubi_eba_close - close EBA unit.
- * @ubi: UBI device description object
- */
-void ubi_eba_close(const struct ubi_device *ubi)
-{
- int i, num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT;
-
- dbg_eba("close EBA unit");
-
- for (i = 0; i < num_volumes; i++) {
- if (!ubi->volumes[i])
- continue;
- kfree(ubi->volumes[i]->eba_tbl);
- }
-}
diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c
index e909b39..605812b 100644
--- a/drivers/mtd/ubi/gluebi.c
+++ b/drivers/mtd/ubi/gluebi.c
@@ -111,7 +111,7 @@
struct ubi_device *ubi;
uint64_t tmp = from;
- dbg_msg("read %zd bytes from offset %lld", len, from);
+ dbg_gen("read %zd bytes from offset %lld", len, from);
if (len < 0 || from < 0 || from + len > mtd->size)
return -EINVAL;
@@ -162,7 +162,7 @@
struct ubi_device *ubi;
uint64_t tmp = to;
- dbg_msg("write %zd bytes to offset %lld", len, to);
+ dbg_gen("write %zd bytes to offset %lld", len, to);
if (len < 0 || to < 0 || len + to > mtd->size)
return -EINVAL;
@@ -215,7 +215,7 @@
struct ubi_volume *vol;
struct ubi_device *ubi;
- dbg_msg("erase %u bytes at offset %u", instr->len, instr->addr);
+ dbg_gen("erase %u bytes at offset %u", instr->len, instr->addr);
if (instr->addr < 0 || instr->addr > mtd->size - mtd->erasesize)
return -EINVAL;
@@ -249,8 +249,8 @@
if (err)
goto out_err;
- instr->state = MTD_ERASE_DONE;
- mtd_erase_callback(instr);
+ instr->state = MTD_ERASE_DONE;
+ mtd_erase_callback(instr);
return 0;
out_err:
@@ -299,12 +299,12 @@
mtd->size = vol->used_bytes;
if (add_mtd_device(mtd)) {
- ubi_err("cannot not add MTD device\n");
+ ubi_err("cannot not add MTD device");
kfree(mtd->name);
return -ENFILE;
}
- dbg_msg("added mtd%d (\"%s\"), size %u, EB size %u",
+ dbg_gen("added mtd%d (\"%s\"), size %u, EB size %u",
mtd->index, mtd->name, mtd->size, mtd->erasesize);
return 0;
}
@@ -322,7 +322,7 @@
int err;
struct mtd_info *mtd = &vol->gluebi_mtd;
- dbg_msg("remove mtd%d", mtd->index);
+ dbg_gen("remove mtd%d", mtd->index);
err = del_mtd_device(mtd);
if (err)
return err;
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index 4ac11df..2fb64be 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -20,15 +20,15 @@
*/
/*
- * UBI input/output unit.
+ * UBI input/output sub-system.
*
- * This unit provides a uniform way to work with all kinds of the underlying
- * MTD devices. It also implements handy functions for reading and writing UBI
- * headers.
+ * This sub-system provides a uniform way to work with all kinds of the
+ * underlying MTD devices. It also implements handy functions for reading and
+ * writing UBI headers.
*
* We are trying to have a paranoid mindset and not to trust to what we read
- * from the flash media in order to be more secure and robust. So this unit
- * validates every single header it reads from the flash media.
+ * from the flash media in order to be more secure and robust. So this
+ * sub-system validates every single header it reads from the flash media.
*
* Some words about how the eraseblock headers are stored.
*
@@ -79,11 +79,11 @@
* 512-byte chunks, we have to allocate one more buffer and copy our VID header
* to offset 448 of this buffer.
*
- * The I/O unit does the following trick in order to avoid this extra copy.
- * It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID header
- * and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. When the
- * VID header is being written out, it shifts the VID header pointer back and
- * writes the whole sub-page.
+ * The I/O sub-system does the following trick in order to avoid this extra
+ * copy. It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID
+ * header and returns a pointer to offset @ubi->vid_hdr_shift of this buffer.
+ * When the VID header is being written out, it shifts the VID header pointer
+ * back and writes the whole sub-page.
*/
#include <linux/crc32.h>
@@ -156,15 +156,19 @@
/*
* -EUCLEAN is reported if there was a bit-flip which
* was corrected, so this is harmless.
+ *
+ * We do not report about it here unless debugging is
+ * enabled. A corresponding message will be printed
+ * later, when it has been scrubbed.
*/
- ubi_msg("fixable bit-flip detected at PEB %d", pnum);
+ dbg_msg("fixable bit-flip detected at PEB %d", pnum);
ubi_assert(len == read);
return UBI_IO_BITFLIPS;
}
if (read != len && retries++ < UBI_IO_RETRIES) {
- dbg_io("error %d while reading %d bytes from PEB %d:%d, "
- "read only %zd bytes, retry",
+ dbg_io("error %d while reading %d bytes from PEB %d:%d,"
+ " read only %zd bytes, retry",
err, len, pnum, offset, read);
yield();
goto retry;
@@ -187,7 +191,7 @@
ubi_assert(len == read);
if (ubi_dbg_is_bitflip()) {
- dbg_msg("bit-flip (emulated)");
+ dbg_gen("bit-flip (emulated)");
err = UBI_IO_BITFLIPS;
}
}
@@ -391,6 +395,7 @@
{
int err, i, patt_count;
+ ubi_msg("run torture test for PEB %d", pnum);
patt_count = ARRAY_SIZE(patterns);
ubi_assert(patt_count > 0);
@@ -434,6 +439,7 @@
}
err = patt_count;
+ ubi_msg("PEB %d passed torture test, do not mark it a bad", pnum);
out:
mutex_unlock(&ubi->buf_mutex);
@@ -699,8 +705,8 @@
if (hdr_crc != crc) {
if (verbose) {
- ubi_warn("bad EC header CRC at PEB %d, calculated %#08x,"
- " read %#08x", pnum, crc, hdr_crc);
+ ubi_warn("bad EC header CRC at PEB %d, calculated "
+ "%#08x, read %#08x", pnum, crc, hdr_crc);
ubi_dbg_dump_ec_hdr(ec_hdr);
}
return UBI_IO_BAD_EC_HDR;
@@ -1095,8 +1101,7 @@
}
/**
- * paranoid_check_peb_ec_hdr - check that the erase counter header of a
- * physical eraseblock is in-place and is all right.
+ * paranoid_check_peb_ec_hdr - check erase counter header.
* @ubi: UBI device description object
* @pnum: the physical eraseblock number to check
*
@@ -1174,8 +1179,7 @@
}
/**
- * paranoid_check_peb_vid_hdr - check that the volume identifier header of a
- * physical eraseblock is in-place and is all right.
+ * paranoid_check_peb_vid_hdr - check volume identifier header.
* @ubi: UBI device description object
* @pnum: the physical eraseblock number to check
*
@@ -1256,7 +1260,7 @@
fail:
ubi_err("paranoid check failed for PEB %d", pnum);
- dbg_msg("hex dump of the %d-%d region", offset, offset + len);
+ ubi_msg("hex dump of the %d-%d region", offset, offset + len);
print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
ubi->dbg_peb_buf, len, 1);
err = 1;
diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
index a70d588..5d9bcf1 100644
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -106,7 +106,7 @@
struct ubi_device *ubi;
struct ubi_volume *vol;
- dbg_msg("open device %d volume %d, mode %d", ubi_num, vol_id, mode);
+ dbg_gen("open device %d volume %d, mode %d", ubi_num, vol_id, mode);
if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES)
return ERR_PTR(-EINVAL);
@@ -215,7 +215,7 @@
struct ubi_device *ubi;
struct ubi_volume_desc *ret;
- dbg_msg("open volume %s, mode %d", name, mode);
+ dbg_gen("open volume %s, mode %d", name, mode);
if (!name)
return ERR_PTR(-EINVAL);
@@ -266,7 +266,7 @@
struct ubi_volume *vol = desc->vol;
struct ubi_device *ubi = vol->ubi;
- dbg_msg("close volume %d, mode %d", vol->vol_id, desc->mode);
+ dbg_gen("close volume %d, mode %d", vol->vol_id, desc->mode);
spin_lock(&ubi->volumes_lock);
switch (desc->mode) {
@@ -323,7 +323,7 @@
struct ubi_device *ubi = vol->ubi;
int err, vol_id = vol->vol_id;
- dbg_msg("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset);
+ dbg_gen("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset);
if (vol_id < 0 || vol_id >= ubi->vtbl_slots || lnum < 0 ||
lnum >= vol->used_ebs || offset < 0 || len < 0 ||
@@ -388,7 +388,7 @@
struct ubi_device *ubi = vol->ubi;
int vol_id = vol->vol_id;
- dbg_msg("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset);
+ dbg_gen("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset);
if (vol_id < 0 || vol_id >= ubi->vtbl_slots)
return -EINVAL;
@@ -397,8 +397,8 @@
return -EROFS;
if (lnum < 0 || lnum >= vol->reserved_pebs || offset < 0 || len < 0 ||
- offset + len > vol->usable_leb_size || offset % ubi->min_io_size ||
- len % ubi->min_io_size)
+ offset + len > vol->usable_leb_size ||
+ offset & (ubi->min_io_size - 1) || len & (ubi->min_io_size - 1))
return -EINVAL;
if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM &&
@@ -438,7 +438,7 @@
struct ubi_device *ubi = vol->ubi;
int vol_id = vol->vol_id;
- dbg_msg("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum);
+ dbg_gen("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum);
if (vol_id < 0 || vol_id >= ubi->vtbl_slots)
return -EINVAL;
@@ -447,7 +447,7 @@
return -EROFS;
if (lnum < 0 || lnum >= vol->reserved_pebs || len < 0 ||
- len > vol->usable_leb_size || len % ubi->min_io_size)
+ len > vol->usable_leb_size || len & (ubi->min_io_size - 1))
return -EINVAL;
if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM &&
@@ -482,7 +482,7 @@
struct ubi_device *ubi = vol->ubi;
int err;
- dbg_msg("erase LEB %d:%d", vol->vol_id, lnum);
+ dbg_gen("erase LEB %d:%d", vol->vol_id, lnum);
if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
return -EROFS;
@@ -542,7 +542,7 @@
struct ubi_volume *vol = desc->vol;
struct ubi_device *ubi = vol->ubi;
- dbg_msg("unmap LEB %d:%d", vol->vol_id, lnum);
+ dbg_gen("unmap LEB %d:%d", vol->vol_id, lnum);
if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
return -EROFS;
@@ -579,7 +579,7 @@
struct ubi_volume *vol = desc->vol;
struct ubi_device *ubi = vol->ubi;
- dbg_msg("unmap LEB %d:%d", vol->vol_id, lnum);
+ dbg_gen("unmap LEB %d:%d", vol->vol_id, lnum);
if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
return -EROFS;
@@ -621,7 +621,7 @@
{
struct ubi_volume *vol = desc->vol;
- dbg_msg("test LEB %d:%d", vol->vol_id, lnum);
+ dbg_gen("test LEB %d:%d", vol->vol_id, lnum);
if (lnum < 0 || lnum >= vol->reserved_pebs)
return -EINVAL;
@@ -632,3 +632,27 @@
return vol->eba_tbl[lnum] >= 0;
}
EXPORT_SYMBOL_GPL(ubi_is_mapped);
+
+/**
+ * ubi_sync - synchronize UBI device buffers.
+ * @ubi_num: UBI device to synchronize
+ *
+ * The underlying MTD device may cache data in hardware or in software. This
+ * function ensures the caches are flushed. Returns zero in case of success and
+ * a negative error code in case of failure.
+ */
+int ubi_sync(int ubi_num)
+{
+ struct ubi_device *ubi;
+
+ ubi = ubi_get_device(ubi_num);
+ if (!ubi)
+ return -ENODEV;
+
+ if (ubi->mtd->sync)
+ ubi->mtd->sync(ubi->mtd);
+
+ ubi_put_device(ubi);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ubi_sync);
diff --git a/drivers/mtd/ubi/misc.c b/drivers/mtd/ubi/misc.c
index 93e0528..22ad314 100644
--- a/drivers/mtd/ubi/misc.c
+++ b/drivers/mtd/ubi/misc.c
@@ -37,7 +37,7 @@
{
int i;
- ubi_assert(length % ubi->min_io_size == 0);
+ ubi_assert(!(length & (ubi->min_io_size - 1)));
for (i = length - 1; i >= 0; i--)
if (((const uint8_t *)buf)[i] != 0xFF)
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index 96d410e..967bb44 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -19,9 +19,9 @@
*/
/*
- * UBI scanning unit.
+ * UBI scanning sub-system.
*
- * This unit is responsible for scanning the flash media, checking UBI
+ * This sub-system is responsible for scanning the flash media, checking UBI
* headers and providing complete information about the UBI flash image.
*
* The scanning information is represented by a &struct ubi_scan_info' object.
@@ -93,8 +93,7 @@
}
/**
- * validate_vid_hdr - check that volume identifier header is correct and
- * consistent.
+ * validate_vid_hdr - check volume identifier header.
* @vid_hdr: the volume identifier header to check
* @sv: information about the volume this logical eraseblock belongs to
* @pnum: physical eraseblock number the VID header came from
@@ -103,7 +102,7 @@
* non-zero if an inconsistency was found and zero if not.
*
* Note, UBI does sanity check of everything it reads from the flash media.
- * Most of the checks are done in the I/O unit. Here we check that the
+ * Most of the checks are done in the I/O sub-system. Here we check that the
* information in the VID header is consistent to the information in other VID
* headers of the same volume.
*/
@@ -247,45 +246,21 @@
struct ubi_vid_hdr *vh = NULL;
unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum);
- if (seb->sqnum == 0 && sqnum2 == 0) {
- long long abs, v1 = seb->leb_ver, v2 = be32_to_cpu(vid_hdr->leb_ver);
-
+ if (sqnum2 == seb->sqnum) {
/*
- * UBI constantly increases the logical eraseblock version
- * number and it can overflow. Thus, we have to bear in mind
- * that versions that are close to %0xFFFFFFFF are less then
- * versions that are close to %0.
- *
- * The UBI WL unit guarantees that the number of pending tasks
- * is not greater then %0x7FFFFFFF. So, if the difference
- * between any two versions is greater or equivalent to
- * %0x7FFFFFFF, there was an overflow and the logical
- * eraseblock with lower version is actually newer then the one
- * with higher version.
- *
- * FIXME: but this is anyway obsolete and will be removed at
- * some point.
+ * This must be a really ancient UBI image which was created
+ * before sequence number support was added. At that time we
+ * used 32-bit LEB versions stored in logical eraseblocks. That
+ * was before UBI got into mainline. We do not support these
+ * images anymore. Well, those images will still work, but only
+ * if no unclean reboots happened.
*/
- dbg_bld("using old crappy leb_ver stuff");
+ ubi_err("unsupported on-flash UBI format\n");
+ return -EINVAL;
+ }
- if (v1 == v2) {
- ubi_err("PEB %d and PEB %d have the same version %lld",
- seb->pnum, pnum, v1);
- return -EINVAL;
- }
-
- abs = v1 - v2;
- if (abs < 0)
- abs = -abs;
-
- if (abs < 0x7FFFFFFF)
- /* Non-overflow situation */
- second_is_newer = (v2 > v1);
- else
- second_is_newer = (v2 < v1);
- } else
- /* Obviously the LEB with lower sequence counter is older */
- second_is_newer = sqnum2 > seb->sqnum;
+ /* Obviously the LEB with lower sequence counter is older */
+ second_is_newer = !!(sqnum2 > seb->sqnum);
/*
* Now we know which copy is newer. If the copy flag of the PEB with
@@ -293,7 +268,7 @@
* check data CRC. For the second PEB we already have the VID header,
* for the first one - we'll need to re-read it from flash.
*
- * FIXME: this may be optimized so that we wouldn't read twice.
+ * Note: this may be optimized so that we wouldn't read twice.
*/
if (second_is_newer) {
@@ -379,8 +354,7 @@
}
/**
- * ubi_scan_add_used - add information about a physical eraseblock to the
- * scanning information.
+ * ubi_scan_add_used - add physical eraseblock to the scanning information.
* @ubi: UBI device description object
* @si: scanning information
* @pnum: the physical eraseblock number
@@ -400,7 +374,6 @@
int bitflips)
{
int err, vol_id, lnum;
- uint32_t leb_ver;
unsigned long long sqnum;
struct ubi_scan_volume *sv;
struct ubi_scan_leb *seb;
@@ -409,10 +382,9 @@
vol_id = be32_to_cpu(vid_hdr->vol_id);
lnum = be32_to_cpu(vid_hdr->lnum);
sqnum = be64_to_cpu(vid_hdr->sqnum);
- leb_ver = be32_to_cpu(vid_hdr->leb_ver);
- dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, ver %u, bitflips %d",
- pnum, vol_id, lnum, ec, sqnum, leb_ver, bitflips);
+ dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, bitflips %d",
+ pnum, vol_id, lnum, ec, sqnum, bitflips);
sv = add_volume(si, vol_id, pnum, vid_hdr);
if (IS_ERR(sv) < 0)
@@ -445,25 +417,20 @@
*/
dbg_bld("this LEB already exists: PEB %d, sqnum %llu, "
- "LEB ver %u, EC %d", seb->pnum, seb->sqnum,
- seb->leb_ver, seb->ec);
-
- /*
- * Make sure that the logical eraseblocks have different
- * versions. Otherwise the image is bad.
- */
- if (seb->leb_ver == leb_ver && leb_ver != 0) {
- ubi_err("two LEBs with same version %u", leb_ver);
- ubi_dbg_dump_seb(seb, 0);
- ubi_dbg_dump_vid_hdr(vid_hdr);
- return -EINVAL;
- }
+ "EC %d", seb->pnum, seb->sqnum, seb->ec);
/*
* Make sure that the logical eraseblocks have different
* sequence numbers. Otherwise the image is bad.
*
- * FIXME: remove 'sqnum != 0' check when leb_ver is removed.
+ * However, if the sequence number is zero, we assume it must
+ * be an ancient UBI image from the era when UBI did not have
+ * sequence numbers. We still can attach these images, unless
+ * there is a need to distinguish between old and new
+ * eraseblocks, in which case we'll refuse the image in
+ * 'compare_lebs()'. In other words, we attach old clean
+ * images, but refuse attaching old images with duplicated
+ * logical eraseblocks because there was an unclean reboot.
*/
if (seb->sqnum == sqnum && sqnum != 0) {
ubi_err("two LEBs with same sequence number %llu",
@@ -503,7 +470,6 @@
seb->pnum = pnum;
seb->scrub = ((cmp_res & 2) || bitflips);
seb->sqnum = sqnum;
- seb->leb_ver = leb_ver;
if (sv->highest_lnum == lnum)
sv->last_data_size =
@@ -540,7 +506,6 @@
seb->lnum = lnum;
seb->sqnum = sqnum;
seb->scrub = bitflips;
- seb->leb_ver = leb_ver;
if (sv->highest_lnum <= lnum) {
sv->highest_lnum = lnum;
@@ -554,8 +519,7 @@
}
/**
- * ubi_scan_find_sv - find information about a particular volume in the
- * scanning information.
+ * ubi_scan_find_sv - find volume in the scanning information.
* @si: scanning information
* @vol_id: the requested volume ID
*
@@ -584,8 +548,7 @@
}
/**
- * ubi_scan_find_seb - find information about a particular logical
- * eraseblock in the volume scanning information.
+ * ubi_scan_find_seb - find LEB in the volume scanning information.
* @sv: a pointer to the volume scanning information
* @lnum: the requested logical eraseblock
*
@@ -645,9 +608,9 @@
*
* This function erases physical eraseblock 'pnum', and writes the erase
* counter header to it. This function should only be used on UBI device
- * initialization stages, when the EBA unit had not been yet initialized. This
- * function returns zero in case of success and a negative error code in case
- * of failure.
+ * initialization stages, when the EBA sub-system had not been yet initialized.
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
*/
int ubi_scan_erase_peb(struct ubi_device *ubi, const struct ubi_scan_info *si,
int pnum, int ec)
@@ -687,9 +650,10 @@
* @si: scanning information
*
* This function returns a free physical eraseblock. It is supposed to be
- * called on the UBI initialization stages when the wear-leveling unit is not
- * initialized yet. This function picks a physical eraseblocks from one of the
- * lists, writes the EC header if it is needed, and removes it from the list.
+ * called on the UBI initialization stages when the wear-leveling sub-system is
+ * not initialized yet. This function picks a physical eraseblock from one of
+ * the lists, writes the EC header if it is needed, and removes it from the
+ * list.
*
* This function returns scanning physical eraseblock information in case of
* success and an error code in case of failure.
@@ -742,8 +706,7 @@
}
/**
- * process_eb - read UBI headers, check them and add corresponding data
- * to the scanning information.
+ * process_eb - read, check UBI headers, and add them to scanning information.
* @ubi: UBI device description object
* @si: scanning information
* @pnum: the physical eraseblock number
@@ -751,7 +714,8 @@
* This function returns a zero if the physical eraseblock was successfully
* handled and a negative error code in case of failure.
*/
-static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum)
+static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si,
+ int pnum)
{
long long uninitialized_var(ec);
int err, bitflips = 0, vol_id, ec_corr = 0;
@@ -764,8 +728,9 @@
return err;
else if (err) {
/*
- * FIXME: this is actually duty of the I/O unit to initialize
- * this, but MTD does not provide enough information.
+ * FIXME: this is actually duty of the I/O sub-system to
+ * initialize this, but MTD does not provide enough
+ * information.
*/
si->bad_peb_count += 1;
return 0;
@@ -930,7 +895,7 @@
for (pnum = 0; pnum < ubi->peb_count; pnum++) {
cond_resched();
- dbg_msg("process PEB %d", pnum);
+ dbg_gen("process PEB %d", pnum);
err = process_eb(ubi, si, pnum);
if (err < 0)
goto out_vidh;
@@ -1079,8 +1044,7 @@
#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
/**
- * paranoid_check_si - check if the scanning information is correct and
- * consistent.
+ * paranoid_check_si - check the scanning information.
* @ubi: UBI device description object
* @si: scanning information
*
@@ -1265,11 +1229,6 @@
ubi_err("bad data_pad %d", sv->data_pad);
goto bad_vid_hdr;
}
-
- if (seb->leb_ver != be32_to_cpu(vidh->leb_ver)) {
- ubi_err("bad leb_ver %u", seb->leb_ver);
- goto bad_vid_hdr;
- }
}
if (!last_seb)
@@ -1299,8 +1258,7 @@
if (err < 0) {
kfree(buf);
return err;
- }
- else if (err)
+ } else if (err)
buf[pnum] = 1;
}
diff --git a/drivers/mtd/ubi/scan.h b/drivers/mtd/ubi/scan.h
index 966b9b6..61df208 100644
--- a/drivers/mtd/ubi/scan.h
+++ b/drivers/mtd/ubi/scan.h
@@ -34,7 +34,6 @@
* @u: unions RB-tree or @list links
* @u.rb: link in the per-volume RB-tree of &struct ubi_scan_leb objects
* @u.list: link in one of the eraseblock lists
- * @leb_ver: logical eraseblock version (obsolete)
*
* One object of this type is allocated for each physical eraseblock during
* scanning.
@@ -49,7 +48,6 @@
struct rb_node rb;
struct list_head list;
} u;
- uint32_t leb_ver;
};
/**
@@ -59,16 +57,16 @@
* @leb_count: number of logical eraseblocks in this volume
* @vol_type: volume type
* @used_ebs: number of used logical eraseblocks in this volume (only for
- * static volumes)
+ * static volumes)
* @last_data_size: amount of data in the last logical eraseblock of this
- * volume (always equivalent to the usable logical eraseblock size in case of
- * dynamic volumes)
+ * volume (always equivalent to the usable logical eraseblock
+ * size in case of dynamic volumes)
* @data_pad: how many bytes at the end of logical eraseblocks of this volume
- * are not used (due to volume alignment)
+ * are not used (due to volume alignment)
* @compat: compatibility flags of this volume
* @rb: link in the volume RB-tree
* @root: root of the RB-tree containing all the eraseblock belonging to this
- * volume (&struct ubi_scan_leb objects)
+ * volume (&struct ubi_scan_leb objects)
*
* One object of this type is allocated for each volume during scanning.
*/
@@ -92,8 +90,8 @@
* @free: list of free physical eraseblocks
* @erase: list of physical eraseblocks which have to be erased
* @alien: list of physical eraseblocks which should not be used by UBI (e.g.,
+ * those belonging to "preserve"-compatible internal volumes)
* @bad_peb_count: count of bad physical eraseblocks
- * those belonging to "preserve"-compatible internal volumes)
* @vols_found: number of volumes found during scanning
* @highest_vol_id: highest volume ID
* @alien_peb_count: count of physical eraseblocks in the @alien list
@@ -106,8 +104,8 @@
* @ec_count: a temporary variable used when calculating @mean_ec
*
* This data structure contains the result of scanning and may be used by other
- * UBI units to build final UBI data structures, further error-recovery and so
- * on.
+ * UBI sub-systems to build final UBI data structures, further error-recovery
+ * and so on.
*/
struct ubi_scan_info {
struct rb_root volumes;
@@ -132,8 +130,7 @@
struct ubi_vid_hdr;
/*
- * ubi_scan_move_to_list - move a physical eraseblock from the volume tree to a
- * list.
+ * ubi_scan_move_to_list - move a PEB from the volume tree to a list.
*
* @sv: volume scanning information
* @seb: scanning eraseblock infprmation
diff --git a/drivers/mtd/ubi/ubi-media.h b/drivers/mtd/ubi/ubi-media.h
index c3185d9..2ad9404 100644
--- a/drivers/mtd/ubi/ubi-media.h
+++ b/drivers/mtd/ubi/ubi-media.h
@@ -98,10 +98,11 @@
* Compatibility constants used by internal volumes.
*
* @UBI_COMPAT_DELETE: delete this internal volume before anything is written
- * to the flash
+ * to the flash
* @UBI_COMPAT_RO: attach this device in read-only mode
* @UBI_COMPAT_PRESERVE: preserve this internal volume - do not touch its
- * physical eraseblocks, don't allow the wear-leveling unit to move them
+ * physical eraseblocks, don't allow the wear-leveling
+ * sub-system to move them
* @UBI_COMPAT_REJECT: reject this UBI image
*/
enum {
@@ -123,7 +124,7 @@
* struct ubi_ec_hdr - UBI erase counter header.
* @magic: erase counter header magic number (%UBI_EC_HDR_MAGIC)
* @version: version of UBI implementation which is supposed to accept this
- * UBI image
+ * UBI image
* @padding1: reserved for future, zeroes
* @ec: the erase counter
* @vid_hdr_offset: where the VID header starts
@@ -159,24 +160,23 @@
* struct ubi_vid_hdr - on-flash UBI volume identifier header.
* @magic: volume identifier header magic number (%UBI_VID_HDR_MAGIC)
* @version: UBI implementation version which is supposed to accept this UBI
- * image (%UBI_VERSION)
+ * image (%UBI_VERSION)
* @vol_type: volume type (%UBI_VID_DYNAMIC or %UBI_VID_STATIC)
* @copy_flag: if this logical eraseblock was copied from another physical
- * eraseblock (for wear-leveling reasons)
+ * eraseblock (for wear-leveling reasons)
* @compat: compatibility of this volume (%0, %UBI_COMPAT_DELETE,
- * %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT)
+ * %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT)
* @vol_id: ID of this volume
* @lnum: logical eraseblock number
- * @leb_ver: version of this logical eraseblock (IMPORTANT: obsolete, to be
- * removed, kept only for not breaking older UBI users)
+ * @padding1: reserved for future, zeroes
* @data_size: how many bytes of data this logical eraseblock contains
* @used_ebs: total number of used logical eraseblocks in this volume
* @data_pad: how many bytes at the end of this physical eraseblock are not
- * used
+ * used
* @data_crc: CRC checksum of the data stored in this logical eraseblock
- * @padding1: reserved for future, zeroes
- * @sqnum: sequence number
* @padding2: reserved for future, zeroes
+ * @sqnum: sequence number
+ * @padding3: reserved for future, zeroes
* @hdr_crc: volume identifier header CRC checksum
*
* The @sqnum is the value of the global sequence counter at the time when this
@@ -224,10 +224,6 @@
* checksum is correct, this physical eraseblock is selected (P1). Otherwise
* the older one (P) is selected.
*
- * Note, there is an obsolete @leb_ver field which was used instead of @sqnum
- * in the past. But it is not used anymore and we keep it in order to be able
- * to deal with old UBI images. It will be removed at some point.
- *
* There are 2 sorts of volumes in UBI: user volumes and internal volumes.
* Internal volumes are not seen from outside and are used for various internal
* UBI purposes. In this implementation there is only one internal volume - the
@@ -248,9 +244,9 @@
* The @data_crc field contains the CRC checksum of the contents of the logical
* eraseblock if this is a static volume. In case of dynamic volumes, it does
* not contain the CRC checksum as a rule. The only exception is when the
- * data of the physical eraseblock was moved by the wear-leveling unit, then
- * the wear-leveling unit calculates the data CRC and stores it in the
- * @data_crc field. And of course, the @copy_flag is %in this case.
+ * data of the physical eraseblock was moved by the wear-leveling sub-system,
+ * then the wear-leveling sub-system calculates the data CRC and stores it in
+ * the @data_crc field. And of course, the @copy_flag is %1 in this case.
*
* The @data_size field is used only for static volumes because UBI has to know
* how many bytes of data are stored in this eraseblock. For dynamic volumes,
@@ -277,14 +273,14 @@
__u8 compat;
__be32 vol_id;
__be32 lnum;
- __be32 leb_ver; /* obsolete, to be removed, don't use */
+ __u8 padding1[4];
__be32 data_size;
__be32 used_ebs;
__be32 data_pad;
__be32 data_crc;
- __u8 padding1[4];
+ __u8 padding2[4];
__be64 sqnum;
- __u8 padding2[12];
+ __u8 padding3[12];
__be32 hdr_crc;
} __attribute__ ((packed));
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 67dcbd1..1c3fa18 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -74,15 +74,15 @@
#define UBI_IO_RETRIES 3
/*
- * Error codes returned by the I/O unit.
+ * Error codes returned by the I/O sub-system.
*
* UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only
- * 0xFF bytes
+ * %0xFF bytes
* UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a
- * valid erase counter header, and the rest are %0xFF bytes
+ * valid erase counter header, and the rest are %0xFF bytes
* UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC)
* UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or
- * CRC)
+ * CRC)
* UBI_IO_BITFLIPS: bit-flips were detected and corrected
*/
enum {
@@ -99,9 +99,9 @@
* @ec: erase counter
* @pnum: physical eraseblock number
*
- * This data structure is used in the WL unit. Each physical eraseblock has a
- * corresponding &struct wl_entry object which may be kept in different
- * RB-trees. See WL unit for details.
+ * This data structure is used in the WL sub-system. Each physical eraseblock
+ * has a corresponding &struct wl_entry object which may be kept in different
+ * RB-trees. See WL sub-system for details.
*/
struct ubi_wl_entry {
struct rb_node rb;
@@ -118,10 +118,10 @@
* @mutex: read/write mutex to implement read/write access serialization to
* the (@vol_id, @lnum) logical eraseblock
*
- * This data structure is used in the EBA unit to implement per-LEB locking.
- * When a logical eraseblock is being locked - corresponding
+ * This data structure is used in the EBA sub-system to implement per-LEB
+ * locking. When a logical eraseblock is being locked - corresponding
* &struct ubi_ltree_entry object is inserted to the lock tree (@ubi->ltree).
- * See EBA unit for details.
+ * See EBA sub-system for details.
*/
struct ubi_ltree_entry {
struct rb_node rb;
@@ -131,6 +131,27 @@
struct rw_semaphore mutex;
};
+/**
+ * struct ubi_rename_entry - volume re-name description data structure.
+ * @new_name_len: new volume name length
+ * @new_name: new volume name
+ * @remove: if not zero, this volume should be removed, not re-named
+ * @desc: descriptor of the volume
+ * @list: links re-name entries into a list
+ *
+ * This data structure is utilized in the multiple volume re-name code. Namely,
+ * UBI first creates a list of &struct ubi_rename_entry objects from the
+ * &struct ubi_rnvol_req request object, and then utilizes this list to do all
+ * the job.
+ */
+struct ubi_rename_entry {
+ int new_name_len;
+ char new_name[UBI_VOL_NAME_MAX + 1];
+ int remove;
+ struct ubi_volume_desc *desc;
+ struct list_head list;
+};
+
struct ubi_volume_desc;
/**
@@ -206,7 +227,7 @@
int alignment;
int data_pad;
int name_len;
- char name[UBI_VOL_NAME_MAX+1];
+ char name[UBI_VOL_NAME_MAX + 1];
int upd_ebs;
int ch_lnum;
@@ -225,7 +246,7 @@
#ifdef CONFIG_MTD_UBI_GLUEBI
/*
* Gluebi-related stuff may be compiled out.
- * TODO: this should not be built into UBI but should be a separate
+ * Note: this should not be built into UBI but should be a separate
* ubimtd driver which works on top of UBI and emulates MTD devices.
*/
struct ubi_volume_desc *gluebi_desc;
@@ -235,8 +256,7 @@
};
/**
- * struct ubi_volume_desc - descriptor of the UBI volume returned when it is
- * opened.
+ * struct ubi_volume_desc - UBI volume descriptor returned when it is opened.
* @vol: reference to the corresponding volume description object
* @mode: open mode (%UBI_READONLY, %UBI_READWRITE, or %UBI_EXCLUSIVE)
*/
@@ -273,7 +293,7 @@
* @vtbl_size: size of the volume table in bytes
* @vtbl: in-RAM volume table copy
* @volumes_mutex: protects on-flash volume table and serializes volume
- * changes, like creation, deletion, update, resize
+ * changes, like creation, deletion, update, re-size and re-name
*
* @max_ec: current highest erase counter value
* @mean_ec: current mean erase counter value
@@ -293,6 +313,7 @@
* @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works
* fields
* @move_mutex: serializes eraseblock moves
+ * @work_sem: synchronizes the WL worker with user tasks
* @wl_scheduled: non-zero if the wear-leveling was scheduled
* @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any
* physical eraseblock
@@ -316,11 +337,11 @@
* @ro_mode: if the UBI device is in read-only mode
* @leb_size: logical eraseblock size
* @leb_start: starting offset of logical eraseblocks within physical
- * eraseblocks
+ * eraseblocks
* @ec_hdr_alsize: size of the EC header aligned to @hdrs_min_io_size
* @vid_hdr_alsize: size of the VID header aligned to @hdrs_min_io_size
* @vid_hdr_offset: starting offset of the volume identifier header (might be
- * unaligned)
+ * unaligned)
* @vid_hdr_aloffset: starting offset of the VID header aligned to
* @hdrs_min_io_size
* @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset
@@ -331,6 +352,8 @@
* @peb_buf1: a buffer of PEB size used for different purposes
* @peb_buf2: another buffer of PEB size used for different purposes
* @buf_mutex: proptects @peb_buf1 and @peb_buf2
+ * @ckvol_mutex: serializes static volume checking when opening
+ * @mult_mutex: serializes operations on multiple volumes, like re-naming
* @dbg_peb_buf: buffer of PEB size used for debugging
* @dbg_buf_mutex: proptects @dbg_peb_buf
*/
@@ -356,16 +379,16 @@
struct mutex volumes_mutex;
int max_ec;
- /* TODO: mean_ec is not updated run-time, fix */
+ /* Note, mean_ec is not updated run-time - should be fixed */
int mean_ec;
- /* EBA unit's stuff */
+ /* EBA sub-system's stuff */
unsigned long long global_sqnum;
spinlock_t ltree_lock;
struct rb_root ltree;
struct mutex alc_mutex;
- /* Wear-leveling unit's stuff */
+ /* Wear-leveling sub-system's stuff */
struct rb_root used;
struct rb_root free;
struct rb_root scrub;
@@ -388,7 +411,7 @@
int thread_enabled;
char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2];
- /* I/O unit's stuff */
+ /* I/O sub-system's stuff */
long long flash_size;
int peb_count;
int peb_size;
@@ -411,6 +434,7 @@
void *peb_buf2;
struct mutex buf_mutex;
struct mutex ckvol_mutex;
+ struct mutex mult_mutex;
#ifdef CONFIG_MTD_UBI_DEBUG
void *dbg_peb_buf;
struct mutex dbg_buf_mutex;
@@ -427,12 +451,15 @@
/* vtbl.c */
int ubi_change_vtbl_record(struct ubi_device *ubi, int idx,
struct ubi_vtbl_record *vtbl_rec);
+int ubi_vtbl_rename_volumes(struct ubi_device *ubi,
+ struct list_head *rename_list);
int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si);
/* vmt.c */
int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req);
-int ubi_remove_volume(struct ubi_volume_desc *desc);
+int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl);
int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs);
+int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list);
int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol);
void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol);
@@ -447,7 +474,8 @@
const void __user *buf, int count);
/* misc.c */
-int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, int length);
+int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf,
+ int length);
int ubi_check_volume(struct ubi_device *ubi, int vol_id);
void ubi_calculate_reserved(struct ubi_device *ubi);
@@ -477,7 +505,6 @@
int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
struct ubi_vid_hdr *vid_hdr);
int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si);
-void ubi_eba_close(const struct ubi_device *ubi);
/* wl.c */
int ubi_wl_get_peb(struct ubi_device *ubi, int dtype);
diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c
index ddaa1a5..8b89cc1 100644
--- a/drivers/mtd/ubi/upd.c
+++ b/drivers/mtd/ubi/upd.c
@@ -39,7 +39,7 @@
*/
#include <linux/err.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/div64.h>
#include "ubi.h"
@@ -56,11 +56,11 @@
int err;
struct ubi_vtbl_record vtbl_rec;
- dbg_msg("set update marker for volume %d", vol->vol_id);
+ dbg_gen("set update marker for volume %d", vol->vol_id);
if (vol->upd_marker) {
ubi_assert(ubi->vtbl[vol->vol_id].upd_marker);
- dbg_msg("already set");
+ dbg_gen("already set");
return 0;
}
@@ -92,7 +92,7 @@
uint64_t tmp;
struct ubi_vtbl_record vtbl_rec;
- dbg_msg("clear update marker for volume %d", vol->vol_id);
+ dbg_gen("clear update marker for volume %d", vol->vol_id);
memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id],
sizeof(struct ubi_vtbl_record));
@@ -133,7 +133,7 @@
int i, err;
uint64_t tmp;
- dbg_msg("start update of volume %d, %llu bytes", vol->vol_id, bytes);
+ dbg_gen("start update of volume %d, %llu bytes", vol->vol_id, bytes);
ubi_assert(!vol->updating && !vol->changing_leb);
vol->updating = 1;
@@ -183,7 +183,7 @@
{
ubi_assert(!vol->updating && !vol->changing_leb);
- dbg_msg("start changing LEB %d:%d, %u bytes",
+ dbg_gen("start changing LEB %d:%d, %u bytes",
vol->vol_id, req->lnum, req->bytes);
if (req->bytes == 0)
return ubi_eba_atomic_leb_change(ubi, vol, req->lnum, NULL, 0,
@@ -237,16 +237,17 @@
int err;
if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
- len = ALIGN(len, ubi->min_io_size);
- memset(buf + len, 0xFF, len - len);
+ int l = ALIGN(len, ubi->min_io_size);
- len = ubi_calc_data_len(ubi, buf, len);
+ memset(buf + len, 0xFF, l - len);
+ len = ubi_calc_data_len(ubi, buf, l);
if (len == 0) {
- dbg_msg("all %d bytes contain 0xFF - skip", len);
+ dbg_gen("all %d bytes contain 0xFF - skip", len);
return 0;
}
- err = ubi_eba_write_leb(ubi, vol, lnum, buf, 0, len, UBI_UNKNOWN);
+ err = ubi_eba_write_leb(ubi, vol, lnum, buf, 0, len,
+ UBI_UNKNOWN);
} else {
/*
* When writing static volume, and this is the last logical
@@ -267,6 +268,7 @@
/**
* ubi_more_update_data - write more update data.
+ * @ubi: UBI device description object
* @vol: volume description object
* @buf: write data (user-space memory buffer)
* @count: how much bytes to write
@@ -283,7 +285,7 @@
uint64_t tmp;
int lnum, offs, err = 0, len, to_write = count;
- dbg_msg("write %d of %lld bytes, %lld already passed",
+ dbg_gen("write %d of %lld bytes, %lld already passed",
count, vol->upd_bytes, vol->upd_received);
if (ubi->ro_mode)
@@ -384,6 +386,7 @@
/**
* ubi_more_leb_change_data - accept more data for atomic LEB change.
+ * @ubi: UBI device description object
* @vol: volume description object
* @buf: write data (user-space memory buffer)
* @count: how much bytes to write
@@ -400,7 +403,7 @@
{
int err;
- dbg_msg("write %d of %lld bytes, %lld already passed",
+ dbg_gen("write %d of %lld bytes, %lld already passed",
count, vol->upd_bytes, vol->upd_received);
if (ubi->ro_mode)
@@ -418,7 +421,8 @@
if (vol->upd_received == vol->upd_bytes) {
int len = ALIGN((int)vol->upd_bytes, ubi->min_io_size);
- memset(vol->upd_buf + vol->upd_bytes, 0xFF, len - vol->upd_bytes);
+ memset(vol->upd_buf + vol->upd_bytes, 0xFF,
+ len - vol->upd_bytes);
len = ubi_calc_data_len(ubi, vol->upd_buf, len);
err = ubi_eba_atomic_leb_change(ubi, vol, vol->ch_lnum,
vol->upd_buf, len, UBI_UNKNOWN);
diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
index 5be58d8..3531ca9 100644
--- a/drivers/mtd/ubi/vmt.c
+++ b/drivers/mtd/ubi/vmt.c
@@ -28,9 +28,9 @@
#include "ubi.h"
#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
-static void paranoid_check_volumes(struct ubi_device *ubi);
+static int paranoid_check_volumes(struct ubi_device *ubi);
#else
-#define paranoid_check_volumes(ubi)
+#define paranoid_check_volumes(ubi) 0
#endif
static ssize_t vol_attribute_show(struct device *dev,
@@ -127,6 +127,7 @@
{
struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev);
+ kfree(vol->eba_tbl);
kfree(vol);
}
@@ -201,7 +202,7 @@
*/
int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
{
- int i, err, vol_id = req->vol_id, dont_free = 0;
+ int i, err, vol_id = req->vol_id, do_free = 1;
struct ubi_volume *vol;
struct ubi_vtbl_record vtbl_rec;
uint64_t bytes;
@@ -217,7 +218,7 @@
spin_lock(&ubi->volumes_lock);
if (vol_id == UBI_VOL_NUM_AUTO) {
/* Find unused volume ID */
- dbg_msg("search for vacant volume ID");
+ dbg_gen("search for vacant volume ID");
for (i = 0; i < ubi->vtbl_slots; i++)
if (!ubi->volumes[i]) {
vol_id = i;
@@ -232,7 +233,7 @@
req->vol_id = vol_id;
}
- dbg_msg("volume ID %d, %llu bytes, type %d, name %s",
+ dbg_gen("volume ID %d, %llu bytes, type %d, name %s",
vol_id, (unsigned long long)req->bytes,
(int)req->vol_type, req->name);
@@ -252,7 +253,7 @@
goto out_unlock;
}
- /* Calculate how many eraseblocks are requested */
+ /* Calculate how many eraseblocks are requested */
vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment;
bytes = req->bytes;
if (do_div(bytes, vol->usable_leb_size))
@@ -274,7 +275,7 @@
vol->data_pad = ubi->leb_size % vol->alignment;
vol->vol_type = req->vol_type;
vol->name_len = req->name_len;
- memcpy(vol->name, req->name, vol->name_len + 1);
+ memcpy(vol->name, req->name, vol->name_len);
vol->ubi = ubi;
/*
@@ -349,7 +350,7 @@
vtbl_rec.vol_type = UBI_VID_DYNAMIC;
else
vtbl_rec.vol_type = UBI_VID_STATIC;
- memcpy(vtbl_rec.name, vol->name, vol->name_len + 1);
+ memcpy(vtbl_rec.name, vol->name, vol->name_len);
err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec);
if (err)
@@ -360,19 +361,19 @@
ubi->vol_count += 1;
spin_unlock(&ubi->volumes_lock);
- paranoid_check_volumes(ubi);
- return 0;
+ err = paranoid_check_volumes(ubi);
+ return err;
out_sysfs:
/*
- * We have registered our device, we should not free the volume*
+ * We have registered our device, we should not free the volume
* description object in this function in case of an error - it is
* freed by the release function.
*
* Get device reference to prevent the release function from being
* called just after sysfs has been closed.
*/
- dont_free = 1;
+ do_free = 0;
get_device(&vol->dev);
volume_sysfs_close(vol);
out_gluebi:
@@ -382,17 +383,18 @@
out_cdev:
cdev_del(&vol->cdev);
out_mapping:
- kfree(vol->eba_tbl);
+ if (do_free)
+ kfree(vol->eba_tbl);
out_acc:
spin_lock(&ubi->volumes_lock);
ubi->rsvd_pebs -= vol->reserved_pebs;
ubi->avail_pebs += vol->reserved_pebs;
out_unlock:
spin_unlock(&ubi->volumes_lock);
- if (dont_free)
- put_device(&vol->dev);
- else
+ if (do_free)
kfree(vol);
+ else
+ put_device(&vol->dev);
ubi_err("cannot create volume %d, error %d", vol_id, err);
return err;
}
@@ -400,19 +402,20 @@
/**
* ubi_remove_volume - remove volume.
* @desc: volume descriptor
+ * @no_vtbl: do not change volume table if not zero
*
* This function removes volume described by @desc. The volume has to be opened
* in "exclusive" mode. Returns zero in case of success and a negative error
* code in case of failure. The caller has to have the @ubi->volumes_mutex
* locked.
*/
-int ubi_remove_volume(struct ubi_volume_desc *desc)
+int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl)
{
struct ubi_volume *vol = desc->vol;
struct ubi_device *ubi = vol->ubi;
int i, err, vol_id = vol->vol_id, reserved_pebs = vol->reserved_pebs;
- dbg_msg("remove UBI volume %d", vol_id);
+ dbg_gen("remove UBI volume %d", vol_id);
ubi_assert(desc->mode == UBI_EXCLUSIVE);
ubi_assert(vol == ubi->volumes[vol_id]);
@@ -435,9 +438,11 @@
if (err)
goto out_err;
- err = ubi_change_vtbl_record(ubi, vol_id, NULL);
- if (err)
- goto out_err;
+ if (!no_vtbl) {
+ err = ubi_change_vtbl_record(ubi, vol_id, NULL);
+ if (err)
+ goto out_err;
+ }
for (i = 0; i < vol->reserved_pebs; i++) {
err = ubi_eba_unmap_leb(ubi, vol, i);
@@ -445,8 +450,6 @@
goto out_err;
}
- kfree(vol->eba_tbl);
- vol->eba_tbl = NULL;
cdev_del(&vol->cdev);
volume_sysfs_close(vol);
@@ -465,8 +468,9 @@
ubi->vol_count -= 1;
spin_unlock(&ubi->volumes_lock);
- paranoid_check_volumes(ubi);
- return 0;
+ if (!no_vtbl)
+ err = paranoid_check_volumes(ubi);
+ return err;
out_err:
ubi_err("cannot remove volume %d, error %d", vol_id, err);
@@ -497,7 +501,7 @@
if (ubi->ro_mode)
return -EROFS;
- dbg_msg("re-size volume %d to from %d to %d PEBs",
+ dbg_gen("re-size volume %d to from %d to %d PEBs",
vol_id, vol->reserved_pebs, reserved_pebs);
if (vol->vol_type == UBI_STATIC_VOLUME &&
@@ -586,8 +590,8 @@
(long long)vol->used_ebs * vol->usable_leb_size;
}
- paranoid_check_volumes(ubi);
- return 0;
+ err = paranoid_check_volumes(ubi);
+ return err;
out_acc:
if (pebs > 0) {
@@ -602,6 +606,44 @@
}
/**
+ * ubi_rename_volumes - re-name UBI volumes.
+ * @ubi: UBI device description object
+ * @rename_list: list of &struct ubi_rename_entry objects
+ *
+ * This function re-names or removes volumes specified in the re-name list.
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list)
+{
+ int err;
+ struct ubi_rename_entry *re;
+
+ err = ubi_vtbl_rename_volumes(ubi, rename_list);
+ if (err)
+ return err;
+
+ list_for_each_entry(re, rename_list, list) {
+ if (re->remove) {
+ err = ubi_remove_volume(re->desc, 1);
+ if (err)
+ break;
+ } else {
+ struct ubi_volume *vol = re->desc->vol;
+
+ spin_lock(&ubi->volumes_lock);
+ vol->name_len = re->new_name_len;
+ memcpy(vol->name, re->new_name, re->new_name_len + 1);
+ spin_unlock(&ubi->volumes_lock);
+ }
+ }
+
+ if (!err)
+ err = paranoid_check_volumes(ubi);
+ return err;
+}
+
+/**
* ubi_add_volume - add volume.
* @ubi: UBI device description object
* @vol: volume description object
@@ -615,8 +657,7 @@
int err, vol_id = vol->vol_id;
dev_t dev;
- dbg_msg("add volume %d", vol_id);
- ubi_dbg_dump_vol_info(vol);
+ dbg_gen("add volume %d", vol_id);
/* Register character device for the volume */
cdev_init(&vol->cdev, &ubi_vol_cdev_operations);
@@ -650,8 +691,8 @@
return err;
}
- paranoid_check_volumes(ubi);
- return 0;
+ err = paranoid_check_volumes(ubi);
+ return err;
out_gluebi:
err = ubi_destroy_gluebi(vol);
@@ -672,7 +713,7 @@
{
int err;
- dbg_msg("free volume %d", vol->vol_id);
+ dbg_gen("free volume %d", vol->vol_id);
ubi->volumes[vol->vol_id] = NULL;
err = ubi_destroy_gluebi(vol);
@@ -686,8 +727,10 @@
* paranoid_check_volume - check volume information.
* @ubi: UBI device description object
* @vol_id: volume ID
+ *
+ * Returns zero if volume is all right and a negative error code if not.
*/
-static void paranoid_check_volume(struct ubi_device *ubi, int vol_id)
+static int paranoid_check_volume(struct ubi_device *ubi, int vol_id)
{
int idx = vol_id2idx(ubi, vol_id);
int reserved_pebs, alignment, data_pad, vol_type, name_len, upd_marker;
@@ -705,16 +748,7 @@
goto fail;
}
spin_unlock(&ubi->volumes_lock);
- return;
- }
-
- if (vol->exclusive) {
- /*
- * The volume may be being created at the moment, do not check
- * it (e.g., it may be in the middle of ubi_create_volume().
- */
- spin_unlock(&ubi->volumes_lock);
- return;
+ return 0;
}
if (vol->reserved_pebs < 0 || vol->alignment < 0 || vol->data_pad < 0 ||
@@ -727,7 +761,7 @@
goto fail;
}
- n = vol->alignment % ubi->min_io_size;
+ n = vol->alignment & (ubi->min_io_size - 1);
if (vol->alignment != 1 && n) {
ubi_err("alignment is not multiple of min I/O unit");
goto fail;
@@ -824,31 +858,39 @@
if (alignment != vol->alignment || data_pad != vol->data_pad ||
upd_marker != vol->upd_marker || vol_type != vol->vol_type ||
- name_len!= vol->name_len || strncmp(name, vol->name, name_len)) {
+ name_len != vol->name_len || strncmp(name, vol->name, name_len)) {
ubi_err("volume info is different");
goto fail;
}
spin_unlock(&ubi->volumes_lock);
- return;
+ return 0;
fail:
ubi_err("paranoid check failed for volume %d", vol_id);
- ubi_dbg_dump_vol_info(vol);
+ if (vol)
+ ubi_dbg_dump_vol_info(vol);
ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id);
spin_unlock(&ubi->volumes_lock);
- BUG();
+ return -EINVAL;
}
/**
* paranoid_check_volumes - check information about all volumes.
* @ubi: UBI device description object
+ *
+ * Returns zero if volumes are all right and a negative error code if not.
*/
-static void paranoid_check_volumes(struct ubi_device *ubi)
+static int paranoid_check_volumes(struct ubi_device *ubi)
{
- int i;
+ int i, err = 0;
- for (i = 0; i < ubi->vtbl_slots; i++)
- paranoid_check_volume(ubi, i);
+ for (i = 0; i < ubi->vtbl_slots; i++) {
+ err = paranoid_check_volume(ubi, i);
+ if (err)
+ break;
+ }
+
+ return err;
}
#endif
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index af36b12..217d0e1 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -115,8 +115,58 @@
}
/**
- * vtbl_check - check if volume table is not corrupted and contains sensible
- * data.
+ * ubi_vtbl_rename_volumes - rename UBI volumes in the volume table.
+ * @ubi: UBI device description object
+ * @rename_list: list of &struct ubi_rename_entry objects
+ *
+ * This function re-names multiple volumes specified in @rename_list in the
+ * volume table. Returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int ubi_vtbl_rename_volumes(struct ubi_device *ubi,
+ struct list_head *rename_list)
+{
+ int i, err;
+ struct ubi_rename_entry *re;
+ struct ubi_volume *layout_vol;
+
+ list_for_each_entry(re, rename_list, list) {
+ uint32_t crc;
+ struct ubi_volume *vol = re->desc->vol;
+ struct ubi_vtbl_record *vtbl_rec = &ubi->vtbl[vol->vol_id];
+
+ if (re->remove) {
+ memcpy(vtbl_rec, &empty_vtbl_record,
+ sizeof(struct ubi_vtbl_record));
+ continue;
+ }
+
+ vtbl_rec->name_len = cpu_to_be16(re->new_name_len);
+ memcpy(vtbl_rec->name, re->new_name, re->new_name_len);
+ memset(vtbl_rec->name + re->new_name_len, 0,
+ UBI_VOL_NAME_MAX + 1 - re->new_name_len);
+ crc = crc32(UBI_CRC32_INIT, vtbl_rec,
+ UBI_VTBL_RECORD_SIZE_CRC);
+ vtbl_rec->crc = cpu_to_be32(crc);
+ }
+
+ layout_vol = ubi->volumes[vol_id2idx(ubi, UBI_LAYOUT_VOLUME_ID)];
+ for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) {
+ err = ubi_eba_unmap_leb(ubi, layout_vol, i);
+ if (err)
+ return err;
+
+ err = ubi_eba_write_leb(ubi, layout_vol, i, ubi->vtbl, 0,
+ ubi->vtbl_size, UBI_LONGTERM);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * vtbl_check - check if volume table is not corrupted and sensible.
* @ubi: UBI device description object
* @vtbl: volume table
*
@@ -127,7 +177,7 @@
const struct ubi_vtbl_record *vtbl)
{
int i, n, reserved_pebs, alignment, data_pad, vol_type, name_len;
- int upd_marker;
+ int upd_marker, err;
uint32_t crc;
const char *name;
@@ -153,7 +203,7 @@
if (reserved_pebs == 0) {
if (memcmp(&vtbl[i], &empty_vtbl_record,
UBI_VTBL_RECORD_SIZE)) {
- dbg_err("bad empty record");
+ err = 2;
goto bad;
}
continue;
@@ -161,56 +211,57 @@
if (reserved_pebs < 0 || alignment < 0 || data_pad < 0 ||
name_len < 0) {
- dbg_err("negative values");
+ err = 3;
goto bad;
}
if (alignment > ubi->leb_size || alignment == 0) {
- dbg_err("bad alignment");
+ err = 4;
goto bad;
}
- n = alignment % ubi->min_io_size;
+ n = alignment & (ubi->min_io_size - 1);
if (alignment != 1 && n) {
- dbg_err("alignment is not multiple of min I/O unit");
+ err = 5;
goto bad;
}
n = ubi->leb_size % alignment;
if (data_pad != n) {
dbg_err("bad data_pad, has to be %d", n);
+ err = 6;
goto bad;
}
if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) {
- dbg_err("bad vol_type");
+ err = 7;
goto bad;
}
if (upd_marker != 0 && upd_marker != 1) {
- dbg_err("bad upd_marker");
+ err = 8;
goto bad;
}
if (reserved_pebs > ubi->good_peb_count) {
dbg_err("too large reserved_pebs, good PEBs %d",
ubi->good_peb_count);
+ err = 9;
goto bad;
}
if (name_len > UBI_VOL_NAME_MAX) {
- dbg_err("too long volume name, max %d",
- UBI_VOL_NAME_MAX);
+ err = 10;
goto bad;
}
if (name[0] == '\0') {
- dbg_err("NULL volume name");
+ err = 11;
goto bad;
}
if (name_len != strnlen(name, name_len + 1)) {
- dbg_err("bad name_len");
+ err = 12;
goto bad;
}
}
@@ -235,7 +286,7 @@
return 0;
bad:
- ubi_err("volume table check failed, record %d", i);
+ ubi_err("volume table check failed: record %d, error %d", i, err);
ubi_dbg_dump_vtbl_record(&vtbl[i], i);
return -EINVAL;
}
@@ -287,7 +338,6 @@
vid_hdr->data_pad = cpu_to_be32(0);
vid_hdr->lnum = cpu_to_be32(copy);
vid_hdr->sqnum = cpu_to_be64(++si->max_sqnum);
- vid_hdr->leb_ver = cpu_to_be32(old_seb ? old_seb->leb_ver + 1: 0);
/* The EC header is already there, write the VID header */
err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr);
@@ -370,7 +420,7 @@
* to LEB 0.
*/
- dbg_msg("check layout volume");
+ dbg_gen("check layout volume");
/* Read both LEB 0 and LEB 1 into memory */
ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) {
@@ -384,7 +434,16 @@
err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0,
ubi->vtbl_size);
if (err == UBI_IO_BITFLIPS || err == -EBADMSG)
- /* Scrub the PEB later */
+ /*
+ * Scrub the PEB later. Note, -EBADMSG indicates an
+ * uncorrectable ECC error, but we have our own CRC and
+ * the data will be checked later. If the data is OK,
+ * the PEB will be scrubbed (because we set
+ * seb->scrub). If the data is not OK, the contents of
+ * the PEB will be recovered from the second copy, and
+ * seb->scrub will be cleared in
+ * 'ubi_scan_add_used()'.
+ */
seb->scrub = 1;
else if (err)
goto out_free;
@@ -400,7 +459,8 @@
if (!leb_corrupted[0]) {
/* LEB 0 is OK */
if (leb[1])
- leb_corrupted[1] = memcmp(leb[0], leb[1], ubi->vtbl_size);
+ leb_corrupted[1] = memcmp(leb[0], leb[1],
+ ubi->vtbl_size);
if (leb_corrupted[1]) {
ubi_warn("volume table copy #2 is corrupted");
err = create_vtbl(ubi, si, 1, leb[0]);
@@ -620,30 +680,32 @@
static int check_sv(const struct ubi_volume *vol,
const struct ubi_scan_volume *sv)
{
+ int err;
+
if (sv->highest_lnum >= vol->reserved_pebs) {
- dbg_err("bad highest_lnum");
+ err = 1;
goto bad;
}
if (sv->leb_count > vol->reserved_pebs) {
- dbg_err("bad leb_count");
+ err = 2;
goto bad;
}
if (sv->vol_type != vol->vol_type) {
- dbg_err("bad vol_type");
+ err = 3;
goto bad;
}
if (sv->used_ebs > vol->reserved_pebs) {
- dbg_err("bad used_ebs");
+ err = 4;
goto bad;
}
if (sv->data_pad != vol->data_pad) {
- dbg_err("bad data_pad");
+ err = 5;
goto bad;
}
return 0;
bad:
- ubi_err("bad scanning information");
+ ubi_err("bad scanning information, error %d", err);
ubi_dbg_dump_sv(sv);
ubi_dbg_dump_vol_info(vol);
return -EINVAL;
@@ -672,14 +734,13 @@
return -EINVAL;
}
- if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT&&
+ if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT &&
si->highest_vol_id < UBI_INTERNAL_VOL_START) {
ubi_err("too large volume ID %d found by scanning",
si->highest_vol_id);
return -EINVAL;
}
-
for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) {
cond_resched();
@@ -717,8 +778,7 @@
}
/**
- * ubi_read_volume_table - read volume table.
- * information.
+ * ubi_read_volume_table - read the volume table.
* @ubi: UBI device description object
* @si: scanning information
*
@@ -797,11 +857,10 @@
out_free:
vfree(ubi->vtbl);
- for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++)
- if (ubi->volumes[i]) {
- kfree(ubi->volumes[i]);
- ubi->volumes[i] = NULL;
- }
+ for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) {
+ kfree(ubi->volumes[i]);
+ ubi->volumes[i] = NULL;
+ }
return err;
}
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index a471a49..05d7093 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -19,22 +19,22 @@
*/
/*
- * UBI wear-leveling unit.
+ * UBI wear-leveling sub-system.
*
- * This unit is responsible for wear-leveling. It works in terms of physical
- * eraseblocks and erase counters and knows nothing about logical eraseblocks,
- * volumes, etc. From this unit's perspective all physical eraseblocks are of
- * two types - used and free. Used physical eraseblocks are those that were
- * "get" by the 'ubi_wl_get_peb()' function, and free physical eraseblocks are
- * those that were put by the 'ubi_wl_put_peb()' function.
+ * This sub-system is responsible for wear-leveling. It works in terms of
+ * physical eraseblocks and erase counters and knows nothing about logical
+ * eraseblocks, volumes, etc. From this sub-system's perspective all physical
+ * eraseblocks are of two types - used and free. Used physical eraseblocks are
+ * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical
+ * eraseblocks are those that were put by the 'ubi_wl_put_peb()' function.
*
* Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter
- * header. The rest of the physical eraseblock contains only 0xFF bytes.
+ * header. The rest of the physical eraseblock contains only %0xFF bytes.
*
- * When physical eraseblocks are returned to the WL unit by means of the
+ * When physical eraseblocks are returned to the WL sub-system by means of the
* 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is
* done asynchronously in context of the per-UBI device background thread,
- * which is also managed by the WL unit.
+ * which is also managed by the WL sub-system.
*
* The wear-leveling is ensured by means of moving the contents of used
* physical eraseblocks with low erase counter to free physical eraseblocks
@@ -43,34 +43,36 @@
* The 'ubi_wl_get_peb()' function accepts data type hints which help to pick
* an "optimal" physical eraseblock. For example, when it is known that the
* physical eraseblock will be "put" soon because it contains short-term data,
- * the WL unit may pick a free physical eraseblock with low erase counter, and
- * so forth.
+ * the WL sub-system may pick a free physical eraseblock with low erase
+ * counter, and so forth.
*
- * If the WL unit fails to erase a physical eraseblock, it marks it as bad.
+ * If the WL sub-system fails to erase a physical eraseblock, it marks it as
+ * bad.
*
- * This unit is also responsible for scrubbing. If a bit-flip is detected in a
- * physical eraseblock, it has to be moved. Technically this is the same as
- * moving it for wear-leveling reasons.
+ * This sub-system is also responsible for scrubbing. If a bit-flip is detected
+ * in a physical eraseblock, it has to be moved. Technically this is the same
+ * as moving it for wear-leveling reasons.
*
- * As it was said, for the UBI unit all physical eraseblocks are either "free"
- * or "used". Free eraseblock are kept in the @wl->free RB-tree, while used
- * eraseblocks are kept in a set of different RB-trees: @wl->used,
+ * As it was said, for the UBI sub-system all physical eraseblocks are either
+ * "free" or "used". Free eraseblocks are kept in the @wl->free RB-tree, while
+ * used eraseblocks are kept in a set of different RB-trees: @wl->used,
* @wl->prot.pnum, @wl->prot.aec, and @wl->scrub.
*
* Note, in this implementation, we keep a small in-RAM object for each physical
* eraseblock. This is surely not a scalable solution. But it appears to be good
* enough for moderately large flashes and it is simple. In future, one may
- * re-work this unit and make it more scalable.
+ * re-work this sub-system and make it more scalable.
*
- * At the moment this unit does not utilize the sequence number, which was
- * introduced relatively recently. But it would be wise to do this because the
- * sequence number of a logical eraseblock characterizes how old is it. For
+ * At the moment this sub-system does not utilize the sequence number, which
+ * was introduced relatively recently. But it would be wise to do this because
+ * the sequence number of a logical eraseblock characterizes how old it is. For
* example, when we move a PEB with low erase counter, and we need to pick the
* target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
* pick target PEB with an average EC if our PEB is not very "old". This is a
- * room for future re-works of the WL unit.
+ * room for future re-works of the WL sub-system.
*
- * FIXME: looks too complex, should be simplified (later).
+ * Note: the stuff with protection trees looks too complex and is difficult to
+ * understand. Should be fixed.
*/
#include <linux/slab.h>
@@ -92,20 +94,21 @@
/*
* Maximum difference between two erase counters. If this threshold is
- * exceeded, the WL unit starts moving data from used physical eraseblocks with
- * low erase counter to free physical eraseblocks with high erase counter.
+ * exceeded, the WL sub-system starts moving data from used physical
+ * eraseblocks with low erase counter to free physical eraseblocks with high
+ * erase counter.
*/
#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD
/*
- * When a physical eraseblock is moved, the WL unit has to pick the target
+ * When a physical eraseblock is moved, the WL sub-system has to pick the target
* physical eraseblock to move to. The simplest way would be just to pick the
* one with the highest erase counter. But in certain workloads this could lead
* to an unlimited wear of one or few physical eraseblock. Indeed, imagine a
* situation when the picked physical eraseblock is constantly erased after the
* data is written to it. So, we have a constant which limits the highest erase
- * counter of the free physical eraseblock to pick. Namely, the WL unit does
- * not pick eraseblocks with erase counter greater then the lowest erase
+ * counter of the free physical eraseblock to pick. Namely, the WL sub-system
+ * does not pick eraseblocks with erase counter greater than the lowest erase
* counter plus %WL_FREE_MAX_DIFF.
*/
#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)
@@ -123,11 +126,11 @@
* @abs_ec: the absolute erase counter value when the protection ends
* @e: the wear-leveling entry of the physical eraseblock under protection
*
- * When the WL unit returns a physical eraseblock, the physical eraseblock is
- * protected from being moved for some "time". For this reason, the physical
- * eraseblock is not directly moved from the @wl->free tree to the @wl->used
- * tree. There is one more tree in between where this physical eraseblock is
- * temporarily stored (@wl->prot).
+ * When the WL sub-system returns a physical eraseblock, the physical
+ * eraseblock is protected from being moved for some "time". For this reason,
+ * the physical eraseblock is not directly moved from the @wl->free tree to the
+ * @wl->used tree. There is one more tree in between where this physical
+ * eraseblock is temporarily stored (@wl->prot).
*
* All this protection stuff is needed because:
* o we don't want to move physical eraseblocks just after we have given them
@@ -175,7 +178,6 @@
* @list: a link in the list of pending works
* @func: worker function
* @priv: private data of the worker function
- *
* @e: physical eraseblock to erase
* @torture: if the physical eraseblock has to be tortured
*
@@ -473,52 +475,47 @@
}
switch (dtype) {
- case UBI_LONGTERM:
- /*
- * For long term data we pick a physical eraseblock
- * with high erase counter. But the highest erase
- * counter we can pick is bounded by the the lowest
- * erase counter plus %WL_FREE_MAX_DIFF.
- */
- e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
- protect = LT_PROTECTION;
- break;
- case UBI_UNKNOWN:
- /*
- * For unknown data we pick a physical eraseblock with
- * medium erase counter. But we by no means can pick a
- * physical eraseblock with erase counter greater or
- * equivalent than the lowest erase counter plus
- * %WL_FREE_MAX_DIFF.
- */
- first = rb_entry(rb_first(&ubi->free),
- struct ubi_wl_entry, rb);
- last = rb_entry(rb_last(&ubi->free),
- struct ubi_wl_entry, rb);
+ case UBI_LONGTERM:
+ /*
+ * For long term data we pick a physical eraseblock with high
+ * erase counter. But the highest erase counter we can pick is
+ * bounded by the lowest erase counter plus
+ * %WL_FREE_MAX_DIFF.
+ */
+ e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
+ protect = LT_PROTECTION;
+ break;
+ case UBI_UNKNOWN:
+ /*
+ * For unknown data we pick a physical eraseblock with medium
+ * erase counter. But we by no means can pick a physical
+ * eraseblock with erase counter greater than or equal to the
+ * lowest erase counter plus %WL_FREE_MAX_DIFF.
+ */
+ first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb);
+ last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, rb);
- if (last->ec - first->ec < WL_FREE_MAX_DIFF)
- e = rb_entry(ubi->free.rb_node,
- struct ubi_wl_entry, rb);
- else {
- medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
- e = find_wl_entry(&ubi->free, medium_ec);
- }
- protect = U_PROTECTION;
- break;
- case UBI_SHORTTERM:
- /*
- * For short term data we pick a physical eraseblock
- * with the lowest erase counter as we expect it will
- * be erased soon.
- */
- e = rb_entry(rb_first(&ubi->free),
- struct ubi_wl_entry, rb);
- protect = ST_PROTECTION;
- break;
- default:
- protect = 0;
- e = NULL;
- BUG();
+ if (last->ec - first->ec < WL_FREE_MAX_DIFF)
+ e = rb_entry(ubi->free.rb_node,
+ struct ubi_wl_entry, rb);
+ else {
+ medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
+ e = find_wl_entry(&ubi->free, medium_ec);
+ }
+ protect = U_PROTECTION;
+ break;
+ case UBI_SHORTTERM:
+ /*
+ * For short term data we pick a physical eraseblock with the
+ * lowest erase counter as we expect it will be erased soon.
+ */
+ e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb);
+ protect = ST_PROTECTION;
+ break;
+ default:
+ protect = 0;
+ e = NULL;
+ BUG();
}
/*
@@ -582,7 +579,8 @@
* This function returns zero in case of success and a negative error code in
* case of failure.
*/
-static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture)
+static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
+ int torture)
{
int err;
struct ubi_ec_hdr *ec_hdr;
@@ -634,8 +632,7 @@
}
/**
- * check_protection_over - check if it is time to stop protecting some
- * physical eraseblocks.
+ * check_protection_over - check if it is time to stop protecting some PEBs.
* @ubi: UBI device description object
*
* This function is called after each erase operation, when the absolute erase
@@ -871,6 +868,10 @@
}
ubi_free_vid_hdr(ubi, vid_hdr);
+ if (scrubbing && !protect)
+ ubi_msg("scrubbed PEB %d, data moved to PEB %d",
+ e1->pnum, e2->pnum);
+
spin_lock(&ubi->wl_lock);
if (protect)
prot_tree_add(ubi, e1, pe, protect);
@@ -1054,8 +1055,8 @@
spin_unlock(&ubi->wl_lock);
/*
- * One more erase operation has happened, take care about protected
- * physical eraseblocks.
+ * One more erase operation has happened, take care about
+ * protected physical eraseblocks.
*/
check_protection_over(ubi);
@@ -1136,7 +1137,7 @@
}
/**
- * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling unit.
+ * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system.
* @ubi: UBI device description object
* @pnum: physical eraseblock to return
* @torture: if this physical eraseblock has to be tortured
@@ -1175,11 +1176,11 @@
/*
* User is putting the physical eraseblock which was selected
* as the target the data is moved to. It may happen if the EBA
- * unit already re-mapped the LEB in 'ubi_eba_copy_leb()' but
- * the WL unit has not put the PEB to the "used" tree yet, but
- * it is about to do this. So we just set a flag which will
- * tell the WL worker that the PEB is not needed anymore and
- * should be scheduled for erasure.
+ * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()'
+ * but the WL sub-system has not put the PEB to the "used" tree
+ * yet, but it is about to do this. So we just set a flag which
+ * will tell the WL worker that the PEB is not needed anymore
+ * and should be scheduled for erasure.
*/
dbg_wl("PEB %d is the target of data moving", pnum);
ubi_assert(!ubi->move_to_put);
@@ -1229,7 +1230,7 @@
{
struct ubi_wl_entry *e;
- ubi_msg("schedule PEB %d for scrubbing", pnum);
+ dbg_msg("schedule PEB %d for scrubbing", pnum);
retry:
spin_lock(&ubi->wl_lock);
@@ -1368,7 +1369,7 @@
int err;
if (kthread_should_stop())
- goto out;
+ break;
if (try_to_freeze())
continue;
@@ -1403,7 +1404,6 @@
cond_resched();
}
-out:
dbg_wl("background thread \"%s\" is killed", ubi->bgt_name);
return 0;
}
@@ -1426,8 +1426,7 @@
}
/**
- * ubi_wl_init_scan - initialize the wear-leveling unit using scanning
- * information.
+ * ubi_wl_init_scan - initialize the WL sub-system using scanning information.
* @ubi: UBI device description object
* @si: scanning information
*
@@ -1584,13 +1583,12 @@
}
/**
- * ubi_wl_close - close the wear-leveling unit.
+ * ubi_wl_close - close the wear-leveling sub-system.
* @ubi: UBI device description object
*/
void ubi_wl_close(struct ubi_device *ubi)
{
- dbg_wl("close the UBI wear-leveling unit");
-
+ dbg_wl("close the WL sub-system");
cancel_pending(ubi);
protection_trees_destroy(ubi);
tree_destroy(&ubi->used);
@@ -1602,8 +1600,7 @@
#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
/**
- * paranoid_check_ec - make sure that the erase counter of a physical eraseblock
- * is correct.
+ * paranoid_check_ec - make sure that the erase counter of a PEB is correct.
* @ubi: UBI device description object
* @pnum: the physical eraseblock number to check
* @ec: the erase counter to check
@@ -1644,13 +1641,12 @@
}
/**
- * paranoid_check_in_wl_tree - make sure that a wear-leveling entry is present
- * in a WL RB-tree.
+ * paranoid_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree.
* @e: the wear-leveling entry to check
* @root: the root of the tree
*
- * This function returns zero if @e is in the @root RB-tree and %1 if it
- * is not.
+ * This function returns zero if @e is in the @root RB-tree and %1 if it is
+ * not.
*/
static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
struct rb_root *root)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c28d7cb..0196a0d 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -19,6 +19,7 @@
//#define DEBUG
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
@@ -54,9 +55,15 @@
struct tasklet_struct tasklet;
bool free_in_tasklet;
+ /* I like... big packets and I cannot lie! */
+ bool big_packets;
+
/* Receive & send queues. */
struct sk_buff_head recv;
struct sk_buff_head send;
+
+ /* Chain pages by the private ptr. */
+ struct page *pages;
};
static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb)
@@ -69,6 +76,23 @@
sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr));
}
+static void give_a_page(struct virtnet_info *vi, struct page *page)
+{
+ page->private = (unsigned long)vi->pages;
+ vi->pages = page;
+}
+
+static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
+{
+ struct page *p = vi->pages;
+
+ if (p)
+ vi->pages = (struct page *)p->private;
+ else
+ p = alloc_page(gfp_mask);
+ return p;
+}
+
static void skb_xmit_done(struct virtqueue *svq)
{
struct virtnet_info *vi = svq->vdev->priv;
@@ -88,6 +112,7 @@
unsigned len)
{
struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);
+ int err;
if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
pr_debug("%s: short packet %i\n", dev->name, len);
@@ -95,10 +120,23 @@
goto drop;
}
len -= sizeof(struct virtio_net_hdr);
- BUG_ON(len > MAX_PACKET_LEN);
- skb_trim(skb, len);
+ if (len <= MAX_PACKET_LEN) {
+ unsigned int i;
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ give_a_page(dev->priv, skb_shinfo(skb)->frags[i].page);
+ skb->data_len = 0;
+ skb_shinfo(skb)->nr_frags = 0;
+ }
+
+ err = pskb_trim(skb, len);
+ if (err) {
+ pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err);
+ dev->stats.rx_dropped++;
+ goto drop;
+ }
+ skb->truesize += skb->data_len;
dev->stats.rx_bytes += skb->len;
dev->stats.rx_packets++;
@@ -160,7 +198,7 @@
{
struct sk_buff *skb;
struct scatterlist sg[2+MAX_SKB_FRAGS];
- int num, err;
+ int num, err, i;
sg_init_table(sg, 2+MAX_SKB_FRAGS);
for (;;) {
@@ -170,6 +208,24 @@
skb_put(skb, MAX_PACKET_LEN);
vnet_hdr_to_sg(sg, skb);
+
+ if (vi->big_packets) {
+ for (i = 0; i < MAX_SKB_FRAGS; i++) {
+ skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+ f->page = get_a_page(vi, GFP_ATOMIC);
+ if (!f->page)
+ break;
+
+ f->page_offset = 0;
+ f->size = PAGE_SIZE;
+
+ skb->data_len += PAGE_SIZE;
+ skb->len += PAGE_SIZE;
+
+ skb_shinfo(skb)->nr_frags++;
+ }
+ }
+
num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
skb_queue_head(&vi->recv, skb);
@@ -335,16 +391,11 @@
free_old_xmit_skbs(vi);
/* If we has a buffer left over from last time, send it now. */
- if (unlikely(vi->last_xmit_skb)) {
- if (xmit_skb(vi, vi->last_xmit_skb) != 0) {
- /* Drop this skb: we only queue one. */
- vi->dev->stats.tx_dropped++;
- kfree_skb(skb);
- skb = NULL;
- goto stop_queue;
- }
- vi->last_xmit_skb = NULL;
- }
+ if (unlikely(vi->last_xmit_skb) &&
+ xmit_skb(vi, vi->last_xmit_skb) != 0)
+ goto stop_queue;
+
+ vi->last_xmit_skb = NULL;
/* Put new one in send queue and do transmit */
if (likely(skb)) {
@@ -370,6 +421,11 @@
netif_start_queue(dev);
goto again;
}
+ if (skb) {
+ /* Drop this skb: we only queue one. */
+ vi->dev->stats.tx_dropped++;
+ kfree_skb(skb);
+ }
goto done;
}
@@ -408,6 +464,22 @@
return 0;
}
+static int virtnet_set_tx_csum(struct net_device *dev, u32 data)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ struct virtio_device *vdev = vi->vdev;
+
+ if (data && !virtio_has_feature(vdev, VIRTIO_NET_F_CSUM))
+ return -ENOSYS;
+
+ return ethtool_op_set_tx_hw_csum(dev, data);
+}
+
+static struct ethtool_ops virtnet_ethtool_ops = {
+ .set_tx_csum = virtnet_set_tx_csum,
+ .set_sg = ethtool_op_set_sg,
+};
+
static int virtnet_probe(struct virtio_device *vdev)
{
int err;
@@ -427,6 +499,7 @@
#ifdef CONFIG_NET_POLL_CONTROLLER
dev->poll_controller = virtnet_netpoll;
#endif
+ SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
SET_NETDEV_DEV(dev, &vdev->dev);
/* Do we support "hardware" checksums? */
@@ -462,11 +535,18 @@
vi->dev = dev;
vi->vdev = vdev;
vdev->priv = vi;
+ vi->pages = NULL;
/* If they give us a callback when all buffers are done, we don't need
* the timer. */
vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY);
+ /* If we can receive ANY GSO packets, we must allocate large ones. */
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4)
+ || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)
+ || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
+ vi->big_packets = true;
+
/* We expect two virtqueues, receive then send. */
vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done);
if (IS_ERR(vi->rvq)) {
@@ -541,6 +621,10 @@
vdev->config->del_vq(vi->svq);
vdev->config->del_vq(vi->rvq);
unregister_netdev(vi->dev);
+
+ while (vi->pages)
+ __free_pages(get_a_page(vi, GFP_KERNEL), 0);
+
free_netdev(vi->dev);
}
@@ -553,7 +637,9 @@
VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
- VIRTIO_NET_F_HOST_ECN, VIRTIO_F_NOTIFY_ON_EMPTY,
+ VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
+ VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */
+ VIRTIO_F_NOTIFY_ON_EMPTY,
};
static struct virtio_driver virtio_net = {
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 3a7a11a..1d7ec31 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -4,7 +4,7 @@
config OF_GPIO
def_bool y
- depends on OF && PPC_OF && HAVE_GPIO_LIB
+ depends on OF && PPC_OF && GPIOLIB
help
OpenFirmware GPIO accessors
diff --git a/drivers/parport/parport_ax88796.c b/drivers/parport/parport_ax88796.c
index 4ec220b..6938d2e 100644
--- a/drivers/parport/parport_ax88796.c
+++ b/drivers/parport/parport_ax88796.c
@@ -406,6 +406,8 @@
#define parport_ax88796_resume NULL
#endif
+MODULE_ALIAS("platform:ax88796-pp");
+
static struct platform_driver axdrv = {
.driver = {
.name = "ax88796-pp",
diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c
index 71be36f..308ddb2 100644
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -433,6 +433,8 @@
#endif /* CONFIG_PM */
+MODULE_ALIAS("platform:ds2760-battery");
+
static struct platform_driver ds2760_battery_driver = {
.driver = {
.name = "ds2760-battery",
diff --git a/drivers/power/pda_power.c b/drivers/power/pda_power.c
index 82810b7bf..0471ec7 100644
--- a/drivers/power/pda_power.c
+++ b/drivers/power/pda_power.c
@@ -362,6 +362,8 @@
#define pda_power_resume NULL
#endif /* CONFIG_PM */
+MODULE_ALIAS("platform:pda-power");
+
static struct platform_driver pda_power_pdrv = {
.driver = {
.name = "pda-power",
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 5ab3434..79954bd 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -15,6 +15,7 @@
#include <linux/err.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
#include <linux/interrupt.h>
#include <linux/virtio_ring.h>
#include <linux/pfn.h>
@@ -87,16 +88,20 @@
return features;
}
-static void kvm_set_features(struct virtio_device *vdev, u32 features)
+static void kvm_finalize_features(struct virtio_device *vdev)
{
- unsigned int i;
+ unsigned int i, bits;
struct kvm_device_desc *desc = to_kvmdev(vdev)->desc;
/* Second half of bitmap is features we accept. */
u8 *out_features = kvm_vq_features(desc) + desc->feature_len;
+ /* Give virtio_ring a chance to accept features. */
+ vring_transport_features(vdev);
+
memset(out_features, 0, desc->feature_len);
- for (i = 0; i < min(desc->feature_len * 8, 32); i++) {
- if (features & (1 << i))
+ bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
+ for (i = 0; i < bits; i++) {
+ if (test_bit(i, vdev->features))
out_features[i / 8] |= (1 << (i % 8));
}
}
@@ -222,7 +227,7 @@
*/
static struct virtio_config_ops kvm_vq_configspace_ops = {
.get_features = kvm_get_features,
- .set_features = kvm_set_features,
+ .finalize_features = kvm_finalize_features,
.get = kvm_get,
.set = kvm_set,
.get_status = kvm_get_status,
@@ -333,6 +338,25 @@
return 0;
}
+/* code for early console output with virtio_console */
+static __init int early_put_chars(u32 vtermno, const char *buf, int count)
+{
+ char scratch[17];
+ unsigned int len = count;
+
+ if (len > sizeof(scratch) - 1)
+ len = sizeof(scratch) - 1;
+ scratch[len] = '\0';
+ memcpy(scratch, buf, len);
+ kvm_hypercall1(KVM_S390_VIRTIO_NOTIFY, __pa(scratch));
+ return len;
+}
+
+void s390_virtio_console_init(void)
+{
+ virtio_cons_early_init(early_put_chars);
+}
+
/*
* We do this after core stuff, but before the drivers.
*/
diff --git a/drivers/telephony/ixj.c b/drivers/telephony/ixj.c
index 49cd979..ec7aeb5 100644
--- a/drivers/telephony/ixj.c
+++ b/drivers/telephony/ixj.c
@@ -6095,15 +6095,15 @@
return retval;
}
-static int ixj_ioctl(struct inode *inode, struct file *file_p, unsigned int cmd, unsigned long arg)
+static long do_ixj_ioctl(struct file *file_p, unsigned int cmd, unsigned long arg)
{
IXJ_TONE ti;
IXJ_FILTER jf;
IXJ_FILTER_RAW jfr;
void __user *argp = (void __user *)arg;
-
- unsigned int raise, mant;
+ struct inode *inode = file_p->f_path.dentry->d_inode;
unsigned int minor = iminor(inode);
+ unsigned int raise, mant;
int board = NUM(inode);
IXJ *j = get_ixj(NUM(inode));
@@ -6661,6 +6661,15 @@
return retval;
}
+static long ixj_ioctl(struct file *file_p, unsigned int cmd, unsigned long arg)
+{
+ long ret;
+ lock_kernel();
+ ret = do_ixj_ioctl(file_p, cmd, arg);
+ unlock_kernel();
+ return ret;
+}
+
static int ixj_fasync(int fd, struct file *file_p, int mode)
{
IXJ *j = get_ixj(NUM(file_p->f_path.dentry->d_inode));
@@ -6674,7 +6683,7 @@
.read = ixj_enhanced_read,
.write = ixj_enhanced_write,
.poll = ixj_poll,
- .ioctl = ixj_ioctl,
+ .unlocked_ioctl = ixj_ioctl,
.release = ixj_release,
.fasync = ixj_fasync
};
diff --git a/drivers/usb/gadget/at91_udc.h b/drivers/usb/gadget/at91_udc.h
index a973f2a..c65d622 100644
--- a/drivers/usb/gadget/at91_udc.h
+++ b/drivers/usb/gadget/at91_udc.h
@@ -171,7 +171,7 @@
#endif
#define ERR(stuff...) pr_err("udc: " stuff)
-#define WARN(stuff...) pr_warning("udc: " stuff)
+#define WARNING(stuff...) pr_warning("udc: " stuff)
#define INFO(stuff...) pr_info("udc: " stuff)
#define DBG(stuff...) pr_debug("udc: " stuff)
diff --git a/drivers/usb/gadget/cdc2.c b/drivers/usb/gadget/cdc2.c
index d490d02..a39a4b9 100644
--- a/drivers/usb/gadget/cdc2.c
+++ b/drivers/usb/gadget/cdc2.c
@@ -170,7 +170,7 @@
* but if the controller isn't recognized at all then
* that assumption is a bit more likely to be wrong.
*/
- WARN(cdev, "controller '%s' not recognized; trying %s\n",
+ WARNING(cdev, "controller '%s' not recognized; trying %s\n",
gadget->name,
cdc_config_driver.label);
device_desc.bcdDevice =
diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c
index d7aaaa2..bcac2e6 100644
--- a/drivers/usb/gadget/ether.c
+++ b/drivers/usb/gadget/ether.c
@@ -293,7 +293,7 @@
* but if the controller isn't recognized at all then
* that assumption is a bit more likely to be wrong.
*/
- WARN(cdev, "controller '%s' not recognized; trying %s\n",
+ WARNING(cdev, "controller '%s' not recognized; trying %s\n",
gadget->name,
eth_config_driver.label);
device_desc.bcdDevice =
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index 15c24ed..ea2c31d 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -308,7 +308,7 @@
dev_vdbg(&(d)->gadget->dev , fmt , ## args)
#define ERROR(d, fmt, args...) \
dev_err(&(d)->gadget->dev , fmt , ## args)
-#define WARN(d, fmt, args...) \
+#define WARNING(d, fmt, args...) \
dev_warn(&(d)->gadget->dev , fmt , ## args)
#define INFO(d, fmt, args...) \
dev_info(&(d)->gadget->dev , fmt , ## args)
@@ -1091,7 +1091,7 @@
if (rc != 0 && rc != -ESHUTDOWN) {
/* We can't do much more than wait for a reset */
- WARN(fsg, "error in submission: %s --> %d\n",
+ WARNING(fsg, "error in submission: %s --> %d\n",
fsg->ep0->name, rc);
}
return rc;
@@ -1227,7 +1227,7 @@
/* Save the command for later */
if (fsg->cbbuf_cmnd_size)
- WARN(fsg, "CB[I] overwriting previous command\n");
+ WARNING(fsg, "CB[I] overwriting previous command\n");
fsg->cbbuf_cmnd_size = req->actual;
memcpy(fsg->cbbuf_cmnd, req->buf, fsg->cbbuf_cmnd_size);
@@ -1506,7 +1506,7 @@
* submissions if DMA is enabled. */
if (rc != -ESHUTDOWN && !(rc == -EOPNOTSUPP &&
req->length == 0))
- WARN(fsg, "error in submission: %s --> %d\n",
+ WARNING(fsg, "error in submission: %s --> %d\n",
ep->name, rc);
}
}
@@ -2294,7 +2294,7 @@
VDBG(fsg, "delayed bulk-in endpoint halt\n");
while (rc != 0) {
if (rc != -EAGAIN) {
- WARN(fsg, "usb_ep_set_halt -> %d\n", rc);
+ WARNING(fsg, "usb_ep_set_halt -> %d\n", rc);
rc = 0;
break;
}
@@ -2317,7 +2317,7 @@
VDBG(fsg, "delayed bulk-in endpoint wedge\n");
while (rc != 0) {
if (rc != -EAGAIN) {
- WARN(fsg, "usb_ep_set_wedge -> %d\n", rc);
+ WARNING(fsg, "usb_ep_set_wedge -> %d\n", rc);
rc = 0;
break;
}
@@ -3755,7 +3755,7 @@
if (gcnum >= 0)
mod_data.release = 0x0300 + gcnum;
else {
- WARN(fsg, "controller '%s' not recognized\n",
+ WARNING(fsg, "controller '%s' not recognized\n",
fsg->gadget->name);
mod_data.release = 0x0399;
}
diff --git a/drivers/usb/gadget/fsl_usb2_udc.c b/drivers/usb/gadget/fsl_usb2_udc.c
index 1695382..1cfccf1 100644
--- a/drivers/usb/gadget/fsl_usb2_udc.c
+++ b/drivers/usb/gadget/fsl_usb2_udc.c
@@ -1538,7 +1538,7 @@
/* If the ep is configured */
if (curr_ep->name == NULL) {
- WARN("Invalid EP?");
+ WARNING("Invalid EP?");
continue;
}
diff --git a/drivers/usb/gadget/fsl_usb2_udc.h b/drivers/usb/gadget/fsl_usb2_udc.h
index 98b1483..6131752 100644
--- a/drivers/usb/gadget/fsl_usb2_udc.h
+++ b/drivers/usb/gadget/fsl_usb2_udc.h
@@ -552,7 +552,7 @@
#endif
#define ERR(stuff...) pr_err("udc: " stuff)
-#define WARN(stuff...) pr_warning("udc: " stuff)
+#define WARNING(stuff...) pr_warning("udc: " stuff)
#define INFO(stuff...) pr_info("udc: " stuff)
/*-------------------------------------------------------------------------*/
diff --git a/drivers/usb/gadget/gmidi.c b/drivers/usb/gadget/gmidi.c
index 7f4d482..ea8651e 100644
--- a/drivers/usb/gadget/gmidi.c
+++ b/drivers/usb/gadget/gmidi.c
@@ -138,8 +138,6 @@
dev_vdbg(&(d)->gadget->dev , fmt , ## args)
#define ERROR(d, fmt, args...) \
dev_err(&(d)->gadget->dev , fmt , ## args)
-#define WARN(d, fmt, args...) \
- dev_warn(&(d)->gadget->dev , fmt , ## args)
#define INFO(d, fmt, args...) \
dev_info(&(d)->gadget->dev , fmt , ## args)
diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c
index 48f1c63..60aa048 100644
--- a/drivers/usb/gadget/goku_udc.c
+++ b/drivers/usb/gadget/goku_udc.c
@@ -1768,7 +1768,7 @@
* usb_gadget_driver_{register,unregister}() must change.
*/
if (the_controller) {
- WARN(dev, "ignoring %s\n", pci_name(pdev));
+ WARNING(dev, "ignoring %s\n", pci_name(pdev));
return -EBUSY;
}
if (!pdev->irq) {
diff --git a/drivers/usb/gadget/goku_udc.h b/drivers/usb/gadget/goku_udc.h
index bc4eb1e..566cb23 100644
--- a/drivers/usb/gadget/goku_udc.h
+++ b/drivers/usb/gadget/goku_udc.h
@@ -285,7 +285,7 @@
#define ERROR(dev,fmt,args...) \
xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev,fmt,args...) \
+#define WARNING(dev,fmt,args...) \
xprintk(dev , KERN_WARNING , fmt , ## args)
#define INFO(dev,fmt,args...) \
xprintk(dev , KERN_INFO , fmt , ## args)
diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index 04692d5..f4585d3e 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -262,8 +262,6 @@
#define ERROR(dev,fmt,args...) \
xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev,fmt,args...) \
- xprintk(dev , KERN_WARNING , fmt , ## args)
#define INFO(dev,fmt,args...) \
xprintk(dev , KERN_INFO , fmt , ## args)
diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c
index b67ab67..5cfb5eb 100644
--- a/drivers/usb/gadget/net2280.c
+++ b/drivers/usb/gadget/net2280.c
@@ -1007,7 +1007,7 @@
* 0122, and 0124; not all cases trigger the warning.
*/
if ((tmp & (1 << NAK_OUT_PACKETS)) == 0) {
- WARN (ep->dev, "%s lost packet sync!\n",
+ WARNING (ep->dev, "%s lost packet sync!\n",
ep->ep.name);
req->req.status = -EOVERFLOW;
} else if ((tmp = readl (&ep->regs->ep_avail)) != 0) {
diff --git a/drivers/usb/gadget/net2280.h b/drivers/usb/gadget/net2280.h
index 1f2af39..81a71db 100644
--- a/drivers/usb/gadget/net2280.h
+++ b/drivers/usb/gadget/net2280.h
@@ -272,7 +272,7 @@
#define ERROR(dev,fmt,args...) \
xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev,fmt,args...) \
+#define WARNING(dev,fmt,args...) \
xprintk(dev , KERN_WARNING , fmt , ## args)
#define INFO(dev,fmt,args...) \
xprintk(dev , KERN_INFO , fmt , ## args)
diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c
index 4b79a85..395bd18 100644
--- a/drivers/usb/gadget/omap_udc.c
+++ b/drivers/usb/gadget/omap_udc.c
@@ -1120,7 +1120,7 @@
status = -EINVAL;
else if (value) {
if (ep->udc->ep0_set_config) {
- WARN("error changing config?\n");
+ WARNING("error changing config?\n");
omap_writew(UDC_CLR_CFG, UDC_SYSCON2);
}
omap_writew(UDC_STALL_CMD, UDC_SYSCON2);
@@ -1764,7 +1764,7 @@
u.r.bRequestType, u.r.bRequest, status);
if (udc->ep0_set_config) {
if (udc->ep0_reset_config)
- WARN("error resetting config?\n");
+ WARNING("error resetting config?\n");
else
omap_writew(UDC_CLR_CFG, UDC_SYSCON2);
}
@@ -3076,7 +3076,7 @@
* which would prevent entry to deep sleep...
*/
if ((devstat & UDC_ATT) != 0 && (devstat & UDC_SUS) == 0) {
- WARN("session active; suspend requires disconnect\n");
+ WARNING("session active; suspend requires disconnect\n");
omap_pullup(&udc->gadget, 0);
}
diff --git a/drivers/usb/gadget/omap_udc.h b/drivers/usb/gadget/omap_udc.h
index 8522bbb..29edc51 100644
--- a/drivers/usb/gadget/omap_udc.h
+++ b/drivers/usb/gadget/omap_udc.h
@@ -188,7 +188,7 @@
#endif
#define ERR(stuff...) pr_err("udc: " stuff)
-#define WARN(stuff...) pr_warning("udc: " stuff)
+#define WARNING(stuff...) pr_warning("udc: " stuff)
#define INFO(stuff...) pr_info("udc: " stuff)
#define DBG(stuff...) pr_debug("udc: " stuff)
diff --git a/drivers/usb/gadget/printer.c b/drivers/usb/gadget/printer.c
index 49cd9e1..e009008 100644
--- a/drivers/usb/gadget/printer.c
+++ b/drivers/usb/gadget/printer.c
@@ -179,7 +179,7 @@
#define ERROR(dev, fmt, args...) \
xprintk(dev, KERN_ERR, fmt, ## args)
-#define WARN(dev, fmt, args...) \
+#define WARNING(dev, fmt, args...) \
xprintk(dev, KERN_WARNING, fmt, ## args)
#define INFO(dev, fmt, args...) \
xprintk(dev, KERN_INFO, fmt, ## args)
diff --git a/drivers/usb/gadget/pxa25x_udc.c b/drivers/usb/gadget/pxa25x_udc.c
index 8fb0066..7e6725d 100644
--- a/drivers/usb/gadget/pxa25x_udc.c
+++ b/drivers/usb/gadget/pxa25x_udc.c
@@ -342,7 +342,7 @@
struct pxa25x_request *req;
req = container_of (_req, struct pxa25x_request, req);
- WARN_ON (!list_empty (&req->queue));
+ WARN_ON(!list_empty (&req->queue));
kfree(req);
}
@@ -1556,7 +1556,7 @@
* tell us about config change events,
* so later ones may fail...
*/
- WARN("config change %02x fail %d?\n",
+ WARNING("config change %02x fail %d?\n",
u.r.bRequest, i);
return;
/* TODO experiment: if has_cfr,
@@ -2330,7 +2330,7 @@
unsigned long flags;
if (!udc->mach->gpio_pullup && !udc->mach->udc_command)
- WARN("USB host won't detect disconnect!\n");
+ WARNING("USB host won't detect disconnect!\n");
udc->suspended = 1;
local_irq_save(flags);
diff --git a/drivers/usb/gadget/pxa25x_udc.h b/drivers/usb/gadget/pxa25x_udc.h
index 4d11ece..c8a1321 100644
--- a/drivers/usb/gadget/pxa25x_udc.h
+++ b/drivers/usb/gadget/pxa25x_udc.h
@@ -259,7 +259,7 @@
#define DBG(lvl, stuff...) do{if ((lvl) <= UDC_DEBUG) DMSG(stuff);}while(0)
#define ERR(stuff...) pr_err("udc: " stuff)
-#define WARN(stuff...) pr_warning("udc: " stuff)
+#define WARNING(stuff...) pr_warning("udc: " stuff)
#define INFO(stuff...) pr_info("udc: " stuff)
diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c
index 5458f43..3791e62 100644
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c
@@ -116,7 +116,6 @@
#undef DBG
#undef VDBG
#undef ERROR
-#undef WARN
#undef INFO
#define xprintk(d, level, fmt, args...) \
@@ -140,8 +139,6 @@
#define ERROR(dev, fmt, args...) \
xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev, fmt, args...) \
- xprintk(dev , KERN_WARNING , fmt , ## args)
#define INFO(dev, fmt, args...) \
xprintk(dev , KERN_INFO , fmt , ## args)
diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c
index 31178e1..ce1ca0b 100644
--- a/drivers/usb/host/isp116x-hcd.c
+++ b/drivers/usb/host/isp116x-hcd.c
@@ -882,7 +882,7 @@
for (i = 0; i < 100 && !list_empty(&hep->urb_list); i++)
msleep(3);
if (!list_empty(&hep->urb_list))
- WARN("ep %p not empty?\n", ep);
+ WARNING("ep %p not empty?\n", ep);
kfree(ep);
hep->hcpriv = NULL;
diff --git a/drivers/usb/host/isp116x.h b/drivers/usb/host/isp116x.h
index 595b90a99..aa211ba 100644
--- a/drivers/usb/host/isp116x.h
+++ b/drivers/usb/host/isp116x.h
@@ -338,7 +338,7 @@
#endif
#define ERR(stuff...) printk(KERN_ERR "116x: " stuff)
-#define WARN(stuff...) printk(KERN_WARNING "116x: " stuff)
+#define WARNING(stuff...) printk(KERN_WARNING "116x: " stuff)
#define INFO(stuff...) printk(KERN_INFO "116x: " stuff)
/* ------------------------------------------------- */
diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c
index 340d72d..8a74bbb 100644
--- a/drivers/usb/host/sl811-hcd.c
+++ b/drivers/usb/host/sl811-hcd.c
@@ -1026,7 +1026,7 @@
if (!list_empty(&hep->urb_list))
msleep(3);
if (!list_empty(&hep->urb_list))
- WARN("ep %p not empty?\n", ep);
+ WARNING("ep %p not empty?\n", ep);
kfree(ep);
hep->hcpriv = NULL;
diff --git a/drivers/usb/host/sl811.h b/drivers/usb/host/sl811.h
index 7690d98..b6b8c1f 100644
--- a/drivers/usb/host/sl811.h
+++ b/drivers/usb/host/sl811.h
@@ -261,6 +261,6 @@
#endif
#define ERR(stuff...) printk(KERN_ERR "sl811: " stuff)
-#define WARN(stuff...) printk(KERN_WARNING "sl811: " stuff)
+#define WARNING(stuff...) printk(KERN_WARNING "sl811: " stuff)
#define INFO(stuff...) printk(KERN_INFO "sl811: " stuff)
diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c
index 054dedd..b358c4e 100644
--- a/drivers/usb/misc/usbtest.c
+++ b/drivers/usb/misc/usbtest.c
@@ -81,7 +81,7 @@
#define ERROR(tdev, fmt, args...) \
dev_err(&(tdev)->intf->dev , fmt , ## args)
-#define WARN(tdev, fmt, args...) \
+#define WARNING(tdev, fmt, args...) \
dev_warn(&(tdev)->intf->dev , fmt , ## args)
/*-------------------------------------------------------------------------*/
@@ -1946,7 +1946,7 @@
status = get_endpoints (dev, intf);
if (status < 0) {
- WARN(dev, "couldn't get endpoints, %d\n",
+ WARNING(dev, "couldn't get endpoints, %d\n",
status);
return status;
}
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 7084e7e..5b78fd0 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -71,13 +71,6 @@
dev->id.device, dev->id.vendor);
}
-static struct bus_type virtio_bus = {
- .name = "virtio",
- .match = virtio_dev_match,
- .dev_attrs = virtio_dev_attrs,
- .uevent = virtio_uevent,
-};
-
static void add_status(struct virtio_device *dev, unsigned status)
{
dev->config->set_status(dev, dev->config->get_status(dev) | status);
@@ -120,12 +113,16 @@
set_bit(f, dev->features);
}
+ /* Transport features always preserved to pass to finalize_features. */
+ for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++)
+ if (device_features & (1 << i))
+ set_bit(i, dev->features);
+
err = drv->probe(dev);
if (err)
add_status(dev, VIRTIO_CONFIG_S_FAILED);
else {
- /* They should never have set feature bits beyond 32 */
- dev->config->set_features(dev, dev->features[0]);
+ dev->config->finalize_features(dev);
add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
}
return err;
@@ -147,13 +144,20 @@
return 0;
}
+static struct bus_type virtio_bus = {
+ .name = "virtio",
+ .match = virtio_dev_match,
+ .dev_attrs = virtio_dev_attrs,
+ .uevent = virtio_uevent,
+ .probe = virtio_dev_probe,
+ .remove = virtio_dev_remove,
+};
+
int register_virtio_driver(struct virtio_driver *driver)
{
/* Catch this early. */
BUG_ON(driver->feature_table_size && !driver->feature_table);
driver->driver.bus = &virtio_bus;
- driver->driver.probe = virtio_dev_probe;
- driver->driver.remove = virtio_dev_remove;
return driver_register(&driver->driver);
}
EXPORT_SYMBOL_GPL(register_virtio_driver);
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index eae72363..c7dc37c 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -94,12 +94,17 @@
return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
}
-/* virtio config->set_features() implementation */
-static void vp_set_features(struct virtio_device *vdev, u32 features)
+/* virtio config->finalize_features() implementation */
+static void vp_finalize_features(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
- iowrite32(features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
+ /* Give virtio_ring a chance to accept features. */
+ vring_transport_features(vdev);
+
+ /* We only support 32 feature bits. */
+ BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1);
+ iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES);
}
/* virtio config->get() implementation */
@@ -297,7 +302,7 @@
.find_vq = vp_find_vq,
.del_vq = vp_del_vq,
.get_features = vp_get_features,
- .set_features = vp_set_features,
+ .finalize_features = vp_finalize_features,
};
/* the PCI probing function */
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 72bf8bc..6eb5303 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -18,6 +18,7 @@
*/
#include <linux/virtio.h>
#include <linux/virtio_ring.h>
+#include <linux/virtio_config.h>
#include <linux/device.h>
#ifdef DEBUG
@@ -87,8 +88,11 @@
if (vq->num_free < out + in) {
pr_debug("Can't add buf len %i - avail = %i\n",
out + in, vq->num_free);
- /* We notify *even if* VRING_USED_F_NO_NOTIFY is set here. */
- vq->notify(&vq->vq);
+ /* FIXME: for historical reasons, we force a notify here if
+ * there are outgoing parts to the buffer. Presumably the
+ * host should service the ring ASAP. */
+ if (out)
+ vq->notify(&vq->vq);
END_USE(vq);
return -ENOSPC;
}
@@ -320,4 +324,19 @@
}
EXPORT_SYMBOL_GPL(vring_del_virtqueue);
+/* Manipulates transport-specific feature bits. */
+void vring_transport_features(struct virtio_device *vdev)
+{
+ unsigned int i;
+
+ for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
+ switch (i) {
+ default:
+ /* We don't understand this bit. */
+ clear_bit(i, vdev->features);
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(vring_transport_features);
+
MODULE_LICENSE("GPL");
diff --git a/fs/Kconfig b/fs/Kconfig
index 37db79a..97e3bde 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -902,65 +902,7 @@
menu "Pseudo filesystems"
-config PROC_FS
- bool "/proc file system support" if EMBEDDED
- default y
- help
- This is a virtual file system providing information about the status
- of the system. "Virtual" means that it doesn't take up any space on
- your hard disk: the files are created on the fly by the kernel when
- you try to access them. Also, you cannot read the files with older
- version of the program less: you need to use more or cat.
-
- It's totally cool; for example, "cat /proc/interrupts" gives
- information about what the different IRQs are used for at the moment
- (there is a small number of Interrupt ReQuest lines in your computer
- that are used by the attached devices to gain the CPU's attention --
- often a source of trouble if two devices are mistakenly configured
- to use the same IRQ). The program procinfo to display some
- information about your system gathered from the /proc file system.
-
- Before you can use the /proc file system, it has to be mounted,
- meaning it has to be given a location in the directory hierarchy.
- That location should be /proc. A command such as "mount -t proc proc
- /proc" or the equivalent line in /etc/fstab does the job.
-
- The /proc file system is explained in the file
- <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
- ("man 5 proc").
-
- This option will enlarge your kernel by about 67 KB. Several
- programs depend on this, so everyone should say Y here.
-
-config PROC_KCORE
- bool "/proc/kcore support" if !ARM
- depends on PROC_FS && MMU
-
-config PROC_VMCORE
- bool "/proc/vmcore support (EXPERIMENTAL)"
- depends on PROC_FS && CRASH_DUMP
- default y
- help
- Exports the dump image of crashed kernel in ELF format.
-
-config PROC_SYSCTL
- bool "Sysctl support (/proc/sys)" if EMBEDDED
- depends on PROC_FS
- select SYSCTL
- default y
- ---help---
- The sysctl interface provides a means of dynamically changing
- certain kernel parameters and variables on the fly without requiring
- a recompile of the kernel or reboot of the system. The primary
- interface is through /proc/sys. If you say Y here a tree of
- modifiable sysctl entries will be generated beneath the
- /proc/sys directory. They are explained in the files
- in <file:Documentation/sysctl/>. Note that enabling this
- option will enlarge the kernel by at least 8 KB.
-
- As it is generally a good thing, you should say Y here unless
- building a kernel for install/rescue disks or your system is very
- limited in memory.
+source "fs/proc/Kconfig"
config SYSFS
bool "sysfs file system support" if EMBEDDED
@@ -2093,20 +2035,6 @@
To compile the coda client support as a module, choose M here: the
module will be called coda.
-config CODA_FS_OLD_API
- bool "Use 96-bit Coda file identifiers"
- depends on CODA_FS
- help
- A new kernel-userspace API had to be introduced for Coda v6.0
- to support larger 128-bit file identifiers as needed by the
- new realms implementation.
-
- However this new API is not backward compatible with older
- clients. If you really need to run the old Coda userspace
- cache manager then say Y.
-
- For most cases you probably want to say N.
-
config AFS_FS
tristate "Andrew File System support (AFS) (EXPERIMENTAL)"
depends on INET && EXPERIMENTAL
diff --git a/fs/aio.c b/fs/aio.c
index 0fb3117..0051fd9 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -586,7 +586,6 @@
struct task_struct *tsk = current;
task_lock(tsk);
- tsk->flags |= PF_BORROWED_MM;
active_mm = tsk->active_mm;
atomic_inc(&mm->mm_count);
tsk->mm = mm;
@@ -610,7 +609,6 @@
struct task_struct *tsk = current;
task_lock(tsk);
- tsk->flags &= ~PF_BORROWED_MM;
tsk->mm = NULL;
/* active_mm is still 'mm' */
enter_lazy_tlb(mm, tsk);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 742c8f5..3b6ff85 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1506,7 +1506,7 @@
const struct user_regset_view *view = task_user_regset_view(dump_task);
struct elf_thread_core_info *t;
struct elf_prpsinfo *psinfo;
- struct task_struct *g, *p;
+ struct core_thread *ct;
unsigned int i;
info->size = 0;
@@ -1545,31 +1545,26 @@
/*
* Allocate a structure for each thread.
*/
- rcu_read_lock();
- do_each_thread(g, p)
- if (p->mm == dump_task->mm) {
- t = kzalloc(offsetof(struct elf_thread_core_info,
- notes[info->thread_notes]),
- GFP_ATOMIC);
- if (unlikely(!t)) {
- rcu_read_unlock();
- return 0;
- }
- t->task = p;
- if (p == dump_task || !info->thread) {
- t->next = info->thread;
- info->thread = t;
- } else {
- /*
- * Make sure to keep the original task at
- * the head of the list.
- */
- t->next = info->thread->next;
- info->thread->next = t;
- }
+ for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
+ t = kzalloc(offsetof(struct elf_thread_core_info,
+ notes[info->thread_notes]),
+ GFP_KERNEL);
+ if (unlikely(!t))
+ return 0;
+
+ t->task = ct->task;
+ if (ct->task == dump_task || !info->thread) {
+ t->next = info->thread;
+ info->thread = t;
+ } else {
+ /*
+ * Make sure to keep the original task at
+ * the head of the list.
+ */
+ t->next = info->thread->next;
+ info->thread->next = t;
}
- while_each_thread(g, p);
- rcu_read_unlock();
+ }
/*
* Now fill in each thread's information.
@@ -1716,7 +1711,6 @@
{
#define NUM_NOTES 6
struct list_head *t;
- struct task_struct *g, *p;
info->notes = NULL;
info->prstatus = NULL;
@@ -1748,20 +1742,19 @@
info->thread_status_size = 0;
if (signr) {
+ struct core_thread *ct;
struct elf_thread_status *ets;
- rcu_read_lock();
- do_each_thread(g, p)
- if (current->mm == p->mm && current != p) {
- ets = kzalloc(sizeof(*ets), GFP_ATOMIC);
- if (!ets) {
- rcu_read_unlock();
- return 0;
- }
- ets->thread = p;
- list_add(&ets->list, &info->thread_list);
- }
- while_each_thread(g, p);
- rcu_read_unlock();
+
+ for (ct = current->mm->core_state->dumper.next;
+ ct; ct = ct->next) {
+ ets = kzalloc(sizeof(*ets), GFP_KERNEL);
+ if (!ets)
+ return 0;
+
+ ets->thread = ct->task;
+ list_add(&ets->list, &info->thread_list);
+ }
+
list_for_each(t, &info->thread_list) {
int sz;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index d051a32..1b59b1e 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1573,7 +1573,6 @@
struct memelfnote *notes = NULL;
struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
- struct task_struct *g, *p;
LIST_HEAD(thread_list);
struct list_head *t;
elf_fpregset_t *fpu = NULL;
@@ -1622,20 +1621,19 @@
#endif
if (signr) {
+ struct core_thread *ct;
struct elf_thread_status *tmp;
- rcu_read_lock();
- do_each_thread(g,p)
- if (current->mm == p->mm && current != p) {
- tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
- if (!tmp) {
- rcu_read_unlock();
- goto cleanup;
- }
- tmp->thread = p;
- list_add(&tmp->list, &thread_list);
- }
- while_each_thread(g,p);
- rcu_read_unlock();
+
+ for (ct = current->mm->core_state->dumper.next;
+ ct; ct = ct->next) {
+ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ goto cleanup;
+
+ tmp->thread = ct->task;
+ list_add(&tmp->list, &thread_list);
+ }
+
list_for_each(t, &thread_list) {
struct elf_thread_status *tmp;
int sz;
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index e1c8548..bf4a3fd 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -28,11 +28,9 @@
char * coda_f2s(struct CodaFid *f)
{
static char s[60];
-#ifdef CONFIG_CODA_FS_OLD_API
- sprintf(s, "(%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2]);
-#else
+
sprintf(s, "(%08x.%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2], f->opaque[3]);
-#endif
+
return s;
}
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 40c36f7..0d9b80e 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -378,11 +378,7 @@
MODULE_DESCRIPTION("Coda Distributed File System VFS interface");
MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR);
MODULE_LICENSE("GPL");
-#ifdef CONFIG_CODA_FS_OLD_API
-MODULE_VERSION("5.3.21");
-#else
MODULE_VERSION("6.6");
-#endif
static int __init init_coda(void)
{
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 359e531..ce432bc 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -52,12 +52,8 @@
inp->ih.opcode = opcode;
inp->ih.pid = current->pid;
inp->ih.pgid = task_pgrp_nr(current);
-#ifdef CONFIG_CODA_FS_OLD_API
- memset(&inp->ih.cred, 0, sizeof(struct coda_cred));
- inp->ih.cred.cr_fsuid = current->fsuid;
-#else
inp->ih.uid = current->fsuid;
-#endif
+
return (void*)inp;
}
@@ -166,20 +162,11 @@
union inputArgs *inp;
union outputArgs *outp;
int insize, outsize, error;
-#ifdef CONFIG_CODA_FS_OLD_API
- struct coda_cred cred = { 0, };
- cred.cr_fsuid = uid;
-#endif
insize = SIZE(release);
UPARG(CODA_CLOSE);
-#ifdef CONFIG_CODA_FS_OLD_API
- memcpy(&(inp->ih.cred), &cred, sizeof(cred));
-#else
inp->ih.uid = uid;
-#endif
-
inp->coda_close.VFid = *fid;
inp->coda_close.flags = flags;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 18e2c54..5235c67 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -25,7 +25,6 @@
#include <linux/slab.h>
#include <linux/raid/md.h>
#include <linux/kd.h>
-#include <linux/dirent.h>
#include <linux/route.h>
#include <linux/in6.h>
#include <linux/ipv6_route.h>
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 78878c5..eba87ff 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -116,7 +116,7 @@
if (xop->callback == NULL)
wait_event(recv_wq, (op->done != 0));
else {
- rv = -EINPROGRESS;
+ rv = FILE_LOCK_DEFERRED;
goto out;
}
diff --git a/fs/dquot.c b/fs/dquot.c
index 5ac77da..1346eeb 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -562,6 +562,8 @@
*/
static void dqput(struct dquot *dquot)
{
+ int ret;
+
if (!dquot)
return;
#ifdef __DQUOT_PARANOIA
@@ -594,7 +596,19 @@
if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) {
spin_unlock(&dq_list_lock);
/* Commit dquot before releasing */
- dquot->dq_sb->dq_op->write_dquot(dquot);
+ ret = dquot->dq_sb->dq_op->write_dquot(dquot);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: cannot write quota structure on "
+ "device %s (error %d). Quota may get out of "
+ "sync!\n", dquot->dq_sb->s_id, ret);
+ /*
+ * We clear the dirty bit anyway, so that we avoid
+ * an infinite loop here
+ */
+ spin_lock(&dq_list_lock);
+ clear_dquot_dirty(dquot);
+ spin_unlock(&dq_list_lock);
+ }
goto we_slept;
}
/* Clear flag in case dquot was inactive (something bad happened) */
@@ -875,7 +889,10 @@
char *msg = NULL;
struct tty_struct *tty;
- if (!need_print_warning(dquot))
+ if (warntype == QUOTA_NL_IHARDBELOW ||
+ warntype == QUOTA_NL_ISOFTBELOW ||
+ warntype == QUOTA_NL_BHARDBELOW ||
+ warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot))
return;
mutex_lock(&tty_mutex);
@@ -1083,6 +1100,35 @@
return QUOTA_OK;
}
+static int info_idq_free(struct dquot *dquot, ulong inodes)
+{
+ if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
+ dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
+ return QUOTA_NL_NOWARN;
+
+ if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit)
+ return QUOTA_NL_ISOFTBELOW;
+ if (dquot->dq_dqb.dqb_curinodes >= dquot->dq_dqb.dqb_ihardlimit &&
+ dquot->dq_dqb.dqb_curinodes - inodes < dquot->dq_dqb.dqb_ihardlimit)
+ return QUOTA_NL_IHARDBELOW;
+ return QUOTA_NL_NOWARN;
+}
+
+static int info_bdq_free(struct dquot *dquot, qsize_t space)
+{
+ if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
+ toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
+ return QUOTA_NL_NOWARN;
+
+ if (toqb(dquot->dq_dqb.dqb_curspace - space) <=
+ dquot->dq_dqb.dqb_bsoftlimit)
+ return QUOTA_NL_BSOFTBELOW;
+ if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit &&
+ toqb(dquot->dq_dqb.dqb_curspace - space) <
+ dquot->dq_dqb.dqb_bhardlimit)
+ return QUOTA_NL_BHARDBELOW;
+ return QUOTA_NL_NOWARN;
+}
/*
* Initialize quota pointers in inode
* Transaction must be started at entry
@@ -1139,6 +1185,28 @@
return 0;
}
+/* Wrapper to remove references to quota structures from inode */
+void vfs_dq_drop(struct inode *inode)
+{
+ /* Here we can get an arbitrary inode from clear_inode() so we have
+ * to be careful. OTOH we don't need locking as quota operations
+ * are allowed to change only at mount time */
+ if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
+ && inode->i_sb->dq_op->drop) {
+ int cnt;
+ /* Test before calling to rule out calls from proc and such
+ * where we are not allowed to block. Note that this is
+ * actually a reliable test even without the lock - the caller
+ * must ensure that nobody can come after the DQUOT_DROP and
+ * add quota pointers back anyway */
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ if (inode->i_dquot[cnt] != NODQUOT)
+ break;
+ if (cnt < MAXQUOTAS)
+ inode->i_sb->dq_op->drop(inode);
+ }
+}
+
/*
* Following four functions update i_blocks+i_bytes fields and
* quota information (together with appropriate checks)
@@ -1248,6 +1316,7 @@
int dquot_free_space(struct inode *inode, qsize_t number)
{
unsigned int cnt;
+ char warntype[MAXQUOTAS];
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
@@ -1256,6 +1325,7 @@
inode_sub_bytes(inode, number);
return QUOTA_OK;
}
+
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
/* Now recheck reliably when holding dqptr_sem */
if (IS_NOQUOTA(inode)) {
@@ -1266,6 +1336,7 @@
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (inode->i_dquot[cnt] == NODQUOT)
continue;
+ warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
dquot_decr_space(inode->i_dquot[cnt], number);
}
inode_sub_bytes(inode, number);
@@ -1274,6 +1345,7 @@
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
if (inode->i_dquot[cnt])
mark_dquot_dirty(inode->i_dquot[cnt]);
+ flush_warnings(inode->i_dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return QUOTA_OK;
}
@@ -1284,11 +1356,13 @@
int dquot_free_inode(const struct inode *inode, unsigned long number)
{
unsigned int cnt;
+ char warntype[MAXQUOTAS];
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
if (IS_NOQUOTA(inode))
return QUOTA_OK;
+
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
/* Now recheck reliably when holding dqptr_sem */
if (IS_NOQUOTA(inode)) {
@@ -1299,6 +1373,7 @@
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (inode->i_dquot[cnt] == NODQUOT)
continue;
+ warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number);
dquot_decr_inodes(inode->i_dquot[cnt], number);
}
spin_unlock(&dq_data_lock);
@@ -1306,6 +1381,7 @@
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
if (inode->i_dquot[cnt])
mark_dquot_dirty(inode->i_dquot[cnt]);
+ flush_warnings(inode->i_dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return QUOTA_OK;
}
@@ -1323,7 +1399,8 @@
struct dquot *transfer_to[MAXQUOTAS];
int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid,
chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid;
- char warntype[MAXQUOTAS];
+ char warntype_to[MAXQUOTAS];
+ char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
@@ -1332,7 +1409,7 @@
/* Clear the arrays */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
transfer_to[cnt] = transfer_from[cnt] = NODQUOT;
- warntype[cnt] = QUOTA_NL_NOWARN;
+ warntype_to[cnt] = QUOTA_NL_NOWARN;
}
down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
/* Now recheck reliably when holding dqptr_sem */
@@ -1364,8 +1441,9 @@
if (transfer_to[cnt] == NODQUOT)
continue;
transfer_from[cnt] = inode->i_dquot[cnt];
- if (check_idq(transfer_to[cnt], 1, warntype+cnt) == NO_QUOTA ||
- check_bdq(transfer_to[cnt], space, 0, warntype+cnt) == NO_QUOTA)
+ if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) ==
+ NO_QUOTA || check_bdq(transfer_to[cnt], space, 0,
+ warntype_to + cnt) == NO_QUOTA)
goto warn_put_all;
}
@@ -1381,6 +1459,10 @@
/* Due to IO error we might not have transfer_from[] structure */
if (transfer_from[cnt]) {
+ warntype_from_inodes[cnt] =
+ info_idq_free(transfer_from[cnt], 1);
+ warntype_from_space[cnt] =
+ info_bdq_free(transfer_from[cnt], space);
dquot_decr_inodes(transfer_from[cnt], 1);
dquot_decr_space(transfer_from[cnt], space);
}
@@ -1400,7 +1482,9 @@
if (transfer_to[cnt])
mark_dquot_dirty(transfer_to[cnt]);
}
- flush_warnings(transfer_to, warntype);
+ flush_warnings(transfer_to, warntype_to);
+ flush_warnings(transfer_from, warntype_from_inodes);
+ flush_warnings(transfer_from, warntype_from_space);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT)
@@ -1412,6 +1496,18 @@
return ret;
}
+/*
+ * Wrapper for transferring ownership of an inode.
+ *
+ * Does nothing unless some quota is enabled on the inode's superblock and
+ * the inode is not marked IS_NOQUOTA. Otherwise quota is initialized for
+ * the inode with vfs_dq_init() and the superblock's dq_op->transfer() hook
+ * is called with @iattr.
+ *
+ * Returns 1 if the transfer hook reported NO_QUOTA, 0 in every other case
+ * (including when quota handling was skipped).
+ */
+int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
+{
+ if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
+ vfs_dq_init(inode);
+ if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
+ return 1;
+ }
+ return 0;
+}
+
+
/*
* Write info of quota file to disk
*/
@@ -1752,6 +1848,22 @@
return error;
}
+/*
+ * Wrapper to turn on quotas when remounting rw.
+ *
+ * Calls the superblock's s_qcop->quota_on() once for every quota type
+ * index below MAXQUOTAS, passing format 0, a NULL path and 1 as the last
+ * argument (presumably the "remount" flag - confirm against quota_on()).
+ * A failure for one type does not stop the loop.
+ *
+ * Returns -ENOSYS if the superblock has no s_qcop or no quota_on hook,
+ * otherwise the first negative error seen, or 0 if no call failed.
+ */
+int vfs_dq_quota_on_remount(struct super_block *sb)
+{
+ int cnt;
+ int ret = 0, err;
+
+ if (!sb->s_qcop || !sb->s_qcop->quota_on)
+ return -ENOSYS;
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1);
+ /* remember only the first error, keep trying the other types */
+ if (err < 0 && !ret)
+ ret = err;
+ }
+ return ret;
+}
+
/* Generic routine for getting common part of quota structure */
static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
{
@@ -2087,8 +2199,11 @@
EXPORT_SYMBOL(dquot_mark_dquot_dirty);
EXPORT_SYMBOL(dquot_initialize);
EXPORT_SYMBOL(dquot_drop);
+EXPORT_SYMBOL(vfs_dq_drop);
EXPORT_SYMBOL(dquot_alloc_space);
EXPORT_SYMBOL(dquot_alloc_inode);
EXPORT_SYMBOL(dquot_free_space);
EXPORT_SYMBOL(dquot_free_inode);
EXPORT_SYMBOL(dquot_transfer);
+EXPORT_SYMBOL(vfs_dq_transfer);
+EXPORT_SYMBOL(vfs_dq_quota_on_remount);
diff --git a/fs/exec.c b/fs/exec.c
index 190ed1f..5e55901 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -25,19 +25,18 @@
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/fdtable.h>
-#include <linux/mman.h>
+#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/smp_lock.h>
+#include <linux/swap.h>
#include <linux/string.h>
#include <linux/init.h>
-#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/key.h>
#include <linux/personality.h>
#include <linux/binfmts.h>
-#include <linux/swap.h>
#include <linux/utsname.h>
#include <linux/pid_namespace.h>
#include <linux/module.h>
@@ -47,7 +46,6 @@
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/syscalls.h>
-#include <linux/rmap.h>
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/audit.h>
@@ -724,12 +722,10 @@
* Make sure that if there is a core dump in progress
* for the old mm, we get out and die instead of going
* through with the exec. We must hold mmap_sem around
- * checking core_waiters and changing tsk->mm. The
- * core-inducing thread will increment core_waiters for
- * each thread whose ->mm == old_mm.
+ * checking core_state and changing tsk->mm.
*/
down_read(&old_mm->mmap_sem);
- if (unlikely(old_mm->core_waiters)) {
+ if (unlikely(old_mm->core_state)) {
up_read(&old_mm->mmap_sem);
return -EINTR;
}
@@ -1328,6 +1324,7 @@
if (retval < 0)
goto out;
+ current->flags &= ~PF_KTHREAD;
retval = search_binary_handler(bprm,regs);
if (retval >= 0) {
/* execve success */
@@ -1382,17 +1379,14 @@
* name into corename, which must have space for at least
* CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
*/
-static int format_corename(char *corename, const char *pattern, long signr)
+static int format_corename(char *corename, int nr_threads, long signr)
{
- const char *pat_ptr = pattern;
+ const char *pat_ptr = core_pattern;
+ int ispipe = (*pat_ptr == '|');
char *out_ptr = corename;
char *const out_end = corename + CORENAME_MAX_SIZE;
int rc;
int pid_in_pattern = 0;
- int ispipe = 0;
-
- if (*pattern == '|')
- ispipe = 1;
/* Repeat as long as we have more pattern to process and more output
space */
@@ -1493,7 +1487,7 @@
* and core_uses_pid is set, then .%pid will be appended to
* the filename. Do not do this for piped commands. */
if (!ispipe && !pid_in_pattern
- && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
+ && (core_uses_pid || nr_threads)) {
rc = snprintf(out_ptr, out_end - out_ptr,
".%d", task_tgid_vnr(current));
if (rc > out_end - out_ptr)
@@ -1505,9 +1499,10 @@
return ispipe;
}
-static void zap_process(struct task_struct *start)
+static int zap_process(struct task_struct *start)
{
struct task_struct *t;
+ int nr = 0;
start->signal->flags = SIGNAL_GROUP_EXIT;
start->signal->group_stop_count = 0;
@@ -1515,72 +1510,99 @@
t = start;
do {
if (t != current && t->mm) {
- t->mm->core_waiters++;
sigaddset(&t->pending.signal, SIGKILL);
signal_wake_up(t, 1);
+ nr++;
}
- } while ((t = next_thread(t)) != start);
+ } while_each_thread(start, t);
+
+ return nr;
}
static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
- int exit_code)
+ struct core_state *core_state, int exit_code)
{
struct task_struct *g, *p;
unsigned long flags;
- int err = -EAGAIN;
+ int nr = -EAGAIN;
spin_lock_irq(&tsk->sighand->siglock);
if (!signal_group_exit(tsk->signal)) {
+ mm->core_state = core_state;
tsk->signal->group_exit_code = exit_code;
- zap_process(tsk);
- err = 0;
+ nr = zap_process(tsk);
}
spin_unlock_irq(&tsk->sighand->siglock);
- if (err)
- return err;
+ if (unlikely(nr < 0))
+ return nr;
- if (atomic_read(&mm->mm_users) == mm->core_waiters + 1)
+ if (atomic_read(&mm->mm_users) == nr + 1)
goto done;
-
+ /*
+ * We should find and kill all tasks which use this mm, and we should
+ * count them correctly into ->nr_threads. We don't take tasklist
+ * lock, but this is safe wrt:
+ *
+ * fork:
+ * None of sub-threads can fork after zap_process(leader). All
+ * processes which were created before this point should be
+ * visible to zap_threads() because copy_process() adds the new
+ * process to the tail of init_task.tasks list, and lock/unlock
+ * of ->siglock provides a memory barrier.
+ *
+ * do_exit:
+ * The caller holds mm->mmap_sem. This means that the task which
+ * uses this mm can't pass exit_mm(), so it can't exit or clear
+ * its ->mm.
+ *
+ * de_thread:
+ * It does list_replace_rcu(&leader->tasks, &current->tasks),
+ * we must see either old or new leader, this does not matter.
+ * However, it can change p->sighand, so lock_task_sighand(p)
+ * must be used. Since p->mm != NULL and we hold ->mmap_sem
+ * it can't fail.
+ *
+ * Note also that "g" can be the old leader with ->mm == NULL
+ * and already unhashed and thus removed from ->thread_group.
+ * This is OK, __unhash_process()->list_del_rcu() does not
+ * clear the ->next pointer, we will find the new leader via
+ * next_thread().
+ */
rcu_read_lock();
for_each_process(g) {
if (g == tsk->group_leader)
continue;
-
+ if (g->flags & PF_KTHREAD)
+ continue;
p = g;
do {
if (p->mm) {
- if (p->mm == mm) {
- /*
- * p->sighand can't disappear, but
- * may be changed by de_thread()
- */
+ if (unlikely(p->mm == mm)) {
lock_task_sighand(p, &flags);
- zap_process(p);
+ nr += zap_process(p);
unlock_task_sighand(p, &flags);
}
break;
}
- } while ((p = next_thread(p)) != g);
+ } while_each_thread(g, p);
}
rcu_read_unlock();
done:
- return mm->core_waiters;
+ atomic_set(&core_state->nr_threads, nr);
+ return nr;
}
-static int coredump_wait(int exit_code)
+static int coredump_wait(int exit_code, struct core_state *core_state)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
- struct completion startup_done;
struct completion *vfork_done;
int core_waiters;
- init_completion(&mm->core_done);
- init_completion(&startup_done);
- mm->core_startup_done = &startup_done;
-
- core_waiters = zap_threads(tsk, mm, exit_code);
+ init_completion(&core_state->startup);
+ core_state->dumper.task = tsk;
+ core_state->dumper.next = NULL;
+ core_waiters = zap_threads(tsk, mm, core_state, exit_code);
up_write(&mm->mmap_sem);
if (unlikely(core_waiters < 0))
@@ -1597,12 +1619,32 @@
}
if (core_waiters)
- wait_for_completion(&startup_done);
+ wait_for_completion(&core_state->startup);
fail:
- BUG_ON(mm->core_waiters);
return core_waiters;
}
+/*
+ * Walk the list of core_thread entries linked after
+ * mm->core_state->dumper, clear each entry's ->task and wake that task
+ * up, then reset mm->core_state to NULL.
+ */
+static void coredump_finish(struct mm_struct *mm)
+{
+ struct core_thread *curr, *next;
+ struct task_struct *task;
+
+ next = mm->core_state->dumper.next;
+ while ((curr = next) != NULL) {
+ /* read ->next and ->task before ->task is cleared below */
+ next = curr->next;
+ task = curr->task;
+ /*
+ * see exit_mm(), curr->task must not see
+ * ->task == NULL before we read ->next.
+ */
+ smp_mb();
+ curr->task = NULL;
+ wake_up_process(task);
+ }
+
+ mm->core_state = NULL;
+}
+
/*
* set_dumpable converts traditional three-value dumpable to two flags and
* stores them into mm->flags. It modifies lower two bits of mm->flags, but
@@ -1654,6 +1696,7 @@
int do_coredump(long signr, int exit_code, struct pt_regs * regs)
{
+ struct core_state core_state;
char corename[CORENAME_MAX_SIZE + 1];
struct mm_struct *mm = current->mm;
struct linux_binfmt * binfmt;
@@ -1677,7 +1720,7 @@
/*
* If another thread got here first, or we are not dumpable, bail out.
*/
- if (mm->core_waiters || !get_dumpable(mm)) {
+ if (mm->core_state || !get_dumpable(mm)) {
up_write(&mm->mmap_sem);
goto fail;
}
@@ -1692,7 +1735,7 @@
current->fsuid = 0; /* Dump root private */
}
- retval = coredump_wait(exit_code);
+ retval = coredump_wait(exit_code, &core_state);
if (retval < 0)
goto fail;
@@ -1707,7 +1750,7 @@
* uses lock_kernel()
*/
lock_kernel();
- ispipe = format_corename(corename, core_pattern, signr);
+ ispipe = format_corename(corename, retval, signr);
unlock_kernel();
/*
* Don't bother to check the RLIMIT_CORE value if core_pattern points
@@ -1786,7 +1829,7 @@
argv_free(helper_argv);
current->fsuid = fsuid;
- complete_all(&mm->core_done);
+ coredump_finish(mm);
fail:
return retval;
}
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index ef50cbc..31308a3 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -31,6 +31,7 @@
#include <linux/seq_file.h>
#include <linux/mount.h>
#include <linux/log2.h>
+#include <linux/quotaops.h>
#include <asm/uaccess.h>
#include "ext2.h"
#include "xattr.h"
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index eaa23d2..70c0dbd 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -14,7 +14,7 @@
ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
const char *name, size_t name_len)
{
- const int prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
+ const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
if (list && total_len <= list_size) {
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index 83ee149..e8219f8 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -12,13 +12,11 @@
#include <linux/ext2_fs.h>
#include "xattr.h"
-#define XATTR_TRUSTED_PREFIX "trusted."
-
static size_t
ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
const char *name, size_t name_len)
{
- const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1;
+ const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index f383e7c..92495d2 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -11,13 +11,11 @@
#include "ext2.h"
#include "xattr.h"
-#define XATTR_USER_PREFIX "user."
-
static size_t
ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
const char *name, size_t name_len)
{
- const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1;
+ const size_t prefix_len = XATTR_USER_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
if (!test_opt(inode->i_sb, XATTR_USER))
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 8ca3bfd..2eea96e 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -272,7 +272,7 @@
while (n) {
/* Do the node's children first */
- if ((n)->rb_left) {
+ if (n->rb_left) {
n = n->rb_left;
continue;
}
@@ -301,24 +301,18 @@
parent->rb_right = NULL;
n = parent;
}
- root->rb_node = NULL;
}
-static struct dir_private_info *create_dir_info(loff_t pos)
+static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos)
{
struct dir_private_info *p;
- p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL);
+ p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
if (!p)
return NULL;
- p->root.rb_node = NULL;
- p->curr_node = NULL;
- p->extra_fname = NULL;
- p->last_pos = 0;
p->curr_hash = pos2maj_hash(pos);
p->curr_minor_hash = pos2min_hash(pos);
- p->next_hash = 0;
return p;
}
@@ -433,7 +427,7 @@
int ret;
if (!info) {
- info = create_dir_info(filp->f_pos);
+ info = ext3_htree_create_dir_info(filp->f_pos);
if (!info)
return -ENOMEM;
filp->private_data = info;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 7712682..47b678d 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -669,6 +669,14 @@
if (IS_ERR(inode))
goto iget_failed;
+ /*
+ * If the orphan has i_nlink > 0 then it should be able to be
+ * truncated, otherwise it won't be removed from the orphan list
+ * during processing and an infinite loop will result.
+ */
+ if (inode->i_nlink && !ext3_can_truncate(inode))
+ goto bad_orphan;
+
if (NEXT_ORPHAN(inode) > max_ino)
goto bad_orphan;
brelse(bitmap_bh);
@@ -690,6 +698,7 @@
printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
NEXT_ORPHAN(inode));
printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
+ printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
/* Avoid freeing blocks if we got a bad deleted inode */
if (inode->i_nlink == 0)
inode->i_blocks = 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 6ae4ecf..3bf07d7 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2127,7 +2127,21 @@
if (this_bh) {
BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
- ext3_journal_dirty_metadata(handle, this_bh);
+
+ /*
+ * The buffer head should have an attached journal head at this
+ * point. However, if the data is corrupted and an indirect
+ * block pointed to itself, it would have been detached when
+ * the block was cleared. Check for this instead of OOPSing.
+ */
+ if (bh2jh(this_bh))
+ ext3_journal_dirty_metadata(handle, this_bh);
+ else
+ ext3_error(inode->i_sb, "ext3_free_data",
+ "circular indirect block detected, "
+ "inode=%lu, block=%llu",
+ inode->i_ino,
+ (unsigned long long)this_bh->b_blocknr);
}
}
@@ -2253,6 +2267,19 @@
}
}
+/*
+ * Report whether @inode may be truncated.
+ *
+ * Returns 0 for append-only or immutable inodes, 1 for regular files and
+ * directories, 1 for symlinks that are not fast symlinks, and 0 for any
+ * other inode type.
+ */
+int ext3_can_truncate(struct inode *inode)
+{
+ if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ return 0;
+ if (S_ISREG(inode->i_mode))
+ return 1;
+ if (S_ISDIR(inode->i_mode))
+ return 1;
+ if (S_ISLNK(inode->i_mode))
+ return !ext3_inode_is_fast_symlink(inode);
+ return 0;
+}
+
/*
* ext3_truncate()
*
@@ -2297,12 +2324,7 @@
unsigned blocksize = inode->i_sb->s_blocksize;
struct page *page;
- if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- S_ISLNK(inode->i_mode)))
- return;
- if (ext3_inode_is_fast_symlink(inode))
- return;
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ if (!ext3_can_truncate(inode))
return;
/*
@@ -2513,6 +2535,16 @@
}
if (!buffer_uptodate(bh)) {
lock_buffer(bh);
+
+ /*
+ * If the buffer has the write error flag, we have failed
+ * to write out another inode in the same block. In this
+ * case, we don't have to read the block because we may
+ * read the old inode data successfully.
+ */
+ if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
+ set_buffer_uptodate(bh);
+
if (buffer_uptodate(bh)) {
/* someone brought it uptodate while we waited */
unlock_buffer(bh);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 0b8cf80..de13e91 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -240,13 +240,13 @@
{
unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
EXT3_DIR_REC_LEN(2) - infosize;
- return 0? 20: entry_space / sizeof(struct dx_entry);
+ return entry_space / sizeof(struct dx_entry);
}
static inline unsigned dx_node_limit (struct inode *dir)
{
unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
- return 0? 22: entry_space / sizeof(struct dx_entry);
+ return entry_space / sizeof(struct dx_entry);
}
/*
@@ -991,19 +991,21 @@
de = (struct ext3_dir_entry_2 *) bh->b_data;
top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
EXT3_DIR_REC_LEN(0));
- for (; de < top; de = ext3_next_entry(de))
- if (ext3_match (namelen, name, de)) {
- if (!ext3_check_dir_entry("ext3_find_entry",
- dir, de, bh,
- (block<<EXT3_BLOCK_SIZE_BITS(sb))
- +((char *)de - bh->b_data))) {
- brelse (bh);
+ for (; de < top; de = ext3_next_entry(de)) {
+ int off = (block << EXT3_BLOCK_SIZE_BITS(sb))
+ + ((char *) de - bh->b_data);
+
+ if (!ext3_check_dir_entry(__func__, dir, de, bh, off)) {
+ brelse(bh);
*err = ERR_BAD_DX_DIR;
goto errout;
}
- *res_dir = de;
- dx_release (frames);
- return bh;
+
+ if (ext3_match(namelen, name, de)) {
+ *res_dir = de;
+ dx_release(frames);
+ return bh;
+ }
}
brelse (bh);
/* Check to see if we should continue to search */
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 2845425..615788c 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -842,7 +842,7 @@
int data_opt = 0;
int option;
#ifdef CONFIG_QUOTA
- int qtype;
+ int qtype, qfmt;
char *qname;
#endif
@@ -1018,9 +1018,11 @@
case Opt_grpjquota:
qtype = GRPQUOTA;
set_qf_name:
- if (sb_any_quota_enabled(sb)) {
+ if ((sb_any_quota_enabled(sb) ||
+ sb_any_quota_suspended(sb)) &&
+ !sbi->s_qf_names[qtype]) {
printk(KERN_ERR
- "EXT3-fs: Cannot change journalled "
+ "EXT3-fs: Cannot change journaled "
"quota options when quota turned on.\n");
return 0;
}
@@ -1056,9 +1058,11 @@
case Opt_offgrpjquota:
qtype = GRPQUOTA;
clear_qf_name:
- if (sb_any_quota_enabled(sb)) {
+ if ((sb_any_quota_enabled(sb) ||
+ sb_any_quota_suspended(sb)) &&
+ sbi->s_qf_names[qtype]) {
printk(KERN_ERR "EXT3-fs: Cannot change "
- "journalled quota options when "
+ "journaled quota options when "
"quota turned on.\n");
return 0;
}
@@ -1069,10 +1073,20 @@
sbi->s_qf_names[qtype] = NULL;
break;
case Opt_jqfmt_vfsold:
- sbi->s_jquota_fmt = QFMT_VFS_OLD;
- break;
+ qfmt = QFMT_VFS_OLD;
+ goto set_qf_format;
case Opt_jqfmt_vfsv0:
- sbi->s_jquota_fmt = QFMT_VFS_V0;
+ qfmt = QFMT_VFS_V0;
+set_qf_format:
+ if ((sb_any_quota_enabled(sb) ||
+ sb_any_quota_suspended(sb)) &&
+ sbi->s_jquota_fmt != qfmt) {
+ printk(KERN_ERR "EXT3-fs: Cannot change "
+ "journaled quota options when "
+ "quota turned on.\n");
+ return 0;
+ }
+ sbi->s_jquota_fmt = qfmt;
break;
case Opt_quota:
case Opt_usrquota:
@@ -1084,7 +1098,8 @@
set_opt(sbi->s_mount_opt, GRPQUOTA);
break;
case Opt_noquota:
- if (sb_any_quota_enabled(sb)) {
+ if (sb_any_quota_enabled(sb) ||
+ sb_any_quota_suspended(sb)) {
printk(KERN_ERR "EXT3-fs: Cannot change quota "
"options when quota turned on.\n");
return 0;
@@ -1169,14 +1184,14 @@
}
if (!sbi->s_jquota_fmt) {
- printk(KERN_ERR "EXT3-fs: journalled quota format "
+ printk(KERN_ERR "EXT3-fs: journaled quota format "
"not specified.\n");
return 0;
}
} else {
if (sbi->s_jquota_fmt) {
- printk(KERN_ERR "EXT3-fs: journalled quota format "
- "specified with no journalling "
+ printk(KERN_ERR "EXT3-fs: journaled quota format "
+ "specified with no journaling "
"enabled.\n");
return 0;
}
@@ -1370,7 +1385,7 @@
int ret = ext3_quota_on_mount(sb, i);
if (ret < 0)
printk(KERN_ERR
- "EXT3-fs: Cannot turn on journalled "
+ "EXT3-fs: Cannot turn on journaled "
"quota: error %d\n", ret);
}
}
@@ -2712,7 +2727,7 @@
static int ext3_mark_dquot_dirty(struct dquot *dquot)
{
- /* Are we journalling quotas? */
+ /* Are we journaling quotas? */
if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
dquot_mark_dquot_dirty(dquot);
@@ -2759,23 +2774,42 @@
if (!test_opt(sb, QUOTA))
return -EINVAL;
- /* Not journalling quota or remount? */
- if ((!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
- !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) || remount)
+ /* When remounting, no checks are needed and in fact, path is NULL */
+ if (remount)
return vfs_quota_on(sb, type, format_id, path, remount);
+
err = path_lookup(path, LOOKUP_FOLLOW, &nd);
if (err)
return err;
+
/* Quotafile not on the same filesystem? */
if (nd.path.mnt->mnt_sb != sb) {
path_put(&nd.path);
return -EXDEV;
}
- /* Quotafile not in fs root? */
- if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
- printk(KERN_WARNING
- "EXT3-fs: Quota file not on filesystem root. "
- "Journalled quota will not work.\n");
+ /* Journaling quota? */
+ if (EXT3_SB(sb)->s_qf_names[type]) {
+ /* Quotafile not in fs root? */
+ if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
+ printk(KERN_WARNING
+ "EXT3-fs: Quota file not on filesystem root. "
+ "Journaled quota will not work.\n");
+ }
+
+ /*
+ * When we journal data on quota file, we have to flush journal to see
+ * all updates to the file when we bypass pagecache...
+ */
+ if (ext3_should_journal_data(nd.path.dentry->d_inode)) {
+ /*
+ * We don't need to lock updates but journal_flush() could
+ * otherwise be livelocked...
+ */
+ journal_lock_updates(EXT3_SB(sb)->s_journal);
+ journal_flush(EXT3_SB(sb)->s_journal);
+ journal_unlock_updates(EXT3_SB(sb)->s_journal);
+ }
+
path_put(&nd.path);
return vfs_quota_on(sb, type, format_id, path, remount);
}
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 821efaf..37b8109 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -15,7 +15,7 @@
ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
const char *name, size_t name_len)
{
- const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
+ const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index 0327497..c7c41a4 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -13,13 +13,11 @@
#include <linux/ext3_fs.h>
#include "xattr.h"
-#define XATTR_TRUSTED_PREFIX "trusted."
-
static size_t
ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
const char *name, size_t name_len)
{
- const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1;
+ const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index 1abd8f9..430fe63 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -12,13 +12,11 @@
#include <linux/ext3_fs.h>
#include "xattr.h"
-#define XATTR_USER_PREFIX "user."
-
static size_t
ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
const char *name, size_t name_len)
{
- const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1;
+ const size_t prefix_len = XATTR_USER_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
if (!test_opt(inode->i_sb, XATTR_USER))
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 34541d0..cd4a016 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -17,7 +17,6 @@
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/msdos_fs.h>
-#include <linux/dirent.h>
#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/compat.h>
@@ -124,10 +123,11 @@
* but ignore that right now.
* Ahem... Stack smashing in ring 0 isn't fun. Fixed.
*/
-static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len,
+static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
int uni_xlate, struct nls_table *nls)
{
- wchar_t *ip, ec;
+ const wchar_t *ip;
+ wchar_t ec;
unsigned char *op, nc;
int charlen;
int k;
@@ -167,6 +167,16 @@
return (op - ascii);
}
+/*
+ * Convert the wide-character string @uni into @buf, choosing the converter
+ * from the mount options: utf8_wcstombs() when options.utf8 is set,
+ * otherwise uni16_to_x8() with options.unicode_xlate and the nls_io table.
+ * @size is handed to the converter as its length argument.
+ *
+ * Returns the value of the chosen converter unchanged (callers in this
+ * file use it as the length of the converted name).
+ */
+static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni,
+ unsigned char *buf, int size)
+{
+ if (sbi->options.utf8)
+ return utf8_wcstombs(buf, uni, size);
+ else
+ return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate,
+ sbi->nls_io);
+}
+
static inline int
fat_short2uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni)
{
@@ -227,6 +237,19 @@
return len;
}
+/*
+ * Compare name @a (length @a_len) with name @b (length @b_len).
+ *
+ * Names of different length never match. Otherwise the comparison is a
+ * plain memcmp() when options.name_check is 's', and nls_strnicmp() with
+ * the nls_io table for any other name_check setting.
+ *
+ * Returns non-zero on a match, 0 otherwise.
+ */
+static inline int fat_name_match(struct msdos_sb_info *sbi,
+ const unsigned char *a, int a_len,
+ const unsigned char *b, int b_len)
+{
+ if (a_len != b_len)
+ return 0;
+
+ if (sbi->options.name_check != 's')
+ return !nls_strnicmp(sbi->nls_io, a, b, a_len);
+ else
+ return !memcmp(a, b, a_len);
+}
+
enum { PARSE_INVALID = 1, PARSE_NOT_LONGNAME, PARSE_EOF, };
/**
@@ -302,6 +325,19 @@
}
/*
+ * Maximum buffer size of short name.
+ * [(MSDOS_NAME + '.') * max one char + nul]
+ * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
+ */
+#define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
+/*
+ * Maximum buffer size of unicode chars from slots.
+ * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)]
+ */
+#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1)
+#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t))
+
+/*
* Return values: negative -> error, 0 -> not found, positive -> found,
* value is the total amount of slots, including the shortname entry.
*/
@@ -312,29 +348,20 @@
struct msdos_sb_info *sbi = MSDOS_SB(sb);
struct buffer_head *bh = NULL;
struct msdos_dir_entry *de;
- struct nls_table *nls_io = sbi->nls_io;
struct nls_table *nls_disk = sbi->nls_disk;
- wchar_t bufuname[14];
unsigned char nr_slots;
- int xlate_len;
+ wchar_t bufuname[14];
wchar_t *unicode = NULL;
unsigned char work[MSDOS_NAME];
- unsigned char *bufname = NULL;
- int uni_xlate = sbi->options.unicode_xlate;
- int utf8 = sbi->options.utf8;
- int anycase = (sbi->options.name_check != 's');
+ unsigned char bufname[FAT_MAX_SHORT_SIZE];
unsigned short opt_shortname = sbi->options.shortname;
loff_t cpos = 0;
- int chl, i, j, last_u, err;
-
- bufname = __getname();
- if (!bufname)
- return -ENOMEM;
+ int chl, i, j, last_u, err, len;
err = -ENOENT;
- while(1) {
+ while (1) {
if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
- goto EODir;
+ goto end_of_dir;
parse_record:
nr_slots = 0;
if (de->name[0] == DELETED_FLAG)
@@ -353,7 +380,7 @@
else if (status == PARSE_NOT_LONGNAME)
goto parse_record;
else if (status == PARSE_EOF)
- goto EODir;
+ goto end_of_dir;
}
memcpy(work, de->name, sizeof(de->name));
@@ -394,30 +421,24 @@
if (!last_u)
continue;
+ /* Compare shortname */
bufuname[last_u] = 0x0000;
- xlate_len = utf8
- ?utf8_wcstombs(bufname, bufuname, PATH_MAX)
- :uni16_to_x8(bufname, bufuname, PATH_MAX, uni_xlate, nls_io);
- if (xlate_len == name_len)
- if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
- (anycase && !nls_strnicmp(nls_io, name, bufname,
- xlate_len)))
- goto Found;
+ len = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
+ if (fat_name_match(sbi, name, name_len, bufname, len))
+ goto found;
if (nr_slots) {
- xlate_len = utf8
- ?utf8_wcstombs(bufname, unicode, PATH_MAX)
- :uni16_to_x8(bufname, unicode, PATH_MAX, uni_xlate, nls_io);
- if (xlate_len != name_len)
- continue;
- if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
- (anycase && !nls_strnicmp(nls_io, name, bufname,
- xlate_len)))
- goto Found;
+ void *longname = unicode + FAT_MAX_UNI_CHARS;
+ int size = PATH_MAX - FAT_MAX_UNI_SIZE;
+
+ /* Compare longname */
+ len = fat_uni_to_x8(sbi, unicode, longname, size);
+ if (fat_name_match(sbi, name, name_len, longname, len))
+ goto found;
}
}
-Found:
+found:
nr_slots++; /* include the de */
sinfo->slot_off = cpos - nr_slots * sizeof(*de);
sinfo->nr_slots = nr_slots;
@@ -425,9 +446,7 @@
sinfo->bh = bh;
sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de);
err = 0;
-EODir:
- if (bufname)
- __putname(bufname);
+end_of_dir:
if (unicode)
__putname(unicode);
@@ -453,23 +472,20 @@
struct msdos_sb_info *sbi = MSDOS_SB(sb);
struct buffer_head *bh;
struct msdos_dir_entry *de;
- struct nls_table *nls_io = sbi->nls_io;
struct nls_table *nls_disk = sbi->nls_disk;
- unsigned char long_slots;
- const char *fill_name;
- int fill_len;
+ unsigned char nr_slots;
wchar_t bufuname[14];
wchar_t *unicode = NULL;
- unsigned char c, work[MSDOS_NAME], bufname[56], *ptname = bufname;
- unsigned long lpos, dummy, *furrfu = &lpos;
- int uni_xlate = sbi->options.unicode_xlate;
- int isvfat = sbi->options.isvfat;
- int utf8 = sbi->options.utf8;
- int nocase = sbi->options.nocase;
+ unsigned char c, work[MSDOS_NAME];
+ unsigned char bufname[FAT_MAX_SHORT_SIZE], *ptname = bufname;
unsigned short opt_shortname = sbi->options.shortname;
+ int isvfat = sbi->options.isvfat;
+ int nocase = sbi->options.nocase;
+ const char *fill_name = NULL;
unsigned long inum;
- int chi, chl, i, i2, j, last, last_u, dotoffset = 0;
+ unsigned long lpos, dummy, *furrfu = &lpos;
loff_t cpos;
+ int chi, chl, i, i2, j, last, last_u, dotoffset = 0, fill_len = 0;
int ret = 0;
lock_super(sb);
@@ -489,43 +505,58 @@
cpos = 0;
}
}
- if (cpos & (sizeof(struct msdos_dir_entry)-1)) {
+ if (cpos & (sizeof(struct msdos_dir_entry) - 1)) {
ret = -ENOENT;
goto out;
}
bh = NULL;
-GetNew:
+get_new:
if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
- goto EODir;
+ goto end_of_dir;
parse_record:
- long_slots = 0;
- /* Check for long filename entry */
- if (isvfat) {
+ nr_slots = 0;
+ /*
+ * Check for long filename entry, but if short_only, we don't
+ * need to parse long filename.
+ */
+ if (isvfat && !short_only) {
if (de->name[0] == DELETED_FLAG)
- goto RecEnd;
+ goto record_end;
if (de->attr != ATTR_EXT && (de->attr & ATTR_VOLUME))
- goto RecEnd;
+ goto record_end;
if (de->attr != ATTR_EXT && IS_FREE(de->name))
- goto RecEnd;
+ goto record_end;
} else {
if ((de->attr & ATTR_VOLUME) || IS_FREE(de->name))
- goto RecEnd;
+ goto record_end;
}
if (isvfat && de->attr == ATTR_EXT) {
int status = fat_parse_long(inode, &cpos, &bh, &de,
- &unicode, &long_slots);
+ &unicode, &nr_slots);
if (status < 0) {
filp->f_pos = cpos;
ret = status;
goto out;
} else if (status == PARSE_INVALID)
- goto RecEnd;
+ goto record_end;
else if (status == PARSE_NOT_LONGNAME)
goto parse_record;
else if (status == PARSE_EOF)
- goto EODir;
+ goto end_of_dir;
+
+ if (nr_slots) {
+ void *longname = unicode + FAT_MAX_UNI_CHARS;
+ int size = PATH_MAX - FAT_MAX_UNI_SIZE;
+ int len = fat_uni_to_x8(sbi, unicode, longname, size);
+
+ fill_name = longname;
+ fill_len = len;
+ /* !both && !short_only, so we don't need shortname. */
+ if (!both)
+ goto start_filldir;
+ }
}
if (sbi->options.dotsOK) {
@@ -587,12 +618,32 @@
}
}
if (!last)
- goto RecEnd;
+ goto record_end;
i = last + dotoffset;
j = last_u;
- lpos = cpos - (long_slots+1)*sizeof(struct msdos_dir_entry);
+ if (isvfat) {
+ bufuname[j] = 0x0000;
+ i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
+ }
+ if (nr_slots) {
+ /* hack for fat_ioctl_filldir() */
+ struct fat_ioctl_filldir_callback *p = dirent;
+
+ p->longname = fill_name;
+ p->long_len = fill_len;
+ p->shortname = bufname;
+ p->short_len = i;
+ fill_name = NULL;
+ fill_len = 0;
+ } else {
+ fill_name = bufname;
+ fill_len = i;
+ }
+
+start_filldir:
+ lpos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME))
inum = inode->i_ino;
else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) {
@@ -607,49 +658,17 @@
inum = iunique(sb, MSDOS_ROOT_INO);
}
- if (isvfat) {
- bufuname[j] = 0x0000;
- i = utf8 ? utf8_wcstombs(bufname, bufuname, sizeof(bufname))
- : uni16_to_x8(bufname, bufuname, sizeof(bufname), uni_xlate, nls_io);
- }
-
- fill_name = bufname;
- fill_len = i;
- if (!short_only && long_slots) {
- /* convert the unicode long name. 261 is maximum size
- * of unicode buffer. (13 * slots + nul) */
- void *longname = unicode + 261;
- int buf_size = PATH_MAX - (261 * sizeof(unicode[0]));
- int long_len = utf8
- ? utf8_wcstombs(longname, unicode, buf_size)
- : uni16_to_x8(longname, unicode, buf_size, uni_xlate, nls_io);
-
- if (!both) {
- fill_name = longname;
- fill_len = long_len;
- } else {
- /* hack for fat_ioctl_filldir() */
- struct fat_ioctl_filldir_callback *p = dirent;
-
- p->longname = longname;
- p->long_len = long_len;
- p->shortname = bufname;
- p->short_len = i;
- fill_name = NULL;
- fill_len = 0;
- }
- }
if (filldir(dirent, fill_name, fill_len, *furrfu, inum,
(de->attr & ATTR_DIR) ? DT_DIR : DT_REG) < 0)
- goto FillFailed;
+ goto fill_failed;
-RecEnd:
+record_end:
furrfu = &lpos;
filp->f_pos = cpos;
- goto GetNew;
-EODir:
+ goto get_new;
+end_of_dir:
filp->f_pos = cpos;
-FillFailed:
+fill_failed:
brelse(bh);
if (unicode)
__putname(unicode);
@@ -715,7 +734,7 @@
return -EFAULT; \
}
-FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, dirent)
+FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, __fat_dirent)
static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
void __user *dirent, filldir_t filldir,
@@ -741,7 +760,7 @@
static int fat_dir_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
{
- struct dirent __user *d1 = (struct dirent __user *)arg;
+ struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg;
int short_only, both;
switch (cmd) {
@@ -757,7 +776,7 @@
return fat_generic_ioctl(inode, filp, cmd, arg);
}
- if (!access_ok(VERIFY_WRITE, d1, sizeof(struct dirent[2])))
+ if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2])))
return -EFAULT;
/*
* Yes, we don't need this put_user() absolutely. However old
@@ -1082,7 +1101,7 @@
goto error_free;
}
- fat_date_unix2dos(ts->tv_sec, &time, &date);
+ fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
de = (struct msdos_dir_entry *)bhs[0]->b_data;
/* filling the new directory slots ("." and ".." entries) */
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 46a4508..23676f9 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -382,17 +382,20 @@
inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
inode->i_mtime.tv_sec =
- date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date));
+ date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date),
+ sbi->options.tz_utc);
inode->i_mtime.tv_nsec = 0;
if (sbi->options.isvfat) {
int secs = de->ctime_cs / 100;
int csecs = de->ctime_cs % 100;
inode->i_ctime.tv_sec =
date_dos2unix(le16_to_cpu(de->ctime),
- le16_to_cpu(de->cdate)) + secs;
+ le16_to_cpu(de->cdate),
+ sbi->options.tz_utc) + secs;
inode->i_ctime.tv_nsec = csecs * 10000000;
inode->i_atime.tv_sec =
- date_dos2unix(0, le16_to_cpu(de->adate));
+ date_dos2unix(0, le16_to_cpu(de->adate),
+ sbi->options.tz_utc);
inode->i_atime.tv_nsec = 0;
} else
inode->i_ctime = inode->i_atime = inode->i_mtime;
@@ -591,11 +594,14 @@
raw_entry->attr = fat_attr(inode);
raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart);
raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
- fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date);
+ fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time,
+ &raw_entry->date, sbi->options.tz_utc);
if (sbi->options.isvfat) {
__le16 atime;
- fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate);
- fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate);
+ fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime,
+ &raw_entry->cdate, sbi->options.tz_utc);
+ fat_date_unix2dos(inode->i_atime.tv_sec, &atime,
+ &raw_entry->adate, sbi->options.tz_utc);
raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
inode->i_ctime.tv_nsec / 10000000;
}
@@ -836,6 +842,8 @@
}
if (sbi->options.flush)
seq_puts(m, ",flush");
+ if (opts->tz_utc)
+ seq_puts(m, ",tz=UTC");
return 0;
}
@@ -848,7 +856,7 @@
Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
- Opt_obsolate, Opt_flush, Opt_err,
+ Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err,
};
static match_table_t fat_tokens = {
@@ -883,6 +891,7 @@
{Opt_obsolate, "cvf_options=%100s"},
{Opt_obsolate, "posix"},
{Opt_flush, "flush"},
+ {Opt_tz_utc, "tz=UTC"},
{Opt_err, NULL},
};
static match_table_t msdos_tokens = {
@@ -947,10 +956,11 @@
opts->utf8 = opts->unicode_xlate = 0;
opts->numtail = 1;
opts->usefree = opts->nocase = 0;
+ opts->tz_utc = 0;
*debug = 0;
if (!options)
- return 0;
+ goto out;
while ((p = strsep(&options, ",")) != NULL) {
int token;
@@ -1036,6 +1046,9 @@
case Opt_flush:
opts->flush = 1;
break;
+ case Opt_tz_utc:
+ opts->tz_utc = 1;
+ break;
/* msdos specific */
case Opt_dots:
@@ -1104,10 +1117,13 @@
return -EINVAL;
}
}
+
+out:
/* UTF-8 doesn't provide FAT semantics */
if (!strcmp(opts->iocharset, "utf8")) {
printk(KERN_ERR "FAT: utf8 is not a recommended IO charset"
- " for FAT filesystems, filesystem will be case sensitive!\n");
+ " for FAT filesystems, filesystem will be "
+ "case sensitive!\n");
}
/* If user doesn't specify allow_utime, it's initialized from dmask. */
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 61f2351..79fb98a 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -142,7 +142,7 @@
};
/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */
-int date_dos2unix(unsigned short time, unsigned short date)
+int date_dos2unix(unsigned short time, unsigned short date, int tz_utc)
{
int month, year, secs;
@@ -156,16 +156,18 @@
((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 &&
month < 2 ? 1 : 0)+3653);
/* days since 1.1.70 plus 80's leap day */
- secs += sys_tz.tz_minuteswest*60;
+ if (!tz_utc)
+ secs += sys_tz.tz_minuteswest*60;
return secs;
}
/* Convert linear UNIX date to a MS-DOS time/date pair. */
-void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date)
+void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc)
{
int day, year, nl_day, month;
- unix_date -= sys_tz.tz_minuteswest*60;
+ if (!tz_utc)
+ unix_date -= sys_tz.tz_minuteswest*60;
/* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
if (unix_date < 315532800)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 2060bf0..51d0035 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -97,7 +97,7 @@
* timeout is unknown (unlink, rmdir, rename and in some cases
* lookup)
*/
-static void fuse_invalidate_entry_cache(struct dentry *entry)
+void fuse_invalidate_entry_cache(struct dentry *entry)
{
fuse_dentry_settime(entry, 0);
}
@@ -112,18 +112,16 @@
fuse_invalidate_entry_cache(entry);
}
-static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
- struct dentry *entry,
+static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req,
+ u64 nodeid, struct qstr *name,
struct fuse_entry_out *outarg)
{
- struct fuse_conn *fc = get_fuse_conn(dir);
-
memset(outarg, 0, sizeof(struct fuse_entry_out));
req->in.h.opcode = FUSE_LOOKUP;
- req->in.h.nodeid = get_node_id(dir);
+ req->in.h.nodeid = nodeid;
req->in.numargs = 1;
- req->in.args[0].size = entry->d_name.len + 1;
- req->in.args[0].value = entry->d_name.name;
+ req->in.args[0].size = name->len + 1;
+ req->in.args[0].value = name->name;
req->out.numargs = 1;
if (fc->minor < 9)
req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
@@ -189,7 +187,8 @@
attr_version = fuse_get_attr_version(fc);
parent = dget_parent(entry);
- fuse_lookup_init(req, parent->d_inode, entry, &outarg);
+ fuse_lookup_init(fc, req, get_node_id(parent->d_inode),
+ &entry->d_name, &outarg);
request_send(fc, req);
dput(parent);
err = req->out.h.error;
@@ -225,7 +224,7 @@
return !nodeid || nodeid == FUSE_ROOT_ID;
}
-static struct dentry_operations fuse_dentry_operations = {
+struct dentry_operations fuse_dentry_operations = {
.d_revalidate = fuse_dentry_revalidate,
};
@@ -239,18 +238,78 @@
* Add a directory inode to a dentry, ensuring that no other dentry
* refers to this inode. Called with fc->inst_mutex.
*/
-static int fuse_d_add_directory(struct dentry *entry, struct inode *inode)
+static struct dentry *fuse_d_add_directory(struct dentry *entry,
+ struct inode *inode)
{
struct dentry *alias = d_find_alias(inode);
- if (alias) {
+ if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
/* This tries to shrink the subtree below alias */
fuse_invalidate_entry(alias);
dput(alias);
if (!list_empty(&inode->i_dentry))
- return -EBUSY;
+ return ERR_PTR(-EBUSY);
+ } else {
+ dput(alias);
}
- d_add(entry, inode);
- return 0;
+ return d_splice_alias(inode, entry);
+}
+
+int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
+ struct fuse_entry_out *outarg, struct inode **inode)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+ struct fuse_req *req;
+ struct fuse_req *forget_req;
+ u64 attr_version;
+ int err;
+
+ *inode = NULL;
+ err = -ENAMETOOLONG;
+ if (name->len > FUSE_NAME_MAX)
+ goto out;
+
+ req = fuse_get_req(fc);
+ err = PTR_ERR(req);
+ if (IS_ERR(req))
+ goto out;
+
+ forget_req = fuse_get_req(fc);
+ err = PTR_ERR(forget_req);
+ if (IS_ERR(forget_req)) {
+ fuse_put_request(fc, req);
+ goto out;
+ }
+
+ attr_version = fuse_get_attr_version(fc);
+
+ fuse_lookup_init(fc, req, nodeid, name, outarg);
+ request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ /* Zero nodeid is same as -ENOENT, but with valid timeout */
+ if (err || !outarg->nodeid)
+ goto out_put_forget;
+
+ err = -EIO;
+ if (!outarg->nodeid)
+ goto out_put_forget;
+ if (!fuse_valid_type(outarg->attr.mode))
+ goto out_put_forget;
+
+ *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
+ &outarg->attr, entry_attr_timeout(outarg),
+ attr_version);
+ err = -ENOMEM;
+ if (!*inode) {
+ fuse_send_forget(fc, forget_req, outarg->nodeid, 1);
+ goto out;
+ }
+ err = 0;
+
+ out_put_forget:
+ fuse_put_request(fc, forget_req);
+ out:
+ return err;
}
static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
@@ -258,66 +317,48 @@
{
int err;
struct fuse_entry_out outarg;
- struct inode *inode = NULL;
+ struct inode *inode;
+ struct dentry *newent;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req;
- struct fuse_req *forget_req;
- u64 attr_version;
+ bool outarg_valid = true;
- if (entry->d_name.len > FUSE_NAME_MAX)
- return ERR_PTR(-ENAMETOOLONG);
-
- req = fuse_get_req(fc);
- if (IS_ERR(req))
- return ERR_CAST(req);
-
- forget_req = fuse_get_req(fc);
- if (IS_ERR(forget_req)) {
- fuse_put_request(fc, req);
- return ERR_CAST(forget_req);
+ err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
+ &outarg, &inode);
+ if (err == -ENOENT) {
+ outarg_valid = false;
+ err = 0;
}
+ if (err)
+ goto out_err;
- attr_version = fuse_get_attr_version(fc);
-
- fuse_lookup_init(req, dir, entry, &outarg);
- request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
- /* Zero nodeid is same as -ENOENT, but with valid timeout */
- if (!err && outarg.nodeid &&
- (invalid_nodeid(outarg.nodeid) ||
- !fuse_valid_type(outarg.attr.mode)))
- err = -EIO;
- if (!err && outarg.nodeid) {
- inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
- &outarg.attr, entry_attr_timeout(&outarg),
- attr_version);
- if (!inode) {
- fuse_send_forget(fc, forget_req, outarg.nodeid, 1);
- return ERR_PTR(-ENOMEM);
- }
- }
- fuse_put_request(fc, forget_req);
- if (err && err != -ENOENT)
- return ERR_PTR(err);
+ err = -EIO;
+ if (inode && get_node_id(inode) == FUSE_ROOT_ID)
+ goto out_iput;
if (inode && S_ISDIR(inode->i_mode)) {
mutex_lock(&fc->inst_mutex);
- err = fuse_d_add_directory(entry, inode);
+ newent = fuse_d_add_directory(entry, inode);
mutex_unlock(&fc->inst_mutex);
- if (err) {
- iput(inode);
- return ERR_PTR(err);
- }
- } else
- d_add(entry, inode);
+ err = PTR_ERR(newent);
+ if (IS_ERR(newent))
+ goto out_iput;
+ } else {
+ newent = d_splice_alias(inode, entry);
+ }
+ entry = newent ? newent : entry;
entry->d_op = &fuse_dentry_operations;
- if (!err)
+ if (outarg_valid)
fuse_change_entry_timeout(entry, &outarg);
else
fuse_invalidate_entry_cache(entry);
- return NULL;
+
+ return newent;
+
+ out_iput:
+ iput(inode);
+ out_err:
+ return ERR_PTR(err);
}
/*
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 8092f0d..67ff2c6 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1341,6 +1341,11 @@
pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
int err;
+ if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
+ /* NLM needs asynchronous locks, which we don't support yet */
+ return -ENOLCK;
+ }
+
/* Unlock on close is handled by the flush method */
if (fl->fl_flags & FL_CLOSE)
return 0;
@@ -1365,7 +1370,9 @@
struct fuse_conn *fc = get_fuse_conn(inode);
int err;
- if (cmd == F_GETLK) {
+ if (cmd == F_CANCELLK) {
+ err = 0;
+ } else if (cmd == F_GETLK) {
if (fc->no_lock) {
posix_test_lock(file, fl);
err = 0;
@@ -1373,7 +1380,7 @@
err = fuse_getlk(file, fl);
} else {
if (fc->no_lock)
- err = posix_lock_file_wait(file, fl);
+ err = posix_lock_file(file, fl, NULL);
else
err = fuse_setlk(file, fl, 0);
}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index bae9486..3a87607 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -363,6 +363,9 @@
/** Do not send separate SETATTR request before open(O_TRUNC) */
unsigned atomic_o_trunc : 1;
+ /** Filesystem supports NFS exporting. Only set in INIT */
+ unsigned export_support : 1;
+
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
@@ -464,6 +467,8 @@
/** Device operations */
extern const struct file_operations fuse_dev_operations;
+extern struct dentry_operations fuse_dentry_operations;
+
/**
* Get a filled in inode
*/
@@ -471,6 +476,9 @@
int generation, struct fuse_attr *attr,
u64 attr_valid, u64 attr_version);
+int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
+ struct fuse_entry_out *outarg, struct inode **inode);
+
/**
* Send FORGET command
*/
@@ -604,6 +612,8 @@
*/
void fuse_invalidate_attr(struct inode *inode);
+void fuse_invalidate_entry_cache(struct dentry *entry);
+
/**
* Acquire reference to fuse_conn
*/
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3141690..7d2f7d6 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -18,6 +18,7 @@
#include <linux/statfs.h>
#include <linux/random.h>
#include <linux/sched.h>
+#include <linux/exportfs.h>
MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -552,6 +553,174 @@
return fuse_iget(sb, 1, 0, &attr, 0, 0);
}
+struct fuse_inode_handle
+{
+ u64 nodeid;
+ u32 generation;
+};
+
+static struct dentry *fuse_get_dentry(struct super_block *sb,
+ struct fuse_inode_handle *handle)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+ struct inode *inode;
+ struct dentry *entry;
+ int err = -ESTALE;
+
+ if (handle->nodeid == 0)
+ goto out_err;
+
+ inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
+ if (!inode) {
+ struct fuse_entry_out outarg;
+ struct qstr name;
+
+ if (!fc->export_support)
+ goto out_err;
+
+ name.len = 1;
+ name.name = ".";
+ err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
+ &inode);
+ if (err && err != -ENOENT)
+ goto out_err;
+ if (err || !inode) {
+ err = -ESTALE;
+ goto out_err;
+ }
+ err = -EIO;
+ if (get_node_id(inode) != handle->nodeid)
+ goto out_iput;
+ }
+ err = -ESTALE;
+ if (inode->i_generation != handle->generation)
+ goto out_iput;
+
+ entry = d_alloc_anon(inode);
+ err = -ENOMEM;
+ if (!entry)
+ goto out_iput;
+
+ if (get_node_id(inode) != FUSE_ROOT_ID) {
+ entry->d_op = &fuse_dentry_operations;
+ fuse_invalidate_entry_cache(entry);
+ }
+
+ return entry;
+
+ out_iput:
+ iput(inode);
+ out_err:
+ return ERR_PTR(err);
+}
+
+static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
+ int connectable)
+{
+ struct inode *inode = dentry->d_inode;
+ bool encode_parent = connectable && !S_ISDIR(inode->i_mode);
+ int len = encode_parent ? 6 : 3;
+ u64 nodeid;
+ u32 generation;
+
+ if (*max_len < len)
+ return 255;
+
+ nodeid = get_fuse_inode(inode)->nodeid;
+ generation = inode->i_generation;
+
+ fh[0] = (u32)(nodeid >> 32);
+ fh[1] = (u32)(nodeid & 0xffffffff);
+ fh[2] = generation;
+
+ if (encode_parent) {
+ struct inode *parent;
+
+ spin_lock(&dentry->d_lock);
+ parent = dentry->d_parent->d_inode;
+ nodeid = get_fuse_inode(parent)->nodeid;
+ generation = parent->i_generation;
+ spin_unlock(&dentry->d_lock);
+
+ fh[3] = (u32)(nodeid >> 32);
+ fh[4] = (u32)(nodeid & 0xffffffff);
+ fh[5] = generation;
+ }
+
+ *max_len = len;
+ return encode_parent ? 0x82 : 0x81;
+}
+
+static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
+ struct fid *fid, int fh_len, int fh_type)
+{
+ struct fuse_inode_handle handle;
+
+ if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
+ return NULL;
+
+ handle.nodeid = (u64) fid->raw[0] << 32;
+ handle.nodeid |= (u64) fid->raw[1];
+ handle.generation = fid->raw[2];
+ return fuse_get_dentry(sb, &handle);
+}
+
+static struct dentry *fuse_fh_to_parent(struct super_block *sb,
+ struct fid *fid, int fh_len, int fh_type)
+{
+ struct fuse_inode_handle parent;
+
+ if (fh_type != 0x82 || fh_len < 6)
+ return NULL;
+
+ parent.nodeid = (u64) fid->raw[3] << 32;
+ parent.nodeid |= (u64) fid->raw[4];
+ parent.generation = fid->raw[5];
+ return fuse_get_dentry(sb, &parent);
+}
+
+static struct dentry *fuse_get_parent(struct dentry *child)
+{
+ struct inode *child_inode = child->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(child_inode);
+ struct inode *inode;
+ struct dentry *parent;
+ struct fuse_entry_out outarg;
+ struct qstr name;
+ int err;
+
+ if (!fc->export_support)
+ return ERR_PTR(-ESTALE);
+
+ name.len = 2;
+ name.name = "..";
+ err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
+ &name, &outarg, &inode);
+ if (err && err != -ENOENT)
+ return ERR_PTR(err);
+ if (err || !inode)
+ return ERR_PTR(-ESTALE);
+
+ parent = d_alloc_anon(inode);
+ if (!parent) {
+ iput(inode);
+ return ERR_PTR(-ENOMEM);
+ }
+ if (get_node_id(inode) != FUSE_ROOT_ID) {
+ parent->d_op = &fuse_dentry_operations;
+ fuse_invalidate_entry_cache(parent);
+ }
+
+ return parent;
+}
+
+static const struct export_operations fuse_export_operations = {
+ .fh_to_dentry = fuse_fh_to_dentry,
+ .fh_to_parent = fuse_fh_to_parent,
+ .encode_fh = fuse_encode_fh,
+ .get_parent = fuse_get_parent,
+};
+
static const struct super_operations fuse_super_operations = {
.alloc_inode = fuse_alloc_inode,
.destroy_inode = fuse_destroy_inode,
@@ -581,6 +750,11 @@
fc->no_lock = 1;
if (arg->flags & FUSE_ATOMIC_O_TRUNC)
fc->atomic_o_trunc = 1;
+ if (arg->minor >= 9) {
+ /* LOOKUP has dependency on proto version */
+ if (arg->flags & FUSE_EXPORT_SUPPORT)
+ fc->export_support = 1;
+ }
if (arg->flags & FUSE_BIG_WRITES)
fc->big_writes = 1;
} else {
@@ -607,7 +781,7 @@
arg->minor = FUSE_KERNEL_MINOR_VERSION;
arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
- FUSE_BIG_WRITES;
+ FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
@@ -652,6 +826,7 @@
sb->s_magic = FUSE_SUPER_MAGIC;
sb->s_op = &fuse_super_operations;
sb->s_maxbytes = MAX_LFS_FILESIZE;
+ sb->s_export_op = &fuse_export_operations;
file = fget(d.fd);
if (!file)
diff --git a/fs/hfs/bitmap.c b/fs/hfs/bitmap.c
index 24e7579..c6e9736 100644
--- a/fs/hfs/bitmap.c
+++ b/fs/hfs/bitmap.c
@@ -145,7 +145,7 @@
if (!*num_bits)
return 0;
- down(&HFS_SB(sb)->bitmap_lock);
+ mutex_lock(&HFS_SB(sb)->bitmap_lock);
bitmap = HFS_SB(sb)->bitmap;
pos = hfs_find_set_zero_bits(bitmap, HFS_SB(sb)->fs_ablocks, goal, num_bits);
@@ -162,7 +162,7 @@
HFS_SB(sb)->free_ablocks -= *num_bits;
hfs_bitmap_dirty(sb);
out:
- up(&HFS_SB(sb)->bitmap_lock);
+ mutex_unlock(&HFS_SB(sb)->bitmap_lock);
return pos;
}
@@ -205,7 +205,7 @@
if ((start + count) > HFS_SB(sb)->fs_ablocks)
return -2;
- down(&HFS_SB(sb)->bitmap_lock);
+ mutex_lock(&HFS_SB(sb)->bitmap_lock);
/* bitmap is always on a 32-bit boundary */
curr = HFS_SB(sb)->bitmap + (start / 32);
len = count;
@@ -236,7 +236,7 @@
}
out:
HFS_SB(sb)->free_ablocks += len;
- up(&HFS_SB(sb)->bitmap_lock);
+ mutex_unlock(&HFS_SB(sb)->bitmap_lock);
hfs_bitmap_dirty(sb);
return 0;
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index f6621a7..9b9d639 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -40,7 +40,7 @@
{
struct hfs_mdb *mdb = HFS_SB(sb)->mdb;
HFS_I(tree->inode)->flags = 0;
- init_MUTEX(&HFS_I(tree->inode)->extents_lock);
+ mutex_init(&HFS_I(tree->inode)->extents_lock);
switch (id) {
case HFS_EXT_CNID:
hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize,
diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c
index c176f67..2c16316 100644
--- a/fs/hfs/extent.c
+++ b/fs/hfs/extent.c
@@ -343,16 +343,16 @@
goto done;
}
- down(&HFS_I(inode)->extents_lock);
+ mutex_lock(&HFS_I(inode)->extents_lock);
res = hfs_ext_read_extent(inode, ablock);
if (!res)
dblock = hfs_ext_find_block(HFS_I(inode)->cached_extents,
ablock - HFS_I(inode)->cached_start);
else {
- up(&HFS_I(inode)->extents_lock);
+ mutex_unlock(&HFS_I(inode)->extents_lock);
return -EIO;
}
- up(&HFS_I(inode)->extents_lock);
+ mutex_unlock(&HFS_I(inode)->extents_lock);
done:
map_bh(bh_result, sb, HFS_SB(sb)->fs_start +
@@ -375,7 +375,7 @@
u32 start, len, goal;
int res;
- down(&HFS_I(inode)->extents_lock);
+ mutex_lock(&HFS_I(inode)->extents_lock);
if (HFS_I(inode)->alloc_blocks == HFS_I(inode)->first_blocks)
goal = hfs_ext_lastblock(HFS_I(inode)->first_extents);
else {
@@ -425,7 +425,7 @@
goto insert_extent;
}
out:
- up(&HFS_I(inode)->extents_lock);
+ mutex_unlock(&HFS_I(inode)->extents_lock);
if (!res) {
HFS_I(inode)->alloc_blocks += len;
mark_inode_dirty(inode);
@@ -487,7 +487,7 @@
if (blk_cnt == alloc_cnt)
goto out;
- down(&HFS_I(inode)->extents_lock);
+ mutex_lock(&HFS_I(inode)->extents_lock);
hfs_find_init(HFS_SB(sb)->ext_tree, &fd);
while (1) {
if (alloc_cnt == HFS_I(inode)->first_blocks) {
@@ -514,7 +514,7 @@
hfs_brec_remove(&fd);
}
hfs_find_exit(&fd);
- up(&HFS_I(inode)->extents_lock);
+ mutex_unlock(&HFS_I(inode)->extents_lock);
HFS_I(inode)->alloc_blocks = blk_cnt;
out:
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 147374b..9955232 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/types.h>
+#include <linux/mutex.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
@@ -53,7 +54,7 @@
struct list_head open_dir_list;
struct inode *rsrc_inode;
- struct semaphore extents_lock;
+ struct mutex extents_lock;
u16 alloc_blocks, clump_blocks;
sector_t fs_blocks;
@@ -139,7 +140,7 @@
struct nls_table *nls_io, *nls_disk;
- struct semaphore bitmap_lock;
+ struct mutex bitmap_lock;
unsigned long flags;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 97f8446..dc4ec64 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -150,7 +150,7 @@
if (!inode)
return NULL;
- init_MUTEX(&HFS_I(inode)->extents_lock);
+ mutex_init(&HFS_I(inode)->extents_lock);
INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name);
inode->i_ino = HFS_SB(sb)->next_id++;
@@ -281,7 +281,7 @@
HFS_I(inode)->flags = 0;
HFS_I(inode)->rsrc_inode = NULL;
- init_MUTEX(&HFS_I(inode)->extents_lock);
+ mutex_init(&HFS_I(inode)->extents_lock);
INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
/* Initialize the inode */
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 8cf6797..ac2ec5e 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -372,7 +372,7 @@
sb->s_op = &hfs_super_operations;
sb->s_flags |= MS_NODIRATIME;
- init_MUTEX(&sbi->bitmap_lock);
+ mutex_init(&sbi->bitmap_lock);
res = hfs_mdb_get(sb);
if (res) {
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 12e899c..fec8f61 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -199,16 +199,16 @@
goto done;
}
- down(&HFSPLUS_I(inode).extents_lock);
+ mutex_lock(&HFSPLUS_I(inode).extents_lock);
res = hfsplus_ext_read_extent(inode, ablock);
if (!res) {
dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock -
HFSPLUS_I(inode).cached_start);
} else {
- up(&HFSPLUS_I(inode).extents_lock);
+ mutex_unlock(&HFSPLUS_I(inode).extents_lock);
return -EIO;
}
- up(&HFSPLUS_I(inode).extents_lock);
+ mutex_unlock(&HFSPLUS_I(inode).extents_lock);
done:
dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock);
@@ -355,7 +355,7 @@
return -ENOSPC;
}
- down(&HFSPLUS_I(inode).extents_lock);
+ mutex_lock(&HFSPLUS_I(inode).extents_lock);
if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks)
goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents);
else {
@@ -408,7 +408,7 @@
goto insert_extent;
}
out:
- up(&HFSPLUS_I(inode).extents_lock);
+ mutex_unlock(&HFSPLUS_I(inode).extents_lock);
if (!res) {
HFSPLUS_I(inode).alloc_blocks += len;
mark_inode_dirty(inode);
@@ -465,7 +465,7 @@
if (blk_cnt == alloc_cnt)
goto out;
- down(&HFSPLUS_I(inode).extents_lock);
+ mutex_lock(&HFSPLUS_I(inode).extents_lock);
hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd);
while (1) {
if (alloc_cnt == HFSPLUS_I(inode).first_blocks) {
@@ -492,7 +492,7 @@
hfs_brec_remove(&fd);
}
hfs_find_exit(&fd);
- up(&HFSPLUS_I(inode).extents_lock);
+ mutex_unlock(&HFSPLUS_I(inode).extents_lock);
HFSPLUS_I(inode).alloc_blocks = blk_cnt;
out:
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 9e59537..f027a90 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -11,6 +11,7 @@
#define _LINUX_HFSPLUS_FS_H
#include <linux/fs.h>
+#include <linux/mutex.h>
#include <linux/buffer_head.h>
#include "hfsplus_raw.h"
@@ -154,7 +155,7 @@
struct hfsplus_inode_info {
- struct semaphore extents_lock;
+ struct mutex extents_lock;
u32 clump_blocks, alloc_blocks;
sector_t fs_blocks;
/* Allocation extents from catalog record or volume header */
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 67e1c8b..cc3b5e2 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -163,7 +163,7 @@
inode->i_ino = dir->i_ino;
INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
- init_MUTEX(&HFSPLUS_I(inode).extents_lock);
+ mutex_init(&HFSPLUS_I(inode).extents_lock);
HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC;
hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd);
@@ -316,7 +316,7 @@
inode->i_nlink = 1;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
- init_MUTEX(&HFSPLUS_I(inode).extents_lock);
+ mutex_init(&HFSPLUS_I(inode).extents_lock);
atomic_set(&HFSPLUS_I(inode).opencnt, 0);
HFSPLUS_I(inode).flags = 0;
memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec));
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index ce97a54..3859118 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -34,7 +34,7 @@
return inode;
INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
- init_MUTEX(&HFSPLUS_I(inode).extents_lock);
+ mutex_init(&HFSPLUS_I(inode).extents_lock);
HFSPLUS_I(inode).flags = 0;
HFSPLUS_I(inode).rsrc_inode = NULL;
atomic_set(&HFSPLUS_I(inode).opencnt, 0);
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 6bd48f0..c2fb2dd 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -209,6 +209,11 @@
while (rs.len > 2) { /* There may be one byte for padding somewhere */
rr = (struct rock_ridge *)rs.chr;
+ /*
+ * Ignore rock ridge info if rr->len is out of range, but
+ * don't return -EIO because that would make the file
+ * invisible.
+ */
if (rr->len < 3)
goto out; /* Something got screwed up here */
sig = isonum_721(rs.chr);
@@ -216,8 +221,12 @@
goto eio;
rs.chr += rr->len;
rs.len -= rr->len;
+ /*
+ * As above, just ignore the rock ridge info if rr->len
+ * is bogus.
+ */
if (rs.len < 0)
- goto eio; /* corrupted isofs */
+ goto out; /* Something got screwed up here */
switch (sig) {
case SIG('R', 'R'):
@@ -307,6 +316,11 @@
repeat:
while (rs.len > 2) { /* There may be one byte for padding somewhere */
rr = (struct rock_ridge *)rs.chr;
+ /*
+ * Ignore rock ridge info if rr->len is out of range, but
+ * don't return -EIO because that would make the file
+ * invisible.
+ */
if (rr->len < 3)
goto out; /* Something got screwed up here */
sig = isonum_721(rs.chr);
@@ -314,8 +328,12 @@
goto eio;
rs.chr += rr->len;
rs.len -= rr->len;
+ /*
+ * As above, just ignore the rock ridge info if rr->len
+ * is bogus.
+ */
if (rs.len < 0)
- goto eio; /* corrupted isofs */
+ goto out; /* Something got screwed up here */
switch (sig) {
#ifndef CONFIG_ZISOFS /* No flag for SF or ZF */
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 5a8ca61..2eccbfa 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -36,7 +36,7 @@
/*
* When an ext3-ordered file is truncated, it is possible that many pages are
- * not sucessfully freed, because they are attached to a committing transaction.
+ * not successfully freed, because they are attached to a committing transaction.
* After the transaction commits, these pages are left on the LRU, with no
* ->mapping, and with attached buffers. These pages are trivially reclaimable
* by the VM, but their apparent absence upsets the VM accounting, and it makes
@@ -45,8 +45,8 @@
* So here, we have a buffer which has just come off the forget list. Look to
* see if we can strip all buffers from the backing page.
*
- * Called under lock_journal(), and possibly under journal_datalist_lock. The
- * caller provided us with a ref against the buffer, and we drop that here.
+ * Called under journal->j_list_lock. The caller provided us with a ref
+ * against the buffer, and we drop that here.
*/
static void release_buffer_page(struct buffer_head *bh)
{
@@ -78,6 +78,19 @@
}
/*
+ * Decrement reference counter for data buffer. If it has been marked
+ * 'BH_Freed', release it and the page to which it belongs if possible.
+ */
+static void release_data_buffer(struct buffer_head *bh)
+{
+ if (buffer_freed(bh)) {
+ clear_buffer_freed(bh);
+ release_buffer_page(bh);
+ } else
+ put_bh(bh);
+}
+
+/*
* Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
* held. For ranking reasons we must trylock. If we lose, schedule away and
* return 0. j_list_lock is dropped in this case.
@@ -172,7 +185,7 @@
/*
* Submit all the data buffers to disk
*/
-static void journal_submit_data_buffers(journal_t *journal,
+static int journal_submit_data_buffers(journal_t *journal,
transaction_t *commit_transaction)
{
struct journal_head *jh;
@@ -180,6 +193,7 @@
int locked;
int bufs = 0;
struct buffer_head **wbuf = journal->j_wbuf;
+ int err = 0;
/*
* Whenever we unlock the journal and sleep, things can get added
@@ -231,7 +245,7 @@
if (locked)
unlock_buffer(bh);
BUFFER_TRACE(bh, "already cleaned up");
- put_bh(bh);
+ release_data_buffer(bh);
continue;
}
if (locked && test_clear_buffer_dirty(bh)) {
@@ -253,15 +267,17 @@
put_bh(bh);
} else {
BUFFER_TRACE(bh, "writeout complete: unfile");
+ if (unlikely(!buffer_uptodate(bh)))
+ err = -EIO;
__journal_unfile_buffer(jh);
jbd_unlock_bh_state(bh);
if (locked)
unlock_buffer(bh);
journal_remove_journal_head(bh);
- /* Once for our safety reference, once for
+ /* One for our safety reference, other for
* journal_remove_journal_head() */
put_bh(bh);
- put_bh(bh);
+ release_data_buffer(bh);
}
if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
@@ -271,6 +287,8 @@
}
spin_unlock(&journal->j_list_lock);
journal_do_submit_data(wbuf, bufs);
+
+ return err;
}
/*
@@ -410,8 +428,7 @@
* Now start flushing things to disk, in the order they appear
* on the transaction lists. Data blocks go first.
*/
- err = 0;
- journal_submit_data_buffers(journal, commit_transaction);
+ err = journal_submit_data_buffers(journal, commit_transaction);
/*
* Wait for all previously submitted IO to complete.
@@ -426,10 +443,21 @@
if (buffer_locked(bh)) {
spin_unlock(&journal->j_list_lock);
wait_on_buffer(bh);
- if (unlikely(!buffer_uptodate(bh)))
- err = -EIO;
spin_lock(&journal->j_list_lock);
}
+ if (unlikely(!buffer_uptodate(bh))) {
+ if (TestSetPageLocked(bh->b_page)) {
+ spin_unlock(&journal->j_list_lock);
+ lock_page(bh->b_page);
+ spin_lock(&journal->j_list_lock);
+ }
+ if (bh->b_page->mapping)
+ set_bit(AS_EIO, &bh->b_page->mapping->flags);
+
+ unlock_page(bh->b_page);
+ SetPageError(bh->b_page);
+ err = -EIO;
+ }
if (!inverted_lock(journal, bh)) {
put_bh(bh);
spin_lock(&journal->j_list_lock);
@@ -443,18 +471,22 @@
} else {
jbd_unlock_bh_state(bh);
}
- put_bh(bh);
+ release_data_buffer(bh);
cond_resched_lock(&journal->j_list_lock);
}
spin_unlock(&journal->j_list_lock);
- if (err)
- journal_abort(journal, err);
+ if (err) {
+ char b[BDEVNAME_SIZE];
+
+ printk(KERN_WARNING
+ "JBD: Detected IO errors while flushing file data "
+ "on %s\n", bdevname(journal->j_fs_dev, b));
+ err = 0;
+ }
journal_write_revoke_records(journal, commit_transaction);
- jbd_debug(3, "JBD: commit phase 2\n");
-
/*
* If we found any dirty or locked buffers, then we should have
* looped back up to the write_out_data label. If there weren't
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b99c3b3..aa7143a 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -68,7 +68,6 @@
EXPORT_SYMBOL(journal_create);
EXPORT_SYMBOL(journal_load);
EXPORT_SYMBOL(journal_destroy);
-EXPORT_SYMBOL(journal_update_superblock);
EXPORT_SYMBOL(journal_abort);
EXPORT_SYMBOL(journal_errno);
EXPORT_SYMBOL(journal_ack_err);
@@ -1636,9 +1635,10 @@
static void journal_destroy_journal_head_cache(void)
{
- J_ASSERT(journal_head_cache != NULL);
- kmem_cache_destroy(journal_head_cache);
- journal_head_cache = NULL;
+ if (journal_head_cache) {
+ kmem_cache_destroy(journal_head_cache);
+ journal_head_cache = NULL;
+ }
}
/*
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 1bb43e9..c7bd649 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -166,138 +166,123 @@
return NULL;
}
+void journal_destroy_revoke_caches(void)
+{
+ if (revoke_record_cache) {
+ kmem_cache_destroy(revoke_record_cache);
+ revoke_record_cache = NULL;
+ }
+ if (revoke_table_cache) {
+ kmem_cache_destroy(revoke_table_cache);
+ revoke_table_cache = NULL;
+ }
+}
+
int __init journal_init_revoke_caches(void)
{
+ J_ASSERT(!revoke_record_cache);
+ J_ASSERT(!revoke_table_cache);
+
revoke_record_cache = kmem_cache_create("revoke_record",
sizeof(struct jbd_revoke_record_s),
0,
SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
NULL);
if (!revoke_record_cache)
- return -ENOMEM;
+ goto record_cache_failure;
revoke_table_cache = kmem_cache_create("revoke_table",
sizeof(struct jbd_revoke_table_s),
0, SLAB_TEMPORARY, NULL);
- if (!revoke_table_cache) {
- kmem_cache_destroy(revoke_record_cache);
- revoke_record_cache = NULL;
- return -ENOMEM;
- }
+ if (!revoke_table_cache)
+ goto table_cache_failure;
+
return 0;
+
+table_cache_failure:
+ journal_destroy_revoke_caches();
+record_cache_failure:
+ return -ENOMEM;
}
-void journal_destroy_revoke_caches(void)
+static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
{
- kmem_cache_destroy(revoke_record_cache);
- revoke_record_cache = NULL;
- kmem_cache_destroy(revoke_table_cache);
- revoke_table_cache = NULL;
-}
+ int shift = 0;
+ int tmp = hash_size;
+ struct jbd_revoke_table_s *table;
-/* Initialise the revoke table for a given journal to a given size. */
+ table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
+ if (!table)
+ goto out;
-int journal_init_revoke(journal_t *journal, int hash_size)
-{
- int shift, tmp;
-
- J_ASSERT (journal->j_revoke_table[0] == NULL);
-
- shift = 0;
- tmp = hash_size;
while((tmp >>= 1UL) != 0UL)
shift++;
- journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
- if (!journal->j_revoke_table[0])
- return -ENOMEM;
- journal->j_revoke = journal->j_revoke_table[0];
-
- /* Check that the hash_size is a power of two */
- J_ASSERT(is_power_of_2(hash_size));
-
- journal->j_revoke->hash_size = hash_size;
-
- journal->j_revoke->hash_shift = shift;
-
- journal->j_revoke->hash_table =
+ table->hash_size = hash_size;
+ table->hash_shift = shift;
+ table->hash_table =
kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
- if (!journal->j_revoke->hash_table) {
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
- journal->j_revoke = NULL;
- return -ENOMEM;
+ if (!table->hash_table) {
+ kmem_cache_free(revoke_table_cache, table);
+ table = NULL;
+ goto out;
}
for (tmp = 0; tmp < hash_size; tmp++)
- INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+ INIT_LIST_HEAD(&table->hash_table[tmp]);
- journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
- if (!journal->j_revoke_table[1]) {
- kfree(journal->j_revoke_table[0]->hash_table);
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
- return -ENOMEM;
+out:
+ return table;
+}
+
+static void journal_destroy_revoke_table(struct jbd_revoke_table_s *table)
+{
+ int i;
+ struct list_head *hash_list;
+
+ for (i = 0; i < table->hash_size; i++) {
+ hash_list = &table->hash_table[i];
+ J_ASSERT(list_empty(hash_list));
}
+ kfree(table->hash_table);
+ kmem_cache_free(revoke_table_cache, table);
+}
+
+/* Initialise the revoke table for a given journal to a given size. */
+int journal_init_revoke(journal_t *journal, int hash_size)
+{
+ J_ASSERT(journal->j_revoke_table[0] == NULL);
+ J_ASSERT(is_power_of_2(hash_size));
+
+ journal->j_revoke_table[0] = journal_init_revoke_table(hash_size);
+ if (!journal->j_revoke_table[0])
+ goto fail0;
+
+ journal->j_revoke_table[1] = journal_init_revoke_table(hash_size);
+ if (!journal->j_revoke_table[1])
+ goto fail1;
+
journal->j_revoke = journal->j_revoke_table[1];
- /* Check that the hash_size is a power of two */
- J_ASSERT(is_power_of_2(hash_size));
-
- journal->j_revoke->hash_size = hash_size;
-
- journal->j_revoke->hash_shift = shift;
-
- journal->j_revoke->hash_table =
- kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
- if (!journal->j_revoke->hash_table) {
- kfree(journal->j_revoke_table[0]->hash_table);
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]);
- journal->j_revoke = NULL;
- return -ENOMEM;
- }
-
- for (tmp = 0; tmp < hash_size; tmp++)
- INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
-
spin_lock_init(&journal->j_revoke_lock);
return 0;
+
+fail1:
+ journal_destroy_revoke_table(journal->j_revoke_table[0]);
+fail0:
+ return -ENOMEM;
}
-/* Destoy a journal's revoke table. The table must already be empty! */
-
+/* Destroy a journal's revoke table. The table must already be empty! */
void journal_destroy_revoke(journal_t *journal)
{
- struct jbd_revoke_table_s *table;
- struct list_head *hash_list;
- int i;
-
- table = journal->j_revoke_table[0];
- if (!table)
- return;
-
- for (i=0; i<table->hash_size; i++) {
- hash_list = &table->hash_table[i];
- J_ASSERT (list_empty(hash_list));
- }
-
- kfree(table->hash_table);
- kmem_cache_free(revoke_table_cache, table);
journal->j_revoke = NULL;
-
- table = journal->j_revoke_table[1];
- if (!table)
- return;
-
- for (i=0; i<table->hash_size; i++) {
- hash_list = &table->hash_table[i];
- J_ASSERT (list_empty(hash_list));
- }
-
- kfree(table->hash_table);
- kmem_cache_free(revoke_table_cache, table);
- journal->j_revoke = NULL;
+ if (journal->j_revoke_table[0])
+ journal_destroy_revoke_table(journal->j_revoke_table[0]);
+ if (journal->j_revoke_table[1])
+ journal_destroy_revoke_table(journal->j_revoke_table[1]);
}
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 67ff202..8dee320 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1648,12 +1648,42 @@
return;
}
+/*
+ * journal_try_to_free_buffers() could race with journal_commit_transaction().
+ * The latter might still hold a reference count on buffers when inspecting
+ * them on t_syncdata_list or t_locked_list.
+ *
+ * journal_try_to_free_buffers() will call this function to
+ * wait for the current transaction to finish syncing data buffers, before
+ * trying to free that buffer.
+ *
+ * Called without journal->j_state_lock held; this function takes it itself.
+ */
+static void journal_wait_for_transaction_sync_data(journal_t *journal)
+{
+ transaction_t *transaction = NULL;
+ tid_t tid;
+
+ spin_lock(&journal->j_state_lock);
+ transaction = journal->j_committing_transaction;
+
+ if (!transaction) {
+ spin_unlock(&journal->j_state_lock);
+ return;
+ }
+
+ tid = transaction->t_tid;
+ spin_unlock(&journal->j_state_lock);
+ log_wait_commit(journal, tid);
+}
/**
* int journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
* @page: to try and free
- * @unused_gfp_mask: unused
+ * @gfp_mask: we use the mask to detect how hard we should try to release
+ * buffers. If both __GFP_WAIT and __GFP_FS are set, we wait for commit code
+ * to release the buffers.
*
*
* For all the buffers on this page,
@@ -1682,9 +1712,11 @@
* journal_try_to_free_buffer() is changing its state. But that
* cannot happen because we never reallocate freed data as metadata
* while the data is part of a transaction. Yes?
+ *
+ * Return 0 on failure, 1 on success
*/
int journal_try_to_free_buffers(journal_t *journal,
- struct page *page, gfp_t unused_gfp_mask)
+ struct page *page, gfp_t gfp_mask)
{
struct buffer_head *head;
struct buffer_head *bh;
@@ -1713,7 +1745,28 @@
if (buffer_jbd(bh))
goto busy;
} while ((bh = bh->b_this_page) != head);
+
ret = try_to_free_buffers(page);
+
+ /*
+ * There are a number of places where journal_try_to_free_buffers()
+ * could race with journal_commit_transaction(): the latter may still
+ * hold a reference to the buffers while processing them, in which
+ * case try_to_free_buffers() fails to free them. Some callers of
+ * releasepage() require the page buffers to be dropped and otherwise
+ * treat the failure to free as an error (e.g. generic_file_direct_IO()).
+ *
+ * So, if the caller of try_to_release_page() wants the synchronous
+ * behaviour(i.e make sure buffers are dropped upon return),
+ * let's wait for the current transaction to finish flush of
+ * dirty data buffers, then try to free those buffers again,
+ * with the journal locked.
+ */
+ if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
+ journal_wait_for_transaction_sync_data(journal);
+ ret = try_to_free_buffers(page);
+ }
+
busy:
return ret;
}
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 0288e6d..359c091 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -22,6 +22,7 @@
#include <linux/parser.h>
#include <linux/completion.h>
#include <linux/vfs.h>
+#include <linux/quotaops.h>
#include <linux/mount.h>
#include <linux/moduleparam.h>
#include <linux/kthread.h>
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 1f6dc51..31668b6 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -582,7 +582,15 @@
}
if (status < 0)
goto out_unlock;
- status = nlm_stat_to_errno(resp->status);
+ /*
+ * EAGAIN doesn't make sense for sleeping locks, and in some
+ * cases NLM_LCK_DENIED is returned for a permanent error. So
+ * turn it into an ENOLCK.
+ */
+ if (resp->status == nlm_lck_denied && (fl_flags & FL_SLEEP))
+ status = -ENOLCK;
+ else
+ status = nlm_stat_to_errno(resp->status);
out_unblock:
nlmclnt_finish_block(block);
out:
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 821b9ac..cf0d5c2 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -418,8 +418,8 @@
goto out;
case -EAGAIN:
ret = nlm_lck_denied;
- break;
- case -EINPROGRESS:
+ goto out;
+ case FILE_LOCK_DEFERRED:
if (wait)
break;
/* Filesystem lock operation is in progress
@@ -434,10 +434,6 @@
goto out;
}
- ret = nlm_lck_denied;
- if (!wait)
- goto out;
-
ret = nlm_lck_blocked;
/* Append to list of blocked */
@@ -507,7 +503,7 @@
}
error = vfs_test_lock(file->f_file, &lock->fl);
- if (error == -EINPROGRESS) {
+ if (error == FILE_LOCK_DEFERRED) {
ret = nlmsvc_defer_lock_rqst(rqstp, block);
goto out;
}
@@ -731,8 +727,7 @@
switch (error) {
case 0:
break;
- case -EAGAIN:
- case -EINPROGRESS:
+ case FILE_LOCK_DEFERRED:
dprintk("lockd: lock still blocked error %d\n", error);
nlmsvc_insert_block(block, NLM_NEVER);
nlmsvc_release_block(block);
diff --git a/fs/locks.c b/fs/locks.c
index dce8c747..0149030 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -779,8 +779,10 @@
if (!flock_locks_conflict(request, fl))
continue;
error = -EAGAIN;
- if (request->fl_flags & FL_SLEEP)
- locks_insert_block(fl, request);
+ if (!(request->fl_flags & FL_SLEEP))
+ goto out;
+ error = FILE_LOCK_DEFERRED;
+ locks_insert_block(fl, request);
goto out;
}
if (request->fl_flags & FL_ACCESS)
@@ -836,7 +838,7 @@
error = -EDEADLK;
if (posix_locks_deadlock(request, fl))
goto out;
- error = -EAGAIN;
+ error = FILE_LOCK_DEFERRED;
locks_insert_block(fl, request);
goto out;
}
@@ -1035,7 +1037,7 @@
might_sleep ();
for (;;) {
error = posix_lock_file(filp, fl, NULL);
- if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
+ if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
if (!error)
@@ -1107,9 +1109,7 @@
for (;;) {
error = __posix_lock_file(inode, &fl, NULL);
- if (error != -EAGAIN)
- break;
- if (!(fl.fl_flags & FL_SLEEP))
+ if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
if (!error) {
@@ -1531,7 +1531,7 @@
might_sleep();
for (;;) {
error = flock_lock_file(filp, fl);
- if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
+ if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
if (!error)
@@ -1716,17 +1716,17 @@
* fl_grant is set. Callers expecting ->lock() to return asynchronously
* will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if)
* the request is for a blocking lock. When ->lock() does return asynchronously,
- * it must return -EINPROGRESS, and call ->fl_grant() when the lock
+ * it must return FILE_LOCK_DEFERRED, and call ->fl_grant() when the lock
* request completes.
* If the request is for non-blocking lock the file system should return
- * -EINPROGRESS then try to get the lock and call the callback routine with
- * the result. If the request timed out the callback routine will return a
+ * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine
+ * with the result. If the request timed out the callback routine will return a
* nonzero return code and the file system should release the lock. The file
* system is also responsible to keep a corresponding posix lock when it
* grants a lock so the VFS can find out which locks are locally held and do
* the correct lock cleanup when required.
* The underlying filesystem must not drop the kernel lock or call
- * ->fl_grant() before returning to the caller with a -EINPROGRESS
+ * ->fl_grant() before returning to the caller with a FILE_LOCK_DEFERRED
* return code.
*/
int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
@@ -1738,6 +1738,30 @@
}
EXPORT_SYMBOL_GPL(vfs_lock_file);
+static int do_lock_file_wait(struct file *filp, unsigned int cmd,
+ struct file_lock *fl)
+{
+ int error;
+
+ error = security_file_lock(filp, fl->fl_type);
+ if (error)
+ return error;
+
+ for (;;) {
+ error = vfs_lock_file(filp, cmd, fl, NULL);
+ if (error != FILE_LOCK_DEFERRED)
+ break;
+ error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
+ if (!error)
+ continue;
+
+ locks_delete_block(fl);
+ break;
+ }
+
+ return error;
+}
+
/* Apply the lock described by l to an open file descriptor.
* This implements both the F_SETLK and F_SETLKW commands of fcntl().
*/
@@ -1795,26 +1819,7 @@
goto out;
}
- error = security_file_lock(filp, file_lock->fl_type);
- if (error)
- goto out;
-
- if (filp->f_op && filp->f_op->lock != NULL)
- error = filp->f_op->lock(filp, cmd, file_lock);
- else {
- for (;;) {
- error = posix_lock_file(filp, file_lock, NULL);
- if (error != -EAGAIN || cmd == F_SETLK)
- break;
- error = wait_event_interruptible(file_lock->fl_wait,
- !file_lock->fl_next);
- if (!error)
- continue;
-
- locks_delete_block(file_lock);
- break;
- }
- }
+ error = do_lock_file_wait(filp, cmd, file_lock);
/*
* Attempt to detect a close/fcntl race and recover by
@@ -1932,26 +1937,7 @@
goto out;
}
- error = security_file_lock(filp, file_lock->fl_type);
- if (error)
- goto out;
-
- if (filp->f_op && filp->f_op->lock != NULL)
- error = filp->f_op->lock(filp, cmd, file_lock);
- else {
- for (;;) {
- error = posix_lock_file(filp, file_lock, NULL);
- if (error != -EAGAIN || cmd == F_SETLK64)
- break;
- error = wait_event_interruptible(file_lock->fl_wait,
- !file_lock->fl_next);
- if (!error)
- continue;
-
- locks_delete_block(file_lock);
- break;
- }
- }
+ error = do_lock_file_wait(filp, cmd, file_lock);
/*
* Attempt to detect a close/fcntl race and recover by
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 84f6242..523d737 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -256,9 +256,6 @@
if (!s->s_root)
goto out_iput;
- if (!NO_TRUNCATE)
- s->s_root->d_op = &minix_dentry_operations;
-
if (!(s->s_flags & MS_RDONLY)) {
if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
ms->s_state &= ~MINIX_VALID_FS;
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 326edfe..e6a0b19 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -2,11 +2,6 @@
#include <linux/pagemap.h>
#include <linux/minix_fs.h>
-/*
- * change the define below to 0 if you want names > info->s_namelen chars to be
- * truncated. Else they will be disallowed (ENAMETOOLONG).
- */
-#define NO_TRUNCATE 1
#define INODE_VERSION(inode) minix_sb(inode->i_sb)->s_version
#define MINIX_V1 0x0001 /* original minix fs */
#define MINIX_V2 0x0002 /* minix V2 fs */
@@ -83,7 +78,6 @@
extern const struct inode_operations minix_dir_inode_operations;
extern const struct file_operations minix_file_operations;
extern const struct file_operations minix_dir_operations;
-extern struct dentry_operations minix_dentry_operations;
static inline struct minix_sb_info *minix_sb(struct super_block *sb)
{
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 102241b..32b131c 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -18,30 +18,6 @@
return err;
}
-static int minix_hash(struct dentry *dentry, struct qstr *qstr)
-{
- unsigned long hash;
- int i;
- const unsigned char *name;
-
- i = minix_sb(dentry->d_inode->i_sb)->s_namelen;
- if (i >= qstr->len)
- return 0;
- /* Truncate the name in place, avoids having to define a compare
- function. */
- qstr->len = i;
- name = qstr->name;
- hash = init_name_hash();
- while (i--)
- hash = partial_name_hash(*name++, hash);
- qstr->hash = end_name_hash(hash);
- return 0;
-}
-
-struct dentry_operations minix_dentry_operations = {
- .d_hash = minix_hash,
-};
-
static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
{
struct inode * inode = NULL;
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 1f7f295..e844b98 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -14,12 +14,7 @@
/* Characters that are undesirable in an MS-DOS file name */
static unsigned char bad_chars[] = "*?<>|\"";
-static unsigned char bad_if_strict_pc[] = "+=,; ";
-/* GEMDOS is less restrictive */
-static unsigned char bad_if_strict_atari[] = " ";
-
-#define bad_if_strict(opts) \
- ((opts)->atari ? bad_if_strict_atari : bad_if_strict_pc)
+static unsigned char bad_if_strict[] = "+=,; ";
/***** Formats an MS-DOS file name. Rejects invalid names. */
static int msdos_format_name(const unsigned char *name, int len,
@@ -40,21 +35,20 @@
/* Get rid of dot - test for it elsewhere */
name++;
len--;
- } else if (!opts->atari)
+ } else
return -EINVAL;
}
/*
- * disallow names that _really_ start with a dot for MS-DOS,
- * GEMDOS does not care
+ * disallow names that _really_ start with a dot
*/
- space = !opts->atari;
+ space = 1;
c = 0;
for (walk = res; len && walk - res < 8; walk++) {
c = *name++;
len--;
if (opts->name_check != 'r' && strchr(bad_chars, c))
return -EINVAL;
- if (opts->name_check == 's' && strchr(bad_if_strict(opts), c))
+ if (opts->name_check == 's' && strchr(bad_if_strict, c))
return -EINVAL;
if (c >= 'A' && c <= 'Z' && opts->name_check == 's')
return -EINVAL;
@@ -94,7 +88,7 @@
if (opts->name_check != 'r' && strchr(bad_chars, c))
return -EINVAL;
if (opts->name_check == 's' &&
- strchr(bad_if_strict(opts), c))
+ strchr(bad_if_strict, c))
return -EINVAL;
if (c < ' ' || c == ':' || c == '\\')
return -EINVAL;
@@ -243,6 +237,7 @@
int is_dir, int is_hid, int cluster,
struct timespec *ts, struct fat_slot_info *sinfo)
{
+ struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb);
struct msdos_dir_entry de;
__le16 time, date;
int err;
@@ -252,7 +247,7 @@
if (is_hid)
de.attr |= ATTR_HIDDEN;
de.lcase = 0;
- fat_date_unix2dos(ts->tv_sec, &time, &date);
+ fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
de.cdate = de.adate = 0;
de.ctime = 0;
de.ctime_cs = 0;
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 6b6225a..15c6fae 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -19,6 +19,13 @@
#define NFSDDBG_FACILITY NFSDDBG_LOCKD
+#ifdef CONFIG_LOCKD_V4
+#define nlm_stale_fh nlm4_stale_fh
+#define nlm_failed nlm4_failed
+#else
+#define nlm_stale_fh nlm_lck_denied_nolocks
+#define nlm_failed nlm_lck_denied_nolocks
+#endif
/*
* Note: we hold the dentry use count while the file is open.
*/
@@ -47,12 +54,10 @@
return 0;
case nfserr_dropit:
return nlm_drop_reply;
-#ifdef CONFIG_LOCKD_V4
case nfserr_stale:
- return nlm4_stale_fh;
-#endif
+ return nlm_stale_fh;
default:
- return nlm_lck_denied;
+ return nlm_failed;
}
}
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index efef715..7d6b34e 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -344,18 +344,18 @@
static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
whole_disk_show, NULL);
-void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
+int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
{
struct hd_struct *p;
int err;
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
- return;
+ return -ENOMEM;
if (!init_part_stats(p)) {
- kfree(p);
- return;
+ err = -ENOMEM;
+ goto out0;
}
p->start_sect = start;
p->nr_sects = len;
@@ -378,15 +378,31 @@
/* delay uevent until 'holders' subdir is created */
p->dev.uevent_suppress = 1;
- device_add(&p->dev);
+ err = device_add(&p->dev);
+ if (err)
+ goto out1;
partition_sysfs_add_subdir(p);
p->dev.uevent_suppress = 0;
- if (flags & ADDPART_FLAG_WHOLEDISK)
+ if (flags & ADDPART_FLAG_WHOLEDISK) {
err = device_create_file(&p->dev, &dev_attr_whole_disk);
+ if (err)
+ goto out2;
+ }
/* suppress uevent if the disk supresses it */
if (!disk->dev.uevent_suppress)
kobject_uevent(&p->dev.kobj, KOBJ_ADD);
+
+ return 0;
+
+out2:
+ device_del(&p->dev);
+out1:
+ put_device(&p->dev);
+ free_part_stats(p);
+out0:
+ kfree(p);
+ return err;
}
/* Not exported, helper to add_disk(). */
@@ -483,10 +499,16 @@
if (!size)
continue;
if (from + size > get_capacity(disk)) {
- printk(" %s: p%d exceeds device capacity\n",
+ printk(KERN_ERR " %s: p%d exceeds device capacity\n",
disk->disk_name, p);
+ continue;
}
- add_partition(disk, p, from, size, state->parts[p].flags);
+ res = add_partition(disk, p, from, size, state->parts[p].flags);
+ if (res) {
+ printk(KERN_ERR " %s: p%d could not be added: %d\n",
+ disk->disk_name, p, -res);
+ continue;
+ }
#ifdef CONFIG_BLK_DEV_MD
if (state->parts[p].flags & ADDPART_FLAG_RAID)
md_autodetect_dev(bdev->bd_dev+p);
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index e7b0700..038a602 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -95,13 +95,6 @@
#include "check.h"
#include "efi.h"
-#undef EFI_DEBUG
-#ifdef EFI_DEBUG
-#define Dprintk(x...) printk(KERN_DEBUG x)
-#else
-#define Dprintk(x...)
-#endif
-
/* This allows a kernel command line option 'gpt' to override
* the test for invalid PMBR. Not __initdata because reloading
* the partition tables happens after init too.
@@ -305,10 +298,10 @@
/* Check the GUID Partition Table signature */
if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) {
- Dprintk("GUID Partition Table Header signature is wrong:"
- "%lld != %lld\n",
- (unsigned long long)le64_to_cpu((*gpt)->signature),
- (unsigned long long)GPT_HEADER_SIGNATURE);
+ pr_debug("GUID Partition Table Header signature is wrong:"
+ "%lld != %lld\n",
+ (unsigned long long)le64_to_cpu((*gpt)->signature),
+ (unsigned long long)GPT_HEADER_SIGNATURE);
goto fail;
}
@@ -318,9 +311,8 @@
crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size));
if (crc != origcrc) {
- Dprintk
- ("GUID Partition Table Header CRC is wrong: %x != %x\n",
- crc, origcrc);
+ pr_debug("GUID Partition Table Header CRC is wrong: %x != %x\n",
+ crc, origcrc);
goto fail;
}
(*gpt)->header_crc32 = cpu_to_le32(origcrc);
@@ -328,9 +320,9 @@
/* Check that the my_lba entry points to the LBA that contains
* the GUID Partition Table */
if (le64_to_cpu((*gpt)->my_lba) != lba) {
- Dprintk("GPT my_lba incorrect: %lld != %lld\n",
- (unsigned long long)le64_to_cpu((*gpt)->my_lba),
- (unsigned long long)lba);
+ pr_debug("GPT my_lba incorrect: %lld != %lld\n",
+ (unsigned long long)le64_to_cpu((*gpt)->my_lba),
+ (unsigned long long)lba);
goto fail;
}
@@ -339,15 +331,15 @@
*/
lastlba = last_lba(bdev);
if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) {
- Dprintk("GPT: first_usable_lba incorrect: %lld > %lld\n",
- (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
- (unsigned long long)lastlba);
+ pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n",
+ (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
+ (unsigned long long)lastlba);
goto fail;
}
if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) {
- Dprintk("GPT: last_usable_lba incorrect: %lld > %lld\n",
- (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
- (unsigned long long)lastlba);
+ pr_debug("GPT: last_usable_lba incorrect: %lld > %lld\n",
+ (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
+ (unsigned long long)lastlba);
goto fail;
}
@@ -360,7 +352,7 @@
le32_to_cpu((*gpt)->sizeof_partition_entry));
if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) {
- Dprintk("GUID Partitition Entry Array CRC check failed.\n");
+ pr_debug("GUID Partitition Entry Array CRC check failed.\n");
goto fail_ptes;
}
@@ -616,7 +608,7 @@
return 0;
}
- Dprintk("GUID Partition Table is valid! Yea!\n");
+ pr_debug("GUID Partition Table is valid! Yea!\n");
for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) {
if (!is_pte_valid(&ptes[i], last_lba(bdev)))
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 0fdda2e..8652fb9 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -133,17 +133,17 @@
bool is_vista = false;
BUG_ON(!data || !ph);
- if (MAGIC_PRIVHEAD != BE64(data)) {
+ if (MAGIC_PRIVHEAD != get_unaligned_be64(data)) {
ldm_error("Cannot find PRIVHEAD structure. LDM database is"
" corrupt. Aborting.");
return false;
}
- ph->ver_major = BE16(data + 0x000C);
- ph->ver_minor = BE16(data + 0x000E);
- ph->logical_disk_start = BE64(data + 0x011B);
- ph->logical_disk_size = BE64(data + 0x0123);
- ph->config_start = BE64(data + 0x012B);
- ph->config_size = BE64(data + 0x0133);
+ ph->ver_major = get_unaligned_be16(data + 0x000C);
+ ph->ver_minor = get_unaligned_be16(data + 0x000E);
+ ph->logical_disk_start = get_unaligned_be64(data + 0x011B);
+ ph->logical_disk_size = get_unaligned_be64(data + 0x0123);
+ ph->config_start = get_unaligned_be64(data + 0x012B);
+ ph->config_size = get_unaligned_be64(data + 0x0133);
/* Version 2.11 is Win2k/XP and version 2.12 is Vista. */
if (ph->ver_major == 2 && ph->ver_minor == 12)
is_vista = true;
@@ -191,14 +191,14 @@
{
BUG_ON (!data || !toc);
- if (MAGIC_TOCBLOCK != BE64 (data)) {
+ if (MAGIC_TOCBLOCK != get_unaligned_be64(data)) {
ldm_crit ("Cannot find TOCBLOCK, database may be corrupt.");
return false;
}
strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name));
toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0;
- toc->bitmap1_start = BE64 (data + 0x2E);
- toc->bitmap1_size = BE64 (data + 0x36);
+ toc->bitmap1_start = get_unaligned_be64(data + 0x2E);
+ toc->bitmap1_size = get_unaligned_be64(data + 0x36);
if (strncmp (toc->bitmap1_name, TOC_BITMAP1,
sizeof (toc->bitmap1_name)) != 0) {
@@ -208,8 +208,8 @@
}
strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name));
toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0;
- toc->bitmap2_start = BE64 (data + 0x50);
- toc->bitmap2_size = BE64 (data + 0x58);
+ toc->bitmap2_start = get_unaligned_be64(data + 0x50);
+ toc->bitmap2_size = get_unaligned_be64(data + 0x58);
if (strncmp (toc->bitmap2_name, TOC_BITMAP2,
sizeof (toc->bitmap2_name)) != 0) {
ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.",
@@ -237,22 +237,22 @@
{
BUG_ON (!data || !vm);
- if (MAGIC_VMDB != BE32 (data)) {
+ if (MAGIC_VMDB != get_unaligned_be32(data)) {
ldm_crit ("Cannot find the VMDB, database may be corrupt.");
return false;
}
- vm->ver_major = BE16 (data + 0x12);
- vm->ver_minor = BE16 (data + 0x14);
+ vm->ver_major = get_unaligned_be16(data + 0x12);
+ vm->ver_minor = get_unaligned_be16(data + 0x14);
if ((vm->ver_major != 4) || (vm->ver_minor != 10)) {
ldm_error ("Expected VMDB version %d.%d, got %d.%d. "
"Aborting.", 4, 10, vm->ver_major, vm->ver_minor);
return false;
}
- vm->vblk_size = BE32 (data + 0x08);
- vm->vblk_offset = BE32 (data + 0x0C);
- vm->last_vblk_seq = BE32 (data + 0x04);
+ vm->vblk_size = get_unaligned_be32(data + 0x08);
+ vm->vblk_offset = get_unaligned_be32(data + 0x0C);
+ vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
ldm_debug ("Parsed VMDB successfully.");
return true;
@@ -507,7 +507,7 @@
goto out; /* Already logged */
/* Are there uncommitted transactions? */
- if (BE16(data + 0x10) != 0x01) {
+ if (get_unaligned_be16(data + 0x10) != 0x01) {
ldm_crit ("Database is not in a consistent state. Aborting.");
goto out;
}
@@ -802,7 +802,7 @@
return false;
len += VBLK_SIZE_CMP3;
- if (len != BE32 (buffer + 0x14))
+ if (len != get_unaligned_be32(buffer + 0x14))
return false;
comp = &vb->vblk.comp;
@@ -851,7 +851,7 @@
return false;
len += VBLK_SIZE_DGR3;
- if (len != BE32 (buffer + 0x14))
+ if (len != get_unaligned_be32(buffer + 0x14))
return false;
dgrp = &vb->vblk.dgrp;
@@ -895,7 +895,7 @@
return false;
len += VBLK_SIZE_DGR4;
- if (len != BE32 (buffer + 0x14))
+ if (len != get_unaligned_be32(buffer + 0x14))
return false;
dgrp = &vb->vblk.dgrp;
@@ -931,7 +931,7 @@
return false;
len += VBLK_SIZE_DSK3;
- if (len != BE32 (buffer + 0x14))
+ if (len != get_unaligned_be32(buffer + 0x14))
return false;
disk = &vb->vblk.disk;
@@ -968,7 +968,7 @@
return false;
len += VBLK_SIZE_DSK4;
- if (len != BE32 (buffer + 0x14))
+ if (len != get_unaligned_be32(buffer + 0x14))
return false;
disk = &vb->vblk.disk;
@@ -1034,14 +1034,14 @@
return false;
}
len += VBLK_SIZE_PRT3;
- if (len > BE32(buffer + 0x14)) {
+ if (len > get_unaligned_be32(buffer + 0x14)) {
ldm_error("len %d > BE32(buffer + 0x14) %d", len,
- BE32(buffer + 0x14));
+ get_unaligned_be32(buffer + 0x14));
return false;
}
part = &vb->vblk.part;
- part->start = BE64(buffer + 0x24 + r_name);
- part->volume_offset = BE64(buffer + 0x2C + r_name);
+ part->start = get_unaligned_be64(buffer + 0x24 + r_name);
+ part->volume_offset = get_unaligned_be64(buffer + 0x2C + r_name);
part->size = ldm_get_vnum(buffer + 0x34 + r_name);
part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size);
part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent);
@@ -1139,9 +1139,9 @@
return false;
}
len += VBLK_SIZE_VOL5;
- if (len > BE32(buffer + 0x14)) {
+ if (len > get_unaligned_be32(buffer + 0x14)) {
ldm_error("len %d > BE32(buffer + 0x14) %d", len,
- BE32(buffer + 0x14));
+ get_unaligned_be32(buffer + 0x14));
return false;
}
volu = &vb->vblk.volu;
@@ -1294,9 +1294,9 @@
BUG_ON (!data || !frags);
- group = BE32 (data + 0x08);
- rec = BE16 (data + 0x0C);
- num = BE16 (data + 0x0E);
+ group = get_unaligned_be32(data + 0x08);
+ rec = get_unaligned_be16(data + 0x0C);
+ num = get_unaligned_be16(data + 0x0E);
if ((num < 1) || (num > 4)) {
ldm_error ("A VBLK claims to have %d parts.", num);
return false;
@@ -1425,12 +1425,12 @@
}
for (v = 0; v < perbuf; v++, data+=size) { /* For each vblk */
- if (MAGIC_VBLK != BE32 (data)) {
+ if (MAGIC_VBLK != get_unaligned_be32(data)) {
ldm_error ("Expected to find a VBLK.");
goto out;
}
- recs = BE16 (data + 0x0E); /* Number of records */
+ recs = get_unaligned_be16(data + 0x0E); /* Number of records */
if (recs == 1) {
if (!ldm_ldmdb_add (data, size, ldb))
goto out; /* Already logged */
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h
index 80f63b5..30e08e8 100644
--- a/fs/partitions/ldm.h
+++ b/fs/partitions/ldm.h
@@ -98,11 +98,6 @@
#define TOC_BITMAP1 "config" /* Names of the two defined */
#define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */
-/* Most numbers we deal with are big-endian and won't be aligned. */
-#define BE16(x) ((u16)be16_to_cpu(get_unaligned((__be16*)(x))))
-#define BE32(x) ((u32)be32_to_cpu(get_unaligned((__be32*)(x))))
-#define BE64(x) ((u64)be64_to_cpu(get_unaligned((__be64*)(x))))
-
/* Borrowed from msdos.c */
#define SYS_IND(p) (get_unaligned(&(p)->sys_ind))
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
new file mode 100644
index 0000000..73cd7a4
--- /dev/null
+++ b/fs/proc/Kconfig
@@ -0,0 +1,59 @@
+config PROC_FS
+ bool "/proc file system support" if EMBEDDED
+ default y
+ help
+ This is a virtual file system providing information about the status
+ of the system. "Virtual" means that it doesn't take up any space on
+ your hard disk: the files are created on the fly by the kernel when
+ you try to access them. Also, you cannot read the files with older
+ versions of the program less: you need to use more or cat.
+
+ It's totally cool; for example, "cat /proc/interrupts" gives
+ information about what the different IRQs are used for at the moment
+ (there is a small number of Interrupt ReQuest lines in your computer
+ that are used by the attached devices to gain the CPU's attention --
+ often a source of trouble if two devices are mistakenly configured
+ to use the same IRQ). The program procinfo displays some
+ information about your system gathered from the /proc file system.
+
+ Before you can use the /proc file system, it has to be mounted,
+ meaning it has to be given a location in the directory hierarchy.
+ That location should be /proc. A command such as "mount -t proc proc
+ /proc" or the equivalent line in /etc/fstab does the job.
+
+ The /proc file system is explained in the file
+ <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
+ ("man 5 proc").
+
+ This option will enlarge your kernel by about 67 KB. Several
+ programs depend on this, so everyone should say Y here.
+
+config PROC_KCORE
+ bool "/proc/kcore support" if !ARM
+ depends on PROC_FS && MMU
+
+config PROC_VMCORE
+ bool "/proc/vmcore support (EXPERIMENTAL)"
+ depends on PROC_FS && CRASH_DUMP
+ default y
+ help
+ Exports the dump image of a crashed kernel in ELF format.
+
+config PROC_SYSCTL
+ bool "Sysctl support (/proc/sys)" if EMBEDDED
+ depends on PROC_FS
+ select SYSCTL
+ default y
+ ---help---
+ The sysctl interface provides a means of dynamically changing
+ certain kernel parameters and variables on the fly without requiring
+ a recompile of the kernel or reboot of the system. The primary
+ interface is through /proc/sys. If you say Y here a tree of
+ modifiable sysctl entries will be generated beneath the
+ /proc/sys directory. They are explained in the files
+ in <file:Documentation/sysctl/>. Note that enabling this
+ option will enlarge the kernel by at least 8 KB.
+
+ As it is generally a good thing, you should say Y here unless
+ building a kernel for install/rescue disks or your system is very
+ limited in memory.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 58c3e6a..a891fe4 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2376,29 +2376,82 @@
}
#ifdef CONFIG_TASK_IO_ACCOUNTING
-static int proc_pid_io_accounting(struct task_struct *task, char *buffer)
+static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
{
+ u64 rchar, wchar, syscr, syscw;
+ struct task_io_accounting ioac;
+
+ if (!whole) {
+ rchar = task->rchar;
+ wchar = task->wchar;
+ syscr = task->syscr;
+ syscw = task->syscw;
+ memcpy(&ioac, &task->ioac, sizeof(ioac));
+ } else {
+ unsigned long flags;
+ struct task_struct *t = task;
+ rchar = wchar = syscr = syscw = 0;
+ memset(&ioac, 0, sizeof(ioac));
+
+ rcu_read_lock();
+ do {
+ rchar += t->rchar;
+ wchar += t->wchar;
+ syscr += t->syscr;
+ syscw += t->syscw;
+
+ ioac.read_bytes += t->ioac.read_bytes;
+ ioac.write_bytes += t->ioac.write_bytes;
+ ioac.cancelled_write_bytes +=
+ t->ioac.cancelled_write_bytes;
+ t = next_thread(t);
+ } while (t != task);
+ rcu_read_unlock();
+
+ if (lock_task_sighand(task, &flags)) {
+ struct signal_struct *sig = task->signal;
+
+ rchar += sig->rchar;
+ wchar += sig->wchar;
+ syscr += sig->syscr;
+ syscw += sig->syscw;
+
+ ioac.read_bytes += sig->ioac.read_bytes;
+ ioac.write_bytes += sig->ioac.write_bytes;
+ ioac.cancelled_write_bytes +=
+ sig->ioac.cancelled_write_bytes;
+
+ unlock_task_sighand(task, &flags);
+ }
+ }
+
return sprintf(buffer,
-#ifdef CONFIG_TASK_XACCT
"rchar: %llu\n"
"wchar: %llu\n"
"syscr: %llu\n"
"syscw: %llu\n"
-#endif
"read_bytes: %llu\n"
"write_bytes: %llu\n"
"cancelled_write_bytes: %llu\n",
-#ifdef CONFIG_TASK_XACCT
- (unsigned long long)task->rchar,
- (unsigned long long)task->wchar,
- (unsigned long long)task->syscr,
- (unsigned long long)task->syscw,
-#endif
- (unsigned long long)task->ioac.read_bytes,
- (unsigned long long)task->ioac.write_bytes,
- (unsigned long long)task->ioac.cancelled_write_bytes);
+ (unsigned long long)rchar,
+ (unsigned long long)wchar,
+ (unsigned long long)syscr,
+ (unsigned long long)syscw,
+ (unsigned long long)ioac.read_bytes,
+ (unsigned long long)ioac.write_bytes,
+ (unsigned long long)ioac.cancelled_write_bytes);
}
-#endif
+
+static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
+{
+ return do_io_accounting(task, buffer, 0);
+}
+
+static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
+{
+ return do_io_accounting(task, buffer, 1);
+}
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
/*
* Thread groups
@@ -2470,7 +2523,7 @@
REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
- INF("io", S_IRUGO, pid_io_accounting),
+ INF("io", S_IRUGO, tgid_io_accounting),
#endif
};
@@ -2797,6 +2850,9 @@
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
#endif
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+ INF("io", S_IRUGO, tid_io_accounting),
+#endif
};
static int proc_tid_base_readdir(struct file * filp,
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 43e54e8..bc0a0dd 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -597,6 +597,7 @@
ent->pde_users = 0;
spin_lock_init(&ent->pde_unload_lock);
ent->pde_unload_completion = NULL;
+ INIT_LIST_HEAD(&ent->pde_openers);
out:
return ent;
}
@@ -789,6 +790,19 @@
spin_unlock(&de->pde_unload_lock);
continue_removing:
+ spin_lock(&de->pde_unload_lock);
+ while (!list_empty(&de->pde_openers)) {
+ struct pde_opener *pdeo;
+
+ pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
+ list_del(&pdeo->lh);
+ spin_unlock(&de->pde_unload_lock);
+ pdeo->release(pdeo->inode, pdeo->file);
+ kfree(pdeo);
+ spin_lock(&de->pde_unload_lock);
+ }
+ spin_unlock(&de->pde_unload_lock);
+
if (S_ISDIR(de->mode))
parent->nlink--;
de->nlink = 0;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b08d100..02eca2e 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -111,27 +111,25 @@
return 0;
}
-static int proc_remount(struct super_block *sb, int *flags, char *data)
-{
- *flags |= MS_NODIRATIME;
- return 0;
-}
-
static const struct super_operations proc_sops = {
.alloc_inode = proc_alloc_inode,
.destroy_inode = proc_destroy_inode,
.drop_inode = generic_delete_inode,
.delete_inode = proc_delete_inode,
.statfs = simple_statfs,
- .remount_fs = proc_remount,
};
+static void __pde_users_dec(struct proc_dir_entry *pde)
+{
+ pde->pde_users--;
+ if (pde->pde_unload_completion && pde->pde_users == 0)
+ complete(pde->pde_unload_completion);
+}
+
static void pde_users_dec(struct proc_dir_entry *pde)
{
spin_lock(&pde->pde_unload_lock);
- pde->pde_users--;
- if (pde->pde_unload_completion && pde->pde_users == 0)
- complete(pde->pde_unload_completion);
+ __pde_users_dec(pde);
spin_unlock(&pde->pde_unload_lock);
}
@@ -318,36 +316,97 @@
struct proc_dir_entry *pde = PDE(inode);
int rv = 0;
int (*open)(struct inode *, struct file *);
+ int (*release)(struct inode *, struct file *);
+ struct pde_opener *pdeo;
+
+ /*
+ * What for, you ask? Well, we can have open, rmmod, remove_proc_entry
+ * sequence. ->release won't be called because ->proc_fops will be
+ * cleared. Depending on complexity of ->release, consequences vary.
+ *
+ * We can't wait for mercy when close will be done for real, it's
+ * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release
+ * by hand in remove_proc_entry(). For this, save opener's credentials
+ * for later.
+ */
+ pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
+ if (!pdeo)
+ return -ENOMEM;
spin_lock(&pde->pde_unload_lock);
if (!pde->proc_fops) {
spin_unlock(&pde->pde_unload_lock);
+ kfree(pdeo);
return rv;
}
pde->pde_users++;
open = pde->proc_fops->open;
+ release = pde->proc_fops->release;
spin_unlock(&pde->pde_unload_lock);
if (open)
rv = open(inode, file);
- pde_users_dec(pde);
+ spin_lock(&pde->pde_unload_lock);
+ if (rv == 0 && release) {
+ /* To know what to release. */
+ pdeo->inode = inode;
+ pdeo->file = file;
+ /* Strictly for "too late" ->release in proc_reg_release(). */
+ pdeo->release = release;
+ list_add(&pdeo->lh, &pde->pde_openers);
+ } else
+ kfree(pdeo);
+ __pde_users_dec(pde);
+ spin_unlock(&pde->pde_unload_lock);
return rv;
}
+static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
+ struct inode *inode, struct file *file)
+{
+ struct pde_opener *pdeo;
+
+ list_for_each_entry(pdeo, &pde->pde_openers, lh) {
+ if (pdeo->inode == inode && pdeo->file == file)
+ return pdeo;
+ }
+ return NULL;
+}
+
static int proc_reg_release(struct inode *inode, struct file *file)
{
struct proc_dir_entry *pde = PDE(inode);
int rv = 0;
int (*release)(struct inode *, struct file *);
+ struct pde_opener *pdeo;
spin_lock(&pde->pde_unload_lock);
+ pdeo = find_pde_opener(pde, inode, file);
if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
+ /*
+ * Can't simply exit, __fput() will think that everything is OK,
+ * and move on to freeing struct file. remove_proc_entry() will
+ * find slacker in opener's list and will try to do non-trivial
+ * things with struct file. Therefore, remove opener from list.
+ *
+ * But if opener is removed from list, who will ->release it?
+ */
+ if (pdeo) {
+ list_del(&pdeo->lh);
+ spin_unlock(&pde->pde_unload_lock);
+ rv = pdeo->release(inode, file);
+ kfree(pdeo);
+ } else
+ spin_unlock(&pde->pde_unload_lock);
return rv;
}
pde->pde_users++;
release = pde->proc_fops->release;
+ if (pdeo) {
+ list_del(&pdeo->lh);
+ kfree(pdeo);
+ }
spin_unlock(&pde->pde_unload_lock);
if (release)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 28cbca8..4422023 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -63,6 +63,7 @@
extern const struct file_operations proc_clear_refs_operations;
extern const struct file_operations proc_pagemap_operations;
extern const struct file_operations proc_net_operations;
+extern const struct file_operations proc_kmsg_operations;
extern const struct inode_operations proc_net_inode_operations;
void free_proc_entry(struct proc_dir_entry *de);
@@ -88,3 +89,10 @@
struct dentry *dentry);
int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
filldir_t filldir);
+
+struct pde_opener {
+ struct inode *inode;
+ struct file *file;
+ int (*release)(struct inode *, struct file *);
+ struct list_head lh;
+};
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e78c81f..c2370c7 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -23,6 +23,10 @@
#define CORE_STR "CORE"
+#ifndef ELF_CORE_EFLAGS
+#define ELF_CORE_EFLAGS 0
+#endif
+
static int open_kcore(struct inode * inode, struct file * filp)
{
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
@@ -164,11 +168,7 @@
elf->e_entry = 0;
elf->e_phoff = sizeof(struct elfhdr);
elf->e_shoff = 0;
-#if defined(CONFIG_H8300)
- elf->e_flags = ELF_FLAGS;
-#else
- elf->e_flags = 0;
-#endif
+ elf->e_flags = ELF_CORE_EFLAGS;
elf->e_ehsize = sizeof(struct elfhdr);
elf->e_phentsize= sizeof(struct elf_phdr);
elf->e_phnum = nphdr;
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index ff3b90b..9fd5df3 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -15,6 +15,8 @@
#include <asm/uaccess.h>
#include <asm/io.h>
+#include "internal.h"
+
extern wait_queue_head_t log_wait;
extern int do_syslog(int type, char __user *bug, int count);
diff --git a/fs/quota.c b/fs/quota.c
index db1cc9f..7f4386e 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -186,7 +186,7 @@
void sync_dquots(struct super_block *sb, int type)
{
- int cnt, dirty;
+ int cnt;
if (sb) {
if (sb->s_qcop->quota_sync)
@@ -198,11 +198,17 @@
restart:
list_for_each_entry(sb, &super_blocks, s_list) {
/* This test just improves performance so it needn't be reliable... */
- for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
- if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
- && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
- dirty = 1;
- if (!dirty)
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (type != -1 && type != cnt)
+ continue;
+ if (!sb_has_quota_enabled(sb, cnt))
+ continue;
+ if (!info_dirty(&sb_dqopt(sb)->info[cnt]) &&
+ list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list))
+ continue;
+ break;
+ }
+ if (cnt == MAXQUOTAS)
continue;
sb->s_count++;
spin_unlock(&sb_lock);
diff --git a/fs/quota_v1.c b/fs/quota_v1.c
index a6cf926..5ae15b1 100644
--- a/fs/quota_v1.c
+++ b/fs/quota_v1.c
@@ -1,6 +1,7 @@
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/quota.h>
+#include <linux/quotaops.h>
#include <linux/dqblk_v1.h>
#include <linux/quotaio_v1.h>
#include <linux/kernel.h>
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index 234ada9..b53827d 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -11,6 +11,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/quotaops.h>
#include <asm/byteorder.h>
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index e396b2f..c8f60ee 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -34,15 +34,10 @@
** from within kupdate, it will ignore the immediate flag
*/
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
#include <linux/time.h>
#include <linux/semaphore.h>
-
#include <linux/vmalloc.h>
#include <linux/reiserfs_fs.h>
-
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
@@ -54,6 +49,9 @@
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
+#include <linux/uaccess.h>
+
+#include <asm/system.h>
/* gets a struct reiserfs_journal_list * from a list head */
#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
@@ -558,13 +556,13 @@
static inline void lock_journal(struct super_block *p_s_sb)
{
PROC_INFO_INC(p_s_sb, journal.lock_journal);
- down(&SB_JOURNAL(p_s_sb)->j_lock);
+ mutex_lock(&SB_JOURNAL(p_s_sb)->j_mutex);
}
/* unlock the current transaction */
static inline void unlock_journal(struct super_block *p_s_sb)
{
- up(&SB_JOURNAL(p_s_sb)->j_lock);
+ mutex_unlock(&SB_JOURNAL(p_s_sb)->j_mutex);
}
static inline void get_journal_list(struct reiserfs_journal_list *jl)
@@ -1045,9 +1043,9 @@
}
/* make sure nobody is trying to flush this one at the same time */
- down(&jl->j_commit_lock);
+ mutex_lock(&jl->j_commit_mutex);
if (!journal_list_still_alive(s, trans_id)) {
- up(&jl->j_commit_lock);
+ mutex_unlock(&jl->j_commit_mutex);
goto put_jl;
}
BUG_ON(jl->j_trans_id == 0);
@@ -1057,7 +1055,7 @@
if (flushall) {
atomic_set(&(jl->j_older_commits_done), 1);
}
- up(&jl->j_commit_lock);
+ mutex_unlock(&jl->j_commit_mutex);
goto put_jl;
}
@@ -1181,7 +1179,7 @@
if (flushall) {
atomic_set(&(jl->j_older_commits_done), 1);
}
- up(&jl->j_commit_lock);
+ mutex_unlock(&jl->j_commit_mutex);
put_jl:
put_journal_list(s, jl);
@@ -1411,8 +1409,8 @@
/* if flushall == 0, the lock is already held */
if (flushall) {
- down(&journal->j_flush_sem);
- } else if (!down_trylock(&journal->j_flush_sem)) {
+ mutex_lock(&journal->j_flush_mutex);
+ } else if (mutex_trylock(&journal->j_flush_mutex)) {
BUG();
}
@@ -1642,7 +1640,7 @@
jl->j_state = 0;
put_journal_list(s, jl);
if (flushall)
- up(&journal->j_flush_sem);
+ mutex_unlock(&journal->j_flush_mutex);
put_fs_excl();
return err;
}
@@ -1772,12 +1770,12 @@
struct reiserfs_journal *journal = SB_JOURNAL(s);
chunk.nr = 0;
- down(&journal->j_flush_sem);
+ mutex_lock(&journal->j_flush_mutex);
if (!journal_list_still_alive(s, orig_trans_id)) {
goto done;
}
- /* we've got j_flush_sem held, nobody is going to delete any
+ /* we've got j_flush_mutex held, nobody is going to delete any
* of these lists out from underneath us
*/
while ((num_trans && transactions_flushed < num_trans) ||
@@ -1812,7 +1810,7 @@
}
done:
- up(&journal->j_flush_sem);
+ mutex_unlock(&journal->j_flush_mutex);
return ret;
}
@@ -2556,7 +2554,7 @@
INIT_LIST_HEAD(&jl->j_working_list);
INIT_LIST_HEAD(&jl->j_tail_bh_list);
INIT_LIST_HEAD(&jl->j_bh_list);
- sema_init(&jl->j_commit_lock, 1);
+ mutex_init(&jl->j_commit_mutex);
SB_JOURNAL(s)->j_num_lists++;
get_journal_list(jl);
return jl;
@@ -2837,8 +2835,8 @@
journal->j_last = NULL;
journal->j_first = NULL;
init_waitqueue_head(&(journal->j_join_wait));
- sema_init(&journal->j_lock, 1);
- sema_init(&journal->j_flush_sem, 1);
+ mutex_init(&journal->j_mutex);
+ mutex_init(&journal->j_flush_mutex);
journal->j_trans_id = 10;
journal->j_mount_id = 10;
@@ -4030,7 +4028,7 @@
* the new transaction is fully setup, and we've already flushed the
* ordered bh list
*/
- down(&jl->j_commit_lock);
+ mutex_lock(&jl->j_commit_mutex);
/* save the transaction id in case we need to commit it later */
commit_trans_id = jl->j_trans_id;
@@ -4196,7 +4194,7 @@
lock_kernel();
}
BUG_ON(!list_empty(&jl->j_tail_bh_list));
- up(&jl->j_commit_lock);
+ mutex_unlock(&jl->j_commit_mutex);
/* honor the flush wishes from the caller, simple commits can
** be done outside the journal lock, they are done below
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1d40f2b..2ec748b 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -22,6 +22,7 @@
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/exportfs.h>
+#include <linux/quotaops.h>
#include <linux/vfs.h>
#include <linux/mnt_namespace.h>
#include <linux/mount.h>
@@ -182,7 +183,7 @@
int ret = reiserfs_quota_on_mount(s, i);
if (ret < 0)
reiserfs_warning(s,
- "reiserfs: cannot turn on journalled quota: error %d",
+ "reiserfs: cannot turn on journaled quota: error %d",
ret);
}
}
@@ -876,7 +877,9 @@
mount options were selected. */
unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */
char **jdev_name,
- unsigned int *commit_max_age)
+ unsigned int *commit_max_age,
+ char **qf_names,
+ unsigned int *qfmt)
{
int c;
char *arg = NULL;
@@ -992,9 +995,11 @@
if (c == 'u' || c == 'g') {
int qtype = c == 'u' ? USRQUOTA : GRPQUOTA;
- if (sb_any_quota_enabled(s)) {
+ if ((sb_any_quota_enabled(s) ||
+ sb_any_quota_suspended(s)) &&
+ (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) {
reiserfs_warning(s,
- "reiserfs_parse_options: cannot change journalled quota options when quota turned on.");
+ "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
return 0;
}
if (*arg) { /* Some filename specified? */
@@ -1011,46 +1016,54 @@
"reiserfs_parse_options: quotafile must be on filesystem root.");
return 0;
}
- REISERFS_SB(s)->s_qf_names[qtype] =
+ qf_names[qtype] =
kmalloc(strlen(arg) + 1, GFP_KERNEL);
- if (!REISERFS_SB(s)->s_qf_names[qtype]) {
+ if (!qf_names[qtype]) {
reiserfs_warning(s,
"reiserfs_parse_options: not enough memory for storing quotafile name.");
return 0;
}
- strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg);
+ strcpy(qf_names[qtype], arg);
*mount_options |= 1 << REISERFS_QUOTA;
} else {
- kfree(REISERFS_SB(s)->s_qf_names[qtype]);
- REISERFS_SB(s)->s_qf_names[qtype] = NULL;
+ if (qf_names[qtype] !=
+ REISERFS_SB(s)->s_qf_names[qtype])
+ kfree(qf_names[qtype]);
+ qf_names[qtype] = NULL;
}
}
if (c == 'f') {
if (!strcmp(arg, "vfsold"))
- REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD;
+ *qfmt = QFMT_VFS_OLD;
else if (!strcmp(arg, "vfsv0"))
- REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0;
+ *qfmt = QFMT_VFS_V0;
else {
reiserfs_warning(s,
"reiserfs_parse_options: unknown quota format specified.");
return 0;
}
+ if ((sb_any_quota_enabled(s) ||
+ sb_any_quota_suspended(s)) &&
+ *qfmt != REISERFS_SB(s)->s_jquota_fmt) {
+ reiserfs_warning(s,
+ "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
+ return 0;
+ }
}
#else
if (c == 'u' || c == 'g' || c == 'f') {
reiserfs_warning(s,
- "reiserfs_parse_options: journalled quota options not supported.");
+ "reiserfs_parse_options: journaled quota options not supported.");
return 0;
}
#endif
}
#ifdef CONFIG_QUOTA
- if (!REISERFS_SB(s)->s_jquota_fmt
- && (REISERFS_SB(s)->s_qf_names[USRQUOTA]
- || REISERFS_SB(s)->s_qf_names[GRPQUOTA])) {
+ if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt
+ && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) {
reiserfs_warning(s,
- "reiserfs_parse_options: journalled quota format not specified.");
+ "reiserfs_parse_options: journaled quota format not specified.");
return 0;
}
/* This checking is not precise wrt the quota type but for our purposes it is sufficient */
@@ -1130,6 +1143,21 @@
}
}
+#ifdef CONFIG_QUOTA
+static void handle_quota_files(struct super_block *s, char **qf_names,
+ unsigned int *qfmt)
+{
+ int i;
+
+ for (i = 0; i < MAXQUOTAS; i++) {
+ if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
+ kfree(REISERFS_SB(s)->s_qf_names[i]);
+ REISERFS_SB(s)->s_qf_names[i] = qf_names[i];
+ }
+ REISERFS_SB(s)->s_jquota_fmt = *qfmt;
+}
+#endif
+
static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
{
struct reiserfs_super_block *rs;
@@ -1141,23 +1169,30 @@
struct reiserfs_journal *journal = SB_JOURNAL(s);
char *new_opts = kstrdup(arg, GFP_KERNEL);
int err;
+ char *qf_names[MAXQUOTAS];
+ unsigned int qfmt = 0;
#ifdef CONFIG_QUOTA
int i;
+
+ memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
#endif
rs = SB_DISK_SUPER_BLOCK(s);
if (!reiserfs_parse_options
- (s, arg, &mount_options, &blocks, NULL, &commit_max_age)) {
+ (s, arg, &mount_options, &blocks, NULL, &commit_max_age,
+ qf_names, &qfmt)) {
#ifdef CONFIG_QUOTA
- for (i = 0; i < MAXQUOTAS; i++) {
- kfree(REISERFS_SB(s)->s_qf_names[i]);
- REISERFS_SB(s)->s_qf_names[i] = NULL;
- }
+ for (i = 0; i < MAXQUOTAS; i++)
+ if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
+ kfree(qf_names[i]);
#endif
err = -EINVAL;
goto out_err;
}
+#ifdef CONFIG_QUOTA
+ handle_quota_files(s, qf_names, &qfmt);
+#endif
handle_attrs(s);
@@ -1570,6 +1605,8 @@
char *jdev_name;
struct reiserfs_sb_info *sbi;
int errval = -EINVAL;
+ char *qf_names[MAXQUOTAS] = {};
+ unsigned int qfmt = 0;
save_mount_options(s, data);
@@ -1597,9 +1634,12 @@
jdev_name = NULL;
if (reiserfs_parse_options
(s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
- &commit_max_age) == 0) {
+ &commit_max_age, qf_names, &qfmt) == 0) {
goto error;
}
+#ifdef CONFIG_QUOTA
+ handle_quota_files(s, qf_names, &qfmt);
+#endif
if (blocks) {
SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option "
@@ -1819,7 +1859,7 @@
return (0);
- error:
+error:
if (jinit_done) { /* kill the commit thread, free journal ram */
journal_release_error(NULL, s);
}
@@ -1830,10 +1870,8 @@
#ifdef CONFIG_QUOTA
{
int j;
- for (j = 0; j < MAXQUOTAS; j++) {
- kfree(sbi->s_qf_names[j]);
- sbi->s_qf_names[j] = NULL;
- }
+ for (j = 0; j < MAXQUOTAS; j++)
+ kfree(qf_names[j]);
}
#endif
kfree(sbi);
@@ -1980,7 +2018,7 @@
static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
{
- /* Are we journalling quotas? */
+ /* Are we journaling quotas? */
if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
dquot_mark_dquot_dirty(dquot);
@@ -2026,6 +2064,7 @@
int err;
struct nameidata nd;
struct inode *inode;
+ struct reiserfs_transaction_handle th;
if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA)))
return -EINVAL;
@@ -2053,17 +2092,28 @@
}
mark_inode_dirty(inode);
}
- /* Not journalling quota? No more tests needed... */
- if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] &&
- !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) {
- path_put(&nd.path);
- return vfs_quota_on(sb, type, format_id, path, 0);
- }
- /* Quotafile not of fs root? */
- if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
- reiserfs_warning(sb,
+ /* Journaling quota? */
+ if (REISERFS_SB(sb)->s_qf_names[type]) {
+ /* Quotafile not of fs root? */
+ if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
+ reiserfs_warning(sb,
"reiserfs: Quota file not on filesystem root. "
"Journalled quota will not work.");
+ }
+
+ /*
+ * When we journal data on quota file, we have to flush journal to see
+ * all updates to the file when we bypass pagecache...
+ */
+ if (reiserfs_file_data_log(inode)) {
+ /* Just start temporary transaction and finish it */
+ err = journal_begin(&th, sb, 1);
+ if (err)
+ return err;
+ err = journal_end_sync(&th, sb, 1);
+ if (err)
+ return err;
+ }
path_put(&nd.path);
return vfs_quota_on(sb, type, format_id, path, 0);
}
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 5e90a95..056008d 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -6,8 +6,6 @@
#include <linux/reiserfs_xattr.h>
#include <asm/uaccess.h>
-#define XATTR_SECURITY_PREFIX "security."
-
static int
security_get(struct inode *inode, const char *name, void *buffer, size_t size)
{
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 024a938..60abe2b 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -7,8 +7,6 @@
#include <linux/reiserfs_xattr.h>
#include <asm/uaccess.h>
-#define XATTR_TRUSTED_PREFIX "trusted."
-
static int
trusted_get(struct inode *inode, const char *name, void *buffer, size_t size)
{
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 073f393..1384efc 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -10,8 +10,6 @@
# include <linux/reiserfs_acl.h>
#endif
-#define XATTR_USER_PREFIX "user."
-
static int
user_get(struct inode *inode, const char *name, void *buffer, size_t size)
{
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
index 8182f05..8c177eb 100644
--- a/fs/smbfs/cache.c
+++ b/fs/smbfs/cache.c
@@ -13,7 +13,6 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <linux/dirent.h>
#include <linux/smb_fs.h>
#include <linux/pagemap.h>
#include <linux/net.h>
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index d517a27..ee536e8 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -16,7 +16,6 @@
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/dcache.h>
-#include <linux/dirent.h>
#include <linux/nls.h>
#include <linux/smp_lock.h>
#include <linux/net.h>
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 506f724..227c9d7 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -76,6 +76,7 @@
#include <linux/errno.h>
#include <linux/fs.h>
+#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/stat.h>
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index b546ba6..155c10b 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -621,7 +621,7 @@
memcpy(de->name, msdos_name, MSDOS_NAME);
de->attr = is_dir ? ATTR_DIR : ATTR_ARCH;
de->lcase = lcase;
- fat_date_unix2dos(ts->tv_sec, &time, &date);
+ fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
de->time = de->ctime = time;
de->date = de->cdate = de->adate = date;
de->ctime_cs = 0;
diff --git a/include/asm-alpha/thread_info.h b/include/asm-alpha/thread_info.h
index fb31851..15fda43 100644
--- a/include/asm-alpha/thread_info.h
+++ b/include/asm-alpha/thread_info.h
@@ -50,10 +50,8 @@
#define current_thread_info() __current_thread_info
/* Thread information allocation. */
+#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE (2*PAGE_SIZE)
-#define alloc_thread_info(tsk) \
- ((struct thread_info *) __get_free_pages(GFP_KERNEL,1))
-#define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
#endif /* __ASSEMBLY__ */
diff --git a/include/asm-arm/ptrace.h b/include/asm-arm/ptrace.h
index 7aaa206..8382b75 100644
--- a/include/asm-arm/ptrace.h
+++ b/include/asm-arm/ptrace.h
@@ -139,8 +139,6 @@
return 0;
}
-#endif /* __KERNEL__ */
-
#define pc_pointer(v) \
((v) & ~PCMASK)
@@ -153,10 +151,10 @@
#define profile_pc(regs) instruction_pointer(regs)
#endif
-#ifdef __KERNEL__
#define predicate(x) ((x) & 0xf0000000)
#define PREDICATE_ALWAYS 0xe0000000
-#endif
+
+#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
diff --git a/include/asm-arm/thread_info.h b/include/asm-arm/thread_info.h
index f5a6647..d4be2d6 100644
--- a/include/asm-arm/thread_info.h
+++ b/include/asm-arm/thread_info.h
@@ -97,19 +97,6 @@
return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}
-/* thread information allocation */
-#ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(tsk) \
- ((struct thread_info *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, \
- THREAD_SIZE_ORDER))
-#else
-#define alloc_thread_info(tsk) \
- ((struct thread_info *)__get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER))
-#endif
-
-#define free_thread_info(info) \
- free_pages((unsigned long)info, THREAD_SIZE_ORDER);
-
#define thread_saved_pc(tsk) \
((unsigned long)(pc_pointer(task_thread_info(tsk)->cpu_context.pc)))
#define thread_saved_fp(tsk) \
diff --git a/include/asm-avr32/thread_info.h b/include/asm-avr32/thread_info.h
index df68631..294b25f 100644
--- a/include/asm-avr32/thread_info.h
+++ b/include/asm-avr32/thread_info.h
@@ -61,10 +61,6 @@
return (struct thread_info *)addr;
}
-/* thread information allocation */
-#define alloc_thread_info(ti) \
- ((struct thread_info *) __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER))
-#define free_thread_info(ti) free_pages((unsigned long)(ti), 1)
#define get_thread_info(ti) get_task_struct((ti)->task)
#define put_thread_info(ti) put_task_struct((ti)->task)
diff --git a/include/asm-blackfin/ptrace.h b/include/asm-blackfin/ptrace.h
index b8346cd..a45a80e 100644
--- a/include/asm-blackfin/ptrace.h
+++ b/include/asm-blackfin/ptrace.h
@@ -83,14 +83,14 @@
#define PTRACE_GETREGS 12
#define PTRACE_SETREGS 13 /* ptrace signal */
-#ifdef CONFIG_BINFMT_ELF_FDPIC
#define PTRACE_GETFDPIC 31
#define PTRACE_GETFDPIC_EXEC 0
#define PTRACE_GETFDPIC_INTERP 1
-#endif
#define PS_S (0x0002)
+#ifdef __KERNEL__
+
/* user_mode returns true if only one bit is set in IPEND, other than the
master interrupt enable. */
#define user_mode(regs) (!(((regs)->ipend & ~0x10) & (((regs)->ipend & ~0x10) - 1)))
@@ -98,6 +98,8 @@
#define profile_pc(regs) instruction_pointer(regs)
extern void show_regs(struct pt_regs *);
+#endif /* __KERNEL__ */
+
#endif /* __ASSEMBLY__ */
/*
diff --git a/include/asm-blackfin/thread_info.h b/include/asm-blackfin/thread_info.h
index bc2fe5a..6427693 100644
--- a/include/asm-blackfin/thread_info.h
+++ b/include/asm-blackfin/thread_info.h
@@ -42,6 +42,7 @@
/*
* Size of kernel stack for each process. This must be a power of 2...
*/
+#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE 8192 /* 2 pages */
#ifndef __ASSEMBLY__
@@ -94,10 +95,6 @@
return (struct thread_info *)((long)ti & ~((long)THREAD_SIZE-1));
}
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
- __get_free_pages(GFP_KERNEL, 1))
-#define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
#endif /* __ASSEMBLY__ */
/*
diff --git a/include/asm-cris/arch-v10/Kbuild b/include/asm-cris/arch-v10/Kbuild
index 60e7e1b..7a192e1 100644
--- a/include/asm-cris/arch-v10/Kbuild
+++ b/include/asm-cris/arch-v10/Kbuild
@@ -1,4 +1,3 @@
-header-y += ptrace.h
header-y += user.h
header-y += svinto.h
header-y += sv_addr_ag.h
diff --git a/include/asm-cris/arch-v10/ptrace.h b/include/asm-cris/arch-v10/ptrace.h
index fb14c5e..2f464ea 100644
--- a/include/asm-cris/arch-v10/ptrace.h
+++ b/include/asm-cris/arch-v10/ptrace.h
@@ -106,10 +106,14 @@
unsigned long return_ip; /* ip that _resume will return to */
};
+#ifdef __KERNEL__
+
/* bit 8 is user-mode flag */
#define user_mode(regs) (((regs)->dccr & 0x100) != 0)
#define instruction_pointer(regs) ((regs)->irp)
#define profile_pc(regs) instruction_pointer(regs)
extern void show_regs(struct pt_regs *);
+#endif /* __KERNEL__ */
+
#endif
diff --git a/include/asm-cris/arch-v32/Kbuild b/include/asm-cris/arch-v32/Kbuild
index a0ec545..35f2fc4 100644
--- a/include/asm-cris/arch-v32/Kbuild
+++ b/include/asm-cris/arch-v32/Kbuild
@@ -1,3 +1,2 @@
-header-y += ptrace.h
header-y += user.h
header-y += cryptocop.h
diff --git a/include/asm-cris/arch-v32/ptrace.h b/include/asm-cris/arch-v32/ptrace.h
index 516cc70..41f4e86 100644
--- a/include/asm-cris/arch-v32/ptrace.h
+++ b/include/asm-cris/arch-v32/ptrace.h
@@ -106,9 +106,13 @@
unsigned long return_ip; /* ip that _resume will return to */
};
+#ifdef __KERNEL__
+
#define user_mode(regs) (((regs)->ccs & (1 << (U_CCS_BITNR + CCS_SHIFT))) != 0)
#define instruction_pointer(regs) ((regs)->erp)
extern void show_regs(struct pt_regs *);
#define profile_pc(regs) instruction_pointer(regs)
+#endif /* __KERNEL__ */
+
#endif
diff --git a/include/asm-cris/ptrace.h b/include/asm-cris/ptrace.h
index 1ec69a7..d910925 100644
--- a/include/asm-cris/ptrace.h
+++ b/include/asm-cris/ptrace.h
@@ -4,11 +4,13 @@
#include <asm/arch/ptrace.h>
#ifdef __KERNEL__
+
/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
#define PTRACE_GETREGS 12
#define PTRACE_SETREGS 13
-#endif
#define profile_pc(regs) instruction_pointer(regs)
+#endif /* __KERNEL__ */
+
#endif /* _CRIS_PTRACE_H */
diff --git a/include/asm-cris/thread_info.h b/include/asm-cris/thread_info.h
index 784668a..7efe100 100644
--- a/include/asm-cris/thread_info.h
+++ b/include/asm-cris/thread_info.h
@@ -11,6 +11,8 @@
#ifdef __KERNEL__
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
#ifndef __ASSEMBLY__
#include <asm/types.h>
#include <asm/processor.h>
diff --git a/include/asm-frv/Kbuild b/include/asm-frv/Kbuild
index bc3f12c..0f8956d 100644
--- a/include/asm-frv/Kbuild
+++ b/include/asm-frv/Kbuild
@@ -3,4 +3,3 @@
header-y += registers.h
unifdef-y += termios.h
-unifdef-y += ptrace.h
diff --git a/include/asm-frv/thread_info.h b/include/asm-frv/thread_info.h
index 348b8f1..b7ac6bf 100644
--- a/include/asm-frv/thread_info.h
+++ b/include/asm-frv/thread_info.h
@@ -82,6 +82,8 @@
#define current_thread_info() ({ __current_thread_info; })
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
/* thread information allocation */
#ifdef CONFIG_DEBUG_STACK_USAGE
#define alloc_thread_info(tsk) \
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 2632328..a3f738c 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -34,9 +34,14 @@
#ifndef __WARN
#ifndef __ASSEMBLY__
extern void warn_on_slowpath(const char *file, const int line);
+extern void warn_slowpath(const char *file, const int line,
+ const char *fmt, ...) __attribute__((format(printf, 3, 4)));
#define WANT_WARN_ON_SLOWPATH
#endif
#define __WARN() warn_on_slowpath(__FILE__, __LINE__)
+#define __WARN_printf(arg...) warn_slowpath(__FILE__, __LINE__, arg)
+#else
+#define __WARN_printf(arg...) __WARN()
#endif
#ifndef WARN_ON
@@ -48,6 +53,15 @@
})
#endif
+#ifndef WARN
+#define WARN(condition, format...) ({ \
+ int __ret_warn_on = !!(condition); \
+ if (unlikely(__ret_warn_on)) \
+ __WARN_printf(format); \
+ unlikely(__ret_warn_on); \
+})
+#endif
+
#else /* !CONFIG_BUG */
#ifndef HAVE_ARCH_BUG
#define BUG()
@@ -63,6 +77,14 @@
unlikely(__ret_warn_on); \
})
#endif
+
+#ifndef WARN
+#define WARN(condition, format...) ({ \
+ int __ret_warn_on = !!(condition); \
+ unlikely(__ret_warn_on); \
+})
+#endif
+
#endif
#define WARN_ON_ONCE(condition) ({ \
@@ -75,6 +97,9 @@
unlikely(__ret_warn_once); \
})
+#define WARN_ON_RATELIMIT(condition, state) \
+ WARN_ON((condition) && __ratelimit(state))
+
#ifdef CONFIG_SMP
# define WARN_ON_SMP(x) WARN_ON(x)
#else
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index 6be061d..a3034d2 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -3,7 +3,7 @@
#include <linux/types.h>
-#ifdef CONFIG_HAVE_GPIO_LIB
+#ifdef CONFIG_GPIOLIB
#include <linux/compiler.h>
@@ -32,6 +32,8 @@
/**
* struct gpio_chip - abstract a GPIO controller
* @label: for diagnostics
+ * @dev: optional device providing the GPIOs
+ * @owner: helps prevent removal of modules exporting active GPIOs
* @direction_input: configures signal "offset" as input, or returns error
* @get: returns value for signal "offset"; for output signals this
* returns either the value actually sensed, or zero
@@ -59,6 +61,7 @@
*/
struct gpio_chip {
char *label;
+ struct device *dev;
struct module *owner;
int (*direction_input)(struct gpio_chip *chip,
@@ -74,6 +77,7 @@
int base;
u16 ngpio;
unsigned can_sleep:1;
+ unsigned exported:1;
};
extern const char *gpiochip_is_requested(struct gpio_chip *chip,
@@ -108,7 +112,18 @@
extern int __gpio_cansleep(unsigned gpio);
-#else
+#ifdef CONFIG_GPIO_SYSFS
+
+/*
+ * A sysfs interface can be exported by individual drivers if they want,
+ * but more typically is configured entirely from userspace.
+ */
+extern int gpio_export(unsigned gpio, bool direction_may_change);
+extern void gpio_unexport(unsigned gpio);
+
+#endif /* CONFIG_GPIO_SYSFS */
+
+#else /* !CONFIG_GPIOLIB */
static inline int gpio_is_valid(int number)
{
@@ -137,6 +152,20 @@
gpio_set_value(gpio, value);
}
-#endif
+#endif /* !CONFIG_GPIOLIB */
+
+#ifndef CONFIG_GPIO_SYSFS
+
+/* sysfs support is only available with gpiolib, where it's optional */
+
+static inline int gpio_export(unsigned gpio, bool direction_may_change)
+{
+ return -ENOSYS;
+}
+
+static inline void gpio_unexport(unsigned gpio)
+{
+}
+#endif /* !CONFIG_GPIO_SYSFS */
#endif /* _ASM_GENERIC_GPIO_H */
diff --git a/include/asm-generic/int-ll64.h b/include/asm-generic/int-ll64.h
index 2609489..f9bc9ac 100644
--- a/include/asm-generic/int-ll64.h
+++ b/include/asm-generic/int-ll64.h
@@ -26,7 +26,7 @@
#ifdef __GNUC__
__extension__ typedef __signed__ long long __s64;
__extension__ typedef unsigned long long __u64;
-#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#else
typedef __signed__ long long __s64;
typedef unsigned long long __u64;
#endif
diff --git a/include/asm-h8300/elf.h b/include/asm-h8300/elf.h
index 26bfc7e..a8b57d1 100644
--- a/include/asm-h8300/elf.h
+++ b/include/asm-h8300/elf.h
@@ -26,10 +26,10 @@
#define ELF_DATA ELFDATA2MSB
#define ELF_ARCH EM_H8_300
#if defined(__H8300H__)
-#define ELF_FLAGS 0x810000
+#define ELF_CORE_EFLAGS 0x810000
#endif
#if defined(__H8300S__)
-#define ELF_FLAGS 0x820000
+#define ELF_CORE_EFLAGS 0x820000
#endif
#define ELF_PLAT_INIT(_r) _r->er1 = 0
diff --git a/include/asm-h8300/thread_info.h b/include/asm-h8300/thread_info.h
index 27bb95e..aafd4d3 100644
--- a/include/asm-h8300/thread_info.h
+++ b/include/asm-h8300/thread_info.h
@@ -49,6 +49,7 @@
/*
* Size of kernel stack for each process. This must be a power of 2...
*/
+#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE 8192 /* 2 pages */
@@ -65,10 +66,6 @@
return ti;
}
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
- __get_free_pages(GFP_KERNEL, 1))
-#define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
#endif /* __ASSEMBLY__ */
/*
diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h
index 2422ac6..7c60fcd 100644
--- a/include/asm-ia64/thread_info.h
+++ b/include/asm-ia64/thread_info.h
@@ -54,6 +54,8 @@
}, \
}
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
#ifndef ASM_OFFSETS_C
/* how to get the thread information struct from C */
#define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE))
diff --git a/include/asm-m32r/thread_info.h b/include/asm-m32r/thread_info.h
index 1effcd0..8589d46 100644
--- a/include/asm-m32r/thread_info.h
+++ b/include/asm-m32r/thread_info.h
@@ -94,6 +94,8 @@
return ti;
}
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
/* thread information allocation */
#ifdef CONFIG_DEBUG_STACK_USAGE
#define alloc_thread_info(tsk) \
diff --git a/include/asm-m68k/thread_info.h b/include/asm-m68k/thread_info.h
index d635a37..abc0027 100644
--- a/include/asm-m68k/thread_info.h
+++ b/include/asm-m68k/thread_info.h
@@ -25,13 +25,7 @@
}
/* THREAD_SIZE should be 8k, so handle differently for 4k and 8k machines */
-#if PAGE_SHIFT == 13 /* 8k machines */
-#define alloc_thread_info(tsk) ((struct thread_info *)__get_free_pages(GFP_KERNEL,0))
-#define free_thread_info(ti) free_pages((unsigned long)(ti),0)
-#else /* otherwise assume 4k pages */
-#define alloc_thread_info(tsk) ((struct thread_info *)__get_free_pages(GFP_KERNEL,1))
-#define free_thread_info(ti) free_pages((unsigned long)(ti),1)
-#endif /* PAGE_SHIFT == 13 */
+#define THREAD_SIZE_ORDER (13 - PAGE_SHIFT)
#define init_thread_info (init_task.thread.info)
#define init_stack (init_thread_union.stack)
diff --git a/include/asm-m68knommu/ptrace.h b/include/asm-m68knommu/ptrace.h
index 47258e8..8c9194b 100644
--- a/include/asm-m68knommu/ptrace.h
+++ b/include/asm-m68knommu/ptrace.h
@@ -68,10 +68,8 @@
/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
#define PTRACE_GETREGS 12
#define PTRACE_SETREGS 13
-#ifdef CONFIG_FPU
#define PTRACE_GETFPREGS 14
#define PTRACE_SETFPREGS 15
-#endif
#ifdef __KERNEL__
diff --git a/include/asm-m68knommu/thread_info.h b/include/asm-m68knommu/thread_info.h
index 95996d9..0c9bc09 100644
--- a/include/asm-m68knommu/thread_info.h
+++ b/include/asm-m68knommu/thread_info.h
@@ -71,10 +71,6 @@
return ti;
}
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
- __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER))
-#define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_SIZE_ORDER)
#endif /* __ASSEMBLY__ */
#define PREEMPT_ACTIVE 0x4000000
diff --git a/include/asm-mips/mach-generic/gpio.h b/include/asm-mips/mach-generic/gpio.h
index e6b376b..b4e7020 100644
--- a/include/asm-mips/mach-generic/gpio.h
+++ b/include/asm-mips/mach-generic/gpio.h
@@ -1,7 +1,7 @@
#ifndef __ASM_MACH_GENERIC_GPIO_H
#define __ASM_MACH_GENERIC_GPIO_H
-#ifdef CONFIG_HAVE_GPIO_LIB
+#ifdef CONFIG_GPIOLIB
#define gpio_get_value __gpio_get_value
#define gpio_set_value __gpio_set_value
#define gpio_cansleep __gpio_cansleep
diff --git a/include/asm-mips/thread_info.h b/include/asm-mips/thread_info.h
index b2772df..bb30606 100644
--- a/include/asm-mips/thread_info.h
+++ b/include/asm-mips/thread_info.h
@@ -82,6 +82,8 @@
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
#define THREAD_MASK (THREAD_SIZE - 1UL)
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
#ifdef CONFIG_DEBUG_STACK_USAGE
#define alloc_thread_info(tsk) \
({ \
diff --git a/include/asm-mn10300/ptrace.h b/include/asm-mn10300/ptrace.h
index b368468..7b06cc6 100644
--- a/include/asm-mn10300/ptrace.h
+++ b/include/asm-mn10300/ptrace.h
@@ -88,12 +88,16 @@
/* options set using PTRACE_SETOPTIONS */
#define PTRACE_O_TRACESYSGOOD 0x00000001
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#if defined(__KERNEL__)
+
+#if !defined(__ASSEMBLY__)
#define user_mode(regs) (((regs)->epsw & EPSW_nSL) == EPSW_nSL)
#define instruction_pointer(regs) ((regs)->pc)
extern void show_regs(struct pt_regs *);
-#endif
+#endif /* !__ASSEMBLY__ */
#define profile_pc(regs) ((regs)->pc)
+#endif /* __KERNEL__ */
+
#endif /* _ASM_PTRACE_H */
diff --git a/include/asm-mn10300/thread_info.h b/include/asm-mn10300/thread_info.h
index e397e71..78a3881 100644
--- a/include/asm-mn10300/thread_info.h
+++ b/include/asm-mn10300/thread_info.h
@@ -112,6 +112,8 @@
return sp;
}
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
/* thread information allocation */
#ifdef CONFIG_DEBUG_STACK_USAGE
#define alloc_thread_info(tsk) kzalloc(THREAD_SIZE, GFP_KERNEL)
diff --git a/include/asm-parisc/ptrace.h b/include/asm-parisc/ptrace.h
index 93f990e..3e94c5d 100644
--- a/include/asm-parisc/ptrace.h
+++ b/include/asm-parisc/ptrace.h
@@ -33,7 +33,6 @@
unsigned long ipsw; /* CR22 */
};
-#define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS))
/*
* The numbers chosen here are somewhat arbitrary but absolutely MUST
* not overlap with any of the number assigned in <linux/ptrace.h>.
@@ -43,8 +42,11 @@
* since we have taken branch traps too)
*/
#define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */
+
#ifdef __KERNEL__
+#define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS))
+
/* XXX should we use iaoq[1] or iaoq[0] ? */
#define user_mode(regs) (((regs)->iaoq[0] & 3) ? 1 : 0)
#define user_space(regs) (((regs)->iasq[1] != 0) ? 1 : 0)
diff --git a/include/asm-parisc/thread_info.h b/include/asm-parisc/thread_info.h
index 2d9c750..9f81274 100644
--- a/include/asm-parisc/thread_info.h
+++ b/include/asm-parisc/thread_info.h
@@ -34,15 +34,11 @@
/* thread information allocation */
-#define THREAD_ORDER 2
+#define THREAD_SIZE_ORDER 2
/* Be sure to hunt all references to this down when you change the size of
* the kernel stack */
-#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
-#define THREAD_SHIFT (PAGE_SHIFT + THREAD_ORDER)
-
-#define alloc_thread_info(tsk) ((struct thread_info *) \
- __get_free_pages(GFP_KERNEL, THREAD_ORDER))
-#define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER)
+#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
+#define THREAD_SHIFT (PAGE_SHIFT + THREAD_SIZE_ORDER)
/* how to get the thread information struct from C */
#define current_thread_info() ((struct thread_info *)mfctl(30))
diff --git a/include/asm-powerpc/Kbuild b/include/asm-powerpc/Kbuild
index 04ce8f8..5ab7d7f 100644
--- a/include/asm-powerpc/Kbuild
+++ b/include/asm-powerpc/Kbuild
@@ -29,7 +29,6 @@
unifdef-y += nvram.h
unifdef-y += param.h
unifdef-y += posix_types.h
-unifdef-y += ptrace.h
unifdef-y += seccomp.h
unifdef-y += signal.h
unifdef-y += spu_info.h
diff --git a/include/asm-powerpc/gpio.h b/include/asm-powerpc/gpio.h
index 77ad3a8..ea04632 100644
--- a/include/asm-powerpc/gpio.h
+++ b/include/asm-powerpc/gpio.h
@@ -17,7 +17,7 @@
#include <linux/errno.h>
#include <asm-generic/gpio.h>
-#ifdef CONFIG_HAVE_GPIO_LIB
+#ifdef CONFIG_GPIOLIB
/*
* We don't (yet) implement inlined/rapid versions for on-chip gpios.
@@ -51,6 +51,6 @@
return -EINVAL;
}
-#endif /* CONFIG_HAVE_GPIO_LIB */
+#endif /* CONFIG_GPIOLIB */
#endif /* __ASM_POWERPC_GPIO_H */
diff --git a/include/asm-powerpc/thread_info.h b/include/asm-powerpc/thread_info.h
index b705c2a..a9db562 100644
--- a/include/asm-powerpc/thread_info.h
+++ b/include/asm-powerpc/thread_info.h
@@ -66,20 +66,12 @@
#if THREAD_SHIFT >= PAGE_SHIFT
-#define THREAD_ORDER (THREAD_SHIFT - PAGE_SHIFT)
-
-#ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(tsk) \
- ((struct thread_info *)__get_free_pages(GFP_KERNEL | \
- __GFP_ZERO, THREAD_ORDER))
-#else
-#define alloc_thread_info(tsk) \
- ((struct thread_info *)__get_free_pages(GFP_KERNEL, THREAD_ORDER))
-#endif
-#define free_thread_info(ti) free_pages((unsigned long)ti, THREAD_ORDER)
+#define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT)
#else /* THREAD_SHIFT < PAGE_SHIFT */
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
extern struct thread_info *alloc_thread_info(struct task_struct *tsk);
extern void free_thread_info(struct thread_info *ti);
diff --git a/include/asm-s390/kvm_virtio.h b/include/asm-s390/kvm_virtio.h
index 5c871a9..1461002 100644
--- a/include/asm-s390/kvm_virtio.h
+++ b/include/asm-s390/kvm_virtio.h
@@ -50,4 +50,14 @@
#define KVM_S390_VIRTIO_RESET 1
#define KVM_S390_VIRTIO_SET_STATUS 2
+#ifdef __KERNEL__
+/* early virtio console setup */
+#ifdef CONFIG_VIRTIO_CONSOLE
+extern void s390_virtio_console_init(void);
+#else
+static inline void s390_virtio_console_init(void)
+{
+}
+#endif /* CONFIG_VIRTIO_CONSOLE */
+#endif /* __KERNEL__ */
#endif
diff --git a/include/asm-s390/thread_info.h b/include/asm-s390/thread_info.h
index 99bbed9..91a8f93 100644
--- a/include/asm-s390/thread_info.h
+++ b/include/asm-s390/thread_info.h
@@ -78,10 +78,7 @@
return (struct thread_info *)((*(unsigned long *) __LC_KERNEL_STACK)-THREAD_SIZE);
}
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
- __get_free_pages(GFP_KERNEL,THREAD_ORDER))
-#define free_thread_info(ti) free_pages((unsigned long) (ti),THREAD_ORDER)
+#define THREAD_SIZE_ORDER THREAD_ORDER
#endif
diff --git a/include/asm-sh/ptrace.h b/include/asm-sh/ptrace.h
index 8d6c92b..7d36dc3 100644
--- a/include/asm-sh/ptrace.h
+++ b/include/asm-sh/ptrace.h
@@ -5,7 +5,7 @@
* Copyright (C) 1999, 2000 Niibe Yutaka
*
*/
-#if defined(__SH5__) || defined(CONFIG_SUPERH64)
+#if defined(__SH5__)
struct pt_regs {
unsigned long long pc;
unsigned long long sr;
diff --git a/include/asm-sh/thread_info.h b/include/asm-sh/thread_info.h
index c50e5d3..5131e39 100644
--- a/include/asm-sh/thread_info.h
+++ b/include/asm-sh/thread_info.h
@@ -92,6 +92,8 @@
return ti;
}
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
/* thread information allocation */
#ifdef CONFIG_DEBUG_STACK_USAGE
#define alloc_thread_info(ti) kzalloc(THREAD_SIZE, GFP_KERNEL)
diff --git a/include/asm-sparc/thread_info_32.h b/include/asm-sparc/thread_info_32.h
index 91b9f58..2cf9db0 100644
--- a/include/asm-sparc/thread_info_32.h
+++ b/include/asm-sparc/thread_info_32.h
@@ -86,6 +86,8 @@
#define THREAD_INFO_ORDER 1
#endif
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
BTFIXUPDEF_CALL(struct thread_info *, alloc_thread_info, void)
#define alloc_thread_info(tsk) BTFIXUP_CALL(alloc_thread_info)()
diff --git a/include/asm-sparc/thread_info_64.h b/include/asm-sparc/thread_info_64.h
index c6d2e6c..960969d 100644
--- a/include/asm-sparc/thread_info_64.h
+++ b/include/asm-sparc/thread_info_64.h
@@ -155,6 +155,8 @@
#define __THREAD_INFO_ORDER 0
#endif /* PAGE_SHIFT == 13 */
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
#ifdef CONFIG_DEBUG_STACK_USAGE
#define alloc_thread_info(tsk) \
({ \
diff --git a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h
index 356b83e..e07e728 100644
--- a/include/asm-um/thread_info.h
+++ b/include/asm-um/thread_info.h
@@ -53,21 +53,7 @@
return ti;
}
-#ifdef CONFIG_DEBUG_STACK_USAGE
-
-#define alloc_thread_info(tsk) \
- ((struct thread_info *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, \
- CONFIG_KERNEL_STACK_ORDER))
-#else
-
-/* thread information allocation */
-#define alloc_thread_info(tsk) \
- ((struct thread_info *) __get_free_pages(GFP_KERNEL, \
- CONFIG_KERNEL_STACK_ORDER))
-#endif
-
-#define free_thread_info(ti) \
- free_pages((unsigned long)(ti),CONFIG_KERNEL_STACK_ORDER)
+#define THREAD_SIZE_ORDER CONFIG_KERNEL_STACK_ORDER
#endif
diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild
index 1e35545..00473f7 100644
--- a/include/asm-x86/Kbuild
+++ b/include/asm-x86/Kbuild
@@ -19,7 +19,6 @@
unifdef-y += mtrr.h
unifdef-y += posix_types_32.h
unifdef-y += posix_types_64.h
-unifdef-y += ptrace.h
unifdef-y += unistd_32.h
unifdef-y += unistd_64.h
unifdef-y += vm86.h
diff --git a/include/asm-x86/gpio.h b/include/asm-x86/gpio.h
index ff87fca..116e914 100644
--- a/include/asm-x86/gpio.h
+++ b/include/asm-x86/gpio.h
@@ -1,6 +1,62 @@
+/*
+ * Generic GPIO API implementation for x86.
+ *
+ * Derived from the generic GPIO API for powerpc:
+ *
+ * Copyright (c) 2007-2008 MontaVista Software, Inc.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
#ifndef _ASM_I386_GPIO_H
#define _ASM_I386_GPIO_H
+#ifdef CONFIG_X86_RDC321X
#include <gpio.h>
+#else /* CONFIG_X86_RDC321X */
+
+#include <asm-generic/gpio.h>
+
+#ifdef CONFIG_GPIOLIB
+
+/*
+ * Just call gpiolib.
+ */
+static inline int gpio_get_value(unsigned int gpio)
+{
+ return __gpio_get_value(gpio);
+}
+
+static inline void gpio_set_value(unsigned int gpio, int value)
+{
+ __gpio_set_value(gpio, value);
+}
+
+static inline int gpio_cansleep(unsigned int gpio)
+{
+ return __gpio_cansleep(gpio);
+}
+
+/*
+ * Not implemented, yet.
+ */
+static inline int gpio_to_irq(unsigned int gpio)
+{
+ return -ENOSYS;
+}
+
+static inline int irq_to_gpio(unsigned int irq)
+{
+ return -EINVAL;
+}
+
+#endif /* CONFIG_GPIOLIB */
+
+#endif /* CONFIG_X86_RDC321X */
#endif /* _ASM_I386_GPIO_H */
diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index 3f2de10..da0a675 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -152,6 +152,8 @@
#define THREAD_FLAGS GFP_KERNEL
#endif
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
#define alloc_thread_info(tsk) \
((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
diff --git a/include/asm-xtensa/ptrace.h b/include/asm-xtensa/ptrace.h
index 422c73e..089b0db 100644
--- a/include/asm-xtensa/ptrace.h
+++ b/include/asm-xtensa/ptrace.h
@@ -73,10 +73,10 @@
#define PTRACE_GETXTREGS 18
#define PTRACE_SETXTREGS 19
-#ifndef __ASSEMBLY__
-
#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
/*
* This struct defines the way the registers are stored on the
* kernel stack during a system call or other kernel entry.
@@ -122,14 +122,14 @@
# ifndef CONFIG_SMP
# define profile_pc(regs) instruction_pointer(regs)
# endif
-#endif /* __KERNEL__ */
#else /* __ASSEMBLY__ */
-#ifdef __KERNEL__
# include <asm/asm-offsets.h>
#define PT_REGS_OFFSET (KERNEL_STACK_SIZE - PT_USER_SIZE)
-#endif
#endif /* !__ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
#endif /* _XTENSA_PTRACE_H */
diff --git a/include/asm-xtensa/thread_info.h b/include/asm-xtensa/thread_info.h
index a2c6406..7e4131d 100644
--- a/include/asm-xtensa/thread_info.h
+++ b/include/asm-xtensa/thread_info.h
@@ -111,10 +111,6 @@
return ti;
}
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) __get_free_pages(GFP_KERNEL,1))
-#define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
-
#else /* !__ASSEMBLY__ */
/* how to get the thread information struct from ASM */
@@ -160,6 +156,7 @@
#define TS_USEDFPU 0x0001 /* FPU was used by this task this quantum (SMP) */
#define THREAD_SIZE 8192 //(2*PAGE_SIZE)
+#define THREAD_SIZE_ORDER 1
#endif /* __KERNEL__ */
#endif /* _XTENSA_THREAD_INFO */
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 71d70d1..a18008c 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -189,7 +189,6 @@
unifdef-y += cuda.h
unifdef-y += cyclades.h
unifdef-y += dccp.h
-unifdef-y += dirent.h
unifdef-y += dlm.h
unifdef-y += dlm_plock.h
unifdef-y += edd.h
diff --git a/include/linux/acct.h b/include/linux/acct.h
index e8cae54..882dc72 100644
--- a/include/linux/acct.h
+++ b/include/linux/acct.h
@@ -120,17 +120,20 @@
struct vfsmount;
struct super_block;
struct pacct_struct;
+struct pid_namespace;
extern void acct_auto_close_mnt(struct vfsmount *m);
extern void acct_auto_close(struct super_block *sb);
extern void acct_init_pacct(struct pacct_struct *pacct);
extern void acct_collect(long exitcode, int group_dead);
extern void acct_process(void);
+extern void acct_exit_ns(struct pid_namespace *);
#else
#define acct_auto_close_mnt(x) do { } while (0)
#define acct_auto_close(x) do { } while (0)
#define acct_init_pacct(x) do { } while (0)
#define acct_collect(x,y) do { } while (0)
#define acct_process() do { } while (0)
+#define acct_exit_ns(ns) do { } while (0)
#endif
/*
diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h
index 961ed4b..44f95b9 100644
--- a/include/linux/byteorder/big_endian.h
+++ b/include/linux/byteorder/big_endian.h
@@ -94,12 +94,12 @@
#define __le32_to_cpus(x) __swab32s((x))
#define __cpu_to_le16s(x) __swab16s((x))
#define __le16_to_cpus(x) __swab16s((x))
-#define __cpu_to_be64s(x) do {} while (0)
-#define __be64_to_cpus(x) do {} while (0)
-#define __cpu_to_be32s(x) do {} while (0)
-#define __be32_to_cpus(x) do {} while (0)
-#define __cpu_to_be16s(x) do {} while (0)
-#define __be16_to_cpus(x) do {} while (0)
+#define __cpu_to_be64s(x) do { (void)(x); } while (0)
+#define __be64_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_be32s(x) do { (void)(x); } while (0)
+#define __be32_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_be16s(x) do { (void)(x); } while (0)
+#define __be16_to_cpus(x) do { (void)(x); } while (0)
#ifdef __KERNEL__
#include <linux/byteorder/generic.h>
diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h
index 05dc7c3..4cc170a 100644
--- a/include/linux/byteorder/little_endian.h
+++ b/include/linux/byteorder/little_endian.h
@@ -88,12 +88,12 @@
{
return __swab16p((__u16 *)p);
}
-#define __cpu_to_le64s(x) do {} while (0)
-#define __le64_to_cpus(x) do {} while (0)
-#define __cpu_to_le32s(x) do {} while (0)
-#define __le32_to_cpus(x) do {} while (0)
-#define __cpu_to_le16s(x) do {} while (0)
-#define __le16_to_cpus(x) do {} while (0)
+#define __cpu_to_le64s(x) do { (void)(x); } while (0)
+#define __le64_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_le32s(x) do { (void)(x); } while (0)
+#define __le32_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_le16s(x) do { (void)(x); } while (0)
+#define __le16_to_cpus(x) do { (void)(x); } while (0)
#define __cpu_to_be64s(x) __swab64s((x))
#define __be64_to_cpus(x) __swab64s((x))
#define __cpu_to_be32s(x) __swab32s((x))
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e155aa7..c98dd7c 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -21,11 +21,13 @@
struct cgroupfs_root;
struct cgroup_subsys;
struct inode;
+struct cgroup;
extern int cgroup_init_early(void);
extern int cgroup_init(void);
extern void cgroup_init_smp(void);
extern void cgroup_lock(void);
+extern bool cgroup_lock_live_group(struct cgroup *cgrp);
extern void cgroup_unlock(void);
extern void cgroup_fork(struct task_struct *p);
extern void cgroup_fork_callbacks(struct task_struct *p);
@@ -205,50 +207,64 @@
* subsystem, followed by a period */
char name[MAX_CFTYPE_NAME];
int private;
- int (*open) (struct inode *inode, struct file *file);
- ssize_t (*read) (struct cgroup *cgrp, struct cftype *cft,
- struct file *file,
- char __user *buf, size_t nbytes, loff_t *ppos);
+
+ /*
+ * If non-zero, defines the maximum length of string that can
+ * be passed to write_string; defaults to 64
+ */
+ size_t max_write_len;
+
+ int (*open)(struct inode *inode, struct file *file);
+ ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
+ struct file *file,
+ char __user *buf, size_t nbytes, loff_t *ppos);
/*
* read_u64() is a shortcut for the common case of returning a
* single integer. Use it in place of read()
*/
- u64 (*read_u64) (struct cgroup *cgrp, struct cftype *cft);
+ u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft);
/*
* read_s64() is a signed version of read_u64()
*/
- s64 (*read_s64) (struct cgroup *cgrp, struct cftype *cft);
+ s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft);
/*
* read_map() is used for defining a map of key/value
* pairs. It should call cb->fill(cb, key, value) for each
* entry. The key/value pairs (and their ordering) should not
* change between reboots.
*/
- int (*read_map) (struct cgroup *cont, struct cftype *cft,
- struct cgroup_map_cb *cb);
+ int (*read_map)(struct cgroup *cont, struct cftype *cft,
+ struct cgroup_map_cb *cb);
/*
* read_seq_string() is used for outputting a simple sequence
* using seqfile.
*/
- int (*read_seq_string) (struct cgroup *cont, struct cftype *cft,
- struct seq_file *m);
+ int (*read_seq_string)(struct cgroup *cont, struct cftype *cft,
+ struct seq_file *m);
- ssize_t (*write) (struct cgroup *cgrp, struct cftype *cft,
- struct file *file,
- const char __user *buf, size_t nbytes, loff_t *ppos);
+ ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft,
+ struct file *file,
+ const char __user *buf, size_t nbytes, loff_t *ppos);
/*
* write_u64() is a shortcut for the common case of accepting
* a single integer (as parsed by simple_strtoull) from
* userspace. Use in place of write(); return 0 or error.
*/
- int (*write_u64) (struct cgroup *cgrp, struct cftype *cft, u64 val);
+ int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val);
/*
* write_s64() is a signed version of write_u64()
*/
- int (*write_s64) (struct cgroup *cgrp, struct cftype *cft, s64 val);
+ int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
/*
+ * write_string() is passed a nul-terminated kernelspace
+ * buffer of maximum length determined by max_write_len.
+ * Returns 0 or -ve error code.
+ */
+ int (*write_string)(struct cgroup *cgrp, struct cftype *cft,
+ const char *buffer);
+ /*
* trigger() callback can be used to get some kick from the
* userspace, when the actual string written is not important
* at all. The private field can be used to determine the
@@ -256,7 +272,7 @@
*/
int (*trigger)(struct cgroup *cgrp, unsigned int event);
- int (*release) (struct inode *inode, struct file *file);
+ int (*release)(struct inode *inode, struct file *file);
};
struct cgroup_scanner {
@@ -348,7 +364,8 @@
return task_subsys_state(task, subsys_id)->cgroup;
}
-int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss);
+int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss,
+ char *nodename);
/* A cgroup_iter should be treated as an opaque object */
struct cgroup_iter {
diff --git a/include/linux/coda.h b/include/linux/coda.h
index b5cf078..96c8769 100644
--- a/include/linux/coda.h
+++ b/include/linux/coda.h
@@ -199,28 +199,6 @@
typedef u_int32_t vgid_t;
#endif /*_VUID_T_ */
-#ifdef CONFIG_CODA_FS_OLD_API
-struct CodaFid {
- u_int32_t opaque[3];
-};
-
-static __inline__ ino_t coda_f2i(struct CodaFid *fid)
-{
- if ( ! fid )
- return 0;
- if (fid->opaque[1] == 0xfffffffe || fid->opaque[1] == 0xffffffff)
- return ((fid->opaque[0] << 20) | (fid->opaque[2] & 0xfffff));
- else
- return (fid->opaque[2] + (fid->opaque[1]<<10) + (fid->opaque[0]<<20));
-}
-
-struct coda_cred {
- vuid_t cr_uid, cr_euid, cr_suid, cr_fsuid; /* Real, efftve, set, fs uid*/
- vgid_t cr_groupid, cr_egid, cr_sgid, cr_fsgid; /* same for groups */
-};
-
-#else /* not defined(CONFIG_CODA_FS_OLD_API) */
-
struct CodaFid {
u_int32_t opaque[4];
};
@@ -228,8 +206,6 @@
#define coda_f2i(fid)\
(fid ? (fid->opaque[3] ^ (fid->opaque[2]<<10) ^ (fid->opaque[1]<<20) ^ fid->opaque[0]) : 0)
-#endif
-
#ifndef _VENUS_VATTR_T_
#define _VENUS_VATTR_T_
/*
@@ -313,15 +289,7 @@
#define CIOC_KERNEL_VERSION _IOWR('c', 10, size_t)
-#if 0
-#define CODA_KERNEL_VERSION 0 /* don't care about kernel version number */
-#define CODA_KERNEL_VERSION 1 /* The old venus 4.6 compatible interface */
-#endif
-#ifdef CONFIG_CODA_FS_OLD_API
-#define CODA_KERNEL_VERSION 2 /* venus_lookup got an extra parameter */
-#else
#define CODA_KERNEL_VERSION 3 /* 128-bit file identifiers */
-#endif
/*
* Venus <-> Coda RPC arguments
@@ -329,16 +297,9 @@
struct coda_in_hdr {
u_int32_t opcode;
u_int32_t unique; /* Keep multiple outstanding msgs distinct */
-#ifdef CONFIG_CODA_FS_OLD_API
- u_int16_t pid; /* Common to all */
- u_int16_t pgid; /* Common to all */
- u_int16_t sid; /* Common to all */
- struct coda_cred cred; /* Common to all */
-#else
pid_t pid;
pid_t pgid;
vuid_t uid;
-#endif
};
/* Really important that opcode and unique are 1st two fields! */
@@ -613,11 +574,7 @@
/* CODA_PURGEUSER is a venus->kernel call */
struct coda_purgeuser_out {
struct coda_out_hdr oh;
-#ifdef CONFIG_CODA_FS_OLD_API
- struct coda_cred cred;
-#else
vuid_t uid;
-#endif
};
/* coda_zapfile: */
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 7464ba3..d7faf88 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -69,10 +69,11 @@
#endif
int cpu_up(unsigned int cpu);
-
extern void cpu_hotplug_init(void);
+extern void cpu_maps_update_begin(void);
+extern void cpu_maps_update_done(void);
-#else
+#else /* CONFIG_SMP */
static inline int register_cpu_notifier(struct notifier_block *nb)
{
@@ -87,10 +88,16 @@
{
}
+static inline void cpu_maps_update_begin(void)
+{
+}
+
+static inline void cpu_maps_update_done(void)
+{
+}
+
#endif /* CONFIG_SMP */
extern struct sysdev_class cpu_sysdev_class;
-extern void cpu_maps_update_begin(void);
-extern void cpu_maps_update_done(void);
#ifdef CONFIG_HOTPLUG_CPU
/* Stop CPUs going up and down. */
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 22c7ac5..6cd39a9 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -22,5 +22,13 @@
#define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x))
+static inline int is_kdump_kernel(void)
+{
+ return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0;
+}
+#else /* !CONFIG_CRASH_DUMP */
+static inline int is_kdump_kernel(void) { return 0; }
#endif /* CONFIG_CRASH_DUMP */
+
+extern unsigned long saved_max_pfn;
#endif /* LINUX_CRASHDUMP_H */
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index ab94bc0..f352f06 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -39,6 +39,8 @@
extern void __delayacct_blkio_end(void);
extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
extern __u64 __delayacct_blkio_ticks(struct task_struct *);
+extern void __delayacct_freepages_start(void);
+extern void __delayacct_freepages_end(void);
static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
{
@@ -107,6 +109,18 @@
return 0;
}
+static inline void delayacct_freepages_start(void)
+{
+ if (current->delays)
+ __delayacct_freepages_start();
+}
+
+static inline void delayacct_freepages_end(void)
+{
+ if (current->delays)
+ __delayacct_freepages_end();
+}
+
#else
static inline void delayacct_set_flag(int flag)
{}
@@ -129,6 +143,11 @@
{ return 0; }
static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
{ return 0; }
+static inline void delayacct_freepages_start(void)
+{}
+static inline void delayacct_freepages_end(void)
+{}
+
#endif /* CONFIG_TASK_DELAY_ACCT */
#endif
diff --git a/include/linux/dirent.h b/include/linux/dirent.h
index 5d6023b..f072fb8 100644
--- a/include/linux/dirent.h
+++ b/include/linux/dirent.h
@@ -1,23 +1,6 @@
#ifndef _LINUX_DIRENT_H
#define _LINUX_DIRENT_H
-struct dirent {
- long d_ino;
- __kernel_off_t d_off;
- unsigned short d_reclen;
- char d_name[256]; /* We must not include limits.h! */
-};
-
-struct dirent64 {
- __u64 d_ino;
- __s64 d_off;
- unsigned short d_reclen;
- unsigned char d_type;
- char d_name[256];
-};
-
-#ifdef __KERNEL__
-
struct linux_dirent64 {
u64 d_ino;
s64 d_off;
@@ -26,7 +9,4 @@
char d_name[0];
};
-#endif /* __KERNEL__ */
-
-
#endif
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 84cec2a..2efe7b8 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -284,8 +284,8 @@
#ifdef __hurd__
#define i_translator osd1.hurd1.h_i_translator
-#define i_frag osd2.hurd2.h_i_frag;
-#define i_fsize osd2.hurd2.h_i_fsize;
+#define i_frag osd2.hurd2.h_i_frag
+#define i_fsize osd2.hurd2.h_i_fsize
#define i_uid_high osd2.hurd2.h_i_uid_high
#define i_gid_high osd2.hurd2.h_i_gid_high
#define i_author osd2.hurd2.h_i_author
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 36c5403..80171ee 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -832,6 +832,7 @@
extern void ext3_dirty_inode(struct inode *);
extern int ext3_change_inode_journal_flag(struct inode *, int);
extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
+extern int ext3_can_truncate(struct inode *inode);
extern void ext3_truncate (struct inode *);
extern void ext3_set_inode_flags(struct inode *);
extern void ext3_get_inode_flags(struct ext3_inode_info *);
diff --git a/include/linux/fd1772.h b/include/linux/fd1772.h
deleted file mode 100644
index 871d6e4..0000000
--- a/include/linux/fd1772.h
+++ /dev/null
@@ -1,80 +0,0 @@
-#ifndef _LINUX_FD1772REG_H
-#define _LINUX_FD1772REG_H
-
-/*
-** WD1772 stuff - originally from the M68K Linux
- * Modified for Archimedes by Dave Gilbert (gilbertd@cs.man.ac.uk)
- */
-
-/* register codes */
-
-#define FDC1772SELREG_STP (0x80) /* command/status register */
-#define FDC1772SELREG_TRA (0x82) /* track register */
-#define FDC1772SELREG_SEC (0x84) /* sector register */
-#define FDC1772SELREG_DTA (0x86) /* data register */
-
-/* register names for FDC1772_READ/WRITE macros */
-
-#define FDC1772REG_CMD 0
-#define FDC1772REG_STATUS 0
-#define FDC1772REG_TRACK 2
-#define FDC1772REG_SECTOR 4
-#define FDC1772REG_DATA 6
-
-/* command opcodes */
-
-#define FDC1772CMD_RESTORE (0x00) /* - */
-#define FDC1772CMD_SEEK (0x10) /* | */
-#define FDC1772CMD_STEP (0x20) /* | TYP 1 Commands */
-#define FDC1772CMD_STIN (0x40) /* | */
-#define FDC1772CMD_STOT (0x60) /* - */
-#define FDC1772CMD_RDSEC (0x80) /* - TYP 2 Commands */
-#define FDC1772CMD_WRSEC (0xa0) /* - " */
-#define FDC1772CMD_RDADR (0xc0) /* - */
-#define FDC1772CMD_RDTRA (0xe0) /* | TYP 3 Commands */
-#define FDC1772CMD_WRTRA (0xf0) /* - */
-#define FDC1772CMD_FORCI (0xd0) /* - TYP 4 Command */
-
-/* command modifier bits */
-
-#define FDC1772CMDADD_SR6 (0x00) /* step rate settings */
-#define FDC1772CMDADD_SR12 (0x01)
-#define FDC1772CMDADD_SR2 (0x02)
-#define FDC1772CMDADD_SR3 (0x03)
-#define FDC1772CMDADD_V (0x04) /* verify */
-#define FDC1772CMDADD_H (0x08) /* wait for spin-up */
-#define FDC1772CMDADD_U (0x10) /* update track register */
-#define FDC1772CMDADD_M (0x10) /* multiple sector access */
-#define FDC1772CMDADD_E (0x04) /* head settling flag */
-#define FDC1772CMDADD_P (0x02) /* precompensation */
-#define FDC1772CMDADD_A0 (0x01) /* DAM flag */
-
-/* status register bits */
-
-#define FDC1772STAT_MOTORON (0x80) /* motor on */
-#define FDC1772STAT_WPROT (0x40) /* write protected (FDC1772CMD_WR*) */
-#define FDC1772STAT_SPINUP (0x20) /* motor speed stable (Type I) */
-#define FDC1772STAT_DELDAM (0x20) /* sector has deleted DAM (Type II+III) */
-#define FDC1772STAT_RECNF (0x10) /* record not found */
-#define FDC1772STAT_CRC (0x08) /* CRC error */
-#define FDC1772STAT_TR00 (0x04) /* Track 00 flag (Type I) */
-#define FDC1772STAT_LOST (0x04) /* Lost Data (Type II+III) */
-#define FDC1772STAT_IDX (0x02) /* Index status (Type I) */
-#define FDC1772STAT_DRQ (0x02) /* DRQ status (Type II+III) */
-#define FDC1772STAT_BUSY (0x01) /* FDC1772 is busy */
-
-
-/* PSG Port A Bit Nr 0 .. Side Sel .. 0 -> Side 1 1 -> Side 2 */
-#define DSKSIDE (0x01)
-
-#define DSKDRVNONE (0x06)
-#define DSKDRV0 (0x02)
-#define DSKDRV1 (0x04)
-
-/* step rates */
-#define FDC1772STEP_6 0x00
-#define FDC1772STEP_12 0x01
-#define FDC1772STEP_2 0x02
-#define FDC1772STEP_3 0x03
-
-#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4b86f80..49d8eb7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -886,6 +886,12 @@
#define FL_SLEEP 128 /* A blocking lock */
/*
+ * Special return value from posix_lock_file() and vfs_lock_file() for
+ * asynchronous locking.
+ */
+#define FILE_LOCK_DEFERRED 1
+
+/*
* The POSIX file lock owner is determined by
* the "struct files_struct" in the thread group
* (or NULL for no owner - BSD locks).
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index d482821..265635d 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -104,11 +104,14 @@
/**
* INIT request/reply flags
+ *
+ * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
#define FUSE_FILE_OPS (1 << 2)
#define FUSE_ATOMIC_O_TRUNC (1 << 3)
+#define FUSE_EXPORT_SUPPORT (1 << 4)
#define FUSE_BIG_WRITES (1 << 5)
/**
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index e878741..118216f 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -541,7 +541,7 @@
extern char *disk_name (struct gendisk *hd, int part, char *buf);
extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
-extern void add_partition(struct gendisk *, int, sector_t, sector_t, int);
+extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int);
extern void delete_partition(struct gendisk *, int);
extern void printk_all_partitions(void);
diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 98be6c5..730a20b 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -79,6 +79,19 @@
WARN_ON(1);
}
+static inline int gpio_export(unsigned gpio, bool direction_may_change)
+{
+ /* GPIO can never have been requested or set as {in,out}put */
+ WARN_ON(1);
+ return -EINVAL;
+}
+
+static inline void gpio_unexport(unsigned gpio)
+{
+ /* GPIO can never have been exported */
+ WARN_ON(1);
+}
+
static inline int gpio_to_irq(unsigned gpio)
{
/* GPIO can never have been requested or set as input */
diff --git a/include/linux/i2c/max732x.h b/include/linux/i2c/max732x.h
new file mode 100644
index 0000000..e103366
--- /dev/null
+++ b/include/linux/i2c/max732x.h
@@ -0,0 +1,19 @@
+#ifndef __LINUX_I2C_MAX732X_H
+#define __LINUX_I2C_MAX732X_H
+
+/* platform data for the MAX732x 8/16-bit I/O expander driver */
+
+struct max732x_platform_data {
+ /* number of the first GPIO */
+ unsigned gpio_base;
+
+ void *context; /* param to setup/teardown */
+
+ int (*setup)(struct i2c_client *client,
+ unsigned gpio, unsigned ngpio,
+ void *context);
+ int (*teardown)(struct i2c_client *client,
+ unsigned gpio, unsigned ngpio,
+ void *context);
+};
+#endif /* __LINUX_I2C_MAX732X_H */
diff --git a/include/linux/idr.h b/include/linux/idr.h
index 9a2d762..fa035f9 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -15,6 +15,7 @@
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/init.h>
+#include <linux/rcupdate.h>
#if BITS_PER_LONG == 32
# define IDR_BITS 5
@@ -51,6 +52,7 @@
unsigned long bitmap; /* A zero bit means "space here" */
struct idr_layer *ary[1<<IDR_BITS];
int count; /* When zero, we can release it */
+ struct rcu_head rcu_head;
};
struct idr {
@@ -71,6 +73,28 @@
}
#define DEFINE_IDR(name) struct idr name = IDR_INIT(name)
+/* Actions to be taken after a call to _idr_sub_alloc */
+#define IDR_NEED_TO_GROW -2
+#define IDR_NOMORE_SPACE -3
+
+#define _idr_rc_to_errno(rc) ((rc) == -1 ? -EAGAIN : -ENOSPC)
+
+/**
+ * idr synchronization (stolen from radix-tree.h)
+ *
+ * idr_find() is able to be called locklessly, using RCU. The caller must
+ * ensure calls to this function are made within rcu_read_lock() regions.
+ * Other readers (lock-free or otherwise) and modifications may be running
+ * concurrently.
+ *
+ * It is still required that the caller manage the synchronization and
+ * lifetimes of the items. So if RCU lock-free lookups are used, typically
+ * this would mean that the items have their own locks, or are amenable to
+ * lock-free access; and that the items are freed by RCU (or only freed after
+ * having been deleted from the idr tree *and* a synchronize_rcu() grace
+ * period).
+ */
+
/*
* This is what we export.
*/
diff --git a/include/linux/init.h b/include/linux/init.h
index 21d658c..42ae954 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -275,13 +275,7 @@
#define security_initcall(fn) module_init(fn)
-/* These macros create a dummy inline: gcc 2.9x does not count alias
- as usage, hence the `unused function' warning when __init functions
- are declared static. We use the dummy __*_module_inline functions
- both to kill the warning and check the type of the init/cleanup
- function. */
-
-/* Each module must use one module_init(), or one no_module_init */
+/* Each module must use one module_init(). */
#define module_init(initfn) \
static inline initcall_t __inittest(void) \
{ return initfn; } \
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 93c45ac..021d8e7 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -122,7 +122,7 @@
.state = 0, \
.stack = &init_thread_info, \
.usage = ATOMIC_INIT(2), \
- .flags = 0, \
+ .flags = PF_KTHREAD, \
.lock_depth = -1, \
.prio = MAX_PRIO-20, \
.static_prio = MAX_PRIO-20, \
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index ea6c18a..ea330f9 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -36,6 +36,7 @@
int msg_ctlmni;
atomic_t msg_bytes;
atomic_t msg_hdrs;
+ int auto_msgmni;
size_t shm_ctlmax;
size_t shm_ctlall;
@@ -53,7 +54,7 @@
extern int register_ipcns_notifier(struct ipc_namespace *);
extern int cond_register_ipcns_notifier(struct ipc_namespace *);
-extern int unregister_ipcns_notifier(struct ipc_namespace *);
+extern void unregister_ipcns_notifier(struct ipc_namespace *);
extern int ipcns_notify(unsigned long);
#else /* CONFIG_SYSVIPC */
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 2b1c2e5..74bde13 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -11,6 +11,8 @@
#ifndef _LINUX_TRACE_IRQFLAGS_H
#define _LINUX_TRACE_IRQFLAGS_H
+#include <linux/typecheck.h>
+
#ifdef CONFIG_TRACE_IRQFLAGS
extern void trace_softirqs_on(unsigned long ip);
extern void trace_softirqs_off(unsigned long ip);
@@ -58,18 +60,24 @@
do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
#define local_irq_disable() \
do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
-#define local_irq_save(flags) \
- do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0)
+#define local_irq_save(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ raw_local_irq_save(flags); \
+ trace_hardirqs_off(); \
+ } while (0)
-#define local_irq_restore(flags) \
- do { \
- if (raw_irqs_disabled_flags(flags)) { \
- raw_local_irq_restore(flags); \
- trace_hardirqs_off(); \
- } else { \
- trace_hardirqs_on(); \
- raw_local_irq_restore(flags); \
- } \
+
+#define local_irq_restore(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ if (raw_irqs_disabled_flags(flags)) { \
+ raw_local_irq_restore(flags); \
+ trace_hardirqs_off(); \
+ } else { \
+ trace_hardirqs_on(); \
+ raw_local_irq_restore(flags); \
+ } \
} while (0)
#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */
/*
@@ -78,8 +86,16 @@
*/
# define raw_local_irq_disable() local_irq_disable()
# define raw_local_irq_enable() local_irq_enable()
-# define raw_local_irq_save(flags) local_irq_save(flags)
-# define raw_local_irq_restore(flags) local_irq_restore(flags)
+# define raw_local_irq_save(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ local_irq_save(flags); \
+ } while (0)
+# define raw_local_irq_restore(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ local_irq_restore(flags); \
+ } while (0)
#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
@@ -89,7 +105,11 @@
raw_safe_halt(); \
} while (0)
-#define local_save_flags(flags) raw_local_save_flags(flags)
+#define local_save_flags(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ raw_local_save_flags(flags); \
+ } while (0)
#define irqs_disabled() \
({ \
@@ -99,7 +119,11 @@
raw_irqs_disabled_flags(_flags); \
})
-#define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags)
+#define irqs_disabled_flags(flags) \
+({ \
+ typecheck(unsigned long, flags); \
+ raw_irqs_disabled_flags(flags); \
+})
#endif /* CONFIG_X86 */
#endif
diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 00c1801..57aefa1 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -6,6 +6,7 @@
#define _LINUX_KALLSYMS_H
#include <linux/errno.h>
+#include <linux/kernel.h>
#include <linux/stddef.h>
#define KSYM_NAME_LEN 128
@@ -105,18 +106,10 @@
print_symbol(fmt, (unsigned long)addr);
}
-#ifndef CONFIG_64BIT
-#define print_ip_sym(ip) \
-do { \
- printk("[<%08lx>]", ip); \
- print_symbol(" %s\n", ip); \
-} while(0)
-#else
-#define print_ip_sym(ip) \
-do { \
- printk("[<%016lx>]", ip); \
- print_symbol(" %s\n", ip); \
-} while(0)
-#endif
+static inline void print_ip_sym(unsigned long ip)
+{
+ printk("[<%p>]", (void *) ip);
+ print_symbol(" %s\n", ip);
+}
#endif /*_LINUX_KALLSYMS_H*/
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index f9cd7a5..fdbbf72 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -14,6 +14,8 @@
#include <linux/compiler.h>
#include <linux/bitops.h>
#include <linux/log2.h>
+#include <linux/typecheck.h>
+#include <linux/ratelimit.h>
#include <asm/byteorder.h>
#include <asm/bug.h>
@@ -188,11 +190,8 @@
asmlinkage int printk(const char * fmt, ...)
__attribute__ ((format (printf, 1, 2))) __cold;
-extern int printk_ratelimit_jiffies;
-extern int printk_ratelimit_burst;
+extern struct ratelimit_state printk_ratelimit_state;
extern int printk_ratelimit(void);
-extern int __ratelimit(int ratelimit_jiffies, int ratelimit_burst);
-extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst);
extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
unsigned int interval_msec);
#else
@@ -203,8 +202,6 @@
__attribute__ ((format (printf, 1, 2)));
static inline int __cold printk(const char *s, ...) { return 0; }
static inline int printk_ratelimit(void) { return 0; }
-static inline int __printk_ratelimit(int ratelimit_jiffies, \
- int ratelimit_burst) { return 0; }
static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \
unsigned int interval_msec) \
{ return false; }
@@ -441,26 +438,6 @@
const typeof( ((type *)0)->member ) *__mptr = (ptr); \
(type *)( (char *)__mptr - offsetof(type,member) );})
-/*
- * Check at compile time that something is of a particular type.
- * Always evaluates to 1 so you may use it easily in comparisons.
- */
-#define typecheck(type,x) \
-({ type __dummy; \
- typeof(x) __dummy2; \
- (void)(&__dummy == &__dummy2); \
- 1; \
-})
-
-/*
- * Check at compile time that 'function' is a certain type, or is a pointer
- * to that type (needs to use typedef for the function type.)
- */
-#define typecheck_fn(type,function) \
-({ typeof(type) __tmp = function; \
- (void)__tmp; \
-})
-
struct sysinfo;
extern int do_sysinfo(struct sysinfo *info);
diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index 0509c4c..a1a9157 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -19,6 +19,7 @@
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+#include <linux/gfp.h>
#include <linux/stddef.h>
#include <linux/errno.h>
#include <linux/compiler.h>
@@ -41,8 +42,8 @@
struct subprocess_info;
/* Allocate a subprocess_info structure */
-struct subprocess_info *call_usermodehelper_setup(char *path,
- char **argv, char **envp);
+struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
+ char **envp, gfp_t gfp_mask);
/* Set various pieces of state into the subprocess_info structure */
void call_usermodehelper_setkeys(struct subprocess_info *info,
@@ -69,8 +70,9 @@
call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait)
{
struct subprocess_info *info;
+ gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
- info = call_usermodehelper_setup(path, argv, envp);
+ info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
if (info == NULL)
return -ENOMEM;
return call_usermodehelper_exec(info, wait);
@@ -81,8 +83,9 @@
struct key *session_keyring, enum umh_wait wait)
{
struct subprocess_info *info;
+ gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
- info = call_usermodehelper_setup(path, argv, envp);
+ info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
if (info == NULL)
return -ENOMEM;
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 04a3556..0be7795 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -157,11 +157,10 @@
int nmissed;
size_t data_size;
struct hlist_head free_instances;
- struct hlist_head used_instances;
+ spinlock_t lock;
};
struct kretprobe_instance {
- struct hlist_node uflist; /* either on free list or used list */
struct hlist_node hlist;
struct kretprobe *rp;
kprobe_opcode_t *ret_addr;
@@ -201,7 +200,6 @@
}
#endif /* CONFIG_KPROBES_SANITY_TEST */
-extern spinlock_t kretprobe_lock;
extern struct mutex kprobe_mutex;
extern int arch_prepare_kprobe(struct kprobe *p);
extern void arch_arm_kprobe(struct kprobe *p);
@@ -214,6 +212,9 @@
/* Get the kprobe at this addr (if any) - called with preemption disabled */
struct kprobe *get_kprobe(void *addr);
+void kretprobe_hash_lock(struct task_struct *tsk,
+ struct hlist_head **head, unsigned long *flags);
+void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags);
struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk);
/* kprobe_running() will just return the current_kprobe on this CPU */
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 00dd957..aabc8a1 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -6,7 +6,8 @@
struct task_struct *kthread_create(int (*threadfn)(void *data),
void *data,
- const char namefmt[], ...);
+ const char namefmt[], ...)
+ __attribute__((format(printf, 3, 4)));
/**
* kthread_run - create and wake a thread.
diff --git a/include/linux/list.h b/include/linux/list.h
index 139ec41..453916b 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -61,14 +61,10 @@
* Insert a new entry after the specified head.
* This is good for implementing stacks.
*/
-#ifndef CONFIG_DEBUG_LIST
static inline void list_add(struct list_head *new, struct list_head *head)
{
__list_add(new, head, head->next);
}
-#else
-extern void list_add(struct list_head *new, struct list_head *head);
-#endif
/**
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e660877..fdf3967 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -35,7 +35,10 @@
extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask);
extern void mem_cgroup_uncharge_page(struct page *page);
+extern void mem_cgroup_uncharge_cache_page(struct page *page);
extern void mem_cgroup_move_lists(struct page *page, bool active);
+extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask);
+
extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
struct list_head *dst,
unsigned long *scanned, int order,
@@ -50,9 +53,9 @@
#define mm_match_cgroup(mm, cgroup) \
((cgroup) == mem_cgroup_from_task((mm)->owner))
-extern int mem_cgroup_prepare_migration(struct page *page);
+extern int
+mem_cgroup_prepare_migration(struct page *page, struct page *newpage);
extern void mem_cgroup_end_migration(struct page *page);
-extern void mem_cgroup_page_migration(struct page *page, struct page *newpage);
/*
* For memory reclaim.
@@ -97,6 +100,15 @@
{
}
+static inline void mem_cgroup_uncharge_cache_page(struct page *page)
+{
+}
+
+static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
+{
+ return 0;
+}
+
static inline void mem_cgroup_move_lists(struct page *page, bool active)
{
}
@@ -112,7 +124,8 @@
return 1;
}
-static inline int mem_cgroup_prepare_migration(struct page *page)
+static inline int
+mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
{
return 0;
}
@@ -121,11 +134,6 @@
{
}
-static inline void
-mem_cgroup_page_migration(struct page *page, struct page *newpage)
-{
-}
-
static inline int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
{
return 0;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 02a27ae..746f975 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -159,6 +159,17 @@
#endif
};
+struct core_thread {
+ struct task_struct *task;
+ struct core_thread *next;
+};
+
+struct core_state {
+ atomic_t nr_threads;
+ struct core_thread dumper;
+ struct completion startup;
+};
+
struct mm_struct {
struct vm_area_struct * mmap; /* list of VMAs */
struct rb_root mm_rb;
@@ -175,7 +186,6 @@
atomic_t mm_users; /* How many users with user space? */
atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */
int map_count; /* number of VMAs */
- int core_waiters;
struct rw_semaphore mmap_sem;
spinlock_t page_table_lock; /* Protects page tables and some counters */
@@ -219,8 +229,7 @@
unsigned long flags; /* Must use atomic bitops to access the bits */
- /* coredumping support */
- struct completion *core_startup_done, core_done;
+ struct core_state *core_state; /* coredumping support */
/* aio bits */
rwlock_t ioctx_list_lock; /* aio lock */
diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h
index 81cd36b..ba63858 100644
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -2,11 +2,11 @@
#define _LINUX_MSDOS_FS_H
#include <linux/magic.h>
+#include <asm/byteorder.h>
/*
* The MS-DOS filesystem constants/structures
*/
-#include <asm/byteorder.h>
#define SECTOR_SIZE 512 /* sector size (bytes) */
#define SECTOR_BITS 9 /* log2(SECTOR_SIZE) */
@@ -89,24 +89,22 @@
#define IS_FSINFO(x) (le32_to_cpu((x)->signature1) == FAT_FSINFO_SIG1 \
&& le32_to_cpu((x)->signature2) == FAT_FSINFO_SIG2)
+struct __fat_dirent {
+ long d_ino;
+ __kernel_off_t d_off;
+ unsigned short d_reclen;
+ char d_name[256]; /* We must not include limits.h! */
+};
+
/*
* ioctl commands
*/
-#define VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct dirent [2])
-#define VFAT_IOCTL_READDIR_SHORT _IOR('r', 2, struct dirent [2])
+#define VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct __fat_dirent[2])
+#define VFAT_IOCTL_READDIR_SHORT _IOR('r', 2, struct __fat_dirent[2])
/* <linux/videotext.h> has used 0x72 ('r') in collision, so skip a few */
#define FAT_IOCTL_GET_ATTRIBUTES _IOR('r', 0x10, __u32)
#define FAT_IOCTL_SET_ATTRIBUTES _IOW('r', 0x11, __u32)
-/*
- * vfat shortname flags
- */
-#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */
-#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */
-#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */
-#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */
-#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */
-
struct fat_boot_sector {
__u8 ignored[3]; /* Boot strap short or near jump */
__u8 system_id[8]; /* Name - can be used to special case
@@ -168,14 +166,6 @@
__u8 name11_12[4]; /* last 2 characters in name */
};
-struct fat_slot_info {
- loff_t i_pos; /* on-disk position of directory entry */
- loff_t slot_off; /* offset for slot or de start */
- int nr_slots; /* number of slots + 1(de) in filename */
- struct msdos_dir_entry *de;
- struct buffer_head *bh;
-};
-
#ifdef __KERNEL__
#include <linux/buffer_head.h>
@@ -184,6 +174,15 @@
#include <linux/fs.h>
#include <linux/mutex.h>
+/*
+ * vfat shortname flags
+ */
+#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */
+#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */
+#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */
+#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */
+#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */
+
struct fat_mount_options {
uid_t fs_uid;
gid_t fs_gid;
@@ -202,10 +201,10 @@
utf8:1, /* Use of UTF-8 character set (Default) */
unicode_xlate:1, /* create escape sequences for unhandled Unicode */
numtail:1, /* Does first alias have a numeric '~1' type tail? */
- atari:1, /* Use Atari GEMDOS variation of MS-DOS fs */
flush:1, /* write things quickly */
nocase:1, /* Does this need case conversion? 0=need case conversion*/
- usefree:1; /* Use free_clusters for FAT32 */
+ usefree:1, /* Use free_clusters for FAT32 */
+ tz_utc:1; /* Filesystem timestamps are in UTC */
};
#define FAT_HASH_BITS 8
@@ -267,6 +266,14 @@
struct inode vfs_inode;
};
+struct fat_slot_info {
+ loff_t i_pos; /* on-disk position of directory entry */
+ loff_t slot_off; /* offset for slot or de start */
+ int nr_slots; /* number of slots + 1(de) in filename */
+ struct msdos_dir_entry *de;
+ struct buffer_head *bh;
+};
+
static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb)
{
return sb->s_fs_info;
@@ -428,8 +435,9 @@
extern void fat_fs_panic(struct super_block *s, const char *fmt, ...);
extern void fat_clusters_flush(struct super_block *sb);
extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
-extern int date_dos2unix(unsigned short time, unsigned short date);
-extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date);
+extern int date_dos2unix(unsigned short time, unsigned short date, int tz_utc);
+extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date,
+ int tz_utc);
extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs);
int fat_cache_init(void);
diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h
index f71201d..6316faf 100644
--- a/include/linux/mtd/ubi.h
+++ b/include/linux/mtd/ubi.h
@@ -45,13 +45,13 @@
* @size: how many physical eraseblocks are reserved for this volume
* @used_bytes: how many bytes of data this volume contains
* @used_ebs: how many physical eraseblocks of this volume actually contain any
- * data
+ * data
* @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME)
* @corrupted: non-zero if the volume is corrupted (static volumes only)
* @upd_marker: non-zero if the volume has update marker set
* @alignment: volume alignment
* @usable_leb_size: how many bytes are available in logical eraseblocks of
- * this volume
+ * this volume
* @name_len: volume name length
* @name: volume name
* @cdev: UBI volume character device major and minor numbers
@@ -152,6 +152,7 @@
int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum);
int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype);
int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum);
+int ubi_sync(int ubi_num);
/*
* This function is the same as the 'ubi_leb_read()' function, but it does not
diff --git a/include/linux/net.h b/include/linux/net.h
index 2f999fb..4a9a30f 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -351,8 +351,7 @@
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
-extern int net_msg_cost;
-extern int net_msg_burst;
+extern struct ratelimit_state net_ratelimit_state;
#endif
#endif /* __KERNEL__ */
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index a2861d9..108f47e 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -12,7 +12,6 @@
#include <linux/types.h>
#include <linux/unistd.h>
-#include <linux/dirent.h>
#include <linux/fs.h>
#include <linux/posix_acl.h>
#include <linux/mount.h>
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index bd3d72d..da2698b 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -214,6 +214,8 @@
#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */
#define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task,
* not handling interrupts, soon dead */
+#define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug
+ * lock is dropped */
/* Used for CPU hotplug events occurring while tasks are frozen due to a suspend
* operation in progress
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 0e66b57..c8a768e 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -82,9 +82,12 @@
}
#ifdef CONFIG_CGROUP_NS
-int ns_cgroup_clone(struct task_struct *tsk);
+int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid);
#else
-static inline int ns_cgroup_clone(struct task_struct *tsk) { return 0; }
+static inline int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid)
+{
+ return 0;
+}
#endif
#endif
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 119ae7b..c3b1761 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2400,6 +2400,9 @@
#define PCI_DEVICE_ID_INTEL_ICH10_4 0x3a30
#define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60
#define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f
+#define PCI_DEVICE_ID_INTEL_5100_16 0x65f0
+#define PCI_DEVICE_ID_INTEL_5100_21 0x65f5
+#define PCI_DEVICE_ID_INTEL_5100_22 0x65f6
#define PCI_DEVICE_ID_INTEL_5400_ERR 0x4030
#define PCI_DEVICE_ID_INTEL_5400_FBD0 0x4035
#define PCI_DEVICE_ID_INTEL_5400_FBD1 0x4036
diff --git a/include/linux/pid.h b/include/linux/pid.h
index c21c7e8..22921ac 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -48,7 +48,7 @@
*/
struct upid {
- /* Try to keep pid_chain in the same cacheline as nr for find_pid */
+ /* Try to keep pid_chain in the same cacheline as nr for find_vpid */
int nr;
struct pid_namespace *ns;
struct hlist_node pid_chain;
@@ -57,10 +57,10 @@
struct pid
{
atomic_t count;
+ unsigned int level;
/* lists of tasks that use this pid */
struct hlist_head tasks[PIDTYPE_MAX];
struct rcu_head rcu;
- unsigned int level;
struct upid numbers[1];
};
@@ -105,14 +105,12 @@
* or rcu_read_lock() held.
*
* find_pid_ns() finds the pid in the namespace specified
- * find_pid() find the pid by its global id, i.e. in the init namespace
* find_vpid() finr the pid by its virtual id, i.e. in the current namespace
*
- * see also find_task_by_pid() set in include/linux/sched.h
+ * see also find_task_by_vpid() set in include/linux/sched.h
*/
extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns);
extern struct pid *find_vpid(int nr);
-extern struct pid *find_pid(int nr);
/*
* Lookup a PID in the hash table, and return with it's count elevated.
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index caff528..1af82c4 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -14,6 +14,8 @@
#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
+struct bsd_acct_struct;
+
struct pid_namespace {
struct kref kref;
struct pidmap pidmap[PIDMAP_ENTRIES];
@@ -25,6 +27,9 @@
#ifdef CONFIG_PROC_FS
struct vfsmount *proc_mnt;
#endif
+#ifdef CONFIG_BSD_PROCESS_ACCT
+ struct bsd_acct_struct *bacct;
+#endif
};
extern struct pid_namespace init_pid_ns;
@@ -85,4 +90,7 @@
return tsk->nsproxy->pid_ns->child_reaper;
}
+void pidhash_init(void);
+void pidmap_init(void);
+
#endif /* _LINUX_PID_NS_H */
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 15a9eaf..f560d17 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -79,6 +79,7 @@
int pde_users; /* number of callers into module in progress */
spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
struct completion *pde_unload_completion;
+ struct list_head pde_openers; /* who did ->open, but not ->release */
};
struct kcore_list {
@@ -138,7 +139,6 @@
extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
extern const struct file_operations proc_kcore_operations;
-extern const struct file_operations proc_kmsg_operations;
extern const struct file_operations ppc_htab_operations;
extern int pid_ns_prepare_proc(struct pid_namespace *ns);
diff --git a/include/linux/profile.h b/include/linux/profile.h
index 05c1cc7..7e70872 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -8,8 +8,6 @@
#include <asm/errno.h>
-extern int prof_on __read_mostly;
-
#define CPU_PROFILING 1
#define SCHED_PROFILING 2
#define SLEEP_PROFILING 3
@@ -19,14 +17,31 @@
struct pt_regs;
struct notifier_block;
+#if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS)
+void create_prof_cpu_mask(struct proc_dir_entry *de);
+#else
+static inline void create_prof_cpu_mask(struct proc_dir_entry *de)
+{
+}
+#endif
+
+enum profile_type {
+ PROFILE_TASK_EXIT,
+ PROFILE_MUNMAP
+};
+
+#ifdef CONFIG_PROFILING
+
+extern int prof_on __read_mostly;
+
/* init basic kernel profiler */
void __init profile_init(void);
-void profile_tick(int);
+void profile_tick(int type);
/*
* Add multiple profiler hits to a given address:
*/
-void profile_hits(int, void *ip, unsigned int nr_hits);
+void profile_hits(int type, void *ip, unsigned int nr_hits);
/*
* Single profiler hit:
@@ -40,19 +55,6 @@
profile_hits(type, ip, 1);
}
-#ifdef CONFIG_PROC_FS
-void create_prof_cpu_mask(struct proc_dir_entry *);
-#else
-#define create_prof_cpu_mask(x) do { (void)(x); } while (0)
-#endif
-
-enum profile_type {
- PROFILE_TASK_EXIT,
- PROFILE_MUNMAP
-};
-
-#ifdef CONFIG_PROFILING
-
struct task_struct;
struct mm_struct;
@@ -80,6 +82,28 @@
#else
+#define prof_on 0
+
+static inline void profile_init(void)
+{
+ return;
+}
+
+static inline void profile_tick(int type)
+{
+ return;
+}
+
+static inline void profile_hits(int type, void *ip, unsigned int nr_hits)
+{
+ return;
+}
+
+static inline void profile_hit(int type, void *ip)
+{
+ return;
+}
+
static inline int task_handoff_register(struct notifier_block * n)
{
return -ENOSYS;
diff --git a/include/linux/quota.h b/include/linux/quota.h
index dcddfb2..376a050 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -41,9 +41,6 @@
#define __DQUOT_VERSION__ "dquot_6.5.1"
#define __DQUOT_NUM_VERSION__ 6*10000+5*100+1
-typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
-typedef __u64 qsize_t; /* Type in which we store sizes */
-
/* Size of blocks in which are counted size limits */
#define QUOTABLOCK_BITS 10
#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
@@ -138,6 +135,10 @@
#define QUOTA_NL_BHARDWARN 4 /* Block hardlimit reached */
#define QUOTA_NL_BSOFTLONGWARN 5 /* Block grace time expired */
#define QUOTA_NL_BSOFTWARN 6 /* Block softlimit reached */
+#define QUOTA_NL_IHARDBELOW 7 /* Usage got below inode hardlimit */
+#define QUOTA_NL_ISOFTBELOW 8 /* Usage got below inode softlimit */
+#define QUOTA_NL_BHARDBELOW 9 /* Usage got below block hardlimit */
+#define QUOTA_NL_BSOFTBELOW 10 /* Usage got below block softlimit */
enum {
QUOTA_NL_C_UNSPEC,
@@ -172,6 +173,9 @@
#include <asm/atomic.h>
+typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
+typedef __u64 qsize_t; /* Type in which we store sizes */
+
extern spinlock_t dq_data_lock;
/* Maximal numbers of writes for quota operation (insert/delete/update)
@@ -223,12 +227,10 @@
#define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B) /* Is info dirty? */
extern void mark_info_dirty(struct super_block *sb, int type);
-#define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags)
-#define info_any_dquot_dirty(info) (!list_empty(&(info)->dqi_dirty_list))
-#define info_any_dirty(info) (info_dirty(info) || info_any_dquot_dirty(info))
-
-#define sb_dqopt(sb) (&(sb)->s_dquot)
-#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type))
+static inline int info_dirty(struct mem_dqinfo *info)
+{
+ return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
+}
struct dqstats {
int lookups;
@@ -337,19 +339,6 @@
struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */
};
-#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \
- (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED))
-
-#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \
- sb_has_quota_enabled(sb, GRPQUOTA))
-
-#define sb_has_quota_suspended(sb, type) \
- ((type) == USRQUOTA ? (sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED) : \
- (sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED))
-
-#define sb_any_quota_suspended(sb) (sb_has_quota_suspended(sb, USRQUOTA) | \
- sb_has_quota_suspended(sb, GRPQUOTA))
-
int register_quota_format(struct quota_format_type *fmt);
void unregister_quota_format(struct quota_format_type *fmt);
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index f867020..742187f 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -11,42 +11,85 @@
#define _LINUX_QUOTAOPS_
#include <linux/smp_lock.h>
-
#include <linux/fs.h>
+static inline struct quota_info *sb_dqopt(struct super_block *sb)
+{
+ return &sb->s_dquot;
+}
+
#if defined(CONFIG_QUOTA)
/*
* declaration of quota_function calls in kernel.
*/
-extern void sync_dquots(struct super_block *sb, int type);
+void sync_dquots(struct super_block *sb, int type);
-extern int dquot_initialize(struct inode *inode, int type);
-extern int dquot_drop(struct inode *inode);
+int dquot_initialize(struct inode *inode, int type);
+int dquot_drop(struct inode *inode);
-extern int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
-extern int dquot_alloc_inode(const struct inode *inode, unsigned long number);
+int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
+int dquot_alloc_inode(const struct inode *inode, unsigned long number);
-extern int dquot_free_space(struct inode *inode, qsize_t number);
-extern int dquot_free_inode(const struct inode *inode, unsigned long number);
+int dquot_free_space(struct inode *inode, qsize_t number);
+int dquot_free_inode(const struct inode *inode, unsigned long number);
-extern int dquot_transfer(struct inode *inode, struct iattr *iattr);
-extern int dquot_commit(struct dquot *dquot);
-extern int dquot_acquire(struct dquot *dquot);
-extern int dquot_release(struct dquot *dquot);
-extern int dquot_commit_info(struct super_block *sb, int type);
-extern int dquot_mark_dquot_dirty(struct dquot *dquot);
+int dquot_transfer(struct inode *inode, struct iattr *iattr);
+int dquot_commit(struct dquot *dquot);
+int dquot_acquire(struct dquot *dquot);
+int dquot_release(struct dquot *dquot);
+int dquot_commit_info(struct super_block *sb, int type);
+int dquot_mark_dquot_dirty(struct dquot *dquot);
-extern int vfs_quota_on(struct super_block *sb, int type, int format_id,
- char *path, int remount);
-extern int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
- int format_id, int type);
-extern int vfs_quota_off(struct super_block *sb, int type, int remount);
-extern int vfs_quota_sync(struct super_block *sb, int type);
-extern int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
-extern int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
-extern int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
-extern int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
+int vfs_quota_on(struct super_block *sb, int type, int format_id,
+ char *path, int remount);
+int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
+ int format_id, int type);
+int vfs_quota_off(struct super_block *sb, int type, int remount);
+int vfs_quota_sync(struct super_block *sb, int type);
+int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
+int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
+int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
+int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
+
+void vfs_dq_drop(struct inode *inode);
+int vfs_dq_transfer(struct inode *inode, struct iattr *iattr);
+int vfs_dq_quota_on_remount(struct super_block *sb);
+
+static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type)
+{
+ return sb_dqopt(sb)->info + type;
+}
+
+/*
+ * Functions for checking status of quota
+ */
+
+static inline int sb_has_quota_enabled(struct super_block *sb, int type)
+{
+ if (type == USRQUOTA)
+ return sb_dqopt(sb)->flags & DQUOT_USR_ENABLED;
+ return sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED;
+}
+
+static inline int sb_any_quota_enabled(struct super_block *sb)
+{
+ return sb_has_quota_enabled(sb, USRQUOTA) ||
+ sb_has_quota_enabled(sb, GRPQUOTA);
+}
+
+static inline int sb_has_quota_suspended(struct super_block *sb, int type)
+{
+ if (type == USRQUOTA)
+ return sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED;
+ return sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED;
+}
+
+static inline int sb_any_quota_suspended(struct super_block *sb)
+{
+ return sb_has_quota_suspended(sb, USRQUOTA) ||
+ sb_has_quota_suspended(sb, GRPQUOTA);
+}
/*
* Operations supported for diskquotas.
@@ -59,38 +102,16 @@
/* It is better to call this function outside of any transaction as it might
* need a lot of space in journal for dquot structure allocation. */
-static inline void DQUOT_INIT(struct inode *inode)
+static inline void vfs_dq_init(struct inode *inode)
{
BUG_ON(!inode->i_sb);
if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode))
inode->i_sb->dq_op->initialize(inode, -1);
}
-/* The same as with DQUOT_INIT */
-static inline void DQUOT_DROP(struct inode *inode)
-{
- /* Here we can get arbitrary inode from clear_inode() so we have
- * to be careful. OTOH we don't need locking as quota operations
- * are allowed to change only at mount time */
- if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
- && inode->i_sb->dq_op->drop) {
- int cnt;
- /* Test before calling to rule out calls from proc and such
- * where we are not allowed to block. Note that this is
- * actually reliable test even without the lock - the caller
- * must assure that nobody can come after the DQUOT_DROP and
- * add quota pointers back anyway */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (inode->i_dquot[cnt] != NODQUOT)
- break;
- if (cnt < MAXQUOTAS)
- inode->i_sb->dq_op->drop(inode);
- }
-}
-
/* The following allocation/freeing/transfer functions *must* be called inside
* a transaction (deadlocks possible otherwise) */
-static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
{
if (sb_any_quota_enabled(inode->i_sb)) {
/* Used space is updated in alloc_space() */
@@ -102,15 +123,15 @@
return 0;
}
-static inline int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
{
int ret;
- if (!(ret = DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr)))
+ if (!(ret = vfs_dq_prealloc_space_nodirty(inode, nr)))
mark_inode_dirty(inode);
return ret;
}
-static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
{
if (sb_any_quota_enabled(inode->i_sb)) {
/* Used space is updated in alloc_space() */
@@ -122,25 +143,25 @@
return 0;
}
-static inline int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
{
int ret;
- if (!(ret = DQUOT_ALLOC_SPACE_NODIRTY(inode, nr)))
+ if (!(ret = vfs_dq_alloc_space_nodirty(inode, nr)))
mark_inode_dirty(inode);
return ret;
}
-static inline int DQUOT_ALLOC_INODE(struct inode *inode)
+static inline int vfs_dq_alloc_inode(struct inode *inode)
{
if (sb_any_quota_enabled(inode->i_sb)) {
- DQUOT_INIT(inode);
+ vfs_dq_init(inode);
if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA)
return 1;
}
return 0;
}
-static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
{
if (sb_any_quota_enabled(inode->i_sb))
inode->i_sb->dq_op->free_space(inode, nr);
@@ -148,35 +169,25 @@
inode_sub_bytes(inode, nr);
}
-static inline void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
{
- DQUOT_FREE_SPACE_NODIRTY(inode, nr);
+ vfs_dq_free_space_nodirty(inode, nr);
mark_inode_dirty(inode);
}
-static inline void DQUOT_FREE_INODE(struct inode *inode)
+static inline void vfs_dq_free_inode(struct inode *inode)
{
if (sb_any_quota_enabled(inode->i_sb))
inode->i_sb->dq_op->free_inode(inode, 1);
}
-static inline int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr)
-{
- if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
- DQUOT_INIT(inode);
- if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
- return 1;
- }
- return 0;
-}
-
/* The following two functions cannot be called inside a transaction */
-static inline void DQUOT_SYNC(struct super_block *sb)
+static inline void vfs_dq_sync(struct super_block *sb)
{
sync_dquots(sb, -1);
}
-static inline int DQUOT_OFF(struct super_block *sb, int remount)
+static inline int vfs_dq_off(struct super_block *sb, int remount)
{
int ret = -ENOSYS;
@@ -185,22 +196,27 @@
return ret;
}
-static inline int DQUOT_ON_REMOUNT(struct super_block *sb)
-{
- int cnt;
- int ret = 0, err;
+#else
- if (!sb->s_qcop || !sb->s_qcop->quota_on)
- return -ENOSYS;
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1);
- if (err < 0 && !ret)
- ret = err;
- }
- return ret;
+static inline int sb_has_quota_enabled(struct super_block *sb, int type)
+{
+ return 0;
}
-#else
+static inline int sb_any_quota_enabled(struct super_block *sb)
+{
+ return 0;
+}
+
+static inline int sb_has_quota_suspended(struct super_block *sb, int type)
+{
+ return 0;
+}
+
+static inline int sb_any_quota_suspended(struct super_block *sb)
+{
+ return 0;
+}
/*
* NO-OP when quota not configured.
@@ -208,113 +224,144 @@
#define sb_dquot_ops (NULL)
#define sb_quotactl_ops (NULL)
-static inline void DQUOT_INIT(struct inode *inode)
+static inline void vfs_dq_init(struct inode *inode)
{
}
-static inline void DQUOT_DROP(struct inode *inode)
+static inline void vfs_dq_drop(struct inode *inode)
{
}
-static inline int DQUOT_ALLOC_INODE(struct inode *inode)
+static inline int vfs_dq_alloc_inode(struct inode *inode)
{
return 0;
}
-static inline void DQUOT_FREE_INODE(struct inode *inode)
+static inline void vfs_dq_free_inode(struct inode *inode)
{
}
-static inline void DQUOT_SYNC(struct super_block *sb)
+static inline void vfs_dq_sync(struct super_block *sb)
{
}
-static inline int DQUOT_OFF(struct super_block *sb, int remount)
+static inline int vfs_dq_off(struct super_block *sb, int remount)
{
return 0;
}
-static inline int DQUOT_ON_REMOUNT(struct super_block *sb)
+static inline int vfs_dq_quota_on_remount(struct super_block *sb)
{
return 0;
}
-static inline int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr)
+static inline int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
{
return 0;
}
-static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
{
inode_add_bytes(inode, nr);
return 0;
}
-static inline int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
{
- DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr);
+ vfs_dq_prealloc_space_nodirty(inode, nr);
mark_inode_dirty(inode);
return 0;
}
-static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
{
inode_add_bytes(inode, nr);
return 0;
}
-static inline int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
{
- DQUOT_ALLOC_SPACE_NODIRTY(inode, nr);
+ vfs_dq_alloc_space_nodirty(inode, nr);
mark_inode_dirty(inode);
return 0;
}
-static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
{
inode_sub_bytes(inode, nr);
}
-static inline void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
{
- DQUOT_FREE_SPACE_NODIRTY(inode, nr);
+ vfs_dq_free_space_nodirty(inode, nr);
mark_inode_dirty(inode);
}
#endif /* CONFIG_QUOTA */
-static inline int DQUOT_PREALLOC_BLOCK_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_block_nodirty(struct inode *inode, qsize_t nr)
{
- return DQUOT_PREALLOC_SPACE_NODIRTY(inode,
+ return vfs_dq_prealloc_space_nodirty(inode,
nr << inode->i_sb->s_blocksize_bits);
}
-static inline int DQUOT_PREALLOC_BLOCK(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_block(struct inode *inode, qsize_t nr)
{
- return DQUOT_PREALLOC_SPACE(inode,
+ return vfs_dq_prealloc_space(inode,
nr << inode->i_sb->s_blocksize_bits);
}
-static inline int DQUOT_ALLOC_BLOCK_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_block_nodirty(struct inode *inode, qsize_t nr)
{
- return DQUOT_ALLOC_SPACE_NODIRTY(inode,
+ return vfs_dq_alloc_space_nodirty(inode,
nr << inode->i_sb->s_blocksize_bits);
}
-static inline int DQUOT_ALLOC_BLOCK(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_block(struct inode *inode, qsize_t nr)
{
- return DQUOT_ALLOC_SPACE(inode,
+ return vfs_dq_alloc_space(inode,
nr << inode->i_sb->s_blocksize_bits);
}
-static inline void DQUOT_FREE_BLOCK_NODIRTY(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr)
{
- DQUOT_FREE_SPACE_NODIRTY(inode, nr << inode->i_sb->s_blocksize_bits);
+ vfs_dq_free_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits);
}
-static inline void DQUOT_FREE_BLOCK(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_block(struct inode *inode, qsize_t nr)
{
- DQUOT_FREE_SPACE(inode, nr << inode->i_sb->s_blocksize_bits);
+ vfs_dq_free_space(inode, nr << inode->i_sb->s_blocksize_bits);
}
+/*
+ * Define uppercase equivalents for compatibility with old function names
+ * Can go away when we think all users have been converted (15/04/2008)
+ */
+#define DQUOT_INIT(inode) vfs_dq_init(inode)
+#define DQUOT_DROP(inode) vfs_dq_drop(inode)
+#define DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr) \
+ vfs_dq_prealloc_space_nodirty(inode, nr)
+#define DQUOT_PREALLOC_SPACE(inode, nr) vfs_dq_prealloc_space(inode, nr)
+#define DQUOT_ALLOC_SPACE_NODIRTY(inode, nr) \
+ vfs_dq_alloc_space_nodirty(inode, nr)
+#define DQUOT_ALLOC_SPACE(inode, nr) vfs_dq_alloc_space(inode, nr)
+#define DQUOT_PREALLOC_BLOCK_NODIRTY(inode, nr) \
+ vfs_dq_prealloc_block_nodirty(inode, nr)
+#define DQUOT_PREALLOC_BLOCK(inode, nr) vfs_dq_prealloc_block(inode, nr)
+#define DQUOT_ALLOC_BLOCK_NODIRTY(inode, nr) \
+ vfs_dq_alloc_block_nodirty(inode, nr)
+#define DQUOT_ALLOC_BLOCK(inode, nr) vfs_dq_alloc_block(inode, nr)
+#define DQUOT_ALLOC_INODE(inode) vfs_dq_alloc_inode(inode)
+#define DQUOT_FREE_SPACE_NODIRTY(inode, nr) \
+ vfs_dq_free_space_nodirty(inode, nr)
+#define DQUOT_FREE_SPACE(inode, nr) vfs_dq_free_space(inode, nr)
+#define DQUOT_FREE_BLOCK_NODIRTY(inode, nr) \
+ vfs_dq_free_block_nodirty(inode, nr)
+#define DQUOT_FREE_BLOCK(inode, nr) vfs_dq_free_block(inode, nr)
+#define DQUOT_FREE_INODE(inode) vfs_dq_free_inode(inode)
+#define DQUOT_TRANSFER(inode, iattr) vfs_dq_transfer(inode, iattr)
+#define DQUOT_SYNC(sb) vfs_dq_sync(sb)
+#define DQUOT_OFF(sb, remount) vfs_dq_off(sb, remount)
+#define DQUOT_ON_REMOUNT(sb) vfs_dq_quota_on_remount(sb)
+
#endif /* _LINUX_QUOTAOPS_ */
diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
new file mode 100644
index 0000000..18a5b9b
--- /dev/null
+++ b/include/linux/ratelimit.h
@@ -0,0 +1,27 @@
+#ifndef _LINUX_RATELIMIT_H
+#define _LINUX_RATELIMIT_H
+#include <linux/param.h>
+
+#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ)
+#define DEFAULT_RATELIMIT_BURST 10
+
+struct ratelimit_state {
+ int interval;
+ int burst;
+ int printed;
+ int missed;
+ unsigned long begin;
+};
+
+#define DEFINE_RATELIMIT_STATE(name, interval, burst) \
+ struct ratelimit_state name = {interval, burst,}
+
+extern int __ratelimit(struct ratelimit_state *rs);
+
+static inline int ratelimit(void)
+{
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+ return __ratelimit(&rs);
+}
+#endif
diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h
index f04b64e..0967f03 100644
--- a/include/linux/rcupreempt.h
+++ b/include/linux/rcupreempt.h
@@ -115,16 +115,21 @@
static inline void rcu_enter_nohz(void)
{
+ static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);
+
smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
__get_cpu_var(rcu_dyntick_sched).dynticks++;
- WARN_ON(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1);
+ WARN_ON_RATELIMIT(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1, &rs);
}
static inline void rcu_exit_nohz(void)
{
+ static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);
+
smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
__get_cpu_var(rcu_dyntick_sched).dynticks++;
- WARN_ON(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1));
+ WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1),
+ &rs);
}
#else /* CONFIG_NO_HZ */
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 4aacaee..e9963af 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -526,8 +526,8 @@
** p is the array of __u32, i is the index into the array, v is the value
** to store there.
*/
-#define get_block_num(p, i) le32_to_cpu(get_unaligned((p) + (i)))
-#define put_block_num(p, i, v) put_unaligned(cpu_to_le32(v), (p) + (i))
+#define get_block_num(p, i) get_unaligned_le32((p) + (i))
+#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i))
//
// in old version uniqueness field shows key type
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index 336ee43..315517e 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -152,7 +152,7 @@
atomic_t j_nonzerolen;
atomic_t j_commit_left;
atomic_t j_older_commits_done; /* all commits older than this on disk */
- struct semaphore j_commit_lock;
+ struct mutex j_commit_mutex;
unsigned long j_trans_id;
time_t j_timestamp;
struct reiserfs_list_bitmap *j_list_bitmap;
@@ -193,8 +193,8 @@
struct buffer_head *j_header_bh;
time_t j_trans_start_time; /* time this transaction started */
- struct semaphore j_lock;
- struct semaphore j_flush_sem;
+ struct mutex j_mutex;
+ struct mutex j_flush_mutex;
wait_queue_head_t j_join_wait; /* wait for current transaction to finish before starting new one */
atomic_t j_jlock; /* lock for j_join_wait */
int j_list_bitmap_index; /* number of next list bitmap to use */
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 6d9e1fc..fdeadd9 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -63,9 +63,14 @@
ssize_t res_counter_read(struct res_counter *counter, int member,
const char __user *buf, size_t nbytes, loff_t *pos,
int (*read_strategy)(unsigned long long val, char *s));
-ssize_t res_counter_write(struct res_counter *counter, int member,
- const char __user *buf, size_t nbytes, loff_t *pos,
- int (*write_strategy)(char *buf, unsigned long long *val));
+
+typedef int (*write_strategy_fn)(const char *buf, unsigned long long *val);
+
+int res_counter_memparse_write_strategy(const char *buf,
+ unsigned long long *res);
+
+int res_counter_write(struct res_counter *counter, int member,
+ const char *buffer, write_strategy_fn write_strategy);
/*
* the field descriptors. one for each member of res_counter
@@ -95,8 +100,10 @@
* counter->limit _locked call expects the counter->lock to be taken
*/
-int res_counter_charge_locked(struct res_counter *counter, unsigned long val);
-int res_counter_charge(struct res_counter *counter, unsigned long val);
+int __must_check res_counter_charge_locked(struct res_counter *counter,
+ unsigned long val);
+int __must_check res_counter_charge(struct res_counter *counter,
+ unsigned long val);
/*
* uncharge - tell that some portion of the resource is released
@@ -151,4 +158,20 @@
cnt->failcnt = 0;
spin_unlock_irqrestore(&cnt->lock, flags);
}
+
+static inline int res_counter_set_limit(struct res_counter *cnt,
+ unsigned long long limit)
+{
+ unsigned long flags;
+ int ret = -EBUSY;
+
+ spin_lock_irqsave(&cnt->lock, flags);
+ if (cnt->usage < limit) {
+ cnt->limit = limit;
+ ret = 0;
+ }
+ spin_unlock_irqrestore(&cnt->lock, flags);
+ return ret;
+}
+
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6aca4a1..42036ff 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -506,6 +506,10 @@
unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
unsigned long inblock, oublock, cinblock, coublock;
+#ifdef CONFIG_TASK_XACCT
+ u64 rchar, wchar, syscr, syscw;
+#endif
+ struct task_io_accounting ioac;
/*
* Cumulative ns of scheduled CPU time for dead threads in the
@@ -668,6 +672,10 @@
/* io operations performed */
u32 swapin_count; /* total count of the number of swapin block */
/* io operations performed */
+
+ struct timespec freepages_start, freepages_end;
+ u64 freepages_delay; /* wait for memory reclaim */
+ u32 freepages_count; /* total count of memory reclaim */
};
#endif /* CONFIG_TASK_DELAY_ACCT */
@@ -1257,7 +1265,7 @@
#if defined(CONFIG_TASK_XACCT)
u64 acct_rss_mem1; /* accumulated rss usage */
u64 acct_vm_mem1; /* accumulated virtual memory usage */
- cputime_t acct_stimexpd;/* stime since last update */
+ cputime_t acct_timexpd; /* stime + utime since last update */
#endif
#ifdef CONFIG_CPUSETS
nodemask_t mems_allowed;
@@ -1496,7 +1504,7 @@
#define PF_KSWAPD 0x00040000 /* I am kswapd */
#define PF_SWAPOFF 0x00080000 /* I am in swapoff */
#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
-#define PF_BORROWED_MM 0x00200000 /* I am a kthread doing use_mm */
+#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
@@ -1715,19 +1723,13 @@
* finds a task by its pid in the specified namespace
* find_task_by_vpid():
* finds a task by its virtual pid
- * find_task_by_pid():
- * finds a task by its global pid
*
- * see also find_pid() etc in include/linux/pid.h
+ * see also find_vpid() etc in include/linux/pid.h
*/
extern struct task_struct *find_task_by_pid_type_ns(int type, int pid,
struct pid_namespace *ns);
-static inline struct task_struct *__deprecated find_task_by_pid(pid_t nr)
-{
- return find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns);
-}
extern struct task_struct *find_task_by_vpid(pid_t nr);
extern struct task_struct *find_task_by_pid_ns(pid_t nr,
struct pid_namespace *ns);
@@ -1800,7 +1802,6 @@
extern void force_sig_specific(int, struct task_struct *);
extern int send_sig(int, struct task_struct *, int);
extern void zap_other_threads(struct task_struct *p);
-extern int kill_proc(pid_t, int, int);
extern struct sigqueue *sigqueue_alloc(void);
extern void sigqueue_free(struct sigqueue *);
extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group);
@@ -2054,9 +2055,6 @@
if (!signal_pending(p))
return 0;
- if (state & (__TASK_STOPPED | __TASK_TRACED))
- return 0;
-
return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
}
diff --git a/include/linux/sem.h b/include/linux/sem.h
index c8eaad9..1b191c1 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -78,6 +78,7 @@
#ifdef __KERNEL__
#include <asm/atomic.h>
+#include <linux/rcupdate.h>
struct task_struct;
@@ -93,23 +94,19 @@
time_t sem_otime; /* last semop time */
time_t sem_ctime; /* last change time */
struct sem *sem_base; /* ptr to first semaphore in array */
- struct sem_queue *sem_pending; /* pending operations to be processed */
- struct sem_queue **sem_pending_last; /* last pending operation */
- struct sem_undo *undo; /* undo requests on this array */
+ struct list_head sem_pending; /* pending operations to be processed */
+ struct list_head list_id; /* undo requests on this array */
unsigned long sem_nsems; /* no. of semaphores in array */
};
/* One queue for each sleeping process in the system. */
struct sem_queue {
- struct sem_queue * next; /* next entry in the queue */
- struct sem_queue ** prev; /* previous entry in the queue, *(q->prev) == q */
- struct task_struct* sleeper; /* this process */
- struct sem_undo * undo; /* undo structure */
+ struct list_head list; /* queue of pending operations */
+ struct task_struct *sleeper; /* this process */
+ struct sem_undo *undo; /* undo structure */
int pid; /* process id of requesting process */
int status; /* completion status of operation */
- struct sem_array * sma; /* semaphore array for operations */
- int id; /* internal sem id */
- struct sembuf * sops; /* array of pending operations */
+ struct sembuf *sops; /* array of pending operations */
int nsops; /* number of operations */
int alter; /* does the operation alter the array? */
};
@@ -118,8 +115,11 @@
* when the process exits.
*/
struct sem_undo {
- struct sem_undo * proc_next; /* next entry on this process */
- struct sem_undo * id_next; /* next entry on this semaphore set */
+ struct list_head list_proc; /* per-process list: all undos from one process. */
+ /* rcu protected */
+ struct rcu_head rcu; /* rcu struct for sem_undo() */
+ struct sem_undo_list *ulp; /* sem_undo_list for the process */
+ struct list_head list_id; /* per semaphore array list: all undos for one array */
int semid; /* semaphore set identifier */
short * semadj; /* array of adjustments, one per semaphore */
};
@@ -128,9 +128,9 @@
* that may be shared among all a CLONE_SYSVSEM task group.
*/
struct sem_undo_list {
- atomic_t refcnt;
- spinlock_t lock;
- struct sem_undo *proc_list;
+ atomic_t refcnt;
+ spinlock_t lock;
+ struct list_head list_proc;
};
struct sysv_sem {
diff --git a/include/linux/sm501.h b/include/linux/sm501.h
index b530fa6..214f932 100644
--- a/include/linux/sm501.h
+++ b/include/linux/sm501.h
@@ -46,24 +46,6 @@
unsigned long set,
unsigned long clear);
-/* sm501_gpio_set
- *
- * set the state of the given GPIO line
-*/
-
-extern void sm501_gpio_set(struct device *dev,
- unsigned long gpio,
- unsigned int to,
- unsigned int dir);
-
-/* sm501_gpio_get
- *
- * get the state of the given GPIO line
-*/
-
-extern unsigned long sm501_gpio_get(struct device *dev,
- unsigned long gpio);
-
/* Platform data definitions */
@@ -104,11 +86,19 @@
struct sm501_platdata_fbsub *fb_pnl;
};
-/* gpio i2c */
+/* gpio i2c
+ *
+ * Note, we have to pass in the bus number, as the number used will be
+ * passed to the i2c-gpio driver's platform_device.id, subsequently used
+ * to register the i2c bus.
+*/
struct sm501_platdata_gpio_i2c {
+ unsigned int bus_num;
unsigned int pin_sda;
unsigned int pin_scl;
+ int udelay;
+ int timeout;
};
/* sm501_initdata
@@ -131,6 +121,7 @@
#define SM501_USE_FBACCEL (1<<6)
#define SM501_USE_AC97 (1<<7)
#define SM501_USE_I2S (1<<8)
+#define SM501_USE_GPIO (1<<9)
#define SM501_USE_ALL (0xffffffff)
@@ -157,6 +148,8 @@
struct sm501_reg_init gpio_ddr_high;
};
+#define SM501_FLAG_SUSPEND_OFF (1<<4)
+
/* sm501_platdata
*
* This is passed with the platform device to allow the board
@@ -170,6 +163,12 @@
struct sm501_init_gpio *init_gpiop;
struct sm501_platdata_fb *fb;
+ int flags;
+ int gpio_base;
+
+ int (*get_power)(struct device *dev);
+ int (*set_power)(struct device *dev, unsigned int on);
+
struct sm501_platdata_gpio_i2c *gpio_i2c;
unsigned int gpio_i2c_nr;
};
diff --git a/include/linux/smb_fs.h b/include/linux/smb_fs.h
index 2c5cd55..923cd8a 100644
--- a/include/linux/smb_fs.h
+++ b/include/linux/smb_fs.h
@@ -43,18 +43,13 @@
}
/* macro names are short for word, double-word, long value (?) */
-#define WVAL(buf,pos) \
- (le16_to_cpu(get_unaligned((__le16 *)((u8 *)(buf) + (pos)))))
-#define DVAL(buf,pos) \
- (le32_to_cpu(get_unaligned((__le32 *)((u8 *)(buf) + (pos)))))
-#define LVAL(buf,pos) \
- (le64_to_cpu(get_unaligned((__le64 *)((u8 *)(buf) + (pos)))))
-#define WSET(buf,pos,val) \
- put_unaligned(cpu_to_le16((u16)(val)), (__le16 *)((u8 *)(buf) + (pos)))
-#define DSET(buf,pos,val) \
- put_unaligned(cpu_to_le32((u32)(val)), (__le32 *)((u8 *)(buf) + (pos)))
-#define LSET(buf,pos,val) \
- put_unaligned(cpu_to_le64((u64)(val)), (__le64 *)((u8 *)(buf) + (pos)))
+#define WVAL(buf, pos) (get_unaligned_le16((u8 *)(buf) + (pos)))
+#define DVAL(buf, pos) (get_unaligned_le32((u8 *)(buf) + (pos)))
+#define LVAL(buf, pos) (get_unaligned_le64((u8 *)(buf) + (pos)))
+
+#define WSET(buf, pos, val) put_unaligned_le16((val), (u8 *)(buf) + (pos))
+#define DSET(buf, pos, val) put_unaligned_le32((val), (u8 *)(buf) + (pos))
+#define LSET(buf, pos, val) put_unaligned_le64((val), (u8 *)(buf) + (pos))
/* where to find the base of the SMB packet proper */
#define smb_base(buf) ((u8 *)(((u8 *)(buf))+4))
diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h
index 835ddf4..22ef107 100644
--- a/include/linux/spi/mcp23s08.h
+++ b/include/linux/spi/mcp23s08.h
@@ -1,18 +1,25 @@
-/* FIXME driver should be able to handle all four slaves that
- * can be hooked up to each chipselect, as well as IRQs...
- */
+/* FIXME driver should be able to handle IRQs... */
+
+struct mcp23s08_chip_info {
+ bool is_present; /* true iff populated */
+ u8 pullups; /* BIT(x) means enable pullup x */
+};
struct mcp23s08_platform_data {
- /* four slaves can share one SPI chipselect */
- u8 slave;
+ /* Four slaves (numbered 0..3) can share one SPI chipselect, and
+ * will provide 8..32 GPIOs using 1..4 gpio_chip instances.
+ */
+ struct mcp23s08_chip_info chip[4];
- /* number assigned to the first GPIO */
+ /* "base" is the number of the first GPIO. Dynamic assignment is
+ * not currently supported, and even if there are gaps in chip
+ * addressing the GPIO numbers are sequential .. so for example
+ * if only slaves 0 and 3 are present, their GPIOs range from
+ * base to base+15.
+ */
unsigned base;
- /* pins with pullups */
- u8 pullups;
-
void *context; /* param to setup/teardown */
int (*setup)(struct spi_device *spi,
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index d311a09..61e5610 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -46,6 +46,7 @@
* linux/spinlock.h: builds the final spin_*() APIs.
*/
+#include <linux/typecheck.h>
#include <linux/preempt.h>
#include <linux/linkage.h>
#include <linux/compiler.h>
@@ -191,23 +192,53 @@
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
-#define spin_lock_irqsave(lock, flags) flags = _spin_lock_irqsave(lock)
-#define read_lock_irqsave(lock, flags) flags = _read_lock_irqsave(lock)
-#define write_lock_irqsave(lock, flags) flags = _write_lock_irqsave(lock)
+#define spin_lock_irqsave(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ flags = _spin_lock_irqsave(lock); \
+ } while (0)
+#define read_lock_irqsave(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ flags = _read_lock_irqsave(lock); \
+ } while (0)
+#define write_lock_irqsave(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ flags = _write_lock_irqsave(lock); \
+ } while (0)
#ifdef CONFIG_DEBUG_LOCK_ALLOC
-#define spin_lock_irqsave_nested(lock, flags, subclass) \
- flags = _spin_lock_irqsave_nested(lock, subclass)
+#define spin_lock_irqsave_nested(lock, flags, subclass) \
+ do { \
+ typecheck(unsigned long, flags); \
+ flags = _spin_lock_irqsave_nested(lock, subclass); \
+ } while (0)
#else
-#define spin_lock_irqsave_nested(lock, flags, subclass) \
- flags = _spin_lock_irqsave(lock)
+#define spin_lock_irqsave_nested(lock, flags, subclass) \
+ do { \
+ typecheck(unsigned long, flags); \
+ flags = _spin_lock_irqsave(lock); \
+ } while (0)
#endif
#else
-#define spin_lock_irqsave(lock, flags) _spin_lock_irqsave(lock, flags)
-#define read_lock_irqsave(lock, flags) _read_lock_irqsave(lock, flags)
-#define write_lock_irqsave(lock, flags) _write_lock_irqsave(lock, flags)
+#define spin_lock_irqsave(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ _spin_lock_irqsave(lock, flags); \
+ } while (0)
+#define read_lock_irqsave(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ _read_lock_irqsave(lock, flags); \
+ } while (0)
+#define write_lock_irqsave(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ _write_lock_irqsave(lock, flags); \
+ } while (0)
#define spin_lock_irqsave_nested(lock, flags, subclass) \
spin_lock_irqsave(lock, flags)
@@ -260,16 +291,25 @@
} while (0)
#endif
-#define spin_unlock_irqrestore(lock, flags) \
- _spin_unlock_irqrestore(lock, flags)
+#define spin_unlock_irqrestore(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ _spin_unlock_irqrestore(lock, flags); \
+ } while (0)
#define spin_unlock_bh(lock) _spin_unlock_bh(lock)
-#define read_unlock_irqrestore(lock, flags) \
- _read_unlock_irqrestore(lock, flags)
+#define read_unlock_irqrestore(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ _read_unlock_irqrestore(lock, flags); \
+ } while (0)
#define read_unlock_bh(lock) _read_unlock_bh(lock)
-#define write_unlock_irqrestore(lock, flags) \
- _write_unlock_irqrestore(lock, flags)
+#define write_unlock_irqrestore(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ _write_unlock_irqrestore(lock, flags); \
+ } while (0)
#define write_unlock_bh(lock) _write_unlock_bh(lock)
#define spin_trylock_bh(lock) __cond_lock(lock, _spin_trylock_bh(lock))
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 5d69c07..18269e9 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -31,7 +31,7 @@
*/
-#define TASKSTATS_VERSION 6
+#define TASKSTATS_VERSION 7
#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
* in linux/sched.h */
@@ -157,6 +157,10 @@
__u64 ac_utimescaled; /* utime scaled on frequency etc */
__u64 ac_stimescaled; /* stime scaled on frequency etc */
__u64 cpu_scaled_run_real_total; /* scaled cpu_run_real_total */
+
+ /* Delay waiting for memory reclaim */
+ __u64 freepages_count;
+ __u64 freepages_delay_total;
};
diff --git a/include/linux/typecheck.h b/include/linux/typecheck.h
new file mode 100644
index 0000000..eb5b74a
--- /dev/null
+++ b/include/linux/typecheck.h
@@ -0,0 +1,24 @@
+#ifndef TYPECHECK_H_INCLUDED
+#define TYPECHECK_H_INCLUDED
+
+/*
+ * Check at compile time that something is of a particular type.
+ * Always evaluates to 1 so you may use it easily in comparisons.
+ */
+#define typecheck(type,x) \
+({ type __dummy; \
+ typeof(x) __dummy2; \
+ (void)(&__dummy == &__dummy2); \
+ 1; \
+})
+
+/*
+ * Check at compile time that 'function' is a certain type, or is a pointer
+ * to that type (needs to use typedef for the function type.)
+ */
+#define typecheck_fn(type,function) \
+({ typeof(type) __tmp = function; \
+ (void)__tmp; \
+})
+
+#endif /* TYPECHECK_H_INCLUDED */
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 747c3a4..c932390 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -330,7 +330,7 @@
dev_vdbg(&(d)->gadget->dev , fmt , ## args)
#define ERROR(d, fmt, args...) \
dev_err(&(d)->gadget->dev , fmt , ## args)
-#define WARN(d, fmt, args...) \
+#define WARNING(d, fmt, args...) \
dev_warn(&(d)->gadget->dev , fmt , ## args)
#define INFO(d, fmt, args...) \
dev_info(&(d)->gadget->dev , fmt , ## args)
diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h
index 8eff0b5..b3c4a60 100644
--- a/include/linux/virtio_9p.h
+++ b/include/linux/virtio_9p.h
@@ -1,5 +1,7 @@
#ifndef _LINUX_VIRTIO_9P_H
#define _LINUX_VIRTIO_9P_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
#include <linux/virtio_config.h>
/* The ID for virtio console */
diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
index 979524e..c30c7bf 100644
--- a/include/linux/virtio_balloon.h
+++ b/include/linux/virtio_balloon.h
@@ -1,5 +1,7 @@
#ifndef _LINUX_VIRTIO_BALLOON_H
#define _LINUX_VIRTIO_BALLOON_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
#include <linux/virtio_config.h>
/* The ID for virtio_balloon */
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index 5f79a5f..c1aef85 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -1,5 +1,7 @@
#ifndef _LINUX_VIRTIO_BLK_H
#define _LINUX_VIRTIO_BLK_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
#include <linux/virtio_config.h>
/* The ID for virtio_block */
@@ -11,6 +13,7 @@
#define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */
#define VIRTIO_BLK_F_GEOMETRY 4 /* Legacy geometry available */
#define VIRTIO_BLK_F_RO 5 /* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/
struct virtio_blk_config
{
@@ -26,6 +29,8 @@
__u8 heads;
__u8 sectors;
} geometry;
+ /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
+ __u32 blk_size;
} __attribute__((packed));
/* These two define direction. */
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index f364bbf..bf8ec28 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -1,5 +1,8 @@
#ifndef _LINUX_VIRTIO_CONFIG_H
#define _LINUX_VIRTIO_CONFIG_H
+/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
+ * anyone can use the definitions to implement compatible drivers/servers. */
+
/* Virtio devices use a standardized configuration space to define their
* features and pass configuration information, but each implementation can
* store and access that space differently. */
@@ -15,6 +18,12 @@
/* We've given up on this device. */
#define VIRTIO_CONFIG_S_FAILED 0x80
+/* Some virtio feature bits (currently bits 28 through 31) are reserved for the
+ * transport being used (eg. virtio_ring), the rest are per-device feature
+ * bits. */
+#define VIRTIO_TRANSPORT_F_START 28
+#define VIRTIO_TRANSPORT_F_END 32
+
/* Do we get callbacks when the ring is completely used, even if we've
* suppressed them? */
#define VIRTIO_F_NOTIFY_ON_EMPTY 24
@@ -52,9 +61,10 @@
* @get_features: get the array of feature bits for this device.
* vdev: the virtio_device
* Returns the first 32 feature bits (all we currently need).
- * @set_features: confirm what device features we'll be using.
+ * @finalize_features: confirm what device features we'll be using.
* vdev: the virtio_device
- * feature: the first 32 feature bits
+ * This gives the final feature bits for the device: it can change
+ * the dev->feature bits if it wants.
*/
struct virtio_config_ops
{
@@ -70,7 +80,7 @@
void (*callback)(struct virtqueue *));
void (*del_vq)(struct virtqueue *vq);
u32 (*get_features)(struct virtio_device *vdev);
- void (*set_features)(struct virtio_device *vdev, u32 features);
+ void (*finalize_features)(struct virtio_device *vdev);
};
/* If driver didn't advertise the feature, it will never appear. */
diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h
index ed2d4ea..19a0da0 100644
--- a/include/linux/virtio_console.h
+++ b/include/linux/virtio_console.h
@@ -1,6 +1,8 @@
#ifndef _LINUX_VIRTIO_CONSOLE_H
#define _LINUX_VIRTIO_CONSOLE_H
#include <linux/virtio_config.h>
+/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
+ * anyone can use the definitions to implement compatible drivers/servers. */
/* The ID for virtio console */
#define VIRTIO_ID_CONSOLE 3
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 38c0571..5e33761 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -1,5 +1,7 @@
#ifndef _LINUX_VIRTIO_NET_H
#define _LINUX_VIRTIO_NET_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
#include <linux/virtio_config.h>
/* The ID for virtio_net */
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
index b315165..cdef357 100644
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -9,9 +9,8 @@
* Authors:
* Anthony Liguori <aliguori@us.ibm.com>
*
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
*/
#ifndef _LINUX_VIRTIO_PCI_H
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index abe481e..c4a598f 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -120,6 +120,8 @@
void (*notify)(struct virtqueue *vq),
void (*callback)(struct virtqueue *vq));
void vring_del_virtqueue(struct virtqueue *vq);
+/* Filter out transport-specific feature bits. */
+void vring_transport_features(struct virtio_device *vdev);
irqreturn_t vring_interrupt(int irq, void *_vq);
#endif /* __KERNEL__ */
diff --git a/include/linux/virtio_rng.h b/include/linux/virtio_rng.h
index 331afb6..1a85dab 100644
--- a/include/linux/virtio_rng.h
+++ b/include/linux/virtio_rng.h
@@ -1,5 +1,7 @@
#ifndef _LINUX_VIRTIO_RNG_H
#define _LINUX_VIRTIO_RNG_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
#include <linux/virtio_config.h>
/* The ID for virtio_rng */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 14d4712..5c158c4 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -201,6 +201,8 @@
extern void init_workqueues(void);
int execute_in_process_context(work_func_t fn, struct execute_work *);
+extern int flush_work(struct work_struct *work);
+
extern int cancel_work_sync(struct work_struct *work);
/*
diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h
index a7421f1..ccdc562 100644
--- a/include/mtd/ubi-user.h
+++ b/include/mtd/ubi-user.h
@@ -58,6 +58,13 @@
* device should be used. A &struct ubi_rsvol_req object has to be properly
* filled and a pointer to it has to be passed to the IOCTL.
*
+ * UBI volumes re-name
+ * ~~~~~~~~~~~~~~~~~~~
+ *
+ * To re-name several volumes atomically at one go, the %UBI_IOCRNVOL command
+ * of the UBI character device should be used. A &struct ubi_rnvol_req object
+ * has to be properly filled and a pointer to it has to be passed to the IOCTL.
+ *
* UBI volume update
* ~~~~~~~~~~~~~~~~~
*
@@ -104,6 +111,8 @@
#define UBI_IOCRMVOL _IOW(UBI_IOC_MAGIC, 1, int32_t)
/* Re-size an UBI volume */
#define UBI_IOCRSVOL _IOW(UBI_IOC_MAGIC, 2, struct ubi_rsvol_req)
+/* Re-name volumes */
+#define UBI_IOCRNVOL _IOW(UBI_IOC_MAGIC, 3, struct ubi_rnvol_req)
/* IOCTL commands of the UBI control character device */
@@ -128,6 +137,9 @@
/* Maximum MTD device name length supported by UBI */
#define MAX_UBI_MTD_NAME_LEN 127
+/* Maximum amount of UBI volumes that can be re-named at one go */
+#define UBI_MAX_RNVOL 32
+
/*
* UBI data type hint constants.
*
@@ -176,20 +188,20 @@
* it will be 512 in case of a 2KiB page NAND flash with 4 512-byte sub-pages.
*
* But in rare cases, if this optimizes things, the VID header may be placed to
- * a different offset. For example, the boot-loader might do things faster if the
- * VID header sits at the end of the first 2KiB NAND page with 4 sub-pages. As
- * the boot-loader would not normally need to read EC headers (unless it needs
- * UBI in RW mode), it might be faster to calculate ECC. This is weird example,
- * but it real-life example. So, in this example, @vid_hdr_offer would be
- * 2KiB-64 bytes = 1984. Note, that this position is not even 512-bytes
- * aligned, which is OK, as UBI is clever enough to realize this is 4th sub-page
- * of the first page and add needed padding.
+ * a different offset. For example, the boot-loader might do things faster if
+ * the VID header sits at the end of the first 2KiB NAND page with 4 sub-pages.
+ * As the boot-loader would not normally need to read EC headers (unless it
+ * needs UBI in RW mode), it might be faster to calculate ECC. This is a weird
+ * example, but it is a real-life one. So, in this example, @vid_hdr_offset
+ * would be 2KiB-64 bytes = 1984. Note, that this position is not even 512-bytes
+ * aligned, which is OK, as UBI is clever enough to realize this is 4th
+ * sub-page of the first page and add needed padding.
*/
struct ubi_attach_req {
int32_t ubi_num;
int32_t mtd_num;
int32_t vid_hdr_offset;
- uint8_t padding[12];
+ int8_t padding[12];
};
/**
@@ -251,6 +263,48 @@
} __attribute__ ((packed));
/**
+ * struct ubi_rnvol_req - volumes re-name request.
+ * @count: count of volumes to re-name
+ * @padding1: reserved for future, not used, has to be zeroed
+ * @vol_id: ID of the volume to re-name
+ * @name_len: name length
+ * @padding2: reserved for future, not used, has to be zeroed
+ * @name: new volume name
+ *
+ * UBI allows to re-name up to %32 volumes at one go. The count of volumes to
+ * re-name is specified in the @count field. The ID of the volumes to re-name
+ * and the new names are specified in the @vol_id and @name fields.
+ *
+ * The UBI volume re-name operation is atomic, which means that should a power
+ * cut happen, the volumes will have either the old or the new name. One
+ * use-case of this command is atomic upgrade. Indeed, to upgrade, say, volumes
+ * A and B one may create temporary volumes %A1 and %B1 with the new contents,
+ * then atomically re-name A1->A and B1->B, in which case old %A and %B will
+ * be removed.
+ *
+ * If it is not desirable to remove old A and B, the re-name request has to
+ * contain 4 entries: A1->A, A->A1, B1->B, B->B1, in which case old A1 and B1
+ * become A and B, and old A and B will become A1 and B1.
+ *
+ * It is also OK to request: A1->A, A->X, B1->B, B->Y, in which case old A1
+ * and B1 become A and B, and old A and B become X and Y.
+ *
+ * In other words, in case of re-naming into an existing volume name, the
+ * existing volume is removed, unless it is re-named as well at the same
+ * re-name request.
+ */
+struct ubi_rnvol_req {
+ int32_t count;
+ int8_t padding1[12];
+ struct {
+ int32_t vol_id;
+ int16_t name_len;
+ int8_t padding2[2];
+ char name[UBI_MAX_VOLUME_NAME + 1];
+ } ents[UBI_MAX_RNVOL];
+} __attribute__ ((packed));
+
+/**
* struct ubi_leb_change_req - a data structure used in atomic logical
* eraseblock change requests.
* @lnum: logical eraseblock number to change
@@ -261,8 +315,8 @@
struct ubi_leb_change_req {
int32_t lnum;
int32_t bytes;
- uint8_t dtype;
- uint8_t padding[7];
+ int8_t dtype;
+ int8_t padding[7];
} __attribute__ ((packed));
#endif /* __UBI_USER_H__ */
diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index dfd8bf6..d364fd5 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -262,7 +262,7 @@
struct ieee80211_radiotap_header *hdr =
(struct ieee80211_radiotap_header *)data;
- return le16_to_cpu(get_unaligned(&hdr->it_len));
+ return get_unaligned_le16(&hdr->it_len);
}
#endif /* IEEE80211_RADIOTAP_H */
diff --git a/init/do_mounts.c b/init/do_mounts.c
index a1de1bf..f769fac 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -12,6 +12,7 @@
#include <linux/device.h>
#include <linux/init.h>
#include <linux/fs.h>
+#include <linux/initrd.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index 46dfd64..fedef93 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -10,8 +10,6 @@
#include "do_mounts.h"
-#define BUILD_CRAMDISK
-
int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */
static int __init prompt_ramdisk(char *str)
@@ -162,14 +160,8 @@
goto done;
if (nblocks == 0) {
-#ifdef BUILD_CRAMDISK
if (crd_load(in_fd, out_fd) == 0)
goto successful_load;
-#else
- printk(KERN_NOTICE
- "RAMDISK: Kernel does not support compressed "
- "RAM disk images\n");
-#endif
goto done;
}
@@ -267,8 +259,6 @@
return rd_load_image("/dev/root");
}
-#ifdef BUILD_CRAMDISK
-
/*
* gzip declarations
*/
@@ -313,33 +303,12 @@
static int __init fill_inbuf(void);
static void __init flush_window(void);
-static void __init *malloc(size_t size);
-static void __init free(void *where);
static void __init error(char *m);
-static void __init gzip_mark(void **);
-static void __init gzip_release(void **);
+
+#define NO_INFLATE_MALLOC
#include "../lib/inflate.c"
-static void __init *malloc(size_t size)
-{
- return kmalloc(size, GFP_KERNEL);
-}
-
-static void __init free(void *where)
-{
- kfree(where);
-}
-
-static void __init gzip_mark(void **ptr)
-{
-}
-
-static void __init gzip_release(void **ptr)
-{
-}
-
-
/* ===========================================================================
* Fill the input buffer. This is called only when the buffer is empty
* and at least one byte is really needed.
@@ -425,5 +394,3 @@
kfree(window);
return result;
}
-
-#endif /* BUILD_CRAMDISK */
diff --git a/init/initramfs.c b/init/initramfs.c
index 8eeeccb..644fc01 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -14,16 +14,6 @@
message = x;
}
-static void __init *malloc(size_t size)
-{
- return kmalloc(size, GFP_KERNEL);
-}
-
-static void __init free(void *where)
-{
- kfree(where);
-}
-
/* link hash */
#define N_ALIGN(len) ((((len) + 1) & ~3) + 2)
@@ -407,19 +397,11 @@
static void __init flush_window(void);
static void __init error(char *m);
-static void __init gzip_mark(void **);
-static void __init gzip_release(void **);
+
+#define NO_INFLATE_MALLOC
#include "../lib/inflate.c"
-static void __init gzip_mark(void **ptr)
-{
-}
-
-static void __init gzip_release(void **ptr)
-{
-}
-
/* ===========================================================================
* Write the output window window[0..outcnt-1] and update crc and bytes_out.
* (Used for the decompressed data only.)
diff --git a/init/main.c b/init/main.c
index 2769dc0..0604cbc 100644
--- a/init/main.c
+++ b/init/main.c
@@ -87,8 +87,6 @@
extern void fork_init(unsigned long);
extern void mca_init(void);
extern void sbus_init(void);
-extern void pidhash_init(void);
-extern void pidmap_init(void);
extern void prio_tree_init(void);
extern void radix_tree_init(void);
extern void free_initmem(void);
diff --git a/init/version.c b/init/version.c
index 9d17d70..52a8b98 100644
--- a/init/version.c
+++ b/init/version.c
@@ -13,10 +13,13 @@
#include <linux/utsrelease.h>
#include <linux/version.h>
+#ifndef CONFIG_KALLSYMS
#define version(a) Version_ ## a
#define version_string(a) version(a)
+extern int version_string(LINUX_VERSION_CODE);
int version_string(LINUX_VERSION_CODE);
+#endif
struct uts_namespace init_uts_ns = {
.kref = {
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index d349746..69bc859 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -27,15 +27,17 @@
}
/*
- * Routine that is called when a tunable has successfully been changed by
- * hand and it has a callback routine registered on the ipc namespace notifier
- * chain: we don't want such tunables to be recomputed anymore upon memory
- * add/remove or ipc namespace creation/removal.
- * They can come back to a recomputable state by being set to a <0 value.
+ * Routine that is called when the file "auto_msgmni" has successfully been
+ * written.
+ * Two values are allowed:
+ * 0: unregister msgmni's callback routine from the ipc namespace notifier
+ * chain. This means that msgmni won't be recomputed anymore upon memory
+ * add/remove or ipc namespace creation/removal.
+ * 1: register back the callback routine.
*/
-static void tunable_set_callback(int val)
+static void ipc_auto_callback(int val)
{
- if (val >= 0)
+ if (!val)
unregister_ipcns_notifier(current->nsproxy->ipc_ns);
else {
/*
@@ -71,7 +73,12 @@
rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos);
if (write && !rc && lenp_bef == *lenp)
- tunable_set_callback(*((int *)(ipc_table.data)));
+ /*
+ * Tunable has successfully been changed by hand. Disable its
+ * automatic adjustment. This simply requires unregistering
+ * the notifiers that trigger recalculation.
+ */
+ unregister_ipcns_notifier(current->nsproxy->ipc_ns);
return rc;
}
@@ -87,10 +94,39 @@
lenp, ppos);
}
+static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
+ struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table ipc_table;
+ size_t lenp_bef = *lenp;
+ int oldval;
+ int rc;
+
+ memcpy(&ipc_table, table, sizeof(ipc_table));
+ ipc_table.data = get_ipc(table);
+ oldval = *((int *)(ipc_table.data));
+
+ rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos);
+
+ if (write && !rc && lenp_bef == *lenp) {
+ int newval = *((int *)(ipc_table.data));
+ /*
+ * The file "auto_msgmni" has correctly been set.
+ * React by (un)registering the corresponding tunable, if the
+ * value has changed.
+ */
+ if (newval != oldval)
+ ipc_auto_callback(newval);
+ }
+
+ return rc;
+}
+
#else
#define proc_ipc_doulongvec_minmax NULL
#define proc_ipc_dointvec NULL
#define proc_ipc_callback_dointvec NULL
+#define proc_ipcauto_dointvec_minmax NULL
#endif
#ifdef CONFIG_SYSCTL_SYSCALL
@@ -142,14 +178,11 @@
rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval,
newlen);
- if (newval && newlen && rc > 0) {
+ if (newval && newlen && rc > 0)
/*
* Tunable has successfully been changed from userland
*/
- int *data = get_ipc(table);
-
- tunable_set_callback(*data);
- }
+ unregister_ipcns_notifier(current->nsproxy->ipc_ns);
return rc;
}
@@ -158,6 +191,9 @@
#define sysctl_ipc_registered_data NULL
#endif
+static int zero;
+static int one = 1;
+
static struct ctl_table ipc_kern_table[] = {
{
.ctl_name = KERN_SHMMAX,
@@ -222,6 +258,16 @@
.proc_handler = proc_ipc_dointvec,
.strategy = sysctl_ipc_data,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "auto_msgmni",
+ .data = &init_ipc_ns.auto_msgmni,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_ipcauto_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{}
};
diff --git a/ipc/ipcns_notifier.c b/ipc/ipcns_notifier.c
index 70ff091..b9b31a4 100644
--- a/ipc/ipcns_notifier.c
+++ b/ipc/ipcns_notifier.c
@@ -55,25 +55,35 @@
int register_ipcns_notifier(struct ipc_namespace *ns)
{
+ int rc;
+
memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb));
ns->ipcns_nb.notifier_call = ipcns_callback;
ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI;
- return blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb);
+ rc = blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb);
+ if (!rc)
+ ns->auto_msgmni = 1;
+ return rc;
}
int cond_register_ipcns_notifier(struct ipc_namespace *ns)
{
+ int rc;
+
memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb));
ns->ipcns_nb.notifier_call = ipcns_callback;
ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI;
- return blocking_notifier_chain_cond_register(&ipcns_chain,
+ rc = blocking_notifier_chain_cond_register(&ipcns_chain,
&ns->ipcns_nb);
+ if (!rc)
+ ns->auto_msgmni = 1;
+ return rc;
}
-int unregister_ipcns_notifier(struct ipc_namespace *ns)
+void unregister_ipcns_notifier(struct ipc_namespace *ns)
{
- return blocking_notifier_chain_unregister(&ipcns_chain,
- &ns->ipcns_nb);
+ blocking_notifier_chain_unregister(&ipcns_chain, &ns->ipcns_nb);
+ ns->auto_msgmni = 0;
}
int ipcns_notify(unsigned long val)
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 3e84b95..1fdc2eb 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -314,15 +314,11 @@
* through std routines)
*/
static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
- size_t count, loff_t * off)
+ size_t count, loff_t *off)
{
struct mqueue_inode_info *info = MQUEUE_I(filp->f_path.dentry->d_inode);
char buffer[FILENT_SIZE];
- size_t slen;
- loff_t o;
-
- if (!count)
- return 0;
+ ssize_t ret;
spin_lock(&info->lock);
snprintf(buffer, sizeof(buffer),
@@ -335,21 +331,14 @@
pid_vnr(info->notify_owner));
spin_unlock(&info->lock);
buffer[sizeof(buffer)-1] = '\0';
- slen = strlen(buffer)+1;
- o = *off;
- if (o > slen)
- return 0;
+ ret = simple_read_from_buffer(u_data, count, off, buffer,
+ strlen(buffer));
+ if (ret <= 0)
+ return ret;
- if (o + count > slen)
- count = slen - o;
-
- if (copy_to_user(u_data, buffer + o, count))
- return -EFAULT;
-
- *off = o + count;
filp->f_path.dentry->d_inode->i_atime = filp->f_path.dentry->d_inode->i_ctime = CURRENT_TIME;
- return count;
+ return ret;
}
static int mqueue_flush_file(struct file *filp, fl_owner_t id)
diff --git a/ipc/sem.c b/ipc/sem.c
index e9418df..bf1bc36 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -272,9 +272,8 @@
ns->used_sems += nsems;
sma->sem_base = (struct sem *) &sma[1];
- /* sma->sem_pending = NULL; */
- sma->sem_pending_last = &sma->sem_pending;
- /* sma->undo = NULL; */
+ INIT_LIST_HEAD(&sma->sem_pending);
+ INIT_LIST_HEAD(&sma->list_id);
sma->sem_nsems = nsems;
sma->sem_ctime = get_seconds();
sem_unlock(sma);
@@ -331,38 +330,6 @@
return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
}
-/* Manage the doubly linked list sma->sem_pending as a FIFO:
- * insert new queue elements at the tail sma->sem_pending_last.
- */
-static inline void append_to_queue (struct sem_array * sma,
- struct sem_queue * q)
-{
- *(q->prev = sma->sem_pending_last) = q;
- *(sma->sem_pending_last = &q->next) = NULL;
-}
-
-static inline void prepend_to_queue (struct sem_array * sma,
- struct sem_queue * q)
-{
- q->next = sma->sem_pending;
- *(q->prev = &sma->sem_pending) = q;
- if (q->next)
- q->next->prev = &q->next;
- else /* sma->sem_pending_last == &sma->sem_pending */
- sma->sem_pending_last = &q->next;
-}
-
-static inline void remove_from_queue (struct sem_array * sma,
- struct sem_queue * q)
-{
- *(q->prev) = q->next;
- if (q->next)
- q->next->prev = q->prev;
- else /* sma->sem_pending_last == &q->next */
- sma->sem_pending_last = q->prev;
- q->prev = NULL; /* mark as removed */
-}
-
/*
* Determine whether a sequence of semaphore operations would succeed
* all at once. Return 0 if yes, 1 if need to sleep, else return error code.
@@ -438,16 +405,15 @@
int error;
struct sem_queue * q;
- q = sma->sem_pending;
- while(q) {
+ q = list_entry(sma->sem_pending.next, struct sem_queue, list);
+ while (&q->list != &sma->sem_pending) {
error = try_atomic_semop(sma, q->sops, q->nsops,
q->undo, q->pid);
/* Does q->sleeper still need to sleep? */
if (error <= 0) {
struct sem_queue *n;
- remove_from_queue(sma,q);
- q->status = IN_WAKEUP;
+
/*
* Continue scanning. The next operation
* that must be checked depends on the type of the
@@ -458,11 +424,26 @@
* for semaphore values to become 0.
* - if the operation didn't modify the array,
* then just continue.
+ * The order of list_del() and reading ->next
+ * is crucial: In the former case, the list_del()
+ * must be done first [because we might be the
+ * first entry in ->sem_pending], in the latter
+ * case the list_del() must be done last
+ * [because the list is invalid after the list_del()]
*/
- if (q->alter)
- n = sma->sem_pending;
- else
- n = q->next;
+ if (q->alter) {
+ list_del(&q->list);
+ n = list_entry(sma->sem_pending.next,
+ struct sem_queue, list);
+ } else {
+ n = list_entry(q->list.next, struct sem_queue,
+ list);
+ list_del(&q->list);
+ }
+
+ /* wake up the waiting thread */
+ q->status = IN_WAKEUP;
+
wake_up_process(q->sleeper);
/* hands-off: q will disappear immediately after
* writing q->status.
@@ -471,7 +452,7 @@
q->status = error;
q = n;
} else {
- q = q->next;
+ q = list_entry(q->list.next, struct sem_queue, list);
}
}
}
@@ -491,7 +472,7 @@
struct sem_queue * q;
semncnt = 0;
- for (q = sma->sem_pending; q; q = q->next) {
+ list_for_each_entry(q, &sma->sem_pending, list) {
struct sembuf * sops = q->sops;
int nsops = q->nsops;
int i;
@@ -503,13 +484,14 @@
}
return semncnt;
}
+
static int count_semzcnt (struct sem_array * sma, ushort semnum)
{
int semzcnt;
struct sem_queue * q;
semzcnt = 0;
- for (q = sma->sem_pending; q; q = q->next) {
+ list_for_each_entry(q, &sma->sem_pending, list) {
struct sembuf * sops = q->sops;
int nsops = q->nsops;
int i;
@@ -522,35 +504,41 @@
return semzcnt;
}
+void free_un(struct rcu_head *head)
+{
+ struct sem_undo *un = container_of(head, struct sem_undo, rcu);
+ kfree(un);
+}
+
/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
* as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex
* remains locked on exit.
*/
static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
- struct sem_undo *un;
- struct sem_queue *q;
+ struct sem_undo *un, *tu;
+ struct sem_queue *q, *tq;
struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
- /* Invalidate the existing undo structures for this semaphore set.
- * (They will be freed without any further action in exit_sem()
- * or during the next semop.)
- */
- for (un = sma->undo; un; un = un->id_next)
+ /* Free the existing undo structures for this semaphore set. */
+ assert_spin_locked(&sma->sem_perm.lock);
+ list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
+ list_del(&un->list_id);
+ spin_lock(&un->ulp->lock);
un->semid = -1;
+ list_del_rcu(&un->list_proc);
+ spin_unlock(&un->ulp->lock);
+ call_rcu(&un->rcu, free_un);
+ }
/* Wake up all pending processes and let them fail with EIDRM. */
- q = sma->sem_pending;
- while(q) {
- struct sem_queue *n;
- /* lazy remove_from_queue: we are killing the whole queue */
- q->prev = NULL;
- n = q->next;
+ list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
+ list_del(&q->list);
+
q->status = IN_WAKEUP;
wake_up_process(q->sleeper); /* doesn't sleep */
smp_wmb();
q->status = -EIDRM; /* hands-off q */
- q = n;
}
/* Remove the semaphore set from the IDR */
@@ -763,9 +751,12 @@
for (i = 0; i < nsems; i++)
sma->sem_base[i].semval = sem_io[i];
- for (un = sma->undo; un; un = un->id_next)
+
+ assert_spin_locked(&sma->sem_perm.lock);
+ list_for_each_entry(un, &sma->list_id, list_id) {
for (i = 0; i < nsems; i++)
un->semadj[i] = 0;
+ }
sma->sem_ctime = get_seconds();
/* maybe some queued-up processes were waiting for this */
update_queue(sma);
@@ -797,12 +788,15 @@
{
int val = arg.val;
struct sem_undo *un;
+
err = -ERANGE;
if (val > SEMVMX || val < 0)
goto out_unlock;
- for (un = sma->undo; un; un = un->id_next)
+ assert_spin_locked(&sma->sem_perm.lock);
+ list_for_each_entry(un, &sma->list_id, list_id)
un->semadj[semnum] = 0;
+
curr->semval = val;
curr->sempid = task_tgid_vnr(current);
sma->sem_ctime = get_seconds();
@@ -952,6 +946,8 @@
return -ENOMEM;
spin_lock_init(&undo_list->lock);
atomic_set(&undo_list->refcnt, 1);
+ INIT_LIST_HEAD(&undo_list->list_proc);
+
current->sysvsem.undo_list = undo_list;
}
*undo_listp = undo_list;
@@ -960,25 +956,27 @@
static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
{
- struct sem_undo **last, *un;
+ struct sem_undo *walk;
- last = &ulp->proc_list;
- un = *last;
- while(un != NULL) {
- if(un->semid==semid)
- break;
- if(un->semid==-1) {
- *last=un->proc_next;
- kfree(un);
- } else {
- last=&un->proc_next;
- }
- un=*last;
+ list_for_each_entry_rcu(walk, &ulp->list_proc, list_proc) {
+ if (walk->semid == semid)
+ return walk;
}
- return un;
+ return NULL;
}
-static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
+/**
+ * find_alloc_undo - Lookup (and if not present create) undo array
+ * @ns: namespace
+ * @semid: semaphore array id
+ *
+ * The function looks up (and if not present creates) the undo structure.
+ * The size of the undo structure depends on the size of the semaphore
+ * array, thus the alloc path is not that straightforward.
+ * Lifetime rules: sem_undo is RCU-protected; on success, the function
+ * returns with rcu_read_lock() held.
+ */
+static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
{
struct sem_array *sma;
struct sem_undo_list *ulp;
@@ -990,13 +988,16 @@
if (error)
return ERR_PTR(error);
+ rcu_read_lock();
spin_lock(&ulp->lock);
un = lookup_undo(ulp, semid);
spin_unlock(&ulp->lock);
if (likely(un!=NULL))
goto out;
+ rcu_read_unlock();
/* no undo structure around - allocate one. */
+ /* step 1: figure out the size of the semaphore array */
sma = sem_lock_check(ns, semid);
if (IS_ERR(sma))
return ERR_PTR(PTR_ERR(sma));
@@ -1004,37 +1005,45 @@
nsems = sma->sem_nsems;
sem_getref_and_unlock(sma);
+ /* step 2: allocate new undo structure */
new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
if (!new) {
sem_putref(sma);
return ERR_PTR(-ENOMEM);
}
- new->semadj = (short *) &new[1];
- new->semid = semid;
- spin_lock(&ulp->lock);
- un = lookup_undo(ulp, semid);
- if (un) {
- spin_unlock(&ulp->lock);
- kfree(new);
- sem_putref(sma);
- goto out;
- }
+ /* step 3: Acquire the lock on semaphore array */
sem_lock_and_putref(sma);
if (sma->sem_perm.deleted) {
sem_unlock(sma);
- spin_unlock(&ulp->lock);
kfree(new);
un = ERR_PTR(-EIDRM);
goto out;
}
- new->proc_next = ulp->proc_list;
- ulp->proc_list = new;
- new->id_next = sma->undo;
- sma->undo = new;
- sem_unlock(sma);
+ spin_lock(&ulp->lock);
+
+ /*
+ * step 4: check for races: did someone else allocate the undo struct?
+ */
+ un = lookup_undo(ulp, semid);
+ if (un) {
+ kfree(new);
+ goto success;
+ }
+ /* step 5: initialize & link new undo structure */
+ new->semadj = (short *) &new[1];
+ new->ulp = ulp;
+ new->semid = semid;
+ assert_spin_locked(&ulp->lock);
+ list_add_rcu(&new->list_proc, &ulp->list_proc);
+ assert_spin_locked(&sma->sem_perm.lock);
+ list_add(&new->list_id, &sma->list_id);
un = new;
+
+success:
spin_unlock(&ulp->lock);
+ rcu_read_lock();
+ sem_unlock(sma);
out:
return un;
}
@@ -1090,9 +1099,8 @@
alter = 1;
}
-retry_undos:
if (undos) {
- un = find_undo(ns, semid);
+ un = find_alloc_undo(ns, semid);
if (IS_ERR(un)) {
error = PTR_ERR(un);
goto out_free;
@@ -1102,19 +1110,37 @@
sma = sem_lock_check(ns, semid);
if (IS_ERR(sma)) {
+ if (un)
+ rcu_read_unlock();
error = PTR_ERR(sma);
goto out_free;
}
/*
- * semid identifiers are not unique - find_undo may have
+ * semid identifiers are not unique - find_alloc_undo may have
* allocated an undo structure, it was invalidated by an RMID
- * and now a new array with received the same id. Check and retry.
+ * and now a new array has received the same id. Check and fail.
+ * This case can be detected by checking un->semid. The existence of
+ * "un" itself is guaranteed by rcu.
*/
- if (un && un->semid == -1) {
- sem_unlock(sma);
- goto retry_undos;
+ error = -EIDRM;
+ if (un) {
+ if (un->semid == -1) {
+ rcu_read_unlock();
+ goto out_unlock_free;
+ } else {
+ /*
+ * rcu lock can be released, "un" cannot disappear:
+ * - sem_lock is acquired, thus IPC_RMID is
+ * impossible.
+ * - exit_sem is impossible, it always operates on
+ * current (or a dead task).
+ */
+
+ rcu_read_unlock();
+ }
}
+
error = -EFBIG;
if (max >= sma->sem_nsems)
goto out_unlock_free;
@@ -1138,17 +1164,15 @@
* task into the pending queue and go to sleep.
*/
- queue.sma = sma;
queue.sops = sops;
queue.nsops = nsops;
queue.undo = un;
queue.pid = task_tgid_vnr(current);
- queue.id = semid;
queue.alter = alter;
if (alter)
- append_to_queue(sma ,&queue);
+ list_add_tail(&queue.list, &sma->sem_pending);
else
- prepend_to_queue(sma ,&queue);
+ list_add(&queue.list, &sma->sem_pending);
queue.status = -EINTR;
queue.sleeper = current;
@@ -1174,7 +1198,6 @@
sma = sem_lock(ns, semid);
if (IS_ERR(sma)) {
- BUG_ON(queue.prev != NULL);
error = -EIDRM;
goto out_free;
}
@@ -1192,7 +1215,7 @@
*/
if (timeout && jiffies_left == 0)
error = -EAGAIN;
- remove_from_queue(sma,&queue);
+ list_del(&queue.list);
goto out_unlock_free;
out_unlock_free:
@@ -1243,56 +1266,62 @@
*/
void exit_sem(struct task_struct *tsk)
{
- struct sem_undo_list *undo_list;
- struct sem_undo *u, **up;
- struct ipc_namespace *ns;
+ struct sem_undo_list *ulp;
- undo_list = tsk->sysvsem.undo_list;
- if (!undo_list)
+ ulp = tsk->sysvsem.undo_list;
+ if (!ulp)
return;
tsk->sysvsem.undo_list = NULL;
- if (!atomic_dec_and_test(&undo_list->refcnt))
+ if (!atomic_dec_and_test(&ulp->refcnt))
return;
- ns = tsk->nsproxy->ipc_ns;
- /* There's no need to hold the semundo list lock, as current
- * is the last task exiting for this undo list.
- */
- for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) {
+ for (;;) {
struct sem_array *sma;
- int nsems, i;
- struct sem_undo *un, **unp;
+ struct sem_undo *un;
int semid;
-
- semid = u->semid;
+ int i;
- if(semid == -1)
- continue;
- sma = sem_lock(ns, semid);
+ rcu_read_lock();
+ un = list_entry(rcu_dereference(ulp->list_proc.next),
+ struct sem_undo, list_proc);
+ if (&un->list_proc == &ulp->list_proc)
+ semid = -1;
+ else
+ semid = un->semid;
+ rcu_read_unlock();
+
+ if (semid == -1)
+ break;
+
+ sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid);
+
+ /* exit_sem raced with IPC_RMID, nothing to do */
if (IS_ERR(sma))
continue;
- if (u->semid == -1)
- goto next_entry;
-
- BUG_ON(sem_checkid(sma, u->semid));
-
- /* remove u from the sma->undo list */
- for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
- if (u == un)
- goto found;
+ un = lookup_undo(ulp, semid);
+ if (un == NULL) {
+ /* exit_sem raced with IPC_RMID+semget() that created
+ * exactly the same semid. Nothing to do.
+ */
+ sem_unlock(sma);
+ continue;
}
- printk ("exit_sem undo list error id=%d\n", u->semid);
- goto next_entry;
-found:
- *unp = un->id_next;
- /* perform adjustments registered in u */
- nsems = sma->sem_nsems;
- for (i = 0; i < nsems; i++) {
+
+ /* remove un from the linked lists */
+ assert_spin_locked(&sma->sem_perm.lock);
+ list_del(&un->list_id);
+
+ spin_lock(&ulp->lock);
+ list_del_rcu(&un->list_proc);
+ spin_unlock(&ulp->lock);
+
+ /* perform adjustments registered in un */
+ for (i = 0; i < sma->sem_nsems; i++) {
struct sem * semaphore = &sma->sem_base[i];
- if (u->semadj[i]) {
- semaphore->semval += u->semadj[i];
+ if (un->semadj[i]) {
+ semaphore->semval += un->semadj[i];
/*
* Range checks of the new semaphore value,
* not defined by sus:
@@ -1316,10 +1345,11 @@
sma->sem_otime = get_seconds();
/* maybe some queued-up processes were waiting for this */
update_queue(sma);
-next_entry:
sem_unlock(sma);
+
+ call_rcu(&un->rcu, free_un);
}
- kfree(undo_list);
+ kfree(ulp);
}
#ifdef CONFIG_PROC_FS
diff --git a/ipc/shm.c b/ipc/shm.c
index a726aeb..e77ec698 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -112,23 +112,8 @@
}
/*
- * shm_lock_(check_)down routines are called in the paths where the rw_mutex
- * is held to protect access to the idr tree.
- */
-static inline struct shmid_kernel *shm_lock_down(struct ipc_namespace *ns,
- int id)
-{
- struct kern_ipc_perm *ipcp = ipc_lock_down(&shm_ids(ns), id);
-
- if (IS_ERR(ipcp))
- return (struct shmid_kernel *)ipcp;
-
- return container_of(ipcp, struct shmid_kernel, shm_perm);
-}
-
-/*
* shm_lock_(check_) routines are called in the paths where the rw_mutex
- * is not held.
+ * is not necessarily held.
*/
static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
{
@@ -211,7 +196,7 @@
down_write(&shm_ids(ns).rw_mutex);
/* remove from the list of attaches of the shm segment */
- shp = shm_lock_down(ns, sfd->id);
+ shp = shm_lock(ns, sfd->id);
BUG_ON(IS_ERR(shp));
shp->shm_lprid = task_tgid_vnr(current);
shp->shm_dtim = get_seconds();
@@ -932,7 +917,7 @@
out_nattch:
down_write(&shm_ids(ns).rw_mutex);
- shp = shm_lock_down(ns, shmid);
+ shp = shm_lock(ns, shmid);
BUG_ON(IS_ERR(shp));
shp->shm_nattch--;
if(shp->shm_nattch == 0 &&
diff --git a/ipc/util.c b/ipc/util.c
index 3339177..49b3ea6 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -688,10 +688,6 @@
* Look for an id in the ipc ids idr and lock the associated ipc object.
*
* The ipc object is locked on exit.
- *
- * This is the routine that should be called when the rw_mutex is not already
- * held, i.e. idr tree not protected: it protects the idr tree in read mode
- * during the idr_find().
*/
struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
@@ -699,18 +695,13 @@
struct kern_ipc_perm *out;
int lid = ipcid_to_idx(id);
- down_read(&ids->rw_mutex);
-
rcu_read_lock();
out = idr_find(&ids->ipcs_idr, lid);
if (out == NULL) {
rcu_read_unlock();
- up_read(&ids->rw_mutex);
return ERR_PTR(-EINVAL);
}
- up_read(&ids->rw_mutex);
-
spin_lock(&out->lock);
/* ipc_rmid() may have already freed the ID while ipc_lock
@@ -725,56 +716,6 @@
return out;
}
-/**
- * ipc_lock_down - Lock an ipc structure with rw_sem held
- * @ids: IPC identifier set
- * @id: ipc id to look for
- *
- * Look for an id in the ipc ids idr and lock the associated ipc object.
- *
- * The ipc object is locked on exit.
- *
- * This is the routine that should be called when the rw_mutex is already
- * held, i.e. idr tree protected.
- */
-
-struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *ids, int id)
-{
- struct kern_ipc_perm *out;
- int lid = ipcid_to_idx(id);
-
- rcu_read_lock();
- out = idr_find(&ids->ipcs_idr, lid);
- if (out == NULL) {
- rcu_read_unlock();
- return ERR_PTR(-EINVAL);
- }
-
- spin_lock(&out->lock);
-
- /*
- * No need to verify that the structure is still valid since the
- * rw_mutex is held.
- */
- return out;
-}
-
-struct kern_ipc_perm *ipc_lock_check_down(struct ipc_ids *ids, int id)
-{
- struct kern_ipc_perm *out;
-
- out = ipc_lock_down(ids, id);
- if (IS_ERR(out))
- return out;
-
- if (ipc_checkid(out, id)) {
- ipc_unlock(out);
- return ERR_PTR(-EIDRM);
- }
-
- return out;
-}
-
struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id)
{
struct kern_ipc_perm *out;
@@ -846,7 +787,7 @@
int err;
down_write(&ids->rw_mutex);
- ipcp = ipc_lock_check_down(ids, id);
+ ipcp = ipc_lock_check(ids, id);
if (IS_ERR(ipcp)) {
err = PTR_ERR(ipcp);
goto out_up;
diff --git a/ipc/util.h b/ipc/util.h
index cdb966a..3646b45 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -102,11 +102,6 @@
void ipc_rcu_getref(void *ptr);
void ipc_rcu_putref(void *ptr);
-/*
- * ipc_lock_down: called with rw_mutex held
- * ipc_lock: called without that lock held
- */
-struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *, int);
struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
@@ -155,7 +150,6 @@
rcu_read_unlock();
}
-struct kern_ipc_perm *ipc_lock_check_down(struct ipc_ids *ids, int id);
struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id);
int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
struct ipc_ops *ops, struct ipc_params *params);
diff --git a/kernel/Makefile b/kernel/Makefile
index 15ab63f..54f69837 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -2,7 +2,7 @@
# Makefile for the linux kernel.
#
-obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
+obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
cpu.o exit.o itimer.o time.o softirq.o resource.o \
sysctl.o capability.o ptrace.o timer.o user.o \
signal.o sys.o kmod.o workqueue.o pid.o \
@@ -24,6 +24,7 @@
CFLAGS_REMOVE_sched.o = -mno-spe -pg
endif
+obj-$(CONFIG_PROFILING) += profile.o
obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
diff --git a/kernel/acct.c b/kernel/acct.c
index 91e1cfd..dd68b90 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -75,37 +75,39 @@
/*
* External references and all of the globals.
*/
-static void do_acct_process(struct pid_namespace *ns, struct file *);
+static void do_acct_process(struct bsd_acct_struct *acct,
+ struct pid_namespace *ns, struct file *);
/*
* This structure is used so that all the data protected by lock
* can be placed in the same cache line as the lock. This primes
* the cache line to have the data after getting the lock.
*/
-struct acct_glbs {
- spinlock_t lock;
+struct bsd_acct_struct {
volatile int active;
volatile int needcheck;
struct file *file;
struct pid_namespace *ns;
struct timer_list timer;
+ struct list_head list;
};
-static struct acct_glbs acct_globals __cacheline_aligned =
- {__SPIN_LOCK_UNLOCKED(acct_globals.lock)};
+static DEFINE_SPINLOCK(acct_lock);
+static LIST_HEAD(acct_list);
/*
* Called whenever the timer says to check the free space.
*/
-static void acct_timeout(unsigned long unused)
+static void acct_timeout(unsigned long x)
{
- acct_globals.needcheck = 1;
+ struct bsd_acct_struct *acct = (struct bsd_acct_struct *)x;
+ acct->needcheck = 1;
}
/*
* Check the amount of free space and suspend/resume accordingly.
*/
-static int check_free_space(struct file *file)
+static int check_free_space(struct bsd_acct_struct *acct, struct file *file)
{
struct kstatfs sbuf;
int res;
@@ -113,11 +115,11 @@
sector_t resume;
sector_t suspend;
- spin_lock(&acct_globals.lock);
- res = acct_globals.active;
- if (!file || !acct_globals.needcheck)
+ spin_lock(&acct_lock);
+ res = acct->active;
+ if (!file || !acct->needcheck)
goto out;
- spin_unlock(&acct_globals.lock);
+ spin_unlock(&acct_lock);
/* May block */
if (vfs_statfs(file->f_path.dentry, &sbuf))
@@ -136,35 +138,35 @@
act = 0;
/*
- * If some joker switched acct_globals.file under us we'ld better be
+ * If some joker switched acct->file under us we'd better be
* silent and _not_ touch anything.
*/
- spin_lock(&acct_globals.lock);
- if (file != acct_globals.file) {
+ spin_lock(&acct_lock);
+ if (file != acct->file) {
if (act)
res = act>0;
goto out;
}
- if (acct_globals.active) {
+ if (acct->active) {
if (act < 0) {
- acct_globals.active = 0;
+ acct->active = 0;
printk(KERN_INFO "Process accounting paused\n");
}
} else {
if (act > 0) {
- acct_globals.active = 1;
+ acct->active = 1;
printk(KERN_INFO "Process accounting resumed\n");
}
}
- del_timer(&acct_globals.timer);
- acct_globals.needcheck = 0;
- acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
- add_timer(&acct_globals.timer);
- res = acct_globals.active;
+ del_timer(&acct->timer);
+ acct->needcheck = 0;
+ acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+ add_timer(&acct->timer);
+ res = acct->active;
out:
- spin_unlock(&acct_globals.lock);
+ spin_unlock(&acct_lock);
return res;
}
@@ -172,39 +174,41 @@
* Close the old accounting file (if currently open) and then replace
* it with file (if non-NULL).
*
- * NOTE: acct_globals.lock MUST be held on entry and exit.
+ * NOTE: acct_lock MUST be held on entry and exit.
*/
-static void acct_file_reopen(struct file *file)
+static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
+ struct pid_namespace *ns)
{
struct file *old_acct = NULL;
struct pid_namespace *old_ns = NULL;
- if (acct_globals.file) {
- old_acct = acct_globals.file;
- old_ns = acct_globals.ns;
- del_timer(&acct_globals.timer);
- acct_globals.active = 0;
- acct_globals.needcheck = 0;
- acct_globals.file = NULL;
+ if (acct->file) {
+ old_acct = acct->file;
+ old_ns = acct->ns;
+ del_timer(&acct->timer);
+ acct->active = 0;
+ acct->needcheck = 0;
+ acct->file = NULL;
+ acct->ns = NULL;
+ list_del(&acct->list);
}
if (file) {
- acct_globals.file = file;
- acct_globals.ns = get_pid_ns(task_active_pid_ns(current));
- acct_globals.needcheck = 0;
- acct_globals.active = 1;
+ acct->file = file;
+ acct->ns = ns;
+ acct->needcheck = 0;
+ acct->active = 1;
+ list_add(&acct->list, &acct_list);
/* It's been deleted if it was used before so this is safe */
- init_timer(&acct_globals.timer);
- acct_globals.timer.function = acct_timeout;
- acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
- add_timer(&acct_globals.timer);
+ setup_timer(&acct->timer, acct_timeout, (unsigned long)acct);
+ acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+ add_timer(&acct->timer);
}
if (old_acct) {
mnt_unpin(old_acct->f_path.mnt);
- spin_unlock(&acct_globals.lock);
- do_acct_process(old_ns, old_acct);
+ spin_unlock(&acct_lock);
+ do_acct_process(acct, old_ns, old_acct);
filp_close(old_acct, NULL);
- put_pid_ns(old_ns);
- spin_lock(&acct_globals.lock);
+ spin_lock(&acct_lock);
}
}
@@ -212,6 +216,8 @@
{
struct file *file;
int error;
+ struct pid_namespace *ns;
+ struct bsd_acct_struct *acct = NULL;
/* Difference from BSD - they don't do O_APPEND */
file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
@@ -228,18 +234,34 @@
return -EIO;
}
+ ns = task_active_pid_ns(current);
+ if (ns->bacct == NULL) {
+ acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
+ if (acct == NULL) {
+ filp_close(file, NULL);
+ return -ENOMEM;
+ }
+ }
+
error = security_acct(file);
if (error) {
+ kfree(acct);
filp_close(file, NULL);
return error;
}
- spin_lock(&acct_globals.lock);
+ spin_lock(&acct_lock);
+ if (ns->bacct == NULL) {
+ ns->bacct = acct;
+ acct = NULL;
+ }
+
mnt_pin(file->f_path.mnt);
- acct_file_reopen(file);
- spin_unlock(&acct_globals.lock);
+ acct_file_reopen(ns->bacct, file, ns);
+ spin_unlock(&acct_lock);
mntput(file->f_path.mnt); /* it's pinned, now give up active reference */
+ kfree(acct);
return 0;
}
@@ -269,11 +291,17 @@
error = acct_on(tmp);
putname(tmp);
} else {
+ struct bsd_acct_struct *acct;
+
+ acct = task_active_pid_ns(current)->bacct;
+ if (acct == NULL)
+ return 0;
+
error = security_acct(NULL);
if (!error) {
- spin_lock(&acct_globals.lock);
- acct_file_reopen(NULL);
- spin_unlock(&acct_globals.lock);
+ spin_lock(&acct_lock);
+ acct_file_reopen(acct, NULL, NULL);
+ spin_unlock(&acct_lock);
}
}
return error;
@@ -288,10 +316,16 @@
*/
void acct_auto_close_mnt(struct vfsmount *m)
{
- spin_lock(&acct_globals.lock);
- if (acct_globals.file && acct_globals.file->f_path.mnt == m)
- acct_file_reopen(NULL);
- spin_unlock(&acct_globals.lock);
+ struct bsd_acct_struct *acct;
+
+ spin_lock(&acct_lock);
+restart:
+ list_for_each_entry(acct, &acct_list, list)
+ if (acct->file && acct->file->f_path.mnt == m) {
+ acct_file_reopen(acct, NULL, NULL);
+ goto restart;
+ }
+ spin_unlock(&acct_lock);
}
/**
@@ -303,12 +337,31 @@
*/
void acct_auto_close(struct super_block *sb)
{
- spin_lock(&acct_globals.lock);
- if (acct_globals.file &&
- acct_globals.file->f_path.mnt->mnt_sb == sb) {
- acct_file_reopen(NULL);
+ struct bsd_acct_struct *acct;
+
+ spin_lock(&acct_lock);
+restart:
+ list_for_each_entry(acct, &acct_list, list)
+ if (acct->file && acct->file->f_path.mnt->mnt_sb == sb) {
+ acct_file_reopen(acct, NULL, NULL);
+ goto restart;
+ }
+ spin_unlock(&acct_lock);
+}
+
+void acct_exit_ns(struct pid_namespace *ns)
+{
+ struct bsd_acct_struct *acct;
+
+ spin_lock(&acct_lock);
+ acct = ns->bacct;
+ if (acct != NULL) {
+ if (acct->file != NULL)
+ acct_file_reopen(acct, NULL, NULL);
+
+ kfree(acct);
}
- spin_unlock(&acct_globals.lock);
+ spin_unlock(&acct_lock);
}
/*
@@ -425,7 +478,8 @@
/*
* do_acct_process does all actual work. Caller holds the reference to file.
*/
-static void do_acct_process(struct pid_namespace *ns, struct file *file)
+static void do_acct_process(struct bsd_acct_struct *acct,
+ struct pid_namespace *ns, struct file *file)
{
struct pacct_struct *pacct = ¤t->signal->pacct;
acct_t ac;
@@ -440,7 +494,7 @@
* First check to see if there is enough free_space to continue
* the process accounting system.
*/
- if (!check_free_space(file))
+ if (!check_free_space(acct, file))
return;
/*
@@ -577,34 +631,46 @@
spin_unlock_irq(¤t->sighand->siglock);
}
+static void acct_process_in_ns(struct pid_namespace *ns)
+{
+ struct file *file = NULL;
+ struct bsd_acct_struct *acct;
+
+ acct = ns->bacct;
+ /*
+ * accelerate the common fastpath:
+ */
+ if (!acct || !acct->file)
+ return;
+
+ spin_lock(&acct_lock);
+ file = acct->file;
+ if (unlikely(!file)) {
+ spin_unlock(&acct_lock);
+ return;
+ }
+ get_file(file);
+ spin_unlock(&acct_lock);
+
+ do_acct_process(acct, ns, file);
+ fput(file);
+}
+
/**
- * acct_process - now just a wrapper around do_acct_process
- * @exitcode: task exit code
+ * acct_process - now just a wrapper around acct_process_in_ns,
+ * which in turn is a wrapper around do_acct_process.
*
* handles process accounting for an exiting task
*/
void acct_process(void)
{
- struct file *file = NULL;
struct pid_namespace *ns;
/*
- * accelerate the common fastpath:
+ * This loop is safe without locking, since current is still
+ * alive and holds its namespace, which in turn holds
+ * its parent.
*/
- if (!acct_globals.file)
- return;
-
- spin_lock(&acct_globals.lock);
- file = acct_globals.file;
- if (unlikely(!file)) {
- spin_unlock(&acct_globals.lock);
- return;
- }
- get_file(file);
- ns = get_pid_ns(acct_globals.ns);
- spin_unlock(&acct_globals.lock);
-
- do_acct_process(ns, file);
- fput(file);
- put_pid_ns(ns);
+ for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent)
+ acct_process_in_ns(ns);
}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 15ac0e1..66ec9fd 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -89,11 +89,7 @@
/* Hierarchy-specific flags */
unsigned long flags;
- /* The path to use for release notifications. No locking
- * between setting and use - so if userspace updates this
- * while child cgroups exist, you could miss a
- * notification. We ensure that it's always a valid
- * NUL-terminated string */
+ /* The path to use for release notifications. */
char release_agent_path[PATH_MAX];
};
@@ -118,7 +114,7 @@
* extra work in the fork/exit path if none of the subsystems need to
* be called.
*/
-static int need_forkexit_callback;
+static int need_forkexit_callback __read_mostly;
static int need_mm_owner_callback __read_mostly;
/* convenient tests for these bits */
@@ -220,7 +216,7 @@
* task until after the first call to cgroup_iter_start(). This
* reduces the fork()/exit() overhead for people who have cgroups
* compiled into their kernel but not actually in use */
-static int use_task_css_set_links;
+static int use_task_css_set_links __read_mostly;
/* When we create or destroy a css_set, the operation simply
* takes/releases a reference count on all the cgroups referenced
@@ -241,17 +237,20 @@
*/
static void unlink_css_set(struct css_set *cg)
{
+ struct cg_cgroup_link *link;
+ struct cg_cgroup_link *saved_link;
+
write_lock(&css_set_lock);
hlist_del(&cg->hlist);
css_set_count--;
- while (!list_empty(&cg->cg_links)) {
- struct cg_cgroup_link *link;
- link = list_entry(cg->cg_links.next,
- struct cg_cgroup_link, cg_link_list);
+
+ list_for_each_entry_safe(link, saved_link, &cg->cg_links,
+ cg_link_list) {
list_del(&link->cg_link_list);
list_del(&link->cgrp_link_list);
kfree(link);
}
+
write_unlock(&css_set_lock);
}
@@ -363,15 +362,14 @@
static int allocate_cg_links(int count, struct list_head *tmp)
{
struct cg_cgroup_link *link;
+ struct cg_cgroup_link *saved_link;
int i;
INIT_LIST_HEAD(tmp);
for (i = 0; i < count; i++) {
link = kmalloc(sizeof(*link), GFP_KERNEL);
if (!link) {
- while (!list_empty(tmp)) {
- link = list_entry(tmp->next,
- struct cg_cgroup_link,
- cgrp_link_list);
+ list_for_each_entry_safe(link, saved_link, tmp,
+ cgrp_link_list) {
list_del(&link->cgrp_link_list);
kfree(link);
}
@@ -384,11 +382,10 @@
static void free_cg_links(struct list_head *tmp)
{
- while (!list_empty(tmp)) {
- struct cg_cgroup_link *link;
- link = list_entry(tmp->next,
- struct cg_cgroup_link,
- cgrp_link_list);
+ struct cg_cgroup_link *link;
+ struct cg_cgroup_link *saved_link;
+
+ list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
list_del(&link->cgrp_link_list);
kfree(link);
}
@@ -415,11 +412,11 @@
/* First see if we already have a cgroup group that matches
* the desired set */
- write_lock(&css_set_lock);
+ read_lock(&css_set_lock);
res = find_existing_css_set(oldcg, cgrp, template);
if (res)
get_css_set(res);
- write_unlock(&css_set_lock);
+ read_unlock(&css_set_lock);
if (res)
return res;
@@ -507,10 +504,6 @@
* knows that the cgroup won't be removed, as cgroup_rmdir()
* needs that mutex.
*
- * The cgroup_common_file_write handler for operations that modify
- * the cgroup hierarchy holds cgroup_mutex across the entire operation,
- * single threading all such cgroup modifications across the system.
- *
* The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't
* (usually) take cgroup_mutex. These are the two most performance
* critical pieces of code here. The exception occurs on cgroup_exit(),
@@ -1093,6 +1086,8 @@
struct cgroupfs_root *root = sb->s_fs_info;
struct cgroup *cgrp = &root->top_cgroup;
int ret;
+ struct cg_cgroup_link *link;
+ struct cg_cgroup_link *saved_link;
BUG_ON(!root);
@@ -1112,10 +1107,9 @@
* root cgroup
*/
write_lock(&css_set_lock);
- while (!list_empty(&cgrp->css_sets)) {
- struct cg_cgroup_link *link;
- link = list_entry(cgrp->css_sets.next,
- struct cg_cgroup_link, cgrp_link_list);
+
+ list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
+ cgrp_link_list) {
list_del(&link->cg_link_list);
list_del(&link->cgrp_link_list);
kfree(link);
@@ -1281,18 +1275,14 @@
}
/*
- * Attach task with pid 'pid' to cgroup 'cgrp'. Call with
- * cgroup_mutex, may take task_lock of task
+ * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
+ * held. May take task_lock of task
*/
-static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
+static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
{
- pid_t pid;
struct task_struct *tsk;
int ret;
- if (sscanf(pidbuf, "%d", &pid) != 1)
- return -EIO;
-
if (pid) {
rcu_read_lock();
tsk = find_task_by_vpid(pid);
@@ -1318,6 +1308,16 @@
return ret;
}
+static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
+{
+ int ret;
+ if (!cgroup_lock_live_group(cgrp))
+ return -ENODEV;
+ ret = attach_task_by_pid(cgrp, pid);
+ cgroup_unlock();
+ return ret;
+}
+
/* The various types of files and directories in a cgroup file system */
enum cgroup_filetype {
FILE_ROOT,
@@ -1327,12 +1327,54 @@
FILE_RELEASE_AGENT,
};
+/**
+ * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
+ * @cgrp: the cgroup to be checked for liveness
+ *
+ * On success, returns true; the lock should be later released with
+ * cgroup_unlock(). On failure returns false with no lock held.
+ */
+bool cgroup_lock_live_group(struct cgroup *cgrp)
+{
+ mutex_lock(&cgroup_mutex);
+ if (cgroup_is_removed(cgrp)) {
+ mutex_unlock(&cgroup_mutex);
+ return false;
+ }
+ return true;
+}
+
+static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
+ const char *buffer)
+{
+ BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+ if (!cgroup_lock_live_group(cgrp))
+ return -ENODEV;
+ strcpy(cgrp->root->release_agent_path, buffer);
+ cgroup_unlock();
+ return 0;
+}
+
+static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
+ struct seq_file *seq)
+{
+ if (!cgroup_lock_live_group(cgrp))
+ return -ENODEV;
+ seq_puts(seq, cgrp->root->release_agent_path);
+ seq_putc(seq, '\n');
+ cgroup_unlock();
+ return 0;
+}
+
+/* A buffer size big enough for numbers or short strings */
+#define CGROUP_LOCAL_BUFFER_SIZE 64
+
static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
struct file *file,
const char __user *userbuf,
size_t nbytes, loff_t *unused_ppos)
{
- char buffer[64];
+ char buffer[CGROUP_LOCAL_BUFFER_SIZE];
int retval = 0;
char *end;
@@ -1361,68 +1403,36 @@
return retval;
}
-static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
- struct cftype *cft,
- struct file *file,
- const char __user *userbuf,
- size_t nbytes, loff_t *unused_ppos)
+/*
+ * Generic write path for files that supply a write_string handler.
+ *
+ * Writes of cft->max_write_len bytes or more are rejected with -E2BIG;
+ * when max_write_len is zero the limit defaults to one less than the
+ * size of the on-stack buffer. The user data is copied in,
+ * nul-terminated, stripped and handed to cft->write_string().
+ *
+ * Short writes use the on-stack buffer; longer ones use kmalloc(). Every
+ * exit taken after the allocation goes through "out" so that the heap
+ * buffer is always freed.
+ *
+ * Returns nbytes on success, -E2BIG, -ENOMEM or -EFAULT on failure, or
+ * the handler's own nonzero return value.
+ */
+static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
+ struct file *file,
+ const char __user *userbuf,
+ size_t nbytes, loff_t *unused_ppos)
{
- enum cgroup_filetype type = cft->private;
- char *buffer;
+ char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
int retval = 0;
+ size_t max_bytes = cft->max_write_len;
+ char *buffer = local_buffer;
- if (nbytes >= PATH_MAX)
+ if (!max_bytes)
+ max_bytes = sizeof(local_buffer) - 1;
+ if (nbytes >= max_bytes)
return -E2BIG;
-
- /* +1 for nul-terminator */
- buffer = kmalloc(nbytes + 1, GFP_KERNEL);
- if (buffer == NULL)
- return -ENOMEM;
-
- if (copy_from_user(buffer, userbuf, nbytes)) {
- retval = -EFAULT;
- goto out1;
+ /* Allocate a dynamic buffer if we need one */
+ if (nbytes >= sizeof(local_buffer)) {
+ buffer = kmalloc(nbytes + 1, GFP_KERNEL);
+ if (buffer == NULL)
+ return -ENOMEM;
}
- buffer[nbytes] = 0; /* nul-terminate */
- strstrip(buffer); /* strip -just- trailing whitespace */
+ /* A failed copy must not return directly: buffer may be kmalloc()ed */
+ if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
+ retval = -EFAULT;
+ goto out;
+ }
- mutex_lock(&cgroup_mutex);
-
- /*
- * This was already checked for in cgroup_file_write(), but
- * check again now we're holding cgroup_mutex.
- */
- if (cgroup_is_removed(cgrp)) {
- retval = -ENODEV;
- goto out2;
- }
-
- switch (type) {
- case FILE_TASKLIST:
- retval = attach_task_by_pid(cgrp, buffer);
- break;
- case FILE_NOTIFY_ON_RELEASE:
- clear_bit(CGRP_RELEASABLE, &cgrp->flags);
- if (simple_strtoul(buffer, NULL, 10) != 0)
- set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
- else
- clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
- break;
- case FILE_RELEASE_AGENT:
- BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
- strcpy(cgrp->root->release_agent_path, buffer);
- break;
- default:
- retval = -EINVAL;
- goto out2;
- }
-
- if (retval == 0)
+ buffer[nbytes] = 0; /* nul-terminate */
+ strstrip(buffer); /* result unused: only trailing whitespace is removed */
+ retval = cft->write_string(cgrp, cft, buffer);
+ if (!retval)
retval = nbytes;
-out2:
- mutex_unlock(&cgroup_mutex);
-out1:
- kfree(buffer);
+out:
+ if (buffer != local_buffer)
+ kfree(buffer);
return retval;
}
@@ -1438,6 +1448,8 @@
return cft->write(cgrp, cft, file, buf, nbytes, ppos);
if (cft->write_u64 || cft->write_s64)
return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
+ if (cft->write_string)
+ return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
if (cft->trigger) {
int ret = cft->trigger(cgrp, (unsigned int)cft->private);
return ret ? ret : nbytes;
@@ -1450,7 +1462,7 @@
char __user *buf, size_t nbytes,
loff_t *ppos)
{
- char tmp[64];
+ char tmp[CGROUP_LOCAL_BUFFER_SIZE];
u64 val = cft->read_u64(cgrp, cft);
int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
@@ -1462,56 +1474,13 @@
char __user *buf, size_t nbytes,
loff_t *ppos)
{
- char tmp[64];
+ char tmp[CGROUP_LOCAL_BUFFER_SIZE];
s64 val = cft->read_s64(cgrp, cft);
int len = sprintf(tmp, "%lld\n", (long long) val);
return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
}
-static ssize_t cgroup_common_file_read(struct cgroup *cgrp,
- struct cftype *cft,
- struct file *file,
- char __user *buf,
- size_t nbytes, loff_t *ppos)
-{
- enum cgroup_filetype type = cft->private;
- char *page;
- ssize_t retval = 0;
- char *s;
-
- if (!(page = (char *)__get_free_page(GFP_KERNEL)))
- return -ENOMEM;
-
- s = page;
-
- switch (type) {
- case FILE_RELEASE_AGENT:
- {
- struct cgroupfs_root *root;
- size_t n;
- mutex_lock(&cgroup_mutex);
- root = cgrp->root;
- n = strnlen(root->release_agent_path,
- sizeof(root->release_agent_path));
- n = min(n, (size_t) PAGE_SIZE);
- strncpy(s, root->release_agent_path, n);
- mutex_unlock(&cgroup_mutex);
- s += n;
- break;
- }
- default:
- retval = -EINVAL;
- goto out;
- }
- *s++ = '\n';
-
- retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
-out:
- free_page((unsigned long)page);
- return retval;
-}
-
static ssize_t cgroup_file_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
{
@@ -1569,6 +1538,7 @@
static struct file_operations cgroup_seqfile_operations = {
.read = seq_read,
+ .write = cgroup_file_write,
.llseek = seq_lseek,
.release = cgroup_seqfile_release,
};
@@ -1756,15 +1726,11 @@
int cgroup_task_count(const struct cgroup *cgrp)
{
int count = 0;
- struct list_head *l;
+ struct cg_cgroup_link *link;
read_lock(&css_set_lock);
- l = cgrp->css_sets.next;
- while (l != &cgrp->css_sets) {
- struct cg_cgroup_link *link =
- list_entry(l, struct cg_cgroup_link, cgrp_link_list);
+ list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
count += atomic_read(&link->cg->ref.refcount);
- l = l->next;
}
read_unlock(&css_set_lock);
return count;
@@ -2227,6 +2193,18 @@
return notify_on_release(cgrp);
}
+/*
+ * write_u64 handler for "notify_on_release": clear CGRP_RELEASABLE, then
+ * set CGRP_NOTIFY_ON_RELEASE for a nonzero @val or clear it for zero.
+ * Always returns 0.
+ */
+static int cgroup_write_notify_on_release(struct cgroup *cgrp,
+ struct cftype *cft,
+ u64 val)
+{
+ clear_bit(CGRP_RELEASABLE, &cgrp->flags);
+ if (!val)
+ clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+ else
+ set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+ return 0;
+}
+
/*
* for the common functions, 'private' gives the type of file
*/
@@ -2235,7 +2213,7 @@
.name = "tasks",
.open = cgroup_tasks_open,
.read = cgroup_tasks_read,
- .write = cgroup_common_file_write,
+ .write_u64 = cgroup_tasks_write,
.release = cgroup_tasks_release,
.private = FILE_TASKLIST,
},
@@ -2243,15 +2221,16 @@
{
.name = "notify_on_release",
.read_u64 = cgroup_read_notify_on_release,
- .write = cgroup_common_file_write,
+ .write_u64 = cgroup_write_notify_on_release,
.private = FILE_NOTIFY_ON_RELEASE,
},
};
static struct cftype cft_release_agent = {
.name = "release_agent",
- .read = cgroup_common_file_read,
- .write = cgroup_common_file_write,
+ .read_seq_string = cgroup_release_agent_show,
+ .write_string = cgroup_release_agent_write,
+ .max_write_len = PATH_MAX,
.private = FILE_RELEASE_AGENT,
};
@@ -2869,16 +2848,17 @@
* cgroup_clone - clone the cgroup the given subsystem is attached to
* @tsk: the task to be moved
* @subsys: the given subsystem
+ * @nodename: the name for the new cgroup
*
* Duplicate the current cgroup in the hierarchy that the given
* subsystem is attached to, and move this task into the new
* child.
*/
-int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
+int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
+ char *nodename)
{
struct dentry *dentry;
int ret = 0;
- char nodename[MAX_CGROUP_TYPE_NAMELEN];
struct cgroup *parent, *child;
struct inode *inode;
struct css_set *cg;
@@ -2903,8 +2883,6 @@
cg = tsk->cgroups;
parent = task_cgroup(tsk, subsys->subsys_id);
- snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "%d", tsk->pid);
-
/* Pin the hierarchy */
atomic_inc(&parent->root->sb->s_active);
@@ -3078,27 +3056,24 @@
while (!list_empty(&release_list)) {
char *argv[3], *envp[3];
int i;
- char *pathbuf;
+ char *pathbuf = NULL, *agentbuf = NULL;
struct cgroup *cgrp = list_entry(release_list.next,
struct cgroup,
release_list);
list_del_init(&cgrp->release_list);
spin_unlock(&release_list_lock);
pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!pathbuf) {
- spin_lock(&release_list_lock);
- continue;
- }
-
- if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0) {
- kfree(pathbuf);
- spin_lock(&release_list_lock);
- continue;
- }
+ if (!pathbuf)
+ goto continue_free;
+ if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
+ goto continue_free;
+ agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
+ if (!agentbuf)
+ goto continue_free;
i = 0;
- argv[i++] = cgrp->root->release_agent_path;
- argv[i++] = (char *)pathbuf;
+ argv[i++] = agentbuf;
+ argv[i++] = pathbuf;
argv[i] = NULL;
i = 0;
@@ -3112,8 +3087,10 @@
* be a slow process */
mutex_unlock(&cgroup_mutex);
call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
- kfree(pathbuf);
mutex_lock(&cgroup_mutex);
+ continue_free:
+ kfree(pathbuf);
+ kfree(agentbuf);
spin_lock(&release_list_lock);
}
spin_unlock(&release_list_lock);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 2cc409c..10ba5f1 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -285,6 +285,11 @@
set_cpus_allowed_ptr(current, &old_allowed);
out_release:
cpu_hotplug_done();
+ if (!err) {
+ if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod,
+ hcpu) == NOTIFY_BAD)
+ BUG();
+ }
return err;
}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index d573891..91cf85b 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -227,10 +227,6 @@
* The task_struct fields mems_allowed and mems_generation may only
* be accessed in the context of that task, so require no locks.
*
- * The cpuset_common_file_write handler for operations that modify
- * the cpuset hierarchy holds cgroup_mutex across the entire operation,
- * single threading all such cpuset modifications across the system.
- *
* The cpuset_common_file_read() handlers only hold callback_mutex across
* small pieces of code, such as when reading out possibly multi-word
* cpumasks and nodemasks.
@@ -369,7 +365,7 @@
my_cpusets_mem_gen = top_cpuset.mems_generation;
} else {
rcu_read_lock();
- my_cpusets_mem_gen = task_cs(current)->mems_generation;
+ my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
rcu_read_unlock();
}
@@ -500,11 +496,16 @@
/*
* rebuild_sched_domains()
*
- * If the flag 'sched_load_balance' of any cpuset with non-empty
- * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
- * which has that flag enabled, or if any cpuset with a non-empty
- * 'cpus' is removed, then call this routine to rebuild the
- * scheduler's dynamic sched domains.
+ * This routine will be called to rebuild the scheduler's dynamic
+ * sched domains:
+ * - if the flag 'sched_load_balance' of any cpuset with non-empty
+ * 'cpus' changes,
+ * - or if the 'cpus' allowed changes in any cpuset which has that
+ * flag enabled,
+ * - or if the 'sched_relax_domain_level' of any cpuset which has
+ * that flag enabled and with non-empty 'cpus' changes,
+ * - or if any cpuset with non-empty 'cpus' is removed,
+ * - or if a cpu gets offlined.
*
* This routine builds a partial partition of the systems CPUs
* (the set of non-overlappping cpumask_t's in the array 'part'
@@ -609,8 +610,13 @@
while (__kfifo_get(q, (void *)&cp, sizeof(cp))) {
struct cgroup *cont;
struct cpuset *child; /* scans child cpusets of cp */
+
+ if (cpus_empty(cp->cpus_allowed))
+ continue;
+
if (is_sched_load_balance(cp))
csa[csn++] = cp;
+
list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
child = cgroup_cs(cont);
__kfifo_put(q, (void *)&child, sizeof(cp));
@@ -703,36 +709,6 @@
/* Don't kfree(dattr) -- partition_sched_domains() does that. */
}
-static inline int started_after_time(struct task_struct *t1,
- struct timespec *time,
- struct task_struct *t2)
-{
- int start_diff = timespec_compare(&t1->start_time, time);
- if (start_diff > 0) {
- return 1;
- } else if (start_diff < 0) {
- return 0;
- } else {
- /*
- * Arbitrarily, if two processes started at the same
- * time, we'll say that the lower pointer value
- * started first. Note that t2 may have exited by now
- * so this may not be a valid pointer any longer, but
- * that's fine - it still serves to distinguish
- * between two tasks started (effectively)
- * simultaneously.
- */
- return t1 > t2;
- }
-}
-
-static inline int started_after(void *p1, void *p2)
-{
- struct task_struct *t1 = p1;
- struct task_struct *t2 = p2;
- return started_after_time(t1, &t2->start_time, t2);
-}
-
/**
* cpuset_test_cpumask - test a task's cpus_allowed versus its cpuset's
* @tsk: task to test
@@ -768,15 +744,49 @@
}
/**
+ * update_tasks_cpumask - push a cpuset's cpumask out to its tasks
+ * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
+ *
+ * Must be called with cgroup_mutex held.
+ *
+ * Walks the tasks of the cpuset's cgroup with cgroup_scan_tasks(), using
+ * cpuset_test_cpumask() to select tasks and cpuset_change_cpumask() to
+ * update them.
+ *
+ * Returns 0 on success, -errno otherwise.
+ */
+static int update_tasks_cpumask(struct cpuset *cs)
+{
+ struct cgroup_scanner scanner;
+ struct ptr_heap task_heap;
+ int err;
+
+ /*
+ * The comparator is left NULL because cgroup_scan_tasks() sets
+ * heap->gt itself. The heap is still initialized here so that an
+ * allocation failure is reported before any task is scanned.
+ */
+ err = heap_init(&task_heap, PAGE_SIZE, GFP_KERNEL, NULL);
+ if (!err) {
+ scanner.cg = cs->css.cgroup;
+ scanner.test_task = cpuset_test_cpumask;
+ scanner.process_task = cpuset_change_cpumask;
+ scanner.heap = &task_heap;
+ err = cgroup_scan_tasks(&scanner);
+
+ heap_free(&task_heap);
+ }
+ return err;
+}
+
+/**
* update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
* @cs: the cpuset to consider
* @buf: buffer of cpu numbers written to this cpuset
*/
-static int update_cpumask(struct cpuset *cs, char *buf)
+static int update_cpumask(struct cpuset *cs, const char *buf)
{
struct cpuset trialcs;
- struct cgroup_scanner scan;
- struct ptr_heap heap;
int retval;
int is_load_balanced;
@@ -792,7 +802,6 @@
* that parsing. The validate_change() call ensures that cpusets
* with tasks have cpus.
*/
- buf = strstrip(buf);
if (!*buf) {
cpus_clear(trialcs.cpus_allowed);
} else {
@@ -811,10 +820,6 @@
if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
return 0;
- retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after);
- if (retval)
- return retval;
-
is_load_balanced = is_sched_load_balance(&trialcs);
mutex_lock(&callback_mutex);
@@ -825,12 +830,9 @@
* Scan tasks in the cpuset, and update the cpumasks of any
* that need an update.
*/
- scan.cg = cs->css.cgroup;
- scan.test_task = cpuset_test_cpumask;
- scan.process_task = cpuset_change_cpumask;
- scan.heap = &heap;
- cgroup_scan_tasks(&scan);
- heap_free(&heap);
+ retval = update_tasks_cpumask(cs);
+ if (retval < 0)
+ return retval;
if (is_load_balanced)
rebuild_sched_domains();
@@ -886,74 +888,25 @@
mutex_unlock(&callback_mutex);
}
-/*
- * Handle user request to change the 'mems' memory placement
- * of a cpuset. Needs to validate the request, update the
- * cpusets mems_allowed and mems_generation, and for each
- * task in the cpuset, rebind any vma mempolicies and if
- * the cpuset is marked 'memory_migrate', migrate the tasks
- * pages to the new memory.
- *
- * Call with cgroup_mutex held. May take callback_mutex during call.
- * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
- * lock each such tasks mm->mmap_sem, scan its vma's and rebind
- * their mempolicies to the cpusets new mems_allowed.
- */
-
static void *cpuset_being_rebound;
-static int update_nodemask(struct cpuset *cs, char *buf)
+/**
+ * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
+ * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
+ * @oldmem: old mems_allowed of cpuset cs
+ *
+ * Called with cgroup_mutex held
+ * Return 0 if successful, -errno if not.
+ */
+static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
{
- struct cpuset trialcs;
- nodemask_t oldmem;
struct task_struct *p;
struct mm_struct **mmarray;
int i, n, ntasks;
int migrate;
int fudge;
- int retval;
struct cgroup_iter it;
-
- /*
- * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
- * it's read-only
- */
- if (cs == &top_cpuset)
- return -EACCES;
-
- trialcs = *cs;
-
- /*
- * An empty mems_allowed is ok iff there are no tasks in the cpuset.
- * Since nodelist_parse() fails on an empty mask, we special case
- * that parsing. The validate_change() call ensures that cpusets
- * with tasks have memory.
- */
- buf = strstrip(buf);
- if (!*buf) {
- nodes_clear(trialcs.mems_allowed);
- } else {
- retval = nodelist_parse(buf, trialcs.mems_allowed);
- if (retval < 0)
- goto done;
-
- if (!nodes_subset(trialcs.mems_allowed,
- node_states[N_HIGH_MEMORY]))
- return -EINVAL;
- }
- oldmem = cs->mems_allowed;
- if (nodes_equal(oldmem, trialcs.mems_allowed)) {
- retval = 0; /* Too easy - nothing to do */
- goto done;
- }
- retval = validate_change(cs, &trialcs);
- if (retval < 0)
- goto done;
-
- mutex_lock(&callback_mutex);
- cs->mems_allowed = trialcs.mems_allowed;
- cs->mems_generation = cpuset_mems_generation++;
- mutex_unlock(&callback_mutex);
+ int retval;
cpuset_being_rebound = cs; /* causes mpol_dup() rebind */
@@ -1020,7 +973,7 @@
mpol_rebind_mm(mm, &cs->mems_allowed);
if (migrate)
- cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed);
+ cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
mmput(mm);
}
@@ -1032,6 +985,70 @@
return retval;
}
+/*
+ * Handle user request to change the 'mems' memory placement
+ * of a cpuset. Needs to validate the request, update the
+ * cpuset's mems_allowed and mems_generation, and for each
+ * task in the cpuset, rebind any vma mempolicies and if
+ * the cpuset is marked 'memory_migrate', migrate the task's
+ * pages to the new memory (the per-task work is delegated to
+ * update_tasks_nodemask()).
+ *
+ * @cs: the cpuset whose mems_allowed is to be changed
+ * @buf: node list text; an empty string requests an empty mems_allowed
+ *
+ * Call with cgroup_mutex held. May take callback_mutex during call.
+ * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
+ * lock each such tasks mm->mmap_sem, scan its vma's and rebind
+ * their mempolicies to the cpusets new mems_allowed.
+ *
+ * Returns 0 on success (including when the mask is unchanged),
+ * -EACCES for the top cpuset, -EINVAL if the requested nodes are not
+ * a subset of node_states[N_HIGH_MEMORY], or the error returned by
+ * nodelist_parse(), validate_change() or update_tasks_nodemask().
+ */
+static int update_nodemask(struct cpuset *cs, const char *buf)
+{
+ struct cpuset trialcs;
+ nodemask_t oldmem;
+ int retval;
+
+ /*
+ * top_cpuset.mems_allowed tracks node_states[N_HIGH_MEMORY];
+ * it's read-only
+ */
+ if (cs == &top_cpuset)
+ return -EACCES;
+
+ trialcs = *cs;
+
+ /*
+ * An empty mems_allowed is ok iff there are no tasks in the cpuset.
+ * Since nodelist_parse() fails on an empty mask, we special case
+ * that parsing. The validate_change() call ensures that cpusets
+ * with tasks have memory.
+ */
+ if (!*buf) {
+ nodes_clear(trialcs.mems_allowed);
+ } else {
+ retval = nodelist_parse(buf, trialcs.mems_allowed);
+ if (retval < 0)
+ goto done;
+
+ if (!nodes_subset(trialcs.mems_allowed,
+ node_states[N_HIGH_MEMORY]))
+ return -EINVAL;
+ }
+ oldmem = cs->mems_allowed;
+ if (nodes_equal(oldmem, trialcs.mems_allowed)) {
+ retval = 0; /* Too easy - nothing to do */
+ goto done;
+ }
+ retval = validate_change(cs, &trialcs);
+ if (retval < 0)
+ goto done;
+
+ mutex_lock(&callback_mutex);
+ cs->mems_allowed = trialcs.mems_allowed;
+ cs->mems_generation = cpuset_mems_generation++;
+ mutex_unlock(&callback_mutex);
+
+ retval = update_tasks_nodemask(cs, &oldmem);
+done:
+ return retval;
+}
+
int current_cpuset_is_being_rebound(void)
{
return task_cs(current) == cpuset_being_rebound;
@@ -1044,7 +1061,8 @@
if (val != cs->relax_domain_level) {
cs->relax_domain_level = val;
- rebuild_sched_domains();
+ if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs))
+ rebuild_sched_domains();
}
return 0;
@@ -1256,72 +1274,14 @@
FILE_SPREAD_SLAB,
} cpuset_filetype_t;
-static ssize_t cpuset_common_file_write(struct cgroup *cont,
- struct cftype *cft,
- struct file *file,
- const char __user *userbuf,
- size_t nbytes, loff_t *unused_ppos)
-{
- struct cpuset *cs = cgroup_cs(cont);
- cpuset_filetype_t type = cft->private;
- char *buffer;
- int retval = 0;
-
- /* Crude upper limit on largest legitimate cpulist user might write. */
- if (nbytes > 100U + 6 * max(NR_CPUS, MAX_NUMNODES))
- return -E2BIG;
-
- /* +1 for nul-terminator */
- buffer = kmalloc(nbytes + 1, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
-
- if (copy_from_user(buffer, userbuf, nbytes)) {
- retval = -EFAULT;
- goto out1;
- }
- buffer[nbytes] = 0; /* nul-terminate */
-
- cgroup_lock();
-
- if (cgroup_is_removed(cont)) {
- retval = -ENODEV;
- goto out2;
- }
-
- switch (type) {
- case FILE_CPULIST:
- retval = update_cpumask(cs, buffer);
- break;
- case FILE_MEMLIST:
- retval = update_nodemask(cs, buffer);
- break;
- default:
- retval = -EINVAL;
- goto out2;
- }
-
- if (retval == 0)
- retval = nbytes;
-out2:
- cgroup_unlock();
-out1:
- kfree(buffer);
- return retval;
-}
-
static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
{
int retval = 0;
struct cpuset *cs = cgroup_cs(cgrp);
cpuset_filetype_t type = cft->private;
- cgroup_lock();
-
- if (cgroup_is_removed(cgrp)) {
- cgroup_unlock();
+ if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
- }
switch (type) {
case FILE_CPU_EXCLUSIVE:
@@ -1367,12 +1327,9 @@
struct cpuset *cs = cgroup_cs(cgrp);
cpuset_filetype_t type = cft->private;
- cgroup_lock();
-
- if (cgroup_is_removed(cgrp)) {
- cgroup_unlock();
+ if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
- }
+
switch (type) {
case FILE_SCHED_RELAX_DOMAIN_LEVEL:
retval = update_relax_domain_level(cs, val);
@@ -1386,6 +1343,32 @@
}
/*
+ * Shared write_string handler for the "cpus" and "mems" files.
+ * cft->private selects update_cpumask() or update_nodemask(); any other
+ * value yields -EINVAL. Returns -ENODEV if the cgroup has been removed,
+ * otherwise the result of the selected update.
+ */
+static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
+ const char *buf)
+{
+ int err;
+
+ if (!cgroup_lock_live_group(cgrp))
+ return -ENODEV;
+
+ if (cft->private == FILE_CPULIST)
+ err = update_cpumask(cgroup_cs(cgrp), buf);
+ else if (cft->private == FILE_MEMLIST)
+ err = update_nodemask(cgroup_cs(cgrp), buf);
+ else
+ err = -EINVAL;
+
+ cgroup_unlock();
+ return err;
+}
+
+/*
* These ascii lists should be read in a single call, by using a user
* buffer large enough to hold the entire map. If read in smaller
* chunks, there is no guarantee of atomicity. Since the display format
@@ -1504,14 +1487,16 @@
{
.name = "cpus",
.read = cpuset_common_file_read,
- .write = cpuset_common_file_write,
+ .write_string = cpuset_write_resmask,
+ .max_write_len = (100U + 6 * NR_CPUS),
.private = FILE_CPULIST,
},
{
.name = "mems",
.read = cpuset_common_file_read,
- .write = cpuset_common_file_write,
+ .write_string = cpuset_write_resmask,
+ .max_write_len = (100U + 6 * MAX_NUMNODES),
.private = FILE_MEMLIST,
},
@@ -1792,7 +1777,7 @@
scan.scan.heap = NULL;
scan.to = to->css.cgroup;
- if (cgroup_scan_tasks((struct cgroup_scanner *)&scan))
+ if (cgroup_scan_tasks(&scan.scan))
printk(KERN_ERR "move_member_tasks_to_cpuset: "
"cgroup_scan_tasks failed\n");
}
@@ -1852,6 +1837,7 @@
struct cpuset *child; /* scans child cpusets of cp */
struct list_head queue;
struct cgroup *cont;
+ nodemask_t oldmems;
INIT_LIST_HEAD(&queue);
@@ -1871,6 +1857,8 @@
nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
continue;
+ oldmems = cp->mems_allowed;
+
/* Remove offline cpus and mems from this cpuset. */
mutex_lock(&callback_mutex);
cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map);
@@ -1882,6 +1870,10 @@
if (cpus_empty(cp->cpus_allowed) ||
nodes_empty(cp->mems_allowed))
remove_tasks_in_empty_cpuset(cp);
+ else {
+ update_tasks_cpumask(cp);
+ update_tasks_nodemask(cp, &oldmems);
+ }
}
}
@@ -1974,7 +1966,6 @@
}
/**
-
* cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
* @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
* @pmask: pointer to cpumask_t variable to receive cpus_allowed set.
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 10e43fd..b3179da 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -145,8 +145,11 @@
d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
+ tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
+ d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
d->blkio_count += tsk->delays->blkio_count;
d->swapin_count += tsk->delays->swapin_count;
+ d->freepages_count += tsk->delays->freepages_count;
spin_unlock_irqrestore(&tsk->delays->lock, flags);
done:
@@ -165,3 +168,16 @@
return ret;
}
+/*
+ * Begin a "freepages" delay interval for the current task by handing its
+ * freepages_start slot to delayacct_start().
+ */
+void __delayacct_freepages_start(void)
+{
+ delayacct_start(&current->delays->freepages_start);
+}
+
+/*
+ * End the "freepages" delay interval for the current task: pass the
+ * task's freepages start/end slots together with its freepages_delay
+ * total and freepages_count counter to delayacct_end().
+ */
+void __delayacct_freepages_end(void)
+{
+ delayacct_end(&current->delays->freepages_start,
+ &current->delays->freepages_end,
+ &current->delays->freepages_delay,
+ &current->delays->freepages_count);
+}
+
diff --git a/kernel/exit.c b/kernel/exit.c
index 93d2711..ad933bb 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -85,7 +85,6 @@
BUG_ON(!sig);
BUG_ON(!atomic_read(&sig->count));
- rcu_read_lock();
sighand = rcu_dereference(tsk->sighand);
spin_lock(&sighand->siglock);
@@ -121,6 +120,18 @@
sig->nivcsw += tsk->nivcsw;
sig->inblock += task_io_get_inblock(tsk);
sig->oublock += task_io_get_oublock(tsk);
+#ifdef CONFIG_TASK_XACCT
+ sig->rchar += tsk->rchar;
+ sig->wchar += tsk->wchar;
+ sig->syscr += tsk->syscr;
+ sig->syscw += tsk->syscw;
+#endif /* CONFIG_TASK_XACCT */
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+ sig->ioac.read_bytes += tsk->ioac.read_bytes;
+ sig->ioac.write_bytes += tsk->ioac.write_bytes;
+ sig->ioac.cancelled_write_bytes +=
+ tsk->ioac.cancelled_write_bytes;
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
sig = NULL; /* Marker for below. */
}
@@ -136,7 +147,6 @@
tsk->signal = NULL;
tsk->sighand = NULL;
spin_unlock(&sighand->siglock);
- rcu_read_unlock();
__cleanup_sighand(sighand);
clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
@@ -432,7 +442,7 @@
* We don't want to have TIF_FREEZE set if the system-wide hibernation
* or suspend transition begins right now.
*/
- current->flags |= PF_NOFREEZE;
+ current->flags |= (PF_NOFREEZE | PF_KTHREAD);
if (current->nsproxy != &init_nsproxy) {
get_nsproxy(&init_nsproxy);
@@ -666,26 +676,40 @@
static void exit_mm(struct task_struct * tsk)
{
struct mm_struct *mm = tsk->mm;
+ struct core_state *core_state;
mm_release(tsk, mm);
if (!mm)
return;
/*
* Serialize with any possible pending coredump.
- * We must hold mmap_sem around checking core_waiters
+ * We must hold mmap_sem around checking core_state
* and clearing tsk->mm. The core-inducing thread
- * will increment core_waiters for each thread in the
+ * will increment ->nr_threads for each thread in the
* group with ->mm != NULL.
*/
down_read(&mm->mmap_sem);
- if (mm->core_waiters) {
+ core_state = mm->core_state;
+ if (core_state) {
+ struct core_thread self;
up_read(&mm->mmap_sem);
- down_write(&mm->mmap_sem);
- if (!--mm->core_waiters)
- complete(mm->core_startup_done);
- up_write(&mm->mmap_sem);
- wait_for_completion(&mm->core_done);
+ self.task = tsk;
+ self.next = xchg(&core_state->dumper.next, &self);
+ /*
+ * Implies mb(), the result of xchg() must be visible
+ * to core_state->dumper.
+ */
+ if (atomic_dec_and_test(&core_state->nr_threads))
+ complete(&core_state->startup);
+
+ for (;;) {
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ if (!self.task) /* see coredump_finish() */
+ break;
+ schedule();
+ }
+ __set_task_state(tsk, TASK_RUNNING);
down_read(&mm->mmap_sem);
}
atomic_inc(&mm->mm_count);
@@ -1354,6 +1378,21 @@
psig->coublock +=
task_io_get_oublock(p) +
sig->oublock + sig->coublock;
+#ifdef CONFIG_TASK_XACCT
+ psig->rchar += p->rchar + sig->rchar;
+ psig->wchar += p->wchar + sig->wchar;
+ psig->syscr += p->syscr + sig->syscr;
+ psig->syscw += p->syscw + sig->syscw;
+#endif /* CONFIG_TASK_XACCT */
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+ psig->ioac.read_bytes +=
+ p->ioac.read_bytes + sig->ioac.read_bytes;
+ psig->ioac.write_bytes +=
+ p->ioac.write_bytes + sig->ioac.write_bytes;
+ psig->ioac.cancelled_write_bytes +=
+ p->ioac.cancelled_write_bytes +
+ sig->ioac.cancelled_write_bytes;
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
spin_unlock_irq(&p->parent->sighand->siglock);
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 552c8d8..b99d73e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -93,6 +93,23 @@
static struct kmem_cache *task_struct_cachep;
#endif
+#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+/*
+ * Default thread_info allocator, compiled only when
+ * __HAVE_ARCH_THREAD_INFO_ALLOCATOR is not defined. Requests an
+ * order-THREAD_SIZE_ORDER block of pages with GFP_KERNEL; the pages are
+ * zeroed only when CONFIG_DEBUG_STACK_USAGE is set.
+ */
+static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
+{
+ gfp_t mask = GFP_KERNEL;
+
+#ifdef CONFIG_DEBUG_STACK_USAGE
+ mask |= __GFP_ZERO;
+#endif
+ return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
+}
+
+/* Release the pages obtained by alloc_thread_info(). */
+static inline void free_thread_info(struct thread_info *ti)
+{
+ free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+}
+#endif
+
/* SLAB cache for signal_struct structures (tsk->signal) */
static struct kmem_cache *signal_cachep;
@@ -383,7 +400,7 @@
INIT_LIST_HEAD(&mm->mmlist);
mm->flags = (current->mm) ? current->mm->flags
: MMF_DUMP_FILTER_DEFAULT;
- mm->core_waiters = 0;
+ mm->core_state = NULL;
mm->nr_ptes = 0;
set_mm_counter(mm, file_rss, 0);
set_mm_counter(mm, anon_rss, 0);
@@ -457,7 +474,7 @@
/**
* get_task_mm - acquire a reference to the task's mm
*
- * Returns %NULL if the task has no mm. Checks PF_BORROWED_MM (meaning
+ * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning
* this kernel workthread has transiently adopted a user mm with use_mm,
* to do its AIO) is not set and if so returns a reference to it, after
* bumping up the use count. User must release the mm via mmput()
@@ -470,7 +487,7 @@
task_lock(task);
mm = task->mm;
if (mm) {
- if (task->flags & PF_BORROWED_MM)
+ if (task->flags & PF_KTHREAD)
mm = NULL;
else
atomic_inc(&mm->mm_users);
@@ -795,6 +812,12 @@
sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
+#ifdef CONFIG_TASK_XACCT
+ sig->rchar = sig->wchar = sig->syscr = sig->syscw = 0;
+#endif
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+ memset(&sig->ioac, 0, sizeof(sig->ioac));
+#endif
sig->sum_sched_runtime = 0;
INIT_LIST_HEAD(&sig->cpu_timers[0]);
INIT_LIST_HEAD(&sig->cpu_timers[1]);
@@ -1090,6 +1113,12 @@
if (clone_flags & CLONE_THREAD)
p->tgid = current->tgid;
+ if (current->nsproxy != p->nsproxy) {
+ retval = ns_cgroup_clone(p, pid);
+ if (retval)
+ goto bad_fork_free_pid;
+ }
+
p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
/*
* Clear TID on mm_release()?
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 5bc6e5e..f8914b9 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -260,9 +260,7 @@
}
} else {
if (desc->wake_depth == 0) {
- printk(KERN_WARNING "Unbalanced IRQ %d "
- "wake disable\n", irq);
- WARN_ON(1);
+ WARN(1, "Unbalanced IRQ %d wake disable\n", irq);
} else if (--desc->wake_depth == 0) {
ret = set_irq_wake_real(irq, on);
if (ret)
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 6fc0040..38fc10a 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -176,7 +176,7 @@
high = kallsyms_num_syms;
while (high - low > 1) {
- mid = (low + high) / 2;
+ mid = low + (high - low) / 2;
if (kallsyms_addresses[mid] <= addr)
low = mid;
else
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 2989f67..2456d1a 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -352,16 +352,17 @@
* @path: path to usermode executable
* @argv: arg vector for process
* @envp: environment for process
+ * @gfp_mask: gfp mask for memory allocation
*
* Returns either %NULL on allocation failure, or a subprocess_info
* structure. This should be passed to call_usermodehelper_exec to
* exec the process and free the structure.
*/
-struct subprocess_info *call_usermodehelper_setup(char *path,
- char **argv, char **envp)
+struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
+ char **envp, gfp_t gfp_mask)
{
struct subprocess_info *sub_info;
- sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC);
+ sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask);
if (!sub_info)
goto out;
@@ -494,7 +495,7 @@
struct subprocess_info *sub_info;
int ret;
- sub_info = call_usermodehelper_setup(path, argv, envp);
+ sub_info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL);
if (sub_info == NULL)
return -ENOMEM;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1485ca8..75bc2cd 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -62,6 +62,7 @@
addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name)))
#endif
+static int kprobes_initialized;
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
@@ -69,8 +70,15 @@
static bool kprobe_enabled;
DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
-DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
+static struct {
+ spinlock_t lock ____cacheline_aligned;
+} kretprobe_table_locks[KPROBE_TABLE_SIZE];
+
+static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
+{
+ return &(kretprobe_table_locks[hash].lock);
+}
/*
* Normally, functions that we'd want to prohibit kprobes in, are marked
@@ -368,26 +376,53 @@
return;
}
-/* Called with kretprobe_lock held */
void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
struct hlist_head *head)
{
+ struct kretprobe *rp = ri->rp;
+
/* remove rp inst off the rprobe_inst_table */
hlist_del(&ri->hlist);
- if (ri->rp) {
- /* remove rp inst off the used list */
- hlist_del(&ri->uflist);
- /* put rp inst back onto the free list */
- INIT_HLIST_NODE(&ri->uflist);
- hlist_add_head(&ri->uflist, &ri->rp->free_instances);
+ INIT_HLIST_NODE(&ri->hlist);
+ if (likely(rp)) {
+ spin_lock(&rp->lock);
+ hlist_add_head(&ri->hlist, &rp->free_instances);
+ spin_unlock(&rp->lock);
} else
/* Unregistering */
hlist_add_head(&ri->hlist, head);
}
-struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk)
+void kretprobe_hash_lock(struct task_struct *tsk,
+ struct hlist_head **head, unsigned long *flags)
{
- return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)];
+ unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
+ spinlock_t *hlist_lock;
+
+ *head = &kretprobe_inst_table[hash];
+ hlist_lock = kretprobe_table_lock_ptr(hash);
+ spin_lock_irqsave(hlist_lock, *flags);
+}
+
+void kretprobe_table_lock(unsigned long hash, unsigned long *flags)
+{
+ spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
+ spin_lock_irqsave(hlist_lock, *flags);
+}
+
+void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags)
+{
+ unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
+ spinlock_t *hlist_lock;
+
+ hlist_lock = kretprobe_table_lock_ptr(hash);
+ spin_unlock_irqrestore(hlist_lock, *flags);
+}
+
+void kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
+{
+ spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
+ spin_unlock_irqrestore(hlist_lock, *flags);
}
/*
@@ -401,17 +436,21 @@
struct kretprobe_instance *ri;
struct hlist_head *head, empty_rp;
struct hlist_node *node, *tmp;
- unsigned long flags = 0;
+ unsigned long hash, flags = 0;
- INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(tk);
+ if (unlikely(!kprobes_initialized))
+ /* Early boot. kretprobe_table_locks not yet initialized. */
+ return;
+
+ hash = hash_ptr(tk, KPROBE_HASH_BITS);
+ head = &kretprobe_inst_table[hash];
+ kretprobe_table_lock(hash, &flags);
hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
if (ri->task == tk)
recycle_rp_inst(ri, &empty_rp);
}
- spin_unlock_irqrestore(&kretprobe_lock, flags);
-
+ kretprobe_table_unlock(hash, &flags);
+ INIT_HLIST_HEAD(&empty_rp);
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
kfree(ri);
@@ -423,24 +462,29 @@
struct kretprobe_instance *ri;
struct hlist_node *pos, *next;
- hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, uflist) {
- hlist_del(&ri->uflist);
+ hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) {
+ hlist_del(&ri->hlist);
kfree(ri);
}
}
static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
{
- unsigned long flags;
+ unsigned long flags, hash;
struct kretprobe_instance *ri;
struct hlist_node *pos, *next;
+ struct hlist_head *head;
+
/* No race here */
- spin_lock_irqsave(&kretprobe_lock, flags);
- hlist_for_each_entry_safe(ri, pos, next, &rp->used_instances, uflist) {
- ri->rp = NULL;
- hlist_del(&ri->uflist);
+ for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
+ kretprobe_table_lock(hash, &flags);
+ head = &kretprobe_inst_table[hash];
+ hlist_for_each_entry_safe(ri, pos, next, head, hlist) {
+ if (ri->rp == rp)
+ ri->rp = NULL;
+ }
+ kretprobe_table_unlock(hash, &flags);
}
- spin_unlock_irqrestore(&kretprobe_lock, flags);
free_rp_inst(rp);
}
@@ -831,32 +875,37 @@
struct pt_regs *regs)
{
struct kretprobe *rp = container_of(p, struct kretprobe, kp);
- unsigned long flags = 0;
+ unsigned long hash, flags = 0;
+ struct kretprobe_instance *ri;
/*TODO: consider to only swap the RA after the last pre_handler fired */
- spin_lock_irqsave(&kretprobe_lock, flags);
+ hash = hash_ptr(current, KPROBE_HASH_BITS);
+ spin_lock_irqsave(&rp->lock, flags);
if (!hlist_empty(&rp->free_instances)) {
- struct kretprobe_instance *ri;
-
ri = hlist_entry(rp->free_instances.first,
- struct kretprobe_instance, uflist);
+ struct kretprobe_instance, hlist);
+ hlist_del(&ri->hlist);
+ spin_unlock_irqrestore(&rp->lock, flags);
+
ri->rp = rp;
ri->task = current;
if (rp->entry_handler && rp->entry_handler(ri, regs)) {
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ spin_unlock_irqrestore(&rp->lock, flags);
return 0;
}
arch_prepare_kretprobe(ri, regs);
/* XXX(hch): why is there no hlist_move_head? */
- hlist_del(&ri->uflist);
- hlist_add_head(&ri->uflist, &ri->rp->used_instances);
- hlist_add_head(&ri->hlist, kretprobe_inst_table_head(ri->task));
- } else
+ INIT_HLIST_NODE(&ri->hlist);
+ kretprobe_table_lock(hash, &flags);
+ hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
+ kretprobe_table_unlock(hash, &flags);
+ } else {
rp->nmissed++;
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ spin_unlock_irqrestore(&rp->lock, flags);
+ }
return 0;
}
@@ -892,7 +941,7 @@
rp->maxactive = NR_CPUS;
#endif
}
- INIT_HLIST_HEAD(&rp->used_instances);
+ spin_lock_init(&rp->lock);
INIT_HLIST_HEAD(&rp->free_instances);
for (i = 0; i < rp->maxactive; i++) {
inst = kmalloc(sizeof(struct kretprobe_instance) +
@@ -901,8 +950,8 @@
free_rp_inst(rp);
return -ENOMEM;
}
- INIT_HLIST_NODE(&inst->uflist);
- hlist_add_head(&inst->uflist, &rp->free_instances);
+ INIT_HLIST_NODE(&inst->hlist);
+ hlist_add_head(&inst->hlist, &rp->free_instances);
}
rp->nmissed = 0;
@@ -1009,6 +1058,7 @@
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
INIT_HLIST_HEAD(&kprobe_table[i]);
INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
+ spin_lock_init(&(kretprobe_table_locks[i].lock));
}
/*
@@ -1050,6 +1100,7 @@
err = arch_init_kprobes();
if (!err)
err = register_die_notifier(&kprobe_exceptions_nb);
+ kprobes_initialized = (err == 0);
if (!err)
init_test_probes();
@@ -1286,13 +1337,8 @@
EXPORT_SYMBOL_GPL(unregister_jprobe);
EXPORT_SYMBOL_GPL(register_jprobes);
EXPORT_SYMBOL_GPL(unregister_jprobes);
-#ifdef CONFIG_KPROBES
EXPORT_SYMBOL_GPL(jprobe_return);
-#endif
-
-#ifdef CONFIG_KPROBES
EXPORT_SYMBOL_GPL(register_kretprobe);
EXPORT_SYMBOL_GPL(unregister_kretprobe);
EXPORT_SYMBOL_GPL(register_kretprobes);
EXPORT_SYMBOL_GPL(unregister_kretprobes);
-#endif
diff --git a/kernel/marker.c b/kernel/marker.c
index 1abfb92..971da53 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -441,7 +441,7 @@
hlist_del(&e->hlist);
/* Make sure the call_rcu has been executed */
if (e->rcu_pending)
- rcu_barrier();
+ rcu_barrier_sched();
kfree(e);
return 0;
}
@@ -476,7 +476,7 @@
hlist_del(&(*entry)->hlist);
/* Make sure the call_rcu has been executed */
if ((*entry)->rcu_pending)
- rcu_barrier();
+ rcu_barrier_sched();
kfree(*entry);
*entry = e;
trace_mark(core_marker_format, "name %s format %s",
@@ -655,7 +655,7 @@
* make sure it's executed now.
*/
if (entry->rcu_pending)
- rcu_barrier();
+ rcu_barrier_sched();
old = marker_entry_add_probe(entry, probe, probe_private);
if (IS_ERR(old)) {
ret = PTR_ERR(old);
@@ -670,10 +670,7 @@
entry->rcu_pending = 1;
/* write rcu_pending before calling the RCU callback */
smp_wmb();
-#ifdef CONFIG_PREEMPT_RCU
- synchronize_sched(); /* Until we have the call_rcu_sched() */
-#endif
- call_rcu(&entry->rcu, free_old_closure);
+ call_rcu_sched(&entry->rcu, free_old_closure);
end:
mutex_unlock(&markers_mutex);
return ret;
@@ -704,7 +701,7 @@
if (!entry)
goto end;
if (entry->rcu_pending)
- rcu_barrier();
+ rcu_barrier_sched();
old = marker_entry_remove_probe(entry, probe, probe_private);
mutex_unlock(&markers_mutex);
marker_update_probes(); /* may update entry */
@@ -716,10 +713,7 @@
entry->rcu_pending = 1;
/* write rcu_pending before calling the RCU callback */
smp_wmb();
-#ifdef CONFIG_PREEMPT_RCU
- synchronize_sched(); /* Until we have the call_rcu_sched() */
-#endif
- call_rcu(&entry->rcu, free_old_closure);
+ call_rcu_sched(&entry->rcu, free_old_closure);
remove_marker(name); /* Ignore busy error message */
ret = 0;
end:
@@ -786,7 +780,7 @@
goto end;
}
if (entry->rcu_pending)
- rcu_barrier();
+ rcu_barrier_sched();
old = marker_entry_remove_probe(entry, NULL, probe_private);
mutex_unlock(&markers_mutex);
marker_update_probes(); /* may update entry */
@@ -797,10 +791,7 @@
entry->rcu_pending = 1;
/* write rcu_pending before calling the RCU callback */
smp_wmb();
-#ifdef CONFIG_PREEMPT_RCU
- synchronize_sched(); /* Until we have the call_rcu_sched() */
-#endif
- call_rcu(&entry->rcu, free_old_closure);
+ call_rcu_sched(&entry->rcu, free_old_closure);
remove_marker(entry->name); /* Ignore busy error message */
end:
mutex_unlock(&markers_mutex);
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c
index 48d7ed6..43c2111 100644
--- a/kernel/ns_cgroup.c
+++ b/kernel/ns_cgroup.c
@@ -7,6 +7,7 @@
#include <linux/module.h>
#include <linux/cgroup.h>
#include <linux/fs.h>
+#include <linux/proc_fs.h>
#include <linux/slab.h>
#include <linux/nsproxy.h>
@@ -24,9 +25,12 @@
struct ns_cgroup, css);
}
-int ns_cgroup_clone(struct task_struct *task)
+int ns_cgroup_clone(struct task_struct *task, struct pid *pid)
{
- return cgroup_clone(task, &ns_subsys);
+ char name[PROC_NUMBUF];
+
+ snprintf(name, PROC_NUMBUF, "%d", pid_vnr(pid));
+ return cgroup_clone(task, &ns_subsys, name);
}
/*
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index adc7851..21575fc 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -157,12 +157,6 @@
goto out;
}
- err = ns_cgroup_clone(tsk);
- if (err) {
- put_nsproxy(new_ns);
- goto out;
- }
-
tsk->nsproxy = new_ns;
out:
@@ -209,7 +203,7 @@
goto out;
}
- err = ns_cgroup_clone(current);
+ err = ns_cgroup_clone(current, task_pid(current));
if (err)
put_nsproxy(*new_nsp);
diff --git a/kernel/panic.c b/kernel/panic.c
index 425567f..12c5a0a 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -318,6 +318,28 @@
add_taint(TAINT_WARN);
}
EXPORT_SYMBOL(warn_on_slowpath);
+
+
+void warn_slowpath(const char *file, int line, const char *fmt, ...)
+{
+ va_list args;
+ char function[KSYM_SYMBOL_LEN];
+ unsigned long caller = (unsigned long)__builtin_return_address(0);
+ sprint_symbol(function, caller);
+
+ printk(KERN_WARNING "------------[ cut here ]------------\n");
+ printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file,
+ line, function);
+ va_start(args, fmt);
+ vprintk(fmt, args);
+ va_end(args);
+
+ print_modules();
+ dump_stack();
+ print_oops_end_marker();
+ add_taint(TAINT_WARN);
+}
+EXPORT_SYMBOL(warn_slowpath);
#endif
#ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/kernel/pid.c b/kernel/pid.c
index 30bd5d4..064e76a 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -309,12 +309,6 @@
}
EXPORT_SYMBOL_GPL(find_vpid);
-struct pid *find_pid(int nr)
-{
- return find_pid_ns(nr, &init_pid_ns);
-}
-EXPORT_SYMBOL_GPL(find_pid);
-
/*
* attach_pid() must be called with the tasklist_lock write-held.
*/
@@ -435,6 +429,7 @@
return pid;
}
+EXPORT_SYMBOL_GPL(find_get_pid);
pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
{
@@ -482,7 +477,7 @@
/*
* Used by proc to find the first pid that is greater then or equal to nr.
*
- * If there is a pid at nr this function is exactly the same as find_pid.
+ * If there is a pid at nr this function is exactly the same as find_pid_ns.
*/
struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
{
@@ -497,7 +492,6 @@
return pid;
}
-EXPORT_SYMBOL_GPL(find_get_pid);
/*
* The pid hash table is scaled according to the amount of memory in the
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 98702b4..ea567b7 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -12,6 +12,7 @@
#include <linux/pid_namespace.h>
#include <linux/syscalls.h>
#include <linux/err.h>
+#include <linux/acct.h>
#define BITS_PER_PAGE (PAGE_SIZE*8)
@@ -71,7 +72,7 @@
struct pid_namespace *ns;
int i;
- ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL);
+ ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
if (ns == NULL)
goto out;
@@ -84,17 +85,13 @@
goto out_free_map;
kref_init(&ns->kref);
- ns->last_pid = 0;
- ns->child_reaper = NULL;
ns->level = level;
set_bit(0, ns->pidmap[0].page);
atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
- for (i = 1; i < PIDMAP_ENTRIES; i++) {
- ns->pidmap[i].page = NULL;
+ for (i = 1; i < PIDMAP_ENTRIES; i++)
atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
- }
return ns;
@@ -185,6 +182,7 @@
/* Child reaper for the pid namespace is going away */
pid_ns->child_reaper = NULL;
+ acct_exit_ns(pid_ns);
return;
}
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index dbd8398..9a21681 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -449,9 +449,6 @@
spin_unlock_irqrestore(&idr_lock, flags);
}
sigqueue_free(tmr->sigq);
- if (unlikely(tmr->it_process) &&
- tmr->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
- put_task_struct(tmr->it_process);
kmem_cache_free(posix_timers_cache, tmr);
}
@@ -856,11 +853,10 @@
* This keeps any tasks waiting on the spin lock from thinking
* they got something (see the lock code above).
*/
- if (timer->it_process) {
- if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
- put_task_struct(timer->it_process);
- timer->it_process = NULL;
- }
+ if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
+ put_task_struct(timer->it_process);
+ timer->it_process = NULL;
+
unlock_timer(timer, flags);
release_posix_timer(timer, IT_ID_SET);
return 0;
@@ -885,11 +881,10 @@
* This keeps any tasks waiting on the spin lock from thinking
* they got something (see the lock code above).
*/
- if (timer->it_process) {
- if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
- put_task_struct(timer->it_process);
- timer->it_process = NULL;
- }
+ if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
+ put_task_struct(timer->it_process);
+ timer->it_process = NULL;
+
unlock_timer(timer, flags);
release_posix_timer(timer, IT_ID_SET);
}
diff --git a/kernel/printk.c b/kernel/printk.c
index 3f7a2a9..a7f7559 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1308,6 +1308,8 @@
}
#if defined CONFIG_PRINTK
+
+DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10);
/*
* printk rate limiting, lifted from the networking subsystem.
*
@@ -1315,22 +1317,9 @@
* every printk_ratelimit_jiffies to make a denial-of-service
* attack impossible.
*/
-int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst)
-{
- return __ratelimit(ratelimit_jiffies, ratelimit_burst);
-}
-EXPORT_SYMBOL(__printk_ratelimit);
-
-/* minimum time in jiffies between messages */
-int printk_ratelimit_jiffies = 5 * HZ;
-
-/* number of messages we send before ratelimiting */
-int printk_ratelimit_burst = 10;
-
int printk_ratelimit(void)
{
- return __printk_ratelimit(printk_ratelimit_jiffies,
- printk_ratelimit_burst);
+ return __ratelimit(&printk_ratelimit_state);
}
EXPORT_SYMBOL(printk_ratelimit);
diff --git a/kernel/profile.c b/kernel/profile.c
index 5892641..cd26bed 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -112,8 +112,6 @@
/* Profile event notifications */
-#ifdef CONFIG_PROFILING
-
static BLOCKING_NOTIFIER_HEAD(task_exit_notifier);
static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
static BLOCKING_NOTIFIER_HEAD(munmap_notifier);
@@ -203,8 +201,6 @@
}
EXPORT_SYMBOL_GPL(unregister_timer_hook);
-#endif /* CONFIG_PROFILING */
-
#ifdef CONFIG_SMP
/*
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index d3c61b4..f275c8e 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/res_counter.h>
#include <linux/uaccess.h>
+#include <linux/mm.h>
void res_counter_init(struct res_counter *counter)
{
@@ -102,44 +103,37 @@
return *res_counter_member(counter, member);
}
-ssize_t res_counter_write(struct res_counter *counter, int member,
- const char __user *userbuf, size_t nbytes, loff_t *pos,
- int (*write_strategy)(char *st_buf, unsigned long long *val))
+int res_counter_memparse_write_strategy(const char *buf,
+ unsigned long long *res)
{
- int ret;
- char *buf, *end;
+ char *end;
+ /* FIXME - make memparse() take const char* args */
+ *res = memparse((char *)buf, &end);
+ if (*end != '\0')
+ return -EINVAL;
+
+ *res = PAGE_ALIGN(*res);
+ return 0;
+}
+
+int res_counter_write(struct res_counter *counter, int member,
+ const char *buf, write_strategy_fn write_strategy)
+{
+ char *end;
unsigned long flags;
unsigned long long tmp, *val;
- buf = kmalloc(nbytes + 1, GFP_KERNEL);
- ret = -ENOMEM;
- if (buf == NULL)
- goto out;
-
- buf[nbytes] = '\0';
- ret = -EFAULT;
- if (copy_from_user(buf, userbuf, nbytes))
- goto out_free;
-
- ret = -EINVAL;
-
- strstrip(buf);
if (write_strategy) {
- if (write_strategy(buf, &tmp)) {
- goto out_free;
- }
+ if (write_strategy(buf, &tmp))
+ return -EINVAL;
} else {
tmp = simple_strtoull(buf, &end, 10);
if (*end != '\0')
- goto out_free;
+ return -EINVAL;
}
spin_lock_irqsave(&counter->lock, flags);
val = res_counter_member(counter, member);
*val = tmp;
spin_unlock_irqrestore(&counter->lock, flags);
- ret = nbytes;
-out_free:
- kfree(buf);
-out:
- return ret;
+ return 0;
}
diff --git a/kernel/sched.c b/kernel/sched.c
index 6acf749..0047bd9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4046,6 +4046,8 @@
cpustat->nice = cputime64_add(cpustat->nice, tmp);
else
cpustat->user = cputime64_add(cpustat->user, tmp);
+ /* Account for user time used */
+ acct_update_integrals(p);
}
/*
diff --git a/kernel/signal.c b/kernel/signal.c
index 6c0958e..82c3545 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -338,13 +338,9 @@
spin_unlock_irqrestore(&current->sighand->siglock, flags);
}
-static int collect_signal(int sig, struct sigpending *list, siginfo_t *info)
+static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
{
struct sigqueue *q, *first = NULL;
- int still_pending = 0;
-
- if (unlikely(!sigismember(&list->signal, sig)))
- return 0;
/*
* Collect the siginfo appropriate to this signal. Check if
@@ -352,33 +348,30 @@
*/
list_for_each_entry(q, &list->list, list) {
if (q->info.si_signo == sig) {
- if (first) {
- still_pending = 1;
- break;
- }
+ if (first)
+ goto still_pending;
first = q;
}
}
+
+ sigdelset(&list->signal, sig);
+
if (first) {
+still_pending:
list_del_init(&first->list);
copy_siginfo(info, &first->info);
__sigqueue_free(first);
- if (!still_pending)
- sigdelset(&list->signal, sig);
} else {
-
/* Ok, it wasn't in the queue. This must be
a fast-pathed signal or we must have been
out of queue space. So zero out the info.
*/
- sigdelset(&list->signal, sig);
info->si_signo = sig;
info->si_errno = 0;
info->si_code = 0;
info->si_pid = 0;
info->si_uid = 0;
}
- return 1;
}
static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
@@ -396,8 +389,7 @@
}
}
- if (!collect_signal(sig, pending, info))
- sig = 0;
+ collect_signal(sig, pending, info);
}
return sig;
@@ -462,8 +454,7 @@
* is to alert stop-signal processing code when another
* processor has come along and cleared the flag.
*/
- if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT))
- tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
+ tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
}
if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
/*
@@ -1125,7 +1116,7 @@
* is probably wrong. Should make it like BSD or SYSV.
*/
-static int kill_something_info(int sig, struct siginfo *info, int pid)
+static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
{
int ret;
@@ -1237,17 +1228,6 @@
}
EXPORT_SYMBOL(kill_pid);
-int
-kill_proc(pid_t pid, int sig, int priv)
-{
- int ret;
-
- rcu_read_lock();
- ret = kill_pid_info(sig, __si_special(priv), find_pid(pid));
- rcu_read_unlock();
- return ret;
-}
-
/*
* These functions support sending signals using preallocated sigqueue
* structures. This is needed "because realtime applications cannot
@@ -1379,10 +1359,9 @@
info.si_uid = tsk->uid;
- /* FIXME: find out whether or not this is supposed to be c*time. */
- info.si_utime = cputime_to_jiffies(cputime_add(tsk->utime,
+ info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
tsk->signal->utime));
- info.si_stime = cputime_to_jiffies(cputime_add(tsk->stime,
+ info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
tsk->signal->stime));
info.si_status = tsk->exit_code & 0x7f;
@@ -1450,9 +1429,8 @@
info.si_uid = tsk->uid;
- /* FIXME: find out whether or not this is supposed to be c*time. */
- info.si_utime = cputime_to_jiffies(tsk->utime);
- info.si_stime = cputime_to_jiffies(tsk->stime);
+ info.si_utime = cputime_to_clock_t(tsk->utime);
+ info.si_stime = cputime_to_clock_t(tsk->stime);
info.si_code = why;
switch (why) {
@@ -1491,10 +1469,10 @@
* is a deadlock situation, and pointless because our tracer
* is dead so don't allow us to stop.
* If SIGKILL was already sent before the caller unlocked
- * ->siglock we must see ->core_waiters != 0. Otherwise it
+ * ->siglock we must see ->core_state != NULL. Otherwise it
* is safe to enter schedule().
*/
- if (unlikely(current->mm->core_waiters) &&
+ if (unlikely(current->mm->core_state) &&
unlikely(current->mm == current->parent->mm))
return 0;
@@ -1507,9 +1485,8 @@
*/
static int sigkill_pending(struct task_struct *tsk)
{
- return ((sigismember(&tsk->pending.signal, SIGKILL) ||
- sigismember(&tsk->signal->shared_pending.signal, SIGKILL)) &&
- !unlikely(sigismember(&tsk->blocked, SIGKILL)));
+ return sigismember(&tsk->pending.signal, SIGKILL) ||
+ sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
}
/*
@@ -1525,8 +1502,6 @@
*/
static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
{
- int killed = 0;
-
if (arch_ptrace_stop_needed(exit_code, info)) {
/*
* The arch code has something special to do before a
@@ -1542,7 +1517,8 @@
spin_unlock_irq(&current->sighand->siglock);
arch_ptrace_stop(exit_code, info);
spin_lock_irq(&current->sighand->siglock);
- killed = sigkill_pending(current);
+ if (sigkill_pending(current))
+ return;
}
/*
@@ -1559,7 +1535,7 @@
__set_current_state(TASK_TRACED);
spin_unlock_irq(&current->sighand->siglock);
read_lock(&tasklist_lock);
- if (!unlikely(killed) && may_ptrace_stop()) {
+ if (may_ptrace_stop()) {
do_notify_parent_cldstop(current, CLD_TRAPPED);
read_unlock(&tasklist_lock);
schedule();
@@ -1658,8 +1634,7 @@
} else {
struct task_struct *t;
- if (unlikely((sig->flags & (SIGNAL_STOP_DEQUEUED | SIGNAL_UNKILLABLE))
- != SIGNAL_STOP_DEQUEUED) ||
+ if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
unlikely(signal_group_exit(sig)))
return 0;
/*
@@ -1920,7 +1895,6 @@
EXPORT_SYMBOL_GPL(dequeue_signal);
EXPORT_SYMBOL(flush_signals);
EXPORT_SYMBOL(force_sig);
-EXPORT_SYMBOL(kill_proc);
EXPORT_SYMBOL(ptrace_notify);
EXPORT_SYMBOL(send_sig);
EXPORT_SYMBOL(send_sig_info);
@@ -2196,7 +2170,7 @@
}
asmlinkage long
-sys_kill(int pid, int sig)
+sys_kill(pid_t pid, int sig)
{
struct siginfo info;
@@ -2209,7 +2183,7 @@
return kill_something_info(sig, &info, pid);
}
-static int do_tkill(int tgid, int pid, int sig)
+static int do_tkill(pid_t tgid, pid_t pid, int sig)
{
int error;
struct siginfo info;
@@ -2255,7 +2229,7 @@
* exists but it's not belonging to the target process anymore. This
* method solves the problem of threads exiting and PIDs getting reused.
*/
-asmlinkage long sys_tgkill(int tgid, int pid, int sig)
+asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig)
{
/* This is only valid for single tasks */
if (pid <= 0 || tgid <= 0)
@@ -2268,7 +2242,7 @@
* Send a signal to only one task, even if it's a CLONE_THREAD task.
*/
asmlinkage long
-sys_tkill(int pid, int sig)
+sys_tkill(pid_t pid, int sig)
{
/* This is only valid for single tasks */
if (pid <= 0)
@@ -2278,7 +2252,7 @@
}
asmlinkage long
-sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo)
+sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo)
{
siginfo_t info;
diff --git a/kernel/sys.c b/kernel/sys.c
index 14e9728..0c9d3fa 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1343,8 +1343,6 @@
DECLARE_RWSEM(uts_sem);
-EXPORT_SYMBOL(uts_sem);
-
asmlinkage long sys_newuname(struct new_utsname __user * name)
{
int errno = 0;
@@ -1795,7 +1793,7 @@
goto out;
}
- info = call_usermodehelper_setup(argv[0], argv, envp);
+ info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC);
if (info == NULL) {
argv_free(argv);
goto out;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index bd66ac5..55eca15 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -57,6 +57,7 @@
cond_syscall(sys_get_robust_list);
cond_syscall(compat_sys_get_robust_list);
cond_syscall(sys_epoll_create);
+cond_syscall(sys_epoll_create1);
cond_syscall(sys_epoll_ctl);
cond_syscall(sys_epoll_wait);
cond_syscall(sys_epoll_pwait);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1a8299d..35a50db 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -624,7 +624,7 @@
{
.ctl_name = KERN_PRINTK_RATELIMIT,
.procname = "printk_ratelimit",
- .data = &printk_ratelimit_jiffies,
+ .data = &printk_ratelimit_state.interval,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
@@ -633,7 +633,7 @@
{
.ctl_name = KERN_PRINTK_RATELIMIT_BURST,
.procname = "printk_ratelimit_burst",
- .data = &printk_ratelimit_burst,
+ .data = &printk_ratelimit_state.burst,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index c09350d..c35da23a 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -1532,6 +1532,8 @@
sysctl_check_leaf(namespaces, table, &fail);
}
sysctl_check_bin_path(table, &fail);
+ if (table->mode > 0777)
+ set_fail(&fail, table, "bogus .mode");
if (fail) {
set_fail(&fail, table, NULL);
error = -EINVAL;
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 06b1754..bd6be76 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -35,7 +35,7 @@
*/
#define TASKSTATS_CPUMASK_MAXLEN (100+6*NR_CPUS)
-static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
+static DEFINE_PER_CPU(__u32, taskstats_seqnum);
static int family_registered;
struct kmem_cache *taskstats_cache;
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 6352808..ce2d723 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -161,7 +161,7 @@
__trace_special(tr, data, 2, regs->ip, 0);
while (i < sample_max_depth) {
- frame.next_fp = 0;
+ frame.next_fp = NULL;
frame.return_address = 0;
if (!copy_stack_frame(fp, &frame))
break;
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 4ab1b58..3da47cc 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -28,14 +28,14 @@
void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
{
struct timespec uptime, ts;
- s64 ac_etime;
+ u64 ac_etime;
BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN);
/* calculate task elapsed time in timespec */
do_posix_clock_monotonic_gettime(&uptime);
ts = timespec_sub(uptime, tsk->start_time);
- /* rebase elapsed time to usec */
+ /* rebase elapsed time to usec (should never be negative) */
ac_etime = timespec_to_ns(&ts);
do_div(ac_etime, NSEC_PER_USEC);
stats->ac_etime = ac_etime;
@@ -84,9 +84,9 @@
{
struct mm_struct *mm;
- /* convert pages-jiffies to Mbyte-usec */
- stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB;
- stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB;
+ /* convert pages-usec to Mbyte-usec */
+ stats->coremem = p->acct_rss_mem1 * PAGE_SIZE / MB;
+ stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE / MB;
mm = get_task_mm(p);
if (mm) {
/* adjust to KB unit */
@@ -118,12 +118,19 @@
void acct_update_integrals(struct task_struct *tsk)
{
if (likely(tsk->mm)) {
- long delta = cputime_to_jiffies(
- cputime_sub(tsk->stime, tsk->acct_stimexpd));
+ cputime_t time, dtime;
+ struct timeval value;
+ u64 delta;
+
+ time = tsk->stime + tsk->utime;
+ dtime = cputime_sub(time, tsk->acct_timexpd);
+ jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
+ delta = value.tv_sec;
+ delta = delta * USEC_PER_SEC + value.tv_usec;
if (delta == 0)
return;
- tsk->acct_stimexpd = tsk->stime;
+ tsk->acct_timexpd = time;
tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
}
@@ -135,7 +142,7 @@
*/
void acct_clear_integrals(struct task_struct *tsk)
{
- tsk->acct_stimexpd = 0;
+ tsk->acct_timexpd = 0;
tsk->acct_rss_mem1 = 0;
tsk->acct_vm_mem1 = 0;
}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 6fd158b..ec7e4f6 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -125,7 +125,7 @@
}
static void insert_work(struct cpu_workqueue_struct *cwq,
- struct work_struct *work, int tail)
+ struct work_struct *work, struct list_head *head)
{
set_wq_data(work, cwq);
/*
@@ -133,10 +133,7 @@
* result of list_add() below, see try_to_grab_pending().
*/
smp_wmb();
- if (tail)
- list_add_tail(&work->entry, &cwq->worklist);
- else
- list_add(&work->entry, &cwq->worklist);
+ list_add_tail(&work->entry, head);
wake_up(&cwq->more_work);
}
@@ -146,7 +143,7 @@
unsigned long flags;
spin_lock_irqsave(&cwq->lock, flags);
- insert_work(cwq, work, 1);
+ insert_work(cwq, work, &cwq->worklist);
spin_unlock_irqrestore(&cwq->lock, flags);
}
@@ -162,14 +159,11 @@
*/
int queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
- int ret = 0;
+ int ret;
- if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
- BUG_ON(!list_empty(&work->entry));
- __queue_work(wq_per_cpu(wq, get_cpu()), work);
- put_cpu();
- ret = 1;
- }
+ ret = queue_work_on(get_cpu(), wq, work);
+ put_cpu();
+
return ret;
}
EXPORT_SYMBOL_GPL(queue_work);
@@ -361,14 +355,14 @@
}
static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
- struct wq_barrier *barr, int tail)
+ struct wq_barrier *barr, struct list_head *head)
{
INIT_WORK(&barr->work, wq_barrier_func);
__set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
init_completion(&barr->done);
- insert_work(cwq, &barr->work, tail);
+ insert_work(cwq, &barr->work, head);
}
static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
@@ -388,7 +382,7 @@
active = 0;
spin_lock_irq(&cwq->lock);
if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) {
- insert_wq_barrier(cwq, &barr, 1);
+ insert_wq_barrier(cwq, &barr, &cwq->worklist);
active = 1;
}
spin_unlock_irq(&cwq->lock);
@@ -426,6 +420,57 @@
}
EXPORT_SYMBOL_GPL(flush_workqueue);
+/**
+ * flush_work - block until a work_struct's callback has terminated
+ * @work: the work which is to be flushed
+ *
+ * Returns false if @work has already terminated.
+ *
+ * It is expected that, prior to calling flush_work(), the caller has
+ * arranged for the work to not be requeued, otherwise it doesn't make
+ * sense to use this function.
+ */
+int flush_work(struct work_struct *work)
+{
+ struct cpu_workqueue_struct *cwq;
+ struct list_head *prev;
+ struct wq_barrier barr;
+
+ might_sleep();
+ cwq = get_wq_data(work);
+ if (!cwq)
+ return 0;
+
+ lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+ lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
+
+ prev = NULL;
+ spin_lock_irq(&cwq->lock);
+ if (!list_empty(&work->entry)) {
+ /*
+ * See the comment near try_to_grab_pending()->smp_rmb().
+ * If it was re-queued under us we are not going to wait.
+ */
+ smp_rmb();
+ if (unlikely(cwq != get_wq_data(work)))
+ goto out;
+ prev = &work->entry;
+ } else {
+ if (cwq->current_work != work)
+ goto out;
+ prev = &cwq->worklist;
+ }
+ insert_wq_barrier(cwq, &barr, prev->next);
+out:
+ spin_unlock_irq(&cwq->lock);
+ if (!prev)
+ return 0;
+
+ wait_for_completion(&barr.done);
+ return 1;
+}
+EXPORT_SYMBOL_GPL(flush_work);
+
/*
* Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
* so this work can't be re-armed in any way.
@@ -473,7 +518,7 @@
spin_lock_irq(&cwq->lock);
if (unlikely(cwq->current_work == work)) {
- insert_wq_barrier(cwq, &barr, 0);
+ insert_wq_barrier(cwq, &barr, cwq->worklist.next);
running = 1;
}
spin_unlock_irq(&cwq->lock);
@@ -644,10 +689,10 @@
struct work_struct *work = per_cpu_ptr(works, cpu);
INIT_WORK(work, func);
- set_bit(WORK_STRUCT_PENDING, work_data_bits(work));
- __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work);
+ schedule_work_on(cpu, work);
}
- flush_workqueue(keventd_wq);
+ for_each_online_cpu(cpu)
+ flush_work(per_cpu_ptr(works, cpu));
put_online_cpus();
free_percpu(works);
return 0;
@@ -784,7 +829,7 @@
err = create_workqueue_thread(cwq, singlethread_cpu);
start_workqueue_thread(cwq, -1);
} else {
- get_online_cpus();
+ cpu_maps_update_begin();
spin_lock(&workqueue_lock);
list_add(&wq->list, &workqueues);
spin_unlock(&workqueue_lock);
@@ -796,7 +841,7 @@
err = create_workqueue_thread(cwq, cpu);
start_workqueue_thread(cwq, cpu);
}
- put_online_cpus();
+ cpu_maps_update_done();
}
if (err) {
@@ -810,8 +855,8 @@
static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
{
/*
- * Our caller is either destroy_workqueue() or CPU_DEAD,
- * get_online_cpus() protects cwq->thread.
+ * Our caller is either destroy_workqueue() or CPU_POST_DEAD,
+ * cpu_add_remove_lock protects cwq->thread.
*/
if (cwq->thread == NULL)
return;
@@ -821,7 +866,7 @@
flush_cpu_workqueue(cwq);
/*
- * If the caller is CPU_DEAD and cwq->worklist was not empty,
+ * If the caller is CPU_POST_DEAD and cwq->worklist was not empty,
* a concurrent flush_workqueue() can insert a barrier after us.
* However, in that case run_workqueue() won't return and check
* kthread_should_stop() until it flushes all work_struct's.
@@ -845,14 +890,14 @@
const cpumask_t *cpu_map = wq_cpu_map(wq);
int cpu;
- get_online_cpus();
+ cpu_maps_update_begin();
spin_lock(&workqueue_lock);
list_del(&wq->list);
spin_unlock(&workqueue_lock);
for_each_cpu_mask_nr(cpu, *cpu_map)
cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu));
- put_online_cpus();
+ cpu_maps_update_done();
free_percpu(wq->cpu_wq);
kfree(wq);
@@ -866,6 +911,7 @@
unsigned int cpu = (unsigned long)hcpu;
struct cpu_workqueue_struct *cwq;
struct workqueue_struct *wq;
+ int ret = NOTIFY_OK;
action &= ~CPU_TASKS_FROZEN;
@@ -873,7 +919,7 @@
case CPU_UP_PREPARE:
cpu_set(cpu, cpu_populated_map);
}
-
+undo:
list_for_each_entry(wq, &workqueues, list) {
cwq = per_cpu_ptr(wq->cpu_wq, cpu);
@@ -883,7 +929,9 @@
break;
printk(KERN_ERR "workqueue [%s] for %i failed\n",
wq->name, cpu);
- return NOTIFY_BAD;
+ action = CPU_UP_CANCELED;
+ ret = NOTIFY_BAD;
+ goto undo;
case CPU_ONLINE:
start_workqueue_thread(cwq, cpu);
@@ -891,7 +939,7 @@
case CPU_UP_CANCELED:
start_workqueue_thread(cwq, -1);
- case CPU_DEAD:
+ case CPU_POST_DEAD:
cleanup_workqueue_thread(cwq);
break;
}
@@ -899,11 +947,11 @@
switch (action) {
case CPU_UP_CANCELED:
- case CPU_DEAD:
+ case CPU_POST_DEAD:
cpu_clear(cpu, cpu_populated_map);
}
- return NOTIFY_OK;
+ return ret;
}
void __init init_workqueues(void)
diff --git a/lib/cmdline.c b/lib/cmdline.c
index f596c08d..5ba8a94 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -116,7 +116,7 @@
/**
* memparse - parse a string with mem suffixes into a number
* @ptr: Where parse begins
- * @retptr: (output) Pointer to next char after parse completes
+ * @retptr: (output) Optional pointer to next char after parse completes
*
* Parses a string into a number. The number stored at @ptr is
* potentially suffixed with %K (for kilobytes, or 1024 bytes),
@@ -126,11 +126,13 @@
* megabyte, or one gigabyte, respectively.
*/
-unsigned long long memparse (char *ptr, char **retptr)
+unsigned long long memparse(char *ptr, char **retptr)
{
- unsigned long long ret = simple_strtoull (ptr, retptr, 0);
+ char *endptr; /* local pointer to end of parsed string */
- switch (**retptr) {
+ unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
+
+ switch (*endptr) {
case 'G':
case 'g':
ret <<= 10;
@@ -140,10 +142,14 @@
case 'K':
case 'k':
ret <<= 10;
- (*retptr)++;
+ endptr++;
default:
break;
}
+
+ if (retptr)
+ *retptr = endptr;
+
return ret;
}
diff --git a/lib/idr.c b/lib/idr.c
index 7a02e17..3476f82 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -6,6 +6,8 @@
* Modified by George Anzinger to reuse immediately and to use
* find bit instructions. Also removed _irq on spinlocks.
*
+ * Modified by Nadia Derbey to make it RCU safe.
+ *
* Small id to pointer translation service.
*
* It uses a radix tree like structure as a sparse array indexed
@@ -35,7 +37,7 @@
static struct kmem_cache *idr_layer_cache;
-static struct idr_layer *alloc_layer(struct idr *idp)
+static struct idr_layer *get_from_free_list(struct idr *idp)
{
struct idr_layer *p;
unsigned long flags;
@@ -50,15 +52,28 @@
return(p);
}
+static void idr_layer_rcu_free(struct rcu_head *head)
+{
+ struct idr_layer *layer;
+
+ layer = container_of(head, struct idr_layer, rcu_head);
+ kmem_cache_free(idr_layer_cache, layer);
+}
+
+static inline void free_layer(struct idr_layer *p)
+{
+ call_rcu(&p->rcu_head, idr_layer_rcu_free);
+}
+
/* only called when idp->lock is held */
-static void __free_layer(struct idr *idp, struct idr_layer *p)
+static void __move_to_free_list(struct idr *idp, struct idr_layer *p)
{
p->ary[0] = idp->id_free;
idp->id_free = p;
idp->id_free_cnt++;
}
-static void free_layer(struct idr *idp, struct idr_layer *p)
+static void move_to_free_list(struct idr *idp, struct idr_layer *p)
{
unsigned long flags;
@@ -66,7 +81,7 @@
* Depends on the return element being zeroed.
*/
spin_lock_irqsave(&idp->lock, flags);
- __free_layer(idp, p);
+ __move_to_free_list(idp, p);
spin_unlock_irqrestore(&idp->lock, flags);
}
@@ -96,7 +111,7 @@
* @gfp_mask: memory allocation flags
*
* This function should be called prior to locking and calling the
- * following function. It preallocates enough memory to satisfy
+ * idr_get_new* functions. It preallocates enough memory to satisfy
* the worst possible allocation.
*
* If the system is REALLY out of memory this function returns 0,
@@ -109,7 +124,7 @@
new = kmem_cache_alloc(idr_layer_cache, gfp_mask);
if (new == NULL)
return (0);
- free_layer(idp, new);
+ move_to_free_list(idp, new);
}
return 1;
}
@@ -143,7 +158,7 @@
/* if already at the top layer, we need to grow */
if (!(p = pa[l])) {
*starting_id = id;
- return -2;
+ return IDR_NEED_TO_GROW;
}
/* If we need to go up one layer, continue the
@@ -160,16 +175,17 @@
id = ((id >> sh) ^ n ^ m) << sh;
}
if ((id >= MAX_ID_BIT) || (id < 0))
- return -3;
+ return IDR_NOMORE_SPACE;
if (l == 0)
break;
/*
* Create the layer below if it is missing.
*/
if (!p->ary[m]) {
- if (!(new = alloc_layer(idp)))
+ new = get_from_free_list(idp);
+ if (!new)
return -1;
- p->ary[m] = new;
+ rcu_assign_pointer(p->ary[m], new);
p->count++;
}
pa[l--] = p;
@@ -192,7 +208,7 @@
p = idp->top;
layers = idp->layers;
if (unlikely(!p)) {
- if (!(p = alloc_layer(idp)))
+ if (!(p = get_from_free_list(idp)))
return -1;
layers = 1;
}
@@ -204,7 +220,7 @@
layers++;
if (!p->count)
continue;
- if (!(new = alloc_layer(idp))) {
+ if (!(new = get_from_free_list(idp))) {
/*
* The allocation failed. If we built part of
* the structure tear it down.
@@ -214,7 +230,7 @@
p = p->ary[0];
new->ary[0] = NULL;
new->bitmap = new->count = 0;
- __free_layer(idp, new);
+ __move_to_free_list(idp, new);
}
spin_unlock_irqrestore(&idp->lock, flags);
return -1;
@@ -225,10 +241,10 @@
__set_bit(0, &new->bitmap);
p = new;
}
- idp->top = p;
+ rcu_assign_pointer(idp->top, p);
idp->layers = layers;
v = sub_alloc(idp, &id, pa);
- if (v == -2)
+ if (v == IDR_NEED_TO_GROW)
goto build_up;
return(v);
}
@@ -244,7 +260,8 @@
* Successfully found an empty slot. Install the user
* pointer and mark the slot full.
*/
- pa[0]->ary[id & IDR_MASK] = (struct idr_layer *)ptr;
+ rcu_assign_pointer(pa[0]->ary[id & IDR_MASK],
+ (struct idr_layer *)ptr);
pa[0]->count++;
idr_mark_full(pa, id);
}
@@ -277,12 +294,8 @@
* This is a cheap hack until the IDR code can be fixed to
* return proper error values.
*/
- if (rv < 0) {
- if (rv == -1)
- return -EAGAIN;
- else /* Will be -3 */
- return -ENOSPC;
- }
+ if (rv < 0)
+ return _idr_rc_to_errno(rv);
*id = rv;
return 0;
}
@@ -312,12 +325,8 @@
* This is a cheap hack until the IDR code can be fixed to
* return proper error values.
*/
- if (rv < 0) {
- if (rv == -1)
- return -EAGAIN;
- else /* Will be -3 */
- return -ENOSPC;
- }
+ if (rv < 0)
+ return _idr_rc_to_errno(rv);
*id = rv;
return 0;
}
@@ -325,7 +334,8 @@
static void idr_remove_warning(int id)
{
- printk("idr_remove called for id=%d which is not allocated.\n", id);
+ printk(KERN_WARNING
+ "idr_remove called for id=%d which is not allocated.\n", id);
dump_stack();
}
@@ -334,6 +344,7 @@
struct idr_layer *p = idp->top;
struct idr_layer **pa[MAX_LEVEL];
struct idr_layer ***paa = &pa[0];
+ struct idr_layer *to_free;
int n;
*paa = NULL;
@@ -349,13 +360,18 @@
n = id & IDR_MASK;
if (likely(p != NULL && test_bit(n, &p->bitmap))){
__clear_bit(n, &p->bitmap);
- p->ary[n] = NULL;
+ rcu_assign_pointer(p->ary[n], NULL);
+ to_free = NULL;
while(*paa && ! --((**paa)->count)){
- free_layer(idp, **paa);
+ if (to_free)
+ free_layer(to_free);
+ to_free = **paa;
**paa-- = NULL;
}
if (!*paa)
idp->layers = 0;
+ if (to_free)
+ free_layer(to_free);
} else
idr_remove_warning(id);
}
@@ -368,22 +384,34 @@
void idr_remove(struct idr *idp, int id)
{
struct idr_layer *p;
+ struct idr_layer *to_free;
/* Mask off upper bits we don't use for the search. */
id &= MAX_ID_MASK;
sub_remove(idp, (idp->layers - 1) * IDR_BITS, id);
if (idp->top && idp->top->count == 1 && (idp->layers > 1) &&
- idp->top->ary[0]) { // We can drop a layer
-
+ idp->top->ary[0]) {
+ /*
+ * Single child at leftmost slot: we can shrink the tree.
+ * This level is not needed anymore since when layers are
+ * inserted, they are inserted at the top of the existing
+ * tree.
+ */
+ to_free = idp->top;
p = idp->top->ary[0];
- idp->top->bitmap = idp->top->count = 0;
- free_layer(idp, idp->top);
- idp->top = p;
+ rcu_assign_pointer(idp->top, p);
--idp->layers;
+ to_free->bitmap = to_free->count = 0;
+ free_layer(to_free);
}
while (idp->id_free_cnt >= IDR_FREE_MAX) {
- p = alloc_layer(idp);
+ p = get_from_free_list(idp);
+ /*
+ * Note: we don't call the rcu callback here, since the only
+ * layers that fall into the freelist are those that have been
+ * preallocated.
+ */
kmem_cache_free(idr_layer_cache, p);
}
return;
@@ -424,15 +452,13 @@
id += 1 << n;
while (n < fls(id)) {
- if (p) {
- memset(p, 0, sizeof *p);
- free_layer(idp, p);
- }
+ if (p)
+ free_layer(p);
n += IDR_BITS;
p = *--paa;
}
}
- idp->top = NULL;
+ rcu_assign_pointer(idp->top, NULL);
idp->layers = 0;
}
EXPORT_SYMBOL(idr_remove_all);
@@ -444,7 +470,7 @@
void idr_destroy(struct idr *idp)
{
while (idp->id_free_cnt) {
- struct idr_layer *p = alloc_layer(idp);
+ struct idr_layer *p = get_from_free_list(idp);
kmem_cache_free(idr_layer_cache, p);
}
}
@@ -459,7 +485,8 @@
* return indicates that @id is not valid or you passed %NULL in
* idr_get_new().
*
- * The caller must serialize idr_find() vs idr_get_new() and idr_remove().
+ * This function can be called under rcu_read_lock(), provided that the
+ * lifetimes of the leaf pointers are correctly managed.
*/
void *idr_find(struct idr *idp, int id)
{
@@ -467,7 +494,7 @@
struct idr_layer *p;
n = idp->layers * IDR_BITS;
- p = idp->top;
+ p = rcu_dereference(idp->top);
/* Mask off upper bits we don't use for the search. */
id &= MAX_ID_MASK;
@@ -477,7 +504,7 @@
while (n > 0 && p) {
n -= IDR_BITS;
- p = p->ary[(id >> n) & IDR_MASK];
+ p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
}
return((void *)p);
}
@@ -510,7 +537,7 @@
struct idr_layer **paa = &pa[0];
n = idp->layers * IDR_BITS;
- p = idp->top;
+ p = rcu_dereference(idp->top);
max = 1 << n;
id = 0;
@@ -518,7 +545,7 @@
while (n > 0 && p) {
n -= IDR_BITS;
*paa++ = p;
- p = p->ary[(id >> n) & IDR_MASK];
+ p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
}
if (p) {
@@ -548,7 +575,7 @@
* A -ENOENT return indicates that @id was not found.
* A -EINVAL return indicates that @id was not within valid constraints.
*
- * The caller must serialize vs idr_find(), idr_get_new(), and idr_remove().
+ * The caller must serialize with writers.
*/
void *idr_replace(struct idr *idp, void *ptr, int id)
{
@@ -574,7 +601,7 @@
return ERR_PTR(-ENOENT);
old_p = p->ary[n];
- p->ary[n] = ptr;
+ rcu_assign_pointer(p->ary[n], ptr);
return old_p;
}
@@ -694,12 +721,8 @@
restart:
/* get vacant slot */
t = idr_get_empty_slot(&ida->idr, idr_id, pa);
- if (t < 0) {
- if (t == -1)
- return -EAGAIN;
- else /* will be -3 */
- return -ENOSPC;
- }
+ if (t < 0)
+ return _idr_rc_to_errno(t);
if (t * IDA_BITMAP_BITS >= MAX_ID_BIT)
return -ENOSPC;
@@ -720,7 +743,8 @@
return -EAGAIN;
memset(bitmap, 0, sizeof(struct ida_bitmap));
- pa[0]->ary[idr_id & IDR_MASK] = (void *)bitmap;
+ rcu_assign_pointer(pa[0]->ary[idr_id & IDR_MASK],
+ (void *)bitmap);
pa[0]->count++;
}
@@ -749,7 +773,7 @@
* allocation.
*/
if (ida->idr.id_free_cnt || ida->free_bitmap) {
- struct idr_layer *p = alloc_layer(&ida->idr);
+ struct idr_layer *p = get_from_free_list(&ida->idr);
if (p)
kmem_cache_free(idr_layer_cache, p);
}
diff --git a/lib/inflate.c b/lib/inflate.c
index 9762294..1a8e8a9 100644
--- a/lib/inflate.c
+++ b/lib/inflate.c
@@ -230,6 +230,45 @@
#define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE())<<k;k+=8;}}
#define DUMPBITS(n) {b>>=(n);k-=(n);}
+#ifndef NO_INFLATE_MALLOC
+/* A trivial malloc implementation, adapted from
+ * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
+ */
+
+static unsigned long malloc_ptr;
+static int malloc_count;
+
+static void *malloc(int size)
+{
+ void *p;
+
+ if (size < 0)
+ error("Malloc error");
+ if (!malloc_ptr)
+ malloc_ptr = free_mem_ptr;
+
+ malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */
+
+ p = (void *)malloc_ptr;
+ malloc_ptr += size;
+
+ if (free_mem_end_ptr && malloc_ptr >= free_mem_end_ptr)
+ error("Out of memory");
+
+ malloc_count++;
+ return p;
+}
+
+static void free(void *where)
+{
+ malloc_count--;
+ if (!malloc_count)
+ malloc_ptr = free_mem_ptr;
+}
+#else
+#define malloc(a) kmalloc(a, GFP_KERNEL)
+#define free(a) kfree(a)
+#endif
/*
Huffman code decoding is performed using a multi-level table lookup.
@@ -1045,7 +1084,6 @@
int e; /* last block flag */
int r; /* result code */
unsigned h; /* maximum struct huft's malloc'ed */
- void *ptr;
/* initialize window, bit buffer */
wp = 0;
@@ -1057,12 +1095,12 @@
h = 0;
do {
hufts = 0;
- gzip_mark(&ptr);
- if ((r = inflate_block(&e)) != 0) {
- gzip_release(&ptr);
- return r;
- }
- gzip_release(&ptr);
+#ifdef ARCH_HAS_DECOMP_WDOG
+ arch_decomp_wdog();
+#endif
+ r = inflate_block(&e);
+ if (r)
+ return r;
if (hufts > h)
h = hufts;
} while (!e);
diff --git a/lib/kobject.c b/lib/kobject.c
index 7444015..bd732ff 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -164,9 +164,8 @@
return -ENOENT;
if (!kobj->name || !kobj->name[0]) {
- pr_debug("kobject: (%p): attempted to be registered with empty "
+ WARN(1, "kobject: (%p): attempted to be registered with empty "
"name!\n", kobj);
- WARN_ON(1);
return -EINVAL;
}
@@ -583,12 +582,10 @@
void kobject_put(struct kobject *kobj)
{
if (kobj) {
- if (!kobj->state_initialized) {
- printk(KERN_WARNING "kobject: '%s' (%p): is not "
+ if (!kobj->state_initialized)
+ WARN(1, KERN_WARNING "kobject: '%s' (%p): is not "
"initialized, yet kobject_put() is being "
"called.\n", kobject_name(kobj), kobj);
- WARN_ON(1);
- }
kref_put(&kobj->kref, kobject_release);
}
}
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 4350ba9..1a39f4e 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -20,18 +20,14 @@
struct list_head *prev,
struct list_head *next)
{
- if (unlikely(next->prev != prev)) {
- printk(KERN_ERR "list_add corruption. next->prev should be "
- "prev (%p), but was %p. (next=%p).\n",
- prev, next->prev, next);
- BUG();
- }
- if (unlikely(prev->next != next)) {
- printk(KERN_ERR "list_add corruption. prev->next should be "
- "next (%p), but was %p. (prev=%p).\n",
- next, prev->next, prev);
- BUG();
- }
+ WARN(next->prev != prev,
+ "list_add corruption. next->prev should be "
+ "prev (%p), but was %p. (next=%p).\n",
+ prev, next->prev, next);
+ WARN(prev->next != next,
+ "list_add corruption. prev->next should be "
+ "next (%p), but was %p. (prev=%p).\n",
+ next, prev->next, prev);
next->prev = new;
new->next = next;
new->prev = prev;
@@ -40,20 +36,6 @@
EXPORT_SYMBOL(__list_add);
/**
- * list_add - add a new entry
- * @new: new entry to be added
- * @head: list head to add it after
- *
- * Insert a new entry after the specified head.
- * This is good for implementing stacks.
- */
-void list_add(struct list_head *new, struct list_head *head)
-{
- __list_add(new, head, head->next);
-}
-EXPORT_SYMBOL(list_add);
-
-/**
* list_del - deletes entry from list.
* @entry: the element to delete from the list.
* Note: list_empty on entry does not return true after this, the entry is
@@ -61,16 +43,12 @@
*/
void list_del(struct list_head *entry)
{
- if (unlikely(entry->prev->next != entry)) {
- printk(KERN_ERR "list_del corruption. prev->next should be %p, "
- "but was %p\n", entry, entry->prev->next);
- BUG();
- }
- if (unlikely(entry->next->prev != entry)) {
- printk(KERN_ERR "list_del corruption. next->prev should be %p, "
- "but was %p\n", entry, entry->next->prev);
- BUG();
- }
+ WARN(entry->prev->next != entry,
+ "list_del corruption. prev->next should be %p, "
+ "but was %p\n", entry, entry->prev->next);
+ WARN(entry->next->prev != entry,
+ "list_del corruption. next->prev should be %p, "
+ "but was %p\n", entry, entry->next->prev);
__list_del(entry->prev, entry->next);
entry->next = LIST_POISON1;
entry->prev = LIST_POISON2;
diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c
index 77f0f9b..5dc6b29 100644
--- a/lib/lzo/lzo1x_decompress.c
+++ b/lib/lzo/lzo1x_decompress.c
@@ -138,8 +138,7 @@
t += 31 + *ip++;
}
m_pos = op - 1;
- m_pos -= le16_to_cpu(get_unaligned(
- (const unsigned short *)ip)) >> 2;
+ m_pos -= get_unaligned_le16(ip) >> 2;
ip += 2;
} else if (t >= 16) {
m_pos = op;
@@ -157,8 +156,7 @@
}
t += 7 + *ip++;
}
- m_pos -= le16_to_cpu(get_unaligned(
- (const unsigned short *)ip)) >> 2;
+ m_pos -= get_unaligned_le16(ip) >> 2;
ip += 2;
if (m_pos == op)
goto eof_found;
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 485e304..3513667 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -3,6 +3,9 @@
*
* Isolated from kernel/printk.c by Dave Young <hidave.darkstar@gmail.com>
*
+ * 2008-05-01 rewrite the function and use a ratelimit_state data struct as
+ * parameter. Now every user can use their own standalone ratelimit_state.
+ *
* This file is released under the GPLv2.
*
*/
@@ -11,41 +14,43 @@
#include <linux/jiffies.h>
#include <linux/module.h>
+static DEFINE_SPINLOCK(ratelimit_lock);
+static unsigned long flags;
+
/*
* __ratelimit - rate limiting
- * @ratelimit_jiffies: minimum time in jiffies between two callbacks
- * @ratelimit_burst: number of callbacks we do before ratelimiting
+ * @rs: ratelimit_state data
*
- * This enforces a rate limit: not more than @ratelimit_burst callbacks
- * in every ratelimit_jiffies
+ * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks
+ * in every @rs->ratelimit_jiffies
*/
-int __ratelimit(int ratelimit_jiffies, int ratelimit_burst)
+int __ratelimit(struct ratelimit_state *rs)
{
- static DEFINE_SPINLOCK(ratelimit_lock);
- static unsigned toks = 10 * 5 * HZ;
- static unsigned long last_msg;
- static int missed;
- unsigned long flags;
- unsigned long now = jiffies;
+ if (!rs->interval)
+ return 1;
spin_lock_irqsave(&ratelimit_lock, flags);
- toks += now - last_msg;
- last_msg = now;
- if (toks > (ratelimit_burst * ratelimit_jiffies))
- toks = ratelimit_burst * ratelimit_jiffies;
- if (toks >= ratelimit_jiffies) {
- int lost = missed;
+ if (!rs->begin)
+ rs->begin = jiffies;
- missed = 0;
- toks -= ratelimit_jiffies;
- spin_unlock_irqrestore(&ratelimit_lock, flags);
- if (lost)
- printk(KERN_WARNING "%s: %d messages suppressed\n",
- __func__, lost);
- return 1;
+ if (time_is_before_jiffies(rs->begin + rs->interval)) {
+ if (rs->missed)
+ printk(KERN_WARNING "%s: %d callbacks suppressed\n",
+ __func__, rs->missed);
+ rs->begin = 0;
+ rs->printed = 0;
+ rs->missed = 0;
}
- missed++;
+ if (rs->burst && rs->burst > rs->printed)
+ goto print;
+
+ rs->missed++;
spin_unlock_irqrestore(&ratelimit_lock, flags);
return 0;
+
+print:
+ rs->printed++;
+ spin_unlock_irqrestore(&ratelimit_lock, flags);
+ return 1;
}
EXPORT_SYMBOL(__ratelimit);
diff --git a/mm/filemap.c b/mm/filemap.c
index 7675b91..2d3ec1ff 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -115,7 +115,7 @@
{
struct address_space *mapping = page->mapping;
- mem_cgroup_uncharge_page(page);
+ mem_cgroup_uncharge_cache_page(page);
radix_tree_delete(&mapping->page_tree, page->index);
page->mapping = NULL;
mapping->nrpages--;
@@ -474,12 +474,12 @@
mapping->nrpages++;
__inc_zone_page_state(page, NR_FILE_PAGES);
} else
- mem_cgroup_uncharge_page(page);
+ mem_cgroup_uncharge_cache_page(page);
write_unlock_irq(&mapping->tree_lock);
radix_tree_preload_end();
} else
- mem_cgroup_uncharge_page(page);
+ mem_cgroup_uncharge_cache_page(page);
out:
return error;
}
@@ -2563,9 +2563,8 @@
* Otherwise return zero.
*
* The @gfp_mask argument specifies whether I/O may be performed to release
- * this page (__GFP_IO), and whether the call may block (__GFP_WAIT).
+ * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS).
*
- * NOTE: @gfp_mask may go away, and this function may become non-blocking.
*/
int try_to_release_page(struct page *page, gfp_t gfp_mask)
{
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e46451e..fba566c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -35,9 +35,9 @@
#include <asm/uaccess.h>
-struct cgroup_subsys mem_cgroup_subsys;
-static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
-static struct kmem_cache *page_cgroup_cache;
+struct cgroup_subsys mem_cgroup_subsys __read_mostly;
+static struct kmem_cache *page_cgroup_cache __read_mostly;
+#define MEM_CGROUP_RECLAIM_RETRIES 5
/*
* Statistics for memory cgroup.
@@ -166,7 +166,6 @@
struct list_head lru; /* per cgroup LRU list */
struct page *page;
struct mem_cgroup *mem_cgroup;
- int ref_cnt; /* cached, mapped, migrating */
int flags;
};
#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */
@@ -185,6 +184,7 @@
enum charge_type {
MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
MEM_CGROUP_CHARGE_TYPE_MAPPED,
+ MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */
};
/*
@@ -296,7 +296,7 @@
MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false);
- list_del_init(&pc->lru);
+ list_del(&pc->lru);
}
static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
@@ -354,6 +354,9 @@
struct mem_cgroup_per_zone *mz;
unsigned long flags;
+ if (mem_cgroup_subsys.disabled)
+ return;
+
/*
* We cannot lock_page_cgroup while holding zone's lru_lock,
* because other holders of lock_page_cgroup can be interrupted
@@ -524,7 +527,8 @@
* < 0 if the cgroup is over its limit
*/
static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask, enum charge_type ctype)
+ gfp_t gfp_mask, enum charge_type ctype,
+ struct mem_cgroup *memcg)
{
struct mem_cgroup *mem;
struct page_cgroup *pc;
@@ -532,35 +536,8 @@
unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
struct mem_cgroup_per_zone *mz;
- if (mem_cgroup_subsys.disabled)
- return 0;
-
- /*
- * Should page_cgroup's go to their own slab?
- * One could optimize the performance of the charging routine
- * by saving a bit in the page_flags and using it as a lock
- * to see if the cgroup page already has a page_cgroup associated
- * with it
- */
-retry:
- lock_page_cgroup(page);
- pc = page_get_page_cgroup(page);
- /*
- * The page_cgroup exists and
- * the page has already been accounted.
- */
- if (pc) {
- VM_BUG_ON(pc->page != page);
- VM_BUG_ON(pc->ref_cnt <= 0);
-
- pc->ref_cnt++;
- unlock_page_cgroup(page);
- goto done;
- }
- unlock_page_cgroup(page);
-
- pc = kmem_cache_zalloc(page_cgroup_cache, gfp_mask);
- if (pc == NULL)
+ pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
+ if (unlikely(pc == NULL))
goto err;
/*
@@ -569,16 +546,18 @@
* thread group leader migrates. It's possible that mm is not
* set, if so charge the init_mm (happens for pagecache usage).
*/
- if (!mm)
- mm = &init_mm;
-
- rcu_read_lock();
- mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
- /*
- * For every charge from the cgroup, increment reference count
- */
- css_get(&mem->css);
- rcu_read_unlock();
+ if (likely(!memcg)) {
+ rcu_read_lock();
+ mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+ /*
+ * For every charge from the cgroup, increment reference count
+ */
+ css_get(&mem->css);
+ rcu_read_unlock();
+ } else {
+ mem = memcg;
+ css_get(&memcg->css);
+ }
while (res_counter_charge(&mem->res, PAGE_SIZE)) {
if (!(gfp_mask & __GFP_WAIT))
@@ -603,25 +582,24 @@
}
}
- pc->ref_cnt = 1;
pc->mem_cgroup = mem;
pc->page = page;
- pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
+ /*
+ * If a page is accounted as a page cache, insert to inactive list.
+ * If anon, insert to active list.
+ */
if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
pc->flags = PAGE_CGROUP_FLAG_CACHE;
+ else
+ pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
lock_page_cgroup(page);
- if (page_get_page_cgroup(page)) {
+ if (unlikely(page_get_page_cgroup(page))) {
unlock_page_cgroup(page);
- /*
- * Another charge has been added to this page already.
- * We take lock_page_cgroup(page) again and read
- * page->cgroup, increment refcnt.... just retry is OK.
- */
res_counter_uncharge(&mem->res, PAGE_SIZE);
css_put(&mem->css);
kmem_cache_free(page_cgroup_cache, pc);
- goto retry;
+ goto done;
}
page_assign_page_cgroup(page, pc);
@@ -642,24 +620,65 @@
int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
{
+ if (mem_cgroup_subsys.disabled)
+ return 0;
+
+ /*
+ * If already mapped, we don't have to account.
+ * If page cache, page->mapping has address_space.
+ * But page->mapping may have out-of-use anon_vma pointer,
+ * detect it with a PageAnon() check. A newly-mapped anon page's
+ * page->mapping is NULL.
+ */
+ if (page_mapped(page) || (page->mapping && !PageAnon(page)))
+ return 0;
+ if (unlikely(!mm))
+ mm = &init_mm;
return mem_cgroup_charge_common(page, mm, gfp_mask,
- MEM_CGROUP_CHARGE_TYPE_MAPPED);
+ MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
}
int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask)
{
- if (!mm)
+ if (mem_cgroup_subsys.disabled)
+ return 0;
+
+ /*
+ * Corner case handling. This is usually called from
+ * add_to_page_cache(). But some filesystems (shmem) precharge the page
+ * before calling it and then call add_to_page_cache() with GFP_NOWAIT.
+ *
+ * For the GFP_NOWAIT case, the page may be pre-charged before calling
+ * add_to_page_cache() (see shmem.c). Check for that here and avoid
+ * charging twice. (It works but has to pay a bit larger cost.)
+ */
+ if (!(gfp_mask & __GFP_WAIT)) {
+ struct page_cgroup *pc;
+
+ lock_page_cgroup(page);
+ pc = page_get_page_cgroup(page);
+ if (pc) {
+ VM_BUG_ON(pc->page != page);
+ VM_BUG_ON(!pc->mem_cgroup);
+ unlock_page_cgroup(page);
+ return 0;
+ }
+ unlock_page_cgroup(page);
+ }
+
+ if (unlikely(!mm))
mm = &init_mm;
+
return mem_cgroup_charge_common(page, mm, gfp_mask,
- MEM_CGROUP_CHARGE_TYPE_CACHE);
+ MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
}
/*
- * Uncharging is always a welcome operation, we never complain, simply
- * uncharge.
+ * uncharge if !page_mapped(page)
*/
-void mem_cgroup_uncharge_page(struct page *page)
+static void
+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
{
struct page_cgroup *pc;
struct mem_cgroup *mem;
@@ -674,74 +693,15 @@
*/
lock_page_cgroup(page);
pc = page_get_page_cgroup(page);
- if (!pc)
+ if (unlikely(!pc))
goto unlock;
VM_BUG_ON(pc->page != page);
- VM_BUG_ON(pc->ref_cnt <= 0);
- if (--(pc->ref_cnt) == 0) {
- mz = page_cgroup_zoneinfo(pc);
- spin_lock_irqsave(&mz->lru_lock, flags);
- __mem_cgroup_remove_list(mz, pc);
- spin_unlock_irqrestore(&mz->lru_lock, flags);
-
- page_assign_page_cgroup(page, NULL);
- unlock_page_cgroup(page);
-
- mem = pc->mem_cgroup;
- res_counter_uncharge(&mem->res, PAGE_SIZE);
- css_put(&mem->css);
-
- kmem_cache_free(page_cgroup_cache, pc);
- return;
- }
-
-unlock:
- unlock_page_cgroup(page);
-}
-
-/*
- * Returns non-zero if a page (under migration) has valid page_cgroup member.
- * Refcnt of page_cgroup is incremented.
- */
-int mem_cgroup_prepare_migration(struct page *page)
-{
- struct page_cgroup *pc;
-
- if (mem_cgroup_subsys.disabled)
- return 0;
-
- lock_page_cgroup(page);
- pc = page_get_page_cgroup(page);
- if (pc)
- pc->ref_cnt++;
- unlock_page_cgroup(page);
- return pc != NULL;
-}
-
-void mem_cgroup_end_migration(struct page *page)
-{
- mem_cgroup_uncharge_page(page);
-}
-
-/*
- * We know both *page* and *newpage* are now not-on-LRU and PG_locked.
- * And no race with uncharge() routines because page_cgroup for *page*
- * has extra one reference by mem_cgroup_prepare_migration.
- */
-void mem_cgroup_page_migration(struct page *page, struct page *newpage)
-{
- struct page_cgroup *pc;
- struct mem_cgroup_per_zone *mz;
- unsigned long flags;
-
- lock_page_cgroup(page);
- pc = page_get_page_cgroup(page);
- if (!pc) {
- unlock_page_cgroup(page);
- return;
- }
+ if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
+ && ((pc->flags & PAGE_CGROUP_FLAG_CACHE)
+ || page_mapped(page)))
+ goto unlock;
mz = page_cgroup_zoneinfo(pc);
spin_lock_irqsave(&mz->lru_lock, flags);
@@ -751,21 +711,133 @@
page_assign_page_cgroup(page, NULL);
unlock_page_cgroup(page);
- pc->page = newpage;
- lock_page_cgroup(newpage);
- page_assign_page_cgroup(newpage, pc);
+ mem = pc->mem_cgroup;
+ res_counter_uncharge(&mem->res, PAGE_SIZE);
+ css_put(&mem->css);
- mz = page_cgroup_zoneinfo(pc);
- spin_lock_irqsave(&mz->lru_lock, flags);
- __mem_cgroup_add_list(mz, pc);
- spin_unlock_irqrestore(&mz->lru_lock, flags);
+ kmem_cache_free(page_cgroup_cache, pc);
+ return;
+unlock:
+ unlock_page_cgroup(page);
+}
- unlock_page_cgroup(newpage);
+void mem_cgroup_uncharge_page(struct page *page)
+{
+ __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
+}
+
+void mem_cgroup_uncharge_cache_page(struct page *page)
+{
+ VM_BUG_ON(page_mapped(page));
+ __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
}
/*
+ * Before starting migration, account against new page.
+ */
+int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
+{
+ struct page_cgroup *pc;
+ struct mem_cgroup *mem = NULL;
+ enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
+ int ret = 0;
+
+ if (mem_cgroup_subsys.disabled)
+ return 0;
+
+ lock_page_cgroup(page);
+ pc = page_get_page_cgroup(page);
+ if (pc) {
+ mem = pc->mem_cgroup;
+ css_get(&mem->css);
+ if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
+ ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+ }
+ unlock_page_cgroup(page);
+ if (mem) {
+ ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
+ ctype, mem);
+ css_put(&mem->css);
+ }
+ return ret;
+}
+
+/* Remove the redundant charge if migration failed. */
+void mem_cgroup_end_migration(struct page *newpage)
+{
+ /*
+ * On success, page->mapping is not NULL.
+ * Special rollback care is necessary when:
+ * 1. migration failed (newpage->mapping is cleared in this case);
+ * 2. the newpage was moved but not remapped again because the task
+ * exited and the newpage is obsolete. In this case, the new page
+ * may be a swapcache page. So we always just call
+ * mem_cgroup_uncharge_page() to avoid a mess. The page_cgroup will
+ * be removed if unnecessary. File cache pages are still on the
+ * radix-tree; we don't need to care about them.
+ */
+ if (!newpage->mapping)
+ __mem_cgroup_uncharge_common(newpage,
+ MEM_CGROUP_CHARGE_TYPE_FORCE);
+ else if (PageAnon(newpage))
+ mem_cgroup_uncharge_page(newpage);
+}
+
+/*
+ * Try to shrink memory usage under the specified resource controller.
+ * This is typically used for page reclaim on behalf of shmem, to reduce
+ * the side effects of shmem page allocation, which is used by some mem_cgroup.
+ */
+int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
+{
+ struct mem_cgroup *mem;
+ int progress = 0;
+ int retry = MEM_CGROUP_RECLAIM_RETRIES;
+
+ if (mem_cgroup_subsys.disabled)
+ return 0;
+
+ rcu_read_lock();
+ mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+ css_get(&mem->css);
+ rcu_read_unlock();
+
+ do {
+ progress = try_to_free_mem_cgroup_pages(mem, gfp_mask);
+ } while (!progress && --retry);
+
+ css_put(&mem->css);
+ if (!retry)
+ return -ENOMEM;
+ return 0;
+}
+
+int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val)
+{
+
+ int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
+ int progress;
+ int ret = 0;
+
+ while (res_counter_set_limit(&memcg->res, val)) {
+ if (signal_pending(current)) {
+ ret = -EINTR;
+ break;
+ }
+ if (!retry_count) {
+ ret = -EBUSY;
+ break;
+ }
+ progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL);
+ if (!progress)
+ retry_count--;
+ }
+ return ret;
+}
+
+
+/*
* This routine traverse page_cgroup in given list and drop them all.
- * This routine ignores page_cgroup->ref_cnt.
* *And* this routine doesn't reclaim page itself, just removes page_cgroup.
*/
#define FORCE_UNCHARGE_BATCH (128)
@@ -790,12 +862,20 @@
page = pc->page;
get_page(page);
spin_unlock_irqrestore(&mz->lru_lock, flags);
- mem_cgroup_uncharge_page(page);
- put_page(page);
- if (--count <= 0) {
- count = FORCE_UNCHARGE_BATCH;
+ /*
+ * Check if this page is on LRU. !LRU page can be found
+ * if it's under page migration.
+ */
+ if (PageLRU(page)) {
+ __mem_cgroup_uncharge_common(page,
+ MEM_CGROUP_CHARGE_TYPE_FORCE);
+ put_page(page);
+ if (--count <= 0) {
+ count = FORCE_UNCHARGE_BATCH;
+ cond_resched();
+ }
+ } else
cond_resched();
- }
spin_lock_irqsave(&mz->lru_lock, flags);
}
spin_unlock_irqrestore(&mz->lru_lock, flags);
@@ -810,9 +890,6 @@
int ret = -EBUSY;
int node, zid;
- if (mem_cgroup_subsys.disabled)
- return 0;
-
css_get(&mem->css);
/*
* page reclaim code (kswapd etc..) will move pages between
@@ -838,32 +915,34 @@
return ret;
}
-static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
-{
- *tmp = memparse(buf, &buf);
- if (*buf != '\0')
- return -EINVAL;
-
- /*
- * Round up the value to the closest page size
- */
- *tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT;
- return 0;
-}
-
static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
{
return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
cft->private);
}
-
-static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
- struct file *file, const char __user *userbuf,
- size_t nbytes, loff_t *ppos)
+/*
+ * The only user of this function is
+ * RES_LIMIT.
+ */
+static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
+ const char *buffer)
{
- return res_counter_write(&mem_cgroup_from_cont(cont)->res,
- cft->private, userbuf, nbytes, ppos,
- mem_cgroup_write_strategy);
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ unsigned long long val;
+ int ret;
+
+ switch (cft->private) {
+ case RES_LIMIT:
+ /* This function does all necessary parse...reuse it */
+ ret = res_counter_memparse_write_strategy(buffer, &val);
+ if (!ret)
+ ret = mem_cgroup_resize_limit(memcg, val);
+ break;
+ default:
+ ret = -EINVAL; /* should be BUG() ? */
+ break;
+ }
+ return ret;
}
static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
@@ -940,7 +1019,7 @@
{
.name = "limit_in_bytes",
.private = RES_LIMIT,
- .write = mem_cgroup_write,
+ .write_string = mem_cgroup_write,
.read_u64 = mem_cgroup_read,
},
{
@@ -1070,8 +1149,6 @@
static int mem_cgroup_populate(struct cgroup_subsys *ss,
struct cgroup *cont)
{
- if (mem_cgroup_subsys.disabled)
- return 0;
return cgroup_add_files(cont, ss, mem_cgroup_files,
ARRAY_SIZE(mem_cgroup_files));
}
@@ -1084,9 +1161,6 @@
struct mm_struct *mm;
struct mem_cgroup *mem, *old_mem;
- if (mem_cgroup_subsys.disabled)
- return;
-
mm = get_task_mm(p);
if (mm == NULL)
return;
diff --git a/mm/migrate.c b/mm/migrate.c
index 376cceb..d8c65a6 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -358,6 +358,9 @@
__inc_zone_page_state(newpage, NR_FILE_PAGES);
write_unlock_irq(&mapping->tree_lock);
+ if (!PageSwapCache(newpage)) {
+ mem_cgroup_uncharge_cache_page(page);
+ }
return 0;
}
@@ -611,7 +614,6 @@
rc = fallback_migrate_page(mapping, newpage, page);
if (!rc) {
- mem_cgroup_page_migration(page, newpage);
remove_migration_ptes(page, newpage);
} else
newpage->mapping = NULL;
@@ -641,6 +643,14 @@
/* page was freed from under us. So we are done. */
goto move_newpage;
+ charge = mem_cgroup_prepare_migration(page, newpage);
+ if (charge == -ENOMEM) {
+ rc = -ENOMEM;
+ goto move_newpage;
+ }
+ /* prepare cgroup just returns 0 or -ENOMEM */
+ BUG_ON(charge);
+
rc = -EAGAIN;
if (TestSetPageLocked(page)) {
if (!force)
@@ -692,19 +702,14 @@
goto rcu_unlock;
}
- charge = mem_cgroup_prepare_migration(page);
/* Establish migration ptes or remove ptes */
try_to_unmap(page, 1);
if (!page_mapped(page))
rc = move_to_new_page(newpage, page);
- if (rc) {
+ if (rc)
remove_migration_ptes(page, page);
- if (charge)
- mem_cgroup_end_migration(page);
- } else if (charge)
- mem_cgroup_end_migration(newpage);
rcu_unlock:
if (rcu_locked)
rcu_read_unlock();
@@ -725,6 +730,8 @@
}
move_newpage:
+ if (!charge)
+ mem_cgroup_end_migration(newpage);
/*
* Move the new page to the LRU. If migration was not successful
* then this will free the page.
diff --git a/mm/pdflush.c b/mm/pdflush.c
index 9d834aa..0cbe0c6 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -130,7 +130,7 @@
* Thread creation: For how long have there been zero
* available threads?
*/
- if (jiffies - last_empty_jifs > 1 * HZ) {
+ if (time_after(jiffies, last_empty_jifs + 1 * HZ)) {
/* unlocked list_empty() test is OK here */
if (list_empty(&pdflush_list)) {
/* unlocked test is OK here */
@@ -151,7 +151,7 @@
if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS)
continue;
pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
- if (jiffies - pdf->when_i_went_to_sleep > 1 * HZ) {
+ if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) {
/* Limit exit rate */
pdf->when_i_went_to_sleep = jiffies;
break; /* exeunt */
diff --git a/mm/rmap.c b/mm/rmap.c
index bf0a5b7..abbd29f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -576,14 +576,8 @@
VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
if (atomic_inc_and_test(&page->_mapcount))
__page_set_anon_rmap(page, vma, address);
- else {
+ else
__page_check_anon_rmap(page, vma, address);
- /*
- * We unconditionally charged during prepare, we uncharge here
- * This takes care of balancing the reference counts
- */
- mem_cgroup_uncharge_page(page);
- }
}
/**
@@ -614,12 +608,6 @@
{
if (atomic_inc_and_test(&page->_mapcount))
__inc_zone_page_state(page, NR_FILE_MAPPED);
- else
- /*
- * We unconditionally charged during prepare, we uncharge here
- * This takes care of balancing the reference counts
- */
- mem_cgroup_uncharge_page(page);
}
#ifdef CONFIG_DEBUG_VM
diff --git a/mm/shmem.c b/mm/shmem.c
index 9ffbea9..f92fea9 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -922,20 +922,26 @@
error = 1;
if (!inode)
goto out;
- /* Precharge page while we can wait, compensate afterwards */
+ /* Precharge page using GFP_KERNEL while we can wait */
error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
if (error)
goto out;
error = radix_tree_preload(GFP_KERNEL);
- if (error)
- goto uncharge;
+ if (error) {
+ mem_cgroup_uncharge_cache_page(page);
+ goto out;
+ }
error = 1;
spin_lock(&info->lock);
ptr = shmem_swp_entry(info, idx, NULL);
- if (ptr && ptr->val == entry.val)
+ if (ptr && ptr->val == entry.val) {
error = add_to_page_cache(page, inode->i_mapping,
idx, GFP_NOWAIT);
+ /* does mem_cgroup_uncharge_cache_page on error */
+ } else /* we must compensate for our precharge above */
+ mem_cgroup_uncharge_cache_page(page);
+
if (error == -EEXIST) {
struct page *filepage = find_get_page(inode->i_mapping, idx);
error = 1;
@@ -961,8 +967,6 @@
shmem_swp_unmap(ptr);
spin_unlock(&info->lock);
radix_tree_preload_end();
-uncharge:
- mem_cgroup_uncharge_page(page);
out:
unlock_page(page);
page_cache_release(page);
@@ -1311,17 +1315,14 @@
shmem_swp_unmap(entry);
spin_unlock(&info->lock);
unlock_page(swappage);
+ page_cache_release(swappage);
if (error == -ENOMEM) {
/* allow reclaim from this memory cgroup */
- error = mem_cgroup_cache_charge(swappage,
- current->mm, gfp & ~__GFP_HIGHMEM);
- if (error) {
- page_cache_release(swappage);
+ error = mem_cgroup_shrink_usage(current->mm,
+ gfp);
+ if (error)
goto failed;
- }
- mem_cgroup_uncharge_page(swappage);
}
- page_cache_release(swappage);
goto repeat;
}
} else if (sgp == SGP_READ && !filepage) {
@@ -1358,6 +1359,8 @@
}
if (!filepage) {
+ int ret;
+
spin_unlock(&info->lock);
filepage = shmem_alloc_page(gfp, info, idx);
if (!filepage) {
@@ -1386,10 +1389,18 @@
swap = *entry;
shmem_swp_unmap(entry);
}
- if (error || swap.val || 0 != add_to_page_cache_lru(
- filepage, mapping, idx, GFP_NOWAIT)) {
+ ret = error || swap.val;
+ if (ret)
+ mem_cgroup_uncharge_cache_page(filepage);
+ else
+ ret = add_to_page_cache_lru(filepage, mapping,
+ idx, GFP_NOWAIT);
+ /*
+ * At add_to_page_cache_lru() failure, uncharge will
+ * be done automatically.
+ */
+ if (ret) {
spin_unlock(&info->lock);
- mem_cgroup_uncharge_page(filepage);
page_cache_release(filepage);
shmem_unacct_blocks(info->flags, 1);
shmem_free_blocks(inode, 1);
@@ -1398,7 +1409,6 @@
goto failed;
goto repeat;
}
- mem_cgroup_uncharge_page(filepage);
info->flags |= SHMEM_PAGEIN;
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 967d30c..26672c6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -38,6 +38,7 @@
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/memcontrol.h>
+#include <linux/delayacct.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -1316,6 +1317,8 @@
struct zone *zone;
enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
+ delayacct_freepages_start();
+
if (scan_global_lru(sc))
count_vm_event(ALLOCSTALL);
/*
@@ -1396,6 +1399,8 @@
} else
mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority);
+ delayacct_freepages_end();
+
return ret;
}
diff --git a/net/802/psnap.c b/net/802/psnap.c
index ea46439..b3cfe5a 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -31,11 +31,9 @@
*/
static struct datalink_proto *find_snap_client(unsigned char *desc)
{
- struct list_head *entry;
struct datalink_proto *proto = NULL, *p;
- list_for_each_rcu(entry, &snap_list) {
- p = list_entry(entry, struct datalink_proto, node);
+ list_for_each_entry_rcu(p, &snap_list, node) {
if (!memcmp(p->type, desc, 5)) {
proto = p;
break;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index a570e2a..f686467 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -67,7 +67,7 @@
{
.ctl_name = NET_CORE_MSG_COST,
.procname = "message_cost",
- .data = &net_msg_cost,
+ .data = &net_ratelimit_state.interval,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
@@ -76,7 +76,7 @@
{
.ctl_name = NET_CORE_MSG_BURST,
.procname = "message_burst",
- .data = &net_msg_burst,
+ .data = &net_ratelimit_state.burst,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
diff --git a/net/core/utils.c b/net/core/utils.c
index 8031eb5..72e0ebe 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -31,17 +31,16 @@
#include <asm/system.h>
#include <asm/uaccess.h>
-int net_msg_cost __read_mostly = 5*HZ;
-int net_msg_burst __read_mostly = 10;
int net_msg_warn __read_mostly = 1;
EXPORT_SYMBOL(net_msg_warn);
+DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10);
/*
* All net warning printk()s should be guarded by this function.
*/
int net_ratelimit(void)
{
- return __printk_ratelimit(net_msg_cost, net_msg_burst);
+ return __ratelimit(&net_ratelimit_state);
}
EXPORT_SYMBOL(net_ratelimit);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index dd919d8..f440a9f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -264,7 +264,6 @@
static int inet_create(struct net *net, struct socket *sock, int protocol)
{
struct sock *sk;
- struct list_head *p;
struct inet_protosw *answer;
struct inet_sock *inet;
struct proto *answer_prot;
@@ -281,13 +280,12 @@
sock->state = SS_UNCONNECTED;
/* Look for the requested type/protocol pair. */
- answer = NULL;
lookup_protocol:
err = -ESOCKTNOSUPPORT;
rcu_read_lock();
- list_for_each_rcu(p, &inetsw[sock->type]) {
- answer = list_entry(p, struct inet_protosw, list);
+ list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
+ err = 0;
/* Check the non-wild match. */
if (protocol == answer->protocol) {
if (protocol != IPPROTO_IP)
@@ -302,10 +300,9 @@
break;
}
err = -EPROTONOSUPPORT;
- answer = NULL;
}
- if (unlikely(answer == NULL)) {
+ if (unlikely(err)) {
if (try_loading_module < 2) {
rcu_read_unlock();
/*
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3d828bc..60461ad 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -83,7 +83,6 @@
struct inet_sock *inet;
struct ipv6_pinfo *np;
struct sock *sk;
- struct list_head *p;
struct inet_protosw *answer;
struct proto *answer_prot;
unsigned char answer_flags;
@@ -97,13 +96,12 @@
build_ehash_secret();
/* Look for the requested type/protocol pair. */
- answer = NULL;
lookup_protocol:
err = -ESOCKTNOSUPPORT;
rcu_read_lock();
- list_for_each_rcu(p, &inetsw6[sock->type]) {
- answer = list_entry(p, struct inet_protosw, list);
+ list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) {
+ err = 0;
/* Check the non-wild match. */
if (protocol == answer->protocol) {
if (protocol != IPPROTO_IP)
@@ -118,10 +116,9 @@
break;
}
err = -EPROTONOSUPPORT;
- answer = NULL;
}
- if (!answer) {
+ if (err) {
if (try_loading_module < 2) {
rcu_read_unlock();
/*
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 007c1a6..63ada43 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -35,8 +35,22 @@
return &namespaces->net_ns->sysctl_table_headers;
}
+/* Return standard mode bits for table entry. */
+static int net_ctl_permissions(struct ctl_table_root *root,
+ struct nsproxy *nsproxy,
+ struct ctl_table *table)
+{
+ /* Allow network administrator to have same access as root. */
+ if (capable(CAP_NET_ADMIN)) {
+ int mode = (table->mode >> 6) & 7;
+ return (mode << 6) | (mode << 3) | mode;
+ }
+ return table->mode;
+}
+
static struct ctl_table_root net_sysctl_root = {
.lookup = net_ctl_header_lookup,
+ .permissions = net_ctl_permissions,
};
static LIST_HEAD(net_sysctl_ro_tables);
diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl
index 340ad69..3eca625 100755
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -26,12 +26,17 @@
# $& (whole re) matches the complete objdump line with the stack growth
# $1 (first bracket) matches the size of the stack growth
#
+# $dre is similar, but for dynamic stack reductions:
+# $& (whole re) matches the complete objdump line with the stack growth
+# $1 (first bracket) matches the dynamic amount of the stack growth
+#
# use anything else and feel the pain ;)
-my (@stack, $re, $x, $xs);
+my (@stack, $re, $dre, $x, $xs);
{
my $arch = shift;
if ($arch eq "") {
$arch = `uname -m`;
+ chomp($arch);
}
$x = "[0-9a-f]"; # hex character
@@ -46,9 +51,11 @@
} elsif ($arch =~ /^i[3456]86$/) {
#c0105234: 81 ec ac 05 00 00 sub $0x5ac,%esp
$re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%esp$/o;
+ $dre = qr/^.*[as][du][db] (%.*),\%esp$/o;
} elsif ($arch eq 'x86_64') {
# 2f60: 48 81 ec e8 05 00 00 sub $0x5e8,%rsp
$re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%rsp$/o;
+ $dre = qr/^.*[as][du][db] (\%.*),\%rsp$/o;
} elsif ($arch eq 'ia64') {
#e0000000044011fc: 01 0f fc 8c adds r12=-384,r12
$re = qr/.*adds.*r12=-(([0-9]{2}|[3-9])[0-9]{2}),r12/o;
@@ -85,7 +92,7 @@
# 0: 00 e8 38 01 LINK 0x4e0;
$re = qr/.*[[:space:]]LINK[[:space:]]*(0x$x{1,8})/o;
} else {
- print("wrong or unknown architecture\n");
+ print("wrong or unknown architecture \"$arch\"\n");
exit
}
}
@@ -141,6 +148,22 @@
next if ($size < 100);
push @stack, "$intro$size\n";
}
+ elsif (defined $dre && $line =~ m/$dre/) {
+ my $size = "Dynamic ($1)";
+
+ next if $line !~ m/^($xs*)/;
+ my $addr = $1;
+ $addr =~ s/ /0/g;
+ $addr = "0x$addr";
+
+ my $intro = "$addr $func [$file]:";
+ my $padlen = 56 - length($intro);
+ while ($padlen > 0) {
+ $intro .= ' ';
+ $padlen -= 8;
+ }
+ push @stack, "$intro$size\n";
+ }
}
print sort bysize @stack;
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index ddd92ce..7bd296c 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -41,6 +41,7 @@
short type;
short access;
struct list_head list;
+ struct rcu_head rcu;
};
struct dev_cgroup {
@@ -59,6 +60,11 @@
return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id));
}
+static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
+{
+ return css_to_devcgroup(task_subsys_state(task, devices_subsys_id));
+}
+
struct cgroup_subsys devices_subsys;
static int devcgroup_can_attach(struct cgroup_subsys *ss,
@@ -128,11 +134,19 @@
}
if (whcopy != NULL)
- list_add_tail(&whcopy->list, &dev_cgroup->whitelist);
+ list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist);
spin_unlock(&dev_cgroup->lock);
return 0;
}
+static void whitelist_item_free(struct rcu_head *rcu)
+{
+ struct dev_whitelist_item *item;
+
+ item = container_of(rcu, struct dev_whitelist_item, rcu);
+ kfree(item);
+}
+
/*
* called under cgroup_lock()
* since the list is visible to other tasks, we need the spinlock also
@@ -156,8 +170,8 @@
remove:
walk->access &= ~wh->access;
if (!walk->access) {
- list_del(&walk->list);
- kfree(walk);
+ list_del_rcu(&walk->list);
+ call_rcu(&walk->rcu, whitelist_item_free);
}
}
spin_unlock(&dev_cgroup->lock);
@@ -188,7 +202,7 @@
}
wh->minor = wh->major = ~0;
wh->type = DEV_ALL;
- wh->access = ACC_MKNOD | ACC_READ | ACC_WRITE;
+ wh->access = ACC_MASK;
list_add(&wh->list, &dev_cgroup->whitelist);
} else {
parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
@@ -250,11 +264,10 @@
static void set_majmin(char *str, unsigned m)
{
- memset(str, 0, MAJMINLEN);
if (m == ~0)
- sprintf(str, "*");
+ strcpy(str, "*");
else
- snprintf(str, MAJMINLEN, "%u", m);
+ sprintf(str, "%u", m);
}
static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
@@ -264,15 +277,15 @@
struct dev_whitelist_item *wh;
char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
- spin_lock(&devcgroup->lock);
- list_for_each_entry(wh, &devcgroup->whitelist, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(wh, &devcgroup->whitelist, list) {
set_access(acc, wh->access);
set_majmin(maj, wh->major);
set_majmin(min, wh->minor);
seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type),
maj, min, acc);
}
- spin_unlock(&devcgroup->lock);
+ rcu_read_unlock();
return 0;
}
@@ -312,10 +325,10 @@
* when adding a new allow rule to a device whitelist, the rule
* must be allowed in the parent device
*/
-static int parent_has_perm(struct cgroup *childcg,
+static int parent_has_perm(struct dev_cgroup *childcg,
struct dev_whitelist_item *wh)
{
- struct cgroup *pcg = childcg->parent;
+ struct cgroup *pcg = childcg->css.cgroup->parent;
struct dev_cgroup *parent;
int ret;
@@ -341,39 +354,19 @@
* new access is only allowed if you're in the top-level cgroup, or your
* parent cgroup has the access you're asking for.
*/
-static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
- struct file *file, const char __user *userbuf,
- size_t nbytes, loff_t *ppos)
+static int devcgroup_update_access(struct dev_cgroup *devcgroup,
+ int filetype, const char *buffer)
{
- struct cgroup *cur_cgroup;
- struct dev_cgroup *devcgroup, *cur_devcgroup;
- int filetype = cft->private;
- char *buffer, *b;
+ struct dev_cgroup *cur_devcgroup;
+ const char *b;
+ char *endp;
int retval = 0, count;
struct dev_whitelist_item wh;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- devcgroup = cgroup_to_devcgroup(cgroup);
- cur_cgroup = task_cgroup(current, devices_subsys.subsys_id);
- cur_devcgroup = cgroup_to_devcgroup(cur_cgroup);
-
- buffer = kmalloc(nbytes+1, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
-
- if (copy_from_user(buffer, userbuf, nbytes)) {
- retval = -EFAULT;
- goto out1;
- }
- buffer[nbytes] = 0; /* nul-terminate */
-
- cgroup_lock();
- if (cgroup_is_removed(cgroup)) {
- retval = -ENODEV;
- goto out2;
- }
+ cur_devcgroup = task_devcgroup(current);
memset(&wh, 0, sizeof(wh));
b = buffer;
@@ -392,32 +385,23 @@
wh.type = DEV_CHAR;
break;
default:
- retval = -EINVAL;
- goto out2;
+ return -EINVAL;
}
b++;
- if (!isspace(*b)) {
- retval = -EINVAL;
- goto out2;
- }
+ if (!isspace(*b))
+ return -EINVAL;
b++;
if (*b == '*') {
wh.major = ~0;
b++;
} else if (isdigit(*b)) {
- wh.major = 0;
- while (isdigit(*b)) {
- wh.major = wh.major*10+(*b-'0');
- b++;
- }
+ wh.major = simple_strtoul(b, &endp, 10);
+ b = endp;
} else {
- retval = -EINVAL;
- goto out2;
+ return -EINVAL;
}
- if (*b != ':') {
- retval = -EINVAL;
- goto out2;
- }
+ if (*b != ':')
+ return -EINVAL;
b++;
/* read minor */
@@ -425,19 +409,13 @@
wh.minor = ~0;
b++;
} else if (isdigit(*b)) {
- wh.minor = 0;
- while (isdigit(*b)) {
- wh.minor = wh.minor*10+(*b-'0');
- b++;
- }
+ wh.minor = simple_strtoul(b, &endp, 10);
+ b = endp;
} else {
- retval = -EINVAL;
- goto out2;
+ return -EINVAL;
}
- if (!isspace(*b)) {
- retval = -EINVAL;
- goto out2;
- }
+ if (!isspace(*b))
+ return -EINVAL;
for (b++, count = 0; count < 3; count++, b++) {
switch (*b) {
case 'r':
@@ -454,8 +432,7 @@
count = 3;
break;
default:
- retval = -EINVAL;
- goto out2;
+ return -EINVAL;
}
}
@@ -463,38 +440,39 @@
retval = 0;
switch (filetype) {
case DEVCG_ALLOW:
- if (!parent_has_perm(cgroup, &wh))
- retval = -EPERM;
- else
- retval = dev_whitelist_add(devcgroup, &wh);
- break;
+ if (!parent_has_perm(devcgroup, &wh))
+ return -EPERM;
+ return dev_whitelist_add(devcgroup, &wh);
case DEVCG_DENY:
dev_whitelist_rm(devcgroup, &wh);
break;
default:
- retval = -EINVAL;
- goto out2;
+ return -EINVAL;
}
+ return 0;
+}
- if (retval == 0)
- retval = nbytes;
-
-out2:
+static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft,
+ const char *buffer)
+{
+ int retval;
+ if (!cgroup_lock_live_group(cgrp))
+ return -ENODEV;
+ retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp),
+ cft->private, buffer);
cgroup_unlock();
-out1:
- kfree(buffer);
return retval;
}
static struct cftype dev_cgroup_files[] = {
{
.name = "allow",
- .write = devcgroup_access_write,
+ .write_string = devcgroup_access_write,
.private = DEVCG_ALLOW,
},
{
.name = "deny",
- .write = devcgroup_access_write,
+ .write_string = devcgroup_access_write,
.private = DEVCG_DENY,
},
{
@@ -535,8 +513,8 @@
if (!dev_cgroup)
return 0;
- spin_lock(&dev_cgroup->lock);
- list_for_each_entry(wh, &dev_cgroup->whitelist, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) {
if (wh->type & DEV_ALL)
goto acc_check;
if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode))
@@ -552,10 +530,10 @@
continue;
if ((mask & MAY_READ) && !(wh->access & ACC_READ))
continue;
- spin_unlock(&dev_cgroup->lock);
+ rcu_read_unlock();
return 0;
}
- spin_unlock(&dev_cgroup->lock);
+ rcu_read_unlock();
return -EPERM;
}
@@ -570,7 +548,7 @@
if (!dev_cgroup)
return 0;
- spin_lock(&dev_cgroup->lock);
+ rcu_read_lock();
list_for_each_entry(wh, &dev_cgroup->whitelist, list) {
if (wh->type & DEV_ALL)
goto acc_check;
@@ -585,9 +563,9 @@
acc_check:
if (!(wh->access & ACC_MKNOD))
continue;
- spin_unlock(&dev_cgroup->lock);
+ rcu_read_unlock();
return 0;
}
- spin_unlock(&dev_cgroup->lock);
+ rcu_read_unlock();
return -EPERM;
}