NLS: update handling of Unicode
This patch (as1239) updates the kernel's treatment of Unicode. The
character-set conversion routines are well behind the current state of
the Unicode specification: They don't recognize the existence of code
points beyond plane 0 or of surrogate pairs in the UTF-16 encoding.
The old wchar_t 16-bit type is retained because it's still used in
lots of places. This shouldn't cause any new problems; if a
conversion now results in an invalid 16-bit code then before it must
have yielded an undefined code.
Difficult-to-read names like "utf_mbstowcs" are replaced with more
transparent names like "utf8s_to_utf16s" and the ordering of the
parameters is rationalized (buffer lengths come immediate after the
pointers they refer to, and the inputs precede the outputs).
Fortunately the low-level conversion routines are used in only a few
places; the interfaces to the higher-level uni2char and char2uni
methods have been left unchanged.
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c
index 92c14b8..a048de8 100644
--- a/fs/isofs/joliet.c
+++ b/fs/isofs/joliet.c
@@ -37,37 +37,6 @@
return (op - ascii);
}
-/* Convert big endian wide character string to utf8 */
-static int
-wcsntombs_be(__u8 *s, const __u8 *pwcs, int inlen, int maxlen)
-{
- const __u8 *ip;
- __u8 *op;
- int size;
- __u16 c;
-
- op = s;
- ip = pwcs;
- while ((*ip || ip[1]) && (maxlen > 0) && (inlen > 0)) {
- c = (*ip << 8) | ip[1];
- if (c > 0x7f) {
- size = utf8_wctomb(op, c, maxlen);
- if (size == -1) {
- /* Ignore character and move on */
- maxlen--;
- } else {
- op += size;
- maxlen -= size;
- }
- } else {
- *op++ = (__u8) c;
- }
- ip += 2;
- inlen--;
- }
- return (op - s);
-}
-
int
get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode)
{
@@ -79,8 +48,9 @@
nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset;
if (utf8) {
- len = wcsntombs_be(outname, de->name,
- de->name_len[0] >> 1, PAGE_SIZE);
+ len = utf16s_to_utf8s((const wchar_t *) de->name,
+ de->name_len[0] >> 1, UTF16_BIG_ENDIAN,
+ outname, PAGE_SIZE);
} else {
len = uni16_to_x8(outname, (__be16 *) de->name,
de->name_len[0] >> 1, nls);