NLS: update handling of Unicode This patch (as1239) updates the kernel's treatment of Unicode. The character-set conversion routines are well behind the current state of the Unicode specification: They don't recognize the existence of code points beyond plane 0 or of surrogate pairs in the UTF-16 encoding. The old wchar_t 16-bit type is retained because it's still used in lots of places. This shouldn't cause any new problems; if a conversion now results in an invalid 16-bit code then before it must have yielded an undefined code. Difficult-to-read names like "utf_mbstowcs" are replaced with more transparent names like "utf8s_to_utf16s" and the ordering of the parameters is rationalized (buffer lengths come immediate after the pointers they refer to, and the inputs precede the outputs). Fortunately the low-level conversion routines are used in only a few places; the interfaces to the higher-level uni2char and char2uni methods have been left unchanged. Signed-off-by: Alan Stern <stern@rowland.harvard.edu> Acked-by: Clemens Ladisch <clemens@ladisch.de> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

commit: 74675a58507e769beee7d949dbed788af3c4139d [log] [tgz]
author: Alan Stern <stern@rowland.harvard.edu> Thu Apr 30 10:08:18 2009 -0400
committer: Greg Kroah-Hartman <gregkh@suse.de> Mon Jun 15 21:44:43 2009 -0700
tree: d4ae3cc06dbfadecf1eaf6ed0aef249fc87b07e6
parent: a853a3d4eb2edb066248a39f0634f6f5858816a0 [diff] [blame]
diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c
index 92c14b8..a048de8 100644
--- a/fs/isofs/joliet.c
+++ b/fs/isofs/joliet.c

@@ -37,37 +37,6 @@
 	return (op - ascii);
 }
 
-/* Convert big endian wide character string to utf8 */
-static int
-wcsntombs_be(__u8 *s, const __u8 *pwcs, int inlen, int maxlen)
-{
-	const __u8 *ip;
-	__u8 *op;
-	int size;
-	__u16 c;
-
-	op = s;
-	ip = pwcs;
-	while ((*ip || ip[1]) && (maxlen > 0) && (inlen > 0)) {
-		c = (*ip << 8) | ip[1];
-		if (c > 0x7f) {
-			size = utf8_wctomb(op, c, maxlen);
-			if (size == -1) {
-				/* Ignore character and move on */
-				maxlen--;
-			} else {
-				op += size;
-				maxlen -= size;
-			}
-		} else {
-			*op++ = (__u8) c;
-		}
-		ip += 2;
-		inlen--;
-	}
-	return (op - s);
-}
-
 int
 get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode)
 {
@@ -79,8 +48,9 @@
 	nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset;
 
 	if (utf8) {
-		len = wcsntombs_be(outname, de->name,
-				de->name_len[0] >> 1, PAGE_SIZE);
+		len = utf16s_to_utf8s((const wchar_t *) de->name,
+				de->name_len[0] >> 1, UTF16_BIG_ENDIAN,
+				outname, PAGE_SIZE);
 	} else {
 		len = uni16_to_x8(outname, (__be16 *) de->name,
 				de->name_len[0] >> 1, nls);
commit	74675a58507e769beee7d949dbed788af3c4139d	[log] [tgz]
author	Alan Stern <stern@rowland.harvard.edu>	Thu Apr 30 10:08:18 2009 -0400
committer	Greg Kroah-Hartman <gregkh@suse.de>	Mon Jun 15 21:44:43 2009 -0700
tree	d4ae3cc06dbfadecf1eaf6ed0aef249fc87b07e6
parent	a853a3d4eb2edb066248a39f0634f6f5858816a0 [diff] [blame]