Sorry, the attachment was removed from the previous message. If the inline
patch also gets damaged (probably by automatic line breaks), contact me and
I will send you a copy personally.

--- /usr/src/sys/msdosfs/direntry.h     Thu Mar 14 09:27:09 2002
+++ direntry.h  Mon Feb  2 17:29:04 2009
@@ -126,9 +126,12 @@
 void dos2unixtime(u_int dd, u_int dt, u_int dh, struct timespec *tsp);
 int dos2unixfn(u_char dn[11], u_char *un, int lower);
 int unix2dosfn(u_char *un, u_char dn[12], int unlen, u_int gen);
-int unix2winfn(u_char *un, int unlen, struct winentry *wep, int cnt,
int chksum);
-int winChkName(u_char *un, int unlen, struct winentry *wep, int chksum);
-int win2unixfn(struct winentry *wep, struct dirent *dp, int chksum);
+int unix2winfn(u_int16_t *un, int unlen, struct winentry *wep, int
cnt, int chksum);
+int winChkName(u_int16_t *un, int unlen, struct winentry *wep, int chksum);
+int win2unixfn(struct winentry *wep, u_int16_t *utf16, int *utf16len,
int chksum);
 u_int8_t winChksum(u_int8_t *name);
 int winSlotCnt(u_char *un, int unlen);
+int utf16len(u_char *, int);
+int utf8to16(u_char *, int, u_int16_t *);
+int utf16to8(u_int16_t *, int, u_char *, int);
 #endif /* _KERNEL */
--- /usr/src/sys/msdosfs/msdosfs_conv.c Fri May 14 13:05:05 2004
+++ msdosfs_conv.c      Tue Feb  3 12:58:29 2009
@@ -564,19 +564,21 @@
  */
 int
 unix2winfn(un, unlen, wep, cnt, chksum)
-       u_char *un;
+       u_int16_t *un;
        int unlen;
        struct winentry *wep;
        int cnt;
        int chksum;
 {
        u_int8_t *cp;
+       u_int16_t *up;
+       u_int16_t ch;
        int i;

        /*
         * Drop trailing blanks and dots
         */
-       for (cp = un + unlen; *--cp == ' ' || *cp == '.'; unlen--);
+       for (up = un + unlen; *--up == ' ' || *up == '.'; unlen--);

        un += (cnt - 1) * WIN_CHARS;
        unlen -= (cnt - 1) * WIN_CHARS;
@@ -597,20 +599,23 @@
        for (cp = wep->wePart1, i = sizeof(wep->wePart1)/2; --i >= 0;) {
                if (--unlen < 0)
                        goto done;
-               *cp++ = *un++;
-               *cp++ = 0;
+               ch = *un++;
+               *cp++ = ch & 0xff;
+               *cp++ = ch >> 8;
        }
        for (cp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0;) {
                if (--unlen < 0)
                        goto done;
-               *cp++ = *un++;
-               *cp++ = 0;
+               ch = *un++;
+               *cp++ = ch & 0xff;
+               *cp++ = ch >> 8;
        }
        for (cp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0;) {
                if (--unlen < 0)
                        goto done;
-               *cp++ = *un++;
-               *cp++ = 0;
+               ch = *un++;
+               *cp++ = ch & 0xff;
+               *cp++ = ch >> 8;
        }
        if (!unlen)
                wep->weCnt |= WIN_LAST;
@@ -629,12 +634,13 @@
  */
 int
 winChkName(un, unlen, wep, chksum)
-       u_char *un;
+       u_int16_t *un;
        int unlen;
        struct winentry *wep;
        int chksum;
 {
        u_int8_t *cp;
+       u_int16_t ch;
        int i;

        /*
@@ -662,30 +668,39 @@
         * Compare the name parts
         */
        for (cp = wep->wePart1, i = sizeof(wep->wePart1)/2; --i >= 0;) {
+               ch = *cp++;
+               ch |= *cp++ << 8;
                if (--unlen < 0) {
-                       if (!*cp++ && !*cp)
+                       if (!ch)
                                return chksum;
                        return -1;
                }
-               if (u2l[*cp++] != u2l[*un++] || *cp++)
+               /*
+                * TODO ignore case on comparison
+                */
+               if (ch != *un++)
                        return -1;
        }
        for (cp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0;) {
+               ch = *cp++;
+               ch |= *cp++ << 8;
                if (--unlen < 0) {
-                       if (!*cp++ && !*cp)
+                       if (!ch)
                                return chksum;
                        return -1;
                }
-               if (u2l[*cp++] != u2l[*un++] || *cp++)
+               if (ch != *un++)
                        return -1;
        }
        for (cp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0;) {
+               ch = *cp++;
+               ch |= *cp++ << 8;
                if (--unlen < 0) {
-                       if (!*cp++ && !*cp)
+                       if (!ch)
                                return chksum;
                        return -1;
                }
-               if (u2l[*cp++] != u2l[*un++] || *cp++)
+               if (ch != *un++)
                        return -1;
        }
        return chksum;
@@ -696,13 +711,15 @@
  * Returns the checksum or -1 if impossible
  */
 int
-win2unixfn(wep, dp, chksum)
+win2unixfn(wep, utf16, utf16len, chksum)
        struct winentry *wep;
-       struct dirent *dp;
+       u_int16_t *utf16;
+       int *utf16len;
        int chksum;
 {
        u_int8_t *cp;
-       u_int8_t *np, *ep = dp->d_name + WIN_MAXLEN;
+       u_int16_t *np, *ep = utf16 + WIN_MAXLEN;
+       u_int16_t ch;
        int i;

        if ((wep->weCnt&WIN_CNT) > howmany(WIN_MAXLEN, WIN_CHARS)
@@ -717,7 +734,7 @@
                /*
                 * This works even though d_namlen is one byte!
                 */
-               dp->d_namlen = (wep->weCnt&WIN_CNT) * WIN_CHARS;
+               *utf16len = (wep->weCnt&WIN_CNT) * WIN_CHARS;
        } else if (chksum != wep->weChksum)
                chksum = -1;
        if (chksum == -1)
@@ -727,15 +744,17 @@
         * Offset of this entry
         */
        i = ((wep->weCnt&WIN_CNT) - 1) * WIN_CHARS;
-       np = (u_int8_t *)dp->d_name + i;
+       np = utf16 + i;
        
        /*
         * Convert the name parts
         */
        for (cp = wep->wePart1, i = sizeof(wep->wePart1)/2; --i >= 0;) {
-               switch (*np++ = *cp++) {
+               ch = *cp++;
+               ch |= *cp++ << 8;
+               switch (*np++ = ch) {
                case 0:
-                       dp->d_namlen -= sizeof(wep->wePart2)/2
+                       *utf16len -= sizeof(wep->wePart2)/2
                            + sizeof(wep->wePart3)/2 + i + 1;
                        return chksum;
                case '/':
@@ -751,13 +770,13 @@
                        np[-1] = 0;
                        return -1;
                }
-               if (*cp++)
-                       return -1;
        }
        for (cp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0;) {
-               switch (*np++ = *cp++) {
+               ch = *cp++;
+               ch |= *cp++ << 8;
+               switch (*np++ = ch) {
                case 0:
-                       dp->d_namlen -= sizeof(wep->wePart3)/2 + i + 1;
+                       *utf16len -= sizeof(wep->wePart3)/2 + i + 1;
                        return chksum;
                case '/':
                        np[-1] = 0;
@@ -772,13 +791,13 @@
                        np[-1] = 0;
                        return -1;
                }
-               if (*cp++)
-                       return -1;
        }
        for (cp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0;) {
-               switch (*np++ = *cp++) {
+               ch = *cp++;
+               ch |= *cp++ << 8;
+               switch (*np++ = ch) {
                case 0:
-                       dp->d_namlen -= i + 1;
+                       *utf16len -= i + 1;
                        return chksum;
                case '/':
                        np[-1] = 0;
@@ -792,8 +811,6 @@
                        np[-1] = 0;
                        return -1;
                }
-               if (*cp++)
-                       return -1;
        }
        return chksum;
 }
@@ -821,10 +838,180 @@
        u_char *un;
        int unlen;
 {
-       for (un += unlen; unlen > 0; unlen--)
-               if (*--un != ' ' && *un != '.')
+       u_char *en;
+       for (en = un + unlen; unlen > 0; unlen--)
+               if (*--en != ' ' && *en != '.')
                        break;
+       unlen = utf16len(un, unlen);
        if (unlen > WIN_MAXLEN)
                return 0;
        return howmany(unlen, WIN_CHARS);
+}
+
+int utf16len(u_char *un, int unlen) {
+       u_char *cp;
+       u_char c, c2;
+       int count;
+       for (cp = un + unlen; unlen > 0; unlen--)
+               if (*--cp != ' ' && *cp != '.')
+                       break;
+       count = 0;
+       for (; unlen > 0; unlen--) {
+               c = *un++;
+               if (c < 0x80)
+                       count++;
+               /* deny overlong */
+               else if (c < 0xc2)
+                       return 0;
+               else if (c < 0xe0) {
+                       if (--unlen == 0)
+                               return 0;
+                       c2 = *un++;
+                       if (c2 < 0x80 || c2 > 0xbf)
+                               return 0;
+                       count++;
+               }
+               else if (c < 0xf0) {
+                       if ((unlen -= 2) == 0)
+                               return 0;
+                       c2 = *un++;
+                       /* deny overlong */
+                       if (c == 0xe0 && c2 < 0xa0)
+                               return 0;
+                       if (c2 < 0x80 || c2 > 0xbf)
+                               return 0;
+                       c2 = *un++;
+                       if (c2 < 0x80 || c2 > 0xbf)
+                               return 0;
+                       count++;
+               }
+               else if (c < 0xf5) {
+                       if ((unlen -= 3) == 0)
+                               return 0;
+                       c2 = *un++;
+                       /* deny overlong */
+                       if (c == 0xf0 && c2 < 0x90)
+                               return 0;
+                       if (c == 0xf4 && c2 > 0x8f)
+                               return 0;
+                       if (c2 < 0x80 || c2 > 0xbf)
+                               return 0;
+                       c2 = *un++;
+                       if (c2 < 0x80 || c2 > 0xbf)
+                               return 0;
+                       c2 = *un++;
+                       if (c2 < 0x80 || c2 > 0xbf)
+                               return 0;
+                       count += 2;
+               }
+               else
+                       return 0;
+       }
+       return count;
+}
+
+int utf8to16(u_char *un, int unlen, u_int16_t *utf16) {
+       u_char c, c2, c3, c4;
+       int count = 0;
+       u_int32_t ch;
+       for (; unlen > 0; unlen--) {
+               c = *un++;
+               if (c < 0x80)
+                       utf16[count++] = c;
+               /* deny overlong */
+               else if (c < 0xc2)
+                       return 0;
+               else if (c < 0xe0) {
+                       if (--unlen == 0)
+                               return 0;
+                       c2 = *un++;
+                       if (c2 < 0x80 || c2 > 0xbf)
+                               return 0;
+                       utf16[count++] = ((c & 0x1f) << 6) | (c2 & 0x3f);
+               }
+               else if (c < 0xf0) {
+                       if ((unlen -= 2) == 0)
+                               return 0;
+                       c2 = *un++;
+                       /* deny overlong */
+                       if (c == 0xe0 && c2 < 0xa0)
+                               return 0;
+                       if (c2 < 0x80 || c2 > 0xbf)
+                               return 0;
+                       c3 = *un++;
+                       if (c3 < 0x80 || c3 > 0xbf)
+                               return 0;
+                       utf16[count++] = ((c & 0x0f) << 12) | ((c2 & 0x3f) << 
6) |
+                                       (c3 & 0x3f);
+               }
+               else if (c < 0xf5) {
+                       if ((unlen -= 3) == 0)
+                               return 0;
+                       c2 = *un++;
+                       /* deny overlong */
+                       if (c == 0xf0 && c2 < 0x90)
+                               return 0;
+                       if (c == 0xf4 && c2 > 0x8f)
+                               return 0;
+                       if (c2 < 0x80 || c2 > 0xbf)
+                               return 0;
+                       c3 = *un++;
+                       if (c3 < 0x80 || c3 > 0xbf)
+                               return 0;
+                       c4 = *un++;
+                       if (c4 < 0x80 || c4 > 0xbf)
+                               return 0;
+                       ch = (((c & 0x07) << 18) | ((c2 & 0x3f) << 12) |
+                                       ((c3 & 0x3f) << 6) | (c4 & 0x3f)) - 
0x10000;
+                       utf16[count++] = (ch & 0x03ff) | 0xd800;
+                       utf16[count++] = (ch >> 10) | 0xdc00;
+               }
+               else
+                       return 0;
+       }
+       return count;
+}
+
+int utf16to8(u_int16_t *utf16, int utflen, u_char *un, int unlen) {
+       int i;
+       u_int16_t c, c2;
+       u_int32_t ch;
+       int count = 0;
+       for (i = 0; i < utflen && count < unlen; i++) {
+               c = utf16[i];
+               if (c < 0x80)
+                       un[count++] = c;
+               else if (c < 0x800) {
+                       if (count + 2 > unlen)
+                               return 0;
+                       un[count++] = 0xc0 | (c >> 6);
+                       un[count++] = 0x80 | (c & 0x3f);
+               }
+               else if (c >= 0xd800 && c < 0xdc00) {
+                       if (i++ >= utflen)
+                               return 0;
+                       c2 = utf16[i];
+                       if (c2 < 0xdc00 || c2 >= 0xe000)
+                               return 0;
+                       if (count + 4 > unlen)
+                               return 0;
+                       ch = ((c & 0x03ff) | ((c2 & 0x03ff) << 10)) + 0x10000;
+                       un[count++] = 0xf0 | (ch >> 18);
+                       un[count++] = 0x80 | ((ch >> 12) & 0x3f);
+                       un[count++] = 0x80 | ((ch >> 6) & 0x3f);
+                       un[count++] = 0x80 | (ch & 0x3f);
+               }
+               else if (c >= 0xdc00 && c < 0xe000)
+                       return 0;
+               else {
+                       if (count + 3 > unlen)
+                               return 0;
+                       un[count++] = 0xe0 | (c >> 12);
+                       un[count++] = 0x80 | ((c >> 6) & 0x3f);
+                       un[count++] = 0x80 | (c & 0x3f);
+               }
+       }
+       if (i < utflen)
+               return 0;
+       return count;
 }
--- /usr/src/sys/msdosfs/msdosfs_lookup.c       Tue Dec 18 11:00:46 2007
+++ msdosfs_lookup.c    Tue Feb  3 12:59:13 2009
@@ -218,6 +218,8 @@
                        break;

        tdp = NULL;
+       u_int16_t utf16[MAXNAMLEN];
+       int utf16len = utf8to16((u_char *)cnp->cn_nameptr, adjlen, utf16);
        /*
         * The outer loop ranges over the clusters that make up the
         * directory.  Note that the root directory is different from all
@@ -280,8 +282,8 @@
                                        if (pmp->pm_flags & 
MSDOSFSMNT_SHORTNAME)
                                                continue;

-                                       chksum = winChkName((u_char 
*)cnp->cn_nameptr,
-                                                           adjlen,
+                                       chksum = winChkName(utf16,
+                                                           utf16len,
                                                            (struct winentry 
*)dep,
                                                            chksum);
                                        continue;
@@ -663,6 +665,9 @@
                u_char *un = (u_char *)cnp->cn_nameptr;
                int unlen = cnp->cn_namelen;
                int cnt = 1;
+               u_int16_t utf16[MAXNAMLEN];
+               int utf16len = utf8to16(un, unlen, utf16);
+               /* TODO check utf16len and do something */

                while (--ddep->de_fndcnt >= 0) {
                        if (!(ddep->de_fndoffset & pmp->pm_crbomask)) {
@@ -688,7 +693,7 @@
                                ndep--;
                                ddep->de_fndoffset -= sizeof(struct direntry);
                        }
-                       if (!unix2winfn(un, unlen, (struct winentry *)ndep, 
cnt++, chksum))
+                       if (!unix2winfn(utf16, utf16len, (struct winentry 
*)ndep, cnt++, chksum))
                                break;
                }
        }
--- /usr/src/sys/msdosfs/msdosfs_vnops.c        Tue Jun 17 04:29:38 2008
+++ msdosfs_vnops.c     Mon Feb  2 17:51:06 2009
@@ -1395,6 +1395,8 @@
        int ncookies = 0;
        off_t offset, wlast = -1;
        int chksum = -1;
+       u_int16_t utf16[WIN_MAXLEN];
+       int utf16len = 0;

 #ifdef MSDOSFS_DEBUG
        printf("msdosfs_readdir(): vp %08x, uio %08x, cred %08x, eofflagp 
%08x\n",
@@ -1540,7 +1542,7 @@
                                if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME)
                                        continue;
                                wep = (struct winentry *)dentp;
-                               chksum = win2unixfn(wep, &dirbuf, chksum);
+                               chksum = win2unixfn(wep, utf16, &utf16len, 
chksum);
                                if (wep->weCnt & WIN_LAST)
                                        wlast = offset;
                                continue;
@@ -1595,8 +1597,10 @@
                                dirbuf.d_namlen = dos2unixfn(dentp->deName,
                                    (u_char *)dirbuf.d_name,
                                    pmp->pm_flags & MSDOSFSMNT_SHORTNAME);
-                       else
+                       else {
+                               dirbuf.d_namlen = utf16to8(utf16, utf16len, 
dirbuf.d_name, MAXNAMLEN);
                                dirbuf.d_name[dirbuf.d_namlen] = 0;
+                       }
                        chksum = -1;
                        dirbuf.d_reclen = DIRENT_SIZE(&dirbuf);
                        if (uio->uio_resid < dirbuf.d_reclen) {

Reply via email to