> On 13 Jan 2023, at 01:35, Maxim Dounin <mdou...@mdounin.ru> wrote: > > # HG changeset patch > # User Maxim Dounin <mdou...@mdounin.ru> > # Date 1673548890 -10800 > # Thu Jan 12 21:41:30 2023 +0300 > # Node ID 60d845f9505fe1b97c1e04b680523b790e29fdb1 > # Parent 07b0bee87f32be91a33210bc06973e07c4c1dac9 > Win32: non-ASCII names support in autoindex (ticket #458). > > Notably, ngx_open_dir() now supports opening directories with non-ASCII > characters, and directory entries returned by ngx_read_dir() are properly > converted to UTF-8. > > diff -r 07b0bee87f32 -r 60d845f9505f src/os/win32/ngx_files.c > --- a/src/os/win32/ngx_files.c Wed Dec 21 14:53:27 2022 +0300 > +++ b/src/os/win32/ngx_files.c Thu Jan 12 21:41:30 2023 +0300 > @@ -13,7 +13,11 @@ > > static ngx_int_t ngx_win32_check_filename(u_char *name, u_short *u, > size_t len); > -static u_short *ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len); > +static u_short *ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len, > + size_t reserved); > +static u_char *ngx_utf16_to_utf8(u_char *utf8, u_short *utf16, size_t *len, > + size_t *allocated); > +uint32_t ngx_utf16_decode(u_short **u, size_t n); > > > /* FILE_FLAG_BACKUP_SEMANTICS allows to obtain a handle to a directory */ > @@ -28,7 +32,7 @@ ngx_open_file(u_char *name, u_long mode, > u_short utf16[NGX_UTF16_BUFLEN]; > > len = NGX_UTF16_BUFLEN; > - u = ngx_utf8_to_utf16(utf16, name, &len); > + u = ngx_utf8_to_utf16(utf16, name, &len, 0); > > if (u == NULL) { > return INVALID_HANDLE_VALUE; > @@ -269,7 +273,7 @@ ngx_file_info(u_char *file, ngx_file_inf > > len = NGX_UTF16_BUFLEN; > > - u = ngx_utf8_to_utf16(utf16, file, &len); > + u = ngx_utf8_to_utf16(utf16, file, &len, 0); > > if (u == NULL) { > return NGX_FILE_ERROR; > @@ -427,49 +431,51 @@ ngx_realpath(u_char *path, u_char *resol > ngx_int_t > ngx_open_dir(ngx_str_t *name, ngx_dir_t *dir) > { > - u_char *pattern, *p; > + size_t len; > + u_short *u, *p; > ngx_err_t err; > + u_short utf16[NGX_UTF16_BUFLEN]; > > - pattern = malloc(name->len + 3); > - if (pattern == NULL) { > + len = NGX_UTF16_BUFLEN - 2; > + u = ngx_utf8_to_utf16(utf16, name->data, &len, 2); > + > + if (u == NULL) { > return NGX_ERROR; > } > > - p = ngx_cpymem(pattern, name->data, name->len); > + if (ngx_win32_check_filename(name->data, u, len) != NGX_OK) { > + goto failed; > + } > + > + p = &u[len - 1]; > > *p++ = '/'; > *p++ = '*'; > *p = '\0'; > > - dir->dir = FindFirstFile((const char *) pattern, &dir->finddata); > + dir->dir = FindFirstFileW(u, &dir->finddata); > > if (dir->dir == INVALID_HANDLE_VALUE) { > - err = ngx_errno; > - ngx_free(pattern); > - ngx_set_errno(err); > - return NGX_ERROR; > + goto failed; > } > > - ngx_free(pattern); > + if (u != utf16) { > + ngx_free(u); > + } > > dir->valid_info = 1; > dir->ready = 1; > + dir->name = NULL; > + dir->allocated = 0; > > return NGX_OK; > -} > > +failed: > > -ngx_int_t > -ngx_read_dir(ngx_dir_t *dir) > -{ > - if (dir->ready) { > - dir->ready = 0; > - return NGX_OK; > - } > - > - if (FindNextFile(dir->dir, &dir->finddata) != 0) { > - dir->type = 1; > - return NGX_OK; > + if (u != utf16) { > + err = ngx_errno; > + ngx_free(u); > + ngx_set_errno(err); > } > > return NGX_ERROR; > @@ -477,8 +483,56 @@ ngx_read_dir(ngx_dir_t *dir) > > > ngx_int_t > +ngx_read_dir(ngx_dir_t *dir) > +{ > + u_char *name; > + size_t len, allocated; > + > + if (dir->ready) { > + dir->ready = 0; > + goto convert; > + } > + > + if (FindNextFileW(dir->dir, &dir->finddata) != 0) { > + dir->type = 1; > + goto convert; > + } > + > + return NGX_ERROR; > + > +convert: > + > + name = dir->name; > + len = dir->allocated; > + > + name = ngx_utf16_to_utf8(name, dir->finddata.cFileName, &len, > &allocated); > + if (name == NULL) {
Note that you just return on NULL, but see below. > + return NGX_ERROR; > + } > + > + if (name != dir->name) { > + > + if (dir->name) { > + ngx_free(dir->name); > + } > + > + dir->name = name; > + dir->allocated = allocated; > + } > + > + dir->namelen = len - 1; > + > + return NGX_OK; > +} > + > + > +ngx_int_t > ngx_close_dir(ngx_dir_t *dir) > { > + if (dir->name) { > + ngx_free(dir->name); > + } > + > if (FindClose(dir->dir) == 0) { > return NGX_ERROR; > } > @@ -816,7 +870,7 @@ failed: > > > static u_short * > -ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len) > +ngx_utf8_to_utf16(u_short *utf16, u_char *utf8, size_t *len, size_t reserved) > { > u_char *p; > u_short *u, *last; > @@ -865,7 +919,7 @@ ngx_utf8_to_utf16(u_short *utf16, u_char > > /* the given buffer is not enough, allocate a new one */ > > - u = malloc(((p - utf8) + ngx_strlen(p) + 1) * sizeof(u_short)); > + u = malloc(((p - utf8) + ngx_strlen(p) + 1 + reserved) * > sizeof(u_short)); > if (u == NULL) { > return NULL; > } > @@ -910,3 +964,170 @@ ngx_utf8_to_utf16(u_short *utf16, u_char > > /* unreachable */ > } > + > + > +static u_char * > +ngx_utf16_to_utf8(u_char *utf8, u_short *utf16, size_t *len, size_t > *allocated) > +{ > + u_char *p, *last; > + u_short *u, *j; > + uint32_t n; > + > + u = utf16; > + p = utf8; > + last = utf8 + *len; > + > + while (p < last) { > + > + if (*u < 0x80) { > + *p++ = (u_char) *u; > + > + if (*u == 0) { > + *len = p - utf8; > + return utf8; > + } > + > + u++; > + > + continue; > + } > + > + if (p >= last - 4) { > + *len = p - utf8; > + break; > + } > + > + n = ngx_utf16_decode(&u, 2); > + > + if (n > 0x10ffff) { > + ngx_free(utf8); > + ngx_set_errno(NGX_EILSEQ); This can result in double-free on a subsequent ngx_close_dir() call. > + return NULL; > + } > + > + if (n >= 0x10000) { > + *p++ = (u_char) (0xf0 + (n >> 18)); > + *p++ = (u_char) (0x80 + ((n >> 12) & 0x3f)); > + *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f)); > + *p++ = (u_char) (0x80 + (n & 0x3f)); > + continue; > + stray ws-only line (one more below) > + } > + > + if (n >= 0x0800) { > + *p++ = (u_char) (0xe0 + (n >> 12)); > + *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f)); > + *p++ = (u_char) (0x80 + (n & 0x3f)); > + continue; > + } > + > + *p++ = (u_char) (0xc0 + (n >> 6)); > + *p++ = (u_char) (0x80 + (n & 0x3f)); > + } > + > + /* the given buffer is not enough, allocate a new one */ > + > + for (j = u; *j; j++) { /* void */ } > + > + p = malloc((j - utf16) * 4 + 1); [Note that pointer arithmetics is in u_short (2 bytes) units.] It seems you are allocating memory assuming that utf16 -> utf8 conversion can result in memory expansion up to 4 bytes per unit. At first glance that's true, because UTF-8 symbol may take 4 bytes. IIUC, 4-byte UTF-8 can result only from UTF-16 surrogate pairs, this gives 2x growth (2-paired UTF-16 to 4-bytes UTF-8). Otherwise, 1/2/3-byte encoded UTF-8 can result from 2-byte UTF-16, which gives from 1x to 3x growth per unit. So, "malloc((j - utf16) * 3 + 1)" should be enough there. > + if (p == NULL) { > + return NULL; > + } Old memory is still there, allocated and referenced by dir->name, see above. It seems ok, it should be freed by ngx_close_dir() call. If we happen to change callers to not call ngx_close_dir() on error, this could turn into memory leaks, though. > + > + if (allocated) { > + *allocated = (j - utf16) * 4 + 1; > + } > + > + ngx_memcpy(p, utf8, *len); > + > + utf8 = p; > + p += *len; > + > + for ( ;; ) { > + > + if (*u < 0x80) { > + *p++ = (u_char) *u; > + > + if (*u == 0) { > + *len = p - utf8; > + return utf8; > + } > + > + u++; > + > + continue; > + } > + > + n = ngx_utf16_decode(&u, 2); > + > + if (n > 0x10ffff) { > + ngx_free(utf8); > + ngx_set_errno(NGX_EILSEQ); > + return NULL; > + } > + > + if (n >= 0x10000) { > + *p++ = (u_char) (0xf0 + (n >> 18)); > + *p++ = (u_char) (0x80 + ((n >> 12) & 0x3f)); > + *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f)); > + *p++ = (u_char) (0x80 + (n & 0x3f)); > + continue; > + > + } > + > + if (n >= 0x0800) { > + *p++ = (u_char) (0xe0 + (n >> 12)); > + *p++ = (u_char) (0x80 + ((n >> 6) & 0x3f)); > + *p++ = (u_char) (0x80 + (n & 0x3f)); > + continue; > + } > + > + *p++ = (u_char) (0xc0 + (n >> 6)); > + *p++ = (u_char) (0x80 + (n & 0x3f)); > + } > + > + /* unreachable */ > +} > + > + > +/* > + * ngx_utf16_decode() decodes one or two UTF-16 code units > + * the return values: > + * 0x80 - 0x10ffff valid character > + * 0x110000 - 0xfffffffd invalid sequence > + * 0xfffffffe incomplete sequence > + * 0xffffffff error > + */ > + > +uint32_t > +ngx_utf16_decode(u_short **u, size_t n) > +{ > + uint32_t k, m; > + > + k = **u; > + > + if (k < 0xd800 || k > 0xdfff) { > + (*u)++; > + return k; > + } > + > + if (k > 0xdbff) { > + (*u)++; > + return 0xffffffff; > + } > + > + if (n < 2) { > + return 0xfffffffe; > + } > + > + (*u)++; > + > + m = *(*u)++; > + > + if (m < 0xdc00 || m > 0xdfff) { > + return 0xffffffff; > + > + } > + > + return 0x10000 + ((k - 0xd800) << 10) + (m - 0xdc00); > +} > diff -r 07b0bee87f32 -r 60d845f9505f src/os/win32/ngx_files.h > --- a/src/os/win32/ngx_files.h Wed Dec 21 14:53:27 2022 +0300 > +++ b/src/os/win32/ngx_files.h Thu Jan 12 21:41:30 2023 +0300 > @@ -30,7 +30,11 @@ typedef struct { > > typedef struct { > HANDLE dir; > - WIN32_FIND_DATA finddata; > + WIN32_FIND_DATAW finddata; > + > + u_char *name; > + size_t namelen; > + size_t allocated; > > unsigned valid_info:1; > unsigned type:1; > @@ -205,8 +209,8 @@ ngx_int_t ngx_close_dir(ngx_dir_t *dir); > #define ngx_dir_access(a) (a) > > > -#define ngx_de_name(dir) ((u_char *) (dir)->finddata.cFileName) > -#define ngx_de_namelen(dir) ngx_strlen((dir)->finddata.cFileName) > +#define ngx_de_name(dir) (dir)->name > +#define ngx_de_namelen(dir) (dir)->namelen > > ngx_int_t ngx_de_info(u_char *name, ngx_dir_t *dir); > #define ngx_de_info_n "dummy()" Otherwise, looks good. -- Sergey Kandaurov _______________________________________________ nginx-devel mailing list nginx-devel@nginx.org https://mailman.nginx.org/mailman/listinfo/nginx-devel