> On 19 Feb 2023, at 21:18, Maxim Dounin <mdou...@mdounin.ru> wrote: > > Hello! > > On Fri, Feb 17, 2023 at 06:53:11PM +0400, Sergey Kandaurov wrote: > >>> On 13 Jan 2023, at 01:35, Maxim Dounin <mdou...@mdounin.ru> wrote: >>> >>> # HG changeset patch >>> # User Maxim Dounin <mdou...@mdounin.ru> >>> # Date 1673548899 -10800 >>> # Thu Jan 12 21:41:39 2023 +0300 >>> # Node ID d05c0adf5890aecc68ce8906ef19ca07502ed071 >>> # Parent 60d845f9505fe1b97c1e04b680523b790e29fdb1 >>> Win32: non-ASCII names support in "include" with wildcards. >>> >>> Notably, ngx_open_glob() now supports opening directories with non-ASCII >>> characters, and pathnames returned by ngx_read_glob() are converted to >>> UTF-8. >>> >>> diff -r 60d845f9505f -r d05c0adf5890 src/os/win32/ngx_files.c >>> --- a/src/os/win32/ngx_files.c Thu Jan 12 21:41:30 2023 +0300 >>> +++ b/src/os/win32/ngx_files.c Thu Jan 12 21:41:39 2023 +0300 >>> @@ -546,14 +546,27 @@ ngx_open_glob(ngx_glob_t *gl) >>> { >>> u_char *p; >>> size_t len; >>> + u_short *u; >>> ngx_err_t err; >>> + u_short utf16[NGX_UTF16_BUFLEN]; >>> >>> - gl->dir = FindFirstFile((const char *) gl->pattern, &gl->finddata); >>> + len = NGX_UTF16_BUFLEN; >>> + u = ngx_utf8_to_utf16(utf16, gl->pattern, &len, 0); >>> + >>> + if (u == NULL) { >>> + return NGX_ERROR; >>> + } >>> + >>> + gl->dir = FindFirstFileW(u, &gl->finddata); >>> >>> if (gl->dir == INVALID_HANDLE_VALUE) { >>> >>> err = ngx_errno; >>> >>> + if (u != utf16) { >>> + ngx_free(u); >>> + } >>> + >>> if ((err == ERROR_FILE_NOT_FOUND || err == ERROR_PATH_NOT_FOUND) >>> && gl->test) >>> { >>> @@ -561,6 +574,8 @@ ngx_open_glob(ngx_glob_t *gl) >>> return NGX_OK; >>> } >>> >>> + ngx_set_errno(err); >>> + >>> return NGX_ERROR; >>> } >>> >>> @@ -570,18 +585,10 @@ ngx_open_glob(ngx_glob_t *gl) >>> } >>> } >>> >>> - len = ngx_strlen(gl->finddata.cFileName); >>> - gl->name.len = gl->last + len; >>> - >>> - gl->name.data = ngx_alloc(gl->name.len + 1, gl->log); >>> - if (gl->name.data == NULL) { >>> - return NGX_ERROR; >>> + if (u != utf16) { >>> + ngx_free(u); >>> } >>> >>> - ngx_memcpy(gl->name.data, gl->pattern, gl->last); >>> - ngx_cpystrn(gl->name.data + gl->last, (u_char *) >>> gl->finddata.cFileName, >>> - len + 1); >>> - >>> gl->ready = 1; >>> >>> return NGX_OK; >>> @@ -591,40 +598,25 @@ ngx_open_glob(ngx_glob_t *gl) >>> ngx_int_t >>> ngx_read_glob(ngx_glob_t *gl, ngx_str_t *name) >>> { >>> - size_t len; >>> - ngx_err_t err; >>> + u_char *p; >>> + size_t len; >>> + ngx_err_t err; >>> + u_char utf8[NGX_UTF16_BUFLEN]; >> >> Mixing UTF16-specific macro with u_char utf8[] looks suspicious. > > That's just some preallocated buffer size. If it feels better, a > separate macro can be used instead. This will also make it > possible to fine-tune the buffer: >
I don't have a strong position here, just caught my eye. Feel free to commit the version you prefer. > diff -r aca8812f2482 src/os/win32/ngx_files.c > --- a/src/os/win32/ngx_files.c Sat Feb 18 12:58:54 2023 +0300 > +++ b/src/os/win32/ngx_files.c Sat Feb 18 13:05:34 2023 +0300 > @@ -10,6 +10,7 @@ > > > #define NGX_UTF16_BUFLEN 256 > +#define NGX_UTF8_BUFLEN 512 > > static ngx_int_t ngx_win32_check_filename(u_char *name, u_short *u, > size_t len); > @@ -601,7 +602,7 @@ ngx_read_glob(ngx_glob_t *gl, ngx_str_t > u_char *p; > size_t len; > ngx_err_t err; > - u_char utf8[NGX_UTF16_BUFLEN]; > + u_char utf8[NGX_UTF8_BUFLEN]; > > if (gl->no_match) { > return NGX_DONE; > @@ -632,7 +633,7 @@ ngx_read_glob(ngx_glob_t *gl, ngx_str_t > > convert: > > - len = NGX_UTF16_BUFLEN; > + len = NGX_UTF8_BUFLEN; > p = ngx_utf16_to_utf8(utf8, gl->finddata.cFileName, &len, NULL); > > if (p == NULL) { > > >> It may have sense to replace this with dynamic allocation >> inside ngx_utf16_to_utf8(), similar to ngx_read_dir() 1st call, >> or use NGX_MAX_PATH. > > Much like in other functions in this file, the idea is to avoid > dynamic allocations in most cases by providing an easy to > implement stack-based buffer. The approach used in ngx_read_dir() > is more complex since the result of the conversion needs to be > maintained between calls (to be available via the ngx_de_name() > macro). For ngx_read_glob(), a better approach might be to > combine conversion buffer and the final glob() result buffer > (which also includes directory name, and currently allocated on > each ngx_read_glob() call). Though I don't think it worth the > effort, especially given that ngx_read_glob() is only used during > configuration parsing. > > As for NGX_MAX_PATH, it does not look like a good replacement for > a number of reasons. In particular, a) it might be incorrectly > interpreted as a place when only paths up to MAX_PATH are > supported, b) MAX_PATH on win32 is in characters, while the buffer > in question is in bytes (and to be used for UTF-8), so it is no > better than NGX_UTF16_BUFLEN. > Agree. >>> >>> if (gl->no_match) { >>> return NGX_DONE; >>> } >>> >>> if (gl->ready) { >>> - *name = gl->name; >>> - >>> gl->ready = 0; >>> - return NGX_OK; >>> + goto convert; >>> } >>> >>> ngx_free(gl->name.data); >>> gl->name.data = NULL; >>> >>> - if (FindNextFile(gl->dir, &gl->finddata) != 0) { >>> - >>> - len = ngx_strlen(gl->finddata.cFileName); >>> - gl->name.len = gl->last + len; >>> - >>> - gl->name.data = ngx_alloc(gl->name.len + 1, gl->log); >>> - if (gl->name.data == NULL) { >>> - return NGX_ERROR; >>> - } >>> - >>> - ngx_memcpy(gl->name.data, gl->pattern, gl->last); >>> - ngx_cpystrn(gl->name.data + gl->last, (u_char *) >>> gl->finddata.cFileName, >>> - len + 1); >>> - >>> - *name = gl->name; >>> - >>> - return NGX_OK; >>> + if (FindNextFileW(gl->dir, &gl->finddata) != 0) { >>> + goto convert; >>> } >>> >>> err = ngx_errno; >>> @@ -637,6 +629,43 @@ ngx_read_glob(ngx_glob_t *gl, ngx_str_t >>> "FindNextFile(%s) failed", gl->pattern); >>> >>> return NGX_ERROR; >>> + >>> +convert: >>> + >>> + len = NGX_UTF16_BUFLEN; >>> + p = ngx_utf16_to_utf8(utf8, gl->finddata.cFileName, &len, NULL); >> >> Note that passing utf8 off the stack can result in an attempt to free >> the stack pointer inside ngx_utf16_to_utf8() on NGX_EILSEQ. > > Yes, thanks, this shouldn't be a problem with the > ngx_utf16_to_utf8() already fixed based on the 1st patch comments. > >>> + >> >> Nitpicking: there (and in subsequent patches) you're starting to add >> an empty line after ngx_utf16_to_utf8(); it's not in the 1st path. > > In the first patch there is an empty line before the > ngx_utf16_to_utf8() call, which somewhat balance things. Either > way, added an empty line there as well. > > [...] > -- Sergey Kandaurov _______________________________________________ nginx-devel mailing list nginx-devel@nginx.org https://mailman.nginx.org/mailman/listinfo/nginx-devel