Hello! On Fri, Feb 17, 2023 at 06:53:11PM +0400, Sergey Kandaurov wrote:
> > On 13 Jan 2023, at 01:35, Maxim Dounin <mdou...@mdounin.ru> wrote: > > > > # HG changeset patch > > # User Maxim Dounin <mdou...@mdounin.ru> > > # Date 1673548899 -10800 > > # Thu Jan 12 21:41:39 2023 +0300 > > # Node ID d05c0adf5890aecc68ce8906ef19ca07502ed071 > > # Parent 60d845f9505fe1b97c1e04b680523b790e29fdb1 > > Win32: non-ASCII names support in "include" with wildcards. > > > > Notably, ngx_open_glob() now supports opening directories with non-ASCII > > characters, and pathnames returned by ngx_read_glob() are converted to > > UTF-8. > > > > diff -r 60d845f9505f -r d05c0adf5890 src/os/win32/ngx_files.c > > --- a/src/os/win32/ngx_files.c Thu Jan 12 21:41:30 2023 +0300 > > +++ b/src/os/win32/ngx_files.c Thu Jan 12 21:41:39 2023 +0300 > > @@ -546,14 +546,27 @@ ngx_open_glob(ngx_glob_t *gl) > > { > > u_char *p; > > size_t len; > > + u_short *u; > > ngx_err_t err; > > + u_short utf16[NGX_UTF16_BUFLEN]; > > > > - gl->dir = FindFirstFile((const char *) gl->pattern, &gl->finddata); > > + len = NGX_UTF16_BUFLEN; > > + u = ngx_utf8_to_utf16(utf16, gl->pattern, &len, 0); > > + > > + if (u == NULL) { > > + return NGX_ERROR; > > + } > > + > > + gl->dir = FindFirstFileW(u, &gl->finddata); > > > > if (gl->dir == INVALID_HANDLE_VALUE) { > > > > err = ngx_errno; > > > > + if (u != utf16) { > > + ngx_free(u); > > + } > > + > > if ((err == ERROR_FILE_NOT_FOUND || err == ERROR_PATH_NOT_FOUND) > > && gl->test) > > { > > @@ -561,6 +574,8 @@ ngx_open_glob(ngx_glob_t *gl) > > return NGX_OK; > > } > > > > + ngx_set_errno(err); > > + > > return NGX_ERROR; > > } > > > > @@ -570,18 +585,10 @@ ngx_open_glob(ngx_glob_t *gl) > > } > > } > > > > - len = ngx_strlen(gl->finddata.cFileName); > > - gl->name.len = gl->last + len; > > - > > - gl->name.data = ngx_alloc(gl->name.len + 1, gl->log); > > - if (gl->name.data == NULL) { > > - return NGX_ERROR; > > + if (u != utf16) { > > + ngx_free(u); > > } > > > > - ngx_memcpy(gl->name.data, gl->pattern, gl->last); > > - ngx_cpystrn(gl->name.data + gl->last, (u_char *) > > gl->finddata.cFileName, > > - len + 1); > > - > > gl->ready = 1; > > > > return NGX_OK; > > @@ -591,40 +598,25 @@ ngx_open_glob(ngx_glob_t *gl) > > ngx_int_t > > ngx_read_glob(ngx_glob_t *gl, ngx_str_t *name) > > { > > - size_t len; > > - ngx_err_t err; > > + u_char *p; > > + size_t len; > > + ngx_err_t err; > > + u_char utf8[NGX_UTF16_BUFLEN]; > > Mixing UTF16-specific macro with u_char utf8[] looks suspicious. That's just some preallocated buffer size. If it feels better, a separate macro can be used instead. This will also make it possible to fine-tune the buffer: diff -r aca8812f2482 src/os/win32/ngx_files.c --- a/src/os/win32/ngx_files.c Sat Feb 18 12:58:54 2023 +0300 +++ b/src/os/win32/ngx_files.c Sat Feb 18 13:05:34 2023 +0300 @@ -10,6 +10,7 @@ #define NGX_UTF16_BUFLEN 256 +#define NGX_UTF8_BUFLEN 512 static ngx_int_t ngx_win32_check_filename(u_char *name, u_short *u, size_t len); @@ -601,7 +602,7 @@ ngx_read_glob(ngx_glob_t *gl, ngx_str_t u_char *p; size_t len; ngx_err_t err; - u_char utf8[NGX_UTF16_BUFLEN]; + u_char utf8[NGX_UTF8_BUFLEN]; if (gl->no_match) { return NGX_DONE; @@ -632,7 +633,7 @@ ngx_read_glob(ngx_glob_t *gl, ngx_str_t convert: - len = NGX_UTF16_BUFLEN; + len = NGX_UTF8_BUFLEN; p = ngx_utf16_to_utf8(utf8, gl->finddata.cFileName, &len, NULL); if (p == NULL) { > It may have sense to replace this with dynamic allocation > inside ngx_utf16_to_utf8(), similar to ngx_read_dir() 1st call, > or use NGX_MAX_PATH. Much like in other functions in this file, the idea is to avoid dynamic allocations in most cases by providing an easy to implement stack-based buffer. The approach used in ngx_read_dir() is more complex since the result of the conversion needs to be maintained between calls (to be available via the ngx_de_name() macro). For ngx_read_glob(), a better approach might be to combine conversion buffer and the final glob() result buffer (which also includes directory name, and currently allocated on each ngx_read_glob() call). Though I don't think it worth the effort, especially given that ngx_read_glob() is only used during configuration parsing. As for NGX_MAX_PATH, it does not look like a good replacement for a number of reasons. In particular, a) it might be incorrectly interpreted as a place when only paths up to MAX_PATH are supported, b) MAX_PATH on win32 is in characters, while the buffer in question is in bytes (and to be used for UTF-8), so it is no better than NGX_UTF16_BUFLEN. > > > > if (gl->no_match) { > > return NGX_DONE; > > } > > > > if (gl->ready) { > > - *name = gl->name; > > - > > gl->ready = 0; > > - return NGX_OK; > > + goto convert; > > } > > > > ngx_free(gl->name.data); > > gl->name.data = NULL; > > > > - if (FindNextFile(gl->dir, &gl->finddata) != 0) { > > - > > - len = ngx_strlen(gl->finddata.cFileName); > > - gl->name.len = gl->last + len; > > - > > - gl->name.data = ngx_alloc(gl->name.len + 1, gl->log); > > - if (gl->name.data == NULL) { > > - return NGX_ERROR; > > - } > > - > > - ngx_memcpy(gl->name.data, gl->pattern, gl->last); > > - ngx_cpystrn(gl->name.data + gl->last, (u_char *) > > gl->finddata.cFileName, > > - len + 1); > > - > > - *name = gl->name; > > - > > - return NGX_OK; > > + if (FindNextFileW(gl->dir, &gl->finddata) != 0) { > > + goto convert; > > } > > > > err = ngx_errno; > > @@ -637,6 +629,43 @@ ngx_read_glob(ngx_glob_t *gl, ngx_str_t > > "FindNextFile(%s) failed", gl->pattern); > > > > return NGX_ERROR; > > + > > +convert: > > + > > + len = NGX_UTF16_BUFLEN; > > + p = ngx_utf16_to_utf8(utf8, gl->finddata.cFileName, &len, NULL); > > Note that passing utf8 off the stack can result in an attempt to free > the stack pointer inside ngx_utf16_to_utf8() on NGX_EILSEQ. Yes, thanks, this shouldn't be a problem with the ngx_utf16_to_utf8() already fixed based on the 1st patch comments. > > + > > Nitpicking: there (and in subsequent patches) you're starting to add > an empty line after ngx_utf16_to_utf8(); it's not in the 1st path. In the first patch there is an empty line before the ngx_utf16_to_utf8() call, which somewhat balance things. Either way, added an empty line there as well. [...] -- Maxim Dounin http://mdounin.ru/ _______________________________________________ nginx-devel mailing list nginx-devel@nginx.org https://mailman.nginx.org/mailman/listinfo/nginx-devel