Corinna Vinschen via Cygwin wrote:
On Jun 27 15:32, Christian Franke via Cygwin wrote:
$ touch $'t-\xef\x80\x80'
The name mapping is:
"t-\xEF\x80\x80" -(open, ...)-> L"t-\xDB59" -(readdir)-> "t-"
Did you copy/paste this from the old mail, by any chance?

Sorry, I accidentally mixed two cases with the same readdir() result:

"t-\xEF\x80\x80" -(open, ...)-> L"t-\xF000" -(readdir)-> "t-"
"t-\xED\xAD\x99" -(open, ...)-> L"t-\xDB59" -(readdir)-> "t-"

$ touch $'t-\xed\xad\x99'
$ touch $'t-\xef\x80\x80'
$ ls | uniq -c
      2 t-

This no longer occurs in 3.7.0-0.165.g1b60f4861b70, but see below.


Using the latest test DLL the mapping is

   "t-\xEF\x80\x80" -(open, ...)-> L"t-\xF000"

And that's basically correct, albeit it leads to problems.

You know that we defined the area from 0xf000 to 0xf0ff as our private
use area to create filenames with characters invalid in DOS filenames
by transposing these chars into the private use area.  When converting
the filenames back, the 0xf0XX chars are transposed back to 0xXX.

Yes.


But yeah, I found the bug here.  The problem is that the transpose table
incorrectly contains NUL as transposable character.  So if you create
L"t-\xF000", that's fine.  However, when converting this name back to
UTF-8, the filename becomes L"t-\0".  Oops.

I dropped the ASCII NUL from the list of transposable characters and
now what you get is this:

   $ touch $'t-\xef\x80\x80'
   $ touch $'t-\xef\x80\x81'
   $ ls -l
   total 0
   -rw-r--r-- 1 corinna vinschen 0 Jun 27 16:49 't-'$'\001'
   -rw-r--r-- 1 corinna vinschen 0 Jun 27 16:49 't-'$'\357\200\200'

Apart from the incorrect transposition of ASCII NUL, the transposition
works transparently:

   $ echo foo > $'t-\xef\x80\x81'
   $ cat $'t-\xef\x80\x81'
   foo
   $ cat $'t-\x01'
   foo

I'll apply the patch shortly.

$ touch $'t-\xed\xad\x90'
$ touch $'t-\xed\xad\x91'
$ touch $'t-\xed\xad\x92'
$ touch $'t-\xed\xad\x93'
$ touch $'t-\xed\xad\x94'
$ ls | uniq -c
      5 t-

$ ls -s
ls: cannot access 't-': No such file or directory
ls: cannot access 't-': No such file or directory
ls: cannot access 't-': No such file or directory
ls: cannot access 't-': No such file or directory
ls: cannot access 't-': No such file or directory
total 0
? t-  ? t-  ? t-  ? t-  ? t-

All results found by several runs of the attached test program with different seeds have in common that the Windows path name contains an invalid word in the UTF-16 high surrogate range:

$ ./randnames 42
$'t-\xEC\x9E\xB3\xEF\x82\x80\xEF\x83\xA0': access() failed, errno=2:
$'t-\xED\xA4\xA8\x80\xE0': original path
L"t-\xD928\xF080\xF0E0": Windows path

$'t-\xEE\x9E\xB3\xEF\x83\xA1': access() failed, errno=2:
$'t-\xED\xA6\xB0\xE1': original path
L"t-\xD9B0\xF0E1": Windows path
...
$'t-\xE7\xBE\xB3\xEF\x82\xB3': access() failed, errno=2:
$'t-\xED\xA2\x96\xB3': original path
L"t-\xD896\xF0B3": Windows path


--
Thanks,
Christian


#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include <windows.h>

/*
 * Write the NUL-terminated byte string s to f in bash ANSI-C quoting
 * form ($'...').
 *
 *  - A single quote is emitted as '\'$' (close the quoted string, emit an
 *    escaped quote, reopen the quoted string).
 *  - A backslash is emitted as \\ because a lone backslash inside $'...'
 *    would start an escape sequence and change the string when pasted
 *    back into a shell.
 *  - Other bytes in the range ' ' .. '~' are emitted unchanged.
 *  - Every remaining byte is emitted as \xHH (two uppercase hex digits).
 */
static void print_c(FILE * f, const char * s)
{
  fputs("$'", f);
  char c;
  for (int i = 0; (c = s[i]); i++) {
    if (c == '\'')
      fputs("'\\'$'", f);
    else if (c == '\\')
      fputs("\\\\", f);
    else if (' ' <= c && c <= '~')
      fputc(c, f);
    else
      /* Mask so that a negative (signed) char prints as one byte. */
      fprintf(f, "\\x%02X", c & 0xff);
  }
  fputc('\'', f);
}

/*
 * Write the NUL-terminated wide string s to f as a C-like wide string
 * literal (L"...").
 *
 *  - A double quote or backslash is preceded by a backslash.
 *  - Other characters in the range L' ' .. L'~' are emitted unchanged.
 *  - Every remaining character is emitted as \xHHHH, using only its low
 *    16 bits.
 */
static void print_w(FILE * f, const wchar_t * s)
{
  fputs("L\"", f);
  for (const wchar_t * p = s; *p; p++) {
    const wchar_t wc = *p;
    if (wc == L'"' || wc == L'\\')
      fprintf(f, "\\%c", wc);
    else if (wc < L' ' || wc > L'~')
      fprintf(f, "\\x%04X", wc & 0xffff);
    else
      fputc(wc, f);
  }
  fputc('"', f);
}

/*
 * Enumerate the current directory through the Win32 API and copy the name
 * of its single entry (ignoring "." and "..") into name.
 *
 * The program is terminated with exit(1) if the enumeration cannot be
 * started or if the number of entries found is not exactly one.
 *
 * NOTE(review): name is written with wcscpy() without a length check; the
 * only caller in this file passes a buffer of MAX_PATH wide characters.
 */
static void get_winname(wchar_t * name)
{
  WIN32_FIND_DATAW entry;
  HANDLE find = FindFirstFileW(L"*", &entry);
  if (find == INVALID_HANDLE_VALUE) {
    fprintf(stderr, "FindFirstFileW(): Error=%u\n", GetLastError());
    exit(1);
  }

  int count = 0;
  do {
    int is_dot = (wcscmp(entry.cFileName, L".") == 0);
    int is_dotdot = (wcscmp(entry.cFileName, L"..") == 0);
    if (!is_dot && !is_dotdot) {
      /* If several entries exist, the last one seen wins; the count
         check below rejects that case anyway. */
      wcscpy(name, entry.cFileName);
      count++;
    }
  } while (FindNextFileW(find, &entry));
  FindClose(find);

  if (count != 1) {
    fprintf(stderr, "Error: %d Win32 files found\n", count);
    exit(1);
  }
}

/*
 * Enumerate the current directory with opendir()/readdir() and copy the
 * name of its single entry (ignoring "." and "..") into name.
 *
 * The program is terminated with exit(1) if the directory cannot be opened
 * or if the number of entries found is not exactly one.
 *
 * NOTE(review): name is written with strcpy() without a length check; the
 * only caller in this file passes a buffer of MAX_PATH bytes.
 */
static void get_cygname(char * name)
{
  DIR * dir = opendir(".");
  if (dir == NULL) {
    perror("opendir");
    exit(1);
  }

  int count = 0;
  for (const struct dirent * ent = readdir(dir); ent != NULL; ent = readdir(dir)) {
    if (strcmp(ent->d_name, ".") != 0 && strcmp(ent->d_name, "..") != 0) {
      /* If several entries exist, the last one seen wins; the count
         check below rejects that case anyway. */
      strcpy(name, ent->d_name);
      count++;
    }
  }
  closedir(dir);

  if (count != 1) {
    fprintf(stderr, "Error: %d Cygwin files found\n", count);
    exit(1);
  }
}

/*
 * Fill name with a random NUL-terminated byte string of 1 .. maxlen bytes.
 *
 * Each byte is drawn from the 253 values 0x01 .. 0xFF excluding '/' and
 * '\\', so the result never contains a NUL, a slash or a backslash.
 * name must have room for maxlen + 1 bytes.  If maxlen is less than 1 an
 * empty string is stored (there is no valid length to choose from).
 *
 * The byte value is computed in an int: with a plain (possibly signed)
 * char, values >= 0x80 become negative, bypass the ">=" adjustments below
 * and leave 0xFE/0xFF unreachable while 0x80 is produced twice.
 */
static void randname(char * name, int maxlen)
{
  if (maxlen < 1) {
    name[0] = 0;
    return;
  }
  int len = 1 + rand() % maxlen;
  for (int i = 0; i < len; i++) {
    /* 1 .. 253, then shifted up past the two excluded characters. */
    int c = 1 + rand() % (256 - 2 - 1);
    if (c >= '/')
      c++;
    if (c >= '\\')
      c++;
    name[i] = (char)c;
  }
  name[len] = 0;
}

/*
 * Create a file called name in the current directory, read its name back
 * through get_cygname() and get_winname(), and check that the name
 * returned by the directory listing can be used to access the file.
 * The file is removed again before returning.
 *
 * Returns 1 if access() on the listed name succeeded, 0 if it failed (in
 * which case the listed name, the original name and the Windows name are
 * printed to stdout).  Terminates the program with exit(1) if the file
 * cannot be created or removed.
 *
 * errno is copied right after each failing call: the message is printed
 * with several stdio calls (inside print_c() and printf()), any of which
 * may overwrite errno before it is formatted.
 */
static int testname(const char * name)
{
  int fd = open(name, O_WRONLY|O_CREAT, 0644);
  if (fd < 0) {
    int err = errno;
    print_c(stdout, name); printf(": open() failed, errno=%d\n", err);
    exit(1);
  }
  close(fd);

  char cygname[MAX_PATH];
  get_cygname(cygname);
  wchar_t winname[MAX_PATH];
  get_winname(winname);

  int rc = 1;
  if (access(cygname, 0)) {
    int err = errno;
    print_c(stdout, cygname); printf(": access() failed, errno=%d:\n", err);
    print_c(stdout, name); printf(": original path\n"); 
    print_w(stdout, winname); printf(": Windows path\n\n");
    rc = 0;
  }

  /* Remove via the original name, which is the one known to open(). */
  if (unlink(name)) {
    int err = errno;
    print_c(stdout, name); printf(": unlink() failed, errno=%d\n", err);
    print_w(stdout, winname); printf(": Windows path\n");
    exit(1);
  }
  return rc;
}

/*
 * Test driver.
 *
 * An optional first argument seeds rand().  A fresh directory "test.tmp"
 * is created below the current directory and entered; then up to 100000
 * random file names of the form "t-" plus 1..5 random bytes are passed to
 * testname().  The loop stops early once 10 names have failed.
 *
 * Returns 1 if the test directory cannot be created or entered, otherwise
 * 0 (also when names failed).
 */
int main(int argc, char **argv)
{
  if (argc > 1)
    srand(atoi(argv[1]));

  const char * dir = "test.tmp";
  /* Best effort: drop an empty leftover directory from an earlier run. */
  rmdir(dir);
  if (mkdir(dir, 0755) || chdir(dir)) {
    perror(dir);
    return 1;
  }

  int failures = 0;
  int round = 0;
  while (round < 100000 && failures < 10) {
    char name[8] = "t-";
    /* Leave room for the "t-" prefix and the terminating NUL. */
    randname(name + 2, sizeof(name) - 1 - 2);
    if (!testname(name))
      failures++;
    round++;
  }
  return 0;
}
-- 
Problem reports:      https://cygwin.com/problems.html
FAQ:                  https://cygwin.com/faq/
Documentation:        https://cygwin.com/docs.html
Unsubscribe info:     https://cygwin.com/ml/#unsubscribe-simple

Reply via email to