Package: dosfstools Version: 3.0.6-1 Severity: important Tags: patch If a filesystem has files with names containing non-ASCII characters, dosfsck prints these characters in encoded form rather than in human-readable form. This is very inconvenient, especially when the program asks the user to make a decision about deleting or fixing files.
For example: Checking file /:0GV:0H0:0Gu:0Gy:0Gr:0H02.:0H2:0H5:0H2 (\217\220\210\214\205\2202.\222\225\222) /:0GV:0H0:0Gu:0Gy:0Gr:0H02.:0H2:0H5:0H2 Bad file name. 1) Drop file 2) Rename file 3) Auto-rename 4) Keep it I patched this program to convert printed names using iconv and wcstombs(). Now the output looks like: Checking file /Пример2.тхт (ПРИМЕР2.ТХТ) /Пример2.тхт Bad file name. 1) Drop file 2) Rename file 3) Auto-rename 4) Keep it Also, I added a command-line option, -c N (use codepage N to decode short filenames, default = 437). The patch is rather big and ugly. With it, dosfsck depends on libiconv with locale support, which may be unavailable in embedded systems. -- System Information: Debian Release: squeeze/sid APT prefers testing APT policy: (990, 'testing'), (500, 'stable') Architecture: i386 (i686) Kernel: Linux 2.6.33.2 (SMP w/2 CPU cores) Locale: LANG=ru_RU.UTF-8, LC_CTYPE=ru_RU.UTF-8 (charmap=UTF-8) Shell: /bin/sh linked to /bin/bash Versions of packages dosfstools depends on: ii libc6 2.10.1-5 GNU C Library: Shared libraries dosfstools recommends no packages. dosfstools suggests no packages. -- debconf-show failed
commit 3aa58b29b80ed05d1d9bf3d62c50f4e29e75953c Author: Alexander Korolkov <al-...@inbox.ru> Date: Fri Sep 10 16:41:03 2010 +0400 iconv diff --git a/src/check.c b/src/check.c index 0378da5..0df753d 100644 --- a/src/check.c +++ b/src/check.c @@ -188,7 +188,42 @@ loff_t alloc_rootdir_entry(DOS_FS *fs, DIR_ENT *de, const char *pattern) return offset; } +static unsigned uunesc(unsigned char x) +{ + if (x >= '0' && x <= '9') + return x - '0'; + else if (x >= 'A' && x <= 'Z') + return x - 'A' + 10; + else if (x >= 'a' && x <= 'z') + return x - 'a' + 36; + else if (x == '+') + return 62; + else if (x == '-') + return 63; + printf("Internal error at %s:%d\n", __FILE__, __LINE__); + exit(0); +} +static void uni2local(char *dest, const unsigned char *src) +{ + while (*src) { + if (*src != ':') { + *dest = *src; ++dest; ++src; + } + else { + wchar_t uni[2]; char *t = dest; size_t result; + uni[0] = (uunesc(src[1]) << 12) + (uunesc(src[2]) << 6) + uunesc(src[3]); + uni[1] = 0; + result = wcstombs(dest, uni, 32); + if (result != (size_t)(-1)) { + dest += result; + src += 4; + } + else { *dest = *src; ++dest; ++src; } /* fallback: just show encoded name */ + } + } + *dest = 0; +} /** * Construct a full path (starting with '/') for the specified dentry, * relative to the partition. All components are "long" names where possible. 
@@ -210,7 +245,10 @@ static char *path_name(DOS_FILE *file) /* Append the long name to the path, * or the short name if there isn't a long one */ - strcpy(strrchr(path,0),file->lfn?file->lfn:file_name(file->dir_ent.name)); + if (file->lfn) + uni2local(strrchr(path,0),file->lfn); + else + strcpy(strrchr(path,0),file_name(file->dir_ent.name)); } return path; } diff --git a/src/dosfsck.c b/src/dosfsck.c index 7657018..a5b9ec0 100644 --- a/src/dosfsck.c +++ b/src/dosfsck.c @@ -32,6 +32,7 @@ #include <stdlib.h> #include <unistd.h> #include <getopt.h> +#include <locale.h> #include "common.h" #include "dosfsck.h" @@ -47,13 +48,13 @@ int atari_format = 0; unsigned n_files = 0; void *mem_queue = NULL; - static void usage(char *name) { fprintf(stderr,"usage: %s [-aAflrtvVwy] [-d path -d ...] " "[-u path -u ...]\n%15sdevice\n",name,""); fprintf(stderr," -a automatically repair the file system\n"); fprintf(stderr," -A toggle Atari file system format\n"); + fprintf(stderr," -c N use DOS codepage N to decode short file names (default: %d)\n", DEFAULT_DOS_CODEPAGE); fprintf(stderr," -d path drop that file\n"); fprintf(stderr," -f salvage unused chains to files\n"); fprintf(stderr," -l list path names\n"); @@ -106,12 +107,13 @@ int main(int argc,char **argv) unsigned n_files_check=0, n_files_verify=0; unsigned long free_clusters; + setlocale(LC_ALL, ""); /* initialize locale */ memset(&fs, 0, sizeof(fs)); rw = salvage_files = verify = 0; interactive = 1; check_atari(); - while ((c = getopt(argc,argv,"Aad:flnprtu:vVwy")) != EOF) + while ((c = getopt(argc,argv,"Aac:d:flnprtu:vVwy")) != EOF) switch (c) { case 'A': /* toggle Atari format */ atari_format = !atari_format; @@ -123,6 +125,9 @@ int main(int argc,char **argv) interactive = 0; salvage_files = 1; break; + case 'c': + dos_codepage = atoi(optarg); + break; case 'd': file_add(optarg,fdt_drop); break; diff --git a/src/file.c b/src/file.c index cb8a94e..4db66b4 100644 --- a/src/file.c +++ b/src/file.c @@ -41,14 +41,61 @@ #include 
"common.h" #include "file.h" +#include <iconv.h> +#include <langinfo.h> +#include <locale.h> FDSC *fp_root = NULL; +int dos_codepage = DEFAULT_DOS_CODEPAGE; + +static iconv_t iconv_init_codepage(int codepage) +{ + iconv_t result; + char codepage_name[16]; + snprintf(codepage_name, sizeof(codepage_name), "CP%d", codepage); + result = iconv_open(nl_langinfo(CODESET), codepage_name); + if (result == (iconv_t)-1) + perror(codepage_name); + return result; +} static void put_char(char **p,unsigned char c) { - if ((c >= ' ' && c < 0x7f) || c >= 0xa0) *(*p)++ = c; - else { + int success = 0; + if (c >= 0x20) { + static iconv_t to_local; + static int initialized = 0; + if (!initialized) { + initialized = 1; + setlocale(LC_ALL, ""); /* initialize locale */ + to_local = iconv_init_codepage(dos_codepage); + if (to_local == (iconv_t)-1 && dos_codepage != DEFAULT_DOS_CODEPAGE) { + printf("Trying to set fallback DOS codepage %d\n", DEFAULT_DOS_CODEPAGE); + to_local = iconv_init_codepage(DEFAULT_DOS_CODEPAGE); + if (to_local == (iconv_t)-1) + initialized = 2; /* no conversion available */ + } + } + + if (initialized == 1) { + if (c >= 0x20) { + char in[1] = { c }; + char *pin = in, *p_orig = *p; + size_t bytes_in = 1; + size_t bytes_out = 4; + success = (iconv(to_local, &pin, &bytes_in, p, &bytes_out) != -1); + } + } + else if (initialized == 2) { + if (c >= 0x20 && c < 0x7f) { + **p = c; + ++p; + success = 1; + } + } + } + if (!success) { /* fallback: print octal */ *(*p)++ = '\\'; *(*p)++ = '0'+(c >> 6); *(*p)++ = '0'+((c >> 3) & 7); diff --git a/src/file.h b/src/file.h index b38523b..f7548a3 100644 --- a/src/file.h +++ b/src/file.h @@ -35,6 +35,9 @@ typedef struct _fptr { extern FDSC *fp_root; +#define DEFAULT_DOS_CODEPAGE 437 +extern int dos_codepage; + char *file_name(unsigned char *fixed); diff --git a/src/lfn.c b/src/lfn.c index 97e91dd..b7a4af7 100644 --- a/src/lfn.c +++ b/src/lfn.c @@ -71,19 +71,60 @@ static unsigned char fat_uni2esc[64] = { /* for maxlen param */ #define 
UNTIL_0 INT_MAX -/* Convert name part in 'lfn' from unicode to ASCII */ -#define CNV_THIS_PART(lfn) \ +static void copy_lfn_part_w( wchar_t *dst, LFN_ENT *lfn ) +{ + int i; + for (i = 0; i < 5; ++i) + dst[i] = lfn->name0_4[i * 2] + (lfn->name0_4[i * 2 + 1] << 8); + for (i = 0; i < 6; ++i) + dst[i + 5] = lfn->name5_10[i * 2] + (lfn->name5_10[i * 2 + 1] << 8); + for (i = 0; i < 2; ++i) + dst[i + 11] = lfn->name11_12[i * 2] + (lfn->name11_12[i * 2 + 1] << 8); +} + +static char *_wcstombs_alloc(wchar_t *s, int len) +{ + char *r = alloc(len + 1); + wcstombs(r, s, len + 1); + puts(r); + return r; +} + +#define CNV_THIS_PART_FALLBACK(lfn) \ ({ \ char __part_uni[CHARS_PER_LFN*2]; \ copy_lfn_part( __part_uni, lfn ); \ cnv_unicode( __part_uni, CHARS_PER_LFN, 0 ); \ }) +/* Convert name part in 'lfn' from unicode to ASCII */ +/* Used only for output, so we better convert chars to something readable */ +#define CNV_THIS_PART(lfn) \ + ({ \ + int __part_len; \ + wchar_t __part_uni[CHARS_PER_LFN + 1]; \ + copy_lfn_part_w( __part_uni, lfn ); \ + __part_uni[CHARS_PER_LFN] = 0; \ + __part_len = wcstombs(NULL, __part_uni, 0); \ + (__part_len != -1) ? _wcstombs_alloc(__part_uni, __part_len) : CNV_THIS_PART_FALLBACK(lfn); \ + }) + /* Convert name parts collected so far (from previous slots) from unicode to * ASCII */ +/* Used only for output, so we better convert chars to something readable */ #define CNV_PARTS_SO_FAR() \ + ({ \ + wchar_t __part_uni[lfn_parts * CHARS_PER_LFN + 1]; \ + int _i; \ + char *_p = lfn_unicode+(lfn_slot*CHARS_PER_LFN*2); \ + for (_i = 0; _i < lfn_parts * CHARS_PER_LFN; ++_i) \ + __part_uni[_i] = _p[_i * 2] + (_p[_i * 2 + 1] << 8); \ + __part_uni[lfn_parts * CHARS_PER_LFN] = 0; \ + _i = wcstombs(NULL, __part_uni, 0); \ + (_i != -1) ? 
_wcstombs_alloc(__part_uni, _i) : \ (cnv_unicode( lfn_unicode+(lfn_slot*CHARS_PER_LFN*2), \ - lfn_parts*CHARS_PER_LFN, 0 )) + lfn_parts*CHARS_PER_LFN, 0 )); \ + }) /* This function converts an unicode string to a normal ASCII string, assuming * ISO-8859-1 charset. Characters not in 8859-1 are converted to the same