Package: dosfstools
Version: 3.0.6-1
Severity: important
Tags: patch

If a filesystem contains files whose names include non-ASCII characters,
dosfsck prints those characters in an escaped form that is not
human-readable. This is very inconvenient, especially when the program
asks the user to decide whether to delete or fix a file.

For example:

Checking file /:0GV:0H0:0Gu:0Gy:0Gr:0H02.:0H2:0H5:0H2 
(\217\220\210\214\205\2202.\222\225\222)
/:0GV:0H0:0Gu:0Gy:0Gr:0H02.:0H2:0H5:0H2
  Bad file name.
1) Drop file
2) Rename file
3) Auto-rename
4) Keep it

I patched the program to convert the printed names to the encoding of
the current locale using iconv() and wcstombs(). Now the output looks
like this:

Checking file /Пример2.тхт (ПРИМЕР2.ТХТ)
/Пример2.тхт
  Bad file name.
1) Drop file
2) Rename file
3) Auto-rename
4) Keep it

I also added a command-line option, -c N, which selects the DOS codepage
N used to decode short file names (default: 437).

The patch is rather big and ugly. With it applied, dosfsck depends on
libiconv with locale support, which may be unavailable on embedded
systems.

-- System Information:
Debian Release: squeeze/sid
  APT prefers testing
  APT policy: (990, 'testing'), (500, 'stable')
Architecture: i386 (i686)

Kernel: Linux 2.6.33.2 (SMP w/2 CPU cores)
Locale: LANG=ru_RU.UTF-8, LC_CTYPE=ru_RU.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/bash

Versions of packages dosfstools depends on:
ii  libc6                         2.10.1-5   GNU C Library: Shared libraries

dosfstools recommends no packages.

dosfstools suggests no packages.

-- debconf-show failed
commit 3aa58b29b80ed05d1d9bf3d62c50f4e29e75953c
Author: Alexander Korolkov <al-...@inbox.ru>
Date:   Fri Sep 10 16:41:03 2010 +0400

    iconv

diff --git a/src/check.c b/src/check.c
index 0378da5..0df753d 100644
--- a/src/check.c
+++ b/src/check.c
@@ -188,7 +188,70 @@ loff_t alloc_rootdir_entry(DOS_FS *fs, DIR_ENT *de, const char *pattern)
     return offset;
 }
 
+/**
+ * Decode one digit of a three-digit ":xxx" escape sequence.  The digits
+ * '0'-'9', 'A'-'Z', 'a'-'z', '+' and '-' stand for the values 0..63.
+ *
+ * @param x	Character to decode
+ * @return	Value of the digit, or -1 if x is not a valid digit
+ */
+static int uunesc(unsigned char x)
+{
+    if (x >= '0' && x <= '9')
+	return x - '0';
+    if (x >= 'A' && x <= 'Z')
+	return x - 'A' + 10;
+    if (x >= 'a' && x <= 'z')
+	return x - 'a' + 36;
+    if (x == '+')
+	return 62;
+    if (x == '-')
+	return 63;
+    return -1;	/* let the caller decide how to handle a malformed escape */
+}
 
+/**
+ * Copy 'src' to 'dest', replacing every ":xxx" escape by the character it
+ * stands for, encoded as a multibyte sequence of the current locale.
+ * A ':' that does not start a well-formed escape, or whose character cannot
+ * be represented in the locale, is copied literally.
+ *
+ * @param dest	Output buffer; its size is not checked here, the caller must
+ *		provide enough room
+ * @param src	NUL-terminated name, possibly containing ":xxx" escapes
+ */
+static void uni2local(char *dest, const unsigned char *src)
+{
+    while (*src) {
+	if (*src == ':') {
+	    /* Decode digit by digit and stop at the first invalid one (the
+	     * terminating NUL is invalid too), so that a ':' close to the
+	     * end of the string never makes us read beyond it. */
+	    int hi = uunesc(src[1]);
+	    int mid = (hi < 0) ? -1 : uunesc(src[2]);
+	    int lo = (mid < 0) ? -1 : uunesc(src[3]);
+
+	    if (lo >= 0) {
+		wchar_t uni[2];
+		size_t result;
+
+		uni[0] = ((unsigned)hi << 12) + ((unsigned)mid << 6) + (unsigned)lo;
+		uni[1] = 0;
+		/* a single character never needs more than MB_CUR_MAX bytes */
+		result = wcstombs(dest, uni, MB_CUR_MAX);
+		if (result != (size_t)(-1)) {
+		    dest += result;
+		    src += 4;
+		    continue;
+		}
+	    }
+	}
+	/* plain character, or fallback: just show the encoded name */
+	*dest++ = *src++;
+    }
+    *dest = 0;
+}
+
 /**
  * Construct a full path (starting with '/') for the specified dentry,
  * relative to the partition. All components are "long" names where possible.
@@ -210,7 +273,10 @@ static char *path_name(DOS_FILE *file)
         /* Append the long name to the path,
          * or the short name if there isn't a long one
          */
-	strcpy(strrchr(path,0),file->lfn?file->lfn:file_name(file->dir_ent.name));
+	if (file->lfn)
+	    uni2local(strrchr(path,0),file->lfn);
+	else
+	    strcpy(strrchr(path,0),file_name(file->dir_ent.name));
     }
     return path;
 }
diff --git a/src/dosfsck.c b/src/dosfsck.c
index 7657018..a5b9ec0 100644
--- a/src/dosfsck.c
+++ b/src/dosfsck.c
@@ -32,6 +32,7 @@
 #include <stdlib.h>
 #include <unistd.h>
 #include <getopt.h>
+#include <locale.h>
 
 #include "common.h"
 #include "dosfsck.h"
@@ -47,13 +48,13 @@ int atari_format = 0;
 unsigned n_files = 0;
 void *mem_queue = NULL;
 
-
 static void usage(char *name)
 {
     fprintf(stderr,"usage: %s [-aAflrtvVwy] [-d path -d ...] "
       "[-u path -u ...]\n%15sdevice\n",name,"");
     fprintf(stderr,"  -a       automatically repair the file system\n");
     fprintf(stderr,"  -A       toggle Atari file system format\n");
+    fprintf(stderr,"  -c N     use DOS codepage N to decode short file names (default: %d)\n", DEFAULT_DOS_CODEPAGE);
     fprintf(stderr,"  -d path  drop that file\n");
     fprintf(stderr,"  -f       salvage unused chains to files\n");
     fprintf(stderr,"  -l       list path names\n");
@@ -106,12 +107,13 @@ int main(int argc,char **argv)
 	unsigned n_files_check=0, n_files_verify=0;
     unsigned long free_clusters;
 
+    setlocale(LC_ALL, "");	    /* initialize locale */
     memset(&fs, 0, sizeof(fs));
     rw = salvage_files = verify = 0;
     interactive = 1;
     check_atari();
 
-    while ((c = getopt(argc,argv,"Aad:flnprtu:vVwy")) != EOF)
+    while ((c = getopt(argc,argv,"Aac:d:flnprtu:vVwy")) != EOF)
 	switch (c) {
 	    case 'A': /* toggle Atari format */
 	  	atari_format = !atari_format;
@@ -123,6 +125,9 @@ int main(int argc,char **argv)
 		interactive = 0;
 		salvage_files = 1;
 		break;
+	    case 'c':
+		dos_codepage = atoi(optarg);
+		break;
 	    case 'd':
 		file_add(optarg,fdt_drop);
 		break;
diff --git a/src/file.c b/src/file.c
index cb8a94e..4db66b4 100644
--- a/src/file.c
+++ b/src/file.c
@@ -41,14 +41,77 @@
 #include "common.h"
 #include "file.h"
 
+#include <iconv.h>
+#include <langinfo.h>
+#include <locale.h>
 
 FDSC *fp_root = NULL;
 
+int dos_codepage = DEFAULT_DOS_CODEPAGE;
+
+/**
+ * Open an iconv conversion from the DOS codepage "CP<codepage>" to the
+ * character set of the current locale.
+ *
+ * @param codepage	Numeric DOS codepage, e.g. 437
+ * @return		Conversion descriptor, or (iconv_t)-1 on failure; the
+ *			failure is reported on stderr with perror()
+ */
+static iconv_t iconv_init_codepage(int codepage)
+{
+    iconv_t result;
+    char codepage_name[16];
+    snprintf(codepage_name, sizeof(codepage_name), "CP%d", codepage);
+    result = iconv_open(nl_langinfo(CODESET), codepage_name);
+    if (result == (iconv_t)-1)
+	perror(codepage_name);
+    return result;
+}
 
+/*
+ * Append a printable representation of the short-name byte 'c' at *p and
+ * advance *p past it.  Bytes from 0x20 upwards (except DEL) are converted
+ * from the DOS codepage 'dos_codepage' to the locale's character set; if no
+ * conversion is available only plain ASCII is copied.  Everything else is
+ * written as an octal escape.
+ */
 static void put_char(char **p,unsigned char c)
 {
-    if ((c >= ' ' && c < 0x7f) || c >= 0xa0) *(*p)++ = c;
-    else {
+    /* state of the lazily created converter, shared by all calls */
+    enum { CONV_UNSET, CONV_ICONV, CONV_ASCII_ONLY };
+    static iconv_t to_local;
+    static int conv_state = CONV_UNSET;
+    int success = 0;
+
+    /* Control characters and DEL are never passed through; they always
+     * take the octal fallback below. */
+    if (c >= 0x20 && c != 0x7f) {
+	if (conv_state == CONV_UNSET) {
+	    setlocale(LC_ALL, "");	    /* initialize locale */
+	    to_local = iconv_init_codepage(dos_codepage);
+	    if (to_local == (iconv_t)-1 && dos_codepage != DEFAULT_DOS_CODEPAGE) {
+		printf("Trying to set fallback DOS codepage %d\n", DEFAULT_DOS_CODEPAGE);
+		to_local = iconv_init_codepage(DEFAULT_DOS_CODEPAGE);
+	    }
+	    /* Whichever attempt failed last, never call iconv() with an
+	     * invalid descriptor. */
+	    conv_state = (to_local == (iconv_t)-1) ? CONV_ASCII_ONLY : CONV_ICONV;
+	}
+
+	if (conv_state == CONV_ICONV) {
+	    char in[1] = { (char)c };
+	    char *pin = in;
+	    size_t bytes_in = 1;
+	    size_t bytes_out = 4;	/* at most 4 output bytes per character */
+	    /* on success iconv() itself advances *p past the output */
+	    success = (iconv(to_local, &pin, &bytes_in, p, &bytes_out) != (size_t)-1);
+	}
+	else if (c < 0x7f) {	/* no conversion available: plain ASCII only */
+	    *(*p)++ = c;
+	    success = 1;
+	}
+    }
+    if (!success) {	/* fallback: print octal */
 	*(*p)++ = '\\';
 	*(*p)++ = '0'+(c >> 6);
 	*(*p)++ = '0'+((c >> 3) & 7);
diff --git a/src/file.h b/src/file.h
index b38523b..f7548a3 100644
--- a/src/file.h
+++ b/src/file.h
@@ -35,6 +35,9 @@ typedef struct _fptr {
 
 extern FDSC *fp_root;
 
+#define DEFAULT_DOS_CODEPAGE 437
+extern int dos_codepage;
+
 
 char *file_name(unsigned char *fixed);
 
diff --git a/src/lfn.c b/src/lfn.c
index 97e91dd..b7a4af7 100644
--- a/src/lfn.c
+++ b/src/lfn.c
@@ -71,19 +71,73 @@ static unsigned char fat_uni2esc[64] = {
 /* for maxlen param */
 #define UNTIL_0		INT_MAX
 
-/* Convert name part in 'lfn' from unicode to ASCII */
-#define CNV_THIS_PART(lfn)				\
+/* Copy the 13 little-endian 16-bit characters stored in the three name
+ * fields of the slot 'lfn' (5 + 6 + 2) into dst[0..12], one wchar_t per
+ * character.  No terminator is written. */
+static void copy_lfn_part_w( wchar_t *dst, LFN_ENT *lfn )
+{
+    int i;
+    for (i = 0; i < 5; ++i)
+        dst[i] = lfn->name0_4[i * 2] + (lfn->name0_4[i * 2 + 1] << 8);
+    for (i = 0; i < 6; ++i)
+        dst[i + 5] = lfn->name5_10[i * 2] + (lfn->name5_10[i * 2 + 1] << 8);
+    for (i = 0; i < 2; ++i)
+        dst[i + 11] = lfn->name11_12[i * 2] + (lfn->name11_12[i * 2 + 1] << 8);
+}
+
+/* Convert the NUL-terminated wide string 's' to a multibyte string in a
+ * buffer obtained from alloc().  'len' is the length in bytes of the
+ * converted string without its terminator, as reported beforehand by
+ * wcstombs(NULL, s, 0). */
+static char *_wcstombs_alloc(wchar_t *s, int len)
+{
+    char *r = alloc(len + 1);
+    wcstombs(r, s, len + 1);
+    return r;
+}
+
+/* Convert name part in 'lfn' with cnv_unicode(); used when the part cannot
+ * be converted to the locale's multibyte encoding */
+#define CNV_THIS_PART_FALLBACK(lfn)				\
     ({							\
 	char __part_uni[CHARS_PER_LFN*2];		\
 	copy_lfn_part( __part_uni, lfn );		\
 	cnv_unicode( __part_uni, CHARS_PER_LFN, 0 );	\
     })
 
+/* Convert name part in 'lfn' from unicode to a multibyte string in the
+ * locale's encoding; if wcstombs() cannot convert it, use
+ * CNV_THIS_PART_FALLBACK() instead.
+ * Used only for output, so we better convert chars to something readable */
+#define CNV_THIS_PART(lfn)				\
+    ({							\
+	int __part_len;					\
+	wchar_t __part_uni[CHARS_PER_LFN + 1];		\
+	copy_lfn_part_w( __part_uni, lfn );		\
+	__part_uni[CHARS_PER_LFN] = 0;			\
+	__part_len = wcstombs(NULL, __part_uni, 0);	\
+	(__part_len != -1) ? _wcstombs_alloc(__part_uni, __part_len) : CNV_THIS_PART_FALLBACK(lfn); \
+    })
+
 /* Convert name parts collected so far (from previous slots) from unicode to
  * ASCII */
+/* Used only for output: the result is a multibyte string in the locale's
+ * encoding where wcstombs() can produce one, otherwise whatever
+ * cnv_unicode() returns. */
 #define CNV_PARTS_SO_FAR()					\
+    ({								\
+	wchar_t __part_uni[lfn_parts * CHARS_PER_LFN + 1];	\
+	int _i;							\
+	const unsigned char *_p = (const unsigned char *)	\
+	    (lfn_unicode+(lfn_slot*CHARS_PER_LFN*2));		\
+	for (_i = 0; _i < lfn_parts * CHARS_PER_LFN; ++_i)	\
+		__part_uni[_i] = _p[_i * 2] + (_p[_i * 2 + 1] << 8);		\
+	__part_uni[lfn_parts * CHARS_PER_LFN] = 0;		\
+	_i = wcstombs(NULL, __part_uni, 0);			\
+	(_i != -1) ? _wcstombs_alloc(__part_uni, _i) : 		\
 	(cnv_unicode( lfn_unicode+(lfn_slot*CHARS_PER_LFN*2),	\
-		      lfn_parts*CHARS_PER_LFN, 0 ))
+		      lfn_parts*CHARS_PER_LFN, 0 ));		\
+    })
 
 /* This function converts an unicode string to a normal ASCII string, assuming
  * ISO-8859-1 charset. Characters not in 8859-1 are converted to the same

Reply via email to