On Wed, Sep 11, 2002 at 07:54:19PM +1000, Jay Hap-hang Yu wrote: > > It'd be great if someone come up with some perl script to convert > filenames to UTF8. I guess that can be done with iconv + perl >
I spent the whole morning on this one. Actually, it was an excuse to get away from other things. :) It has had only minimal testing, and I don't recommend it to anyone except for testing purposes. I don't think storing filenames in UTF-8 is wise right now. Well... someone has to eat the crab first! You need libreadline-dev to compile it. Read the source first. Patches and bug reports are welcome. -- hashao| 故贵以贱为本,高以下为基。是以侯王自称孤、寡、不谷。此非 hashao| 以贱为本邪?非乎?故致誉无誉。是故不欲琭琭如玉,珞珞如石。
/* Released under the Gnu Public License (GPL). * It might make your file system unusable. Use it on your own risk. * -- hashao * * Please visit: http://www.debian.org/intl/zh/ */ /* Convert the encoding of a directory name recursively. * + content of symbolic links are also converted. (good/bad?) */ /* Usage: dirconv -f encode -t encode [-w] filename * -w: Do not warn. */ /* To compile: * gcc -lreadline -o dirconv dirconv.c * You need libreadline-dev to compile it, of course. */ /* ChangeLog: * * 2002-09-15: * + first release. Only limited test. * + content of symbolic links are also converted. (good/bad?) * + Don't know what will happen on vfat systems. I will not * try it on my own vfat system! * + Use it on your own risk. (hashao) */ #define _GNU_SOURCE 1 #define SYMLEN 4096 /* Symbolic buffer length. */ #include <unistd.h> #include <errno.h> #include <limits.h> #include <ftw.h> #include <iconv.h> #include <stdio.h> #include <readline/readline.h> iconv_t icd = (iconv_t)(-1); /* global iconv handler. */ int maxlen = 256; /* max len of the constr. */ char *convstr = NULL; /* buff hold the converted file name. */ int do_iconv(const char *file) { char *inbuf, *outbuf; size_t inleft, outleft; size_t retval; memset(convstr, 0, maxlen); inbuf = (char*)file; outbuf = convstr; inleft = strlen(inbuf); outleft = maxlen; retval = iconv(icd, NULL, NULL, NULL, NULL); retval = iconv(icd, &inbuf, &inleft, &outbuf, &outleft); /* convert ok. */ if (retval != (size_t)(-1)) return retval; /* encoding error. */ if (errno != E2BIG) return retval; /* No enough output space. 
*/ free(convstr); maxlen = maxlen + MB_LEN_MAX*inleft; convstr = (char*)malloc(maxlen); return do_iconv(file); } int do_rename(const char* file, struct FTW *s) { int retval; char *newfullpath; newfullpath = (char*)malloc(sizeof(convstr)+ s->base + 1); memset(newfullpath, 0, sizeof(convstr) + s->base + 1); strncat(newfullpath, file, s->base); strcat(newfullpath, convstr); retval = rename(file, newfullpath); if (!retval) printf("converted %s to %s\n", file, newfullpath); else printf("!!! failed to convert %s\n", file); free(newfullpath); } /* Convert a symbolic link and its content. */ int do_symlink(const char* file, struct FTW *s) { int retval; char *newfullpath; char symbuf[SYMLEN]; char *newlink; newfullpath = (char*)malloc(sizeof(convstr)+ s->base + 1); memset(newfullpath, 0, sizeof(convstr) + s->base + 1); strncat(newfullpath, file, s->base); strcat(newfullpath, convstr); memset(symbuf, 0, SYMLEN); /* Get the content of symbolic link and convert it too. */ retval = readlink(file, symbuf, SYMLEN); if (retval == -1){ printf("!!! failed to read symbolic link: %s\n", file); return -1; } { char *inbuf, *outbuf; size_t inleft, outleft; memset(convstr, 0, maxlen); inbuf = symbuf; outbuf = newlink = (char*)malloc(strlen(symbuf)*MB_LEN_MAX); inleft = strlen(symbuf); outleft = SYMLEN; retval = iconv(icd, NULL, NULL, NULL, NULL); retval = iconv(icd, &inbuf, &inleft, &outbuf, &outleft); if (retval == (size_t)(-1)){ printf("!!! Cannot iconv symlink content %s for %s\n", symbuf, file); return -1; } } /* Relink to the new symlink. */ retval = symlink(newlink, newfullpath); if (retval){ printf("!!! Cannot create symlink %s to %s.\n", newfullpath, newlink); return -1; }else { printf("converted symlink %s to %s points to \n", file, newfullpath, newlink); retval = unlink(file); if (retval){ printf("!!! 
Cannot unlink symlink %s\n", file); return -1; } } return 0; } int walk_func(const char* file, const struct stat *sb, int flag, struct FTW *s) { int retval; char *basefile; basefile = (char*)(file + s->base); retval = do_iconv(basefile); if (retval == (size_t)(-1)){ printf("!!! Cannot iconv %s, skip.\n", basefile); if (S_ISDIR(sb->st_mode)) printf("In dir %s\n", file); return 0; } if S_ISLNK(sb->st_mode) { do_symlink(file, s); }else { do_rename(file, s); } if (S_ISDIR(sb->st_mode)) printf("In dir %s\n", file); /* Return non-0 will stop ftw(). */ return 0; } int do_walk(char* root) { int retval; //printf("==== Test(1): FTW_PHYS (don't follow symbolic links) ====\n"); /* Cannot set FTW_CHDIR. Maybe a bug in libc. With it set, stop * recursive at the first level. */ /* Do depth first, do no follow symbolic directory. */ retval = nftw(root, walk_func, 100, FTW_DEPTH|FTW_PHYS); return retval; } /* Hold configuration for this program. */ struct conf { char* from; char* to; char* root; /* Root diretory to convert. */ int warn; /* warning flag. */ }; int do_init(struct conf *cf) { convstr = (char*)malloc(maxlen); icd = iconv_open(cf->to, cf->from); return -1; } /* Read command line options. */ int do_opt(int argc, char* argv[], struct conf *cf) { int retval; int c; cf->to = cf->from = cf->root = NULL; cf->warn = 1; while(1) { int curoptind = optind ? 
optind : 1; c = getopt(argc, argv, "-t:f:w"); if (c == -1) break; switch (c) { case 1: if (!cf->root) cf->root = optarg; break; case 't': cf->to = optarg; break; case 'f': cf->from = optarg; break; case 'w': cf->warn = 0; break; case '?': return -1; break; } } if(!(cf->to && cf->from && cf->root)) { printf("Usage: %s [-w] -f encode -t encode filename \n", argv[0]); return -1; } return 0; } int main(int argc, char* argv[]) { int retval; struct conf cf; struct stat *myst; retval = do_opt(argc, argv, &cf); if(retval != 0) return 1; do_init(&cf); if (icd == (iconv_t)(-1)) { printf("cannot do conversion between %s and %s\n", cf.to, cf.from); return 2; } myst = (struct stat*) malloc(sizeof(struct stat)); retval = stat(cf.root, myst); free(myst); if (retval == -1) { printf("%s cannot be converted.\n", cf.root); perror(cf.root); return 3; } if (cf.warn){ /* Warn the user. */ char* c; printf ("I am going to convert %s and its subdirectory (if any) \n" "from [%s] to [%s]. It is **dangerous**!!! It might mess \n" "up all your file system!\n" "=== %s: [%s] -> [%s] ===\n", cf.root, cf.from, cf.to, cf.root, cf.from, cf.to); c = readline("Are you sure?! [N/y]: "); if (strcasecmp(c, "y")) return 0; c = readline("really? [N/y]: "); if (strcasecmp(c, "y")) return 0; } do_walk(cf.root); }

