Torsten Bögershausen skrev 2012-09-01 08.11:
> Allow path names to be encoded in UTF-8 in the repository
> and checkout out as e.g. ISO-8859-1 in the working tree.
Ack for attempting this.
I did it myself in 2007, but the time wasn't ripe then, I guess.
> +i18n.pathEncoding::
> + This option is only used by some implementations of git.
> + When "git init" sets core.supportspathencoding to true,
> + i18n.pathEncoding can be set to re-encode path names when
> + a working tree is checked out.
> + Path names may be e.g. encoded in ISO-8859-1 and are stored as
> + UTF-8 encoded in the repository.
> + When not set, the encoding of path names is the same in working tree
> + and the repository.
"If set, then core.precomposeunicode is ignored on Mac OS X."
> diff --git a/compat/reencode_pathname.c b/compat/reencode_pathname.c
> new file mode 100644
> index 0000000..3bdc776
> --- /dev/null
> +++ b/compat/reencode_pathname.c
> @@ -0,0 +1,441 @@
> +/*
> + * Converts pathnames from one encoding into another.
> + * The pathnames are stored as UTF-8 in the repository,
> + * and might be checkout out as e.g. ISO-8859-1 in the working tree
> + *
> + * On MacOS X decomposed unicode is converted into precomposed unicode.
, ignoring the setting of core.precomposeunicode.
[...]
> + */
> +
> +#define REENCODE_PATHNAME_C
> +#include "cache.h"
> +#include "utf8.h"
> +#include "reencode_pathname.h"
> +
> +#if defined(OLD_ICONV) || (defined(__sun__) && !defined(_XPG6))
> + typedef const char *iconv_ibp;
> +#else
> + typedef char *iconv_ibp;
> +#endif
> +
> +const static char *repo_path_encoding = "UTF-8";
> +
> +static iconv_t iconv_open_or_die(const char *tocode, const char *fromcode)
> +{
> + iconv_t my_iconv;
> + my_iconv = iconv_open(tocode, fromcode);
join these two lines
> + if (my_iconv == (iconv_t) -1)
> + die_errno(_("iconv_open(%s,%s) failed"), tocode, fromcode);
> + return my_iconv;
> +}
> +
> +static size_t has_non_ascii(const char *s, size_t maxlen, size_t *strlen_c)
> +{
> + const uint8_t *ptr = (const uint8_t *)s;
> + size_t strlen_chars = 0;
> + size_t ret = 0;
> +
> + if (!ptr || !*ptr)
> + return 0;
> +
> + while (*ptr && maxlen) {
> + if (*ptr & 0x80)
> + ret++;
> + strlen_chars++;
> + ptr++;
> + maxlen--;
> + }
> + if (strlen_c)
> + *strlen_c = strlen_chars;
> +
> + return ret;
> +}
> +
> +#ifdef PRECOMPOSE_UNICODE
> +void probe_utf8_pathname_composition(char *path, int len)
> +{
> + static const char *auml_nfc = "\xc3\xa4";
> + static const char *auml_nfd = "\x61\xcc\x88";
> + int output_fd;
> + if (precomposed_unicode != -1)
> + return; /* We found it defined in the global config, respect it */
a blank line here would be nice
> + strcpy(path + len, auml_nfc);
> + output_fd = open(path, O_CREAT|O_EXCL|O_RDWR, 0600);
> + if (output_fd >= 0) {
> + close(output_fd);
> + strcpy(path + len, auml_nfd);
> + /* Indicate to the user, that we can configure it to true */
> + if (!access(path, R_OK))
> + git_config_set("core.precomposeunicode", "false");
> + /* To be backward compatible, set precomposed_unicode to 0 */
> + precomposed_unicode = 0;
> + strcpy(path + len, auml_nfc);
> + if (unlink(path))
> + die_errno(_("failed to unlink '%s'"), path);
> + }
> +}
> +#endif
[...]
> +struct dirent_psx *renc_pn_readdir(RENC_FN_DIR *renc_pn_dir)
> +{
> + struct dirent *res;
> + res = readdir(renc_pn_dir->dirp);
> + if (res) {
> + size_t namelenz = strlen(res->d_name) + 1; /* \0 */
> + size_t new_len_needed = 0;
> + int ret_errno = errno;
> +
> + renc_pn_dir->dirent_utf8->d_ino = res->d_ino;
> + renc_pn_dir->dirent_utf8->d_type = res->d_type;
> + do {
> + if (new_len_needed > renc_pn_dir->dirent_utf8->max_name_len) {
indent
[...]
> diff --git a/environment.c b/environment.c
> index 85edd7f..ba81575 100644
> --- a/environment.c
> +++ b/environment.c
> @@ -59,6 +59,7 @@ int grafts_replace_parents = 1;
> int core_apply_sparse_checkout;
> int merge_log_config = -1;
> int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */
> +const char *wt_path_encoding = NULL;
indent
> struct startup_info *startup_info;
> unsigned long pack_size_limit_cfg;
>
> diff --git a/git-compat-util.h b/git-compat-util.h
> index 35b095e..877b060 100644
> --- a/git-compat-util.h
> +++ b/git-compat-util.h
> @@ -153,13 +153,21 @@
> #endif
> #endif
>
> -/* used on Mac OS X */
> -#ifdef PRECOMPOSE_UNICODE
> -#include "compat/precompose_utf8.h"
> +#if defined(PATH_ENCODING) || defined(PRECOMPOSE_UNICODE)
> +#include "compat/reencode_pathname.h"
> #else
> -#define precompose_str(in,i_nfd2nfc)
> -#define precompose_argv(c,v)
> -#define probe_utf8_pathname_composition(a,b)
> +#define reencode_argv(c,v)
> +#endif
> +
> +/* needed for Mac OS X */
> +#ifndef PRECOMPOSE_UNICODE
> +#define probe_utf8_pathname_composition(a,b);
> +#endif
> +
> +#ifndef PATH_ENCODING
> +#define str_worktree2repolen(in, insz) (NULL)
> +#define str_repo2worktree(in) (NULL)
> +#define str_worktree2repo(in) (NULL)
> #endif
>
> #ifndef NO_LIBGEN_H
> diff --git a/parse-options.c b/parse-options.c
> index c1c66bd..5840c18 100644
> --- a/parse-options.c
> +++ b/parse-options.c
> @@ -476,7 +476,7 @@ int parse_options(int argc, const char **argv, const char
*prefix,
> usage_with_options(usagestr, options);
> }
>
> - precompose_argv(argc, argv);
> + reencode_argv(argc, argv);
> return parse_options_end(&ctx);
> }
>
> diff --git a/t/t3911-i18n-filename-8859.sh b/t/t3911-i18n-filename-8859.sh
> new file mode 100755
> index 0000000..aa2be57
> --- /dev/null
> +++ b/t/t3911-i18n-filename-8859.sh
> @@ -0,0 +1,251 @@
> +#!/bin/sh
> +#
> +# Copyright (c) 2010 Torsten Bögershausen
> +#
> +
> +test_description='file system encodings UTF-8 ISO8859-1'
> +
> +. ./test-lib.sh
> +
> +fname_UTF_8=`printf '\303\206\302\242'`
> +fname_ISO8859_1=`printf '\306\242'`
> +Euro_utf8=`printf '\342\202\254'`
> +supportspathencoding=`git config core.supportspathencoding` || :
> +
> +
> +add_file_dir_link() {
> + local bname=$1
> + local fname=$2
> + test_expect_success "add file $fname.f $bname" '
> + git checkout master &&
> + git checkout -b add_f_$bname &&
> + >$fname.f &&
> + git add $fname.f &&
> + git commit -m "add fname"
> + '
> +
> + test_expect_success "add dir $fname.d $bname" '
> + git checkout master &&
> + git checkout -b add_d_$bname &&
> + mkdir $fname.d &&
> + touch $fname.d/$fname.f &&
> + git add $fname.d/$fname.f &&
> + git commit -m "add fname.d/fname"
> + '
> +
> + i=0
> + for src in x $fname; do
> + for dst in x $fname; do
> + test_expect_success "add link $dst.l->$src.f on branch
add_l_${i}_$bname" '
> + git checkout master &&
> + git checkout -b add_l_${i}_$bname &&
> + ln -s $src.f $dst.l &&
> + git add $dst.l &&
> + git commit -m "add fname.l $i"
> + '
> + i=$(($i+1))
> + done
> + done
> +}
> +
> +test_expect_success "setup add rm x" '
> + >x &&
> + git add x &&
> + git commit -m "1st commit" &&
> + git rm x &&
> + git commit -m "rm x"
> +'
> +
> +#combinations to be tested:
> +# UTF-8 -> ISO8859-1
> +# ISO8859-1 -> UTF-8
> +
> +if test "$supportspathencoding"
> +then
> + srcencodings="ISO8859-1 UTF-8"
> + for srcenc in $srcencodings
> + do
> + case $srcenc in
> + ISO8859-1)
> + dstenc=UTF-8
> + ;;
> + UTF-8)
> + dstenc=ISO8859-1
> + ;;
> + UTF-8-MAC)
> + dstenc=UTF-8
> + ;;
> + *)
> + echo >&2 "Wrong encoding $srcenc"
> + exit 1
> + ;;
> + esac
> + eval fname_src=\$fname_$(echo $srcenc | sed -e 's/-/_/g' -e
's/_MAC//')
> + eval fname_dst=\$fname_$(echo $dstenc | sed -e 's/-/_/g')
> + test_expect_success "setup $srcenc" '
> + git checkout master &&
> + git config i18n.pathencoding $srcenc
> + '
> + add_file_dir_link $srcenc $fname_src
> +
> + test_expect_success "setup $dstenc" '
> + git checkout master &&
> + echo "git checkout Master" >&2
> + ls -l >&2
> + git config i18n.pathencoding $dstenc
> + '
> +
> + test_expect_success "checkout file $dstenc (was $srcenc)" '
> + git checkout add_f_$srcenc
> + '
> +
> + test_expect_success "exists file $dstenc (was $srcenc)" '
> + test -f $fname_dst.f
> + '
> +
> + test_expect_success "log file $dstenc (was $srcenc)" '
> + git log $fname_dst.f
> + '
> +
> + test_expect_success "git mv" '
> + git checkout -b mv_file_$srcenc &&
> + git mv $fname_dst.f XX.f &&
> + git commit -m "git mv fname_dst.f XX.f"
> + '
> +
> + test_expect_success "checkout dir $dstenc (was $srcenc)" '
> + git checkout add_d_$srcenc
> + '
> +
> + test_expect_success "exist dir $dstenc (was $srcenc)" '
> + test -d $fname_dst.d
> + '
> +
> + test_expect_success "log dir $dstenc (was $srcenc)" '
> + git log $fname_dst.d
> + '
> +
> + i=0
> + for src in x $fname_dst; do
> + for dst in x $fname_dst; do
> + test_expect_success "checkout link $dst.l->$src.f
branch add_l_${i}_$srcenc" '
> + git checkout add_l_${i}_$srcenc
> + '
> + test_expect_success "exist link $dst.l->$src.f branch
add_l_${i}_$srcenc" '
> + test -L $dst.l
> + '
> + test_expect_success "log link $dst.l->$src.f branch
add_l_${i}_$srcenc" '
> + git log $dst.l
> + '
> + test_expect_success "readlink $dst.l->$src.f branch
add_l_${i}_$srcenc" '
> + echo "$src.f" >expect &&
> + readlink "$dst.l" > actual &&
> + test_cmp expect actual &&
> + rm expect actual
> + '
> + i=$(($i+1))
> + done
> + done
> + done
> + # Make sure that Euro sign can NOT be checked out in 8859
"8859-1", The euro sign exists in 8859-15.
> + #fname_src=Euro
> + test_expect_success "setup UTF-8" '
> + git checkout master &&
> + git config i18n.pathencoding UTF-8
> + '
> + add_file_dir_link Euro $Euro_utf8
> +
> + test_expect_success "setup ISO8859-1" '
> + git checkout master &&
> + rm -rf * &&
> + git config i18n.pathencoding ISO8859-1
> + '
> + test_expect_success "checkout file Euro branch add_f_Euro" '
> + git checkout add_f_Euro
Missing && ?
> + echo * >actual &&
> + echo "*" >expect &&
> + test_cmp expect actual &&
> + rm expect actual
> + '
> +
> + test_expect_success "checkout dir Euro branch add_d_Euro" '
> + rm -rf * &&
> + test_must_fail git checkout add_d_Euro
> + '
> +
> + test_expect_success "Cleanup" '
> + git config i18n.pathencoding UTF-8 &&
> + git checkout master &&
> + rm -rf * &&
> + git reset --hard &&
> + git config i18n.pathencoding ISO8859-1
> + '
> +
> + test_expect_success "checkout link Euro.l->x.f branch add_l_1_Euro" '
> + ! git checkout add_l_1_Euro
> + '
> +
> + test_expect_success "No link Euro.l->x.f" '
> + echo * >actual &&
> + echo "*" >expect &&
> + test_cmp expect actual &&
> + rm expect actual
> + '
> +
> + test_expect_success "Cleanup after Euro.l->x.f" '
> + git config i18n.pathencoding UTF-8 &&
> + git checkout master &&
> + rm -rf * &&
> + git reset --hard &&
> + git config i18n.pathencoding ISO8859-1
> + '
> +
> + # Checkoing out a soft link pointing to a filename outside
"checking"
> + # 8859-1 should fail
> + test_expect_failure "checkout link x.l->Euro.f branch add_l_2_Euro" '
> + ! git checkout add_l_2_Euro
> + '
> +
> + test_expect_success "No link x.f->Euro.l" '
> + echo * >actual &&
> + echo "*" >expect &&
> + test_cmp expect actual &&
> + rm expect actual
> + '
> +
> + test_expect_success "Cleanup after link x.l->Euro.f branch" '
> + git config i18n.pathencoding UTF-8 &&
> + git checkout master &&
> + rm -rf * &&
> + git reset --hard &&
> + git config i18n.pathencoding ISO8859-1
> + '
> +
> + test_expect_success "checkout link Euro.l->Euro.f branch add_l_3_Euro" '
> + ! git checkout add_l_3_Euro
> + '
> +
> + test_expect_success "No link Euro.l->Euro.f" '
> + echo * >actual &&
> + echo "*" >expect &&
> + test_cmp expect actual &&
> + rm expect actual
> + '
> +
> +else
> + test_expect_success "setup 8859" '
"8859-1"
> + git config i18n.pathencoding ISO8859-1 &&
> + git checkout -b add_file_8859 &&
> + > $fname_src.f &&
> + git add $fname_src.f &&
> + git commit -m "add fname_src" &&
> + git config i18n.pathencoding UTF-8 &&
> + rm -rf * &&
> + git reset --hard
> + '
> + test_expect_success "Silent support of pathencoding" '
> + test_must_fail test -f $fname_UTF_8.f
> + '
> +fi
> +
> +test_done
-- robin
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html