From: Lars Schneider
Check that new content is valid with respect to the user-defined
'working-tree-encoding' attribute.
Signed-off-by: Lars Schneider
---
convert.c| 48 ++
t/t0028-working-tree-encoding.sh | 56
2 files changed, 104 insertions(+)
diff --git a/convert.c b/convert.c
index aa59ecfe49..b80d666a6b 100644
--- a/convert.c
+++ b/convert.c
@@ -266,6 +266,51 @@ static int will_convert_lf_to_crlf(size_t len, struct
text_stat *stats,
}
+/*
+ * Detect BOM errors in "data" for UTF encoding "enc". Returns 0 if none;
+ * otherwise advises, then dies (die_on_error set) or returns error().
+ */
+static int validate_encoding(const char *path, const char *enc,
+		      const char *data, size_t len, int die_on_error)
+{
+	/* We only check for UTF here as UTF?? can be an alias for UTF-?? */
+	if (istarts_with(enc, "UTF")) {
+		if (has_prohibited_utf_bom(enc, data, len)) {
+			const char *error_msg = _(
+				"BOM is prohibited in '%s' if encoded as %s");
+			/*
+			 * This advice is shown for UTF-??BE and UTF-??LE
+			 * encodings. Cut off the trailing "BE"/"LE" rather
+			 * than printing a fixed six characters, so that
+			 * the hyphen-less alias UTF??BE is handled as well.
+			 */
+			const char *advise_msg = _(
+				"The file '%s' contains a byte order "
+				"mark (BOM). Please use %.*s as "
+				"working-tree-encoding.");
+			advise(advise_msg, path, (int)strlen(enc) - 2, enc);
+			if (die_on_error)
+				die(error_msg, path, enc);
+			else
+				return error(error_msg, path, enc);
+		} else if (is_missing_required_utf_bom(enc, data, len)) {
+			const char *error_msg = _(
+				"BOM is required in '%s' if encoded as %s");
+			const char *advise_msg = _(
+				"The file '%s' is missing a byte order "
+				"mark (BOM). Please use %sBE or %sLE "
+				"(depending on the byte order) as "
+				"working-tree-encoding.");
+			advise(advise_msg, path, enc, enc);
+			if (die_on_error)
+				die(error_msg, path, enc);
+			else
+				return error(error_msg, path, enc);
+		}
+	}
+	return 0;
+}
+
static const char *default_encoding = "UTF-8";
static int encode_to_git(const char *path, const char *src, size_t src_len,
@@ -291,6 +336,9 @@ static int encode_to_git(const char *path, const char *src,
size_t src_len,
if (!buf && !src)
return 1;
+ if (validate_encoding(path, enc, src, src_len, die_on_error))
+ return 0;
+
dst = reencode_string_len(src, src_len, default_encoding, enc,
_len);
if (!dst) {
diff --git a/t/t0028-working-tree-encoding.sh b/t/t0028-working-tree-encoding.sh
index e492945a01..e8408dfe5c 100755
--- a/t/t0028-working-tree-encoding.sh
+++ b/t/t0028-working-tree-encoding.sh
@@ -62,6 +62,46 @@ test_expect_success 'check $GIT_DIR/info/attributes support'
'
for i in 16 32
do
+ test_expect_success "check prohibited UTF-${i} BOM" '
+ test_when_finished "git reset --hard HEAD" &&
+
+ echo "*.utf${i}be text working-tree-encoding=utf-${i}be"
>>.gitattributes &&
+ echo "*.utf${i}le text working-tree-encoding=utf-${i}LE"
>>.gitattributes &&
+
+	# Here we add UTF-16 (resp. UTF-32) files with BOM (big/little-endian)
+	# but we tell Git to treat them as UTF-16BE/UTF-16LE (resp.
+	# UTF-32BE/UTF-32LE). In these cases the BOM is prohibited.
+ cp bebom.utf${i}be.raw bebom.utf${i}be &&
+ test_must_fail git add bebom.utf${i}be 2>err.out &&
+ test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out
&&
+
+ cp lebom.utf${i}le.raw lebom.utf${i}be &&
+ test_must_fail git add lebom.utf${i}be 2>err.out &&
+ test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out
&&
+
+ cp bebom.utf${i}be.raw bebom.utf${i}le &&
+ test_must_fail git add bebom.utf${i}le 2>err.out &&
+ test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out
&&
+
+ cp lebom.utf${i}le.raw lebom.utf${i}le &&
+ test_must_fail git add lebom.utf${i}le 2>err.out &&
+ test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out
+ '
+
+ test_expect_success "check required UTF-${i} BOM" '
+ test_when_finished "git