Re: [PATCH/RFC 2/2] File commited with CRLF should roundtrip diff and apply

2017-08-16 Thread Junio C Hamano
I'll be sending a few patches that apply on top of these
two patches to show what I meant in my previous review comments.
The net change to apply.c, when you combine your 2/2 with these,
would become like the attached, which I think makes more sense.

Instead of queuing a squashed result, I thought it may help to send
them as incremental fixes with their own justification.

diff --git a/apply.c b/apply.c
index f2d599141d..c06f7014a2 100644
--- a/apply.c
+++ b/apply.c
@@ -220,6 +220,7 @@ struct patch {
unsigned int recount:1;
unsigned int conflicted_threeway:1;
unsigned int direct_to_threeway:1;
+   unsigned int crlf_in_old:1;
struct fragment *fragments;
char *result;
size_t resultsize;
@@ -1662,6 +1663,19 @@ static void check_whitespace(struct apply_state *state,
record_ws_error(state, result, line + 1, len - 2, state->linenr);
 }
 
+/*
+ * Check if the patch has context lines with CRLF or
+ * the patch wants to remove lines with CRLF.
+ */
+static void check_old_for_crlf(struct patch *patch, const char *line, int len)
+{
+   if (len >= 2 && line[len-1] == '\n' && line[len-2] == '\r') {
+   patch->ws_rule |= WS_CR_AT_EOL;
+   patch->crlf_in_old = 1;
+   }
+}
+
+
 /*
  * Parse a unified diff. Note that this really needs to parse each
  * fragment separately, since the only way to know the difference
@@ -1712,11 +1726,14 @@ static int parse_fragment(struct apply_state *state,
if (!deleted && !added)
leading++;
trailing++;
+   check_old_for_crlf(patch, line, len);
if (!state->apply_in_reverse &&
state->ws_error_action == correct_ws_error)
check_whitespace(state, line, len, 
patch->ws_rule);
break;
case '-':
+   if (!state->apply_in_reverse)
+   check_old_for_crlf(patch, line, len);
if (state->apply_in_reverse &&
state->ws_error_action != nowarn_ws_error)
check_whitespace(state, line, len, 
patch->ws_rule);
@@ -1725,6 +1742,8 @@ static int parse_fragment(struct apply_state *state,
trailing = 0;
break;
case '+':
+   if (state->apply_in_reverse)
+   check_old_for_crlf(patch, line, len);
if (!state->apply_in_reverse &&
state->ws_error_action != nowarn_ws_error)
check_whitespace(state, line, len, 
patch->ws_rule);
@@ -2268,8 +2287,12 @@ static void show_stats(struct apply_state *state, struct 
patch *patch)
add, pluses, del, minuses);
 }
 
-static int read_old_data(struct stat *st, const char *path, struct strbuf *buf)
+static int read_old_data(struct stat *st, const char *path, struct strbuf *buf,
+struct patch *patch)
 {
+   enum safe_crlf safe_crlf = (patch->crlf_in_old
+   ? SAFE_CRLF_KEEP_CRLF : SAFE_CRLF_FALSE);
+
switch (st->st_mode & S_IFMT) {
case S_IFLNK:
if (strbuf_readlink(buf, path, st->st_size) < 0)
@@ -2278,7 +2301,7 @@ static int read_old_data(struct stat *st, const char 
*path, struct strbuf *buf)
case S_IFREG:
if (strbuf_read_file(buf, path, st->st_size) != st->st_size)
return error(_("unable to open or read %s"), path);
-   convert_to_git(_index, path, buf->buf, buf->len, buf, 0);
+   convert_to_git(_index, path, buf->buf, buf->len, buf, 
safe_crlf);
return 0;
default:
return -1;
@@ -3384,6 +3407,7 @@ static int load_patch_target(struct apply_state *state,
 struct strbuf *buf,
 const struct cache_entry *ce,
 struct stat *st,
+struct patch *patch,
 const char *name,
 unsigned expected_mode)
 {
@@ -3399,7 +3423,7 @@ static int load_patch_target(struct apply_state *state,
} else if (has_symlink_leading_path(name, strlen(name))) {
return error(_("reading from '%s' beyond a symbolic 
link"), name);
} else {
-   if (read_old_data(st, name, buf))
+   if (read_old_data(st, name, buf, patch))
return error(_("failed to read %s"), name);
}
}
@@ -3432,7 +3456,7 @@ static int load_preimage(struct apply_state *state,
/* We have a patched copy in memory; use that. */
strbuf_add(, previous->result, 

[PATCH/RFC 2/2] File commited with CRLF should roundtrip diff and apply

2017-08-13 Thread tboegi
From: Torsten Bögershausen 

When a file has been committed with CRLF and core.autocrlf is true,
the following does not roundtrip; `git apply` fails:

printf "Added line\r\n" >>file &&
git diff >patch &&
git checkout -- . &&
git apply patch

Before applying the patch, the file from the working tree is converted into the
index format (clean filter, CRLF conversion, ...).
Here, when the file was committed with CRLF, the line endings should not be
converted.

Analyze the patch to see whether there is any context line with CRLF,
or whether any line with CRLF is to be removed.

If so, the new flag has_crlf is set in "struct patch", and two things
will happen:
- read_old_data() will not convert CRLF into LF, because it calls
  convert_to_git(..., SAFE_CRLF_KEEP_CRLF);
- The WS_CR_AT_EOL bit is set in the "white space rule", so a CR at
  the end of a line is treated as part of the line terminator and is
  no longer reported as trailing whitespace.

Thanks to Junio C Hamano, his input became the base for t4140.

Reported-by: Anthony Sottile 
Signed-off-by: Torsten Bögershausen 
---


The previous version did not pass t4124; this version fixes that.



apply.c  | 37 -
 apply.h  |  4 
 t/t4124-apply-ws-rule.sh |  3 +--
 t/t4140-apply-CRLF.sh| 46 ++
 4 files changed, 79 insertions(+), 11 deletions(-)
 create mode 100755 t/t4140-apply-CRLF.sh

diff --git a/apply.c b/apply.c
index f2d599141d..63455cd65f 100644
--- a/apply.c
+++ b/apply.c
@@ -220,6 +220,7 @@ struct patch {
unsigned int recount:1;
unsigned int conflicted_threeway:1;
unsigned int direct_to_threeway:1;
+   unsigned int has_crlf:1;
struct fragment *fragments;
char *result;
size_t resultsize;
@@ -1662,6 +1663,17 @@ static void check_whitespace(struct apply_state *state,
record_ws_error(state, result, line + 1, len - 2, state->linenr);
 }
 
+/* Check if the patch has context lines with CRLF or
+   the patch wants to remove lines with CRLF */
+static void check_old_for_crlf(struct patch *patch, const char *line, int len)
+{
+   if (len >= 2 && line[len-1] == '\n' && line[len-2] == '\r') {
+   patch->ws_rule |= WS_CR_AT_EOL;
+   patch->has_crlf = 1;
+   }
+}
+
+
 /*
  * Parse a unified diff. Note that this really needs to parse each
  * fragment separately, since the only way to know the difference
@@ -1712,11 +1724,13 @@ static int parse_fragment(struct apply_state *state,
if (!deleted && !added)
leading++;
trailing++;
+   check_old_for_crlf(patch, line, len);
if (!state->apply_in_reverse &&
state->ws_error_action == correct_ws_error)
check_whitespace(state, line, len, 
patch->ws_rule);
break;
case '-':
+   check_old_for_crlf(patch, line, len);
if (state->apply_in_reverse &&
state->ws_error_action != nowarn_ws_error)
check_whitespace(state, line, len, 
patch->ws_rule);
@@ -2268,8 +2282,10 @@ static void show_stats(struct apply_state *state, struct 
patch *patch)
add, pluses, del, minuses);
 }
 
-static int read_old_data(struct stat *st, const char *path, struct strbuf *buf)
+static int read_old_data(struct stat *st, const char *path, struct strbuf 
*buf, int flags)
 {
+   enum safe_crlf safe_crlf = flags & APPLY_FLAGS_CR_AT_EOL ?
+   SAFE_CRLF_KEEP_CRLF : SAFE_CRLF_FALSE;
switch (st->st_mode & S_IFMT) {
case S_IFLNK:
if (strbuf_readlink(buf, path, st->st_size) < 0)
@@ -2278,7 +2294,7 @@ static int read_old_data(struct stat *st, const char 
*path, struct strbuf *buf)
case S_IFREG:
if (strbuf_read_file(buf, path, st->st_size) != st->st_size)
return error(_("unable to open or read %s"), path);
-   convert_to_git(_index, path, buf->buf, buf->len, buf, 0);
+   convert_to_git(_index, path, buf->buf, buf->len, buf, 
safe_crlf);
return 0;
default:
return -1;
@@ -3385,7 +3401,8 @@ static int load_patch_target(struct apply_state *state,
 const struct cache_entry *ce,
 struct stat *st,
 const char *name,
-unsigned expected_mode)
+unsigned expected_mode,
+int flags)
 {
if (state->cached || state->check_index) {
if (read_file_or_gitlink(ce, buf))
@@ -3399,7 +3416,7 @@ static int load_patch_target(struct apply_state *state,
} else if (has_symlink_leading_path(name, strlen(name))) {
return error(_("reading from '%s' beyond a symbolic