Re: [PATCH 4/4] diff: mark any file larger than core.bigfilethreshold binary

2014-06-23 Thread Duy Nguyen
On Thu, Jun 19, 2014 at 7:27 PM, Thomas Braun
thomas.br...@virtuell-zuhause.de wrote:
 @@ -2721,6 +2721,11 @@ int diff_populate_filespec(struct diff_filespec *s, 
 unsigned int flags)
   }
   if (size_only)
   return 0;
 + if ((flags  DIFF_POPULATE_IS_BINARY) 
 + s-size  big_file_threshold  s-is_binary == -1) {
 + s-is_binary = 1;
 + return 0;
 + }

 Why do you check for s-is_binary == -1 here? I think it does not matter
 what s_is_binary says here.

If some .gitattributes to mark one file not-binary, we should respect
that, I think. Same for below too.

 I would also add a note to the documentation e. g:

 diff --git a/Documentation/config.txt b/Documentation/config.txt
 index 9f467d3..7a2f27d 100644
 --- a/Documentation/config.txt
 +++ b/Documentation/config.txt
 @@ -499,7 +499,8 @@ core.bigFileThreshold::
 Files larger than this size are stored deflated, without
 attempting delta compression.  Storing large files without
 delta compression avoids excessive memory usage, at the
 -   slight expense of increased disk usage.
 +   slight expense of increased disk usage.  Additionally files
 +   larger than this size are allways treated as binary.
  +
  Default is 512 MiB on all platforms.  This should be reasonable
  for most projects as source code and other text files can still

Thanks. Will do. Sorry a little busy these days and could not reply earlier.
-- 
Duy
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4] diff: mark any file larger than core.bigfilethreshold binary

2014-06-23 Thread Thomas Braun
Am 23.06.2014 14:18, schrieb Duy Nguyen:
 On Thu, Jun 19, 2014 at 7:27 PM, Thomas Braun
 thomas.br...@virtuell-zuhause.de wrote:
 @@ -2721,6 +2721,11 @@ int diff_populate_filespec(struct diff_filespec *s, 
 unsigned int flags)
   }
   if (size_only)
   return 0;
 + if ((flags  DIFF_POPULATE_IS_BINARY) 
 + s-size  big_file_threshold  s-is_binary == -1) {
 + s-is_binary = 1;
 + return 0;
 + }

 Why do you check for s-is_binary == -1 here? I think it does not matter
 what s_is_binary says here.
 
 If some .gitattributes to mark one file not-binary, we should respect
 that, I think. Same for below too.

Reading diffcore.h I thought is_binary being -1 means it is not yet
decided if it is binary or text.
Respecting .gitattributes is obviously a good thing :)

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4] diff: mark any file larger than core.bigfilethreshold binary

2014-06-19 Thread Thomas Braun
Am Donnerstag, den 29.05.2014, 19:57 +0700 schrieb Nguyễn Thái Ngọc Duy:

Hi,

sorry for chiming in so late.

I've just played around with patch 3 and 4 of that series.
And I like it very much as I work often with large files so any further 
enhancement in that area is really nice.

(see comments below)

 Too large files may lead to failure to allocate memory. If it happens
 here, it could impact quite a few commands that involve
 diff. Moreover, too large files are inefficient to compare anyway (and
 most likely non-text), so mark them binary and skip looking at their
 content.
 
 Noticed-by: Dale R. Worley wor...@alum.mit.edu
 Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
 ---
  diff.c   | 26 ++
  diffcore.h   |  1 +
  t/t1050-large.sh |  4 
  3 files changed, 23 insertions(+), 8 deletions(-)
 
 diff --git a/diff.c b/diff.c
 index 54281cb..0a2f865 100644
 --- a/diff.c
 +++ b/diff.c
 @@ -2185,8 +2185,8 @@ int diff_filespec_is_binary(struct diff_filespec *one)
   one-is_binary = one-driver-binary;
   else {
   if (!one-data  DIFF_FILE_VALID(one))
 - diff_populate_filespec(one, 0);
 - if (one-data)
 + diff_populate_filespec(one, 
 DIFF_POPULATE_IS_BINARY);
 + if (one-is_binary == -1  one-data)
   one-is_binary = buffer_is_binary(one-data,
   one-size);
   if (one-is_binary == -1)
 @@ -2721,6 +2721,11 @@ int diff_populate_filespec(struct diff_filespec *s, 
 unsigned int flags)
   }
   if (size_only)
   return 0;
 + if ((flags  DIFF_POPULATE_IS_BINARY) 
 + s-size  big_file_threshold  s-is_binary == -1) {
 + s-is_binary = 1;
 + return 0;
 + }

Why do you check for s-is_binary == -1 here? I think it does not matter
what s_is_binary says here.

   fd = open(s-path, O_RDONLY);
   if (fd  0)
   goto err_empty;
 @@ -2742,16 +2747,21 @@ int diff_populate_filespec(struct diff_filespec *s, 
 unsigned int flags)
   }
   else {
   enum object_type type;
 - if (size_only) {
 + if (size_only || (flags  DIFF_POPULATE_IS_BINARY)) {
   type = sha1_object_info(s-sha1, s-size);
   if (type  0)
   die(unable to read %s, sha1_to_hex(s-sha1));
 - } else {
 - s-data = read_sha1_file(s-sha1, type, s-size);
 - if (!s-data)
 - die(unable to read %s, sha1_to_hex(s-sha1));
 - s-should_free = 1;
 + if (size_only)
 + return 0;
 + if (s-size  big_file_threshold  s-is_binary == -1) 
 {
same as above.
 + s-is_binary = 1;
 + return 0;
 + }
   }
 + s-data = read_sha1_file(s-sha1, type, s-size);
 + if (!s-data)
 + die(unable to read %s, sha1_to_hex(s-sha1));
 + s-should_free = 1;
   }
   return 0;
  }
 diff --git a/diffcore.h b/diffcore.h
 index a186d7c..e7760d9 100644
 --- a/diffcore.h
 +++ b/diffcore.h
 @@ -56,6 +56,7 @@ extern void fill_filespec(struct diff_filespec *, const 
 unsigned char *,
 int, unsigned short);
  
  #define DIFF_POPULATE_SIZE_ONLY 1
 +#define DIFF_POPULATE_IS_BINARY 2
  extern int diff_populate_filespec(struct diff_filespec *, unsigned int);
  extern void diff_free_filespec_data(struct diff_filespec *);
  extern void diff_free_filespec_blob(struct diff_filespec *);
 diff --git a/t/t1050-large.sh b/t/t1050-large.sh
 index 333909b..4d922e2 100755
 --- a/t/t1050-large.sh
 +++ b/t/t1050-large.sh
 @@ -112,6 +112,10 @@ test_expect_success 'diff --raw' '
   git diff --raw HEAD^
  '
  
 +test_expect_success 'diff --stat' '
 + git diff --stat HEAD^ HEAD
 +'
 +
  test_expect_success 'hash-object' '
   git hash-object large1
  '

I would also add a note to the documentation e. g:

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 9f467d3..7a2f27d 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -499,7 +499,8 @@ core.bigFileThreshold::
Files larger than this size are stored deflated, without
attempting delta compression.  Storing large files without
delta compression avoids excessive memory usage, at the
-   slight expense of increased disk usage.
+   slight expense of increased disk usage.  Additionally files
+   larger than this size are allways treated as binary.
 +
 Default is 512 MiB on all platforms.  This should be reasonable
 for most 

[PATCH 4/4] diff: mark any file larger than core.bigfilethreshold binary

2014-05-29 Thread Nguyễn Thái Ngọc Duy
Too large files may lead to failure to allocate memory. If it happens
here, it could impact quite a few commands that involve
diff. Moreover, too large files are inefficient to compare anyway (and
most likely non-text), so mark them binary and skip looking at their
content.

Noticed-by: Dale R. Worley wor...@alum.mit.edu
Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 diff.c   | 26 ++
 diffcore.h   |  1 +
 t/t1050-large.sh |  4 
 3 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/diff.c b/diff.c
index 54281cb..0a2f865 100644
--- a/diff.c
+++ b/diff.c
@@ -2185,8 +2185,8 @@ int diff_filespec_is_binary(struct diff_filespec *one)
one-is_binary = one-driver-binary;
else {
if (!one-data  DIFF_FILE_VALID(one))
-   diff_populate_filespec(one, 0);
-   if (one-data)
+   diff_populate_filespec(one, 
DIFF_POPULATE_IS_BINARY);
+   if (one-is_binary == -1  one-data)
one-is_binary = buffer_is_binary(one-data,
one-size);
if (one-is_binary == -1)
@@ -2721,6 +2721,11 @@ int diff_populate_filespec(struct diff_filespec *s, 
unsigned int flags)
}
if (size_only)
return 0;
+   if ((flags  DIFF_POPULATE_IS_BINARY) 
+   s-size  big_file_threshold  s-is_binary == -1) {
+   s-is_binary = 1;
+   return 0;
+   }
fd = open(s-path, O_RDONLY);
if (fd  0)
goto err_empty;
@@ -2742,16 +2747,21 @@ int diff_populate_filespec(struct diff_filespec *s, 
unsigned int flags)
}
else {
enum object_type type;
-   if (size_only) {
+   if (size_only || (flags  DIFF_POPULATE_IS_BINARY)) {
type = sha1_object_info(s-sha1, s-size);
if (type  0)
die(unable to read %s, sha1_to_hex(s-sha1));
-   } else {
-   s-data = read_sha1_file(s-sha1, type, s-size);
-   if (!s-data)
-   die(unable to read %s, sha1_to_hex(s-sha1));
-   s-should_free = 1;
+   if (size_only)
+   return 0;
+   if (s-size  big_file_threshold  s-is_binary == -1) 
{
+   s-is_binary = 1;
+   return 0;
+   }
}
+   s-data = read_sha1_file(s-sha1, type, s-size);
+   if (!s-data)
+   die(unable to read %s, sha1_to_hex(s-sha1));
+   s-should_free = 1;
}
return 0;
 }
diff --git a/diffcore.h b/diffcore.h
index a186d7c..e7760d9 100644
--- a/diffcore.h
+++ b/diffcore.h
@@ -56,6 +56,7 @@ extern void fill_filespec(struct diff_filespec *, const 
unsigned char *,
  int, unsigned short);
 
 #define DIFF_POPULATE_SIZE_ONLY 1
+#define DIFF_POPULATE_IS_BINARY 2
 extern int diff_populate_filespec(struct diff_filespec *, unsigned int);
 extern void diff_free_filespec_data(struct diff_filespec *);
 extern void diff_free_filespec_blob(struct diff_filespec *);
diff --git a/t/t1050-large.sh b/t/t1050-large.sh
index 333909b..4d922e2 100755
--- a/t/t1050-large.sh
+++ b/t/t1050-large.sh
@@ -112,6 +112,10 @@ test_expect_success 'diff --raw' '
git diff --raw HEAD^
 '
 
+test_expect_success 'diff --stat' '
+   git diff --stat HEAD^ HEAD
+'
+
 test_expect_success 'hash-object' '
git hash-object large1
 '
-- 
1.9.1.346.ga2b5940

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html