Re: [PATCH v7 2/4] cat-file: teach cat-file a '--literally' option

2015-04-07 Thread Eric Sunshine
On Sat, Apr 4, 2015 at 1:44 AM, Karthik Nayak karthik@gmail.com wrote:
 Currently 'git cat-file' throws an error while trying to
 print the type or size of a broken/corrupt object which is
 created using 'git hash-object --literally'. This is
 because these objects are usually of unknown types.

The focus of this explanation is off the mark. The fact that such
objects can be created by 'hash-object --literally' is tangential to
the real purpose of the new 'cat-file --literally' option, which is
that it can help with diagnosing broken/corrupt objects encountered
in-the-wild.

Even mentioning 'hash-object --literally' here may be misleading and
confusing since its purpose is to intentionally create broken objects
for stress-testing git itself. I'd probably drop the reference
altogether, but if you insist upon mentioning 'hash-object
--literally', perhaps make it a very minor parenthetical comment at
the end of the commit message saying that 'cat-file --literally' was
inspired by its hash-object counterpart, or some such.

More below.

 Teach git cat-file a '--literally' option where it prints
 the type or size of a broken/corrupt object without throwing
 an error.

 Modify '-t' and '-s' options to call sha1_object_info_extended()
 directly to support the '--literally' option.

 Helped-by: Junio C Hamano gits...@pobox.com
 Helped-by: Eric Sunshine sunsh...@sunshineco.com
 Signed-off-by: Karthik Nayak karthik@gmail.com
 ---
 diff --git a/builtin/cat-file.c b/builtin/cat-file.c
 index df99df4..91ceae0 100644
 --- a/builtin/cat-file.c
 +++ b/builtin/cat-file.c
 @@ -9,13 +9,20 @@
  #include userdiff.h
  #include streaming.h

 -static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
 +static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
 +   int literally)
  {
 unsigned char sha1[20];
 enum object_type type;
 char *buf;
 unsigned long size;
 struct object_context obj_context;
 +   struct object_info oi = {NULL};
 +   struct strbuf sb = STRBUF_INIT;
 +   unsigned flags = LOOKUP_REPLACE_OBJECT;
 +
 +   if (literally)
 +   flags |= LOOKUP_LITERALLY;

 if (get_sha1_with_context(obj_name, 0, sha1, obj_context))
 die(Not a valid object name %s, obj_name);
 @@ -23,16 +30,24 @@ static int cat_one_file(int opt, const char *exp_type, 
 const char *obj_name)
 buf = NULL;
 switch (opt) {
 case 't':
 -   type = sha1_object_info(sha1, NULL);
 -   if (type  0) {
 -   printf(%s\n, typename(type));
 +   oi.typep = type;
 +   oi.typename = sb;

These two lines are common to the -t and -s cases. Would it make sense
to instead move them to just after 'oi' and 'sb' are declared? However
(see below)...

 +   if (sha1_object_info_extended(sha1, oi, flags)  0)
 +   die(git cat-file: could not get object info);
 +   if (type = 0  sb.len) {
 +   printf(%s\n, sb.buf);
 +   strbuf_release(sb);

Here you release the strbuf...

 return 0;
 }
 break;

 case 's':
 -   type = sha1_object_info(sha1, size);
 -   if (type  0) {
 +   oi.typep = type;
 +   oi.typename = sb;

Why do you need to collect 'typename' for the -s case?
sha1_object_info_extended() promises that 'type' will be zero in the
--literally case for unknown types, so checking 'sb.len' in the
conditional below doesn't buy you anything, does it?

In fact, it's not even clear why you need to collect 'type' in the -s
case. The return value of sha1_object_info_extended() already tells
you whether or not the 'size' was retrieved successfully (--literally
or not).

 +   oi.sizep = size;
 +   if (sha1_object_info_extended(sha1, oi, flags)  0)
 +   die(git cat-file: could not get object info);
 +   if (type = 0  sb.len) {
 printf(%lu\n, size);

But here you do not release the strbuf.

 return 0;
 }
 @@ -369,6 +385,8 @@ int cmd_cat_file(int argc, const char **argv, const char 
 *prefix)
 OPT_SET_INT('p', NULL, opt, N_(pretty-print object's 
 content), 'p'),
 OPT_SET_INT(0, textconv, opt,
 N_(for blob objects, run textconv on object's 
 content), 'c'),
 +   OPT_BOOL( 0, literally, literally,
 + N_(get information about corrupt objects for 
 debugging Git)),

This option neither gets information nor is it for debugging Git.
Rather, it's useful for diagnosing broken/corrupt objects in
combination with other options. Perhaps rephrase something like this:

allow -s and -t to work with broken/corrupt objects

 { 

[PATCH v7 2/4] cat-file: teach cat-file a '--literally' option

2015-04-03 Thread Karthik Nayak
Currently 'git cat-file' throws an error while trying to
print the type or size of a broken/corrupt object which is
created using 'git hash-object --literally'. This is
because these objects are usually of unknown types.

Teach git cat-file a '--literally' option where it prints
the type or size of a broken/corrupt object without throwing
an error.

Modify '-t' and '-s' options to call sha1_object_info_extended()
directly to support the '--literally' option.

Helped-by: Junio C Hamano gits...@pobox.com
Helped-by: Eric Sunshine sunsh...@sunshineco.com
Signed-off-by: Karthik Nayak karthik@gmail.com
---
 builtin/cat-file.c | 38 +-
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index df99df4..91ceae0 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -9,13 +9,20 @@
 #include userdiff.h
 #include streaming.h
 
-static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
+static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
+   int literally)
 {
unsigned char sha1[20];
enum object_type type;
char *buf;
unsigned long size;
struct object_context obj_context;
+   struct object_info oi = {NULL};
+   struct strbuf sb = STRBUF_INIT;
+   unsigned flags = LOOKUP_REPLACE_OBJECT;
+
+   if (literally)
+   flags |= LOOKUP_LITERALLY;
 
if (get_sha1_with_context(obj_name, 0, sha1, obj_context))
die(Not a valid object name %s, obj_name);
@@ -23,16 +30,24 @@ static int cat_one_file(int opt, const char *exp_type, 
const char *obj_name)
buf = NULL;
switch (opt) {
case 't':
-   type = sha1_object_info(sha1, NULL);
-   if (type  0) {
-   printf(%s\n, typename(type));
+   oi.typep = type;
+   oi.typename = sb;
+   if (sha1_object_info_extended(sha1, oi, flags)  0)
+   die(git cat-file: could not get object info);
+   if (type = 0  sb.len) {
+   printf(%s\n, sb.buf);
+   strbuf_release(sb);
return 0;
}
break;
 
case 's':
-   type = sha1_object_info(sha1, size);
-   if (type  0) {
+   oi.typep = type;
+   oi.typename = sb;
+   oi.sizep = size;
+   if (sha1_object_info_extended(sha1, oi, flags)  0)
+   die(git cat-file: could not get object info);
+   if (type = 0  sb.len) {
printf(%lu\n, size);
return 0;
}
@@ -323,7 +338,7 @@ static int batch_objects(struct batch_options *opt)
 }
 
 static const char * const cat_file_usage[] = {
-   N_(git cat-file (-t | -s | -e | -p | type | --textconv) object),
+   N_(git cat-file (-t [--literally]|-s 
[--literally]|-e|-p|type|--textconv) object),
N_(git cat-file (--batch | --batch-check)  list-of-objects),
NULL
 };
@@ -359,6 +374,7 @@ int cmd_cat_file(int argc, const char **argv, const char 
*prefix)
int opt = 0;
const char *exp_type = NULL, *obj_name = NULL;
struct batch_options batch = {0};
+   int literally = 0;
 
const struct option options[] = {
OPT_GROUP(N_(type can be one of: blob, tree, commit, tag)),
@@ -369,6 +385,8 @@ int cmd_cat_file(int argc, const char **argv, const char 
*prefix)
OPT_SET_INT('p', NULL, opt, N_(pretty-print object's 
content), 'p'),
OPT_SET_INT(0, textconv, opt,
N_(for blob objects, run textconv on object's 
content), 'c'),
+   OPT_BOOL( 0, literally, literally,
+ N_(get information about corrupt objects for 
debugging Git)),
{ OPTION_CALLBACK, 0, batch, batch, format,
N_(show info and content of objects fed from the 
standard input),
PARSE_OPT_OPTARG, batch_option_callback },
@@ -380,7 +398,7 @@ int cmd_cat_file(int argc, const char **argv, const char 
*prefix)
 
git_config(git_cat_file_config, NULL);
 
-   if (argc != 3  argc != 2)
+   if (argc  2 || argc  4)
usage_with_options(cat_file_usage, options);
 
argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0);
@@ -405,5 +423,7 @@ int cmd_cat_file(int argc, const char **argv, const char 
*prefix)
if (batch.enabled)
return batch_objects(batch);
 
-   return cat_one_file(opt, exp_type, obj_name);
+   if (literally  opt != 't'  opt != 's')
+   die(git cat-file --literally: use with -s or -t);
+   return cat_one_file(opt, exp_type, obj_name, literally);
 }
-- 
2.4.0.rc1.249.g9f2ee54

--
To unsubscribe from this list: send the line