[PATCH v6 1/4] sha1_file.c: support reading from a loose object of unknown type

2015-04-02 Thread Karthik Nayak
Update sha1_loose_object_info() to optionally allow it to read
from a loose object file of unknown/bogus type; as the function
usually returns the type of the object it read in the form of enum
for known types, add an optional typename field to receive the
name of the type in textual form and a flag to indicate the reading
of a loose object file of unknown/bogus type.

Add parse_sha1_header_extended() which acts as a wrapper around
parse_sha1_header() allowing more information to be obtained.

Add unpack_sha1_header_to_strbuf() to unpack sha1 headers of
unknown/corrupt objects which have an unknown sha1 header size to
a strbuf structure. This was written by Junio C Hamano but tested
by me.

Helped-by: Junio C Hamano <gits...@pobox.com>
Helped-by: Eric Sunshine <sunsh...@sunshineco.com>
Signed-off-by: Karthik Nayak <karthik@gmail.com>
---
 cache.h |   2 ++
 sha1_file.c | 111 
 2 files changed, 91 insertions(+), 22 deletions(-)

diff --git a/cache.h b/cache.h
index 4d02efc..949ef4c 100644
--- a/cache.h
+++ b/cache.h
@@ -830,6 +830,7 @@ extern int is_ntfs_dotgit(const char *name);
 
 /* object replacement */
 #define LOOKUP_REPLACE_OBJECT 1
+#define LOOKUP_LITERALLY 2
 extern void *read_sha1_file_extended(const unsigned char *sha1, enum 
object_type *type, unsigned long *size, unsigned flag);
 static inline void *read_sha1_file(const unsigned char *sha1, enum object_type 
*type, unsigned long *size)
 {
@@ -1296,6 +1297,7 @@ struct object_info {
unsigned long *sizep;
unsigned long *disk_sizep;
unsigned char *delta_base_sha1;
+   struct strbuf *typename;
 
/* Response */
enum {
diff --git a/sha1_file.c b/sha1_file.c
index 69a60ec..8b58b94 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1564,6 +1564,34 @@ int unpack_sha1_header(git_zstream *stream, unsigned 
char *map, unsigned long ma
return git_inflate(stream, 0);
 }
 
+static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char 
*map,
+   unsigned long mapsize,
+   struct strbuf *header)
+{
+   unsigned char buffer[32], *cp;
+   unsigned long bufsiz = sizeof(buffer);
+   int status;
+
+   status = unpack_sha1_header(stream, map, mapsize, buffer, bufsiz);
+
+   if (status) {
+   strbuf_add(header, buffer, stream->next_out - buffer);
+   return status;
+   }
+
+   do {
+   status = git_inflate(stream, 0);
+   strbuf_add(header, buffer, stream->next_out - buffer);
+   for (cp = buffer; cp < stream->next_out; cp++)
+   if (!*cp)
+   /* Found the NUL at the end of the header */
+   return 0;
+   stream->next_out = buffer;
+   stream->avail_out = bufsiz;
+   } while (status != Z_STREAM_END);
+   return -1;
+}
+
 static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long 
size, const unsigned char *sha1)
 {
int bytes = strlen(buffer) + 1;
@@ -1614,27 +1642,24 @@ static void *unpack_sha1_rest(git_zstream *stream, void 
*buffer, unsigned long s
  * too permissive for what we want to check. So do an anal
  * object header parse by hand.
  */
-int parse_sha1_header(const char *hdr, unsigned long *sizep)
+int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
+  unsigned int flags)
 {
-   char type[10];
-   int i;
+   struct strbuf typename = STRBUF_INIT;
unsigned long size;
+   int type;
 
/*
 * The type can be at most ten bytes (including the
 * terminating '\0' that we add), and is followed by
 * a space.
 */
-   i = 0;
for (;;) {
char c = *hdr++;
if (c == ' ')
break;
-   type[i++] = c;
-   if (i >= sizeof(type))
-   return -1;
+   strbuf_addch(&typename, c);
}
-   type[i] = 0;
 
/*
 * The length must follow immediately, and be in canonical
@@ -1652,12 +1677,39 @@ int parse_sha1_header(const char *hdr, unsigned long 
*sizep)
size = size * 10 + c;
}
}
-   *sizep = size;
 
+   type = type_from_string_gently(typename.buf, typename.len, 1);
+   if (oi->sizep)
+   *oi->sizep = size;
+   if (oi->typename)
+   strbuf_addbuf(oi->typename, &typename);
+   strbuf_release(&typename);
+
+   /*
+* Set type to 0 if its an unknown object and
+* we're obtaining the type using '--literally'
+* option.
+*/
+   if ((flags & LOOKUP_LITERALLY) && (type == -1))
+   type = 0;
+   else if (type == -1)
+   die("invalid object type");
+   if (oi->typep)
+   *oi->typep = 

Re: [PATCH v6 1/4] sha1_file.c: support reading from a loose object of unknown type

2015-04-02 Thread Junio C Hamano
Karthik Nayak <karthik@gmail.com> writes:

> +static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char 
> *map,
> + unsigned long mapsize,
> + struct strbuf *header)
> +{
> + unsigned char buffer[32], *cp;
> + unsigned long bufsiz = sizeof(buffer);
> + int status;
> +
> + status = unpack_sha1_header(stream, map, mapsize, buffer, bufsiz);

I briefly wondered if this can return Z_BUF_ERROR, but it is OK
because we do not call inflate with Z_FINISH in unpack_sha1_header()
for obvious reasons ;-)

> + if (status) {
> + strbuf_add(header, buffer, stream->next_out - buffer);
> + return status;
> + }

> + do {
> + status = git_inflate(stream, 0);
> + strbuf_add(header, buffer, stream->next_out - buffer);
> + for (cp = buffer; cp < stream->next_out; cp++)
> + if (!*cp)
> + /* Found the NUL at the end of the header */
> + return 0;
> + stream->next_out = buffer;
> + stream->avail_out = bufsiz;
> + } while (status != Z_STREAM_END);
> + return -1;
> +}

OK.

> @@ -1614,27 +1642,24 @@ static void *unpack_sha1_rest(git_zstream *stream, 
> void *buffer, unsigned long s
>    * too permissive for what we want to check. So do an anal
>    * object header parse by hand.
>    */
> -int parse_sha1_header(const char *hdr, unsigned long *sizep)
> +int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
> +unsigned int flags)
>  {
> - char type[10];
> - int i;
> + struct strbuf typename = STRBUF_INIT;
>   unsigned long size;
> + int type;
>
>   /*
>    * The type can be at most ten bytes (including the

Is this still a valid comment?

>    * terminating '\0' that we add), and is followed by
>    * a space.
>    */
> - i = 0;
>   for (;;) {
>   char c = *hdr++;
>   if (c == ' ')
>   break;
> - type[i++] = c;
> - if (i >= sizeof(type))
> - return -1;
> + strbuf_addch(&typename, c);
>   }
> - type[i] = 0;
>
>   /*
>    * The length must follow immediately, and be in canonical

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html