On Tue, Oct 09, 2012 at 07:01:44PM +0700, Nguyen Thai Ngoc Duy wrote:

> On Tue, Oct 09, 2012 at 04:38:32PM +0700, Nguyen Thai Ngoc Duy wrote:
> > #5  0x0815e736 in userdiff_find_by_path (path=0x820e7f0
> > "HEAD:Documentation/.gitattributes") at userdiff.c:278
> > #6  0x081058ca in grep_source_load_driver (gs=0xbfffd978) at grep.c:1504
> 
> A bandage patch may look like this. But it does not solve the real
> problem:
> 
>  - we should be able to look up in-tree .gitattributes, not just
>    ignore like this patch does
> 
>  - gs->name seems to be prepared for human consumption, not for
>    accessing files. grep_file() with opt->relative is true can put
>    quotes in gs->name, for example.

Right. For the second, you would probably want gs->identifier in the
case of GREP_SOURCE_FILE. But that identifier information is not
available at all for GREP_SOURCE_SHA1, which is what is breaking the
first point.

> I feel like we should make this function a callback and let git-grep
> deal with driver loading itself.

I think we just need to have callers of grep_source_init provide us with
the actual pathname (or NULL, in the case of GREP_SOURCE_BUF). That is
where the information is lost.

Like this incomplete and untested patch, which should fix the quoting
problem for GREP_SOURCE_FILE, but leave the sha1 bits broken (see the
in-code comment). I'm traveling this week, so I doubt I'll have time to
look for a few more days. If you want to work on it, please do.

diff --git a/builtin/grep.c b/builtin/grep.c
index 82530a6..be602dd 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -86,7 +86,7 @@ static void add_work(struct grep_opt *opt, enum 
grep_source_type type,
 static int skip_first_line;
 
 static void add_work(struct grep_opt *opt, enum grep_source_type type,
-                    const char *name, const void *id)
+                    const char *name, const char *path, const void *id)
 {
        grep_lock();
 
@@ -94,7 +94,7 @@ static void add_work(struct grep_opt *opt, enum 
grep_source_type type,
                pthread_cond_wait(&cond_write, &grep_mutex);
        }
 
-       grep_source_init(&todo[todo_end].source, type, name, id);
+       grep_source_init(&todo[todo_end].source, type, name, path, id);
        if (opt->binary != GREP_BINARY_TEXT)
                grep_source_load_driver(&todo[todo_end].source);
        todo[todo_end].done = 0;
@@ -378,14 +378,21 @@ static int grep_sha1(struct grep_opt *opt, const unsigned 
char *sha1,
        if (opt->relative && opt->prefix_length) {
                quote_path_relative(filename + tree_name_len, -1, &pathbuf,
                                    opt->prefix);
+               /* XXX Why do we insert here instead of just putting it in
+                * first? */
                strbuf_insert(&pathbuf, 0, filename, tree_name_len);
        } else {
                strbuf_addstr(&pathbuf, filename);
        }
 
+       /* XXX We seem to get all kinds of junk via the filename field here,
+        * including partial filenames, sha1:path, etc. We could parse it
+        * ourselves, but that is probably insanity. We should ask the
+        * caller to break it down more for us. For now, just pass NULL. */
+
 #ifndef NO_PTHREADS
        if (use_threads) {
-               add_work(opt, GREP_SOURCE_SHA1, pathbuf.buf, sha1);
+               add_work(opt, GREP_SOURCE_SHA1, pathbuf.buf, NULL, sha1);
                strbuf_release(&pathbuf);
                return 0;
        } else
@@ -394,7 +401,7 @@ static int grep_sha1(struct grep_opt *opt, const unsigned 
char *sha1,
                struct grep_source gs;
                int hit;
 
-               grep_source_init(&gs, GREP_SOURCE_SHA1, pathbuf.buf, sha1);
+               grep_source_init(&gs, GREP_SOURCE_SHA1, pathbuf.buf, NULL, 
sha1);
                strbuf_release(&pathbuf);
                hit = grep_source(opt, &gs);
 
@@ -414,7 +421,7 @@ static int grep_file(struct grep_opt *opt, const char 
*filename)
 
 #ifndef NO_PTHREADS
        if (use_threads) {
-               add_work(opt, GREP_SOURCE_FILE, buf.buf, filename);
+               add_work(opt, GREP_SOURCE_FILE, buf.buf, filename, filename);
                strbuf_release(&buf);
                return 0;
        } else
@@ -423,7 +430,7 @@ static int grep_file(struct grep_opt *opt, const char 
*filename)
                struct grep_source gs;
                int hit;
 
-               grep_source_init(&gs, GREP_SOURCE_FILE, buf.buf, filename);
+               grep_source_init(&gs, GREP_SOURCE_FILE, buf.buf, filename, 
filename);
                strbuf_release(&buf);
                hit = grep_source(opt, &gs);
 
diff --git a/grep.c b/grep.c
index edc7776..06bc1c6 100644
--- a/grep.c
+++ b/grep.c
@@ -1373,7 +1373,7 @@ int grep_buffer(struct grep_opt *opt, char *buf, unsigned 
long size)
        struct grep_source gs;
        int r;
 
-       grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL);
+       grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL);
        gs.buf = buf;
        gs.size = size;
 
@@ -1384,10 +1384,12 @@ void grep_source_init(struct grep_source *gs, enum 
grep_source_type type,
 }
 
 void grep_source_init(struct grep_source *gs, enum grep_source_type type,
-                     const char *name, const void *identifier)
+                     const char *name, const char *path,
+                     const void *identifier)
 {
        gs->type = type;
        gs->name = name ? xstrdup(name) : NULL;
+       gs->path = path ? xstrdup(path) : NULL;
        gs->buf = NULL;
        gs->size = 0;
        gs->driver = NULL;
@@ -1409,6 +1411,8 @@ void grep_source_clear(struct grep_source *gs)
 {
        free(gs->name);
        gs->name = NULL;
+       free(gs->path);
+       gs->path = NULL;
        free(gs->identifier);
        gs->identifier = NULL;
        grep_source_clear_data(gs);
diff --git a/grep.h b/grep.h
index c256ac6..c2cf57b 100644
--- a/grep.h
+++ b/grep.h
@@ -158,11 +158,13 @@ void grep_source_init(struct grep_source *gs, enum 
grep_source_type type,
        char *buf;
        unsigned long size;
 
+       char *path; /* for attribute lookups */
        struct userdiff_driver *driver;
 };
 
 void grep_source_init(struct grep_source *gs, enum grep_source_type type,
-                     const char *name, const void *identifier);
+                     const char *name, const char *path,
+                     const void *identifier);
 void grep_source_clear_data(struct grep_source *gs);
 void grep_source_clear(struct grep_source *gs);
 void grep_source_load_driver(struct grep_source *gs);
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to