[PATCH 4/6] fsck: check tag objects' headers
We inspect commit objects pretty much in detail in git-fsck, but we just glanced over the tag objects. Let's be stricter. This work was sponsored by GitHub Inc. Signed-off-by: Johannes Schindelin johannes.schinde...@gmx.de --- fsck.c | 88 +- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/fsck.c b/fsck.c index db6aaa4..d30946b 100644 --- a/fsck.c +++ b/fsck.c @@ -6,6 +6,7 @@ #include commit.h #include tag.h #include fsck.h +#include refs.h static int fsck_walk_tree(struct tree *tree, fsck_walk_func walk, void *data) { @@ -355,6 +356,90 @@ static int fsck_commit(struct commit *commit, const char *data, return ret; } +static int fsck_tag_buffer(struct tag *tag, const char *data, + unsigned long size, fsck_error error_func) +{ + unsigned char commit_sha1[20]; + int ret = 0; + const char *buffer; + char *tmp = NULL, *eol; + + if (data) + buffer = data; + else { + enum object_type type; + + buffer = tmp = read_sha1_file(tag-object.sha1, type, size); + if (!buffer) + return error_func(tag-object, FSCK_ERROR, + cannot read tag object); + + if (type != OBJ_TAG) { + ret = error_func(tag-object, FSCK_ERROR, + expected tag got %s, + typename(type)); + goto done; + } + } + + if (must_have_empty_line(buffer, size, tag-object, error_func)) + goto done; + + if (!skip_prefix(buffer, object , buffer)) { + ret = error_func(tag-object, FSCK_ERROR, invalid format - expected 'object' line); + goto done; + } + if (get_sha1_hex(buffer, commit_sha1) || buffer[40] != '\n') { + ret = error_func(tag-object, FSCK_ERROR, invalid 'object' line format - bad sha1); + goto done; + } + buffer += 41; + + if (!skip_prefix(buffer, type , buffer)) { + ret = error_func(tag-object, FSCK_ERROR, invalid format - expected 'type' line); + goto done; + } + eol = strchr(buffer, '\n'); + if (!eol) { + ret = error_func(tag-object, FSCK_ERROR, invalid format - unexpected end after 'type' line); + goto done; + } + *eol = '\0'; + if (type_from_string_gently(buffer) 0) + ret = error_func(tag-object, FSCK_ERROR, invalid 'type' value); + *eol = '\n'; + if (ret) + goto done; + buffer = eol + 1; + + if (!skip_prefix(buffer, tag , buffer)) { + ret = error_func(tag-object, FSCK_ERROR, invalid format - expected 'tag' line); + goto done; + } + eol = strchr(buffer, '\n'); + if (!eol) { + ret = error_func(tag-object, FSCK_ERROR, invalid format - unexpected end after 'type' line); + goto done; + } + *eol = '\0'; + if (check_refname_format(buffer, REFNAME_ALLOW_ONELEVEL)) + ret = error_func(tag-object, FSCK_ERROR, invalid 'tag' name: %s, buffer); + *eol = '\n'; + if (ret) + goto done; + buffer = eol + 1; + + if (!skip_prefix(buffer, tagger , buffer)) { + /* early tags do not contain 'tagger' lines; warn only */ + error_func(tag-object, FSCK_WARN, invalid format - expected 'tagger' line); + } + ret = fsck_ident(buffer, tag-object, error_func); + +done: + free(tmp); + return ret; +} + static int fsck_tag(struct tag *tag, const char *data, unsigned long size, fsck_error error_func) { @@ -362,7 +447,8 @@ static int fsck_tag(struct tag *tag, const char *data, if (!tagged) return error_func(tag-object, FSCK_ERROR, could not load tagged object); - return 0; + + return fsck_tag_buffer(tag, data, size, error_func); } int fsck_object(struct object *obj, void *data, unsigned long size, -- 2.0.0.rc3.9669.g840d1f9 -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/6] fsck: check tag objects' headers
Johannes Schindelin johannes.schinde...@gmx.de writes: We inspect commit objects pretty much in detail in git-fsck, but we just glanced over the tag objects. Let's be stricter. This work was sponsored by GitHub Inc. Is it only this commit, or all of these patches in the series? Does GitHub want their name sprinkled over all changes they sponsor? Signed-off-by: Johannes Schindelin johannes.schinde...@gmx.de --- fsck.c | 88 +- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/fsck.c b/fsck.c index db6aaa4..d30946b 100644 --- a/fsck.c +++ b/fsck.c @@ -6,6 +6,7 @@ #include commit.h #include tag.h #include fsck.h +#include refs.h static int fsck_walk_tree(struct tree *tree, fsck_walk_func walk, void *data) { @@ -355,6 +356,90 @@ static int fsck_commit(struct commit *commit, const char *data, return ret; } +static int fsck_tag_buffer(struct tag *tag, const char *data, + unsigned long size, fsck_error error_func) +{ + unsigned char commit_sha1[20]; + int ret = 0; + const char *buffer; + char *tmp = NULL, *eol; Call it to_free or something; naming it 'tmp' would tempt people who later touch this code to reuse it temporarily to hold something unrelated (I know they will notice that mistake later, but noticing mistake after wasting time is too late). + if (data) + buffer = data; + else { + enum object_type type; + + buffer = tmp = read_sha1_file(tag-object.sha1, type, size); + if (!buffer) + return error_func(tag-object, FSCK_ERROR, + cannot read tag object); + + if (type != OBJ_TAG) { + ret = error_func(tag-object, FSCK_ERROR, + expected tag got %s, + typename(type)); + goto done; + } + } + + if (must_have_empty_line(buffer, size, tag-object, error_func)) + goto done; + + if (!skip_prefix(buffer, object , buffer)) { + ret = error_func(tag-object, FSCK_ERROR, invalid format - expected 'object' line); + goto done; + } + if (get_sha1_hex(buffer, commit_sha1) || buffer[40] != '\n') { This code is not making the mistake to assume that tagged objects are always commits, so let's not call it commit_sha1; I'd suggest just calling it sha1[] (or even tmp or junk, as the parsed value is not used). + *eol = '\0'; + if (type_from_string_gently(buffer) 0) + ret = error_func(tag-object, FSCK_ERROR, invalid 'type' value); + *eol = '\n'; + if (ret) + goto done; I can see that it is reverted back to '\n' immediately after calling type_from_string_gently(), but it is a bit unfortunate that const char *data needs to be touched this way. Since the callee is introduced in this series, perhaps it can be made to take a counted string? + if (!skip_prefix(buffer, tag , buffer)) { + ret = error_func(tag-object, FSCK_ERROR, invalid format - expected 'tag' line); + goto done; + } + eol = strchr(buffer, '\n'); + if (!eol) { + ret = error_func(tag-object, FSCK_ERROR, invalid format - unexpected end after 'type' line); + goto done; + } + *eol = '\0'; + if (check_refname_format(buffer, REFNAME_ALLOW_ONELEVEL)) + ret = error_func(tag-object, FSCK_ERROR, invalid 'tag' name: %s, buffer); + *eol = '\n'; I actually think this check is harmful. It often matches the name of the ref, but there is nothing inherently refname like in the tag name proper. And I think it is unnecessary. Don't we already warn if it does not match the name of the ref we use to point at it? It would mean that anything that does not conform to the check-refname-format will get a warning one way or the other, either it is pointed at by a ref whose name is kosher and does not match, or a ref itself has a name that does not pass check-refname-format. (goes and looks) Yikes. I assumed too much. We do not seem to do much checking on refs in that (1) After git rev-parse HEAD .git/refs/heads/ee..oo fsck does not complain about the malformed ee..oo branch; (2) After git tag -a foo -m foo cp .git/refs/tags/foo .git/refs/tags/bar fsck does not complain that refs/tags/bar talks about foo But these two are something we would want to fix in a larger context within git fack anyway, so my comments above still stand. + if (!skip_prefix(buffer, tagger , buffer)) { + /* early tags do not contain 'tagger' lines; warn only */ + error_func(tag-object, FSCK_WARN, invalid format - expected 'tagger' line); Nice. Even nicer that this explains why people should not touch 'ret' here. + } + ret =
Re: [PATCH 4/6] fsck: check tag objects' headers
Junio C Hamano gits...@pobox.com writes: +if (check_refname_format(buffer, REFNAME_ALLOW_ONELEVEL)) +ret = error_func(tag-object, FSCK_ERROR, invalid 'tag' name: %s, buffer); +*eol = '\n'; I actually think this check is harmful. Let me take this one back; we do a moral equivalent when we create a tag, like this: strbuf_addf(sb, refs/tags/%s, name); return check_refname_format(sb-buf, 0); So validating using check_refname_format() is indeed a very good thing to do. As you have length and buffer here, I would suggest updating this part of your patch to print into a strbuf strbuf_addf(sb, refs/tags/%.*s, (eol - buffer), buffer); if (check_refname_format(sb.buf)) ret = ... and keep the constness of the incoming data. -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html