Here's a rather hackish implementation of the write side. Any
thoughts on the format? (Obviously the implementation needs work.
For example, it needs to be optional.)
Thoughts so far:
- I want to put the value of "prefix" into an extended header.
- Should blobs have their sha1 hashes in an extended header? Pros:
it makes figuring out substitutions easier. Cons: it adds 512 bytes
per file.
- I want to support tags as roots.
- I (or someone) need to write a verifier / verified unpacker. Does
git accept Python code?
This thing is tested in the sense that GNU tar unpacks its output
without any warnings or other fanfare.
--Andy
diff --git a/archive-tar.c b/archive-tar.c
index 719b629..c6bf7e4 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -2,6 +2,8 @@
* Copyright (c) 2005, 2006 Rene Scharfe
*/
#include "cache.h"
+#include "tree.h"
+#include "object.h"
#include "tar.h"
#include "archive.h"
#include "streaming.h"
@@ -200,6 +202,74 @@ static int write_extended_header(struct archiver_args *args,
return 0;
}
+/*
+ * A GIT-SCM object header is a global extended header that embeds a single
+ * git object. This object serves a purpose described by the "purpose"
+ * field. Valid purposes include:
+ *
+ * - "root" -- an object that, by itself, in conjunction with other roots,
+ * or in conjunction with external data, identifies a root to use to
+ * verify this archive.
+ * - "vrfy" -- an object that can be used to prove that the contents
+ * of this archive are as described.
+ *
+ * There's one basic rule to observe: every "vrfy" object must hash to
+ * a SHA-1 that matches something described in a "root", another "vrfy" object,
+ * or something typed in by a user decoding the archive.
+ *
+ * (Of course, if you want the archive to be usefully verifiable, all of the
+ * non-GIT-SCM contents should also be attributable to an appropriate
+ * "vrfy" object.)
+ *
+ * The fields are:
+ * GIT-SCM.obj.purpose: the purpose of the embedded object
+ * GIT-SCM.obj.sha1: the sha1 of the embedded object
+ * GIT-SCM.obj.type: the type of the embedded object
+ * GIT-SCM.obj.data: the data in the embedded object
+ *
+ * The block header is intentionally unspecified, except that it must
+ * have typeflag 'g'. (This is to allow some flexibility in trying to
+ * preserve compatibility with old tar implementations.)
+ */
+static int write_gitscm_obj_header(struct archiver_args *args,
+				   const char *purpose,
+				   const unsigned char *sha1)
+{
+	/* Embed object sha1 in a 'g' header; returns 0, or -1 if unreadable. */
+	struct strbuf ext_header = STRBUF_INIT;
+	struct ustar_header header;
+	enum object_type type;
+	unsigned long size;
+	const char *typestr;
+	void *buffer;
+
+	if (!sha1)	/* write_tar_archive() treats commit_sha1 as optional */
+		return 0;
+	buffer = read_sha1_file(sha1, &type, &size);
+	if (!buffer)	/* object could not be read: nothing to embed */
+		return -1;
+	typestr = typename(type);
+
+	strbuf_append_ext_header(&ext_header, "GIT-SCM.obj.purpose",
+				 purpose, strlen(purpose));
+	strbuf_append_ext_header(&ext_header, "GIT-SCM.obj.sha1",
+				 sha1_to_hex(sha1), 40);
+	strbuf_append_ext_header(&ext_header, "GIT-SCM.obj.type",
+				 typestr, strlen(typestr));
+	strbuf_append_ext_header(&ext_header, "GIT-SCM.obj.data",
+				 buffer, size);
+	free(buffer);
+	/* Block header first, then the records accumulated in ext_header. */
+	memset(&header, 0, sizeof(header));
+	*header.typeflag = TYPEFLAG_GLOBAL_HEADER;
+	strcpy(header.name, "pax_global_header");
+	prepare_header(args, &header, 0100666, ext_header.len);
+	write_blocked(&header, sizeof(header));
+	write_blocked(ext_header.buf, ext_header.len);
+	strbuf_release(&ext_header);
+	return 0;
+}
+
static int write_tar_entry(struct archiver_args *args,
const unsigned char *sha1,
const char *path, size_t pathlen,
@@ -212,6 +282,10 @@ static int write_tar_entry(struct archiver_args *args,
void *buffer;
int err = 0;
+ if (S_ISDIR(mode)) {
+ write_gitscm_obj_header(args, "vrfy", sha1);
+ }
+
memset(&header, 0, sizeof(header));
if (S_ISDIR(mode) || S_ISGITLINK(mode)) {
@@ -384,8 +458,11 @@ static int write_tar_archive(const struct archiver *ar,
if (args->commit_sha1)
err = write_global_extended_header(args);
- if (!err)
+ if (!err) {
+ write_gitscm_obj_header(args, "root", args->commit_sha1);
+ write_gitscm_obj_header(args, "vrfy", args->tree->object.sha1);
err = write_archive_entries(args, write_tar_entry);
+ }
if (!err)
write_trailer();
return err;