[fossil-dev] [PATCH] Fix a problem with bidirectional git import/export

Nickolas Lloyd Mon, 12 Sep 2016 11:05:30 -0700

Hi,

Following is a patch against fossil commit [4d1f2d302b] that fixes an
issue that can appear when performing multiple imports/exports with a
git repository.


The problem arises when fossil tries to export a blob to git with a mark
which has been previously used by git when importing a commit to
fossil.  This happens because fossil creates marks based solely on the
rid of the object (2*rid for blobs, 2*rid+1 for commits), which can
clash with git-created marks.

E.g. if we import the following two objects from git:
    blob :19 as rid=10
    commit :20 as rid=11
and we then later export a commit referring to rid=10, we get something
like the following:
    commit refs/heads/trunk
    mark :25
    ...
    M 100644 :20 file.txt
    ...
because fossil thinks that a blob with rid=10 should be marked as `:20'.
This produces an error from git that mark `:20' is not a blob, but
actually a commit.  This is fairly easy to work around, we just have to
keep track of all marks that have been declared previously and make sure
that any new ones that we create are unique.

Please let me know if you have any questions/comments.  I'd be happy to
hear your feedback.

Thanks,
Nick

---

Index: src/export.c
==================================================================
--- src/export.c
+++ src/export.c
@@ -132,23 +132,28 @@
 
 /*
 ** create_mark()
 **   Create a new (mark,rid,uuid) entry for the given rid in the 'xmark' table,
 **   and return that information as a struct mark_t in *mark.
+**   *unused_mark is a value representing a mark that is free for use--that is,
+**   it does not appear in the marks file, and has not been used during this   
+**   export run.  Specifically, it is the supremum of the set of used marks    
+**   plus one.                                                                 
 **   This function returns -1 in the case where 'rid' does not exist, otherwise
 **   it returns 0.
 **   mark->name is dynamically allocated and is owned by the caller upon 
return.
 */
-int create_mark(int rid, struct mark_t *mark){
+int create_mark(int rid, struct mark_t *mark, unsigned int *unused_mark){
   char sid[13];
   char *zUuid = rid_to_uuid(rid);
   if(!zUuid){
     fossil_trace("Undefined rid=%d\n", rid);
     return -1;
   }
   mark->rid = rid;
-  sqlite3_snprintf(sizeof(sid), sid, ":%d", COMMITMARK(rid));
+  sqlite3_snprintf(sizeof(sid), sid, ":%d", *unused_mark);
+  *unused_mark += 1;
   mark->name = fossil_strdup(sid);
   sqlite3_snprintf(sizeof(mark->uuid), mark->uuid, "%s", zUuid);
   free(zUuid);
   insert_commit_xref(mark->rid, mark->name, mark->uuid);
   return 0;
@@ -156,19 +161,22 @@
 
 /*
 ** mark_name_from_rid()
 **   Find the mark associated with the given rid.  Mark names always start
 **   with ':', and are pulled from the 'xmark' temporary table.
-**   This function returns NULL if the rid does not exist in the 'xmark' table.
-**   Otherwise, it returns the name of the mark, which is dynamically allocated
-**   and is owned by the caller of this function.
+**   If the given rid doesn't have a mark associated with it yet, one is
+**   created with a value of *unused_mark.
+**   *unused_mark functions exactly as in create_mark().
+**   This function returns NULL if the rid does not have an associated UUID,
+**   (i.e. is not valid).  Otherwise, it returns the name of the mark, which is
+**   dynamically allocated and is owned by the caller of this function.
 */
-char * mark_name_from_rid(int rid){
+char * mark_name_from_rid(int rid, unsigned int *unused_mark){
   char *zMark = db_text(0, "SELECT tname FROM xmark WHERE trid=%d", rid);
   if(zMark==NULL){
     struct mark_t mark;
-    if(create_mark(rid, &mark)==0){
+    if(create_mark(rid, &mark, unused_mark)==0){
       zMark = mark.name;
     }else{
       return NULL;
     }
   }
@@ -185,16 +193,18 @@
 **   database.  Otherwise, 0 is returned.
 **   mark->name is dynamically allocated, and owned by the caller.
 */
 int parse_mark(char *line, struct mark_t *mark){
   char *cur_tok;
+  char type_;
   cur_tok = strtok(line, " \t");
   if(!cur_tok||strlen(cur_tok)<2){
     return -1;
   }
   mark->rid = atoi(&cur_tok[1]);
-  if(cur_tok[0]!='c'){
+  type_ = cur_tok[0];
+  if(type_!='c'&&type_!='b'){
     /* This is probably a blob mark */
     mark->name = NULL;
     return 0;
   }
 
@@ -202,11 +212,18 @@
   if(!cur_tok){
     /* This mark was generated by an older version of Fossil and doesn't
     ** include the mark name and uuid.  create_mark() will name the new mark
     ** exactly as it was when exported to git, so that we should have a
     ** valid mapping from git sha1<->mark name<->fossil sha1. */
-    return create_mark(mark->rid, mark);
+    unsigned int mid;
+    if( type_=='c' ){
+      mid = COMMITMARK(mark->rid);
+    }
+    else{
+      mid = BLOBMARK(mark->rid);
+    }
+    return create_mark(mark->rid, mark, &mid);
   }else{
     mark->name = fossil_strdup(cur_tok);
   }
 
   cur_tok = strtok(NULL, "\n");
@@ -233,18 +250,21 @@
 /*
 ** import_marks()
 **   Import the marks specified in file 'f' into the 'xmark' table.
 **   If 'blobs' is non-null, insert all blob marks into it.
 **   If 'vers' is non-null, insert all commit marks into it.
+**   If 'unused_marks' is non-null, upon return of this function, all values
+**   x >= *unused_marks are free to use as marks, i.e. they do not clash with
+**   any marks appearing in the marks file.
 **   Each line in the file must be at most 100 characters in length.  This
 **   seems like a reasonable maximum for a 40-character uuid, and 1-13
 **   character rid.
 **   The function returns -1 if any of the lines in file 'f' are malformed,
 **   or the rid/uuid information doesn't match what is in the repository
 **   database.  Otherwise, 0 is returned.
 */
-int import_marks(FILE* f, Bag *blobs, Bag *vers){
+int import_marks(FILE* f, Bag *blobs, Bag *vers, unsigned int *unused_mark){
   char line[101];
   while(fgets(line, sizeof(line), f)){
     struct mark_t mark;
     if(strlen(line)==100&&line[99]!='\n'){
       /* line too long */
@@ -251,22 +271,45 @@
       return -1;
     }
     if( parse_mark(line, &mark)<0 ){
       return -1;
     }else if( line[0]=='b' ){
-      /* Don't import blob marks into 'xmark' table--git doesn't use them,
-      ** so they need to be left free for git to reuse. */
       if(blobs!=NULL){
         bag_insert(blobs, mark.rid);
       }
-    }else if( vers!=NULL ){
-      bag_insert(vers, mark.rid);
+    }else{
+      if( vers!=NULL ){
+        bag_insert(vers, mark.rid);
+      }
+    }
+    if( unused_mark!=NULL ){
+      unsigned int mid = atoi(mark.name + 1);
+      if( mid>=*unused_mark ){
+        *unused_mark = mid + 1;
+      }
     }
     free(mark.name);
   }
   return 0;
 }
+
+void export_mark(FILE* f, int rid, char obj_type)
+{
+  unsigned int z = 0;
+  char *zUuid = rid_to_uuid(rid);
+  char *zMark;
+  if(zUuid==NULL){
+    fossil_trace("No uuid matching rid=%d when exporting marks\n", rid);
+    return;
+  }
+  /* Since rid is already in the 'xmark' table, the value of z won't be
+  ** used, but pass in a valid pointer just to be safe. */
+  zMark = mark_name_from_rid(rid, &z);
+  fprintf(f, "%c%d %s %s\n", obj_type, rid, zMark, zUuid);
+  free(zMark);
+  free(zUuid);
+}
 
 /*
 **  If 'blobs' is non-null, it must point to a Bag of blob rids to be
 **  written to disk.  Blob rids are written as 'b<rid>'.
 **  If 'vers' is non-null, it must point to a Bag of commit rids to be
@@ -275,32 +318,24 @@
 **  This function does not fail, but may produce errors if a uuid cannot
 **  be found for an rid in 'vers'.
 */
 void export_marks(FILE* f, Bag *blobs, Bag *vers){
   int rid;
-  if( blobs!=NULL ){
+
+  if( blobs!=NULL ) {
     rid = bag_first(blobs);
-    if(rid!=0){
+    if( rid!=0 ){
       do{
-        fprintf(f, "b%d\n", rid);
-      }while((rid = bag_next(blobs, rid))!=0);
+        export_mark(f, rid, 'b');
+      }while( (rid = bag_next(blobs, rid))!=0 );
     }
   }
   if( vers!=NULL ){
     rid = bag_first(vers);
     if( rid!=0 ){
       do{
-        char *zUuid = rid_to_uuid(rid);
-        char *zMark;
-        if(zUuid==NULL){
-          fossil_trace("No uuid matching rid=%d when exporting marks\n", rid);
-          continue;
-        }
-        zMark = mark_name_from_rid(rid);
-        fprintf(f, "c%d %s %s\n", rid, zMark, zUuid);
-        free(zMark);
-        free(zUuid);
+        export_mark(f, rid, 'c');
       }while( (rid = bag_next(vers, rid))!=0 );
     }
   }
 }
 
@@ -336,10 +371,11 @@
 */
 void export_cmd(void){
   Stmt q, q2, q3;
   int i;
   Bag blobs, vers;
+  unsigned int unused_mark = 1;
   const char *markfile_in;
   const char *markfile_out;
 
   bag_init(&blobs);
   bag_init(&vers);
@@ -362,11 +398,11 @@
 
     f = fossil_fopen(markfile_in, "r");
     if( f==0 ){
       fossil_fatal("cannot open %s for reading", markfile_in);
     }
-    if(import_marks(f, &blobs, &vers)<0){
+    if(import_marks(f, &blobs, &vers, &unused_mark)<0){
       fossil_fatal("error importing marks from file: %s\n", markfile_in);
     }
     db_prepare(&qb, "INSERT OR IGNORE INTO oldblob VALUES (:rid)");
     db_prepare(&qc, "INSERT OR IGNORE INTO oldcommit VALUES (:rid)");
     rid = bag_first(&blobs);
@@ -416,15 +452,18 @@
   while( db_step(&q)==SQLITE_ROW ){
     int rid = db_column_int(&q, 0);
     Blob content;
 
     while( !bag_find(&blobs, rid) ){
+      char *zMark;
       content_get(rid, &content);
       db_bind_int(&q2, ":rid", rid);
       db_step(&q2);
       db_reset(&q2);
-      printf("blob\nmark :%d\ndata %d\n", BLOBMARK(rid), blob_size(&content));
+      zMark = mark_name_from_rid(rid, &unused_mark);
+      printf("blob\nmark %s\ndata %d\n", zMark, blob_size(&content));
+      free(zMark);
       bag_insert(&blobs, rid);
       fwrite(blob_buffer(&content), 1, blob_size(&content), stdout);
       printf("\n");
       blob_reset(&content);
 
@@ -470,11 +509,11 @@
     if( zBranch==0 ) zBranch = "trunk";
     zBr = mprintf("%s", zBranch);
     for(i=0; zBr[i]; i++){
       if( !fossil_isalnum(zBr[i]) ) zBr[i] = '_';
     }
-    zMark = mark_name_from_rid(ckinId);
+    zMark = mark_name_from_rid(ckinId, &unused_mark);
     printf("commit refs/heads/%s\nmark %s\n", zBr, zMark);
     free(zMark);
     free(zBr);
     printf("committer");
     print_person(zUser);
@@ -487,21 +526,21 @@
       "   AND pid IN (SELECT objid FROM event)",
       ckinId
     );
     if( db_step(&q3) == SQLITE_ROW ){
       int pid = db_column_int(&q3, 0);
-      zMark = mark_name_from_rid(pid);
+      zMark = mark_name_from_rid(pid, &unused_mark);
       printf("from %s\n", zMark);
       free(zMark);
       db_prepare(&q4,
         "SELECT pid FROM plink"
         " WHERE cid=%d AND NOT isprim"
         "   AND NOT EXISTS(SELECT 1 FROM phantom WHERE rid=pid)"
         " ORDER BY pid",
         ckinId);
       while( db_step(&q4)==SQLITE_ROW ){
-        zMark = mark_name_from_rid(db_column_int(&q4, 0));
+        zMark = mark_name_from_rid(db_column_int(&q4, 0), &unused_mark);
         printf("merge %s\n", zMark);
         free(zMark);
       }
       db_finalize(&q4);
     }else{
@@ -519,17 +558,19 @@
       int zNew = db_column_int(&q4,1);
       int mPerm = db_column_int(&q4,2);
       if( zNew==0)
         printf("D %s\n", zName);
       else if( bag_find(&blobs, zNew) ) {
+        zMark = mark_name_from_rid(zNew, &unused_mark);
         const char *zPerm;
         switch( mPerm ){
           case PERM_LNK:  zPerm = "120000";   break;
           case PERM_EXE:  zPerm = "100755";   break;
           default:        zPerm = "100644";   break;
         }
-        printf("M %s :%d %s\n", zPerm, BLOBMARK(zNew), zName);
+        printf("M %s %s %s\n", zPerm, zMark, zName);
+        free(zMark);
       }
     }
     db_finalize(&q4);
     db_finalize(&q3);
     printf("\n");

Index: src/import.c
==================================================================
--- src/import.c
+++ src/import.c
@@ -1770,11 +1770,11 @@
     if( markfile_in ){
       FILE *f = fossil_fopen(markfile_in, "r");
       if( !f ){
         fossil_fatal("cannot open %s for reading\n", markfile_in);
       }
-      if(import_marks(f, &blobs, NULL)<0){
+      if(import_marks(f, &blobs, NULL, NULL)<0){
         fossil_fatal("error importing marks from file: %s\n", markfile_in);
       }
       fclose(f);
     }
 
@@ -1795,13 +1795,13 @@
       db_prepare(&q_marks, "SELECT DISTINCT trid FROM xmark");
       while( db_step(&q_marks)==SQLITE_ROW ){
         rid = db_column_int(&q_marks, 0);
         if( db_int(0, "SELECT count(objid) FROM event"
                       " WHERE objid=%d AND type='ci'", rid)==0 ){
-          if( bag_find(&blobs, rid)==0 ){
-            bag_insert(&blobs, rid);
-          }
+          /* Blob marks exported by git aren't saved between runs, so they need
+          ** to be left free for git to re-use in the future.
+          */
         }else{
           bag_insert(&vers, rid);
         }
       }
       db_finalize(&q_marks);


_______________________________________________
fossil-dev mailing list
fossil-dev@mailinglists.sqlite.org
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/fossil-dev

[fossil-dev] [PATCH] Fix a problem with bidirectional git import/export

Reply via email to