Hi,

Is there anyone that's familiar with Fossil's import and export system
that would be able/willing to review this?  Other than code reviews are
there any steps I should take to increase the likelihood of the patch
being accepted?

Thanks!

-Nick

Nickolas Lloyd <nick.ll...@qorvo.com> writes:

> Hi,
>
> Following is a patch against fossil commit [4d1f2d302b] that fixes an
> issue that can appear when performing multiple imports/exports with a
> git repository.
>
> The problem arises when fossil tries to export a blob to git with a mark
> which has been previously used by git when importing a commit to
> fossil.  This happens because fossil creates marks based solely on the
> rid of the object (2*rid for blobs, 2*rid+1 for commits), which can
> clash with git-created marks.
>
> E.g. if we import the following two objects from git:
>     blob :19 as rid=10
>     commit :20 as rid=11
> and we then later export a commit referring to rid=10, we get something
> like the following:
>     commit refs/heads/trunk
>     mark :25
>     ...
>     M 100644 :20 file.txt
>     ...
> because fossil thinks that a blob with rid=10 should be marked as `:20'.
> This produces an error from git that mark `:20' is not a blob, but
> actually a commit.  This is fairly easy to work around, we just have to
> keep track of all marks that have been declared previously and make sure
> that any new ones that we create are unique.
>
> Please let me know if you have any questions/comments.  I'd be happy to
> hear your feedback.
>
> Thanks,
> Nick
>
> ---
>
> Index: src/export.c
> ==================================================================
> --- src/export.c
> +++ src/export.c
> @@ -132,23 +132,28 @@
>  
>  /*
>  ** create_mark()
>  **   Create a new (mark,rid,uuid) entry for the given rid in the 'xmark' 
> table,
>  **   and return that information as a struct mark_t in *mark.
> +**   *unused_mark is a value representing a mark that is free for use--that 
> is,
> +**   it does not appear in the marks file, and has not been used during this 
>   
> +**   export run.  Specifically, it is the supremum of the set of used marks  
>   
> +**   plus one.                                                               
>   
>  **   This function returns -1 in the case where 'rid' does not exist, 
> otherwise
>  **   it returns 0.
>  **   mark->name is dynamically allocated and is owned by the caller upon 
> return.
>  */
> -int create_mark(int rid, struct mark_t *mark){
> +int create_mark(int rid, struct mark_t *mark, unsigned int *unused_mark){
>    char sid[13];
>    char *zUuid = rid_to_uuid(rid);
>    if(!zUuid){
>      fossil_trace("Undefined rid=%d\n", rid);
>      return -1;
>    }
>    mark->rid = rid;
> -  sqlite3_snprintf(sizeof(sid), sid, ":%d", COMMITMARK(rid));
> +  sqlite3_snprintf(sizeof(sid), sid, ":%d", *unused_mark);
> +  *unused_mark += 1;
>    mark->name = fossil_strdup(sid);
>    sqlite3_snprintf(sizeof(mark->uuid), mark->uuid, "%s", zUuid);
>    free(zUuid);
>    insert_commit_xref(mark->rid, mark->name, mark->uuid);
>    return 0;
> @@ -156,19 +161,22 @@
>  
>  /*
>  ** mark_name_from_rid()
>  **   Find the mark associated with the given rid.  Mark names always start
>  **   with ':', and are pulled from the 'xmark' temporary table.
> -**   This function returns NULL if the rid does not exist in the 'xmark' 
> table.
> -**   Otherwise, it returns the name of the mark, which is dynamically 
> allocated
> -**   and is owned by the caller of this function.
> +**   If the given rid doesn't have a mark associated with it yet, one is
> +**   created with a value of *unused_mark.
> +**   *unused_mark functions exactly as in create_mark().
> +**   This function returns NULL if the rid does not have an associated UUID,
> +**   (i.e. is not valid).  Otherwise, it returns the name of the mark, which 
> is
> +**   dynamically allocated and is owned by the caller of this function.
>  */
> -char * mark_name_from_rid(int rid){
> +char * mark_name_from_rid(int rid, unsigned int *unused_mark){
>    char *zMark = db_text(0, "SELECT tname FROM xmark WHERE trid=%d", rid);
>    if(zMark==NULL){
>      struct mark_t mark;
> -    if(create_mark(rid, &mark)==0){
> +    if(create_mark(rid, &mark, unused_mark)==0){
>        zMark = mark.name;
>      }else{
>        return NULL;
>      }
>    }
> @@ -185,16 +193,18 @@
>  **   database.  Otherwise, 0 is returned.
>  **   mark->name is dynamically allocated, and owned by the caller.
>  */
>  int parse_mark(char *line, struct mark_t *mark){
>    char *cur_tok;
> +  char type_;
>    cur_tok = strtok(line, " \t");
>    if(!cur_tok||strlen(cur_tok)<2){
>      return -1;
>    }
>    mark->rid = atoi(&cur_tok[1]);
> -  if(cur_tok[0]!='c'){
> +  type_ = cur_tok[0];
> +  if(type_!='c'&&type_!='b'){
>      /* This is probably a blob mark */
>      mark->name = NULL;
>      return 0;
>    }
>  
> @@ -202,11 +212,18 @@
>    if(!cur_tok){
>      /* This mark was generated by an older version of Fossil and doesn't
>      ** include the mark name and uuid.  create_mark() will name the new mark
>      ** exactly as it was when exported to git, so that we should have a
>      ** valid mapping from git sha1<->mark name<->fossil sha1. */
> -    return create_mark(mark->rid, mark);
> +    unsigned int mid;
> +    if( type_=='c' ){
> +      mid = COMMITMARK(mark->rid);
> +    }
> +    else{
> +      mid = BLOBMARK(mark->rid);
> +    }
> +    return create_mark(mark->rid, mark, &mid);
>    }else{
>      mark->name = fossil_strdup(cur_tok);
>    }
>  
>    cur_tok = strtok(NULL, "\n");
> @@ -233,18 +250,21 @@
>  /*
>  ** import_marks()
>  **   Import the marks specified in file 'f' into the 'xmark' table.
>  **   If 'blobs' is non-null, insert all blob marks into it.
>  **   If 'vers' is non-null, insert all commit marks into it.
> +**   If 'unused_marks' is non-null, upon return of this function, all values
> +**   x >= *unused_marks are free to use as marks, i.e. they do not clash with
> +**   any marks appearing in the marks file.
>  **   Each line in the file must be at most 100 characters in length.  This
>  **   seems like a reasonable maximum for a 40-character uuid, and 1-13
>  **   character rid.
>  **   The function returns -1 if any of the lines in file 'f' are malformed,
>  **   or the rid/uuid information doesn't match what is in the repository
>  **   database.  Otherwise, 0 is returned.
>  */
> -int import_marks(FILE* f, Bag *blobs, Bag *vers){
> +int import_marks(FILE* f, Bag *blobs, Bag *vers, unsigned int *unused_mark){
>    char line[101];
>    while(fgets(line, sizeof(line), f)){
>      struct mark_t mark;
>      if(strlen(line)==100&&line[99]!='\n'){
>        /* line too long */
> @@ -251,22 +271,45 @@
>        return -1;
>      }
>      if( parse_mark(line, &mark)<0 ){
>        return -1;
>      }else if( line[0]=='b' ){
> -      /* Don't import blob marks into 'xmark' table--git doesn't use them,
> -      ** so they need to be left free for git to reuse. */
>        if(blobs!=NULL){
>          bag_insert(blobs, mark.rid);
>        }
> -    }else if( vers!=NULL ){
> -      bag_insert(vers, mark.rid);
> +    }else{
> +      if( vers!=NULL ){
> +        bag_insert(vers, mark.rid);
> +      }
> +    }
> +    if( unused_mark!=NULL ){
> +      unsigned int mid = atoi(mark.name + 1);
> +      if( mid>=*unused_mark ){
> +        *unused_mark = mid + 1;
> +      }
>      }
>      free(mark.name);
>    }
>    return 0;
>  }
> +
> +void export_mark(FILE* f, int rid, char obj_type)
> +{
> +  unsigned int z = 0;
> +  char *zUuid = rid_to_uuid(rid);
> +  char *zMark;
> +  if(zUuid==NULL){
> +    fossil_trace("No uuid matching rid=%d when exporting marks\n", rid);
> +    return;
> +  }
> +  /* Since rid is already in the 'xmark' table, the value of z won't be
> +  ** used, but pass in a valid pointer just to be safe. */
> +  zMark = mark_name_from_rid(rid, &z);
> +  fprintf(f, "%c%d %s %s\n", obj_type, rid, zMark, zUuid);
> +  free(zMark);
> +  free(zUuid);
> +}
>  
>  /*
>  **  If 'blobs' is non-null, it must point to a Bag of blob rids to be
>  **  written to disk.  Blob rids are written as 'b<rid>'.
>  **  If 'vers' is non-null, it must point to a Bag of commit rids to be
> @@ -275,32 +318,24 @@
>  **  This function does not fail, but may produce errors if a uuid cannot
>  **  be found for an rid in 'vers'.
>  */
>  void export_marks(FILE* f, Bag *blobs, Bag *vers){
>    int rid;
> -  if( blobs!=NULL ){
> +
> +  if( blobs!=NULL ) {
>      rid = bag_first(blobs);
> -    if(rid!=0){
> +    if( rid!=0 ){
>        do{
> -        fprintf(f, "b%d\n", rid);
> -      }while((rid = bag_next(blobs, rid))!=0);
> +        export_mark(f, rid, 'b');
> +      }while( (rid = bag_next(blobs, rid))!=0 );
>      }
>    }
>    if( vers!=NULL ){
>      rid = bag_first(vers);
>      if( rid!=0 ){
>        do{
> -        char *zUuid = rid_to_uuid(rid);
> -        char *zMark;
> -        if(zUuid==NULL){
> -          fossil_trace("No uuid matching rid=%d when exporting marks\n", 
> rid);
> -          continue;
> -        }
> -        zMark = mark_name_from_rid(rid);
> -        fprintf(f, "c%d %s %s\n", rid, zMark, zUuid);
> -        free(zMark);
> -        free(zUuid);
> +        export_mark(f, rid, 'c');
>        }while( (rid = bag_next(vers, rid))!=0 );
>      }
>    }
>  }
>  
> @@ -336,10 +371,11 @@
>  */
>  void export_cmd(void){
>    Stmt q, q2, q3;
>    int i;
>    Bag blobs, vers;
> +  unsigned int unused_mark = 1;
>    const char *markfile_in;
>    const char *markfile_out;
>  
>    bag_init(&blobs);
>    bag_init(&vers);
> @@ -362,11 +398,11 @@
>  
>      f = fossil_fopen(markfile_in, "r");
>      if( f==0 ){
>        fossil_fatal("cannot open %s for reading", markfile_in);
>      }
> -    if(import_marks(f, &blobs, &vers)<0){
> +    if(import_marks(f, &blobs, &vers, &unused_mark)<0){
>        fossil_fatal("error importing marks from file: %s\n", markfile_in);
>      }
>      db_prepare(&qb, "INSERT OR IGNORE INTO oldblob VALUES (:rid)");
>      db_prepare(&qc, "INSERT OR IGNORE INTO oldcommit VALUES (:rid)");
>      rid = bag_first(&blobs);
> @@ -416,15 +452,18 @@
>    while( db_step(&q)==SQLITE_ROW ){
>      int rid = db_column_int(&q, 0);
>      Blob content;
>  
>      while( !bag_find(&blobs, rid) ){
> +      char *zMark;
>        content_get(rid, &content);
>        db_bind_int(&q2, ":rid", rid);
>        db_step(&q2);
>        db_reset(&q2);
> -      printf("blob\nmark :%d\ndata %d\n", BLOBMARK(rid), 
> blob_size(&content));
> +      zMark = mark_name_from_rid(rid, &unused_mark);
> +      printf("blob\nmark %s\ndata %d\n", zMark, blob_size(&content));
> +      free(zMark);
>        bag_insert(&blobs, rid);
>        fwrite(blob_buffer(&content), 1, blob_size(&content), stdout);
>        printf("\n");
>        blob_reset(&content);
>  
> @@ -470,11 +509,11 @@
>      if( zBranch==0 ) zBranch = "trunk";
>      zBr = mprintf("%s", zBranch);
>      for(i=0; zBr[i]; i++){
>        if( !fossil_isalnum(zBr[i]) ) zBr[i] = '_';
>      }
> -    zMark = mark_name_from_rid(ckinId);
> +    zMark = mark_name_from_rid(ckinId, &unused_mark);
>      printf("commit refs/heads/%s\nmark %s\n", zBr, zMark);
>      free(zMark);
>      free(zBr);
>      printf("committer");
>      print_person(zUser);
> @@ -487,21 +526,21 @@
>        "   AND pid IN (SELECT objid FROM event)",
>        ckinId
>      );
>      if( db_step(&q3) == SQLITE_ROW ){
>        int pid = db_column_int(&q3, 0);
> -      zMark = mark_name_from_rid(pid);
> +      zMark = mark_name_from_rid(pid, &unused_mark);
>        printf("from %s\n", zMark);
>        free(zMark);
>        db_prepare(&q4,
>          "SELECT pid FROM plink"
>          " WHERE cid=%d AND NOT isprim"
>          "   AND NOT EXISTS(SELECT 1 FROM phantom WHERE rid=pid)"
>          " ORDER BY pid",
>          ckinId);
>        while( db_step(&q4)==SQLITE_ROW ){
> -        zMark = mark_name_from_rid(db_column_int(&q4, 0));
> +        zMark = mark_name_from_rid(db_column_int(&q4, 0), &unused_mark);
>          printf("merge %s\n", zMark);
>          free(zMark);
>        }
>        db_finalize(&q4);
>      }else{
> @@ -519,17 +558,19 @@
>        int zNew = db_column_int(&q4,1);
>        int mPerm = db_column_int(&q4,2);
>        if( zNew==0)
>          printf("D %s\n", zName);
>        else if( bag_find(&blobs, zNew) ) {
> +        zMark = mark_name_from_rid(zNew, &unused_mark);
>          const char *zPerm;
>          switch( mPerm ){
>            case PERM_LNK:  zPerm = "120000";   break;
>            case PERM_EXE:  zPerm = "100755";   break;
>            default:        zPerm = "100644";   break;
>          }
> -        printf("M %s :%d %s\n", zPerm, BLOBMARK(zNew), zName);
> +        printf("M %s %s %s\n", zPerm, zMark, zName);
> +        free(zMark);
>        }
>      }
>      db_finalize(&q4);
>      db_finalize(&q3);
>      printf("\n");
>
> Index: src/import.c
> ==================================================================
> --- src/import.c
> +++ src/import.c
> @@ -1770,11 +1770,11 @@
>      if( markfile_in ){
>        FILE *f = fossil_fopen(markfile_in, "r");
>        if( !f ){
>          fossil_fatal("cannot open %s for reading\n", markfile_in);
>        }
> -      if(import_marks(f, &blobs, NULL)<0){
> +      if(import_marks(f, &blobs, NULL, NULL)<0){
>          fossil_fatal("error importing marks from file: %s\n", markfile_in);
>        }
>        fclose(f);
>      }
>  
> @@ -1795,13 +1795,13 @@
>        db_prepare(&q_marks, "SELECT DISTINCT trid FROM xmark");
>        while( db_step(&q_marks)==SQLITE_ROW ){
>          rid = db_column_int(&q_marks, 0);
>          if( db_int(0, "SELECT count(objid) FROM event"
>                        " WHERE objid=%d AND type='ci'", rid)==0 ){
> -          if( bag_find(&blobs, rid)==0 ){
> -            bag_insert(&blobs, rid);
> -          }
> +          /* Blob marks exported by git aren't saved between runs, so they 
> need
> +          ** to be left free for git to re-use in the future.
> +          */
>          }else{
>            bag_insert(&vers, rid);
>          }
>        }
>        db_finalize(&q_marks);
_______________________________________________
fossil-dev mailing list
fossil-dev@mailinglists.sqlite.org
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/fossil-dev

Reply via email to