Hi, Is there anyone that's familiar with Fossil's import and export system that would be able/willing to review this? Other than code reviews are there any steps I should take to increase the likelihood of the patch being accepted?
Thanks! -Nick Nickolas Lloyd <nick.ll...@qorvo.com> writes: > Hi, > > Following is a patch against fossil commit [4d1f2d302b] that fixes an > issue that can appear when performing multiple imports/exports with a > git repository. > > The problem arises when fossil tries to export a blob to git with a mark > which has been previously used by git when importing a commit to > fossil. This happens because fossil creates marks based solely on the > rid of the object (2*rid for blobs, 2*rid+1 for commits), which can > clash with git-created marks. > > E.g. if we import the following two objects from git: > blob :19 as rid=10 > commit :20 as rid=11 > and we then later export a commit referring to rid=10, we get something > like the following: > commit refs/heads/trunk > mark :25 > ... > M 100644 :20 file.txt > ... > because fossil thinks that a blob with rid=10 should be marked as `:20'. > This produces an error from git that mark `:20' is not a blob, but > actually a commit. This is fairly easy to work around, we just have to > keep track of all marks that have been declared previously and make sure > that any new ones that we create are unique. > > Please let me know if you have any questions/comments. I'd be happy to > hear your feedback. > > Thanks, > Nick > > --- > > Index: src/export.c > ================================================================== > --- src/export.c > +++ src/export.c > @@ -132,23 +132,28 @@ > > /* > ** create_mark() > ** Create a new (mark,rid,uuid) entry for the given rid in the 'xmark' > table, > ** and return that information as a struct mark_t in *mark. > +** *unused_mark is a value representing a mark that is free for use--that > is, > +** it does not appear in the marks file, and has not been used during this > > +** export run. Specifically, it is the supremum of the set of used marks > > +** plus one. > > ** This function returns -1 in the case where 'rid' does not exist, > otherwise > ** it returns 0. > ** mark->name is dynamically allocated and is owned by the caller upon > return. > */ > -int create_mark(int rid, struct mark_t *mark){ > +int create_mark(int rid, struct mark_t *mark, unsigned int *unused_mark){ > char sid[13]; > char *zUuid = rid_to_uuid(rid); > if(!zUuid){ > fossil_trace("Undefined rid=%d\n", rid); > return -1; > } > mark->rid = rid; > - sqlite3_snprintf(sizeof(sid), sid, ":%d", COMMITMARK(rid)); > + sqlite3_snprintf(sizeof(sid), sid, ":%d", *unused_mark); > + *unused_mark += 1; > mark->name = fossil_strdup(sid); > sqlite3_snprintf(sizeof(mark->uuid), mark->uuid, "%s", zUuid); > free(zUuid); > insert_commit_xref(mark->rid, mark->name, mark->uuid); > return 0; > @@ -156,19 +161,22 @@ > > /* > ** mark_name_from_rid() > ** Find the mark associated with the given rid. Mark names always start > ** with ':', and are pulled from the 'xmark' temporary table. > -** This function returns NULL if the rid does not exist in the 'xmark' > table. > -** Otherwise, it returns the name of the mark, which is dynamically > allocated > -** and is owned by the caller of this function. > +** If the given rid doesn't have a mark associated with it yet, one is > +** created with a value of *unused_mark. > +** *unused_mark functions exactly as in create_mark(). > +** This function returns NULL if the rid does not have an associated UUID, > +** (i.e. is not valid). Otherwise, it returns the name of the mark, which > is > +** dynamically allocated and is owned by the caller of this function. > */ > -char * mark_name_from_rid(int rid){ > +char * mark_name_from_rid(int rid, unsigned int *unused_mark){ > char *zMark = db_text(0, "SELECT tname FROM xmark WHERE trid=%d", rid); > if(zMark==NULL){ > struct mark_t mark; > - if(create_mark(rid, &mark)==0){ > + if(create_mark(rid, &mark, unused_mark)==0){ > zMark = mark.name; > }else{ > return NULL; > } > } > @@ -185,16 +193,18 @@ > ** database. Otherwise, 0 is returned. > ** mark->name is dynamically allocated, and owned by the caller. > */ > int parse_mark(char *line, struct mark_t *mark){ > char *cur_tok; > + char type_; > cur_tok = strtok(line, " \t"); > if(!cur_tok||strlen(cur_tok)<2){ > return -1; > } > mark->rid = atoi(&cur_tok[1]); > - if(cur_tok[0]!='c'){ > + type_ = cur_tok[0]; > + if(type_!='c'&&type_!='b'){ > /* This is probably a blob mark */ > mark->name = NULL; > return 0; > } > > @@ -202,11 +212,18 @@ > if(!cur_tok){ > /* This mark was generated by an older version of Fossil and doesn't > ** include the mark name and uuid. create_mark() will name the new mark > ** exactly as it was when exported to git, so that we should have a > ** valid mapping from git sha1<->mark name<->fossil sha1. */ > - return create_mark(mark->rid, mark); > + unsigned int mid; > + if( type_=='c' ){ > + mid = COMMITMARK(mark->rid); > + } > + else{ > + mid = BLOBMARK(mark->rid); > + } > + return create_mark(mark->rid, mark, &mid); > }else{ > mark->name = fossil_strdup(cur_tok); > } > > cur_tok = strtok(NULL, "\n"); > @@ -233,18 +250,21 @@ > /* > ** import_marks() > ** Import the marks specified in file 'f' into the 'xmark' table. > ** If 'blobs' is non-null, insert all blob marks into it. > ** If 'vers' is non-null, insert all commit marks into it. > +** If 'unused_marks' is non-null, upon return of this function, all values > +** x >= *unused_marks are free to use as marks, i.e. they do not clash with > +** any marks appearing in the marks file. > ** Each line in the file must be at most 100 characters in length. This > ** seems like a reasonable maximum for a 40-character uuid, and 1-13 > ** character rid. > ** The function returns -1 if any of the lines in file 'f' are malformed, > ** or the rid/uuid information doesn't match what is in the repository > ** database. Otherwise, 0 is returned. > */ > -int import_marks(FILE* f, Bag *blobs, Bag *vers){ > +int import_marks(FILE* f, Bag *blobs, Bag *vers, unsigned int *unused_mark){ > char line[101]; > while(fgets(line, sizeof(line), f)){ > struct mark_t mark; > if(strlen(line)==100&&line[99]!='\n'){ > /* line too long */ > @@ -251,22 +271,45 @@ > return -1; > } > if( parse_mark(line, &mark)<0 ){ > return -1; > }else if( line[0]=='b' ){ > - /* Don't import blob marks into 'xmark' table--git doesn't use them, > - ** so they need to be left free for git to reuse. */ > if(blobs!=NULL){ > bag_insert(blobs, mark.rid); > } > - }else if( vers!=NULL ){ > - bag_insert(vers, mark.rid); > + }else{ > + if( vers!=NULL ){ > + bag_insert(vers, mark.rid); > + } > + } > + if( unused_mark!=NULL ){ > + unsigned int mid = atoi(mark.name + 1); > + if( mid>=*unused_mark ){ > + *unused_mark = mid + 1; > + } > } > free(mark.name); > } > return 0; > } > + > +void export_mark(FILE* f, int rid, char obj_type) > +{ > + unsigned int z = 0; > + char *zUuid = rid_to_uuid(rid); > + char *zMark; > + if(zUuid==NULL){ > + fossil_trace("No uuid matching rid=%d when exporting marks\n", rid); > + return; > + } > + /* Since rid is already in the 'xmark' table, the value of z won't be > + ** used, but pass in a valid pointer just to be safe. */ > + zMark = mark_name_from_rid(rid, &z); > + fprintf(f, "%c%d %s %s\n", obj_type, rid, zMark, zUuid); > + free(zMark); > + free(zUuid); > +} > > /* > ** If 'blobs' is non-null, it must point to a Bag of blob rids to be > ** written to disk. Blob rids are written as 'b<rid>'. > ** If 'vers' is non-null, it must point to a Bag of commit rids to be > @@ -275,32 +318,24 @@ > ** This function does not fail, but may produce errors if a uuid cannot > ** be found for an rid in 'vers'. > */ > void export_marks(FILE* f, Bag *blobs, Bag *vers){ > int rid; > - if( blobs!=NULL ){ > + > + if( blobs!=NULL ) { > rid = bag_first(blobs); > - if(rid!=0){ > + if( rid!=0 ){ > do{ > - fprintf(f, "b%d\n", rid); > - }while((rid = bag_next(blobs, rid))!=0); > + export_mark(f, rid, 'b'); > + }while( (rid = bag_next(blobs, rid))!=0 ); > } > } > if( vers!=NULL ){ > rid = bag_first(vers); > if( rid!=0 ){ > do{ > - char *zUuid = rid_to_uuid(rid); > - char *zMark; > - if(zUuid==NULL){ > - fossil_trace("No uuid matching rid=%d when exporting marks\n", > rid); > - continue; > - } > - zMark = mark_name_from_rid(rid); > - fprintf(f, "c%d %s %s\n", rid, zMark, zUuid); > - free(zMark); > - free(zUuid); > + export_mark(f, rid, 'c'); > }while( (rid = bag_next(vers, rid))!=0 ); > } > } > } > > @@ -336,10 +371,11 @@ > */ > void export_cmd(void){ > Stmt q, q2, q3; > int i; > Bag blobs, vers; > + unsigned int unused_mark = 1; > const char *markfile_in; > const char *markfile_out; > > bag_init(&blobs); > bag_init(&vers); > @@ -362,11 +398,11 @@ > > f = fossil_fopen(markfile_in, "r"); > if( f==0 ){ > fossil_fatal("cannot open %s for reading", markfile_in); > } > - if(import_marks(f, &blobs, &vers)<0){ > + if(import_marks(f, &blobs, &vers, &unused_mark)<0){ > fossil_fatal("error importing marks from file: %s\n", markfile_in); > } > db_prepare(&qb, "INSERT OR IGNORE INTO oldblob VALUES (:rid)"); > db_prepare(&qc, "INSERT OR IGNORE INTO oldcommit VALUES (:rid)"); > rid = bag_first(&blobs); > @@ -416,15 +452,18 @@ > while( db_step(&q)==SQLITE_ROW ){ > int rid = db_column_int(&q, 0); > Blob content; > > while( !bag_find(&blobs, rid) ){ > + char *zMark; > content_get(rid, &content); > db_bind_int(&q2, ":rid", rid); > db_step(&q2); > db_reset(&q2); > - printf("blob\nmark :%d\ndata %d\n", BLOBMARK(rid), > blob_size(&content)); > + zMark = mark_name_from_rid(rid, &unused_mark); > + printf("blob\nmark %s\ndata %d\n", zMark, blob_size(&content)); > + free(zMark); > bag_insert(&blobs, rid); > fwrite(blob_buffer(&content), 1, blob_size(&content), stdout); > printf("\n"); > blob_reset(&content); > > @@ -470,11 +509,11 @@ > if( zBranch==0 ) zBranch = "trunk"; > zBr = mprintf("%s", zBranch); > for(i=0; zBr[i]; i++){ > if( !fossil_isalnum(zBr[i]) ) zBr[i] = '_'; > } > - zMark = mark_name_from_rid(ckinId); > + zMark = mark_name_from_rid(ckinId, &unused_mark); > printf("commit refs/heads/%s\nmark %s\n", zBr, zMark); > free(zMark); > free(zBr); > printf("committer"); > print_person(zUser); > @@ -487,21 +526,21 @@ > " AND pid IN (SELECT objid FROM event)", > ckinId > ); > if( db_step(&q3) == SQLITE_ROW ){ > int pid = db_column_int(&q3, 0); > - zMark = mark_name_from_rid(pid); > + zMark = mark_name_from_rid(pid, &unused_mark); > printf("from %s\n", zMark); > free(zMark); > db_prepare(&q4, > "SELECT pid FROM plink" > " WHERE cid=%d AND NOT isprim" > " AND NOT EXISTS(SELECT 1 FROM phantom WHERE rid=pid)" > " ORDER BY pid", > ckinId); > while( db_step(&q4)==SQLITE_ROW ){ > - zMark = mark_name_from_rid(db_column_int(&q4, 0)); > + zMark = mark_name_from_rid(db_column_int(&q4, 0), &unused_mark); > printf("merge %s\n", zMark); > free(zMark); > } > db_finalize(&q4); > }else{ > @@ -519,17 +558,19 @@ > int zNew = db_column_int(&q4,1); > int mPerm = db_column_int(&q4,2); > if( zNew==0) > printf("D %s\n", zName); > else if( bag_find(&blobs, zNew) ) { > + zMark = mark_name_from_rid(zNew, &unused_mark); > const char *zPerm; > switch( mPerm ){ > case PERM_LNK: zPerm = "120000"; break; > case PERM_EXE: zPerm = "100755"; break; > default: zPerm = "100644"; break; > } > - printf("M %s :%d %s\n", zPerm, BLOBMARK(zNew), zName); > + printf("M %s %s %s\n", zPerm, zMark, zName); > + free(zMark); > } > } > db_finalize(&q4); > db_finalize(&q3); > printf("\n"); > > Index: src/import.c > ================================================================== > --- src/import.c > +++ src/import.c > @@ -1770,11 +1770,11 @@ > if( markfile_in ){ > FILE *f = fossil_fopen(markfile_in, "r"); > if( !f ){ > fossil_fatal("cannot open %s for reading\n", markfile_in); > } > - if(import_marks(f, &blobs, NULL)<0){ > + if(import_marks(f, &blobs, NULL, NULL)<0){ > fossil_fatal("error importing marks from file: %s\n", markfile_in); > } > fclose(f); > } > > @@ -1795,13 +1795,13 @@ > db_prepare(&q_marks, "SELECT DISTINCT trid FROM xmark"); > while( db_step(&q_marks)==SQLITE_ROW ){ > rid = db_column_int(&q_marks, 0); > if( db_int(0, "SELECT count(objid) FROM event" > " WHERE objid=%d AND type='ci'", rid)==0 ){ > - if( bag_find(&blobs, rid)==0 ){ > - bag_insert(&blobs, rid); > - } > + /* Blob marks exported by git aren't saved between runs, so they > need > + ** to be left free for git to re-use in the future. > + */ > }else{ > bag_insert(&vers, rid); > } > } > db_finalize(&q_marks); _______________________________________________ fossil-dev mailing list fossil-dev@mailinglists.sqlite.org http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/fossil-dev