Hi,
I have a set of working patches for "fossil export" and "fossil import"
as attached.
Please note these are clean reverse-engineered codes, independent of the
code in git, written using the code analysis result included.
These codes are BSD-2-Clause licensed and I agree to copyright
assignment to Hipp, Wyrick & Company, Inc. to any patches submitted to
this ML.
Please review and consider.
Osamu
PS: The previous "fossil import" code could not handle some
non-standard filenames.
From: Osamu Aoki
Date: Tue, 11 Oct 2016 23:45:30 +0900
Subject: Fix export
Newline and UTF-8 characters in filenames are now allowed
The git-fast-export code was analyzed as below. This analysis was used to
create independent code for this patch.
== Git fast-export filename encoding spec ==
Filenames exported by "git fast-export" are encoded and quoted if they contain
some non-plain ASCII alphanumeric characters.
Filenames imported by "git fast-import" are decoded if they are recorded within
double quotes.
This memo documents the details of encoding and quotation to enable creating
clean reverse-engineered, GPL-unencumbered code under the BSD-2-Clause license.
In Git 2.9.3, the focal point defining this feature:
* quote.c quote_c_style
Escape odd characters and quote the escaped string
Return TRUE if used
* builtin/fast-export.c print_path_1
Use quote_c_style if it finds the need to quote and escape
Otherwise, quote entire string if ' ' is found in it
If neither, use the original filename to export
* quote.c unquote_c_style
Unescape escaped characters
The following table summarizes the end result for each character found in the
filename for the combined effects of the above 2 functions:
01234567 89abcdef
0x00 OOOa btnvfrOO
0x10
0x20 QAEA (quote space and escape double quote)
0x30
0x40
0x50 EAAA (escape backslash)
0x60
0x70 AAAO (DEL is octal!)
0x80
0x90
0xa0
0xb0
0xc0
0xd0
0xe0
0xf0
Here each character is represented as a HEX number, adding the row and column indices.
The default of the quoting flag is FALSE. The meaning of the conversion rule
indicators are the following:
---
src/export.c | 60
1 file changed, 56 insertions(+), 4 deletions(-)
diff --git a/src/export.c b/src/export.c
index f524cdc..e677351 100644
--- a/src/export.c
+++ b/src/export.c
@@ -305,6 +305,49 @@ void export_marks(FILE* f, Bag *blobs, Bag *vers){
}
/*
+** Quote and escape filename zName into name for export.  '\\' and '"'
+** get a backslash prefix, '\a'..'\r' use their one-letter escapes, and
+** any other byte outside ' '..'~' becomes a 3-digit octal escape, on par
+** with what the git-fast-export does.  name is written without bounds
+** checks; the worst case is 4*strlen(zName)+3 bytes including the NUL.
+*/
+static void quote_git_filename(const char *zName, char *name){
+ int i, j;
+ int needQuote;  /* 1 if the output must be wrapped in double quotes */
+ static char escs[] = "abtnvfr";  /* escape letters for '\a'..'\r', in ASCII order */
+ needQuote = 0;
+ for(i=0; zName[i]!=0; i++){  /* pass 1: decide whether quoting is needed */
+if( zName[i]<=' ' || zName[i]>'~' || zName[i]=='\\' || zName[i]=='"' ){
+ needQuote = 1;  /* space, control, DEL, high-bit byte, backslash or double quote */
+}
+ }
+ j = 0;  /* j is the write index into name */
+ if( needQuote==1 ){
+name[j++] = '"';
+ }
+ for(i=0; zName[i]!=0; i++){  /* pass 2: emit each byte, escaped as required */
+if( zName[i]=='\\' || zName[i]=='"' ){
+ name[j++] = '\\';
+ name[j++] = zName[i];
+}else if ( zName[i]>=' ' && zName[i]<='~' ){  /* printable ASCII incl. space: copy as is */
+ name[j++] = zName[i];
+}else if( zName[i]>='\a' && zName[i]<='\r' ){  /* control codes with a one-letter escape */
+ name[j++] = '\\';
+ name[j++] = escs[( zName[i] - '\a')];
+}else{  /* every remaining byte: backslash plus three octal digits */
+ name[j++] = '\\';
+ name[j++] = ((zName[i] >> 6) & '\03') + '0';  /* bits 7-6 */
+ name[j++] = ((zName[i] >> 3) & '\07') + '0';  /* bits 5-3 */
+ name[j++] = ((zName[i] >> 0) & '\07') + '0';  /* bits 2-0 */
+}
+ }
+ if( needQuote==1 ){
+name[j++] = '"';
+ }
+ name[j] = '\0';
+}
+
+/*
** COMMAND: export
**
** Usage: %fossil export --git ?OPTIONS? ?REPOSITORY?
@@ -516,19 +559,28 @@ void export_cmd(void){
);
while( db_step(&q4)==SQLITE_ROW ){
const char *zName = db_column_text(&q4,0);
+ char *name;
int zNew = db_column_int(&q4,1);
int mPerm = db_column_int(&q4,2);
- if( zNew==0)
-printf("D %s\n", zName);
- else if( bag_find(&blobs, zNew) ) {
+ if( zName==0 || zName=="" ){
+name = fossil_malloc( 1 );
+name[0] = '\0';
+ }else{
+name = fossil_malloc( 4*strlen(zName) + 2 + 1 );
+ }
+ quote_git_filename(zName, name);
+ if( zNew==0 ){
+printf("D %s\n", name);
+ }else if( bag_find(&blobs, zNew) ){
const char *zPerm;
switch( mPerm )