Re: [patch] Re: getfsent(3) and spaces in fstab
imho - expensive algorithm... i want to see anything more simple... like gtok() instead es_strsep() + remove_escapes()? I have adapted my patch to use your neat gtok() function, but I came to the conclusion that a two-pass algorithm is necessary: The first pass detects whether a line from fstab is the old or the new style format (old style lines may only have unescaped white spaces before a trailing #-comment). Then, the second pass extracts the information. I admit this is rather complicated, but I don't know how to handle two sets of delimiters (:\n and \n\r\t) with only one pass. Using gtok() to detect the style of line is not an option IMO, since it would convert escape sequences. Now, the following lines can be processed: 1) old style: file system:mount point:mount type:dump:passno([' ','\t']*#comment)* 2) new style format as described in fstab(5) + an optional #-comment at the end of the line 3) empty lines, white space lines, arbitrarily many white spaces + comment In both the old and the new style lines, white spaces can be written as escape sequences or in double quotes. Could somebody please review my patch - if there are no objections (but I am sure there are some more details that can be improved), I will write a PR in order Regards, Simon --- fstab.c.origFri Aug 1 17:18:00 2003 +++ fstab.c Thu Aug 7 15:46:39 2003 @@ -84,6 +84,60 @@ _fs_fstab.fs_spec = buf; } +/* + * Gets a token from a string *s, that is either empty or is separated by + * a set of delimiters *delim. + * Characters that are in *delim, can occur in the token if the are escaped, + * i.e. have a '\' prepended. The character '\' itself is encoded as '\\'. + * *s can have a trailing comment (indicated by a '#'), which will cause the + * characters after the '#' to be ignored. To encode a '#' within a token, + * use '\#'. + * + * If a token is found, gtok sets the last character after its end + * to '\0' and returns a pointer it. Otherwise the return value is NULL. 
+ * As a side effect, the input string *s modified and points to the next + * character after the end of the current token, i.e. after the '\0'. + */ +char *gtok(char **s, char const *delim) +{ + int quoted, escaped; + static char const esc_set[] = { 't', 'r', 'n', 'a', 0 }; + static char const esc_rep[] = { '\t', '\r', '\n', '\a', 0 }; + char *tok, *r, *w, *p; + + if (!s || !*s || !*(tok = *s + strspn(*s, delim)) || *tok == '#') + return NULL; + + for (quoted = escaped = 0, r = w = tok; *r; r++) { + if (!escaped) { + if (*r == '\\') { + escaped = 1; + continue; + } + if (*r == '\') { + quoted ^= -1; + continue; + } + if (!quoted) { + if (strchr(delim, *r)) { + r++; + break; + } + } + } else { + escaped = 0; + if ((p = strchr(esc_set, *r)) != NULL) { + *w++ = esc_rep[p - esc_set]; + continue; + } + } + *w++ = *r; + } + *w = 0; + *s = r; + return tok; +} + static int fstabscan() { @@ -91,21 +145,73 @@ #defineMAXLINELENGTH 1024 static char line[MAXLINELENGTH]; char subline[MAXLINELENGTH]; - int typexx; + int typexx, escaped=0, quoted=0, ws_sep=0; for (;;) { if (!(p = fgets(line, sizeof(line), _fs_fp))) return(0); -/* OLD_STYLE_FSTAB */ ++LineNo; - if (*line == '#' || *line == '\n') - continue; - if (!strpbrk(p, \t)) { - _fs_fstab.fs_spec = strsep(p, :\n); - _fs_fstab.fs_file = strsep(p, :\n); + + /* Detect whether line is in old or new fstab style */ + for (cp=p; *cp != '\n'; ++cp) { + if (*cp == '\\') { + escaped = (escaped ? 0 : 1); + continue; + } + if (!escaped) { + /* Quotes */ + if (*cp == '\') { + quoted = (quoted ? 0 : 1); + continue; + } + if (quoted) + continue; + /* new white separator found */ + if (cp p strspn (cp, \n\r\t) + !strspn(cp-1, \t)) +
Re: [patch] Re: getfsent(3) and spaces in fstab
On Mon, Aug 04, 2003 at 02:33:31AM +0200, Simon Barner wrote: Hi Terry, You need to add '\\' to the list of characters that can be escaped, or you've just traded the inability to specify '\t' or ' ' for an inability to specify '\\'. Oh yes, I have overlooked this special case. I revised my patch in order to get this right. Simon helo. imho - expensive algorithm... i want to see anything more simple... like gtok() instead es_strsep() + remove_escapes()? #include stdio.h #include stdlib.h #include string.h char *gtok(char **s, char const *delim) { int quoted, escaped; static char const esc_set[] = { 't', 'r', 'n', 'a', 0 }; static char const esc_rep[] = { '\t', '\r', '\n', '\a', 0 }; char *tok, *r, *w, *p; if (!s || !*s || !*(tok = *s + strspn(*s, delim)) || *tok == '#') return NULL; for (quoted = escaped = 0, r = w = tok; *r; r++) { if (!escaped) { if (*r == '\\') { escaped = 1; continue; } if (*r == '\') { quoted ^= -1; continue; } if (!quoted strchr(delim, *r)) { r++; break; } } else { escaped = 0; if ((p = strchr(esc_set, *r)) != NULL) { *w++ = esc_rep[p - esc_set]; continue; } } *w++ = *r; } *w = 0; *s = r; return tok; } #if 0 main() { char *s, *t, buf[0x1000]; while (fgets(buf, sizeof buf, stdin)) for (s = buf; t = gtok(s, \t\r\n); ) printf(\%s\\n, t); return 0; } #endif /swp --- fstab.c.orig Fri Aug 1 17:18:00 2003 +++ fstab.c Mon Aug 4 01:46:55 2003 @@ -49,6 +49,7 @@ #include errno.h #include fstab.h #include paths.h +#include stdbool.h #include stdio.h #include stdlib.h #include string.h @@ -84,6 +85,140 @@ _fs_fstab.fs_spec = buf; } +/* + * Get next token from string *stringp, where tokens are possibly-empty + * strings separated by characters from delim. + * + * Writes NULs into the string at *stringp to end tokens. + * delim need not remain constant from call to call. + * On return, *stringp points past the last NUL written (if there might + * be further tokens), or is NULL (if there are definitely no more tokens). 
+ * + * If *stringp is NULL, es_strsep returns NULL. + * + * In contrast to strsep(3), es_strsep will allow escaped delimiters + * within a token. These escaped characters as well as the special case + * '\\' will be converted appropriately ('\delim' - 'delim, '\\' - '\' + * + */ +char * +es_strsep(char **stringp, const char *delim) +{ + boolescaped=false; + char*s, *t, *u; + int i; + + + if (*stringp == '\0') /* empty string */ + return NULL; + s = *stringp; + s += strspn (s, delim); /* skip delimiters */ + + if (*s == '\0') /* string consists only of delimiters */ + return NULL; + + /* + * skip a string consisting of non-delimiters, + * escapted delimiters or '\\' + */ + for (t = s; *t != '\0'; ++t) { + if (*t == '\\') { + if (escaped) { /* convert \\ to \ */ + --t; + u = t; + escaped = false; + while (u[0] != '\0') { + u[0] = u[1]; + ++u; + } + } else /* start \-Sequence */ + escaped = true; + continue; + } + + /* search for delimiter */ + for (i=0; delim[i] != '\0'; ++i) { + if (*t == delim[i]) + break; + } + + /* un-escaped delimiter found = end of token */ + if (!escaped delim[i] != '\0') + break; + + /* escaped delimiter found = remove / */ + if (escaped) { + --t; + u = t; +escaped = false; + while (u[0] != '\0') { + u[0] = u[1]; + ++u; + } + } + } + + if (*t != '\0') { + *t = '\0'; /* end current token */ + *stringp = t+1; /* *t != '\0' = *(t+1) is valid */ + } else + *stringp = 0; /* end of string reached */
Re: [patch] Re: getfsent(3) and spaces in fstab
Hi Terry, You need to add '\\' to the list of characters that can be escaped, or you've just traded the inability to specify '\t' or ' ' for an inability to specify '\\'. Oh yes, I have overlooked this special case. I revised my patch in order to get this right. Simon --- fstab.c.origFri Aug 1 17:18:00 2003 +++ fstab.c Mon Aug 4 01:46:55 2003 @@ -49,6 +49,7 @@ #include errno.h #include fstab.h #include paths.h +#include stdbool.h #include stdio.h #include stdlib.h #include string.h @@ -84,6 +85,140 @@ _fs_fstab.fs_spec = buf; } +/* + * Get next token from string *stringp, where tokens are possibly-empty + * strings separated by characters from delim. + * + * Writes NULs into the string at *stringp to end tokens. + * delim need not remain constant from call to call. + * On return, *stringp points past the last NUL written (if there might + * be further tokens), or is NULL (if there are definitely no more tokens). + * + * If *stringp is NULL, es_strsep returns NULL. + * + * In contrast to strsep(3), es_strsep will allow escaped delimiters + * within a token. 
These escaped characters as well as the special case + * '\\' will be converted appropriately ('\delim' - 'delim, '\\' - '\' + * + */ +char * +es_strsep(char **stringp, const char *delim) +{ + boolescaped=false; + char*s, *t, *u; + int i; + + + if (*stringp == '\0') /* empty string */ + return NULL; + s = *stringp; + s += strspn (s, delim); /* skip delimiters */ + + if (*s == '\0') /* string consists only of delimiters */ + return NULL; + + /* +* skip a string consisting of non-delimiters, +* escapted delimiters or '\\' + */ + for (t = s; *t != '\0'; ++t) { + if (*t == '\\') { + if (escaped) { /* convert \\ to \ */ + --t; + u = t; + escaped = false; + while (u[0] != '\0') { + u[0] = u[1]; + ++u; + } + } else /* start \-Sequence */ + escaped = true; + continue; + } + + /* search for delimiter */ + for (i=0; delim[i] != '\0'; ++i) { + if (*t == delim[i]) + break; + } + + /* un-escaped delimiter found = end of token */ + if (!escaped delim[i] != '\0') + break; + + /* escaped delimiter found = remove / */ + if (escaped) { + --t; + u = t; +escaped = false; + while (u[0] != '\0') { + u[0] = u[1]; + ++u; + } + } + } + + if (*t != '\0') { + *t = '\0'; /* end current token */ + *stringp = t+1; /* *t != '\0' = *(t+1) is valid */ + } else + *stringp = 0; /* end of string reached */ + + return s; /* return current token */ +} + +/* + * This function converts escaped characters: + * '\delim' - 'delim', '\\' - '\' + * + * If there are unescaped delimiters, 'false' will be return to indicate + * an error, otherwise remove_escape returns 'true'. 
+ */ +bool remove_escapes (char **s, const char* delim) { + boolescaped=false; + char*t, *u; + int i; + + for (t = *s; *t != '\0'; ++t) { + if (*t == '\\') { + if (escaped) { /* convert \\ to \ */ + --t; + u = t; + escaped = false; + while (u[0] != '\0') { + u[0] = u[1]; + ++u; + } + } else /* start \-Sequence */ + escaped = true; + continue; + } + + /* search for delimiter */ + for (i=0; delim[i] != '\0'; ++i) { + if (*t == delim[i]) + break; + } + + /* un-escaped delimiter found = error */ + if (!escaped delim[i] != '\0') + return false; + + /* escaped delimiter found = remove / */ + if (escaped) { + --t; + u = t; +escaped = false; + while (u[0] != '\0') { + u[0] = u[1]; + ++u; + } + } + } + + return true; +} + static int
Re: [patch] Re: getfsent(3) and spaces in fstab
Simon Barner wrote: The attached patch will allow blanks and tabs for file systems and path names, as long as they are protected by a '\'. For the old fstab style, blanks and tabs are not allowed as delimiters (as it was in the old implementation). You need to add '\\' to the delimited list, so that it is not skipped. You need to add '\\' to the list of characters that can be escaped, or you've just traded the inability to specify '\t' or ' ' for an inability to specify '\\'. -- Terry ___ [EMAIL PROTECTED] mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-hackers To unsubscribe, send any mail to [EMAIL PROTECTED]
[patch] Re: getfsent(3) and spaces in fstab
Hi, For all the 'good rules' of orthogonality and consistency etc it is a good thing, however I still feel that at the level of the fstab where you are mounting entire file system trees, the simplest naming formats are probably the best. A philosophical point only, but it would keep the fstab format minimalistic and clean. This point of view could probably be stated in the man page once the '\ ' form is implemented, as a suggested practice. Yes, you are definitely right with this point, and I agree that such a hint should be added to the fstab(5) man page. But as somebody else pointed out, one of FreeBSD's advantages is its interoperability with other systems, and this was one of the reasons why I decided to extend 'getfsent'. The attached patch will allow blanks and tabs for file systems and path names, as long as they are protected by a '\'. For the old fstab style, blanks and tabs are not allowed as delimiters (as it was in the old implementation). Thus /foo\ bar:/mnt\ point:ufs is a valid old-style fstab entry, while /foo\ bar:/mnt\ point:ufs /foo\ bar :/mnt\ point:ufs etc. are not. For the current fstab style, unescaped blanks and tabs are handled as delimiters. Simon --- fstab.c.origFri Aug 1 17:18:00 2003 +++ fstab.c Fri Aug 1 17:17:46 2003 @@ -84,6 +84,78 @@ _fs_fstab.fs_spec = buf; } +/* + * Get next token from string *stringp, where tokens are possibly-empty + * strings separated by characters from delim. + * + * Writes NULs into the string at *stringp to end tokens. + * delim need not remain constant from call to call. + * On return, *stringp points past the last NUL written (if there might + * be further tokens), or is NULL (if there are definitely no more tokens). + * + * If *stringp is NULL, es_strsep returns NULL. + * + * In contrast to strsep(3), es_strsep will allow escaped delimiters + * within a token. 
+ */ +char * +es_strsep(char **stringp, const char *delim) +{ + char*s, *t; + size_t n; + + if (*stringp == '\0') /* empty string */ + return NULL; + s = *stringp; + s += strspn (s, delim); /* skip delimiters */ + + if (*s == '\0') /* string consists only of delimiters */ + return NULL; + + /* skip a string consisting of non-delimiters or escapted delimiters */ + t = s; + for (;;) { + /* skip non-delimiters */ + n = strcspn (t, delim); + if (n == 0) /* delimiters found - end of token */ + break; + t += n; + if (*t == '\0') /* end of string reached */ + break; + + /* skip escaped delimiters */ + if (*(t-1) == '\\') /* n != 0 = *(t-1) is valid */ + ++t; + if (*t == '\0') /* end of string reached */ + break; + } + + if (*t != '\0') { + *t = '\0'; /* end current token */ + *stringp = t+1; /* *t != '\0' = *(t+1) is valid */ + } else + *stringp = 0; /* end of string reached */ + + return s; /* return current token */ +} + +/* + * This function removes all '\' characters from a string. + * + * It will NOT handle escape sequences as '\t' or '\n'! 
+ */ +void +remove_escapes (char **s) { + char *p = *s, *q; + while ((p = strchr (p, '\\')) != 0) { + q = p; + while (q[0] != '\0') { + q[0] = q[1]; + ++q; + } + } +} + static int fstabscan() { @@ -101,9 +173,17 @@ ++LineNo; if (*line == '#' || *line == '\n') continue; - if (!strpbrk(p, \t)) { + + /* escapted white-spaces only are allowed in old-style format */ + cp = p; + while ((cp = strpbrk(cp, \t)) != 0 + cp != p cp[-1] == '\\') + ++cp; + if (cp == 0) { _fs_fstab.fs_spec = strsep(p, :\n); + remove_escapes (_fs_fstab.fs_spec); _fs_fstab.fs_file = strsep(p, :\n); + remove_escapes (_fs_fstab.fs_file); fixfsfile(); _fs_fstab.fs_type = strsep(p, :\n); if (_fs_fstab.fs_type) { @@ -124,14 +204,18 @@ goto bad; } /* OLD_STYLE_FSTAB */ - while ((cp = strsep(p, \t\n)) != NULL *cp == '\0') + while ((cp = es_strsep(p, \t\n)) != NULL *cp == '\0') ; _fs_fstab.fs_spec = cp; + remove_escapes (_fs_fstab.fs_spec); if (!_fs_fstab.fs_spec || *_fs_fstab.fs_spec == '#') continue; -