Re: [patch] Re: getfsent(3) and spaces in fstab

2003-08-14 Thread Simon Barner
 imho - expensive algorithm... i want to see anything more simple... 
 like gtok() instead es_strsep() + remove_escapes()?

I have adapted my patch to use your neat gtok() function, but I came to
the conclusion that a two-pass algorithm is necessary:

The first pass detects whether a line from fstab is the old or the new
style format (old style lines may only have unescaped white spaces
before a trailing #-comment).

Then, the second pass extracts the information.

I admit this is rather complicated, but I don't know how to handle two sets
of delimiters (:\n and  \n\r\t) with only one pass. Using gtok() to
detect the style of line is not an option IMO, since it would convert
escape sequences.

Now, the following lines can be processed:

1) old style:
file system:mount point:mount type:dump:passno([' ','\t']*#comment)*

2) new style
format as described in fstab(5) + an optional #-comment at the end of the line

3) empty lines, white space lines, deliberately many white spaces + comment

In both the old and the new style lines, white spaces can be written as
escape sequences or in double quotes.

Could somebody please review my patch - if there are no objections (but
I am sure there are some more details that can be improved), I will
write a PR in order

Regards,
 Simon
--- fstab.c.origFri Aug  1 17:18:00 2003
+++ fstab.c Thu Aug  7 15:46:39 2003
@@ -84,6 +84,60 @@
_fs_fstab.fs_spec = buf;
 }
 
+/*
+ * Gets a token from a string *s, that is either empty or is separated by
+ * a set of delimiters *delim.
+ * Characters that are in *delim, can occur in the token if they are escaped,
+ * i.e. have a '\' prepended. The character '\' itself is encoded as '\\'.
+ * *s can have a trailing comment (indicated by a '#'), which will cause the
+ * characters after the '#' to be ignored. To encode a '#' within a token,
+ * use '\#'.
+ *
+ * If a token is found, gtok sets the last character after its end
+ * to '\0' and returns a pointer to it. Otherwise the return value is NULL.
+ * As a side effect, the input string *s is modified and points to the next
+ * character after the end of the current token, i.e. after the '\0'.
+ */
+char *gtok(char **s, char const *delim)
+{
+   int quoted, escaped;
+   static char const esc_set[] = {  't',  'r',  'n',  'a', 0 };
+   static char const esc_rep[] = { '\t', '\r', '\n', '\a', 0 };
+   char *tok, *r, *w, *p;
+
+   if (!s || !*s || !*(tok = *s + strspn(*s, delim)) || *tok == '#')
+   return NULL;
+
+   for (quoted = escaped = 0, r = w = tok; *r; r++) {
+   if (!escaped) {
+   if (*r == '\\') {
+   escaped = 1;
+   continue;
+   }
+   if (*r == '"') {
+   quoted ^= -1;
+   continue;
+   }
+   if (!quoted) {
+   if (strchr(delim, *r)) {
+   r++;
+   break;
+   }
+   }
+   } else {
+   escaped = 0;
+   if ((p = strchr(esc_set, *r)) != NULL) {
+   *w++ = esc_rep[p - esc_set];
+   continue;
+   }
+   }
+   *w++ = *r;
+   }
+   *w = 0;
+   *s = r;
+   return tok;
+}
+
 static int
 fstabscan()
 {
@@ -91,21 +145,73 @@
 #define MAXLINELENGTH   1024
static char line[MAXLINELENGTH];
char subline[MAXLINELENGTH];
-   int typexx;
+   int typexx, escaped=0, quoted=0, ws_sep=0;
 
for (;;) {
 
if (!(p = fgets(line, sizeof(line), _fs_fp)))
return(0);
-/* OLD_STYLE_FSTAB */
++LineNo;
-   if (*line == '#' || *line == '\n')
-   continue;
-   if (!strpbrk(p, " \t")) {
-   _fs_fstab.fs_spec = strsep(&p, ":\n");
-   _fs_fstab.fs_file = strsep(&p, ":\n");
+   
+   /* Detect whether line is in old or new fstab style */
+   for (cp=p; *cp != '\n'; ++cp) {
+   if (*cp == '\\') {
+   escaped = (escaped ? 0 : 1);
+   continue;
+   }
+   if (!escaped) {
+   /* Quotes */
+   if (*cp == '"') {
+   quoted = (quoted ? 0 : 1);
+   continue;
+   }
+   if (quoted)
+   continue;
+   /* new white separator found */
+   if (cp > p && strspn (cp, " \n\r\t") &&
+   !strspn(cp-1, " \t"))
+  

Re: [patch] Re: getfsent(3) and spaces in fstab

2003-08-10 Thread mitrohin a.s.
On Mon, Aug 04, 2003 at 02:33:31AM +0200, Simon Barner wrote:
 Hi Terry,
 
  You need to add '\\' to the list of characters that can be escaped,
  or you've just traded the inability to specify '\t' or ' ' for an
  inability to specify '\\'.
 
 Oh yes, I have overlooked this special case. I revised my patch in order
 to get this right.
 
 Simon

helo.

imho - expensive algorithm... i want to see anything more simple... 
like gtok() instead es_strsep() + remove_escapes()?

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Extract the next token from the string *s, splitting on any character
 * found in delim.
 *
 * Leading delimiter characters are skipped.  NULL is returned when s or *s
 * is NULL, when nothing but delimiters remains, or when the first
 * non-delimiter character is '#'.
 *
 * While scanning, the token is rewritten in place:
 *   - a backslash escapes the following character; "\t", "\r", "\n" and
 *     "\a" are replaced by the corresponding control character, any other
 *     escaped character (a delimiter, '"', '\\', '#', ...) is copied
 *     literally;
 *   - an unescaped '"' toggles quoting and is dropped from the output;
 *     delimiters inside quotes are treated as ordinary characters;
 *   - the first unescaped, unquoted delimiter ends the token.
 *
 * On success the token is NUL-terminated, *s is advanced just past the
 * delimiter that ended it (or to the terminating NUL of the input), and a
 * pointer to the start of the token is returned.
 */
char *gtok(char **s, char const *delim)
{
    /* esc_set[i] is the letter after '\', esc_rep[i] its replacement. */
    static char const esc_set[] = {  't',  'r',  'n',  'a', 0 };
    static char const esc_rep[] = { '\t', '\r', '\n', '\a', 0 };
    int quoted = 0, escaped = 0;
    char *tok, *r, *w, *p;

    if (!s || !*s || !*(tok = *s + strspn(*s, delim)) || *tok == '#')
        return NULL;

    /* r reads the raw input, w writes the unescaped token; w never passes r. */
    for (r = w = tok; *r; r++) {
        if (!escaped) {
            if (*r == '\\') {
                escaped = 1;
                continue;
            }
            if (*r == '"') {
                quoted = !quoted;
                continue;
            }
            if (!quoted && strchr(delim, *r)) {
                r++;            /* step over the terminating delimiter */
                break;
            }
        } else {
            escaped = 0;
            if ((p = strchr(esc_set, *r)) != NULL) {
                *w++ = esc_rep[p - esc_set];
                continue;
            }
            /* not a known escape letter: copy the character literally */
        }
        *w++ = *r;
    }
    *w = 0;
    *s = r;

    return tok;
}

#if 0
/*
 * Test driver (compiled out): reads lines from stdin, splits each one with
 * gtok() on blanks, tabs, CR and LF, and prints every token on its own
 * line surrounded by double quotes.
 */
int main(void)
{
    char *s, *t, buf[0x1000];

    while (fgets(buf, sizeof buf, stdin))
        for (s = buf; (t = gtok(&s, " \t\r\n")) != NULL; )
            printf("\"%s\"\n", t);

    return 0;
}
#endif

/swp

 --- fstab.c.orig  Fri Aug  1 17:18:00 2003
 +++ fstab.c   Mon Aug  4 01:46:55 2003
 @@ -49,6 +49,7 @@
  #include <errno.h>
  #include <fstab.h>
  #include <paths.h>
 +#include <stdbool.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
 @@ -84,6 +85,140 @@
   _fs_fstab.fs_spec = buf;
  }
  
 +/*
 + * Get next token from string *stringp, where tokens are possibly-empty
 + * strings separated by characters from delim.
 + *
 + * Writes NULs into the string at *stringp to end tokens.
 + * delim need not remain constant from call to call.
 + * On return, *stringp points past the last NUL written (if there might
 + * be further tokens), or is NULL (if there are definitely no more tokens).
 + *
 + * If *stringp is NULL, es_strsep returns NULL.
 + *
 + * In contrast to strsep(3), es_strsep will allow escaped delimiters
 + * within a token. These escaped characters as well as the special case
 + * '\\' will be converted appropriately ('\delim' -> 'delim', '\\' -> '\').
 + *
 + */
 +char *
 +es_strsep(char **stringp, const char *delim)
 +{
 + boolescaped=false;
 + char*s, *t, *u;
 + int i;
 +
 +
 + if (*stringp == '\0')   /* empty string */
 + return NULL;
 + s = *stringp;
 + s += strspn (s, delim); /* skip delimiters */
 +
 + if (*s == '\0') /* string consists only of delimiters */
 + return NULL;
 + 
 + /*
 +  * skip a string consisting of non-delimiters,
 +  * escapted delimiters or '\\'
 + */
 + for (t = s; *t != '\0'; ++t) {
 + if (*t == '\\') {
 + if (escaped) {  /* convert \\ to \ */
 + --t;
 + u = t;
 + escaped = false;
 + while (u[0] != '\0') {
 + u[0] = u[1];
 + ++u;
 + }
 + } else  /* start \-Sequence */
 + escaped = true;
 + continue;
 + }
 + 
 + /* search for delimiter */
 + for (i=0; delim[i] != '\0'; ++i) {
 + if (*t == delim[i])
 + break;
 + }
 + 
 + /* un-escaped delimiter found = end of token */
 + if (!escaped && delim[i] != '\0')
 + break;
 + 
 + /* escaped delimiter found = remove / */
 + if (escaped) {
 + --t;
 + u = t;
 +escaped = false;
 + while (u[0] != '\0') {
 + u[0] = u[1];
 + ++u;
 + }
 + }
 + }
 +
 + if (*t != '\0') {
 + *t = '\0';  /* end current token */
 + *stringp = t+1; /* *t != '\0' = *(t+1) is valid */
 + } else
 + *stringp = 0;   /* end of string reached */  

Re: [patch] Re: getfsent(3) and spaces in fstab

2003-08-03 Thread Simon Barner
Hi Terry,

 You need to add '\\' to the list of characters that can be escaped,
 or you've just traded the inability to specify '\t' or ' ' for an
 inability to specify '\\'.

Oh yes, I have overlooked this special case. I revised my patch in order
to get this right.

Simon
--- fstab.c.origFri Aug  1 17:18:00 2003
+++ fstab.c Mon Aug  4 01:46:55 2003
@@ -49,6 +49,7 @@
 #include <errno.h>
 #include <fstab.h>
 #include <paths.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -84,6 +85,140 @@
_fs_fstab.fs_spec = buf;
 }
 
+/*
+ * Get next token from string *stringp, where tokens are possibly-empty
+ * strings separated by characters from delim.
+ *
+ * Writes NULs into the string at *stringp to end tokens.
+ * delim need not remain constant from call to call.
+ * On return, *stringp points past the last NUL written (if there might
+ * be further tokens), or is NULL (if there are definitely no more tokens).
+ *
+ * If *stringp is NULL, es_strsep returns NULL.
+ *
+ * In contrast to strsep(3), es_strsep will allow escaped delimiters
+ * within a token. These escaped characters as well as the special case
+ * '\\' will be converted appropriately ('\delim' -> 'delim', '\\' -> '\').
+ *
+ */
+char *
+es_strsep(char **stringp, const char *delim)
+{
+   boolescaped=false;
+   char*s, *t, *u;
+   int i;
+
+
+   if (*stringp == '\0')   /* empty string */
+   return NULL;
+   s = *stringp;
+   s += strspn (s, delim); /* skip delimiters */
+
+   if (*s == '\0') /* string consists only of delimiters */
+   return NULL;
+   
+   /*
+* skip a string consisting of non-delimiters,
+* escapted delimiters or '\\'
+   */
+   for (t = s; *t != '\0'; ++t) {
+   if (*t == '\\') {
+   if (escaped) {  /* convert \\ to \ */
+   --t;
+   u = t;
+   escaped = false;
+   while (u[0] != '\0') {
+   u[0] = u[1];
+   ++u;
+   }
+   } else  /* start \-Sequence */
+   escaped = true;
+   continue;
+   }
+   
+   /* search for delimiter */
+   for (i=0; delim[i] != '\0'; ++i) {
+   if (*t == delim[i])
+   break;
+   }
+   
+   /* un-escaped delimiter found = end of token */
+   if (!escaped && delim[i] != '\0')
+   break;
+   
+   /* escaped delimiter found = remove / */
+   if (escaped) {
+   --t;
+   u = t;
+escaped = false;
+   while (u[0] != '\0') {
+   u[0] = u[1];
+   ++u;
+   }
+   }
+   }
+
+   if (*t != '\0') {
+   *t = '\0';  /* end current token */
+   *stringp = t+1; /* *t != '\0' = *(t+1) is valid */
+   } else
+   *stringp = 0;   /* end of string reached */ 
+   
+   return s;   /* return current token */
+}
+
+/*
+ * This function converts escaped characters:
+ * '\delim' -> 'delim', '\\' -> '\'
+ * 
+ * If there are unescaped delimiters, 'false' will be returned to indicate
+ * an error, otherwise remove_escapes returns 'true'.
+ */
+bool remove_escapes (char **s, const char* delim) {
+   boolescaped=false;
+   char*t, *u;
+   int i;
+   
+   for (t = *s; *t != '\0'; ++t) {
+   if (*t == '\\') {
+   if (escaped) {  /* convert \\ to \ */
+   --t;
+   u = t;
+   escaped = false;
+   while (u[0] != '\0') {
+   u[0] = u[1];
+   ++u;
+   }
+   } else  /* start \-Sequence */
+   escaped = true;
+   continue;
+   }
+   
+   /* search for delimiter */
+   for (i=0; delim[i] != '\0'; ++i) {
+   if (*t == delim[i])
+   break;
+   }
+   
+   /* un-escaped delimiter found = error */
+   if (!escaped && delim[i] != '\0')
+   return false;
+
+   /* escaped delimiter found = remove / */
+   if (escaped) {
+   --t;
+   u = t;
+escaped = false;
+   while (u[0] != '\0') {
+   u[0] = u[1];
+   ++u;
+   }
+   }
+   }
+   
+   return true;
+}
+
 static int
 

Re: [patch] Re: getfsent(3) and spaces in fstab

2003-08-02 Thread Terry Lambert
Simon Barner wrote:
 The attached patch will allow blanks and tabs for file systems and
 path names, as long as they are protected by a '\'.
 
 For the old fstab style, blanks and tabs are not allowed as delimiters
 (as it was in the old implementation).

You need to add '\\' to the delimited list, so that it is not
skipped.

You need to add '\\' to the list of characters that can be escaped,
or you've just traded the inability to specify '\t' or ' ' for an
inability to specify '\\'.

-- Terry
___
[EMAIL PROTECTED] mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-hackers
To unsubscribe, send any mail to [EMAIL PROTECTED]


[patch] Re: getfsent(3) and spaces in fstab

2003-08-01 Thread Simon Barner
Hi,

 For all the 'good rules' of orthogonality and consistency etc 
 it is a good thing, however I still feel that at the level of
 the fstab where you are mounting entire file system trees, 
 the simplest naming formats are probably the best. 
 A philosophical point only, but it would keep the fstab format
 minimalistic and clean.
 
 This point of view could probably be stated in the man page once
 the '\ ' form is implemented, as a suggested practice.

Yes, you are definitely right with this point, and I agree that such a
hint should be added to the fstab(5) man page.

But as somebody else pointed out, one of FreeBSD's advantages is its
interoperability with other systems, and this was one of the reasons why
I decided to extend 'getfsent'.

The attached patch will allow blanks and tabs for file systems and
path names, as long as they are protected by a '\'.

For the old fstab style, blanks and tabs are not allowed as delimiters
(as it was in the old implementation).

Thus

/foo\ bar:/mnt\ point:ufs

is a valid old-style fstab entry, while

 /foo\ bar:/mnt\ point:ufs
/foo\ bar :/mnt\ point:ufs

etc. are not.

For the current fstab style, unescaped blanks and tabs are handled as
delimiters.

Simon
--- fstab.c.origFri Aug  1 17:18:00 2003
+++ fstab.c Fri Aug  1 17:17:46 2003
@@ -84,6 +84,78 @@
_fs_fstab.fs_spec = buf;
 }
 
+/*
+ * Get next token from string *stringp, where tokens are possibly-empty
+ * strings separated by characters from delim.
+ *
+ * Writes NULs into the string at *stringp to end tokens.
+ * delim need not remain constant from call to call.
+ * On return, *stringp points past the last NUL written (if there might
+ * be further tokens), or is NULL (if there are definitely no more tokens).
+ *
+ * If *stringp is NULL, es_strsep returns NULL.
+ *
+ * In contrast to strsep(3), es_strsep will allow escaped delimiters
+ * within a token.
+ */
+char *
+es_strsep(char **stringp, const char *delim)
+{
+   char*s, *t;
+   size_t  n;
+
+   if (*stringp == '\0')   /* empty string */
+   return NULL;
+   s = *stringp;
+   s += strspn (s, delim); /* skip delimiters */
+
+   if (*s == '\0') /* string consists only of delimiters */
+   return NULL;
+   
+   /* skip a string consisting of non-delimiters or escapted delimiters */
+   t = s;
+   for (;;) {
+   /* skip non-delimiters */
+   n = strcspn (t, delim);
+   if (n == 0) /* delimiters found - end of token */
+   break;
+   t += n;
+   if (*t == '\0') /* end of string reached */
+   break;
+
+   /* skip escaped delimiters */
+   if (*(t-1) == '\\') /* n != 0 = *(t-1) is valid */
+   ++t;
+   if (*t == '\0') /* end of string reached */
+   break;
+   }
+
+   if (*t != '\0') {
+   *t = '\0';  /* end current token */
+   *stringp = t+1; /* *t != '\0' = *(t+1) is valid */
+   } else
+   *stringp = 0;   /* end of string reached */ 
+   
+   return s;   /* return current token */
+}
+
+/*
+ * This function removes all '\' characters from a string.
+ *
+ * It will NOT handle escape sequences as '\t' or '\n'!
+ */
+void
+remove_escapes (char **s) {
+   char *p = *s, *q;
+   while ((p = strchr (p, '\\')) != 0) {
+   q = p;
+   while (q[0] != '\0') {
+   q[0] = q[1];
+   ++q;
+   }
+   }
+}
+
 static int
 fstabscan()
 {
@@ -101,9 +173,17 @@
++LineNo;
if (*line == '#' || *line == '\n')
continue;
-   if (!strpbrk(p, " \t")) {
+
+   /* only escaped white-spaces are allowed in old-style format */
+   cp = p;
+   while ((cp = strpbrk(cp, " \t")) != 0 &&
+   cp != p && cp[-1] == '\\')
+   ++cp;
+   if (cp == 0) {
_fs_fstab.fs_spec = strsep(&p, ":\n");
+   remove_escapes (&_fs_fstab.fs_spec);
_fs_fstab.fs_file = strsep(&p, ":\n");
+   remove_escapes (&_fs_fstab.fs_file);
fixfsfile();
_fs_fstab.fs_type = strsep(&p, ":\n");
if (_fs_fstab.fs_type) {
@@ -124,14 +204,18 @@
goto bad;
}
 /* OLD_STYLE_FSTAB */
-   while ((cp = strsep(&p, " \t\n")) != NULL && *cp == '\0')
+   while ((cp = es_strsep(&p, " \t\n")) != NULL && *cp == '\0')
;
_fs_fstab.fs_spec = cp;
+   remove_escapes (&_fs_fstab.fs_spec);
if (!_fs_fstab.fs_spec || *_fs_fstab.fs_spec == '#')
continue;
-