Author: delphij
Date: Mon Jul 27 19:21:23 2015
New Revision: 285923
URL: https://svnweb.freebsd.org/changeset/base/285923

Log:
  Rewrite the scan procedure as a finite state machine (FSM).  This improves
  code readability by making the transitions between states explicit, and
  avoids a bug in the handling of repeated $'s.
  
  Reviewed by:  bapt
  Differential Revision: https://reviews.freebsd.org/D3221

Modified:
  head/usr.bin/ident/ident.c

Modified: head/usr.bin/ident/ident.c
==============================================================================
--- head/usr.bin/ident/ident.c  Mon Jul 27 17:53:11 2015        (r285922)
+++ head/usr.bin/ident/ident.c  Mon Jul 27 19:21:23 2015        (r285923)
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2015 Baptiste Daroussin <[email protected]>
+ * Copyright (c) 2015 Xin LI <[email protected]>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -38,68 +39,23 @@ __FBSDID("$FreeBSD$");
 #include <unistd.h>
 #include <xlocale.h>
 
-static bool
-parse_id(FILE *fp, struct sbuf *buf, locale_t l)
-{
-       int c;
-       bool isid = false;
-       bool subversion = false;
-
-       sbuf_putc(buf, '$');
-       while ((c = fgetc(fp)) != EOF) {
-               sbuf_putc(buf, c);
-               if (!isid) {
-                       if (c == '$') {
-                               sbuf_clear(buf);
-                               sbuf_putc(buf, '$');
-                               continue;
-                       }
-                       if (c == ':') {
-                                c = fgetc(fp);
-                                /* accept :: for subversion compatibility */
-                                if (c == ':') {
-                                       subversion = true;
-                                       sbuf_putc(buf, c);
-                                       c = fgetc(fp);
-                               }
-                               if (c == ' ') {
-                                       sbuf_putc(buf, c);
-                                       isid = true;
-                                       continue;
-                               }
-                               return (false);
-                       }
-
-                       if (!isalpha_l(c, l))
-                               return (false);
-               } else {
-                       if (c == '\n')
-                               return (false);
-                       if (c == '$') {
-                               sbuf_finish(buf);
-                               /* should end with a space */
-                               c = sbuf_data(buf)[sbuf_len(buf) - 2];
-                               if (!subversion) {
-                                       if (c != ' ')
-                                               return (0);
-                               } else if (subversion) {
-                                       if (c != ' ' && c != '#')
-                                               return (0);
-                               }
-                               printf("     %s\n", sbuf_data(buf));
-                               return (true);
-                       }
-               }
-       }
-
-       return (false);
-}
+typedef enum {
+       /* state        condition to transit to next state */
+       INIT,           /* '$' */
+       DELIM_SEEN,     /* letter */
+       KEYWORD,        /* punctuation mark */
+       PUNC_SEEN,      /* ':' -> _SVN; space -> TEXT */
+       PUNC_SEEN_SVN,  /* space */
+       TEXT
+} analyzer_states;
 
 static int
 scan(FILE *fp, const char *name, bool quiet)
 {
        int c;
        bool hasid = false;
+       bool subversion = false;
+       analyzer_states state = INIT;
        struct sbuf *id = sbuf_new_auto();
        locale_t l;
 
@@ -109,10 +65,123 @@ scan(FILE *fp, const char *name, bool qu
                printf("%s:\n", name);
 
        while ((c = fgetc(fp)) != EOF) {
-               if (c == '$') {
-                       sbuf_clear(id);
-                       if (parse_id(fp, id, l))
-                               hasid = true;
+               switch (state) {
+               case INIT:
+                       if (c == '$') {
+                               /* Transit to DELIM_SEEN if we see $ */
+                               state = DELIM_SEEN;
+                       } else {
+                               /* Otherwise, stay in INIT state */
+                               continue;
+                       }
+                       break;
+               case DELIM_SEEN:
+                       if (isalpha_l(c, l)) {
+                               /* Transit to KEYWORD if we see letter */
+                               sbuf_clear(id);
+                               sbuf_putc(id, '$');
+                               sbuf_putc(id, c);
+                               state = KEYWORD;
+
+                               continue;
+                       } else if (c == '$') {
+                               /* Or, stay in DELIM_SEEN if more $ */
+                               continue;
+                       } else {
+                               /* Otherwise, transit back to INIT */
+                               state = INIT;
+                       }
+                       break;
+               case KEYWORD:
+                       sbuf_putc(id, c);
+
+                       if (isalpha_l(c, l)) {
+                               /*
+                                * Stay in KEYWORD if additional letter is seen
+                                */
+                               continue;
+                       } else if (c == ':') {
+                               /*
+                                * See ':' for the first time, transit to
+                                * PUNC_SEEN.
+                                */
+                               state = PUNC_SEEN;
+                               subversion = false;
+                       } else if (c == '$') {
+                               /*
+                                * Incomplete ident.  Go back to DELIM_SEEN
+                                * state because we see a '$' which could be
+                                * the beginning of a keyword.
+                                */
+                               state = DELIM_SEEN;
+                       } else {
+                               /*
+                                * Go back to INIT state otherwise.
+                                */
+                               state = INIT;
+                       }
+                       break;
+               case PUNC_SEEN:
+               case PUNC_SEEN_SVN:
+                       sbuf_putc(id, c);
+
+                       switch (c) {
+                       case ':':
+                               /*
+                                * If we see '::' (seen : in PUNC_SEEN),
+                                * activate subversion treatment and transit
+                                * to PUNC_SEEN_SVN state.
+                                *
+                                * If more than two :'s were seen, the ident
+                                * is invalid and we would therefore go back
+                                * to INIT state.
+                                */
+                               if (state == PUNC_SEEN) {
+                                       state = PUNC_SEEN_SVN;
+                                       subversion = true;
+                               } else {
+                                       state = INIT;
+                               }
+                               break;
+                       case ' ':
+                               /*
+                                * A space after ':' or '::' indicates we are at the
+                                * last component of potential ident.
+                                */
+                               state = TEXT;
+                               break;
+                       default:
+                               /* All other characters are invalid */
+                               state = INIT;
+                               break;
+                       }
+                       break;
+               case TEXT:
+                       sbuf_putc(id, c);
+
+                       if (iscntrl_l(c, l)) {
+                               /* Control characters are not allowed in this state */
+                               state = INIT;
+                       } else if (c == '$') {
+                               sbuf_finish(id);
+                               /*
+                                * valid ident should end with a space.
+                                *
+                                * subversion extension uses '#' to indicate that
+                                * the keyword expansion have exceeded the fixed
+                                * width, so it is also permitted if we are in
+                                * subversion mode.  No length check is enforced
+                                * because GNU RCS ident(1) does not do it either.
+                                */
+                               c = sbuf_data(id)[sbuf_len(id) - 2];
+                               if (c == ' ' || (subversion && c == '#')) {
+                                       printf("     %s\n", sbuf_data(id));
+                                       hasid = true;
+                               }
+                               state = INIT;
+                       }
+                       /* Other characters: stay in the state */
+                       break;
                }
        }
        sbuf_delete(id);
_______________________________________________
[email protected] mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "[email protected]"

Reply via email to