Re: [apparmor] [PATCH 01/10] clean up the lexer

Seth Arnold Wed, 24 Jul 2013 00:35:03 -0700

On Sun, Jul 21, 2013 at 10:32:44PM -0700, John Johansen wrote:
> - Make indenting consistent
> - Move common match + fn patterns into a single shared entry with mulitstate
>   headers
> - add names table to convert lexer state #s to state names used in the code
> - Create/use macros for common patterns of DUMP, DEBUG, return ...
>   this fixes a few places where preprocess or DEBUG output was not
>   available
> - update RE patterns for bugs eg. {WS} inside a character class [] does
>   not match whitespace
> 
> all told despite adding code to provide better support to debug and
> preprocessing, the code is about 150 lines shorter, and has few corner
> cases cleaned up.


Oh man, this is beautiful. Lovely. Thanks. :)

A few comments inline..

> Signed-off-by: John Johansen <[email protected]>
> ---
>  parser/parser_lex.l  | 661 +++++++++++++++++++--------------------------------
>  parser/parser_yacc.y |   1 +
>  2 files changed, 251 insertions(+), 411 deletions(-)
> 
> diff --git a/parser/parser_lex.l b/parser/parser_lex.l
> index 539e16a..1b14625 100644
> --- a/parser/parser_lex.l
> +++ b/parser/parser_lex.l
> @@ -46,7 +46,8 @@
>  #endif
>  /* #define DEBUG */
>  #ifdef DEBUG
> -#define PDEBUG(fmt, args...) printf("Lexer (state %d): " fmt, YY_START, ## args)
> +static int yy_top_state(void);
> +#define PDEBUG(fmt, args...) printf("Lexer (Line %d) (state %s): " fmt, current_lineno, state_names[YY_START], ## args)
>  #else
>  #define PDEBUG(fmt, args...) /* Do nothing */
>  #endif
> @@ -54,8 +55,44 @@
>  
>  #define DUMP_PREPROCESS do { if (preprocess_only) ECHO; } while (0)
>  
> +#
> +#define RETURN_TOKEN(X) \
> +do { \
> +     DUMP_PREPROCESS; \
> +     PDEBUG("Matched: %s\n", yytext); \
> +     return (X); \
> +} while (0)
> +
> +#define POP_AND_RETURN(X) \
> +do { \
> +     DUMP_PREPROCESS; \
> +     PDEBUG(" (ret_to(%s)): Matched: %s\n", state_names[yy_top_state()], yytext); \
> +     yy_pop_state(); \
> +     return (X); \
> +} while (0)
> +
> +#define PUSH_AND_RETURN(X, Y) \
> +do { \
> +     DUMP_PREPROCESS; \
> +     PDEBUG(" (push(%s)): Matched: %s\n", state_names[(X)], yytext); \
> +     yy_push_state(X); \
> +     return (Y); \
> +} while (0)
> +
> +#define BEGIN_AND_RETURN(X, Y) \
> +do { \
> +     DUMP_PREPROCESS; \
> +     PDEBUG(" (begin(%s)): Matched: %s\n", state_names[(X)], yytext); \
> +     BEGIN(X); \
> +     return (Y); \
> +} while (0)
> +
> +
>  #define YY_NO_INPUT
>  
> +#define STATE_TABLE_ENT(X) [(X)] = #X
> +static const char *const state_names[];
> +
>  struct ignored_suffix_t {
>       char * text;
>       int len;
> @@ -199,8 +236,9 @@ POST_VAR_ID       {POST_VAR_ID_CHARS}|(,{POST_VAR_ID_CHARS})
>  LIST_VALUE_ID_CHARS  [^ \t\n"!,]{-}[()]
>  LIST_VALUE_ID        {LIST_VALUE_ID_CHARS}+
>  ID_CHARS_NOEQ        [^ \t\n"!,]{-}[=]
> +LEADING_ID_CHARS_NOEQ [^ \t\n"!,]{-}[=()+&]
>  ID_NOEQ              {ID_CHARS_NOEQ}|(,{ID_CHARS_NOEQ})
> -IDS_NOEQ     {ID_NOEQ}+
> +IDS_NOEQ       {LEADING_ID_CHARS_NOEQ}{ID_NOEQ}*
>  ALLOWED_QUOTED_ID    [^\0"]|\\\"
>  QUOTED_ID    \"{ALLOWED_QUOTED_ID}*\"
>  
> @@ -221,10 +259,12 @@ OPEN_PAREN      \(
>  CLOSE_PAREN  \)
>  COMMA                \,
>  EQUALS               =
> +NOTEQUALS    !=

Not used...

>  ADD_ASSIGN   \+=
>  ARROW                ->
>  LT_EQUAL     <=
>  
> +/* IF adding new state please update state_names table at eof */
>  %x SUB_ID
>  %x SUB_VALUE
>  %x EXTCOND_MODE
> @@ -247,483 +287,282 @@ LT_EQUAL      <=
>       }
>  %}
>  
> -<INCLUDE>{
> -     {WS}+   { /* Eat whitespace */ }
> -     \<([^\> \t\n]+)\>       {       /* <filename> */
> -             char *filename = strdup(yytext);
> -             filename[strlen(filename) - 1] = '\0';
> -             include_filename(filename + 1, 1);
> -             free(filename);
> -             yy_pop_state();
> -             }
> +<INITIAL,INCLUDE,LIST_VAL_MODE,EXTCOND_MODE,ASSIGN_MODE,NETWORK_MODE,CHANGE_PROFILE_MODE,RLIMIT_MODE,MOUNT_MODE>{
> +     {WS}+   {  DUMP_PREPROCESS; /* Ignoring whitespace */ }
> +}
>  
> -     \"([^\" \t\n]+)\"       {       /* "filename" */
> +<INCLUDE>{
> +     (\<([^\> \t\n]+)\>|\"([^\" \t\n]+)\")   {       /* <filename> */
>               char *filename = strdup(yytext);
>               filename[strlen(filename) - 1] = '\0';
> -             include_filename(filename + 1, 0);
> +             include_filename(filename + 1, *filename == '<');
>               free(filename);
>               yy_pop_state();
> -             }
> +     }
>  
> -     [^\<\>\"{WS}]+ {        /* filename */
> +     [^\<\>\" \t\n]+ {       /* filename */
>               include_filename(yytext, 0);
>               yy_pop_state();
> -             }
> +     }
>  }
>  
>  <<EOF>> {
>       fclose(yyin);
>       pop_include_stack();
>       yypop_buffer_state();
> -     if ( !YY_CURRENT_BUFFER ) yyterminate();
> +     if ( !YY_CURRENT_BUFFER )
> +             yyterminate();
>  }
>  
>  <INITIAL,MOUNT_MODE>{
>       {VARIABLE_NAME}/{WS}*=  {
> -                             /* we match to the = in the lexer so that
> -                              * can switch scanner state.  By the time
> -                              * the parser see the = it may be to late
> -                              * as bison may have requested the next
> -                              * token from the scanner
> -                              */
> -                             DUMP_PREPROCESS;
> -                             PDEBUG("conditional %s=\n", yytext);
> -                             yylval.id = processid(yytext, yyleng);
> -                             yy_push_state(EXTCOND_MODE);
> -                             return TOK_CONDID;
> -                     }
> +             /* we match to the = in the lexer so that can switch scanner
> +              * state.  By the time the parser see the = it may be to late
> +              * as bison may have requested the next token from the scanner
> +              */

While we're making drastic changes, some of these comment errors that
have grated on me for years are finally fair game :) "so that we can",
and "too late", please.

> +             DUMP_PREPROCESS;
> +             yylval.id = processid(yytext, yyleng);
> +             PUSH_AND_RETURN(EXTCOND_MODE, TOK_CONDID);

PUSH_AND_RETURN() already includes the DUMP_PREPROCESS.

> +     }
> +
>       {VARIABLE_NAME}/{WS}+in{WS}*\(  {
> -                             /* we match to 'in' in the lexer so that
> -                              * we can switch scanner state.  By the time
> -                              * the parser see the 'in' it may be to late
> -                              * as bison may have requested the next
> -                              * token from the scanner
> -                              */
> -                             DUMP_PREPROCESS;
> -                             PDEBUG("conditional %s=\n", yytext);
> -                             yylval.id = processid(yytext, yyleng);
> -                             yy_push_state(EXTCOND_MODE);
> -                             return TOK_CONDID;
> -                     }
> +             /* we match to 'in' in the lexer so that we can switch scanner
> +              * state.  By the time the parser see the 'in' it may be to
> +              * late as bison may have requested the next token from the
> +              * scanner
> +              */

Again, "too late".

> +             DUMP_PREPROCESS;
> +             yylval.id = processid(yytext, yyleng);
> +             PUSH_AND_RETURN(EXTCOND_MODE, TOK_CONDID);

PUSH_AND_RETURN() already includes the DUMP_PREPROCESS.

> +     }
>  }
>  
>  <SUB_ID>{
> -     ({IDS}|{QUOTED_ID})     {
> -                       /* Ugh, this is a gross hack. I used to use
> -                        * {IDS} to match all TOK_IDs, but that would
> -                        * also match TOK_MODE + TOK_END_OF_RULE
> -                        * without any spaces in between (because it's
> -                        * a longer match). So now, when I want to
> -                        * match any random string, I go into a
> -                        * separate state. */
> -                     DUMP_PREPROCESS;
> -                     yylval.id =  processid(yytext, yyleng);
> -                     PDEBUG("Found sub name: \"%s\"\n",  yylval.id);
> -                     yy_pop_state();
> -                     return TOK_ID;
> -             }
> -
> -     [^\n]   {
> -                     DUMP_PREPROCESS;
> -                     /* Something we didn't expect */
> -                     yyerror(_("Found unexpected character: '%s'"), yytext);
> -             }
> +     ({IDS}|{QUOTED_ID}) {
> +             /* Go into separate state to match generic ID strings */
> +             yylval.id =  processid(yytext, yyleng);
> +             POP_AND_RETURN(TOK_ID);
> +     }
>  }
>  
>  <SUB_VALUE>{
> -     ({IDS}|{QUOTED_ID})     {
> -                       /* Ugh, this is a gross hack. I used to use
> -                        * {IDS} to match all TOK_IDs, but that would
> -                        * also match TOK_MODE + TOK_END_OF_RULE
> -                        * without any spaces in between (because it's
> -                        * a longer match). So now, when I want to
> -                        * match any random string, I go into a
> -                        * separate state. */
> -                     DUMP_PREPROCESS;
> -                     yylval.id =  processid(yytext, yyleng);
> -                     PDEBUG("Found sub value: \"%s\"\n",  yylval.id);
> -                     yy_pop_state();
> -                     return TOK_VALUE;
> -             }
> -
> -     [^\n]   {
> -                     DUMP_PREPROCESS;
> -                     /* Something we didn't expect */
> -                     yyerror(_("Found unexpected character: '%s'"), yytext);
> -             }
> +     ({IDS}|{QUOTED_ID}) {
> +             /* Go into separate state to match generic VALUE strings */
> +             yylval.id =  processid(yytext, yyleng);
> +             POP_AND_RETURN(TOK_VALUE);
> +     }
>  }

Can <SUB_ID> and <SUB_VALUE> be combined here? Is the clarity increased
if they are combined?

>  
>  <LIST_VAL_MODE>{
> -     {CLOSE_PAREN}   {
> -                     DUMP_PREPROCESS;
> -                     PDEBUG("listval: )\n");
> -                     yy_pop_state();
> -                     return TOK_CLOSEPAREN;
> -                     }
> -
> -     {WS}+           { DUMP_PREPROCESS; /* Eat whitespace */ }
> +     {CLOSE_PAREN} { POP_AND_RETURN(TOK_CLOSEPAREN); }
>  
>       {COMMA} {
> -                     DUMP_PREPROCESS;
> -                     PDEBUG("listval: , \n");
> -                     /* East comma, its an optional separator */
> -                     }
> -
> -     ({LIST_VALUE_ID}|{QUOTED_ID})   {
> -                     DUMP_PREPROCESS;
> -                     yylval.id = processid(yytext, yyleng);
> -                     PDEBUG("listval: \"%s\"\n", yylval.id);
> -                     return TOK_VALUE;
> -                     }
> -
> -     [^\n]           {
> -                     DUMP_PREPROCESS;
> -                     /* Something we didn't expect */
> -                     yyerror(_("Found unexpected character: '%s'"), yytext);
> -                     }
> +             DUMP_PREPROCESS;
> +             PDEBUG("listval: , \n");
> +             /* Eat comma, its an optional separator */
> +     }
> +
> +     ({LIST_VALUE_ID}|{QUOTED_ID}) {
> +             yylval.id = processid(yytext, yyleng);
> +             RETURN_TOKEN(TOK_VALUE);
> +     }
>  }
>  
>  <EXTCOND_MODE>{
> -     {WS}+           { DUMP_PREPROCESS; /* Eat whitespace */ }
> -
> -     {EQUALS}{WS}*/[^(\n]{-}{WS}     {
> -                     DUMP_PREPROCESS;
> -                     BEGIN(SUB_VALUE);
> -                     return TOK_EQUALS;
> -             }
> -
> -     {EQUALS}        {
> -                     DUMP_PREPROCESS;
> -                     return TOK_EQUALS;
> -             }
> -
> -     {OPEN_PAREN}    {
> -                     DUMP_PREPROCESS;
> -                     PDEBUG("extcond listv\n");
> -                     /* Don't push state here as this is a transition
> -                      * start condition and we want to return to the start
> -                      * condition that invoked <EXTCOND_MODE> when
> -                      * LIST_VAL_ID is done
> -                      */
> -                     BEGIN(LIST_VAL_MODE);
> -                     return TOK_OPENPAREN;
> -             }
> +     {EQUALS}{WS}*/[^(\n]{-}{WS} { BEGIN_AND_RETURN(SUB_VALUE, TOK_EQUALS);}
>  
> -     in      {
> -                     DUMP_PREPROCESS;
> -                     return TOK_IN;
> -             }
> +     {EQUALS} { RETURN_TOKEN(TOK_EQUALS); }
>  
> -     [^\n]   {
> -                     DUMP_PREPROCESS;
> -                     /* Something we didn't expect */
> -                     yyerror(_("Found unexpected character: '%s' %d"), yytext, *yytext);
> -             }
> +     /* Don't push state here as this is a transition start condition and
> +      * we want to return to the start condition that invoked <EXTCOND_MODE>
> +      * when LIST_VAL_ID is done
> +      */
> +     {OPEN_PAREN} { BEGIN_AND_RETURN(LIST_VAL_MODE, TOK_OPENPAREN); }
>  
> +     in { RETURN_TOKEN(TOK_IN); }
>  }
>  
>  <ASSIGN_MODE>{
> -     {WS}+           { DUMP_PREPROCESS; /* Eat whitespace */ }
> -
> -     ({IDS}|{QUOTED_ID})             {
> -                     DUMP_PREPROCESS;
> -                     yylval.var_val = processid(yytext, yyleng);
> -                     PDEBUG("Found assignment value: \"%s\"\n", yylval.var_val);
> -                     return TOK_VALUE;
> -                     }
> +     ({IDS}|{QUOTED_ID}) {
> +             yylval.var_val = processid(yytext, yyleng);
> +             RETURN_TOKEN(TOK_VALUE);
> +     }
>  
> -     {END_OF_RULE}   {
> -                     DUMP_PREPROCESS;
> -                     yylval.id = strdup(yytext);
> -                     yyerror(_("Variable declarations do not accept trailing commas"));
> -                     }
> +     {END_OF_RULE} {
> +             yylval.id = strdup(yytext);
> +             DUMP_PREPROCESS;
> +             yyerror(_("Variable declarations do not accept trailing commas"));
> +     }

It wasn't introduced here, but I don't understand the strdup();
yyerror() is going to exit anyway.

>  
> -     \\\n            { DUMP_PREPROCESS; current_lineno++ ; }
> +     \\\n    { DUMP_PREPROCESS; current_lineno++ ; }
>  
> -     \r?\n           {
> -                     DUMP_PREPROCESS;
> -                     current_lineno++;
> -                     yy_pop_state();
> -                     }
> -     [^\n]           {
> -                     DUMP_PREPROCESS;
> -                     /* Something we didn't expect */
> -                     yyerror(_("Found unexpected character: '%s'"), yytext);
> -                     }
> +     \r?\n   {
> +             DUMP_PREPROCESS;
> +             current_lineno++;
> +             yy_pop_state();
> +     }
>  }
>  
>  <NETWORK_MODE>{
> -     {WS}+           { DUMP_PREPROCESS; /* Eat whitespace */ }
> -
> -     {IDS}           {
> -                     DUMP_PREPROCESS;
> -                     yylval.id = strdup(yytext);
> -                     return TOK_ID;
> -                     }
> -     {END_OF_RULE}   {
> -                     DUMP_PREPROCESS;
> -                     yy_pop_state();
> -                     return TOK_END_OF_RULE;
> -             }
> -     [^\n]           {
> -                     DUMP_PREPROCESS;
> -                       /* Something we didn't expect */
> -                     yylval.id = strdup(yytext);
> -                     yyerror(_("(network_mode) Found unexpected character: '%s'"), yylval.id);
> -                     }
> -
> -     \r?\n           {
> -                     DUMP_PREPROCESS;
> -                     current_lineno++;
> -                     }
> +     {IDS} {
> +             yylval.id = strdup(yytext);
> +             RETURN_TOKEN(TOK_ID);
> +     }
>  }
>  
>  <CHANGE_PROFILE_MODE>{
> -     {ARROW}         {
> -                     DUMP_PREPROCESS;
> -                     PDEBUG("Matched a change profile arrow\n");
> -                     return TOK_ARROW;
> -                     }
> -
> -     ({IDS}|{QUOTED_ID})     {
> -                       /* Ugh, this is a gross hack. I used to use
> -                        * {IDS} to match all TOK_IDs, but that would
> -                        * also match TOK_MODE + TOK_END_OF_RULE
> -                        * without any spaces in between (because it's
> -                        * a longer match). So now, when I want to
> -                        * match any random string, I go into a
> -                        * separate state. */
> -                     DUMP_PREPROCESS;
> -                     yylval.id = processid(yytext, yyleng);
> -                     PDEBUG("Found change profile name: \"%s\"\n", yylval.id);
> -                     yy_pop_state();
> -                     return TOK_ID;
> -             }
> +     {ARROW}         { RETURN_TOKEN(TOK_ARROW); }
>  
> -     {WS}+                   {  DUMP_PREPROCESS; /* Ignoring whitespace */ }
> -     [^\n]   {
> -                     DUMP_PREPROCESS;
> -                     /* Something we didn't expect */
> -                     yyerror(_("Found unexpected character: '%s'"), yytext);
> -             }
> +     ({IDS}|{QUOTED_ID}) {
> +             yylval.id = processid(yytext, yyleng);
> +             POP_AND_RETURN(TOK_ID);
> +     }
>  }
>  
>  <RLIMIT_MODE>{
> -     {WS}+           { DUMP_PREPROCESS; /* Eat whitespace */ }
> -
> -
>       -?{NUMBER}[[:alpha:]]*  {

Not introduced in this patch, but can we use a more-specific set of
chars here to give better error messages? Or would errors get worse? At
least line number is easily available here...

> -                     DUMP_PREPROCESS;
> -                     yylval.var_val = strdup(yytext);
> -                     return TOK_VALUE;
> -                     }
> +             yylval.var_val = strdup(yytext);
> +             RETURN_TOKEN(TOK_VALUE);
> +     }
>  
>       {KEYWORD}       {
> -                     DUMP_PREPROCESS;
> -                     yylval.id = strdup(yytext);
> -                     if (strcmp(yytext, "infinity") == 0)
> -                             return TOK_VALUE;
> -                     return TOK_ID;
> -                     }
> +             yylval.id = strdup(yytext);
> +             if (strcmp(yytext, "infinity") == 0)
> +                     RETURN_TOKEN(TOK_VALUE);
> +             RETURN_TOKEN(TOK_ID);
> +     }
>  
> -     {LT_EQUAL}      { DUMP_PREPROCESS; return TOK_LE; }
> +     {LT_EQUAL}      { RETURN_TOKEN(TOK_LE); }
> +}
>  
> -     {END_OF_RULE}   {
> -                     DUMP_PREPROCESS;
> -                     yy_pop_state();
> -                     return TOK_END_OF_RULE;
> -                     }
> +<MOUNT_MODE>{
> +     {ARROW}         { RETURN_TOKEN(TOK_ARROW); }
>  
> -     \\\n            {
> -                     DUMP_PREPROCESS;
> -                     current_lineno++;
> -                     yy_pop_state();
> -                     }
> +     ({IDS_NOEQ}|{PATHNAME}|{QUOTED_ID}) {
> +             yylval.id = processid(yytext, yyleng);
> +             RETURN_TOKEN(TOK_ID);
> +     }
> +}
>  
> -     \r?\n           {
> -                     DUMP_PREPROCESS;
> -                     current_lineno++;
> -                     yy_pop_state();
> -                     }
> +#include/.*\r?\n     {

Hunh, I don't think I knew that "# include" wouldn't include a file. Now I
do know. :)

> +     DUMP_PREPROCESS;
> +     PDEBUG("Matched: %s\n", yytext);
> +     yy_push_state(INCLUDE);
>  }
>  
> -<MOUNT_MODE>{
> -     {WS}+           {  DUMP_PREPROCESS; /* Ignoring whitespace */ }
> +#.*\r?\n     { /* normal comment */
> +     DUMP_PREPROCESS;
> +     PDEBUG("comment(%d): %s\n", current_lineno, yytext);
> +     current_lineno++;
> +}
>  
> -     {ARROW}         {
> -                     DUMP_PREPROCESS;
> -                     PDEBUG("Matched arrow\n");
> -                     return TOK_ARROW;
> -                     }
> +{CARET}              { PUSH_AND_RETURN(SUB_ID, TOK_CARET); }
>  
> -     ({IDS_NOEQ}|{PATHNAME}|{QUOTED_ID})     {
> -                     DUMP_PREPROCESS;
> -                     yylval.id = processid(yytext, yyleng);
> -                     PDEBUG("Found ID: \"%s\"\n", yylval.id);
> -                     return TOK_ID;
> -                     }
> +{ARROW}              { RETURN_TOKEN(TOK_ARROW); }
>  
> +{EQUALS}     { PUSH_AND_RETURN(ASSIGN_MODE, TOK_EQUALS); }
> +
> +{ADD_ASSIGN} { PUSH_AND_RETURN(ASSIGN_MODE, TOK_ADD_ASSIGN); }
> +
> +{SET_VARIABLE}       {
> +     yylval.set_var = strdup(yytext);
> +     RETURN_TOKEN(TOK_SET_VAR);
> +}
> +
> +{BOOL_VARIABLE}      {
> +     yylval.bool_var = strdup(yytext);
> +     RETURN_TOKEN(TOK_BOOL_VAR);
> +}
> +
> +{OPEN_BRACE} { RETURN_TOKEN(TOK_OPEN); }
> +
> +{CLOSE_BRACE}        { RETURN_TOKEN(TOK_CLOSE); }
> +
> +({PATHNAME}|{QPATHNAME}) {
> +     yylval.id = processid(yytext, yyleng);
> +     RETURN_TOKEN(TOK_ID);
> +}
> +
> +({MODES})/([[:space:],]) {
> +     yylval.mode = strdup(yytext);
> +     RETURN_TOKEN(TOK_MODE);
> +}
> +
> +{HAT}                { PUSH_AND_RETURN(SUB_ID, TOK_HAT); }
> +
> +{PROFILE}    { PUSH_AND_RETURN(SUB_ID, TOK_PROFILE); }
> +
> +{COLON}              { RETURN_TOKEN(TOK_COLON); }
> +
> +{OPEN_PAREN} { PUSH_AND_RETURN(LIST_VAL_MODE, TOK_OPENPAREN); }
> +
> +{VARIABLE_NAME}      {
> +     DUMP_PREPROCESS;
> +     int token = get_keyword_token(yytext);
> +     int state = INITIAL;
> +
> +     /* special cases */
> +     switch (token) {
> +     case -1:
> +             /* no token found */
> +             yylval.id = processunquoted(yytext, yyleng);
> +             RETURN_TOKEN(TOK_ID);
> +             break;
> +     case TOK_RLIMIT:
> +             state = RLIMIT_MODE;
> +             break;
> +     case TOK_NETWORK:
> +             state = NETWORK_MODE;
> +             break;
> +     case TOK_CHANGE_PROFILE:
> +             state = CHANGE_PROFILE_MODE;
> +             break;
> +     case TOK_MOUNT:
> +     case TOK_REMOUNT:
> +     case TOK_UMOUNT:
> +             state = MOUNT_MODE;
> +             break;
> +     default: /* nothing */
> +             break;
> +     }
> +     PUSH_AND_RETURN(state, token);
> +}
> +
> +<INITIAL,NETWORK_MODE,RLIMIT_MODE,MOUNT_MODE>{
>       {END_OF_RULE}   {
> -                     DUMP_PREPROCESS;
> +             if (YY_START != INITIAL)
>                       yy_pop_state();
> -                     return TOK_END_OF_RULE;
> -                     }
> -
> -     [^\n]           {
> -                     DUMP_PREPROCESS;
> -                     /* Something we didn't expect */
> -                     yyerror(_("Found unexpected character: '%s'"), yytext);
> -                     }
> +             RETURN_TOKEN(TOK_END_OF_RULE);
> +     }
>  
>       \r?\n           {
> -                     DUMP_PREPROCESS;
> -                     current_lineno++;
> -                     yy_pop_state();
> -                     }
> +             DUMP_PREPROCESS;
> +             current_lineno++;
> +     }
>  }
>  
> -#include/.*\r?\n      { /* include */
> -                     PDEBUG("Matched #include\n");
> -                     yy_push_state(INCLUDE);
> -                     }
> -
> -#.*\r?\n             { /* normal comment */
> -                     DUMP_PREPROCESS;
> -                     PDEBUG("comment(%d): %s\n", current_lineno, yytext);
> -                     current_lineno++;
> -                     }
> -
> -{END_OF_RULE}                { DUMP_PREPROCESS; return TOK_END_OF_RULE; }
> -
> -{CARET}                      {
> -                     DUMP_PREPROCESS;
> -                     PDEBUG("Matched hat ^\n");
> -                     yy_push_state(SUB_ID);
> -                     return TOK_CARET;
> -                     }
> -{ARROW}                      {
> -                     DUMP_PREPROCESS;
> -                     PDEBUG("Matched a arrow\n");
> -                     return TOK_ARROW;
> -                     }
> -{EQUALS}             {
> -                     DUMP_PREPROCESS;
> -                     PDEBUG("Matched equals for assignment\n");
> -                     yy_push_state(ASSIGN_MODE);
> -                     return TOK_EQUALS;
> -                     }
> -{ADD_ASSIGN}         {
> -                     DUMP_PREPROCESS;
> -                     PDEBUG("Matched additive value assignment\n");
> -                     yy_push_state(ASSIGN_MODE);
> -                     return TOK_ADD_ASSIGN;
> -                     }
> -{SET_VARIABLE}               {
> -                     DUMP_PREPROCESS;
> -                     yylval.set_var = strdup(yytext);
> -                     PDEBUG("Found set variable %s\n", yylval.set_var);
> -                     return TOK_SET_VAR;
> -                     }
> -
> -{BOOL_VARIABLE}              {
> -                     DUMP_PREPROCESS;
> -                     yylval.bool_var = strdup(yytext);
> -                     PDEBUG("Found boolean variable %s\n", yylval.bool_var);
> -                     return TOK_BOOL_VAR;
> -                     }
> -
> -{OPEN_BRACE}         {
> -                     DUMP_PREPROCESS;
> -                     PDEBUG("Open Brace\n");
> -                     return TOK_OPEN;
> -                     }
> -{CLOSE_BRACE}                {
> -                     DUMP_PREPROCESS;
> -                     PDEBUG("Close Brace\n");
> -                     return TOK_CLOSE;
> -                     }
> -
> -({PATHNAME}|{QPATHNAME})             {
> -                     DUMP_PREPROCESS;
> -                     yylval.id = processid(yytext, yyleng);
> -                     PDEBUG("Found id: \"%s\"\n", yylval.id);
> -                     return TOK_ID;
> -                     }
> -
> -({MODES})/([[:space:],])     {
> -                     DUMP_PREPROCESS;
> -                     yylval.mode = strdup(yytext);
> -                     PDEBUG("Found modes: %s\n", yylval.mode);
> -                     return TOK_MODE;
> -                     }
> -
> -{HAT}                        {
> -                     DUMP_PREPROCESS;
> -                     yy_push_state(SUB_ID);
> -                     return TOK_HAT;
> -                     }
> -
> -{PROFILE}            {
> -                     DUMP_PREPROCESS;
> -                     yy_push_state(SUB_ID);
> -                     return TOK_PROFILE;
> -                     }
> -
> -{COLON}                      {
> -                     DUMP_PREPROCESS;
> -                     PDEBUG("Found a colon\n");
> -                     return TOK_COLON;
> -                     }
> -
> -{OPEN_PAREN} {
> -                     DUMP_PREPROCESS;
> -                     PDEBUG("listval (\n");
> -                     yy_push_state(LIST_VAL_MODE);
> -                     return TOK_OPENPAREN;
> -                     }
> -
> -{VARIABLE_NAME}              {
> -                     DUMP_PREPROCESS;
> -                     int token = get_keyword_token(yytext);
> -
> -                     /* special cases */
> -                     switch (token) {
> -                     case -1:
> -                             /* no token found */
> -                             yylval.id = processunquoted(yytext, yyleng);
> -                             PDEBUG("Found (var) id: \"%s\"\n", yylval.id);
> -                             return TOK_ID;
> -                             break;
> -                     case TOK_RLIMIT:
> -                             yy_push_state(RLIMIT_MODE);
> -                             break;
> -                     case TOK_NETWORK:
> -                             yy_push_state(NETWORK_MODE);
> -                             break;
> -                     case TOK_CHANGE_PROFILE:
> -                             yy_push_state(CHANGE_PROFILE_MODE);
> -                             break;
> -                     case TOK_MOUNT:
> -                     case TOK_REMOUNT:
> -                     case TOK_UMOUNT:
> -                             PDEBUG("Entering mount\n");
> -                             yy_push_state(MOUNT_MODE);
> -                             break;
> -                     default: /* nothing */
> -                             break;
> -                     }
> -                     return token;
> -                     }
> -
> -{WS}+                        {  DUMP_PREPROCESS; /* Ignoring whitespace */ }
> -
> -\r?\n                        { DUMP_PREPROCESS; current_lineno++ ; }
> -
> -[^\n]                        {
> -                     DUMP_PREPROCESS;
> -
> -                       /* Something we didn't expect */
> -                     yyerror(_("Found unexpected character: '%s'"), yytext);
> -                     }
> -
> +<INITIAL,SUB_ID,SUB_VALUE,LIST_VAL_MODE,EXTCOND_MODE,ASSIGN_MODE,NETWORK_MODE,CHANGE_PROFILE_MODE,RLIMIT_MODE,MOUNT_MODE>{
> +     [^\n]   {
> +             DUMP_PREPROCESS;
> +             /* Something we didn't expect */
> +             yyerror(_("Found unexpected character: '%s'"), yytext);
> +     }
> +}
>  %%
> +
> +/* Create a table mapping lexer state number to the name used in the
> + * in the code.  This allows for better debug output
> + */
> +static const char *const state_names[] = {
> +     STATE_TABLE_ENT(INITIAL),
> +     STATE_TABLE_ENT(SUB_ID),
> +     STATE_TABLE_ENT(SUB_VALUE),
> +     STATE_TABLE_ENT(EXTCOND_MODE),
> +     STATE_TABLE_ENT(NETWORK_MODE),
> +     STATE_TABLE_ENT(LIST_VAL_MODE),
> +     STATE_TABLE_ENT(ASSIGN_MODE),
> +     STATE_TABLE_ENT(RLIMIT_MODE),
> +     STATE_TABLE_ENT(MOUNT_MODE),
> +     STATE_TABLE_ENT(CHANGE_PROFILE_MODE),
> +     STATE_TABLE_ENT(INCLUDE),
> +};
> diff --git a/parser/parser_yacc.y b/parser/parser_yacc.y
> index 351a173..433bb6d 100644
> --- a/parser/parser_yacc.y
> +++ b/parser/parser_yacc.y
> @@ -87,6 +87,7 @@ void add_local_entry(struct codomain *cod);
>  %token TOK_MODE
>  %token TOK_END_OF_RULE
>  %token TOK_EQUALS
> +%token TOK_NOTEQUALS

.. Well, okay, used here, but not used _much_. :)

>  %token TOK_ARROW
>  %token TOK_ADD_ASSIGN
>  %token TOK_LE
> -- 

Man, what an awesome cleanup. Thanks. :)

signature.asc
Description: Digital signature

-- 
AppArmor mailing list
[email protected]
Modify settings or unsubscribe at: 
https://lists.ubuntu.com/mailman/listinfo/apparmor

Re: [apparmor] [PATCH 01/10] clean up the lexer

Reply via email to