tokenizer.h

neil.schemenauer Fri, 21 Sep 2007 13:55:38 -0700

Author: neil.schemenauer
Date: Fri Sep 21 22:50:26 2007
New Revision: 58227


Modified:
   python/branches/py3k/Parser/tokenizer.c
   python/branches/py3k/Parser/tokenizer.h
Log:
Use an enum for decoding_state.  It makes the code a little more
understandable.


Modified: python/branches/py3k/Parser/tokenizer.c
==============================================================================
--- python/branches/py3k/Parser/tokenizer.c     (original)
+++ python/branches/py3k/Parser/tokenizer.c     Fri Sep 21 22:50:26 2007
@@ -139,7 +139,7 @@
        tok->alterror = 1;
        tok->alttabsize = 1;
        tok->altindstack[0] = 0;
-       tok->decoding_state = 0;
+       tok->decoding_state = STATE_INIT;
        tok->decoding_erred = 0;
        tok->read_coding_spec = 0;
        tok->encoding = NULL;
@@ -280,7 +280,7 @@
        if (cs != NULL) {
                tok->read_coding_spec = 1;
                if (tok->encoding == NULL) {
-                       assert(tok->decoding_state == 1); /* raw */
+                       assert(tok->decoding_state == STATE_RAW);
                        if (strcmp(cs, "utf-8") == 0 ||
                            strcmp(cs, "iso-8859-1") == 0) {
                                tok->encoding = cs;
@@ -288,7 +288,7 @@
                                r = set_readline(tok, cs);
                                if (r) {
                                        tok->encoding = cs;
-                                       tok->decoding_state = -1;
+                                       tok->decoding_state = STATE_NORMAL;
                                }
                                else
                                        PyMem_FREE(cs);
@@ -318,7 +318,7 @@
          struct tok_state *tok)
 {
        int ch = get_char(tok);
-       tok->decoding_state = 1;
+       tok->decoding_state = STATE_RAW;
        if (ch == EOF) {
                return 1;
        } else if (ch == 0xEF) {
@@ -330,11 +330,11 @@
        } else if (ch == 0xFE) {
                ch = get_char(tok); if (ch != 0xFF) goto NON_BOM;
                if (!set_readline(tok, "utf-16-be")) return 0;
-               tok->decoding_state = -1;
+               tok->decoding_state = STATE_NORMAL;
        } else if (ch == 0xFF) {
                ch = get_char(tok); if (ch != 0xFE) goto NON_BOM;
                if (!set_readline(tok, "utf-16-le")) return 0;
-               tok->decoding_state = -1;
+               tok->decoding_state = STATE_NORMAL;
 #endif
        } else {
                unget_char(ch, tok);
@@ -494,12 +494,12 @@
        char *line = NULL;
        int badchar = 0;
        for (;;) {
-               if (tok->decoding_state < 0) {
+               if (tok->decoding_state == STATE_NORMAL) {
                        /* We already have a codec associated with
                           this input. */
                        line = fp_readl(s, size, tok);
                        break;
-               } else if (tok->decoding_state > 0) {
+               } else if (tok->decoding_state == STATE_RAW) {
                        /* We want a 'raw' read. */
                        line = Py_UniversalNewlineFgets(s, size,
                                                        tok->fp, NULL);
@@ -510,7 +510,7 @@
                           reader functions from now on. */
                        if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok))
                                return error_ret(tok);
-                       assert(tok->decoding_state != 0);
+                       assert(tok->decoding_state != STATE_INIT);
                }
        }
        if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) {
@@ -550,7 +550,7 @@
 static int
 decoding_feof(struct tok_state *tok)
 {
-       if (tok->decoding_state >= 0) {
+       if (tok->decoding_state != STATE_NORMAL) {
                return feof(tok->fp);
        } else {
                PyObject* buf = tok->decoding_buffer;
@@ -700,7 +700,7 @@
                        return NULL;
                }
                strcpy(tok->encoding, enc);
-               tok->decoding_state = -1;
+               tok->decoding_state = STATE_NORMAL;
        }
        return tok;
 }

Modified: python/branches/py3k/Parser/tokenizer.h
==============================================================================
--- python/branches/py3k/Parser/tokenizer.h     (original)
+++ python/branches/py3k/Parser/tokenizer.h     Fri Sep 21 22:50:26 2007
@@ -12,6 +12,12 @@
 
 #define MAXINDENT 100  /* Max indentation level */
 
+enum decoding_state {
+       STATE_INIT,
+       STATE_RAW,
+       STATE_NORMAL, /* have a codec associated with input */
+};
+
 /* Tokenizer state */
 struct tok_state {
        /* Input state; buf <= cur <= inp <= end */
@@ -40,7 +46,7 @@
        int alttabsize; /* Alternate tab spacing */
        int altindstack[MAXINDENT];     /* Stack of alternate indents */
        /* Stuff for PEP 0263 */
-       int decoding_state;     /* -1:decoding, 0:init, 1:raw */
+       enum decoding_state decoding_state;
        int decoding_erred;     /* whether erred in decoding  */
        int read_coding_spec;   /* whether 'coding:...' has been read  */
        char *encoding;
_______________________________________________
Python-3000-checkins mailing list
Python-3000-checkins@python.org
http://mail.python.org/mailman/listinfo/python-3000-checkins

[Python-3000-checkins] r58227 - python/branches/py3k/Parser/tokenizer.c python/branches/py3k/Parser/tokenizer.h

Reply via email to