Author: suokko
Date: Mon Sep  1 01:26:05 2008
New Revision: 29153

URL: http://svn.gna.org/viewcvs/wesnoth?rev=29153&view=rev
Log:
Optimized parser:
* Made it possible to inline the tokenizer because of the huge number of calls
* Changed the token struct a bit because it was taking too large a part of the next_token 
execution time.

Modified:
    trunk/src/serialization/parser.cpp
    trunk/src/serialization/tokenizer.cpp
    trunk/src/serialization/tokenizer.hpp
    trunk/src/tests/test_config_cache.cpp

Modified: trunk/src/serialization/parser.cpp
URL: 
http://svn.gna.org/viewcvs/wesnoth/trunk/src/serialization/parser.cpp?rev=29153&r1=29152&r2=29153&view=diff
==============================================================================
--- trunk/src/serialization/parser.cpp (original)
+++ trunk/src/serialization/parser.cpp Mon Sep  1 01:26:05 2008
@@ -70,18 +70,20 @@
 
        struct element {
                element(config *cfg, std::string 
-                       const &name, std::string const &start_line) :
+                       const &name, const size_t& start_line = 0, const 
std::string& file="") :
                                cfg(cfg), 
                                name(name),
                                last_element_map(),
-                               start_line(start_line) 
+                               start_line(start_line),
+                           file(file)
                        {}
 
                config* cfg;
                std::string name;
 
                std::map<std::string, config*> last_element_map;
-               std::string start_line;
+               size_t start_line;
+               std::string file;
        };
 
        std::stack<element> elements;
@@ -105,7 +107,7 @@
 void parser::operator()(std::string* error_log)
 {
        cfg_.clear();
-       elements.push(element(&cfg_, "", ""));
+       elements.push(element(&cfg_, ""));
 
        do {
                try {
@@ -147,7 +149,9 @@
        if(elements.size() != 1) {
                utils::string_map i18n_symbols;
                i18n_symbols["tag"] = elements.top().name;
-               error(lineno_string(i18n_symbols, elements.top().start_line,
+               std::stringstream ss;
+               ss << elements.top().start_line << " " << elements.top().file;
+               error(lineno_string(i18n_symbols, ss.str(),
                                N_("Missing closing tag for tag $tag at 
$pos")));
        }
 }
@@ -168,7 +172,7 @@
                // Add the element
                current_element = &(elements.top().cfg->add_child(elname));
                elements.top().last_element_map[elname] = current_element;
-               elements.push(element(current_element, elname, 
tok_->get_line()));
+               elements.push(element(current_element, elname, 
tok_->get_start_line(), tok_->get_file()));
                break;
 
        case '+': // [+element]
@@ -187,7 +191,7 @@
                        current_element = last_element_itor->second;
                }
                elements.top().last_element_map[elname] = current_element;
-               elements.push(element(current_element, elname, 
tok_->get_line()));
+               elements.push(element(current_element, elname, 
tok_->get_start_line(), tok_->get_file()));
                break;
 
        case '/': // [/element]
@@ -202,7 +206,9 @@
                        utils::string_map i18n_symbols;
                        i18n_symbols["tag"] = elements.top().name;
                        i18n_symbols["tag2"] = elname;
-                       error(lineno_string(i18n_symbols, 
elements.top().start_line,
+                       std::stringstream ss;
+                       ss << elements.top().start_line << " " << 
elements.top().file;
+                       error(lineno_string(i18n_symbols, ss.str(),
                                        N_("Found invalid closing tag $tag2 for 
tag $tag (opened at $pos)")));
                }
 
@@ -287,7 +293,7 @@
                        // Ignore this
                        break;
                default:
-                       cfg[*curvar] += tok_->current_token().leading_spaces + 
tok_->current_token().value;
+                       cfg[*curvar] += tok_->current_token().value;
                        break;
                case token::QSTRING:
                        cfg[*curvar] += tok_->current_token().value;
@@ -335,14 +341,16 @@
        utils::string_map i18n_symbols;
        i18n_symbols["error"] = error_type;
        i18n_symbols["value"] = tok_->current_token().value;
+       std::stringstream ss;
+       ss << tok_->get_start_line() << " " << tok_->get_file();
 #ifdef DEBUG
        i18n_symbols["previous_value"] = tok_->previous_token().value;
        throw config::error(
-               lineno_string(i18n_symbols, tok_->get_line(),
+               lineno_string(i18n_symbols, ss.str(),
                              N_("$error, value '$value', previous 
'$previous_value' at $pos")));
 #else
        throw config::error(
-               lineno_string(i18n_symbols, tok_->get_line(),
+               lineno_string(i18n_symbols, ss.str(),
                              N_("$error, value '$value' at $pos")));
 #endif
 }

Modified: trunk/src/serialization/tokenizer.cpp
URL: 
http://svn.gna.org/viewcvs/wesnoth/trunk/src/serialization/tokenizer.cpp?rev=29153&r1=29152&r2=29153&view=diff
==============================================================================
--- trunk/src/serialization/tokenizer.cpp (original)
+++ trunk/src/serialization/tokenizer.cpp Mon Sep  1 01:26:05 2008
@@ -27,175 +27,13 @@
 tokenizer::tokenizer(std::istream& in) :
        current_(EOF),
        lineno_(1),
+       startlineno_(0),
        textdomain_(),
        file_(),
-       tokenstart_lineno_(),
        token_(),
        in_(in)
 {
        next_char_fast();
-}
-
-void tokenizer::skip_comment()
-{
-       next_char_fast();
-       if(current_ != '\n' && current_ != EOF) {
-               if(current_ == 't') {
-                       // When the string 'textdomain[ |\t] is matched the 
rest of the line is
-                       // the textdomain to switch to. If we at any point fail 
to match we break
-                       // out of the loop and eat the rest of the line without 
testing.
-                       size_t i = 0;
-                       static const std::string match = "extdomain";
-                       this->next_char_fast();
-                       while(current_ != '\n' && current_ != EOF) {
-                               if(i < 9) {
-                                       if(current_ != match[i]) {
-                                               break;
-                                       }
-                                       ++i;
-                               } else if(i == 9) { 
-                                       if(current_ != ' ' && current_ != '\t') 
{
-                                               break;
-                                       }
-                                       ++i;
-                                       textdomain_ = "";
-                               } else {
-                                       textdomain_ += current_;
-                               }
-                               this->next_char_fast();
-                       }
-                       while(current_ != '\n' && current_ != EOF) {
-                               this->next_char_fast();
-                       }
-
-               } else if(current_ == 'l') {
-                       // Basically the same as textdomain but we match 'line[ 
|\t]d*[ |\t]s*
-                       // d* is the line number 
-                       // s* is the file name
-                       // It inherited the * instead of + from the previous 
implementation.
-                       size_t i = 0;
-                       static const std::string match = "ine";
-                       this->next_char_fast();
-                       bool found = false;
-                       std::string lineno;
-                       while(current_ != '\n' && current_ != EOF) {
-                               if(i < 3) {
-                                       if(current_ != match[i]) {
-                                               break;
-                                       }
-                                       ++i;
-                               } else if(i == 3) { 
-                                       if(current_ != ' ' && current_ != '\t') 
{
-                                               break;
-                                       }
-                                       ++i;
-                               } else {
-                                       if(!found) {
-                                               if(current_ == ' ' || current_ 
== '\t') {
-                                                       found = true;
-                                                       lineno_ = 
lexical_cast<size_t>(lineno);
-                                                       file_ = "";
-                                               } else {
-                                                       lineno += current_;
-                                               }
-                                       } else {
-                                               file_ += current_;
-                                       }
-                               }
-                               this->next_char_fast();
-                       }
-                       while(current_ != '\n' && current_ != EOF) {
-                               this->next_char_fast();
-                       }
-               } else {
-                       // Neither a textdomain or line comment skip it.
-                       while(current_ != '\n' && current_ != EOF) {
-                               this->next_char_fast();
-                       }
-               }
-       }
-}
-
-const token& tokenizer::next_token()
-{
-#if DEBUG
-       previous_token_ = token_;
-#endif
-       token_.reset();
-
-       // Dump spaces and inlined comments
-       for(;;) {
-               while (is_space(current_)) {
-                       token_.leading_spaces += current_;
-                       next_char_fast();
-               }
-               if (current_ != 254)
-                       break;
-               skip_comment();
-               --lineno_;
-               next_char();
-       }
-
-       if (current_ == '#')
-               skip_comment();
-
-       tokenstart_lineno_ = lineno_;
-
-       switch(current_) {
-       case EOF:
-               token_.type = token::END;
-               break;
-       case '"':
-               token_.type = token::QSTRING;
-               while (1) {
-                       next_char();
-
-                       if(current_ == EOF) {
-                               token_.type = token::UNTERMINATED_QSTRING;
-                               break;
-                       }
-                       if(current_ == '"' && peek_char() != '"')
-                               break;
-                       if(current_ == '"' && peek_char() == '"')
-                               next_char_fast();
-                       if (current_ == 254 ) {
-                               skip_comment();
-                               --lineno_;
-                               continue;
-                       }
-
-                       token_.value += current_;
-               };
-               break;
-       case '[': case ']': case '/': case '\n': case '=': case ',': case '+':
-               token_.type = token::token_type(current_);
-               token_.value = current_;
-               break;
-       default:
-               if(is_alnum(current_)) {
-                       token_.type = token::STRING;
-                       token_.value += current_;
-                       while(is_alnum(peek_char())) {
-                               next_char_fast();
-                               token_.value += current_;
-                       }
-               } else {
-                       token_.type = token::MISC;
-                       token_.value += current_;
-               }
-               if(token_.value == "_")
-                       token_.type = token::token_type('_');
-       }
-
-       if(current_ != EOF)
-               next_char();
-
-       return token_;
-}
-
-const token& tokenizer::current_token() const
-{
-       return token_;
 }
 
 #ifdef DEBUG
@@ -205,28 +43,3 @@
 }
 #endif
 
-bool tokenizer::is_space(const int c) const
-{
-       return c == ' ' || c == '\t';
-}
-
-bool tokenizer::is_alnum(const int c) const
-{
-       return (c >= 'a' && c <= 'z') 
-               || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
-}
-
-std::string tokenizer::get_line() const
-{
-       std::ostringstream s;
-       s << tokenstart_lineno_ << ' ' << file_;
-       return s.str();
-}
-
-std::string& tokenizer::textdomain()
-{
-       return textdomain_;
-}
-
-
-

Modified: trunk/src/serialization/tokenizer.hpp
URL: 
http://svn.gna.org/viewcvs/wesnoth/trunk/src/serialization/tokenizer.hpp?rev=29153&r1=29152&r2=29153&view=diff
==============================================================================
--- trunk/src/serialization/tokenizer.hpp (original)
+++ trunk/src/serialization/tokenizer.hpp Mon Sep  1 01:26:05 2008
@@ -29,7 +29,6 @@
 {
        token() :
                type(END),
-               leading_spaces(),
                value()
                {}
 
@@ -51,91 +50,272 @@
        } type;
 
        void reset() {
-               leading_spaces = "";
-               value = "";
+               value.clear();
        }
 
-       std::string leading_spaces;
        std::string value;
 };
 
 //! Abstract baseclass for the tokenizer
 class tokenizer
 {
-public:
-       tokenizer(std::istream& in);
-       ~tokenizer() {}
-
-       const token& next_token();
-       const token& current_token() const;
+       public:
+               tokenizer(std::istream& in);
+               ~tokenizer() {}
+
+               const token& next_token()
+               {
+#if DEBUG
+                       previous_token_ = token_;
+#endif
+                       token_.reset();
+
+                       // Dump spaces and inlined comments
+                       for(;;) {
+                               while (is_space(current_)) {
+                                       next_char_fast();
+                               }
+                               if (current_ != 254)
+                                       break;
+                               skip_comment();
+                               // skip the line end
+                               next_char_fast();
+                       }
+
+                       if (current_ == '#')
+                               skip_comment();
+
+                       startlineno_ = lineno_;
+
+                       switch(current_) {
+                               case EOF:
+                                       token_.type = token::END;
+                                       break;
+                               case '"':
+                                       token_.type = token::QSTRING;
+                                       while (1) {
+                                               next_char();
+
+                                               if(current_ == EOF) {
+                                                       token_.type = 
token::UNTERMINATED_QSTRING;
+                                                       break;
+                                               }
+                                               if(current_ == '"' && 
peek_char() != '"')
+                                                       break;
+                                               if(current_ == '"' && 
peek_char() == '"')
+                                                       next_char_fast();
+                                               if (current_ == 254 ) {
+                                                       skip_comment();
+                                                       --lineno_;
+                                                       continue;
+                                               }
+
+                                               token_.value += current_;
+                                       };
+                                       break;
+                               case '[': case ']': case '/': case '\n': case 
'=': case ',': case '+':
+                                       token_.type = 
token::token_type(current_);
+                                       token_.value = current_;
+                                       break;
+                               default:
+                                       if(is_alnum(current_)) {
+                                               token_.type = token::STRING;
+                                               do {
+                                                       token_.value += 
current_;
+                                                       next_char_fast();
+                                               } while (is_alnum(current_));
+                                       } else {
+                                               token_.type = token::MISC;
+                                               token_.value += current_;
+                                               next_char();
+                                       }
+                                       check_translatable();
+                                       return token_;
+                       }
+
+                       if(current_ != EOF)
+                               next_char();
+
+                       return token_;
+               }
+
+               void check_translatable()
+               {
+                       if(token_.value == "_")
+                               token_.type = token::token_type('_');
+               }
+
+
+               const token& current_token() const
+               {
+                       return token_;
+               }
 #ifdef DEBUG
-       const token& previous_token() const;
-#endif
-       std::string get_line() const;
-       std::string& textdomain();
-
-protected:
-       tokenizer();
-       int current_;
-       size_t lineno_;
-
-       inline void next_char()
-       {
-               if (UNLIKELY(current_ == '\n'))
-                       lineno_++;
-               this->next_char_fast();
-       }
-
-       inline void next_char_fast()
-       {
-               do {
-                       if (LIKELY(in_.good()))
-                       {
+               const token& previous_token() const;
+#endif
+
+               std::string& textdomain()
+               {
+                       return textdomain_;
+               }
+
+               const std::string& get_file() const
+               {
+                       return file_;
+               }
+
+               const int get_start_line() const
+               {
+                       return startlineno_;
+               }
+
+       protected:
+               tokenizer();
+               int current_;
+               size_t lineno_;
+               size_t startlineno_;
+
+               inline void next_char()
+               {
+                       if (UNLIKELY(current_ == '\n'))
+                               lineno_++;
+                       this->next_char_fast();
+               }
+
+               inline void next_char_fast()
+               {
+                       do {
+                               if (LIKELY(in_.good()))
+                               {
+                                       current_ = in_.get();
+                               }
+                               else
+                               {
+                                       current_ = EOF;
+                                       return;
+                               }
+                       }while (UNLIKELY(current_ == '\r'));
+#if 0
+                       // @todo: disabled untill campaign server is fixed
+                       if(LIKELY(in_.good())) {
                                current_ = in_.get();
+                               if (UNLIKELY(current_ == '\r'))
+                               {
+                                       // we assume that there is only one '\r'
+                                       if(LIKELY(in_.good())) {
+                                               current_ = in_.get();
+                                       } else {
+                                               current_ = EOF;
+                                       }
+                               }
+                       } else {
+                               current_ = EOF;
                        }
-                       else
-                       {
-                               current_ = EOF;
-                               return;
+#endif
+               }
+
+               inline int peek_char() const
+               {
+                       return in_.peek();
+               }
+
+       private:
+               bool is_space(const int c) const
+               {
+                       return c == ' ' || c == '\t';
+               }
+               bool is_alnum(const int c) const
+               {
+                       return (c >= 'a' && c <= 'z') 
+                               || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= 
'9') || c == '_';
+               }
+               void skip_comment()
+               {
+                       next_char_fast();
+                       if(current_ != '\n' && current_ != EOF) {
+                               if(current_ == 't') {
+                                       // When the string 'textdomain[ |\t] is 
matched the rest of the line is
+                                       // the textdomain to switch to. If we 
at any point fail to match we break
+                                       // out of the loop and eat the rest of 
the line without testing.
+                                       size_t i = 0;
+                                       static const std::string match = 
"extdomain";
+                                       this->next_char_fast();
+                                       while(current_ != '\n' && current_ != 
EOF) {
+                                               if(i < 9) {
+                                                       if(current_ != 
match[i]) {
+                                                               break;
+                                                       }
+                                                       ++i;
+                                               } else if(i == 9) { 
+                                                       if(current_ != ' ' && 
current_ != '\t') {
+                                                               break;
+                                                       }
+                                                       ++i;
+                                                       textdomain_ = "";
+                                               } else {
+                                                       textdomain_ += current_;
+                                               }
+                                               this->next_char_fast();
+                                       }
+                                       while(current_ != '\n' && current_ != 
EOF) {
+                                               this->next_char_fast();
+                                       }
+
+                               } else if(current_ == 'l') {
+                                       // Basically the same as textdomain but 
we match 'line[ |\t]d*[ |\t]s*
+                                       // d* is the line number 
+                                       // s* is the file name
+                                       // It inherited the * instead of + from 
the previous implementation.
+                                       size_t i = 0;
+                                       static const std::string match = "ine";
+                                       this->next_char_fast();
+                                       bool found = false;
+                                       std::string lineno;
+                                       while(current_ != '\n' && current_ != 
EOF) {
+                                               if(i < 3) {
+                                                       if(current_ != 
match[i]) {
+                                                               break;
+                                                       }
+                                                       ++i;
+                                               } else if(i == 3) { 
+                                                       if(current_ != ' ' && 
current_ != '\t') {
+                                                               break;
+                                                       }
+                                                       ++i;
+                                               } else {
+                                                       if(!found) {
+                                                               if(current_ == 
' ' || current_ == '\t') {
+                                                                       found = 
true;
+                                                                       lineno_ 
= lexical_cast<size_t>(lineno);
+                                                                       file_ = 
"";
+                                                               } else {
+                                                                       lineno 
+= current_;
+                                                               }
+                                                       } else {
+                                                               file_ += 
current_;
+                                                       }
+                                               }
+                                               this->next_char_fast();
+                                       }
+                                       while(current_ != '\n' && current_ != 
EOF) {
+                                               this->next_char_fast();
+                                       }
+                               } else {
+                                       // Neither a textdomain or line comment 
skip it.
+                                       while(current_ != '\n' && current_ != 
EOF) {
+                                               this->next_char_fast();
+                                       }
+                               }
                        }
-               }while (UNLIKELY(current_ == '\r'));
-#if 0
-               // @todo: disabled untill campaign server is fixed
-               if(LIKELY(in_.good())) {
-                       current_ = in_.get();
-                       if (UNLIKELY(current_ == '\r'))
-                       {
-                               // we assume that there is only one '\r'
-                               if(LIKELY(in_.good())) {
-                                       current_ = in_.get();
-                               } else {
-                                       current_ = EOF;
-                               }
-                       }
-               } else {
-                       current_ = EOF;
-               }
-#endif
-       }
-
-       inline int peek_char() const
-       {
-               return in_.peek();
-       }
-
-private:
-       bool is_space(const int c) const;
-       bool is_alnum(const int c) const;
-       void skip_comment();
-
-       std::string textdomain_;
-       std::string file_;
-       size_t tokenstart_lineno_;
-       token token_;
+               }
+
+               std::string textdomain_;
+               std::string file_;
+               token token_;
 #ifdef DEBUG
-       token previous_token_;
-#endif
-       std::istream& in_;
+               token previous_token_;
+#endif
+               std::istream& in_;
 };
 
 #endif

Modified: trunk/src/tests/test_config_cache.cpp
URL: 
http://svn.gna.org/viewcvs/wesnoth/trunk/src/tests/test_config_cache.cpp?rev=29153&r1=29152&r2=29153&view=diff
==============================================================================
--- trunk/src/tests/test_config_cache.cpp (original)
+++ trunk/src/tests/test_config_cache.cpp Mon Sep  1 01:26:05 2008
@@ -291,9 +291,9 @@
 {
        test_scoped_define mp("MULTIPLAYER");
        config cfg_ref;
-       cache.set_force_not_valid_cache(true);
+//     cache.set_force_not_valid_cache(true);
        cache.get_config("data/", cfg_ref);
-       cache.set_force_not_valid_cache(false);
+//     cache.set_force_not_valid_cache(false);
        for (int i=0; i < 3; ++i)
        {
                cache.get_config("data/");


_______________________________________________
Wesnoth-commits mailing list
[email protected]
https://mail.gna.org/listinfo/wesnoth-commits

Reply via email to