Author: suokko
Date: Mon Sep 1 01:26:05 2008
New Revision: 29153
URL: http://svn.gna.org/viewcvs/wesnoth?rev=29153&view=rev
Log:
Optimized parser:
* Made it possible to inline the tokenizer because of the huge number of calls
* Changed the token struct a bit because it was taking too large a part of
next_token execution time.
Modified:
trunk/src/serialization/parser.cpp
trunk/src/serialization/tokenizer.cpp
trunk/src/serialization/tokenizer.hpp
trunk/src/tests/test_config_cache.cpp
Modified: trunk/src/serialization/parser.cpp
URL:
http://svn.gna.org/viewcvs/wesnoth/trunk/src/serialization/parser.cpp?rev=29153&r1=29152&r2=29153&view=diff
==============================================================================
--- trunk/src/serialization/parser.cpp (original)
+++ trunk/src/serialization/parser.cpp Mon Sep 1 01:26:05 2008
@@ -70,18 +70,20 @@
struct element {
element(config *cfg, std::string
- const &name, std::string const &start_line) :
+ const &name, const size_t& start_line = 0, const
std::string& file="") :
cfg(cfg),
name(name),
last_element_map(),
- start_line(start_line)
+ start_line(start_line),
+ file(file)
{}
config* cfg;
std::string name;
std::map<std::string, config*> last_element_map;
- std::string start_line;
+ size_t start_line;
+ std::string file;
};
std::stack<element> elements;
@@ -105,7 +107,7 @@
void parser::operator()(std::string* error_log)
{
cfg_.clear();
- elements.push(element(&cfg_, "", ""));
+ elements.push(element(&cfg_, ""));
do {
try {
@@ -147,7 +149,9 @@
if(elements.size() != 1) {
utils::string_map i18n_symbols;
i18n_symbols["tag"] = elements.top().name;
- error(lineno_string(i18n_symbols, elements.top().start_line,
+ std::stringstream ss;
+ ss << elements.top().start_line << " " << elements.top().file;
+ error(lineno_string(i18n_symbols, ss.str(),
N_("Missing closing tag for tag $tag at
$pos")));
}
}
@@ -168,7 +172,7 @@
// Add the element
current_element = &(elements.top().cfg->add_child(elname));
elements.top().last_element_map[elname] = current_element;
- elements.push(element(current_element, elname,
tok_->get_line()));
+ elements.push(element(current_element, elname,
tok_->get_start_line(), tok_->get_file()));
break;
case '+': // [+element]
@@ -187,7 +191,7 @@
current_element = last_element_itor->second;
}
elements.top().last_element_map[elname] = current_element;
- elements.push(element(current_element, elname,
tok_->get_line()));
+ elements.push(element(current_element, elname,
tok_->get_start_line(), tok_->get_file()));
break;
case '/': // [/element]
@@ -202,7 +206,9 @@
utils::string_map i18n_symbols;
i18n_symbols["tag"] = elements.top().name;
i18n_symbols["tag2"] = elname;
- error(lineno_string(i18n_symbols,
elements.top().start_line,
+ std::stringstream ss;
+ ss << elements.top().start_line << " " <<
elements.top().file;
+ error(lineno_string(i18n_symbols, ss.str(),
N_("Found invalid closing tag $tag2 for
tag $tag (opened at $pos)")));
}
@@ -287,7 +293,7 @@
// Ignore this
break;
default:
- cfg[*curvar] += tok_->current_token().leading_spaces +
tok_->current_token().value;
+ cfg[*curvar] += tok_->current_token().value;
break;
case token::QSTRING:
cfg[*curvar] += tok_->current_token().value;
@@ -335,14 +341,16 @@
utils::string_map i18n_symbols;
i18n_symbols["error"] = error_type;
i18n_symbols["value"] = tok_->current_token().value;
+ std::stringstream ss;
+ ss << tok_->get_start_line() << " " << tok_->get_file();
#ifdef DEBUG
i18n_symbols["previous_value"] = tok_->previous_token().value;
throw config::error(
- lineno_string(i18n_symbols, tok_->get_line(),
+ lineno_string(i18n_symbols, ss.str(),
N_("$error, value '$value', previous
'$previous_value' at $pos")));
#else
throw config::error(
- lineno_string(i18n_symbols, tok_->get_line(),
+ lineno_string(i18n_symbols, ss.str(),
N_("$error, value '$value' at $pos")));
#endif
}
Modified: trunk/src/serialization/tokenizer.cpp
URL:
http://svn.gna.org/viewcvs/wesnoth/trunk/src/serialization/tokenizer.cpp?rev=29153&r1=29152&r2=29153&view=diff
==============================================================================
--- trunk/src/serialization/tokenizer.cpp (original)
+++ trunk/src/serialization/tokenizer.cpp Mon Sep 1 01:26:05 2008
@@ -27,175 +27,13 @@
tokenizer::tokenizer(std::istream& in) :
current_(EOF),
lineno_(1),
+ startlineno_(0),
textdomain_(),
file_(),
- tokenstart_lineno_(),
token_(),
in_(in)
{
next_char_fast();
-}
-
-void tokenizer::skip_comment()
-{
- next_char_fast();
- if(current_ != '\n' && current_ != EOF) {
- if(current_ == 't') {
- // When the string 'textdomain[ |\t] is matched the
rest of the line is
- // the textdomain to switch to. If we at any point fail
to match we break
- // out of the loop and eat the rest of the line without
testing.
- size_t i = 0;
- static const std::string match = "extdomain";
- this->next_char_fast();
- while(current_ != '\n' && current_ != EOF) {
- if(i < 9) {
- if(current_ != match[i]) {
- break;
- }
- ++i;
- } else if(i == 9) {
- if(current_ != ' ' && current_ != '\t')
{
- break;
- }
- ++i;
- textdomain_ = "";
- } else {
- textdomain_ += current_;
- }
- this->next_char_fast();
- }
- while(current_ != '\n' && current_ != EOF) {
- this->next_char_fast();
- }
-
- } else if(current_ == 'l') {
- // Basically the same as textdomain but we match 'line[
|\t]d*[ |\t]s*
- // d* is the line number
- // s* is the file name
- // It inherited the * instead of + from the previous
implementation.
- size_t i = 0;
- static const std::string match = "ine";
- this->next_char_fast();
- bool found = false;
- std::string lineno;
- while(current_ != '\n' && current_ != EOF) {
- if(i < 3) {
- if(current_ != match[i]) {
- break;
- }
- ++i;
- } else if(i == 3) {
- if(current_ != ' ' && current_ != '\t')
{
- break;
- }
- ++i;
- } else {
- if(!found) {
- if(current_ == ' ' || current_
== '\t') {
- found = true;
- lineno_ =
lexical_cast<size_t>(lineno);
- file_ = "";
- } else {
- lineno += current_;
- }
- } else {
- file_ += current_;
- }
- }
- this->next_char_fast();
- }
- while(current_ != '\n' && current_ != EOF) {
- this->next_char_fast();
- }
- } else {
- // Neither a textdomain or line comment skip it.
- while(current_ != '\n' && current_ != EOF) {
- this->next_char_fast();
- }
- }
- }
-}
-
-const token& tokenizer::next_token()
-{
-#if DEBUG
- previous_token_ = token_;
-#endif
- token_.reset();
-
- // Dump spaces and inlined comments
- for(;;) {
- while (is_space(current_)) {
- token_.leading_spaces += current_;
- next_char_fast();
- }
- if (current_ != 254)
- break;
- skip_comment();
- --lineno_;
- next_char();
- }
-
- if (current_ == '#')
- skip_comment();
-
- tokenstart_lineno_ = lineno_;
-
- switch(current_) {
- case EOF:
- token_.type = token::END;
- break;
- case '"':
- token_.type = token::QSTRING;
- while (1) {
- next_char();
-
- if(current_ == EOF) {
- token_.type = token::UNTERMINATED_QSTRING;
- break;
- }
- if(current_ == '"' && peek_char() != '"')
- break;
- if(current_ == '"' && peek_char() == '"')
- next_char_fast();
- if (current_ == 254 ) {
- skip_comment();
- --lineno_;
- continue;
- }
-
- token_.value += current_;
- };
- break;
- case '[': case ']': case '/': case '\n': case '=': case ',': case '+':
- token_.type = token::token_type(current_);
- token_.value = current_;
- break;
- default:
- if(is_alnum(current_)) {
- token_.type = token::STRING;
- token_.value += current_;
- while(is_alnum(peek_char())) {
- next_char_fast();
- token_.value += current_;
- }
- } else {
- token_.type = token::MISC;
- token_.value += current_;
- }
- if(token_.value == "_")
- token_.type = token::token_type('_');
- }
-
- if(current_ != EOF)
- next_char();
-
- return token_;
-}
-
-const token& tokenizer::current_token() const
-{
- return token_;
}
#ifdef DEBUG
@@ -205,28 +43,3 @@
}
#endif
-bool tokenizer::is_space(const int c) const
-{
- return c == ' ' || c == '\t';
-}
-
-bool tokenizer::is_alnum(const int c) const
-{
- return (c >= 'a' && c <= 'z')
- || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
-}
-
-std::string tokenizer::get_line() const
-{
- std::ostringstream s;
- s << tokenstart_lineno_ << ' ' << file_;
- return s.str();
-}
-
-std::string& tokenizer::textdomain()
-{
- return textdomain_;
-}
-
-
-
Modified: trunk/src/serialization/tokenizer.hpp
URL:
http://svn.gna.org/viewcvs/wesnoth/trunk/src/serialization/tokenizer.hpp?rev=29153&r1=29152&r2=29153&view=diff
==============================================================================
--- trunk/src/serialization/tokenizer.hpp (original)
+++ trunk/src/serialization/tokenizer.hpp Mon Sep 1 01:26:05 2008
@@ -29,7 +29,6 @@
{
token() :
type(END),
- leading_spaces(),
value()
{}
@@ -51,91 +50,272 @@
} type;
void reset() {
- leading_spaces = "";
- value = "";
+ value.clear();
}
- std::string leading_spaces;
std::string value;
};
//! Abstract baseclass for the tokenizer
class tokenizer
{
-public:
- tokenizer(std::istream& in);
- ~tokenizer() {}
-
- const token& next_token();
- const token& current_token() const;
+ public:
+ tokenizer(std::istream& in);
+ ~tokenizer() {}
+
+ const token& next_token()
+ {
+#if DEBUG
+ previous_token_ = token_;
+#endif
+ token_.reset();
+
+ // Dump spaces and inlined comments
+ for(;;) {
+ while (is_space(current_)) {
+ next_char_fast();
+ }
+ if (current_ != 254)
+ break;
+ skip_comment();
+ // skip the line end
+ next_char_fast();
+ }
+
+ if (current_ == '#')
+ skip_comment();
+
+ startlineno_ = lineno_;
+
+ switch(current_) {
+ case EOF:
+ token_.type = token::END;
+ break;
+ case '"':
+ token_.type = token::QSTRING;
+ while (1) {
+ next_char();
+
+ if(current_ == EOF) {
+ token_.type =
token::UNTERMINATED_QSTRING;
+ break;
+ }
+ if(current_ == '"' &&
peek_char() != '"')
+ break;
+ if(current_ == '"' &&
peek_char() == '"')
+ next_char_fast();
+ if (current_ == 254 ) {
+ skip_comment();
+ --lineno_;
+ continue;
+ }
+
+ token_.value += current_;
+ };
+ break;
+ case '[': case ']': case '/': case '\n': case
'=': case ',': case '+':
+ token_.type =
token::token_type(current_);
+ token_.value = current_;
+ break;
+ default:
+ if(is_alnum(current_)) {
+ token_.type = token::STRING;
+ do {
+ token_.value +=
current_;
+ next_char_fast();
+ } while (is_alnum(current_));
+ } else {
+ token_.type = token::MISC;
+ token_.value += current_;
+ next_char();
+ }
+ check_translatable();
+ return token_;
+ }
+
+ if(current_ != EOF)
+ next_char();
+
+ return token_;
+ }
+
+ void check_translatable()
+ {
+ if(token_.value == "_")
+ token_.type = token::token_type('_');
+ }
+
+
+ const token& current_token() const
+ {
+ return token_;
+ }
#ifdef DEBUG
- const token& previous_token() const;
-#endif
- std::string get_line() const;
- std::string& textdomain();
-
-protected:
- tokenizer();
- int current_;
- size_t lineno_;
-
- inline void next_char()
- {
- if (UNLIKELY(current_ == '\n'))
- lineno_++;
- this->next_char_fast();
- }
-
- inline void next_char_fast()
- {
- do {
- if (LIKELY(in_.good()))
- {
+ const token& previous_token() const;
+#endif
+
+ std::string& textdomain()
+ {
+ return textdomain_;
+ }
+
+ const std::string& get_file() const
+ {
+ return file_;
+ }
+
+ const int get_start_line() const
+ {
+ return startlineno_;
+ }
+
+ protected:
+ tokenizer();
+ int current_;
+ size_t lineno_;
+ size_t startlineno_;
+
+ inline void next_char()
+ {
+ if (UNLIKELY(current_ == '\n'))
+ lineno_++;
+ this->next_char_fast();
+ }
+
+ inline void next_char_fast()
+ {
+ do {
+ if (LIKELY(in_.good()))
+ {
+ current_ = in_.get();
+ }
+ else
+ {
+ current_ = EOF;
+ return;
+ }
+ }while (UNLIKELY(current_ == '\r'));
+#if 0
+ // @todo: disabled untill campaign server is fixed
+ if(LIKELY(in_.good())) {
current_ = in_.get();
+ if (UNLIKELY(current_ == '\r'))
+ {
+ // we assume that there is only one '\r'
+ if(LIKELY(in_.good())) {
+ current_ = in_.get();
+ } else {
+ current_ = EOF;
+ }
+ }
+ } else {
+ current_ = EOF;
}
- else
- {
- current_ = EOF;
- return;
+#endif
+ }
+
+ inline int peek_char() const
+ {
+ return in_.peek();
+ }
+
+ private:
+ bool is_space(const int c) const
+ {
+ return c == ' ' || c == '\t';
+ }
+ bool is_alnum(const int c) const
+ {
+ return (c >= 'a' && c <= 'z')
+ || (c >= 'A' && c <= 'Z') || (c >= '0' && c <=
'9') || c == '_';
+ }
+ void skip_comment()
+ {
+ next_char_fast();
+ if(current_ != '\n' && current_ != EOF) {
+ if(current_ == 't') {
+ // When the string 'textdomain[ |\t] is
matched the rest of the line is
+ // the textdomain to switch to. If we
at any point fail to match we break
+ // out of the loop and eat the rest of
the line without testing.
+ size_t i = 0;
+ static const std::string match =
"extdomain";
+ this->next_char_fast();
+ while(current_ != '\n' && current_ !=
EOF) {
+ if(i < 9) {
+ if(current_ !=
match[i]) {
+ break;
+ }
+ ++i;
+ } else if(i == 9) {
+ if(current_ != ' ' &&
current_ != '\t') {
+ break;
+ }
+ ++i;
+ textdomain_ = "";
+ } else {
+ textdomain_ += current_;
+ }
+ this->next_char_fast();
+ }
+ while(current_ != '\n' && current_ !=
EOF) {
+ this->next_char_fast();
+ }
+
+ } else if(current_ == 'l') {
+ // Basically the same as textdomain but
we match 'line[ |\t]d*[ |\t]s*
+ // d* is the line number
+ // s* is the file name
+ // It inherited the * instead of + from
the previous implementation.
+ size_t i = 0;
+ static const std::string match = "ine";
+ this->next_char_fast();
+ bool found = false;
+ std::string lineno;
+ while(current_ != '\n' && current_ !=
EOF) {
+ if(i < 3) {
+ if(current_ !=
match[i]) {
+ break;
+ }
+ ++i;
+ } else if(i == 3) {
+ if(current_ != ' ' &&
current_ != '\t') {
+ break;
+ }
+ ++i;
+ } else {
+ if(!found) {
+ if(current_ ==
' ' || current_ == '\t') {
+ found =
true;
+ lineno_
= lexical_cast<size_t>(lineno);
+ file_ =
"";
+ } else {
+ lineno
+= current_;
+ }
+ } else {
+ file_ +=
current_;
+ }
+ }
+ this->next_char_fast();
+ }
+ while(current_ != '\n' && current_ !=
EOF) {
+ this->next_char_fast();
+ }
+ } else {
+ // Neither a textdomain or line comment
skip it.
+ while(current_ != '\n' && current_ !=
EOF) {
+ this->next_char_fast();
+ }
+ }
}
- }while (UNLIKELY(current_ == '\r'));
-#if 0
- // @todo: disabled untill campaign server is fixed
- if(LIKELY(in_.good())) {
- current_ = in_.get();
- if (UNLIKELY(current_ == '\r'))
- {
- // we assume that there is only one '\r'
- if(LIKELY(in_.good())) {
- current_ = in_.get();
- } else {
- current_ = EOF;
- }
- }
- } else {
- current_ = EOF;
- }
-#endif
- }
-
- inline int peek_char() const
- {
- return in_.peek();
- }
-
-private:
- bool is_space(const int c) const;
- bool is_alnum(const int c) const;
- void skip_comment();
-
- std::string textdomain_;
- std::string file_;
- size_t tokenstart_lineno_;
- token token_;
+ }
+
+ std::string textdomain_;
+ std::string file_;
+ token token_;
#ifdef DEBUG
- token previous_token_;
-#endif
- std::istream& in_;
+ token previous_token_;
+#endif
+ std::istream& in_;
};
#endif
Modified: trunk/src/tests/test_config_cache.cpp
URL:
http://svn.gna.org/viewcvs/wesnoth/trunk/src/tests/test_config_cache.cpp?rev=29153&r1=29152&r2=29153&view=diff
==============================================================================
--- trunk/src/tests/test_config_cache.cpp (original)
+++ trunk/src/tests/test_config_cache.cpp Mon Sep 1 01:26:05 2008
@@ -291,9 +291,9 @@
{
test_scoped_define mp("MULTIPLAYER");
config cfg_ref;
- cache.set_force_not_valid_cache(true);
+// cache.set_force_not_valid_cache(true);
cache.get_config("data/", cfg_ref);
- cache.set_force_not_valid_cache(false);
+// cache.set_force_not_valid_cache(false);
for (int i=0; i < 3; ++i)
{
cache.get_config("data/");
_______________________________________________
Wesnoth-commits mailing list
[email protected]
https://mail.gna.org/listinfo/wesnoth-commits