Re: parsing bibtex using gscanner
On 02/18/2013 11:15 AM, Rudra Banerjee wrote: On Mon, 2013-02-18 at 19:02 +0100, David Nečas wrote: The best approach to parse a grammar is, you know, using a parser. So anything better than bison? You can write your own parser if you want. Maybe a recursive-descent parser. You will want to study up on parsers. There are different kinds of parsers (recursive-descent; table-driven, which bison is) for different kinds of grammars. You might not want to use a grammar at all. Why not use a dedicated bibtex-parsing library? https://www.google.com/search?q=bibtex+c+library ___ gtk-app-devel-list mailing list gtk-app-devel-list@gnome.org https://mail.gnome.org/mailman/listinfo/gtk-app-devel-list
parsing bibtex using gscanner
Dear Friends, I am trying to parse a bibtex file using gscanner. The problem is that, due to many formats accepted by bibtex, it seems bit hard to parse it. What I mean is as long as the bibtex is of the form key=some value, then g_scanner_get_next_token can get the string. But it fails if it is in the format key={value}. I am attaching my code. Some help (outside using btparse/ bison )is needed. guint parse_entry (GScanner *scanner, GHashTable *table) { int tokount; /* Entry starts with @ */ g_scanner_get_next_token (scanner); if (scanner-token != '@') return G_TOKEN_ERROR; /* Now get identifier */ g_scanner_get_next_token (scanner); if (scanner-token != G_TOKEN_IDENTIFIER) return G_TOKEN_ERROR; g_hash_table_insert (table, g_strdup (type), g_strdup (scanner-value.v_identifier)); /* Brace */ g_scanner_get_next_token (scanner); if (scanner-token != G_TOKEN_LEFT_CURLY){ return G_TOKEN_ERROR;} else tokount += tokount; /* ID */ g_scanner_get_next_token (scanner); if (scanner-token != G_TOKEN_IDENTIFIER) return G_TOKEN_ERROR; g_hash_table_insert (table, g_strdup (id), g_strdup (scanner-value.v_identifier)); while (TRUE) { char *key, *val; g_scanner_get_next_token (scanner); if (scanner-token != G_TOKEN_COMMA) return G_TOKEN_ERROR; g_scanner_get_next_token (scanner); if (scanner-token != G_TOKEN_IDENTIFIER) return G_TOKEN_ERROR; key = g_strdup (scanner-value.v_identifier); /*g_scanner_peek_next_token (scanner); if (scanner-token == G_TOKEN_LEFT_CURLY) tokount += tokount;*/ g_scanner_get_next_token (scanner); if (scanner-token != '=') { g_free (key); return G_TOKEN_ERROR; } g_scanner_get_next_token (scanner); if (scanner-token != G_TOKEN_STRING) { g_free (key); return G_TOKEN_ERROR; } val = g_strdup (scanner-value.v_string); g_hash_table_insert(table, key, val); g_scanner_peek_next_token (scanner); if (scanner-next_token == G_TOKEN_RIGHT_CURLY) break; } g_scanner_get_next_token (scanner); return G_TOKEN_NONE; } ___ gtk-app-devel-list mailing list 
gtk-app-devel-list@gnome.org https://mail.gnome.org/mailman/listinfo/gtk-app-devel-list
Re: parsing bibtex using gscanner
Below is a minimal example. One can check the problem with key={some value} by changing, say, author = \Chowdhury, D.\,\n to author = {Chowdhury, D.},\n This can be compiled as gcc -Wall `pkg-config --cflags --libs gtk+-3.0` glex.c /*glex.c*/ #include glib.h #include string.h /* Test data */ static const gchar *ttest = @phdthesis{chow1983thesis,\n author = \Chowdhury, D.\,\n institution = \Department of Physics, IIT, Kanpur\,\n location = \Kanpur\,\n publisher = \Department of Physics, IIT, Kanpur\,\n school = \Department of Physics, IIT, Kanpur\,\n title = \{The Spin Glass Transition}\,\n year = \1983\\n }; static void output_entry (GHashTable *table) { GHashTableIter iter; char *key, *val; g_print (Citation entry:\n); g_hash_table_iter_init (iter, table); while (g_hash_table_iter_next (iter, (void **)key, (void **)val)) g_print ( %16s: %s\n, key, val); g_print (\n); } static guint parse_entry (GScanner *scanner, GHashTable *table) { /* Entry starts with @ */ g_scanner_get_next_token (scanner); if (scanner-token != '@') return G_TOKEN_ERROR; /* Now get identifier */ g_scanner_get_next_token (scanner); if (scanner-token != G_TOKEN_IDENTIFIER) return G_TOKEN_ERROR; g_hash_table_insert (table, g_strdup (type), g_strdup (scanner-value.v_identifier)); /* Brace */ g_scanner_get_next_token (scanner); if (scanner-token != G_TOKEN_LEFT_CURLY) return G_TOKEN_ERROR; /* ID */ g_scanner_get_next_token (scanner); if (scanner-token != G_TOKEN_IDENTIFIER) return G_TOKEN_ERROR; g_hash_table_insert (table, g_strdup (id), g_strdup (scanner-value.v_identifier)); while (TRUE) { char *key, *val; g_scanner_get_next_token (scanner); if (scanner-token != G_TOKEN_COMMA) return G_TOKEN_ERROR; g_scanner_get_next_token (scanner); if (scanner-token != G_TOKEN_IDENTIFIER) return G_TOKEN_ERROR; key = g_strdup (scanner-value.v_identifier); g_scanner_get_next_token (scanner); if (scanner-token != '=') { g_free (key); return G_TOKEN_ERROR; } g_scanner_get_next_token (scanner); if (scanner-token != 
G_TOKEN_STRING) { g_free (key); return G_TOKEN_ERROR; } val = g_strdup (scanner-value.v_string); g_hash_table_insert(table, key, val); g_scanner_peek_next_token (scanner); if (scanner-next_token == G_TOKEN_RIGHT_CURLY) break; } /* Eat last curly brace and return */ g_scanner_get_next_token (scanner); return G_TOKEN_NONE; } int main (intargc, char **argv) { GScanner *scanner; GHashTable *table; guint ret; scanner = g_scanner_new (NULL); g_scanner_input_text (scanner, ttest, strlen (ttest)); table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free); do { g_hash_table_remove_all (table); ret = parse_entry (scanner, table); if (ret == G_TOKEN_ERROR) break; else output_entry (table); g_scanner_peek_next_token (scanner); } while (scanner-next_token != G_TOKEN_EOF scanner-next_token != G_TOKEN_ERROR); /* finsish parsing */ g_scanner_destroy (scanner); g_hash_table_destroy (table); return 0; } ___ gtk-app-devel-list mailing list gtk-app-devel-list@gnome.org https://mail.gnome.org/mailman/listinfo/gtk-app-devel-list
Re: parsing bibtex using gscanner
On Mon, Feb 18, 2013 at 05:15:56PM +, Rudra Banerjee wrote: I am trying to parse a bibtex file using gscanner. The problem is that, due to many formats accepted by bibtex, it seems bit hard to parse it. What I mean is as long as the bibtex is of the form key=some value, then g_scanner_get_next_token can get the string. But it fails if it is in the format key={value}. And it fails even before escaping literal TeX code using braces within entries or string macros have come into play... I am attaching my code. Some help (outside using btparse/ bison )is needed. Don't do it this way. GScanner is a lexical scanner; it just tokenizes the input but it does not help with grammar. The best approach to parse a grammar is, you know, using a parser. If you insist on writing one manually, realise that you need to formally keep state, e.g. the nesting level of braces at which you are now, etc. Construct the parser similarly to how you would if you just wrote the BNF and let the parser be generated, e.g. write subroutines to parse balanced braces, strings, etc., possibly recursively calling each other. Attempting to write code for all the cases that can occur using sequences of hardcoded ifs will only result in a buggy mess. You have been warned. Yeti ___ gtk-app-devel-list mailing list gtk-app-devel-list@gnome.org https://mail.gnome.org/mailman/listinfo/gtk-app-devel-list
Re: parsing bibtex using gscanner
I forget the acknowledgment: http://www.gtkforums.com/viewtopic.php?f=3t=178159 On Mon, 2013-02-18 at 18:00 +, Rudra Banerjee wrote: Below is a minimal example. One can check the problem with key={some value} by changing, say, author = \Chowdhury, D.\,\n to author = {Chowdhury, D.},\n This can be compiled as gcc -Wall `pkg-config --cflags --libs gtk+-3.0` glex.c /*glex.c*/ #include glib.h #include string.h /* Test data */ static const gchar *ttest = @phdthesis{chow1983thesis,\n author = \Chowdhury, D.\,\n institution = \Department of Physics, IIT, Kanpur\,\n location = \Kanpur\,\n publisher = \Department of Physics, IIT, Kanpur\,\n school = \Department of Physics, IIT, Kanpur\,\n title = \{The Spin Glass Transition}\,\n year = \1983\\n }; static void output_entry (GHashTable *table) { GHashTableIter iter; char *key, *val; g_print (Citation entry:\n); g_hash_table_iter_init (iter, table); while (g_hash_table_iter_next (iter, (void **)key, (void **)val)) g_print ( %16s: %s\n, key, val); g_print (\n); } static guint parse_entry (GScanner *scanner, GHashTable *table) { /* Entry starts with @ */ g_scanner_get_next_token (scanner); if (scanner-token != '@') return G_TOKEN_ERROR; /* Now get identifier */ g_scanner_get_next_token (scanner); if (scanner-token != G_TOKEN_IDENTIFIER) return G_TOKEN_ERROR; g_hash_table_insert (table, g_strdup (type), g_strdup (scanner-value.v_identifier)); /* Brace */ g_scanner_get_next_token (scanner); if (scanner-token != G_TOKEN_LEFT_CURLY) return G_TOKEN_ERROR; /* ID */ g_scanner_get_next_token (scanner); if (scanner-token != G_TOKEN_IDENTIFIER) return G_TOKEN_ERROR; g_hash_table_insert (table, g_strdup (id), g_strdup (scanner-value.v_identifier)); while (TRUE) { char *key, *val; g_scanner_get_next_token (scanner); if (scanner-token != G_TOKEN_COMMA) return G_TOKEN_ERROR; g_scanner_get_next_token (scanner); if (scanner-token != G_TOKEN_IDENTIFIER) return G_TOKEN_ERROR; key = g_strdup (scanner-value.v_identifier); g_scanner_get_next_token 
(scanner); if (scanner-token != '=') { g_free (key); return G_TOKEN_ERROR; } g_scanner_get_next_token (scanner); if (scanner-token != G_TOKEN_STRING) { g_free (key); return G_TOKEN_ERROR; } val = g_strdup (scanner-value.v_string); g_hash_table_insert(table, key, val); g_scanner_peek_next_token (scanner); if (scanner-next_token == G_TOKEN_RIGHT_CURLY) break; } /* Eat last curly brace and return */ g_scanner_get_next_token (scanner); return G_TOKEN_NONE; } int main (intargc, char **argv) { GScanner *scanner; GHashTable *table; guint ret; scanner = g_scanner_new (NULL); g_scanner_input_text (scanner, ttest, strlen (ttest)); table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free); do { g_hash_table_remove_all (table); ret = parse_entry (scanner, table); if (ret == G_TOKEN_ERROR) break; else output_entry (table); g_scanner_peek_next_token (scanner); } while (scanner-next_token != G_TOKEN_EOF scanner-next_token != G_TOKEN_ERROR); /* finsish parsing */ g_scanner_destroy (scanner); g_hash_table_destroy (table); return 0; } ___ gtk-app-devel-list mailing list gtk-app-devel-list@gnome.org https://mail.gnome.org/mailman/listinfo/gtk-app-devel-list ___ gtk-app-devel-list mailing list gtk-app-devel-list@gnome.org https://mail.gnome.org/mailman/listinfo/gtk-app-devel-list
Re: parsing bibtex using gscanner
On Mon, 2013-02-18 at 19:02 +0100, David Nečas wrote: The best approach to parse a grammar is, you know, using a parser. So anything better than bison? ___ gtk-app-devel-list mailing list gtk-app-devel-list@gnome.org https://mail.gnome.org/mailman/listinfo/gtk-app-devel-list
Re: parsing bibtex using gscanner
On Mon, 2013-02-18 at 19:02 +0100, David Nečas wrote: The best approach to parse a grammar is, you know, using a parser. So is there any better option than bison? ___ gtk-app-devel-list mailing list gtk-app-devel-list@gnome.org https://mail.gnome.org/mailman/listinfo/gtk-app-devel-list