Re: parsing bibtex using gscanner

2013-02-21 Thread Michael Torrie
On 02/18/2013 11:15 AM, Rudra Banerjee wrote:
> On Mon, 2013-02-18 at 19:02 +0100, David Nečas wrote:
>> The best approach to parse a grammar is, you know, using a parser.
> So anything better than bison?

You can write your own parser if you want, maybe a recursive-descent
parser.  You will want to study up on parsers: there are different kinds
(recursive-descent, or table-driven, which is what bison generates) for
different kinds of grammars.

You might not want to use a grammar at all. Why not use a dedicated
bibtex-parsing library:

https://www.google.com/search?q=bibtex+c+library
___
gtk-app-devel-list mailing list
gtk-app-devel-list@gnome.org
https://mail.gnome.org/mailman/listinfo/gtk-app-devel-list

parsing bibtex using gscanner

2013-02-18 Thread Rudra Banerjee
Dear Friends,
I am trying to parse a bibtex file using gscanner.
The problem is that, due to the many formats accepted by bibtex, it seems
a bit hard to parse.
What I mean is that as long as the bibtex is of the form key="some value",
g_scanner_get_next_token can get the string.
But it fails if it is in the format key={value}.

I am attaching my code. Some help (other than using btparse/bison) is
needed.
/*
 * Parse one BibTeX entry from scanner and record it in table.
 *
 * The accepted shape is
 *   @type{id, key = "value", key = "value", ... }
 * The entry type is stored under the key "type", the citation id under the
 * key "id", and each field under its own name.  Every key and value handed
 * to the table is a g_strdup()ed copy.
 *
 * Returns G_TOKEN_NONE once the closing brace has been consumed, or
 * G_TOKEN_ERROR at the first unexpected token; fields inserted before the
 * error are left in table.  A field value must be a G_TOKEN_STRING, so a
 * brace-delimited value (key = {value}) ends the parse with G_TOKEN_ERROR.
 */
guint parse_entry (GScanner   *scanner,
                   GHashTable *table)
{
  /* Count of opening braces seen so far.  It must start at zero: reading
   * it uninitialized is undefined behaviour.  Nothing consults it yet. */
  int tokount = 0;

  /* Entry starts with @ */
  g_scanner_get_next_token (scanner);
  if (scanner->token != '@')
    return G_TOKEN_ERROR;

  /* Now get identifier */
  g_scanner_get_next_token (scanner);
  if (scanner->token != G_TOKEN_IDENTIFIER)
    return G_TOKEN_ERROR;

  g_hash_table_insert (table, g_strdup ("type"),
                       g_strdup (scanner->value.v_identifier));

  /* Brace */
  g_scanner_get_next_token (scanner);
  if (scanner->token != G_TOKEN_LEFT_CURLY)
    return G_TOKEN_ERROR;
  tokount++;

  /* ID */
  g_scanner_get_next_token (scanner);
  if (scanner->token != G_TOKEN_IDENTIFIER)
    return G_TOKEN_ERROR;

  g_hash_table_insert (table, g_strdup ("id"),
                       g_strdup (scanner->value.v_identifier));

  while (TRUE)
    {
      char *key, *val;

      /* Every field is introduced by a comma. */
      g_scanner_get_next_token (scanner);
      if (scanner->token != G_TOKEN_COMMA)
        return G_TOKEN_ERROR;

      g_scanner_get_next_token (scanner);
      if (scanner->token != G_TOKEN_IDENTIFIER)
        return G_TOKEN_ERROR;

      key = g_strdup (scanner->value.v_identifier);

      /*g_scanner_peek_next_token (scanner);
      if (scanner->token == G_TOKEN_LEFT_CURLY)
        tokount += tokount;*/

      g_scanner_get_next_token (scanner);
      if (scanner->token != '=')
        {
          g_free (key);
          return G_TOKEN_ERROR;
        }

      /* Only a quoted string is accepted as the value. */
      g_scanner_get_next_token (scanner);
      if (scanner->token != G_TOKEN_STRING)
        {
          g_free (key);
          return G_TOKEN_ERROR;
        }

      val = g_strdup (scanner->value.v_string);
      g_hash_table_insert (table, key, val);

      /* Look ahead without consuming: a closing brace ends the entry. */
      g_scanner_peek_next_token (scanner);
      if (scanner->next_token == G_TOKEN_RIGHT_CURLY)
        break;
    }

  /* Consume the closing brace. */
  g_scanner_get_next_token (scanner);
  return G_TOKEN_NONE;
}


___
gtk-app-devel-list mailing list
gtk-app-devel-list@gnome.org
https://mail.gnome.org/mailman/listinfo/gtk-app-devel-list


Re: parsing bibtex using gscanner

2013-02-18 Thread Rudra Banerjee
Below is a minimal example.
One can check the problem with key={some value} by changing, say,
author = \"Chowdhury, D.\",\n
to
author = {Chowdhury, D.},\n

This can be compiled as
gcc -Wall `pkg-config --cflags --libs gtk+-3.0` glex.c

/*glex.c*/
#include <glib.h>
#include <string.h>

/* Test data: a single @phdthesis entry whose field values are all
 * double-quoted strings. */
static const gchar *ttest = "@phdthesis{chow1983thesis,\n"
"author = \"Chowdhury, D.\",\n"
"institution = \"Department of Physics, IIT, Kanpur\",\n"
"location = \"Kanpur\",\n"
"publisher = \"Department of Physics, IIT, Kanpur\",\n"
"school = \"Department of Physics, IIT, Kanpur\",\n"
"title = \"{The Spin Glass Transition}\",\n"
"year = \"1983\"\n"
"}";


/*
 * Print the contents of table: a "Citation entry:" header, then one
 * "key: value" line per stored pair, then a blank line.
 *
 * Both keys and values are printed with %s, so they must be
 * NUL-terminated strings.
 */
static void
output_entry (GHashTable *table)
{
  GHashTableIter iter;
  /* gpointer matches what g_hash_table_iter_next() writes through, so no
   * pointer-to-pointer cast is needed. */
  gpointer key, val;

  g_print ("Citation entry:\n");
  g_hash_table_iter_init (&iter, table);
  while (g_hash_table_iter_next (&iter, &key, &val))
    g_print ("  %16s: %s\n", (const char *) key, (const char *) val);
  g_print ("\n");
}

/*
 * Parse one BibTeX entry from scanner and record it in table.
 *
 * The accepted shape is
 *   @type{id, key = "value", key = "value", ... }
 * The entry type is stored under the key "type", the citation id under the
 * key "id", and each field under its own name.  Every key and value handed
 * to the table is a g_strdup()ed copy.
 *
 * Returns G_TOKEN_NONE once the closing brace has been consumed, or
 * G_TOKEN_ERROR at the first unexpected token; fields inserted before the
 * error are left in table.  A field value must be a G_TOKEN_STRING, so a
 * brace-delimited value (key = {value}) ends the parse with G_TOKEN_ERROR.
 */
static guint
parse_entry (GScanner   *scanner,
             GHashTable *table)
{
  /* Entry starts with @ */
  g_scanner_get_next_token (scanner);
  if (scanner->token != '@')
    return G_TOKEN_ERROR;

  /* Now get identifier */
  g_scanner_get_next_token (scanner);
  if (scanner->token != G_TOKEN_IDENTIFIER)
    return G_TOKEN_ERROR;

  g_hash_table_insert (table, g_strdup ("type"),
                       g_strdup (scanner->value.v_identifier));

  /* Brace */
  g_scanner_get_next_token (scanner);
  if (scanner->token != G_TOKEN_LEFT_CURLY)
    return G_TOKEN_ERROR;

  /* ID */
  g_scanner_get_next_token (scanner);
  if (scanner->token != G_TOKEN_IDENTIFIER)
    return G_TOKEN_ERROR;

  g_hash_table_insert (table, g_strdup ("id"),
                       g_strdup (scanner->value.v_identifier));

  while (TRUE)
    {
      char *key, *val;

      /* Every field is introduced by a comma. */
      g_scanner_get_next_token (scanner);
      if (scanner->token != G_TOKEN_COMMA)
        return G_TOKEN_ERROR;

      g_scanner_get_next_token (scanner);
      if (scanner->token != G_TOKEN_IDENTIFIER)
        return G_TOKEN_ERROR;

      key = g_strdup (scanner->value.v_identifier);

      g_scanner_get_next_token (scanner);
      if (scanner->token != '=')
        {
          g_free (key);
          return G_TOKEN_ERROR;
        }

      /* Only a quoted string is accepted as the value. */
      g_scanner_get_next_token (scanner);
      if (scanner->token != G_TOKEN_STRING)
        {
          g_free (key);
          return G_TOKEN_ERROR;
        }

      val = g_strdup (scanner->value.v_string);
      g_hash_table_insert (table, key, val);

      /* Look ahead without consuming: a closing brace ends the entry. */
      g_scanner_peek_next_token (scanner);
      if (scanner->next_token == G_TOKEN_RIGHT_CURLY)
        break;
    }

  /* Eat last curly brace and return */
  g_scanner_get_next_token (scanner);
  return G_TOKEN_NONE;
}


int
main (intargc,
  char **argv)
{
  GScanner *scanner;
  GHashTable *table;
  guint ret;

  scanner = g_scanner_new (NULL);
  g_scanner_input_text (scanner, ttest, strlen (ttest));

  table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
g_free);
  do
{
  g_hash_table_remove_all (table);
  ret = parse_entry (scanner, table);

  if (ret == G_TOKEN_ERROR)
break;
  else
output_entry (table);

  g_scanner_peek_next_token (scanner);
}
  while (scanner-next_token != G_TOKEN_EOF 
 scanner-next_token != G_TOKEN_ERROR);

  /* finsish parsing */
  g_scanner_destroy (scanner);
  g_hash_table_destroy (table);

  return 0;
}




___
gtk-app-devel-list mailing list
gtk-app-devel-list@gnome.org
https://mail.gnome.org/mailman/listinfo/gtk-app-devel-list


Re: parsing bibtex using gscanner

2013-02-18 Thread David Nečas
On Mon, Feb 18, 2013 at 05:15:56PM +0000, Rudra Banerjee wrote:
> I am trying to parse a bibtex file using gscanner.
> The problem is that, due to many formats accepted by bibtex, it seems
> bit hard to parse it.
> What I mean is as long as the bibtex is of the form key="some value",
> then g_scanner_get_next_token can get the string.
> But it fails if it is in the format key={value}.

And it fails even before escaping of literal TeX code using braces within
entries, or string macros, have come into play...

> I am attaching my code. Some help (outside using btparse/ bison )is
> needed.

Don't do it this way.  GScanner is a lexical scanner, it just tokenizes
the input but it does not help with grammar.

The best approach to parse a grammar is, you know, using a parser.

If you insist on writing one manually, realise that you need to formally
keep state, e.g. the nesting level of braces at which you are now, etc.
Construct the parser the same way you would if you just wrote
the BNF and let the parser be generated, e.g. write subroutines to parse
balanced braces, strings, etc., possibly recursively calling each other.

Attempting to write code for all the cases that can occur using
sequences of hardcoded ifs will only result in a buggy mess.  You have
been warned.

Yeti

___
gtk-app-devel-list mailing list
gtk-app-devel-list@gnome.org
https://mail.gnome.org/mailman/listinfo/gtk-app-devel-list


Re: parsing bibtex using gscanner

2013-02-18 Thread Rudra Banerjee
I forgot the acknowledgment:
http://www.gtkforums.com/viewtopic.php?f=3&t=178159
On Mon, 2013-02-18 at 18:00 +0000, Rudra Banerjee wrote:
 Below is a minimal example.
 One can check the problem with key={some value} by changing, say,
 author = \"Chowdhury, D.\",\n
 to
 author = {Chowdhury, D.},\n
 
 This can be compiled as
 gcc -Wall `pkg-config --cflags --libs gtk+-3.0` glex.c
 
 /*glex.c*/
 #include <glib.h>
 #include <string.h>
 
 /* Test data: a single @phdthesis entry whose field values are all
  * double-quoted strings. */
 static const gchar *ttest = "@phdthesis{chow1983thesis,\n"
 "author = \"Chowdhury, D.\",\n"
 "institution = \"Department of Physics, IIT, Kanpur\",\n"
 "location = \"Kanpur\",\n"
 "publisher = \"Department of Physics, IIT, Kanpur\",\n"
 "school = \"Department of Physics, IIT, Kanpur\",\n"
 "title = \"{The Spin Glass Transition}\",\n"
 "year = \"1983\"\n"
 "}";
 
 
 /*
  * Print the contents of table: a "Citation entry:" header, then one
  * "key: value" line per stored pair, then a blank line.
  *
  * Both keys and values are printed with %s, so they must be
  * NUL-terminated strings.
  */
 static void
 output_entry (GHashTable *table)
 {
   GHashTableIter iter;
   /* gpointer matches what g_hash_table_iter_next() writes through, so no
    * pointer-to-pointer cast is needed. */
   gpointer key, val;

   g_print ("Citation entry:\n");
   g_hash_table_iter_init (&iter, table);
   while (g_hash_table_iter_next (&iter, &key, &val))
     g_print ("  %16s: %s\n", (const char *) key, (const char *) val);
   g_print ("\n");
 }
 
 /*
  * Parse one BibTeX entry from scanner and record it in table.
  *
  * The accepted shape is
  *   @type{id, key = "value", key = "value", ... }
  * The entry type is stored under the key "type", the citation id under the
  * key "id", and each field under its own name.  Every key and value handed
  * to the table is a g_strdup()ed copy.
  *
  * Returns G_TOKEN_NONE once the closing brace has been consumed, or
  * G_TOKEN_ERROR at the first unexpected token; fields inserted before the
  * error are left in table.  A field value must be a G_TOKEN_STRING, so a
  * brace-delimited value (key = {value}) ends the parse with G_TOKEN_ERROR.
  */
 static guint
 parse_entry (GScanner   *scanner,
              GHashTable *table)
 {
   /* Entry starts with @ */
   g_scanner_get_next_token (scanner);
   if (scanner->token != '@')
     return G_TOKEN_ERROR;

   /* Now get identifier */
   g_scanner_get_next_token (scanner);
   if (scanner->token != G_TOKEN_IDENTIFIER)
     return G_TOKEN_ERROR;

   g_hash_table_insert (table, g_strdup ("type"),
                        g_strdup (scanner->value.v_identifier));

   /* Brace */
   g_scanner_get_next_token (scanner);
   if (scanner->token != G_TOKEN_LEFT_CURLY)
     return G_TOKEN_ERROR;

   /* ID */
   g_scanner_get_next_token (scanner);
   if (scanner->token != G_TOKEN_IDENTIFIER)
     return G_TOKEN_ERROR;

   g_hash_table_insert (table, g_strdup ("id"),
                        g_strdup (scanner->value.v_identifier));

   while (TRUE)
     {
       char *key, *val;

       /* Every field is introduced by a comma. */
       g_scanner_get_next_token (scanner);
       if (scanner->token != G_TOKEN_COMMA)
         return G_TOKEN_ERROR;

       g_scanner_get_next_token (scanner);
       if (scanner->token != G_TOKEN_IDENTIFIER)
         return G_TOKEN_ERROR;

       key = g_strdup (scanner->value.v_identifier);

       g_scanner_get_next_token (scanner);
       if (scanner->token != '=')
         {
           g_free (key);
           return G_TOKEN_ERROR;
         }

       /* Only a quoted string is accepted as the value. */
       g_scanner_get_next_token (scanner);
       if (scanner->token != G_TOKEN_STRING)
         {
           g_free (key);
           return G_TOKEN_ERROR;
         }

       val = g_strdup (scanner->value.v_string);
       g_hash_table_insert (table, key, val);

       /* Look ahead without consuming: a closing brace ends the entry. */
       g_scanner_peek_next_token (scanner);
       if (scanner->next_token == G_TOKEN_RIGHT_CURLY)
         break;
     }

   /* Eat last curly brace and return */
   g_scanner_get_next_token (scanner);
   return G_TOKEN_NONE;
 }
 
 
 int
 main (intargc,
   char **argv)
 {
   GScanner *scanner;
   GHashTable *table;
   guint ret;
 
   scanner = g_scanner_new (NULL);
   g_scanner_input_text (scanner, ttest, strlen (ttest));
 
   table = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
 g_free);
   do
 {
   g_hash_table_remove_all (table);
   ret = parse_entry (scanner, table);
 
   if (ret == G_TOKEN_ERROR)
 break;
   else
 output_entry (table);
 
   g_scanner_peek_next_token (scanner);
 }
   while (scanner-next_token != G_TOKEN_EOF 
  scanner-next_token != G_TOKEN_ERROR);
 
   /* finsish parsing */
   g_scanner_destroy (scanner);
   g_hash_table_destroy (table);
 
   return 0;
 }
 
 
 
 
 ___
 gtk-app-devel-list mailing list
 gtk-app-devel-list@gnome.org
 https://mail.gnome.org/mailman/listinfo/gtk-app-devel-list


___
gtk-app-devel-list mailing list
gtk-app-devel-list@gnome.org
https://mail.gnome.org/mailman/listinfo/gtk-app-devel-list


Re: parsing bibtex using gscanner

2013-02-18 Thread Rudra Banerjee
On Mon, 2013-02-18 at 19:02 +0100, David Nečas wrote:
> The best approach to parse a grammar is, you know, using a parser.
So anything better than bison?

___
gtk-app-devel-list mailing list
gtk-app-devel-list@gnome.org
https://mail.gnome.org/mailman/listinfo/gtk-app-devel-list

Re: parsing bibtex using gscanner

2013-02-18 Thread Rudra Banerjee
On Mon, 2013-02-18 at 19:02 +0100, David Nečas wrote:
> The best approach to parse a grammar is, you know, using a parser.
So is there any better option than bison?

___
gtk-app-devel-list mailing list
gtk-app-devel-list@gnome.org
https://mail.gnome.org/mailman/listinfo/gtk-app-devel-list