Update of /cvsroot/monetdb/pathfinder/src/sqlhelpers/xmlshred
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv22861

Modified Files:
        encoding.c guides.c hash.c main.c shred_helper.c 
Log Message:
-- Formatting of encoded nodes now compiles the `-F' format string
   once into an array of function pointers (or ``lambdas'') and applies
   these functions to evaluate the format at encoding time.

   If this performs as expected, the ``fast format'' logic in the
   shredder can be removed.

-- Moved pf_config.h back to the very top of includes (thanks, Sjoerd).



Index: encoding.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/src/sqlhelpers/xmlshred/encoding.c,v
retrieving revision 1.17
retrieving revision 1.18
diff -u -d -r1.17 -r1.18
--- encoding.c  10 Jan 2008 09:55:01 -0000      1.17
+++ encoding.c  10 Jan 2008 14:34:34 -0000      1.18
@@ -25,22 +25,21 @@
  * $Id$
  */
 
+#include "pf_config.h"
+
 #include <stdio.h>
 #include <string.h>
+#include <assert.h>
 
 /* libxml SAX2 parser internals */
 #include "libxml/parserInternals.h"
 
-#include "pf_config.h"
-
 #include "encoding.h"
 #include "guides.h"
 #include "oops.h"
 #include "hash.h"
 #include "shred_helper.h"
 
-#include <assert.h>
-
 #ifndef HAVE_SAX2
  #error "libxml2 SAX2 interface required to compile the XML shredder `pfshred'"
 #endif
@@ -98,6 +97,21 @@
 static nat post;
 static nat rank;
 static nat att_id;
+             
+/* encoding format compilation */
+
+/* maximum and actual number of 
+ * formatting instructions `%.' in `-F' argument 
+ */
+#define FMT_MAX 32
+static unsigned int fmts;
+
+/* type of a node formatting function */
+typedef void (fmt_lambda_t) (node_t);
+
+/* formatting instructions and their separating strings */
+static fmt_lambda_t *fmt_funs[FMT_MAX];
+static char         *fmt_seps[FMT_MAX];
 
 void
 print_text (FILE *f, char *buf, size_t len) {
@@ -186,70 +200,117 @@
     fprintf (f, "\"%s\"", (char *) tuple.uri);
 }
 
+/* implementation of formatting functions */
+static void lambda_e (node_t n) 
+{ fprintf (out, SSZFMT, n.pre); }
+
+static void lambda_o (node_t n) 
+{ fprintf (out, SSZFMT, n.post); }
+
+static void lambda_E (node_t n) 
+{ fprintf (out, SSZFMT, n.pre_stretched); }
+
+static void lambda_O (node_t n) 
+{ fprintf (out, SSZFMT, n.post_stretched); }
+
+static void lambda_s (node_t n) 
+{ fprintf (out, SSZFMT, n.size); }
+
+static void lambda_l (node_t n) 
+{ fprintf (out, "%i", n.level); }
+
+static void lambda_k (node_t n) 
+{ print_kind (out, n.kind); }
+
+static void lambda_p (node_t n) 
+{ if (n.parent) lambda_e (*(n.parent)); }
+
+static void lambda_P (node_t n) 
+{ if (n.parent) lambda_E (*(n.parent)); }
+
+static void lambda_n (node_t n) 
+{ if (n.localname_id != -1) print_localname (out, n); }
+
+static void lambda_u (node_t n) 
+{ if (n.uri_id != -1) print_uri (out, n); }
+
+static void lambda_d (node_t n) 
+{ print_number (out, n); }
+
+static void lambda_t (node_t n) 
+{ print_value (out, n); }
+
+static void lambda_g (node_t n) 
+{ fprintf (out, SSZFMT, guide_val_ (n.guide)); }
+
+static void lambda_percent (node_t n) 
+{ (void) n; putc ('%', out); } 
+           
+/* binding formatting functions to formatting instructions `%.' */
+static fmt_lambda_t *fmt_lambdas[] = {
+    ['e'] = lambda_e 
+  , ['o'] = lambda_o 
+  , ['E'] = lambda_E 
+  , ['O'] = lambda_O 
+  , ['s'] = lambda_s 
+  , ['l'] = lambda_l 
+  , ['k'] = lambda_k
+  , ['p'] = lambda_p
+  , ['P'] = lambda_P
+  , ['n'] = lambda_n
+  , ['u'] = lambda_u
+  , ['d'] = lambda_d
+  , ['t'] = lambda_t
+  , ['g'] = lambda_g
+  , ['%'] = lambda_percent
+  , ['~'] = NULL
+};
+
 static void
-print_tuple (node_t tuple)
+apply_fmt (node_t tuple)
+{   
+    unsigned int fn;
+
+    /* alternate: 
+     * - print separator string
+     * - execute formatting instruction
+     * - print separator string
+     * - ...
+     */
+    fputs (fmt_seps[0], out);
+
+    for (fn = 0; fn < fmts; fn++) { 
+        (*fmt_funs[fn]) (tuple);
+        fputs (fmt_seps[fn + 1], out);
+    } 
+    
+    fputc ('\n', out);
+}
+
+static void
+compile_fmt (char *fmt)
 {
-    const char  *format = shredstate.format;
-    unsigned int i;
+    char *sep;
+    fmt_lambda_t *lambda;
+
+    fmts = 0;
     
-    /* Fast path for the default SQL format */
-    if (shredstate.fastformat) {
-        fprintf (out, SSZFMT ", " SSZFMT ", %i, ",
-                 tuple.pre, tuple.size, tuple.level);
-        print_kind (out, tuple.kind);
-        print_text (out, ", ", 2);
-        if (tuple.uri_id != -1)
-            print_uri (out, tuple);
-        print_text (out, ", ", 2);
-        if (tuple.localname_id != -1)
-            print_localname (out, tuple);
-        print_text (out, ", ", 2);
-        print_number (out, tuple);
-        print_text (out, ", ", 2);
-        print_value (out, tuple);
-        putc ('\n', out);
-        return;
+    while (fmts < FMT_MAX && (sep = strsep (&fmt, "%"))) {
+        fmt_seps[fmts] = sep;
+        if (fmt) {
+            lambda = fmt_lambdas[(int)(*fmt)];
+            if (!lambda)
+               SHoops (SH_FATAL,
+                       "unknown formatting instruction `%%%c' in argument to -F",
+                       *fmt);
+
+            fmt_funs[fmts] = lambda;
+
+            fmts++;
+            /* skip over formatting instruction character */
+            fmt++;
+        } 
     }
-    
-    for (i = 0; format[i]; i++)
-         if (format[i] == '%') {
-             i++;
-             switch (format[i]) {
-                 case 'e': fprintf (out, SSZFMT, tuple.pre); break;
-                 case 'o': fprintf (out, SSZFMT, tuple.post); break;
-                 case 'E': fprintf (out, SSZFMT, tuple.pre_stretched); break;
-                 case 'O': fprintf (out, SSZFMT, tuple.post_stretched); break;
-                 case 's': fprintf (out, SSZFMT, tuple.size); break;
-                 case 'l': fprintf (out, "%i",   tuple.level); break;
-                 case 'k': print_kind (out, tuple.kind); break;
-                 case 'p':
-                     if (tuple.parent)
-                         fprintf (out, SSZFMT, tuple.parent->pre);
-                     break;
-                 case 'P':
-                     if (tuple.parent)
-                         fprintf (out, SSZFMT,tuple.parent->pre_stretched);
-                     break;
-                 case 'n':
-                     if (tuple.localname_id != -1)
-                         print_localname (out, tuple);
-                     break;
-                 case 'u':
-                     if (tuple.uri_id != -1)
-                         print_uri (out, tuple);
-                     break;
-                 case 'd':  print_number (out, tuple); break;    
-                 case 't':  print_value (out, tuple); break;    
-                 case 'g':  fprintf (out, SSZFMT, guide_val_(tuple.guide)); break;
-                 case '%':  putc ('%', out); break;
-                 default:   SHoops (SH_FATAL,
-                                    "unexpected formatting character `%c' (print_tuple)",
-                                    format[i]);
-             }
-         }
-         else
-             putc (format[i], out);
-    putc ('\n', out);
 }
 
 static int
@@ -351,7 +412,7 @@
     post++;
     rank++;
     
-    print_tuple (stack[level]);
+    apply_fmt (stack[level]);
 
     if (stack[level].localname) xmlFree (stack[level].localname);
     if (stack[level].uri)       xmlFree (stack[level].uri);
@@ -422,14 +483,15 @@
 
     assert (level == 0);
 
-    post++;
     rank++;
 
     stack[level].post           = post;
     stack[level].post_stretched = rank;
     stack[level].size           = pre - stack[level].pre;
 
-    print_tuple (stack[level]);
+    apply_fmt (stack[level]);
+
+    post++;
 
     if (shredstate.statistics) 
         guide_occurrence (stack[level].guide);
@@ -579,7 +641,7 @@
     stack[level].post_stretched = rank;
     stack[level].size           = pre - stack[level].pre;
 
-    print_tuple (stack[level]);
+    apply_fmt (stack[level]);
 
     if (shredstate.statistics)
         guide_occurrence (stack[level].guide);
@@ -724,15 +786,18 @@
 
     /* how many characters should be stored in
      * the value column */
-    text_size = status->strip_values;
+    text_size = shredstate.strip_values;
 
     text_stripped = 0;
     tag_stripped  = 0;
 
+    /* compile the -F format string */
+    compile_fmt (shredstate.format);
+    
     /* initialize localname and URI hashes */
     localname_hash = new_hashtable (); 
     uris_hash = new_hashtable ();
-    /* pre-insert entry for empty namespace prefixes */
+    /* pre-insert entry for empty namespace URIs */
     generate_uri_id ((xmlChar *) "");
     
     /* Whether to print the node localnames and URIs

Index: shred_helper.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/src/sqlhelpers/xmlshred/shred_helper.c,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -d -r1.12 -r1.13
--- shred_helper.c      9 Jan 2008 17:24:44 -0000       1.12
+++ shred_helper.c      10 Jan 2008 14:34:34 -0000      1.13
@@ -25,6 +25,8 @@
  * $Id$
  */
 
+#include "pf_config.h"
+
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
@@ -32,7 +34,6 @@
 #include <assert.h>
 #include <errno.h>
  
-#include "pf_config.h"
 #include "shred_helper.h"
 #include "oops.h"
 

Index: main.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/src/sqlhelpers/xmlshred/main.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -d -r1.8 -r1.9
--- main.c      9 Jan 2008 17:24:44 -0000       1.8
+++ main.c      10 Jan 2008 14:34:34 -0000      1.9
@@ -26,6 +26,7 @@
  */
 
 #include "pf_config.h"
+
 #include <assert.h>
 #include <stdio.h>
 #ifdef HAVE_STDBOOL_H
@@ -211,7 +212,7 @@
      */
     progname = strndup (argv[0], FILENAME_MAX);
 
-    status.format = SQL_FORMAT; 
+    status.format = strdup(SQL_FORMAT); 
     status.fastformat = true;
     status.infile = NULL;
     status.outfile = NULL;

Index: guides.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/src/sqlhelpers/xmlshred/guides.c,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -d -r1.11 -r1.12
--- guides.c    10 Jan 2008 09:55:01 -0000      1.11
+++ guides.c    10 Jan 2008 14:34:34 -0000      1.12
@@ -25,11 +25,12 @@
  * $Id$
  */
 
+#include "pf_config.h"
+
 #include <stdio.h>
 #include <assert.h>
 #include <limits.h>
 
-#include "pf_config.h"
 #include "guides.h"
 
 /**

Index: hash.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/src/sqlhelpers/xmlshred/hash.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -d -r1.8 -r1.9
--- hash.c      10 Jan 2008 09:02:16 -0000      1.8
+++ hash.c      10 Jan 2008 14:34:34 -0000      1.9
@@ -25,11 +25,12 @@
  * $Id$
  */
 
+#include "pf_config.h"
+
 #include <string.h>
 #include <unistd.h>
 #include <assert.h>
 
-#include "pf_config.h"
 #include "hash.h"
 #include "shred_helper.h"
 


-------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services for
just about anything Open Source.
http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to