Update of /cvsroot/monetdb/pathfinder/src/sqlhelpers/xmlshred
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv10014

Modified Files:
        encoding.c hash.c 
Log Message:
-- pfshred now requires SAX2 (namespace URI processing)

-- Local name and URI hash tables operate with xmlChar* values



Index: encoding.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/src/sqlhelpers/xmlshred/encoding.c,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -d -r1.15 -r1.16
--- encoding.c  9 Jan 2008 09:16:05 -0000       1.15
+++ encoding.c  10 Jan 2008 09:02:16 -0000      1.16
@@ -25,13 +25,14 @@
  * $Id$
  */
 
-#include "pf_config.h"
 #include <stdio.h>
 #include <string.h>
 
 /* libxml SAX2 parser internals */
 #include "libxml/parserInternals.h"
 
+#include "pf_config.h"
+
 #include "encoding.h"
 #include "guides.h"
 #include "oops.h"
@@ -40,6 +41,9 @@
 
 #include <assert.h>
 
+#ifndef HAVE_SAX2
+ #error "libxml2 SAX2 interface required to compile the XML shredder `pfshred'"
+#endif
 
 FILE *out;
 FILE *out_attr;
@@ -256,15 +260,14 @@
    
     if (!localname)
         return -1;
+                     
+    localname_id = hashtable_find (localname_hash, localname);
 
-    localname_id = hashtable_find (localname_hash, (char *) localname);
-
-    /* key not found */
     if (NOKEY (localname_id)) {
-        /* create a new name id */
+        /* key not found, create a new name id */
         localname_id = global_localname_id++;
-        /* add the pair into the hashtable */
-        hashtable_insert (localname_hash, (char *) localname, localname_id);
+        /* add the (localname, localname_id) pair into the hash table */
+        hashtable_insert (localname_hash, localname, localname_id);
         /* print the name binding if necessary */
         if (shredstate.names_separate)
             fprintf (out_names, "%i, \"%s\"\n", localname_id, (char*) localname);
@@ -282,14 +285,13 @@
     if (!URI)
         return -1;
         
-    uri_id = hashtable_find (uris_hash, (char *) URI);
+    uri_id = hashtable_find (uris_hash, URI);
 
-    /* key not found */
     if (NOKEY (uri_id)) {
-        /* create a new URI id */
+        /* key not found, create a new URI id */
         uri_id = global_uri_id++;
-        /* add the pair into the hashtable */
-        hashtable_insert (uris_hash, (char *) URI, uri_id);
+        /* add the (URI, uri_id) pair to the hash table */
+        hashtable_insert (uris_hash, URI, uri_id);
         /* print the URI binding if necessary */
         if (shredstate.names_separate)
             fprintf (out_uris, "%i, \"%s\"\n", uri_id, (char*) URI);
@@ -313,7 +315,8 @@
 
     /* check if tagname is larger than TAG_SIZE characters */
     if (localname && xmlStrlen (localname) > TAG_SIZE)
-        BAILOUT ("attribute local name `%s' exceeds %u characters", localname, TAG_SIZE);
+        BAILOUT ("attribute local name `%s' exceeds %u characters", 
+                 localname, TAG_SIZE);
     
     if (URI && xmlStrlen (URI) > TAG_SIZE)
         BAILOUT ("namespace URI `%s' exceeds length of %u characters", 
@@ -686,7 +689,7 @@
 report (void)
 {
     if (text_stripped > 0) {
-        fprintf (err, "%u values were stripped to %u "
+        fprintf (err, "%u text node/attribute values were stripped to %u "
                       "character(s).\n", text_stripped, text_size);
     }
 }

Index: hash.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/src/sqlhelpers/xmlshred/hash.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -d -r1.7 -r1.8
--- hash.c      8 Jan 2008 10:00:07 -0000       1.7
+++ hash.c      10 Jan 2008 09:02:16 -0000      1.8
@@ -25,39 +25,39 @@
  * $Id$
  */
 
-#include "pf_config.h"
-#include "hash.h"
-#include "shred_helper.h"
 #include <string.h>
 #include <unistd.h>
 #include <assert.h>
 
+#include "pf_config.h"
+#include "hash.h"
+#include "shred_helper.h"
+
 /* We use a seperate chaining strategy to
- * mantain our hash_table,
- * So each bucket is a chained list itself,
- * to handle possible collisions.
+ * mantain our hash table, so each bucket is a chained list itself
+ * to handle collisions.
  */
 struct bucket_t {
-    char *key;      /**< key as string */
-    int id;         /**< name_id */
-    bucket_t* next; /**< next bucket in our list */
+    xmlChar  *key;      /**< key (elem/attr name or namespace URI) */
+    int       id;       /**< name_id */
+    bucket_t *next;     /**< next bucket in overflow chain */
 };
 
-/* size of the hashtable */
+/* hashtable size */
 #define PRIME 113
 
 /**
- * Lookup an id in a given bucket using it associated key.
+ * Lookup an id in a given bucket using its associated key.
  */
 static int
-find_id (bucket_t *bucket, char *key)
+find_id (bucket_t *bucket, const xmlChar *key)
 {
     bucket_t *cur_bucket = bucket;
     
     assert (key);
 
     while (cur_bucket)
-        if (strcmp (cur_bucket->key, key) == 0)
+        if (xmlStrcmp (cur_bucket->key, key) == 0)
             return cur_bucket->id;
         else
             cur_bucket = cur_bucket->next;
@@ -69,16 +69,16 @@
  * Attach an (id, key) pair to a given bucket list.
  */
 static bucket_t *
-bucket_insert (bucket_t *bucket, char *key, int id)
+bucket_insert (bucket_t *bucket, const xmlChar *key, int id)
 {
     int ident = find_id (bucket, key);
     
-    /* no key found */
     if (NOKEY (ident)) {
+        /* no key found */
         bucket_t *newbucket = (bucket_t*) malloc (sizeof (bucket_t));
 
         newbucket->id = id;
-        newbucket->key = strndup (key, strlen(key));
+        newbucket->key = xmlStrdup (key);
 
         /* add new bucket to the front of list */
         newbucket->next = bucket;
@@ -95,13 +95,12 @@
  * Create the hash value for a given key.
  */
 static int
-find_hash_bucket (char *key)
+find_hash_bucket (const xmlChar *key)
 {   
     assert (key);
     
-    size_t len = strlen (key);
-    /* keys have at least length 1 */
-    /* assert (len > 0); */
+    size_t len = xmlStrlen (key);
+
     /* build a hash out of the first and the last character
        and the length of the key */
     return (key[0] * key[MAX(0,len-1)] * len) % PRIME;
@@ -113,7 +112,9 @@
 hashtable_t
 new_hashtable (void)
 {
-    hashtable_t ht = malloc (PRIME * sizeof (bucket_t));
+    hashtable_t ht;
+    
+    ht = (hashtable_t) malloc (PRIME * sizeof (bucket_t));
     
     /* initialize the hash table */
     for (unsigned int i = 0; i < PRIME; i++)
@@ -126,24 +127,25 @@
  * Insert key and id into hashtable.
  */
 void
-hashtable_insert (hashtable_t hash_table, char *key, int id)
+hashtable_insert (hashtable_t hash_table, const xmlChar *key, int id)
 {
     int hashkey;
     
-    assert (hash_table && key);
-    
+    assert (hash_table);
+    assert (key);
+
     hashkey = find_hash_bucket (key);
     hash_table[hashkey] = bucket_insert (hash_table[hashkey], key, id);
-    return;
 }
 
 /**
  * Find element in hashtable. 
  */
 int
-hashtable_find (hashtable_t hash_table, char *key)
+hashtable_find (hashtable_t hash_table, const xmlChar *key)
 {
-    assert (key);
+    assert (key); 
+    
     return find_id (hash_table[find_hash_bucket (key)], key);
 }
 
@@ -156,7 +158,6 @@
     bucket_t *bucket, *free_bucket;
     
     assert (hash_table);
-    if (!hash_table) return;
 
     for (int i = 0; i < PRIME; i++) {
         bucket = hash_table[i];
@@ -165,9 +166,11 @@
             free_bucket = bucket;
             bucket = bucket->next;
             /* free the copied hash key */
-            if (free_bucket->key) free (free_bucket->key);
+            if (free_bucket->key) 
+                xmlFree (free_bucket->key);
             free (free_bucket);
         }
    }
+
    free(hash_table);
 }


-------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services for
just about anything Open Source.
http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to