Update of /cvsroot/monetdb/pathfinder/src/sqlhelpers/xmlshred
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv10014
Modified Files:
encoding.c hash.c
Log Message:
-- pfshred now requires SAX2 (namespace URI processing)
-- Local name and URI hash tables operate with xmlChar* values
Index: encoding.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/src/sqlhelpers/xmlshred/encoding.c,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -d -r1.15 -r1.16
--- encoding.c 9 Jan 2008 09:16:05 -0000 1.15
+++ encoding.c 10 Jan 2008 09:02:16 -0000 1.16
@@ -25,13 +25,14 @@
* $Id$
*/
-#include "pf_config.h"
#include <stdio.h>
#include <string.h>
/* libxml SAX2 parser internals */
#include "libxml/parserInternals.h"
+#include "pf_config.h"
+
#include "encoding.h"
#include "guides.h"
#include "oops.h"
@@ -40,6 +41,9 @@
#include <assert.h>
+#ifndef HAVE_SAX2
+ #error "libxml2 SAX2 interface required to compile the XML shredder `pfshred'"
+#endif
FILE *out;
FILE *out_attr;
@@ -256,15 +260,14 @@
if (!localname)
return -1;
+
+ localname_id = hashtable_find (localname_hash, localname);
- localname_id = hashtable_find (localname_hash, (char *) localname);
-
- /* key not found */
if (NOKEY (localname_id)) {
- /* create a new name id */
+ /* key not found, create a new name id */
localname_id = global_localname_id++;
- /* add the pair into the hashtable */
- hashtable_insert (localname_hash, (char *) localname, localname_id);
+ /* add the (localname, localname_id) pair into the hash table */
+ hashtable_insert (localname_hash, localname, localname_id);
/* print the name binding if necessary */
if (shredstate.names_separate)
fprintf (out_names, "%i, \"%s\"\n", localname_id, (char*)
localname);
@@ -282,14 +285,13 @@
if (!URI)
return -1;
- uri_id = hashtable_find (uris_hash, (char *) URI);
+ uri_id = hashtable_find (uris_hash, URI);
- /* key not found */
if (NOKEY (uri_id)) {
- /* create a new URI id */
+ /* key not found, create a new URI id */
uri_id = global_uri_id++;
- /* add the pair into the hashtable */
- hashtable_insert (uris_hash, (char *) URI, uri_id);
+ /* add the (URI, uri_id) pair to the hash table */
+ hashtable_insert (uris_hash, URI, uri_id);
/* print the URI binding if necessary */
if (shredstate.names_separate)
fprintf (out_uris, "%i, \"%s\"\n", uri_id, (char*) URI);
@@ -313,7 +315,8 @@
/* check if tagname is larger than TAG_SIZE characters */
if (localname && xmlStrlen (localname) > TAG_SIZE)
- BAILOUT ("attribute local name `%s' exceeds %u characters", localname,
TAG_SIZE);
+ BAILOUT ("attribute local name `%s' exceeds %u characters",
+ localname, TAG_SIZE);
if (URI && xmlStrlen (URI) > TAG_SIZE)
BAILOUT ("namespace URI `%s' exceeds length of %u characters",
@@ -686,7 +689,7 @@
report (void)
{
if (text_stripped > 0) {
- fprintf (err, "%u values were stripped to %u "
+ fprintf (err, "%u text node/attribute values were stripped to %u "
"character(s).\n", text_stripped, text_size);
}
}
Index: hash.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/src/sqlhelpers/xmlshred/hash.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -d -r1.7 -r1.8
--- hash.c 8 Jan 2008 10:00:07 -0000 1.7
+++ hash.c 10 Jan 2008 09:02:16 -0000 1.8
@@ -25,39 +25,39 @@
* $Id$
*/
-#include "pf_config.h"
-#include "hash.h"
-#include "shred_helper.h"
#include <string.h>
#include <unistd.h>
#include <assert.h>
+#include "pf_config.h"
+#include "hash.h"
+#include "shred_helper.h"
+
/* We use a seperate chaining strategy to
- * mantain our hash_table,
- * So each bucket is a chained list itself,
- * to handle possible collisions.
+ * maintain our hash table, so each bucket is a chained list itself
+ * to handle collisions.
*/
struct bucket_t {
- char *key; /**< key as string */
- int id; /**< name_id */
- bucket_t* next; /**< next bucket in our list */
+ xmlChar *key; /**< key (elem/attr name or namespace URI) */
+ int id; /**< name_id */
+ bucket_t *next; /**< next bucket in overflow chain */
};
-/* size of the hashtable */
+/* hashtable size */
#define PRIME 113
/**
- * Lookup an id in a given bucket using it associated key.
+ * Lookup an id in a given bucket using its associated key.
*/
static int
-find_id (bucket_t *bucket, char *key)
+find_id (bucket_t *bucket, const xmlChar *key)
{
bucket_t *cur_bucket = bucket;
assert (key);
while (cur_bucket)
- if (strcmp (cur_bucket->key, key) == 0)
+ if (xmlStrcmp (cur_bucket->key, key) == 0)
return cur_bucket->id;
else
cur_bucket = cur_bucket->next;
@@ -69,16 +69,16 @@
* Attach an (id, key) pair to a given bucket list.
*/
static bucket_t *
-bucket_insert (bucket_t *bucket, char *key, int id)
+bucket_insert (bucket_t *bucket, const xmlChar *key, int id)
{
int ident = find_id (bucket, key);
- /* no key found */
if (NOKEY (ident)) {
+ /* no key found */
bucket_t *newbucket = (bucket_t*) malloc (sizeof (bucket_t));
newbucket->id = id;
- newbucket->key = strndup (key, strlen(key));
+ newbucket->key = xmlStrdup (key);
/* add new bucket to the front of list */
newbucket->next = bucket;
@@ -95,13 +95,12 @@
* Create the hash value for a given key.
*/
static int
-find_hash_bucket (char *key)
+find_hash_bucket (const xmlChar *key)
{
assert (key);
- size_t len = strlen (key);
- /* keys have at least length 1 */
- /* assert (len > 0); */
+ size_t len = xmlStrlen (key);
+
/* build a hash out of the first and the last character
and the length of the key */
return (key[0] * key[MAX(0,len-1)] * len) % PRIME;
@@ -113,7 +112,9 @@
hashtable_t
new_hashtable (void)
{
- hashtable_t ht = malloc (PRIME * sizeof (bucket_t));
+ hashtable_t ht;
+
+ ht = (hashtable_t) malloc (PRIME * sizeof (bucket_t));
/* initialize the hash table */
for (unsigned int i = 0; i < PRIME; i++)
@@ -126,24 +127,25 @@
* Insert key and id into hashtable.
*/
void
-hashtable_insert (hashtable_t hash_table, char *key, int id)
+hashtable_insert (hashtable_t hash_table, const xmlChar *key, int id)
{
int hashkey;
- assert (hash_table && key);
-
+ assert (hash_table);
+ assert (key);
+
hashkey = find_hash_bucket (key);
hash_table[hashkey] = bucket_insert (hash_table[hashkey], key, id);
- return;
}
/**
* Find element in hashtable.
*/
int
-hashtable_find (hashtable_t hash_table, char *key)
+hashtable_find (hashtable_t hash_table, const xmlChar *key)
{
- assert (key);
+ assert (key);
+
return find_id (hash_table[find_hash_bucket (key)], key);
}
@@ -156,7 +158,6 @@
bucket_t *bucket, *free_bucket;
assert (hash_table);
- if (!hash_table) return;
for (int i = 0; i < PRIME; i++) {
bucket = hash_table[i];
@@ -165,9 +166,11 @@
free_bucket = bucket;
bucket = bucket->next;
/* free the copied hash key */
- if (free_bucket->key) free (free_bucket->key);
+ if (free_bucket->key)
+ xmlFree (free_bucket->key);
free (free_bucket);
}
}
+
free(hash_table);
}
-------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services for
just about anything Open Source.
http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins