Update of /cvsroot/monetdb/pathfinder/runtime
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv5106

Modified Files:
      Tag: XQuery_0-18
        shredder.mx 
Log Message:
This is a first working implementation of EXTERNAL ENTITIES. I consider
this a bug fix because it solves an open bug, but it feels like a new
feature :). Some NOTES:

- The last line of the entity page of xmlsoft (http://xmlsoft.org/entities.html)
  reads:

  WARNING: handling entities on top of the libxml2 SAX interface is
  difficult!!! If you plan to use non-predefined entities in your documents,
  then the learning curve to handle then using the SAX API may be long. If
  you plan to use complex documents, I strongly suggest you consider using
  the DOM interface instead and let libxml deal with the complexity rather
  than trying to do it yourself.

  I find this very true:)

- internal subsets are created automatically. You only have to create
  a getEntity() callback with a hashtable lookup function and it works.

- external subsets are NOT created automatically. You have to create an
  externalSubset() callback. There you have to parse the DTD by hand and
  install all entity declarations in the document structure. Luckily, the
  same lookup as for internal entities can then be used.

- The document structure ((xmlParserCtxtPtr)ctx->myDoc) is also not
  created in case of external subsets (optimization??). You have to create
  the document structure yourself to be able to store the external entities
  in the way the internal entities are stored.

- I do not know if this solution is very portable. It feels a bit like a
  hack, but other solutions I saw also looked like hacks and some of
  them did not work on my SuSE 9.3 system. But the implementation should
  not influence the shredder when no external entities are used.



Index: shredder.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/shredder.mx,v
retrieving revision 1.126.2.1
retrieving revision 1.126.2.2
diff -u -d -r1.126.2.1 -r1.126.2.2
--- shredder.mx 5 Jun 2007 07:30:46 -0000       1.126.2.1
+++ shredder.mx 6 Jun 2007 14:26:47 -0000       1.126.2.2
@@ -648,7 +648,7 @@
  * misc shredder helper functions
  * - handle_xml_chars()   buffered string reader
  * - handle_pfx_uri_loc() administer double-eliminated use of Q-Names 
- * - handle_ext_subset()  handle a DTD (external subset), needed to identify 
ID/IDREF attrs
+ * - handle_externalSubset()  handle a DTD (external subset), needed to 
identify ID/IDREF attrs
  * 
====================================================================================
 */
 
 static int 
@@ -720,9 +720,10 @@
 }
 
 static int 
-handle_ext_subset(shredCtxStruct *shredCtx, 
+handle_externalSubset(shredCtxStruct *shredCtx, 
                   xmlDtdPtr dtd)
 {
+    xmlParserCtxtPtr ctx = shredCtx->xmlCtx;
     struct _xmlNode *p = dtd->children;
 
     while(p) {
@@ -740,13 +741,24 @@
                 default:
                     break; /* ignore */
             }
-        }
+        } else if (p->type == XML_ENTITY_DECL) {
+            xmlEntityPtr ep =(xmlEntityPtr)p;
+           if ( ! xmlAddDtdEntity(
+               ctx->myDoc,
+               ep->name,
+               ep->type,
+               ep->ExternalID,
+               ep->SystemID,
+               ep->content)
+              ) {
+                   GDKerror("fail to install ENTITY(\"%s\")\n",ep->name);
+           }
+       }
         p = p->next;
     }
     return GDK_SUCCEED;
 }
 
-
 /* 
====================================================================================
  * SAX2 callback functions 
  * - shred_start_document()
@@ -766,6 +778,7 @@
 shred_start_document(void *xmlCtx)
 {
     shredCtxStruct *shredCtx = (shredCtxStruct*) xmlCtx;
+    xmlParserCtxtPtr ctx = shredCtx->xmlCtx;
 #ifdef DEBUG
     stream_printf(GDKout, "start_document()\n");
 #endif
@@ -1151,6 +1164,7 @@
    (void)*defaultValue;
    (void)tree;
 
+    stream_printf(GDKout, "#XXXX\n");
     switch(type) {
         case XML_ATTRIBUTE_ID:
         case XML_ATTRIBUTE_IDREF:
@@ -1168,22 +1182,29 @@
                       const xmlChar *ExternalID, 
                       const xmlChar *SystemID)
 {
-    shredCtxStruct *shredCtx = (shredCtxStruct*) xmlCtx;
    (void)name;
+
+    shredCtxStruct *shredCtx = (shredCtxStruct*)xmlCtx;
+    xmlParserCtxtPtr ctx = ((shredCtxStruct*) xmlCtx)->xmlCtx;
 #ifdef ADB_DEBUG
-    stream_printf(GDKout, "shred_external_subset(ctx, %s, %s, %s)\n", name, 
ExternalID, SystemID);
+    stream_printf(GDKerr, "shred_external_subset(ctx, %s, %s, %s)\n", name, 
ExternalID, SystemID);
 #endif
-    if (ExternalID || SystemID) {
-        /* INCOMPLETE, this part only works with absolute filenames or
-         * files located in the current working directory! A bit more
-         * inteligence could be applied here.
-         */
+    if (ExternalID || SystemID) { /* ignore dummy calls */
         xmlDtdPtr dtd;
 
         if ((dtd=xmlParseDTD(ExternalID, SystemID))) {
-            if (!handle_ext_subset(shredCtx, dtd))
+           if ( !ctx->myDoc ) {
+               /* Mysteriously no myDoc is created by libxml2. For internal
+                * subsets this is done automatically. For external subsets
+                * it has to be done by hand. The parsed dtd is the external
+                * subset for this doc
+                */
+               ctx->myDoc = xmlNewDoc(ctx->version);
+               ctx->myDoc->extSubset = dtd;
+            }
+            if (!handle_externalSubset(shredCtx, dtd))
                 stream_printf(GDKout, "!WARNING: xmlParseDTD(\"%s\") failed, 
skipping ID/IDREF information.\n", SystemID);
-            xmlFreeDtd(dtd);
+            // xmlFreeDtd(dtd); should be done by freeer of myDoc 
         } else {
             GDKerror("shred_external_subset: WARNING: xmlParseDTD(\"%s\") 
FAILED, NO ID/IDREF QUERIES\n", SystemID);
             GDKerror("shred_external_subset: NOTE   : maybe using absolute 
filenames works, sorry!\n");
@@ -1205,39 +1226,29 @@
 #endif
        xmlParserCtxtPtr ctx = ((shredCtxStruct*) xmlCtx)->xmlCtx;
        /* lookup the entity in the document entity hash table */
-       return xmlGetDocEntity(ctx->myDoc,name);
+       /* maybe we should use xmlGetDtdEntity in case of failure */
+       xmlEntityPtr res = xmlGetDocEntity(ctx->myDoc,name);
+#if 0
+       if ( res ) {
+           stream_printf(GDKout,"#!found ENTITY \"%s\"\n",name);
+       } else {
+           stream_printf(GDKout,"#!cannot find ENTITY \"%s\"\n",name);
+       }
+#endif
        /* QUESTION: xmlGetDtdEntity() and xmlGetParameterEntity() were also
         * possible, whats the diff between the doc/dtd versions, they both
         * seem to work. */
+       return res;
 }
 
-#if 0
-/* My first try at building an entity table but this one was not necessary
- * because the internal subset table was already build.
- */
-static void
-shred_entityDecl(void *xmlCtx,
-                 const xmlChar *name,
-                 int type,
-                 const xmlChar *publicId,
-                 const xmlChar *systemId,
-                 xmlChar *content)
-{
-       xmlParserCtxtPtr ctx = ((shredCtxStruct*) xmlCtx)->xmlCtx;
-       if ( ! xmlAddDtdEntity(ctx->myDoc,name,type,publicId,systemId,content) )
-          stream_printf(GDKerr,"shred_entityDecl(ctx,\"%s\") FAIL\n",name);
-}
-#endif
-
-
-/* 
====================================================================================
+/* ====================================================================
  * the shredder and its data structures
  * - shredder_create()     create all data structures
  * - shredder_parse()      invoke the libxml2 SAX2 parser
  * - shredder_stats()      print some statistics (off by default)
  * - shredder_finalize()   finish the bats in case of a succesful parse
  * - shredder_free()       provides *atomic* cleanup
- * 
====================================================================================
 */
+ * ==================================================================== */
 
 /**
  * SAX callback table.
@@ -1258,7 +1269,7 @@
   , .hasExternalSubset     = 0
   , .resolveEntity         = 0
   , .getEntity             = shred_getEntity
-  , .entityDecl            = 0
+  , .entityDecl            = 0 // shred_entityDecl
   , .notationDecl          = 0
   , .attributeDecl         = shred_attribute_def
   , .elementDecl           = 0
@@ -1330,6 +1341,7 @@
         * TODO: how to prevent expansion of entities?
         */
         xmlCtx = xmlCreateURLParserCtxt(location,
+                       XML_PARSE_DTDLOAD|
                        XML_PARSE_XINCLUDE|
                        XML_PARSE_NOXINCNODE);
     } 


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to