Update of /cvsroot/monetdb/pathfinder/runtime
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv19804/runtime
Modified Files:
pathfinder.mx pf_support.mx shredder.mx
Log Message:
propagated changes of Wednesday Jun 06 2007 - Wednesday Jun 06 2007
from the XQuery_0-18 branch to the development trunk
Index: pathfinder.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/pathfinder.mx,v
retrieving revision 1.368
retrieving revision 1.369
diff -u -d -r1.368 -r1.369
--- pathfinder.mx 3 Jun 2007 20:21:58 -0000 1.368
+++ pathfinder.mx 6 Jun 2007 20:19:29 -0000 1.369
@@ -3467,6 +3467,10 @@
if (count(selidx_colname) > 0)
selidx_colname := reverse(reverse(idx_colnames).kunique().sort());
+ var chk := select(idx_names.histogram(),2,int_nil);
+ if (count(chk) > 0)
+ ERROR("A document named '%s' is added more than once (%d such
errors).", reverse(chk).fetch(0), count(chk));
+
lock_set(pf_short);
err := CATCH(pivot := _shred_doc_base(selidx_colname, idx_names,
idx_colnames, wsid));
lock_unset(pf_short);
Index: pf_support.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/pf_support.mx,v
retrieving revision 1.244
retrieving revision 1.245
diff -u -d -r1.244 -r1.245
--- pf_support.mx 29 May 2007 13:57:55 -0000 1.244
+++ pf_support.mx 6 Jun 2007 20:19:31 -0000 1.245
@@ -1564,15 +1564,7 @@
min_iter := min(iter);
max_iter := max(iter);
- # pre-sort input
- if ( and(order,1) = 0 ) {
- var ord := item.tsort();
- ord := ord.CTrefine(iter).mark(0@0).reverse();
- iter := ord.leftfetchjoin(iter);
- item := ord.leftfetchjoin(item);
- iter := iter.chk_order();
- item := item.chk_order();
- }
+ @:pre_sort_input@
# the actual location step
if ( isnil(result) ) {
@@ -1605,6 +1597,17 @@
returns all nodes on the @1 axis of the ctx-nodes duplicate free for each
group.",
"pf_support");
@
+@= pre_sort_input
+ # pre-sort input
+ if ( and(order,1) = 0 ) {
+ var ord := item.tsort();
+ ord := ord.CTrefine(iter).mark(0@0).reverse();
+ iter := ord.leftfetchjoin(iter);
+ item := ord.leftfetchjoin(item);
+ iter := iter.chk_order();
+ item := item.chk_order();
+ }
+@
@= post_sort_output
# post-sort output
if ( (and(order,2) = 2) and not(ordered(reverse(result.fetch(1)))) ) {
@@ -1712,9 +1715,11 @@
@= upwards
PROC @1(BAT[void,oid] iter, BAT[void,oid] ctx, oid cont, BAT[void,bat] ws, int
order) : BAT[void,bat]
{
- # "order" is not (yet?) used, here.
var pre_sizes := ws.fetch(PRE_SIZE).fetch(cont);
var pre_levels := ws.fetch(PRE_LEVEL).fetch(cont);
+
+ # If ctx is not sorted on tail, [EMAIL PROTECTED] will internally sort ctx
and re-order iter accordingly.
+
var res := [EMAIL PROTECTED](iter.chk_order(), ctx.chk_order(), pre_sizes,
pre_levels);
var result := new(void,bat,2).seqbase([EMAIL
PROTECTED]).append(hmark(res,[EMAIL PROTECTED])).append(tmark(res,[EMAIL
PROTECTED])).access(BAT_READ);
@@ -1727,7 +1732,6 @@
@= ll_prec_foll_impl
PROC @1(BAT[void,oid] iter, BAT[void,oid] item, oid cont, BAT[void,bat] ws,
int order, chr kind_test) : BAT[void,bat]
{
- # "order" is not (yet?) used, here.
@:foll_prec_code@
var pre_sizes := ws.fetch(PRE_SIZE).fetch(cont);
var pre_kinds;
@@ -1737,6 +1741,11 @@
} else {
pre_kinds := ws.fetch(PRE_KIND).fetch(cont);
}
+
+ iter := iter.chk_order();
+ item := item.chk_order();
+ @:pre_sort_input@
+
var res := [EMAIL PROTECTED](iter.chk_order(), item.chk_order(), doc_pre,
pre_sizes, pre_kinds, kind_test);
var result := new(void,bat,2).seqbase([EMAIL
PROTECTED]).append(hmark(res,[EMAIL PROTECTED])).append(tmark(res,[EMAIL
PROTECTED])).access(BAT_READ);
Index: shredder.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/shredder.mx,v
retrieving revision 1.127
retrieving revision 1.128
diff -u -d -r1.127 -r1.128
--- shredder.mx 6 Jun 2007 12:16:35 -0000 1.127
+++ shredder.mx 6 Jun 2007 20:19:31 -0000 1.128
@@ -648,7 +648,7 @@
* misc shredder helper functions
* - handle_xml_chars() buffered string reader
* - handle_pfx_uri_loc() administer double-eliminated use of Q-Names
- * - handle_ext_subset() handle a DTD (external subset), needed to identify
ID/IDREF attrs
+ * - handle_externalSubset() handle a DTD (external subset), needed to
identify ID/IDREF attrs
*
====================================================================================
*/
static int
@@ -720,9 +720,10 @@
}
static int
-handle_ext_subset(shredCtxStruct *shredCtx,
+handle_externalSubset(shredCtxStruct *shredCtx,
xmlDtdPtr dtd)
{
+ xmlParserCtxtPtr ctx = shredCtx->xmlCtx;
struct _xmlNode *p = dtd->children;
while(p) {
@@ -740,13 +741,24 @@
default:
break; /* ignore */
}
- }
+ } else if (p->type == XML_ENTITY_DECL) {
+ xmlEntityPtr ep =(xmlEntityPtr)p;
+ if ( ! xmlAddDtdEntity(
+ ctx->myDoc,
+ ep->name,
+ ep->type,
+ ep->ExternalID,
+ ep->SystemID,
+ ep->content)
+ ) {
+ GDKerror("fail to install ENTITY(\"%s\")\n",ep->name);
+ }
+ }
p = p->next;
}
return GDK_SUCCEED;
}
-
/*
====================================================================================
* SAX2 callback functions
* - shred_start_document()
@@ -1168,22 +1180,29 @@
const xmlChar *ExternalID,
const xmlChar *SystemID)
{
- shredCtxStruct *shredCtx = (shredCtxStruct*) xmlCtx;
(void)name;
+
+ shredCtxStruct *shredCtx = (shredCtxStruct*)xmlCtx;
+ xmlParserCtxtPtr ctx = ((shredCtxStruct*) xmlCtx)->xmlCtx;
#ifdef ADB_DEBUG
- stream_printf(GDKout, "shred_external_subset(ctx, %s, %s, %s)\n", name,
ExternalID, SystemID);
+ stream_printf(GDKerr, "shred_external_subset(ctx, %s, %s, %s)\n", name,
ExternalID, SystemID);
#endif
- if (ExternalID || SystemID) {
- /* INCOMPLETE, this part only works with absolute filenames or
- * files located in the current working directory! A bit more
- * inteligence could be applied here.
- */
+ if (ExternalID || SystemID) { /* ignore dummy calls */
xmlDtdPtr dtd;
if ((dtd=xmlParseDTD(ExternalID, SystemID))) {
- if (!handle_ext_subset(shredCtx, dtd))
+ if ( !ctx->myDoc ) {
+ /* Mysteriously no myDoc is created by libxml2. For internal
+ * subsets this is done automatically. For external subsets
+ * it has to be done by hand. The parsed dtd is the external
+ * subset for this doc
+ */
+ ctx->myDoc = xmlNewDoc(ctx->version);
+ ctx->myDoc->extSubset = dtd;
+ }
+ if (!handle_externalSubset(shredCtx, dtd))
stream_printf(GDKout, "!WARNING: xmlParseDTD(\"%s\") failed,
skipping ID/IDREF information.\n", SystemID);
- xmlFreeDtd(dtd);
+ // xmlFreeDtd(dtd); should be done by freeer of myDoc
} else {
GDKerror("shred_external_subset: WARNING: xmlParseDTD(\"%s\")
FAILED, NO ID/IDREF QUERIES\n", SystemID);
GDKerror("shred_external_subset: NOTE : maybe using absolute
filenames works, sorry!\n");
@@ -1205,39 +1224,29 @@
#endif
xmlParserCtxtPtr ctx = ((shredCtxStruct*) xmlCtx)->xmlCtx;
/* lookup the entity in the document entity hash table */
- return xmlGetDocEntity(ctx->myDoc,name);
+ /* maybe we should use xmlGetDtdEntity in case of failure */
+ xmlEntityPtr res = xmlGetDocEntity(ctx->myDoc,name);
+#if 0
+ if ( res ) {
+ stream_printf(GDKout,"#!found ENTITY \"%s\"\n",name);
+ } else {
+ stream_printf(GDKout,"#!cannot find ENTITY \"%s\"\n",name);
+ }
+#endif
/* QUESTION: xmlGetDtdEntity() and xmlGetParameterEntity() were also
* possible, whats the diff between the doc/dtd versions, they both
* seem to work. */
+ return res;
}
-#if 0
-/* My first try at building an entity table but this one was not necessary
- * because the internal subset table was already build.
- */
-static void
-shred_entityDecl(void *xmlCtx,
- const xmlChar *name,
- int type,
- const xmlChar *publicId,
- const xmlChar *systemId,
- xmlChar *content)
-{
- xmlParserCtxtPtr ctx = ((shredCtxStruct*) xmlCtx)->xmlCtx;
- if ( ! xmlAddDtdEntity(ctx->myDoc,name,type,publicId,systemId,content) )
- stream_printf(GDKerr,"shred_entityDecl(ctx,\"%s\") FAIL\n",name);
-}
-#endif
-
-
-/*
====================================================================================
+/* ====================================================================
* the shredder and its data structures
* - shredder_create() create all data structures
* - shredder_parse() invoke the libxml2 SAX2 parser
* - shredder_stats() print some statistics (off by default)
* - shredder_finalize() finish the bats in case of a succesful parse
* - shredder_free() provides *atomic* cleanup
- *
====================================================================================
*/
+ * ==================================================================== */
/**
* SAX callback table.
@@ -1258,7 +1267,7 @@
, .hasExternalSubset = 0
, .resolveEntity = 0
, .getEntity = shred_getEntity
- , .entityDecl = 0
+ , .entityDecl = 0 // shred_entityDecl
, .notationDecl = 0
, .attributeDecl = shred_attribute_def
, .elementDecl = 0
@@ -1330,6 +1339,7 @@
* TODO: how to prevent expansion of entities?
*/
xmlCtx = xmlCreateURLParserCtxt(location,
+ XML_PARSE_DTDLOAD|
XML_PARSE_XINCLUDE|
XML_PARSE_NOXINCNODE);
}
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins