Changeset: 146b0a7a1b66 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=146b0a7a1b66
Added Files:
        monetdb5/extras/rdf/30_rdf.mal
        monetdb5/extras/rdf/rdfalgebra.c
        monetdb5/extras/rdf/rdfalgebra.mal
Modified Files:
        .hgtags
        monetdb5/extras/rdf/Makefile.ag
        monetdb5/extras/rdf/rdf.h
        monetdb5/extras/rdf/rdf_shredder.mx
        monetdb5/modules/mal/tokenizer.c
        monetdb5/modules/mal/tokenizer.h
        monetdb5/modules/mal/tokenizer.mal
        sql/backends/monet5/sql.mx
Branch: default
Log Message:

Merged lodrdf branch into default

lodrdf has landed on default


diffs (truncated from 1245 to 300 lines):

diff --git a/.hgtags b/.hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -502,3 +502,4 @@ f21bc494423c56e9cb68bae0b400e1f2a6d0c2bc
 f8e913e56b99223764b1c22f526087d6bcad2656 Jul2012_SP2_release
 2d08000a18ddbe8f2cc3f12e09b6def8e53ae53a Oct2012_1
 4233be3c7f49f704e5ed466d2c1836c9dcfbb9f7 Oct2012_release
+d762ae2e83bd42a48aebf0ebb051a643f77f77d7 Lod2_D2_5_Deliverable_MonetDBRDF
diff --git a/monetdb5/extras/rdf/30_rdf.mal b/monetdb5/extras/rdf/30_rdf.mal
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/rdf/30_rdf.mal
@@ -0,0 +1,20 @@
+# The contents of this file are subject to the MonetDB Public License
+# Version 1.1 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http://www.monetdb.org/Legal/MonetDBLicense
+#
+# Software distributed under the License is distributed on an "AS IS"
+# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+# License for the specific language governing rights and limitations
+# under the License.
+#
+# The Original Code is the MonetDB Database System.
+#
+# The Initial Developer of the Original Code is CWI.
+# Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
+# Copyright August 2008-2012 MonetDB B.V.
+# All Rights Reserved.
+
+# This loads the MonetDB/RDF module
+include tokenizer;
+include rdfalgebra;
diff --git a/monetdb5/extras/rdf/Makefile.ag b/monetdb5/extras/rdf/Makefile.ag
--- a/monetdb5/extras/rdf/Makefile.ag
+++ b/monetdb5/extras/rdf/Makefile.ag
@@ -16,6 +16,7 @@
 # All Rights Reserved.
 
 INCLUDES = ../../modules/atoms ../../modules/mal ../../mal \
+                  ../../modules/kernel \
                   ../../../clients/mapilib \
                   ../../../common/options \
                   ../../../common/stream \
@@ -25,7 +26,26 @@ INCLUDES = ../../modules/atoms ../../mod
 MTSAFE
 
 lib_rdf = {
-       NOINST
+       MODULE
        DIR = libdir/monetdb5
-       SOURCES = rdf_shredder.mx rdf.h
+       SOURCES = rdf.h rdf_shredder.mx rdfalgebra.c
+
+       LIBS = ../../tools/libmonetdb5 \
+                  ../../../gdk/libbat \
+                  $(MALLOC_LIBS) $(raptor_LIBS)
 }
+
+headers_rdf_mal = {
+       HEADERS = mal
+       DIR = libdir/monetdb5
+       SOURCES = rdfalgebra.mal
+}
+
+headers_autoload = {
+       HEADERS = mal
+       DIR = libdir/monetdb5/autoload
+       SOURCES = 30_rdf.mal
+}
+
+#EXTRA_DIST_DIR = Tests
+EXTRA_DIST = 30_rdf.mal rdfalgebra.mal
diff --git a/monetdb5/extras/rdf/rdf.h b/monetdb5/extras/rdf/rdf.h
--- a/monetdb5/extras/rdf/rdf.h
+++ b/monetdb5/extras/rdf/rdf.h
@@ -27,6 +27,8 @@
 #ifndef _RDF_H_
 #define _RDF_H_
 
+#include <gdk.h>
+
 #ifdef WIN32
 #ifndef LIBRDF
 #define rdf_export extern __declspec(dllimport)
@@ -41,7 +43,13 @@
 #define _RDF_DEBUG
 
 rdf_export str
-RDFParser(int *retval, str *location, str *graphname, str *schemam);
+RDFParser(BAT **graph, str *location, str *graphname, str *schemam);
+
+rdf_export str 
+RDFleftfetchjoin_sortedestimate(int *result, int *lid, int *rid, lng *estimate);
+rdf_export str 
+RDFleftfetchjoin_sorted(int *result, int* lid, int *rid);
+
 
 #define TRIPLE_STORE 1
 #define MLA_STORE    2
diff --git a/monetdb5/extras/rdf/rdf_shredder.mx b/monetdb5/extras/rdf/rdf_shredder.mx
--- a/monetdb5/extras/rdf/rdf_shredder.mx
+++ b/monetdb5/extras/rdf/rdf_shredder.mx
@@ -92,7 +92,7 @@ typedef struct parserData {
        int line;                     /* locator for errors     */
        int column;                   /* locator for errors     */
                                      /**GRAPH DATA             */
-       BAT *graph[N_GRAPH_BAT];      /* BATs for the result
+       BAT **graph;                  /* BATs for the result
                                         shredded RDF graph     */
 } parserData;
 
@@ -146,7 +146,7 @@ static void
 @= rdf_BUNappend_unq_1
 bun = BUNfnd(BATmirror(@1),(ptr)@2);
 if (bun == BUN_NONE) {
-       if (BATcount(@1) > 4 * @1->T->hash->mask) {
+       if (@1->T->hash && BATcount(@1) > 4 * @1->T->hash->mask) {
                HASHdestroy(@1);
                BAThash(BATmirror(@1), 2*BATcount(@1));
        }
@@ -255,7 +255,6 @@ create_BAT(int ht, int tt, int size)
 
        /* disable all properties */
        b->tsorted = FALSE;
-       b->trevsorted = FALSE;
        b->tdense = FALSE;
        b->tkey = FALSE;
        b->hdense = TRUE;
@@ -264,7 +263,7 @@ create_BAT(int ht, int tt, int size)
 }
 
 static parserData*
-parserData_create (str location)
+parserData_create (str location, BAT** graph)
 {
        int i;
 
@@ -277,8 +276,9 @@ parserData_create (str location)
        pdata->error = 0;
        pdata->warning = 0;
        pdata->location = location;
+       pdata->graph = graph;
 
-       for (i = 0; i < N_GRAPH_BAT; i++) {
+       for (i = 0; i <= N_GRAPH_BAT; i++) {
                pdata->graph[i] = NULL;
        }
 
@@ -306,7 +306,7 @@ parserData_create (str location)
                return NULL;
        }
        /* MAP_LEX must have the key property */
-       BATseqbase(pdata->graph[MAP_LEX], 1 << 30);
+       BATseqbase(pdata->graph[MAP_LEX], RDF_MIN_LITERAL);
        pdata->graph[MAP_LEX]->tkey = BOUND2BTRUE;
        pdata->graph[MAP_LEX]->T->nokey[0] = 0;
        pdata->graph[MAP_LEX]->T->nokey[1] = 0;
@@ -329,7 +329,7 @@ parserData_create (str location)
  * example, it holds that S_SPO == S_SOP.
  */
 @= order
-@:order2(@1,@2,@3,@4)@
+@:order3(@1,@2,@3,@4)@
 
 @= order2
 if (!CTrefine(&ctref, @1, @2))         /* refine @2 given @1= sorted  */
@@ -347,6 +347,7 @@ BBPcold(graph[@3_@4]->batCacheid);
 /* free ctref */
 BBPreclaim(ctref);
 @
+
 @= order3
 if ( !(CTrefine(&map_oid, @1, @2)         /* refine @3 given @1= sorted  */
                && CTrefine(&ctref, map_oid, @3)))/* refine @4 given @3          */
@@ -385,19 +386,19 @@ post_processing (parserData *pdata)
 
        /* order MAP_LEX */
        BATorder(BATmirror(graph[MAP_LEX]));
-       map_oid = BATmark(graph[MAP_LEX], 1<<30);   /* BATmark will create a copy */
+       map_oid = BATmark(graph[MAP_LEX], RDF_MIN_LITERAL);   /* BATmark will create a copy */
        BATorder(map_oid);
        BATsetaccess(map_oid, BAT_READ);        /* force BAtmark not to copy bat */
-       map_oid = BATmirror(BATmark(BATmirror(map_oid), 1<<30));
+       map_oid = BATmirror(BATmark(BATmirror(map_oid), RDF_MIN_LITERAL));
        BATsetaccess(graph[MAP_LEX], BAT_READ); /* force BATmark not to copy bat */
-       graph[MAP_LEX] = BATmirror(BATmark(BATmirror(graph[MAP_LEX]), 1<<30));
+       graph[MAP_LEX] = BATmirror(BATmark(BATmirror(graph[MAP_LEX]), RDF_MIN_LITERAL));
 
        /* convert old oids of O_sort to new ones */
        bi = bat_iterator(graph[O_sort]);
        mi = bat_iterator(map_oid);
        BATloop(graph[O_sort], p, d) {
                bt = (oid *) BUNtloc(bi, p);
-               if (*bt >= (1 << 30)) {
+               if (*bt >= (RDF_MIN_LITERAL)) {
                        BUNfndVOID(r, mi, bt);
                        void_inplace(graph[O_sort], p, BUNtloc(mi, r), 1);
                }
@@ -484,6 +485,7 @@ raptor_free_parser(rparser);
 raptor_free_uri(uri);
 raptor_finish();
 @
+
 @= clean
 if (pdata != NULL) {
        for (iret = 0; iret < N_GRAPH_BAT; iret++) {
@@ -494,9 +496,11 @@ if (pdata != NULL) {
 }
 @
 @c
+#define RDF_CHUNK_SIZE 100*1024*1024
+
 /* Main RDF parser function that drives raptor */
 str
-RDFParser (int *retval, str *location, str *graphname, str *schema)
+RDFParser (BAT **graph, str *location, str *graphname, str *schema)
 {
        raptor_parser *rparser;
        parserData *pdata;
@@ -504,8 +508,6 @@ RDFParser (int *retval, str *location, s
        bit isURI;
        str ret;
        int iret;
-       BAT **graph;
-       BAT *retbat;
        (void) graphname;
 
        /* init tokenizer */
@@ -519,7 +521,7 @@ RDFParser (int *retval, str *location, s
 #endif
 
        /* Init pdata  */
-       pdata = parserData_create(*location);
+       pdata = parserData_create(*location,graph);
        if (pdata == NULL) {
 #ifdef _TKNZR_H
                TKNZRclose(&iret);
@@ -555,16 +557,38 @@ RDFParser (int *retval, str *location, s
                uri = raptor_new_uri((unsigned char *) pdata->location);
                iret = raptor_parse_uri(rparser, uri, NULL);
        } else {
-               uri = raptor_new_uri(
-                               raptor_uri_filename_to_uri_string(pdata->location));
-               iret = raptor_parse_file(rparser, uri, NULL);
+               
+               /* Too slow loading --> use old code 
+               FILE *fp = fopen(pdata->location, "r");
+               char *buf = (char*) GDKmalloc(RDF_CHUNK_SIZE);
+               if (buf == NULL) {
+                       throw(RDF, "rdf.rdfShred",
+                               "could not allocate a %dMB file buffer\n", (int) (RDF_CHUNK_SIZE>>20));
+               }
+               uri = raptor_new_uri(raptor_uri_filename_to_uri_string(pdata->location));
+               iret = raptor_start_parse(rparser, uri);
+               while(fp && iret == 0) {
+                       ssize_t len = (ssize_t) fread(buf, 1, RDF_CHUNK_SIZE, fp);  
+                       iret = raptor_parse_chunk(rparser, (const unsigned char*) buf, (size_t) len, len < RDF_CHUNK_SIZE);
+               }
+               fclose(fp);
+               
+               */
+
+               /* does/may? not work on large files -- therefore the abpove chunked read
+                   iret = raptor_parse_file_stream(rparser, fp, pdata->location, uri); 
+                */
+
+               /* Old code */
+                uri = raptor_new_uri(
+                                raptor_uri_filename_to_uri_string(pdata->location));
+                iret = raptor_parse_file(rparser, uri, NULL);
        }
        @:clean_raptor@
 #ifdef _TKNZR_H
        TKNZRclose(&iret);
 #endif
 
-       graph = pdata->graph;
        assert (pdata->tcount == BATcount(graph[S_sort]) &&
                        pdata->tcount == BATcount(graph[P_sort]) &&
                        pdata->tcount == BATcount(graph[O_sort]));
@@ -593,25 +617,6 @@ RDFParser (int *retval, str *location, s
                @:clean@
                throw(RDF, "rdf.rdfShred", "could not post-proccess data");
        }
-
-       /* prepare return bat of bats */
-/* XXX: BAT columns of TYPE_bat are no longer allowed: this function
- * needs to be rewritten to return multiple BATs instead of a single
- * BAT-of-batS */
-       retbat = BATnew(TYPE_void, TYPE_bat, N_GRAPH_BAT);
-       if (retbat == NULL) {
-               @:clean@
-               throw(RDF, "rdf.rdfShred",
-                               "could not allocate enough memory for return bat");
-       }
-       BATseqbase(retbat, 0);
-       for (iret = 0; iret < N_GRAPH_BAT; iret++) {
-               retbat = BUNappend(retbat, &graph[iret]->batCacheid, TRUE);
-               BBPunfix(graph[iret]->batCacheid);
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to