Changeset: b3a9d8848cce for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=b3a9d8848cce
Added Files:
        monetdb5/extras/rdf/rdf_shredder.c
Removed Files:
        monetdb5/extras/rdf/rdf_shredder.mx
Modified Files:
        monetdb5/extras/rdf/Makefile.ag
Branch: default
Log Message:

Another mx file down the drain.


diffs (truncated from 1280 to 300 lines):

diff --git a/monetdb5/extras/rdf/Makefile.ag b/monetdb5/extras/rdf/Makefile.ag
--- a/monetdb5/extras/rdf/Makefile.ag
+++ b/monetdb5/extras/rdf/Makefile.ag
@@ -28,7 +28,7 @@ MTSAFE
 lib_rdf = {
        MODULE
        DIR = libdir/monetdb5
-       SOURCES = rdf.h rdf_shredder.mx rdfalgebra.c
+       SOURCES = rdf.h rdf_shredder.c rdfalgebra.c
 
        LIBS = ../../tools/libmonetdb5 \
                   ../../../gdk/libbat \
diff --git a/monetdb5/extras/rdf/rdf_shredder.c b/monetdb5/extras/rdf/rdf_shredder.c
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/rdf/rdf_shredder.c
@@ -0,0 +1,636 @@
+/*
+ * The contents of this file are subject to the MonetDB Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.monetdb.org/Legal/MonetDBLicense
+ * 
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * 
+ * The Original Code is the MonetDB Database System.
+ * 
+ * The Initial Developer of the Original Code is CWI.
+ * Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
+ * Copyright August 2008-2013 MonetDB B.V.
+ * All Rights Reserved.
+*/
+/*
+ * (author) L.Sidirourgos
+ *
+ * Shredder for RDF Documents
+ */
+#include "monetdb_config.h"
+#include "mal_exception.h"
+#include "url.h"
+#include "tokenizer.h"
+#include <gdk.h>
+#include <rdf.h>
+#include <raptor.h>
+
+typedef struct graphBATdef {    /* one row of the graphdef table: the static
+                                   description of a single graph BAT    */
+       graphBATType batType;    /* BAT type (graphBATType enum value)   */
+       str name;                /* name of the BAT, e.g. "_s_sort"      */
+       int headType;            /* type of left column, e.g. TYPE_void  */
+       int tailType;            /* type of right column, e.g. TYPE_oid  */
+} graphBATdef;
+
+static BUN batsz = 10000000;   /* NOTE(review): not referenced in the visible
+                                  part of this file; presumably the initial
+                                  capacity of the graph BATs -- confirm */
+
+/* this list should be kept aligned with the graphBATType enum; which of
+ * the two layouts below is compiled is selected by the STORE macro */
+#if STORE == TRIPLE_STORE
+ static graphBATdef graphdef[N_GRAPH_BAT] = {
+       {S_sort,   "_s_sort",   TYPE_void, TYPE_oid},
+       {P_sort,   "_p_sort",   TYPE_void, TYPE_oid},
+       {O_sort,   "_o_sort",   TYPE_void, TYPE_oid},
+
+       {P_PO,     "_p_po",     TYPE_void, TYPE_oid},
+       {O_PO,     "_o_po",     TYPE_void, TYPE_oid},
+       {P_OP,     "_p_op",     TYPE_void, TYPE_oid},
+       {O_OP,     "_o_op",     TYPE_void, TYPE_oid},
+
+       {S_SO,     "_s_so",     TYPE_void, TYPE_oid},
+       {O_SO,     "_o_so",     TYPE_void, TYPE_oid},
+       {S_OS,     "_s_os",     TYPE_void, TYPE_oid},
+       {O_OS,     "_o_os",     TYPE_void, TYPE_oid},
+
+       {S_SP,     "_s_sp",     TYPE_void, TYPE_oid},
+       {P_SP,     "_p_sp",     TYPE_void, TYPE_oid},
+       {S_PS,     "_s_ps",     TYPE_void, TYPE_oid},
+       {P_PS,     "_p_ps",     TYPE_void, TYPE_oid},
+
+       {MAP_LEX, "_map_lex",   TYPE_void, TYPE_str}
+ };
+#elif STORE == MLA_STORE
+ static graphBATdef graphdef[N_GRAPH_BAT] = {
+       {S_sort,   "_s_sort",   TYPE_void, TYPE_oid},
+       {P_sort,   "_p_sort",   TYPE_void, TYPE_oid},
+       {O_sort,   "_o_sort",   TYPE_void, TYPE_oid},
+       {MAP_LEX, "_map_lex",   TYPE_void, TYPE_str}
+ };
+#endif /* STORE */
+
+typedef struct parserData {
+                                     /**PROPERTIES             */
+       str location;                 /* rdf data file location */
+       oid tcount;                   /* triple count, bumped once per
+                                        tripleHandler() call   */
+       raptor_parser *rparser;       /* the parser object, also used by
+                                        raptor_exception() to abort */
+                                     /**ERROR HANDLING         */
+       int exception;                /* number of exceptions raised
+                                        through raptor_exception() */
+       int warning;                  /* number of warning msgs */
+       int error;                    /* number of error   msgs */
+       int fatal;                    /* number of fatal   msgs */
+       const char *exceptionMsg;     /* last exception msg     */
+       const char *warningMsg;       /* last warning msg (GDKstrdup copy) */
+       const char *errorMsg;         /* last error   msg (GDKstrdup copy) */
+       const char *fatalMsg;         /* last fatal   msg (GDKstrdup copy) */
+       int line;                     /* line of the last message that
+                                        came with a locator    */
+       int column;                   /* column of the last message that
+                                        came with a locator    */
+                                     /**GRAPH DATA             */
+       BAT **graph;                  /* BATs for the result
+                                        shredded RDF graph     */
+} parserData;
+
+/*
+ * The (fatal) errors and warnings produced by the raptor parser are handled
+ * by the next three message handler functions.
+ */
+#define raptor_exception(P,M) \
+P->exception++;\
+P->exceptionMsg = M;\
+raptor_parse_abort (P->rparser);
+
+/*
+ * Message handler for fatal errors reported by the raptor parser.
+ * user_data is the parserData of the running parse.  A GDKstrdup copy of
+ * the message is stored in fatalMsg (a copy stored by an earlier call is
+ * not released here), the message is echoed on GDKout and the fatal
+ * counter is incremented.  line/column are only updated when raptor
+ * supplied a locator.
+ */
+static void
+fatalHandler (void *user_data, raptor_locator* locator,
+               const char *message)
+{
+       parserData *state = user_data;
+
+       state->fatalMsg = GDKstrdup(message);
+       mnstr_printf(GDKout, "rdflib: fatal:%s\n", state->fatalMsg);
+       state->fatal += 1;
+
+       /* without a locator the previously recorded position is kept */
+       if (locator == NULL)
+               return;
+       state->line = locator->line;
+       state->column = locator->column;
+}
+
+/*
+ * Message handler for (non-fatal) errors reported by the raptor parser.
+ * user_data is the parserData of the running parse.  A GDKstrdup copy of
+ * the message is stored in errorMsg (a copy stored by an earlier call is
+ * not released here), the message is echoed on GDKout and the error
+ * counter is incremented.
+ *
+ * Declared static void like fatalHandler: the function returns nothing,
+ * and an omitted return type (implicit int) is not valid since C99.
+ */
+static void
+errorHandler (void *user_data, raptor_locator* locator,
+               const char *message)
+{
+       parserData *pdata = (parserData *) user_data;
+
+       pdata->errorMsg = GDKstrdup(message);
+       mnstr_printf(GDKout, "rdflib: error:%s\n", pdata->errorMsg);
+       pdata->error++;
+
+       /* only a valid locator updates the recorded position */
+       if (locator != NULL) {
+               pdata->line = locator->line;
+               pdata->column = locator->column;
+       }
+}
+
+/*
+ * Message handler for warnings reported by the raptor parser.
+ * user_data is the parserData of the running parse.  A GDKstrdup copy of
+ * the message is stored in warningMsg (a copy stored by an earlier call
+ * is not released here), the message is echoed on GDKout and the warning
+ * counter is incremented.
+ *
+ * Declared static void like fatalHandler: the function returns nothing,
+ * and an omitted return type (implicit int) is not valid since C99.
+ */
+static void
+warningHandler (void *user_data, raptor_locator* locator,
+               const char *message)
+{
+       parserData *pdata = (parserData *) user_data;
+
+       pdata->warningMsg = GDKstrdup(message);
+       mnstr_printf(GDKout, "rdflib: warning:%s\n", pdata->warningMsg);
+       pdata->warning++;
+
+       /* only a valid locator updates the recorded position */
+       if (locator != NULL) {
+               pdata->line = locator->line;
+               pdata->column = locator->column;
+       }
+}
+
+
+/*
+ * The raptor parser needs to register a callback function that handles one
+ * triple at a time. Function tripleHandler() does exactly this.
+ */
+
+#define rdf_BUNappend_unq(X,Y)\
+bun = BUNfnd(BATmirror(X),(ptr)Y);\
+if (bun == BUN_NONE) {\
+       if (BATcount(X) > 4 * X->T->hash->mask) {\
+               HASHdestroy(X);\
+               BAThash(BATmirror(X), 2*BATcount(X));\
+       }\
+       bun = (BUN) X->batCount;\
+       X = BUNappend(X, (ptr)Y, TRUE);\
+       if (X == NULL) {\
+               raptor_exception(pdata, "could not append");\
+       }\
+}
+
+#define rdf_BUNappend(X,Y) \
+{X = BUNappend(X, Y, TRUE);}\
+if (X  == NULL) {\
+       raptor_exception(pdata, "could not append");\
+}
+
+/*
+ * Statement callback for the raptor parser: shreds one triple.
+ *
+ * user_data is the parserData of the running parse, triple the statement
+ * delivered by raptor.  For each of subject, predicate and object an id is
+ * determined and appended to graph[S_sort], graph[P_sort] and
+ * graph[O_sort] respectively:
+ *  - resources (and, for subject and object, anonymous nodes) get their id
+ *    from TKNZRappend() when _TKNZR_H is defined, otherwise from a unique
+ *    append to graph[MAP_LEX];
+ *  - literal objects are always looked up in / appended to graph[MAP_LEX].
+ * Any other term type raises an exception through raptor_exception(),
+ * which also aborts the parse.  Processing of the remaining terms is not
+ * cut short by an exception, and tcount is incremented on every call.
+ *
+ * NOTE(review): the result of TKNZRappend() is not checked.
+ */
+static void
+tripleHandler(void* user_data, const raptor_statement* triple)
+{
+       parserData *pdata = ((parserData *) user_data);
+       BUN bun = BUN_NONE;
+       BAT **graph = pdata->graph;
+
+       /* subject: resource or anonymous node */
+       if (triple->subject_type == RAPTOR_IDENTIFIER_TYPE_RESOURCE
+                       || triple->subject_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
+#ifdef _TKNZR_H
+               {
+                       str t = (str)triple->subject;
+                       TKNZRappend(&bun,&t);
+               }
+#else
+               rdf_BUNappend_unq(graph[MAP_LEX], (str)triple->subject);
+#endif
+               rdf_BUNappend(graph[S_sort], &bun);
+               bun = BUN_NONE;
+       } else {
+               raptor_exception(pdata, "could not determine type of subject");
+       }
+
+       /* predicate: resource only */
+       if (triple->predicate_type == RAPTOR_IDENTIFIER_TYPE_RESOURCE) {
+#ifdef _TKNZR_H
+               {
+                       str t = (str)triple->predicate;
+                       TKNZRappend(&bun,&t);
+               }
+#else
+               rdf_BUNappend_unq(graph[MAP_LEX], (str)triple->predicate);
+#endif
+               rdf_BUNappend(graph[P_sort], &bun);
+               bun = BUN_NONE;
+       } else {
+               raptor_exception(pdata, "could not determine type of property");
+       }
+
+       /* object: resource, anonymous node or literal */
+       if (triple->object_type == RAPTOR_IDENTIFIER_TYPE_RESOURCE
+                       || triple->object_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
+#ifdef _TKNZR_H
+               {
+                       str t = (str)triple->object;
+                       TKNZRappend(&bun,&t);
+               }
+#else
+               rdf_BUNappend_unq(graph[MAP_LEX], (str)triple->object);
+#endif
+               rdf_BUNappend(graph[O_sort], &bun);
+               bun = BUN_NONE;
+       } else if (triple->object_type == RAPTOR_IDENTIFIER_TYPE_LITERAL) {
+               /* literals bypass the tokenizer and live in MAP_LEX */
+               bun = BUNfnd(BATmirror(graph[MAP_LEX]),(ptr)triple->object);
+               if (bun == BUN_NONE) {
+                       /* grow the hash before the BAT outgrows it */
+                       if (graph[MAP_LEX]->T->hash
+                           && BATcount(graph[MAP_LEX]) > 4 * graph[MAP_LEX]->T->hash->mask) {
+                               HASHdestroy(graph[MAP_LEX]);
+                               BAThash(BATmirror(graph[MAP_LEX]),
+                                       2*BATcount(graph[MAP_LEX]));
+                       }
+                       /* id of the value about to be appended */
+                       bun = (BUN) ((graph[MAP_LEX])->hseqbase
+                                    + (graph[MAP_LEX])->batCount);
+                       graph[MAP_LEX] = BUNappend(graph[MAP_LEX],
+                                                  (ptr)triple->object, TRUE);
+                       if (graph[MAP_LEX] == NULL) {
+                               raptor_exception(pdata, "could not append ingraph[MAP_LEX]");
+                       }
+               } else {
+                       bun = (graph[MAP_LEX])->hseqbase + bun;
+               }
+
+               rdf_BUNappend(graph[O_sort], &bun);
+               bun = BUN_NONE;
+       } else {
+               raptor_exception(pdata, "could not determine type of object");
+       }
+
+       pdata->tcount++;
+
+       return;
+}
+
+/*
+ * Function RDFParser() is the entry point to parse an RDF document.
+ */
+/*
+ * Creates a BAT for the triple table with head type ht, tail type tt and
+ * initial capacity size.  Returns NULL when BATnew() fails.  On success
+ * the seqbase is set to 0, the tail properties sorted/dense/key are
+ * switched off and the head is flagged dense.
+ */
+static BAT*
+create_BAT(int ht, int tt, int size)
+{
+       BAT *bat = BATnew(ht, tt, size);
+
+       if (bat != NULL) {
+               BATseqbase(bat, 0);
+
+               /* no assumptions about the tail, dense head */
+               bat->tsorted = FALSE;
+               bat->tdense = FALSE;
+               bat->tkey = FALSE;
+               bat->hdense = TRUE;
+       }
+
+       return bat;
+}
+
+static parserData*
+parserData_create (str location, BAT** graph)
+{
+       int i;
+
+       parserData *pdata = (parserData *) GDKmalloc(sizeof(parserData));
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to