Changeset: b3a9d8848cce for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=b3a9d8848cce
Added Files:
monetdb5/extras/rdf/rdf_shredder.c
Removed Files:
monetdb5/extras/rdf/rdf_shredder.mx
Modified Files:
monetdb5/extras/rdf/Makefile.ag
Branch: default
Log Message:
Another mx file down the drain.
diffs (truncated from 1280 to 300 lines):
diff --git a/monetdb5/extras/rdf/Makefile.ag b/monetdb5/extras/rdf/Makefile.ag
--- a/monetdb5/extras/rdf/Makefile.ag
+++ b/monetdb5/extras/rdf/Makefile.ag
@@ -28,7 +28,7 @@ MTSAFE
lib_rdf = {
MODULE
DIR = libdir/monetdb5
- SOURCES = rdf.h rdf_shredder.mx rdfalgebra.c
+ SOURCES = rdf.h rdf_shredder.c rdfalgebra.c
LIBS = ../../tools/libmonetdb5 \
../../../gdk/libbat \
diff --git a/monetdb5/extras/rdf/rdf_shredder.c
b/monetdb5/extras/rdf/rdf_shredder.c
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/rdf/rdf_shredder.c
@@ -0,0 +1,636 @@
+/*
+ * The contents of this file are subject to the MonetDB Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.monetdb.org/Legal/MonetDBLicense
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * The Original Code is the MonetDB Database System.
+ *
+ * The Initial Developer of the Original Code is CWI.
+ * Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
+ * Copyright August 2008-2013 MonetDB B.V.
+ * All Rights Reserved.
+*/
+/*
+ * (author) L.Sidirourgos
+ *
+ * Shredder for RDF Documents
+ */
+#include "monetdb_config.h"
+#include "mal_exception.h"
+#include "url.h"
+#include "tokenizer.h"
+#include <gdk.h>
+#include <rdf.h>
+#include <raptor.h>
+
+/*
+ * Static description of one BAT of the shredded RDF graph.  The graphdef[]
+ * table in this file is initialised positionally from these fields, so the
+ * field order must not change.
+ */
+typedef struct graphBATdef {
+ graphBATType batType; /* which graph BAT this entry describes (enum value) */
+ str name; /* name of the BAT; the graphdef[] entries all start with '_' */
+ int headType; /* GDK type of the left (head) column */
+ int tailType; /* GDK type of the right (tail) column */
+} graphBATdef;
+
+/* NOTE(review): batsz is not referenced in the visible part of this file;
+ * presumably the initial capacity of the graph BATs -- confirm at its use. */
+static BUN batsz = 10000000;
+
+/*
+ * This list must be kept aligned with the graphBATType enum.
+ * Exactly one of the two tables below is compiled, selected by STORE; both
+ * are declared with N_GRAPH_BAT entries.  Every entry has a void head; all
+ * tails are oids except for MAP_LEX, whose tail is a string.
+ */
+#if STORE == TRIPLE_STORE
+ static graphBATdef graphdef[N_GRAPH_BAT] = {
+ {S_sort, "_s_sort", TYPE_void, TYPE_oid},
+ {P_sort, "_p_sort", TYPE_void, TYPE_oid},
+ {O_sort, "_o_sort", TYPE_void, TYPE_oid},
+
+ {P_PO, "_p_po", TYPE_void, TYPE_oid},
+ {O_PO, "_o_po", TYPE_void, TYPE_oid},
+ {P_OP, "_p_op", TYPE_void, TYPE_oid},
+ {O_OP, "_o_op", TYPE_void, TYPE_oid},
+
+ {S_SO, "_s_so", TYPE_void, TYPE_oid},
+ {O_SO, "_o_so", TYPE_void, TYPE_oid},
+ {S_OS, "_s_os", TYPE_void, TYPE_oid},
+ {O_OS, "_o_os", TYPE_void, TYPE_oid},
+
+ {S_SP, "_s_sp", TYPE_void, TYPE_oid},
+ {P_SP, "_p_sp", TYPE_void, TYPE_oid},
+ {S_PS, "_s_ps", TYPE_void, TYPE_oid},
+ {P_PS, "_p_ps", TYPE_void, TYPE_oid},
+
+ {MAP_LEX, "_map_lex", TYPE_void, TYPE_str}
+ };
+#elif STORE == MLA_STORE
+ static graphBATdef graphdef[N_GRAPH_BAT] = {
+ {S_sort, "_s_sort", TYPE_void, TYPE_oid},
+ {P_sort, "_p_sort", TYPE_void, TYPE_oid},
+ {O_sort, "_o_sort", TYPE_void, TYPE_oid},
+ {MAP_LEX, "_map_lex", TYPE_void, TYPE_str}
+ };
+#endif /* STORE */
+
+/*
+ * State shared between the shredder and the raptor callbacks in this file.
+ * A pointer to this struct is handed to every handler as its user_data
+ * argument.
+ */
+typedef struct parserData {
+ /**PROPERTIES */
+ str location; /* rdf data file location */
+ oid tcount; /* triple count; incremented once per tripleHandler() call */
+ raptor_parser *rparser; /* the parser object; aborted by raptor_exception() */
+ /**ERROR HANDLING */
+ int exception; /* raise an exception; counter bumped by raptor_exception() */
+ int warning; /* number of warning msgs */
+ int error; /* number of error msgs */
+ int fatal; /* number of fatal msgs */
+ const char *exceptionMsg; /* exception msgs; last message passed to raptor_exception() */
+ const char *warningMsg; /* warning msgs; GDKstrdup() copy of the last warning */
+ const char *errorMsg; /* error msgs; GDKstrdup() copy of the last error */
+ const char *fatalMsg; /* fatal msgs; GDKstrdup() copy of the last fatal error */
+ int line; /* locator for errors; line of the last reported message */
+ int column; /* locator for errors; column of the last reported message */
+ /**GRAPH DATA */
+ BAT **graph; /* BATs for the result
+ shredded RDF graph; indexed by graphBATType values */
+} parserData;
+
+/*
+ * The (fatal) errors and warnings produced by the raptor parser are handled
+ * by the next three message handler functions.
+ */
+#define raptor_exception(P,M) \
+P->exception++;\
+P->exceptionMsg = M;\
+raptor_parse_abort (P->rparser);
+
+/*
+ * raptor callback for fatal errors.
+ *
+ * user_data  the parserData of the running parse
+ * locator    position of the problem in the input, may be NULL
+ * message    text supplied by raptor; a GDKstrdup() copy is kept
+ *
+ * The copy replaces any earlier fatalMsg without freeing it.
+ */
+static void
+fatalHandler (void *user_data, raptor_locator* locator,
+              const char *message)
+{
+    parserData *state = user_data;
+
+    state->fatal++;
+    state->fatalMsg = GDKstrdup(message);
+    mnstr_printf(GDKout, "rdflib: fatal:%s\n", state->fatalMsg);
+
+    /* without a locator the previously stored position is left untouched */
+    if (locator == NULL)
+        return;
+
+    state->line = locator->line;
+    state->column = locator->column;
+}
+
+/*
+ * raptor callback for (non-fatal) errors.
+ *
+ * user_data  the parserData of the running parse
+ * locator    position of the problem in the input, may be NULL
+ * message    text supplied by raptor; a GDKstrdup() copy is stored in
+ *            errorMsg and echoed to GDKout
+ *
+ * Returns nothing and has file scope only, like fatalHandler().
+ * NOTE(review): the previous errorMsg is overwritten without being freed.
+ */
+static void
+errorHandler (void *user_data, raptor_locator* locator,
+              const char *message)
+{
+    parserData *pdata = (parserData *) user_data;
+
+    pdata->errorMsg = GDKstrdup(message);
+    mnstr_printf(GDKout, "rdflib: error:%s\n", pdata->errorMsg);
+    pdata->error++;
+
+    /* check for a valid locator object and only then use it */
+    if (locator != NULL) {
+        pdata->line = locator->line;
+        pdata->column = locator->column;
+    }
+}
+
+/*
+ * raptor callback for warnings.
+ *
+ * user_data  the parserData of the running parse
+ * locator    position of the problem in the input, may be NULL
+ * message    text supplied by raptor; a GDKstrdup() copy is stored in
+ *            warningMsg and echoed to GDKout
+ *
+ * Returns nothing and has file scope only, like fatalHandler().
+ * NOTE(review): the previous warningMsg is overwritten without being freed.
+ */
+static void
+warningHandler (void *user_data, raptor_locator* locator,
+                const char *message)
+{
+    parserData *pdata = (parserData *) user_data;
+
+    pdata->warningMsg = GDKstrdup(message);
+    mnstr_printf(GDKout, "rdflib: warning:%s\n", pdata->warningMsg);
+    pdata->warning++;
+
+    /* check for a valid locator object and only then use it */
+    if (locator != NULL) {
+        pdata->line = locator->line;
+        pdata->column = locator->column;
+    }
+}
+
+
+/*
+ * The raptor parser needs to register a callback function that handles one
+ * triple at a time. Function tripleHandler() does exactly this.
+ */
+
+#define rdf_BUNappend_unq(X,Y)\
+bun = BUNfnd(BATmirror(X),(ptr)Y);\
+if (bun == BUN_NONE) {\
+ if (BATcount(X) > 4 * X->T->hash->mask) {\
+ HASHdestroy(X);\
+ BAThash(BATmirror(X), 2*BATcount(X));\
+ }\
+ bun = (BUN) X->batCount;\
+ X = BUNappend(X, (ptr)Y, TRUE);\
+ if (X == NULL) {\
+ raptor_exception(pdata, "could not append");\
+ }\
+}
+
+#define rdf_BUNappend(X,Y) \
+{X = BUNappend(X, Y, TRUE);}\
+if (X == NULL) {\
+ raptor_exception(pdata, "could not append");\
+}
+
+/*
+ * raptor statement callback: shreds one triple into the graph BATs.
+ *
+ * user_data  the parserData of the running parse
+ * triple     the statement delivered by raptor
+ *
+ * For each of subject, predicate and object the lexical value is mapped to
+ * an id (through the tokenizer when _TKNZR_H is defined, otherwise through
+ * the MAP_LEX BAT) and that id is appended to S_sort, P_sort and O_sort
+ * respectively.  Literal objects always go through MAP_LEX.  A term of an
+ * unexpected type, or a failed append, raises a raptor_exception(), which
+ * aborts the parse.  tcount is incremented on every call, also after an
+ * exception.
+ */
+static void
+tripleHandler(void* user_data, const raptor_statement* triple)
+{
+    parserData *pdata = ((parserData *) user_data);
+    BUN bun = BUN_NONE;
+    BAT **graph = pdata->graph;
+
+    /* subject: a resource or a blank node */
+    if (triple->subject_type == RAPTOR_IDENTIFIER_TYPE_RESOURCE
+        || triple->subject_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
+#ifdef _TKNZR_H
+        {
+            /* NOTE(review): any result of TKNZRappend is not checked here */
+            str t = (str)triple->subject;
+            TKNZRappend(&bun,&t);
+        }
+#else
+        rdf_BUNappend_unq(graph[MAP_LEX], (str)triple->subject);
+#endif
+        rdf_BUNappend(graph[S_sort], &bun);
+        bun = BUN_NONE;
+    } else {
+        raptor_exception(pdata, "could not determine type of subject");
+    }
+
+    /* predicate: always a resource */
+    if (triple->predicate_type == RAPTOR_IDENTIFIER_TYPE_RESOURCE) {
+#ifdef _TKNZR_H
+        {
+            str t = (str)triple->predicate;
+            TKNZRappend(&bun,&t);
+        }
+#else
+        rdf_BUNappend_unq(graph[MAP_LEX], (str)triple->predicate);
+#endif
+        rdf_BUNappend(graph[P_sort], &bun);
+        bun = BUN_NONE;
+    } else {
+        raptor_exception(pdata, "could not determine type of property");
+    }
+
+    /* object: a resource, a blank node or a literal */
+    if (triple->object_type == RAPTOR_IDENTIFIER_TYPE_RESOURCE
+        || triple->object_type == RAPTOR_IDENTIFIER_TYPE_ANONYMOUS) {
+#ifdef _TKNZR_H
+        {
+            str t = (str)triple->object;
+            TKNZRappend(&bun,&t);
+        }
+#else
+        rdf_BUNappend_unq(graph[MAP_LEX], (str)triple->object);
+#endif
+        rdf_BUNappend(graph[O_sort], &bun);
+        bun = BUN_NONE;
+    } else if (triple->object_type == RAPTOR_IDENTIFIER_TYPE_LITERAL) {
+        /* literals are never tokenized: look them up in MAP_LEX directly */
+        bun = BUNfnd(BATmirror(graph[MAP_LEX]),(ptr)triple->object);
+        if (bun == BUN_NONE) {
+            /* grow the tail hash once the BAT outgrows it */
+            if (graph[MAP_LEX]->T->hash
+                && BATcount(graph[MAP_LEX]) > 4 * graph[MAP_LEX]->T->hash->mask) {
+                HASHdestroy(graph[MAP_LEX]);
+                BAThash(BATmirror(graph[MAP_LEX]), 2*BATcount(graph[MAP_LEX]));
+            }
+            /* id of the new entry: sequence base plus its position */
+            bun = (BUN) ((graph[MAP_LEX])->hseqbase + (graph[MAP_LEX])->batCount);
+            graph[MAP_LEX] = BUNappend(graph[MAP_LEX], (ptr)triple->object, TRUE);
+            if (graph[MAP_LEX] == NULL) {
+                raptor_exception(pdata, "could not append ingraph[MAP_LEX]");
+            }
+        } else {
+            bun = (graph[MAP_LEX])->hseqbase + bun;
+        }
+
+        rdf_BUNappend(graph[O_sort], &bun);
+        bun = BUN_NONE;
+    } else {
+        raptor_exception(pdata, "could not determine type of object");
+    }
+
+    pdata->tcount++;
+
+    return;
+}
+
+/*
+ * Function RDFParser() is the entry point to parse an RDF document.
+ */
+/*
+ * Allocate a BAT for the triple table.
+ *
+ * ht, tt  head and tail column types handed to BATnew()
+ * size    initial capacity handed to BATnew()
+ *
+ * Returns the new BAT with sequence base 0, or NULL when BATnew() fails.
+ * The tail is flagged as not sorted, not dense and not key; the head is
+ * flagged as dense.
+ */
+static BAT*
+create_BAT(int ht, int tt, int size)
+{
+    BAT *bat = BATnew(ht, tt, size);
+
+    if (bat != NULL) {
+        BATseqbase(bat, 0);
+
+        /* tail: make no assumptions about the values to be appended */
+        bat->tsorted = FALSE;
+        bat->tdense = FALSE;
+        bat->tkey = FALSE;
+        /* head: dense */
+        bat->hdense = TRUE;
+    }
+
+    return bat;
+}
+
+static parserData*
+parserData_create (str location, BAT** graph)
+{
+ int i;
+
+ parserData *pdata = (parserData *) GDKmalloc(sizeof(parserData));
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list