Changeset: da008339680a for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=da008339680a
Removed Files:
monetdb5/extras/rdf/rdf_shredder.mx
Modified Files:
monetdb5/extras/rdf/rdf_shredder.c
monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:
Find the maximum super CS for each CS. (Tuning in RDF schema.)
diffs (truncated from 863 to 300 lines):
diff --git a/monetdb5/extras/rdf/rdf_shredder.c b/monetdb5/extras/rdf/rdf_shredder.c
--- a/monetdb5/extras/rdf/rdf_shredder.c
+++ b/monetdb5/extras/rdf/rdf_shredder.c
@@ -264,18 +264,18 @@ getObjectType(unsigned char* objStr){
ObjectType obType;
if (strstr((const char*) objStr, "XMLSchema#date") != NULL){
obType = DATETIME;
- printf("%s: DateTime \n", objStr);
+ //printf("%s: DateTime \n", objStr);
}
else if (strstr((const char*) objStr, "XMLSchema#float") != NULL
|| strstr((const char*) objStr, "XMLSchema#integer") != NULL
)
{
obType = NUMERIC;
- printf("%s: Numeric \n", objStr);
+ //printf("%s: Numeric \n", objStr);
}
else {
obType = STRING;
- printf("%s: String \n", objStr);
+ //printf("%s: String \n", objStr);
}
return obType;
diff --git a/monetdb5/extras/rdf/rdf_shredder.mx b/monetdb5/extras/rdf/rdf_shredder.mx
deleted file mode 100644
--- a/monetdb5/extras/rdf/rdf_shredder.mx
+++ /dev/null
@@ -1,707 +0,0 @@
-@/
-The contents of this file are subject to the MonetDB Public License
-Version 1.1 (the "License"); you may not use this file except in
-compliance with the License. You may obtain a copy of the License at
-http://www.monetdb.org/Legal/MonetDBLicense
-
-Software distributed under the License is distributed on an "AS IS"
-basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
-License for the specific language governing rights and limitations
-under the License.
-
-The Original Code is the MonetDB Database System.
-
-The Initial Developer of the Original Code is CWI.
-Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
-Copyright August 2008-2013 MonetDB B.V.
-All Rights Reserved.
-@
-
-@f rdf_shredder
-
-@c
-/*
- * @a L.Sidirourgos, Minh-Duc Pham
- *
- * @+ Shredder for RDF Documents
- */
-#include "monetdb_config.h"
-#include "mal_exception.h"
-#include "url.h"
-#include "tokenizer.h"
-#include <gdk.h>
-#include <rdf.h>
-#include <raptor2.h>
-
-typedef struct graphBATdef {
- graphBATType batType; /* BAT type */
- str name; /* name of the BAT */
- int headType; /* type of left column */
- int tailType; /* type of right column */
-} graphBATdef;
-
-/* this list should be kept alligned with the graphBATType enum */
-#if STORE == TRIPLE_STORE
- static graphBATdef graphdef[N_GRAPH_BAT] = {
- {S_sort, "_s_sort", TYPE_void, TYPE_oid},
- {P_sort, "_p_sort", TYPE_void, TYPE_oid},
- {O_sort, "_o_sort", TYPE_void, TYPE_oid},
-
- {P_PO, "_p_po", TYPE_void, TYPE_oid},
- {O_PO, "_o_po", TYPE_void, TYPE_oid},
- {P_OP, "_p_op", TYPE_void, TYPE_oid},
- {O_OP, "_o_op", TYPE_void, TYPE_oid},
-
- {S_SO, "_s_so", TYPE_void, TYPE_oid},
- {O_SO, "_o_so", TYPE_void, TYPE_oid},
- {S_OS, "_s_os", TYPE_void, TYPE_oid},
- {O_OS, "_o_os", TYPE_void, TYPE_oid},
-
- {S_SP, "_s_sp", TYPE_void, TYPE_oid},
- {P_SP, "_p_sp", TYPE_void, TYPE_oid},
- {S_PS, "_s_ps", TYPE_void, TYPE_oid},
- {P_PS, "_p_ps", TYPE_void, TYPE_oid},
-
- {MAP_LEX, "_map_lex", TYPE_void, TYPE_str}
- };
-#elif STORE == MLA_STORE
- static graphBATdef graphdef[N_GRAPH_BAT] = {
- {S_sort, "_s_sort", TYPE_void, TYPE_oid},
- {P_sort, "_p_sort", TYPE_void, TYPE_oid},
- {O_sort, "_o_sort", TYPE_void, TYPE_oid},
- {MAP_LEX, "_map_lex", TYPE_void, TYPE_str}
- };
-#endif /* STORE */
-
-typedef struct parserData {
- /**PROPERTIES */
- str location; /* rdf data file location */
- oid tcount; /* triple count */
- raptor_parser *rparser; /* the parser object */
- /**ERROR HANDLING */
- int exception; /* raise an exception */
- int warning; /* number of warning msgs */
- int error; /* number of error msgs */
- int fatal; /* number of fatal msgs */
- const char *exceptionMsg; /* exception msgs */
- const char *warningMsg; /* warning msgs */
- const char *errorMsg; /* error msgs */
- const char *fatalMsg; /* fatal msgs */
- int line; /* locator for errors */
- int column; /* locator for errors */
- /**GRAPH DATA */
- BAT **graph; /* BATs for the result
- shredded RDF graph */
-} parserData;
-
-/*
- * @-
- * The (fatal) errors and warnings produced by the raptor parser are handled
- * by the next three message handler functions.
- */
-@= raptor_exception
-@1->exception++;
-@1->exceptionMsg = @2;
-raptor_parser_parse_abort (@1->rparser);
-
-@= rdf_parser_handler
-static void
-@1Handler (void *user_data, raptor_log_message* message)
-{
- parserData *pdata = ((parserData *) user_data);
- pdata->@1Msg = GDKstrdup(message->text);
- mnstr_printf(GDKout, "rdflib: @1:%s\n", pdata->@1Msg);
- pdata->@1++;
-
- /* check for a valid locator object and only then use it */
- if (message->locator != NULL) {
- pdata->line = message->locator->line;
- pdata->column = message->locator->column;
- mnstr_printf(GDKout, "rdflib: @1: at line %d column %d\n", pdata->line, pdata->column);
- }
-
-}
-
-@
-@c
-@:rdf_parser_handler(fatal)@
-@:rdf_parser_handler(error)@
-@:rdf_parser_handler(warning)@
-
-/*
- * @-
- * The raptor parser needs to register a callback function that handles one triple
- * at a time. Function rdf_parser_triple_handler() does exactly this.
- */
-@= rdf_insert
-#ifdef _TKNZR_H
- @:rdf_tknzr_insert(@2)@
-#else
- @:rdf_BUNappend_unq(@1, @2)@
-#endif
-
-
-@= rdf_BUNappend_unq_1
-bun = BUNfnd(BATmirror(@1),(ptr)@2);
-if (bun == BUN_NONE) {
- if (@1->T->hash && BATcount(@1) > 4 * @1->T->hash->mask) {
- HASHdestroy(@1);
- BAThash(BATmirror(@1), 2*BATcount(@1));
- }
- //bun = (BUN) ((@1)->hseqbase + (@1)->batCount);
- bun = (BUN) (RDF_MIN_LITERAL + (@1)->batCount);
-
- /* Add the type here by changing 2 bits at position 62, 63 of oid */
- if (@3 == DATETIME){
- printf("Datetime appears here \n Before: " BUNFMT "\n", bun);
- bun |= (BUN)1 << (sizeof(BUN)*8 - 3);
- printf("After: " BUNFMT "\n", bun);
- }
- else if (@3 == NUMERIC){
- printf("Numeric value appears here \n Before: " BUNFMT "\n", bun);
- bun |= (BUN)2 << (sizeof(BUN)*8 - 3);
- printf("After: " BUNFMT "\n", bun);
- }
- else { /* @3 == STRING */
- printf("String value appears here \n Before: " BUNFMT "\n", bun);
- bun |= (BUN)3 << (sizeof(BUN)*8 - 3);
- printf("After: " BUNFMT "\n", bun);
- }
-
- //@1 = BUNappend(@1, (ptr)@2, TRUE);
- @1 = BUNins(@1, (ptr) &bun, (ptr)@2, TRUE);
-
- if (@1 == NULL) {
- @:raptor_exception(pdata, "could not append in@1")@
- }
-} else {
- bun = (@1)->hseqbase + bun;
-}
-
-@= rdf_BUNappend_unq
-bun = BUNfnd(BATmirror(@1),(ptr)@2);
-if (bun == BUN_NONE) {
- if (BATcount(@1) > 4 * @1->T->hash->mask) {
- HASHdestroy(@1);
- BAThash(BATmirror(@1), 2*BATcount(@1));
- }
- bun = (BUN) @1->batCount;
- @1 = BUNappend(@1, (ptr)@2, TRUE);
- if (@1 == NULL) {
- @:raptor_exception(pdata, "could not append in@1")@
- }
-}
-
-@= rdf_tknzr_insert
-{
- str t = @1;
- TKNZRappend(&bun,&t);
-}
-
-@= rdf_BUNappend
-{@1 = BUNappend(@1, @2, TRUE);}
-if (@1 == NULL) {
- @:raptor_exception(pdata, "could not append in@1")@
-}
-
-@
-@c
-
-
-/*
-* Get the specific type of the object value in an RDF triple
-* The URI object can be recoginized by raptor parser.
-* If the object value is not an URI ==> it is a literal, and
-* specifically, a numeric, a dateTime or a string.
-* This function will find the specific type of Object value
-*/
-
-static ObjectType
-getObjectType(unsigned char* objStr){
- ObjectType obType;
- if (strstr((const char*) objStr, "XMLSchema#date") != NULL){
- obType = DATETIME;
- printf("%s: DateTime \n", objStr);
- }
- else if (strstr((const char*) objStr, "XMLSchema#float") != NULL
- || strstr((const char*) objStr, "XMLSchema#integer") != NULL
- )
- {
- obType = NUMERIC;
- printf("%s: Numeric \n", objStr);
- }
- else {
- obType = STRING;
- printf("%s: String \n", objStr);
- }
-
- return obType;
-}
-
-
-static void
-tripleHandler(void* user_data, const raptor_statement* triple)
-{
- parserData *pdata = ((parserData *) user_data);
- BUN bun = BUN_NONE;
- BAT **graph = pdata->graph;
-
- if (triple->subject->type == RAPTOR_TERM_TYPE_URI
- || triple->subject->type == RAPTOR_TERM_TYPE_BLANK) {
- unsigned char* subjectStr;
- subjectStr = raptor_term_to_string(triple->subject);
- @:rdf_insert(graph[MAP_LEX],(str)subjectStr)@
- @:rdf_BUNappend(graph[S_sort], &bun)@
- bun = BUN_NONE;
- free(subjectStr);
- } else {
- @:raptor_exception(pdata, "could not determine type of subject")@
- }
-
- if (triple->predicate->type == RAPTOR_TERM_TYPE_URI) {
- unsigned char* predicateStr;
- predicateStr = raptor_term_to_string(triple->predicate);
- @:rdf_insert(graph[MAP_LEX],(str)predicateStr)@
- @:rdf_BUNappend(graph[P_sort], &bun)@
- bun = BUN_NONE;
- free(predicateStr);
- } else {
- @:raptor_exception(pdata, "could not determine type of property")@
- }
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list