Changeset: ccac6dfbaacc for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=ccac6dfbaacc
Modified Files:
        monetdb5/extras/rdf/rdf.h
        monetdb5/extras/rdf/rdf_shredder.c
Branch: rdf
Log Message:

Add a compile-time option (NOT_IGNORE_ERROR_TRIPLE) to rdf_shredder so that it can ignore triples with incorrect syntax.


diffs (172 lines):

diff --git a/monetdb5/extras/rdf/rdf.h b/monetdb5/extras/rdf/rdf.h
--- a/monetdb5/extras/rdf/rdf.h
+++ b/monetdb5/extras/rdf/rdf.h
@@ -65,6 +65,7 @@ typedef enum {
 #define IS_DUPLICATE_FREE 0    /* 0: Duplications have not been removed, otherwise 1 */
 #define TRIPLE_STORE 1
 #define MLA_STORE    2
+#define NOT_IGNORE_ERROR_TRIPLE 0
 
 #define STORE TRIPLE_STORE /* this should become a compile time option */
 
diff --git a/monetdb5/extras/rdf/rdf_shredder.c b/monetdb5/extras/rdf/rdf_shredder.c
--- a/monetdb5/extras/rdf/rdf_shredder.c
+++ b/monetdb5/extras/rdf/rdf_shredder.c
@@ -82,6 +82,7 @@ typedef struct parserData {
        int exception;                /* raise an exception     */
        int warning;                  /* number of warning msgs */
        int error;                    /* number of error   msgs */
+       int lasterror;                /* # errors before next triple */
        int fatal;                    /* number of fatal   msgs */
        const char *exceptionMsg;     /* exception msgs         */
        const char *warningMsg;       /* warning msgs           */
@@ -295,61 +296,67 @@ tripleHandler(void* user_data, const rap
        BUN bun = BUN_NONE;
        BAT **graph = pdata->graph;
 
-       if (triple->subject->type == RAPTOR_TERM_TYPE_URI
-                       || triple->subject->type == RAPTOR_TERM_TYPE_BLANK) {
-               unsigned char* subjectStr; 
-               subjectStr = raptor_term_to_string(triple->subject);
-               //rdf_insert(pdata, graph[MAP_LEX], (str) subjectStr, &bun);
-               rdf_tknzr_insert((str) subjectStr, &bun);
-               rdf_BUNappend(pdata, graph[S_sort], &bun); 
-                       
-               bun = BUN_NONE;
-               free(subjectStr);
-       } else {
-               raptor_exception(pdata, "could not determine type of subject");
+       if (pdata->error > pdata->lasterror){
+               printf("Incorrect or wrong syntax triple %s \n ", pdata->errorMsg);
+               pdata->lasterror = pdata->error; 
        }
+       else{
+               if (triple->subject->type == RAPTOR_TERM_TYPE_URI
+                               || triple->subject->type == RAPTOR_TERM_TYPE_BLANK) {
+                       unsigned char* subjectStr; 
+                       subjectStr = raptor_term_to_string(triple->subject);
+                       //rdf_insert(pdata, graph[MAP_LEX], (str) subjectStr, &bun);
+                       rdf_tknzr_insert((str) subjectStr, &bun);
+                       rdf_BUNappend(pdata, graph[S_sort], &bun); 
+                               
+                       bun = BUN_NONE;
+                       free(subjectStr);
+               } else {
+                       raptor_exception(pdata, "could not determine type of subject");
+               }
 
-       if (triple->predicate->type == RAPTOR_TERM_TYPE_URI) {
-               unsigned char* predicateStr;
-               predicateStr = raptor_term_to_string(triple->predicate);
-               //rdf_insert(pdata, graph[MAP_LEX], (str) predicateStr, &bun);
-               rdf_tknzr_insert((str) predicateStr, &bun);
-               rdf_BUNappend(pdata, graph[P_sort], &bun); 
+               if (triple->predicate->type == RAPTOR_TERM_TYPE_URI) {
+                       unsigned char* predicateStr;
+                       predicateStr = raptor_term_to_string(triple->predicate);
+                       //rdf_insert(pdata, graph[MAP_LEX], (str) predicateStr, &bun);
+                       rdf_tknzr_insert((str) predicateStr, &bun);
+                       rdf_BUNappend(pdata, graph[P_sort], &bun); 
 
-               bun = BUN_NONE;
-               free(predicateStr);
-       } else {
-               raptor_exception(pdata, "could not determine type of property");
+                       bun = BUN_NONE;
+                       free(predicateStr);
+               } else {
+                       raptor_exception(pdata, "could not determine type of property");
+               }
+
+               if (triple->object->type == RAPTOR_TERM_TYPE_URI
+                               || triple->object->type == RAPTOR_TERM_TYPE_BLANK) {
+                       unsigned char* objStr;
+                       objStr = raptor_term_to_string(triple->object);
+                       //rdf_insert(pdata, graph[MAP_LEX], (str) objStr, &bun);
+                       rdf_tknzr_insert((str) objStr, &bun);
+                       rdf_BUNappend(pdata, graph[O_sort], &bun); 
+
+                       bun = BUN_NONE;
+                       free(objStr);
+               } else if (triple->object->type == RAPTOR_TERM_TYPE_LITERAL) {
+                       unsigned char* objStr;
+                       ObjectType objType;
+                       objStr = raptor_term_to_string(triple->object);
+                       objType = getObjectType(objStr);
+
+                       rdf_BUNappend_unq_ForObj(pdata, graph[MAP_LEX], (str)objStr, objType, &bun);    
+                       rdf_BUNappend(pdata, graph[O_sort], &bun); 
+
+                       bun = BUN_NONE;
+                       free(objStr);
+               } else {
+                       raptor_exception(pdata, "could not determine type of object");
+
+               }
+
+               pdata->tcount++;
        }
 
-       if (triple->object->type == RAPTOR_TERM_TYPE_URI
-                       || triple->object->type == RAPTOR_TERM_TYPE_BLANK) {
-               unsigned char* objStr;
-               objStr = raptor_term_to_string(triple->object);
-               //rdf_insert(pdata, graph[MAP_LEX], (str) objStr, &bun);
-               rdf_tknzr_insert((str) objStr, &bun);
-               rdf_BUNappend(pdata, graph[O_sort], &bun); 
-
-               bun = BUN_NONE;
-               free(objStr);
-       } else if (triple->object->type == RAPTOR_TERM_TYPE_LITERAL) {
-               unsigned char* objStr;
-               ObjectType objType;
-               objStr = raptor_term_to_string(triple->object);
-               objType = getObjectType(objStr);
-
-               rdf_BUNappend_unq_ForObj(pdata, graph[MAP_LEX], (str)objStr, objType, &bun);    
-               rdf_BUNappend(pdata, graph[O_sort], &bun); 
-
-               bun = BUN_NONE;
-               free(objStr);
-       } else {
-               raptor_exception(pdata, "could not determine type of object");
-
-       }
-
-       pdata->tcount++;
-
        return;
 }
 
@@ -390,6 +397,7 @@ parserData_create (str location, BAT** g
        pdata->exception = 0;
        pdata->fatal = 0;
        pdata->error = 0;
+       pdata->lasterror = 0;
        pdata->warning = 0;
        pdata->location = location;
        pdata->graph = graph;
@@ -793,11 +801,14 @@ RDFParser (BAT **graph, str *location, s
                        pdata->tcount == BATcount(graph[O_sort]));
 
        /* error check */
+       
        if (iret) {
                
                clean(pdata);
                throw(RDF, "rdf.rdfShred", "parsing failed\n");
        }
+       
+#if    NOT_IGNORE_ERROR_TRIPLE 
        if (pdata->exception) {
                throw(RDF, "rdf.rdfShred", "%s\n", pdata->exceptionMsg);
        } else if (pdata->fatal) {
@@ -811,6 +822,8 @@ RDFParser (BAT **graph, str *location, s
                                pdata->warningMsg);
        }
 
+
+#endif
        /* post processing step */
        ret = post_processing(pdata);
        if (ret != MAL_SUCCEED) {
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to