Changeset: cf78ee0e2cda for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=cf78ee0e2cda
Modified Files:
monetdb5/extras/rdf/Makefile.ag
monetdb5/extras/rdf/rdf.h
monetdb5/extras/rdf/rdf_shredder.c
monetdb5/extras/rdf/rdflabels.c
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Use ValPtr when casting data types.
Small fix for memory leaks.
diffs (truncated from 385 to 300 lines):
diff --git a/monetdb5/extras/rdf/Makefile.ag b/monetdb5/extras/rdf/Makefile.ag
--- a/monetdb5/extras/rdf/Makefile.ag
+++ b/monetdb5/extras/rdf/Makefile.ag
@@ -32,7 +32,7 @@ lib__rdf = {
#MODULE
NOINST
#DIR = libdir/monetdb5
- SOURCES = rdf.h rdfschema.h rdfminheap.h rdfminheap.c rdflabels.h
rdfretrieval.h rdfparser.h rdfparser.c rdfontologyload.h rdfontologyload.c
rdf_shredder.c rdfalgebra.c rdfschema.c rdflabels.c rdfretrieval.c
+ SOURCES = rdf.h rdftypes.h rdfschema.h rdfminheap.h rdfminheap.c
rdflabels.h rdfretrieval.h rdfparser.h rdftypes.c rdfparser.c rdfontologyload.h
rdfontologyload.c rdf_shredder.c rdfalgebra.c rdfschema.c rdflabels.c
rdfretrieval.c
#SEP = _
# LIBS = ./hashmap/librdfhash
diff --git a/monetdb5/extras/rdf/rdf.h b/monetdb5/extras/rdf/rdf.h
--- a/monetdb5/extras/rdf/rdf.h
+++ b/monetdb5/extras/rdf/rdf.h
@@ -57,26 +57,9 @@ TKNZRrdf2str (bat *res, bat *bid, bat *m
rdf_export str
RDFpartialjoin (bat *res, bat *lmap, bat *rmap, bat *input);
-rdf_export int
-getIntFromRDFString(str input);
-
-rdf_export float
-getFloatFromRDFString(str input);
-
-rdf_export str
-getDateTimeFromRDFString(str input);
-
#define RDF_MIN_LITERAL (((oid) 1) << ((sizeof(oid)==8)?59:27))
-typedef enum {
- URI,
- DATETIME,
- INTEGER,
- FLOAT,
- STRING,
- BLANKNODE,
- MULTIVALUES // For the multi-value property
-} ObjectType;
+
#define IS_DUPLICATE_FREE 0 /* 0: Duplications have not been
removed, otherwise 1 */
#define IS_COMPACT_TRIPLESTORE 1 /* 1: Only keep SPO for triple store */
diff --git a/monetdb5/extras/rdf/rdf_shredder.c
b/monetdb5/extras/rdf/rdf_shredder.c
--- a/monetdb5/extras/rdf/rdf_shredder.c
+++ b/monetdb5/extras/rdf/rdf_shredder.c
@@ -28,6 +28,7 @@
#include "tokenizer.h"
#include <gdk.h>
#include <rdf.h>
+#include <rdftypes.h>
#include <rdfparser.h>
#include <sys/stat.h>
#include <sys/types.h>
@@ -175,140 +176,6 @@ rdf_BUNappend_unq_ForObj(parserData* pda
}
/*
- * Get substring of a string
- * NOTE: The position starts from 1 (not from 0)
- * */
-static
-char *substring(char *string, int position, int length)
-{
- char *pointer;
- int c;
-
- pointer = malloc(length+1);
-
- if (pointer == NULL)
- {
- throw(RDF, "rdf_shredder.substring", "Memory allocation
failed!");
- }
-
- for (c = 0 ; c < position -1 ; c++)
- string++;
-
- for (c = 0 ; c < length ; c++)
- {
- *(pointer+c) = *string;
- string++;
- }
-
- *(pointer+c) = '\0';
-
- return pointer;
-}
-
-static
-char isInt(char *input, int len){
-
- int i;
- //int len = strlen(input);
- //printf("... Checking value %s with len %d \n", input, len);
- for(i = 0; i < len; i++)
- {
- if(isdigit(input[i]) == 0){ // May also check
ispunct(string[i]) != 0
- //printf("NOT A DIGIT \n");
- break;
- }
- }
- //printf("i is %d \n",i);
- if(i == len)
- return 1;
- else
- return 0;
-}
-
-int getIntFromRDFString(str input){
- int i;
- int ret;
- char *tmpStr;
-
- assert(input[0] == '\"');
-
- //Find the second quote
- i = 1;
- while (input[i] != '\"'){
- i++;
- }
- //input[i] = '\0';
- //input++;
- tmpStr = substring(input, 2, i - 1);
- //printf("INT: Input after extraction %s \n", tmpStr);
- ret = atoi(tmpStr);
- //printf("return value: %d \n",ret);
- free(tmpStr);
- return ret;
-}
-
-float getFloatFromRDFString(str input){
- int i;
- float ret;
- char *tmpStr;
-
- assert(input[0] == '\"');
-
- //Find the second quote
- i = 1;
- while (input[i] != '\"'){
- i++;
- }
- //input[i] = '\0';
- //input++;
- tmpStr = substring(input, 2, i - 1);
- //printf("FLOAT: Input after extraction %s \n", tmpStr);
- ret = atof(tmpStr);
- //printf("return value: %f \n",ret);
- return ret;
-}
-
-str getDateTimeFromRDFString(str input){
-
- int i;
- char *tmpStr;
- assert(input[0] == '\"');
-
- //Find the second quote
- i = 1;
- while (input[i] != '\"'){
- i++;
- }
- //input[i] = '\0';
- //input++;
- tmpStr = substring(input, 2, i - 1);
- //printf("DATETIME: Input after extraction %s \n", tmpStr);
-
- return tmpStr;
-}
-
-/*
-static
-char isIntWithQuote(char *input, int len){ // "123123" ==> INT
-
- int i;
- if (len < 3) return 0;
- for(i = 1; i < len-1; i++)
- {
- if(isdigit(input[i]) == 0){ // May also check
ispunct(string[i]) != 0
- //printf("NOT A DIGIT \n");
- break;
- }
- }
- //printf("i is %d \n",i);
- if(i == len)
- return 1;
- else
- return 0;
-}
-*/
-
-/*
* Get the specific type of the object value in an RDF triple
* The URI object can be recoginized by raptor parser.
* If the object value is not an URI ==> it is a literal, and
@@ -348,7 +215,7 @@ getObjectType(unsigned char* objStr, BUN
else
obType = STRING;
- free(valuepart);
+ GDKfree(valuepart);
}
else if ((pos = strstr((str) endpart, "XMLSchema#float>")) !=
NULL
diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c
--- a/monetdb5/extras/rdf/rdflabels.c
+++ b/monetdb5/extras/rdf/rdflabels.c
@@ -1831,6 +1831,7 @@ void printUML2(CSset *freqCSset, CSlabel
memcpy(labelStrEscaped, labelStr, (strlen(labelStr) +
1));
escapeURI(labelStrEscaped);
#endif
+ GDKfree(tmpStr);
GDKfree(labelStr);
}
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -21,6 +21,7 @@
#include "monetdb_config.h"
#include "rdf.h"
+#include "rdftypes.h"
#include "rdfschema.h"
#include "rdflabels.h"
#include "rdfretrieval.h"
@@ -5958,6 +5959,8 @@ void fillMissingValueByNils(CStableStat*
}
}
+
+#if 0
static
void getRealValue(void **returnValue, oid objOid, ObjectType objType, BATiter
mapi, BAT *mapbat){
str objStr;
@@ -6027,6 +6030,67 @@ void getRealValue(void **returnValue, oi
}
}
+#endif
+
+static
+void getRealValue(ValPtr returnValue, oid objOid, ObjectType objType, BATiter
mapi, BAT *mapbat){
+ str objStr;
+ str datetimeStr;
+ str tmpStr;
+ BUN bun;
+ BUN maxObjectURIOid = ((oid)1 << (sizeof(BUN)*8 - NBITS_FOR_CSID -
1)) - 1; //Base on getTblIdxFromS
+ float realFloat;
+ int realInt;
+ oid realUri;
+
+ //printf("objOid = " BUNFMT " \n",objOid);
+ if (objType == URI || objType == BLANKNODE){
+ objOid = objOid - ((oid)objType << (sizeof(BUN)*8 - 4));
+
+ if (objOid < maxObjectURIOid){
+ //takeOid(objOid, &objStr); //TODO: Do we
need to get URI string???
+ //printf("From tokenizer URI object value: "BUNFMT "
(str: %s) \n", objOid, objStr);
+ }
+ //else, this object value refers to a subject oid
+ //IDEA: Modify the function for calculating new subject Id:
+ //==> subjectID = TBLID ... tmpSoid ....
+ }
+ else{
+ objOid = objOid - (objType*2 + 1) * RDF_MIN_LITERAL; /* Get
the real objOid from Map or Tokenizer */
+ bun = BUNfirst(mapbat);
+ objStr = (str) BUNtail(mapi, bun + objOid);
+ //printf("From mapbat BATcount= "BUNFMT" at position " BUNFMT
": %s \n", BATcount(mapbat), bun + objOid,objStr);
+ }
+
+
+ switch (objType)
+ {
+ case STRING:
+ //printf("A String object value: %s \n",objStr);
+ tmpStr = GDKmalloc(sizeof(char) * strlen(objStr) + 1);
+ memcpy(tmpStr, objStr, sizeof(char) * strlen(objStr) +
1);
+ VALset(returnValue, TYPE_str, tmpStr);
+ break;
+ case DATETIME:
+ datetimeStr = getDateTimeFromRDFString(objStr);
+ VALset(returnValue, TYPE_str, datetimeStr);
+ break;
+ case INTEGER:
+ //printf("Full object value: %s \n",objStr);
+ realInt = getIntFromRDFString(objStr);
+ VALset(returnValue,TYPE_int, &realInt);
+ break;
+ case FLOAT:
+ //printf("Full object value: %s \n",objStr);
+ realFloat = getFloatFromRDFString(objStr);
+ VALset(returnValue,TYPE_flt, &realFloat);
+ break;
+ default: //URI or BLANK NODE
+ realUri = objOid;
+ VALset(returnValue,TYPE_oid, &realUri);
+ }
+
+}
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list