Changeset: f560b2e3dd38 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f560b2e3dd38
Modified Files:
monetdb5/extras/rdf/rdf.h
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
sql/backends/monet5/sql.mx
Branch: rdf
Log Message:
Add nil values to ex-tables when the default value is available
diffs (truncated from 432 to 300 lines):
diff --git a/monetdb5/extras/rdf/rdf.h b/monetdb5/extras/rdf/rdf.h
--- a/monetdb5/extras/rdf/rdf.h
+++ b/monetdb5/extras/rdf/rdf.h
@@ -28,6 +28,7 @@
#define _RDF_H_
#include <gdk.h>
+#include "tokenizer.h"
#ifdef WIN32
#ifndef LIBRDF
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -27,7 +27,6 @@
#include "algebra.h"
#include <gdk.h>
#include <hashmap/hashmap.h>
-#include "tokenizer.h"
#include <math.h>
#include <time.h>
#include <trie/trie.h>
@@ -545,8 +544,8 @@ static
char isMultiValueCol(PropTypes pt){
double tmpRatio;
- tmpRatio = (double) (pt.propCover / (pt.numSingleType + pt.numMVType));
-
+ tmpRatio = ((double)pt.propCover / (pt.numSingleType + pt.numMVType));
+ printf("NumMVType = %d | Ratio %f \n", pt.numMVType, tmpRatio);
if ((pt.numMVType > 0) && (tmpRatio > IS_MULVALUE_THRESHOLD)){
return 1;
}
@@ -566,6 +565,7 @@ void genCSPropTypesColIdx(CSPropTypes* c
for (i = 0; i < numMergedCS; i++){
curTypeColIdx = 0;
for(j = 0; j < csPropTypes[i].numProp; j++){
+ printf("genCSPropTypesColIdx: Table: %d | Prop: %d \n",
i, j);
if (isMultiValueCol(csPropTypes[i].lstPropTypes[j])){
//if this property is a Multi-valued prop
csPropTypes[i].lstPropTypes[j].TableTypes[MULTIVALUES] = MAINTBL;
@@ -5811,7 +5811,7 @@ void fillMissingvaluesAll(CStableStat* c
int i;
int tmpColExIdx;
- printf("Fill for Table %d and prop %d \n", lasttblIdx, lastColIdx);
+ printf("Fill for Table %d and prop %d (lastSubjId = " BUNFMT" \n",
lasttblIdx, lastColIdx, lastSubjId[lasttblIdx]);
tmpBat = cstablestat->lstcstable[lasttblIdx].colBats[lastColIdx];
fillMissingvalues(tmpBat, BATcount(tmpBat), lastSubjId[lasttblIdx]);
@@ -5819,12 +5819,55 @@ void fillMissingvaluesAll(CStableStat* c
if
(csPropTypes[lasttblIdx].lstPropTypes[lastColIdx].TableTypes[i] == TYPETBL){
tmpColExIdx =
csPropTypes[lasttblIdx].lstPropTypes[lastColIdx].colIdxes[i];
tmpBat =
cstablestat->lstcstableEx[lasttblIdx].colBats[tmpColExIdx];
+ printf("Fill excol %d \n", tmpColExIdx);
fillMissingvalues(tmpBat, BATcount(tmpBat),
lastSubjId[lasttblIdx]);
}
}
}
+
+/*
+ * fillMissingValueByNils: pad the column BATs of one property with nil values.
+ *
+ * Targets are the main-table BAT lstcstable[tblIdx].colBats[colIdx] and every
+ * ex-table BAT lstcstableEx[tblIdx].colBats[...] whose slot in
+ * csPropTypes[tblIdx].lstPropTypes[colIdx].TableTypes[] equals TYPETBL.
+ *
+ * cstablestat: holds the main (lstcstable) and ex (lstcstableEx) column BATs
+ * csPropTypes: per-table property type info (TableTypes[], colIdxes[])
+ * tblIdx:      index of the table
+ * colIdx:      index of the property, i.e. the column to be appended to
+ * colIdxEx:    ex-table column index; it is only compared against each
+ *              visited ex column (presumably the ex column that receives
+ *              the real value -- TODO confirm with the caller)
+ * tblType:     compared against MAINTBL to decide on the extra nil
+ * from, to:    when to > from + 1, (to - from) nils are appended to each
+ *              target BAT
+ *
+ * NOTE(review): the results of BUNappend are ignored.
+ * NOTE(review): for to == from + 1 the range guard is false and no range
+ * nils are appended -- confirm that this boundary is intended.
+ */
+static
+void fillMissingValueByNils(CStableStat* cstablestat, CSPropTypes
*csPropTypes, int tblIdx, int colIdx, int colIdxEx, char tblType, oid from, oid
to){
+ BAT *tmpBat = NULL; // BAT currently being padded
+ int i; // index into TableTypes[] / colIdxes[]
+ int tmpColExIdx; // ex-table column index of the visited slot
+ oid k; // counter of the range loops
+
+ printf("Fill nils for Table %d and prop %d from " BUNFMT " to " BUNFMT
"\n", tblIdx, colIdx, from, to);
+
+ tmpBat = cstablestat->lstcstable[tblIdx].colBats[colIdx]; // main-table column of this property
+ //Range fill: append (to - from) nils, but only when to > from + 1
+ if (to > (from + 1)){
+ for(k = from -1; k < to - 1; k++){
+ BUNappend(tmpBat, ATOMnilptr(tmpBat->ttype), TRUE);
+ }
+ }
+ if (tblType != MAINTBL){ // one extra nil in the main column when the table type is not MAINTBL
+ BUNappend(tmpBat, ATOMnilptr(tmpBat->ttype), TRUE);
+ }
+ for (i = 0; i < (MULTIVALUES + 1); i++){ // visit all MULTIVALUES + 1 slots of this property
+ if (csPropTypes[tblIdx].lstPropTypes[colIdx].TableTypes[i] ==
TYPETBL){
+ tmpColExIdx =
csPropTypes[tblIdx].lstPropTypes[colIdx].colIdxes[i];
+ tmpBat =
cstablestat->lstcstableEx[tblIdx].colBats[tmpColExIdx];
+ //Same range fill as for the main column, applied to this ex column
+ if (to > (from + 1)){
+ for(k = from -1; k < (to - 1); k++){
+ BUNappend(tmpBat,
ATOMnilptr(tmpBat->ttype), TRUE);
+ }
+ }
+
+ if (tblType != MAINTBL && tmpColExIdx != colIdxEx){
+ BUNappend(tmpBat, ATOMnilptr(tmpBat->ttype),
TRUE);
+ }
+ else // NOTE(review): both branches append the same nil, so the condition has no effect -- confirm intent
+ BUNappend(tmpBat, ATOMnilptr(tmpBat->ttype),
TRUE);
+ }
+
+ }
+}
+
static
void getRealValue(void **returnValue, oid objOid, ObjectType objType, BATiter
mapi, BAT *mapbat){
str objStr;
@@ -5915,7 +5958,7 @@ str RDFdistTriplesToCSs(int *ret, bat *s
BAT *tmpBat = NULL;
BAT *tmpmvBat = NULL; // Multi-values BAT
//BAT *tmpmvExBat = NULL;
- oid *tmplastInsertedS;
+ oid tmplastInsertedS;
int numMultiValues = 0;
oid tmpmvValue;
char istmpMVProp = 0;
@@ -5955,8 +5998,7 @@ str RDFdistTriplesToCSs(int *ret, bat *s
tmpTblIdxPropIdxMap = (int*)malloc(sizeof(int) *
cstablestat->numTables);
initIntArray(tmpTblIdxPropIdxMap, cstablestat->numTables, -1);
- tmplastInsertedS = (oid*)malloc(sizeof(oid) * (MULTIVALUES + 1));
- initArray(tmplastInsertedS, (MULTIVALUES + 1), 0);
+ tmplastInsertedS = 0;
lastP = BUN_NONE;
@@ -5973,7 +6015,7 @@ str RDFdistTriplesToCSs(int *ret, bat *s
printf(BUNFMT ": " BUNFMT " | " BUNFMT " | " BUNFMT , p,
*pbt, *sbt, *obt);
getTblIdxFromS(*sbt, &tblIdx, &tmpSoid);
- printf(" --> Tbl: %d tmpSoid: " BUNFMT, tblIdx,tmpSoid);
+ printf(" --> Tbl: %d tmpSoid: " BUNFMT " | Last SubjId "
BUNFMT "", tblIdx,tmpSoid, lastSubjId[tblIdx]);
if (tblIdx == -1){ // This is for irregular triples, put
them to pso table
@@ -5986,6 +6028,8 @@ str RDFdistTriplesToCSs(int *ret, bat *s
}
if (*pbt != lastP){
+
+
//Get number of BATs for this p
ppos = BUNfnd(BATmirror(propStat->pBat),pbt);
if (ppos == BUN_NONE)
@@ -5996,9 +6040,10 @@ str RDFdistTriplesToCSs(int *ret, bat *s
tmpPtl.lstIdx,
tmpPtl.lstInvertIdx,tmpPtl.numAdded);
lastP = *pbt;
- lastS = *sbt;
+ //lastS = *sbt;
+ lastS = BUN_NONE;
numMultiValues = 0;
- tmplastInsertedS[MULTIVALUES] = 0;
+ tmplastInsertedS = 0;
}
@@ -6007,6 +6052,25 @@ str RDFdistTriplesToCSs(int *ret, bat *s
tmpColIdx = tmpTblIdxPropIdxMap[tblIdx];
printf(" Tbl: %d | Col: %d \n", tblIdx, tmpColIdx);
+
+ if (p == 0){
+ lastColIdx = tmpColIdx;
+ lasttblIdx = tblIdx;
+ }
+
+ /* New column. Finish with lastTblIdx and lastColIdx. Note:
This lastColIdx is
+ * the position of the prop in a final CS. Not the exact colIdx
in MAINTBL or TYPETBL
+ * */
+ if (tmpColIdx != lastColIdx || lasttblIdx != tblIdx){
+ //Insert missing values for all columns of this
property in this table
+
+ fillMissingvaluesAll(cstablestat, csPropTypes,
lasttblIdx, lastColIdx, lastSubjId);
+ lastColIdx = tmpColIdx;
+ lasttblIdx = tblIdx;
+ tmplastInsertedS = 0;
+ cstablestat->lastInsertedS[tblIdx][tmpColIdx] = 0;
+
+ }
istmpMVProp =
csPropTypes[tblIdx].lstPropTypes[tmpColIdx].isMVProp;
@@ -6014,6 +6078,7 @@ str RDFdistTriplesToCSs(int *ret, bat *s
printf("Multi values prop \n");
if (*sbt != lastS){
numMultiValues = 0;
+ lastS = *sbt;
}
assert(objType != MULTIVALUES); //TODO: Remove
this
@@ -6038,8 +6103,8 @@ str RDFdistTriplesToCSs(int *ret, bat *s
//In search the position of the first value
//to the correcponding column in the MAINTBL
//First: Insert all missing value
- if (tmpSoid > (tmplastInsertedS[MULTIVALUES] +
1)){
- fillMissingvalues(tmpBat,
tmplastInsertedS[MULTIVALUES] + 1, tmpSoid-1);
+ if (tmpSoid > (tmplastInsertedS + 1)){
+ fillMissingvalues(tmpBat,
tmplastInsertedS + 1, tmpSoid-1);
}
BATprint(tmpmvBat);
@@ -6047,12 +6112,30 @@ str RDFdistTriplesToCSs(int *ret, bat *s
printf("Insert the refered oid " BUNFMT "for MV
prop \n", tmpmvValue);
BUNappend(tmpBat, &tmpmvValue, TRUE);
BATprint(tmpBat);
- tmplastInsertedS[MULTIVALUES] = tmpSoid;
+ tmplastInsertedS = tmpSoid;
+
+ lastColIdx = tmpColIdx;
+ lasttblIdx = tblIdx;
+
numMultiValues++;
}
continue;
}
+ else{
+ //If a multi-valued prop exists but is handled as a
single-valued prop,
+ //only the first object value is stored. Other object
values are appended to the PSO table.
+ if (*sbt != lastS){
+ lastS = *sbt;
+ }
+ else{ // This is an extra object value
+ BUNappend(cstablestat->pbat,pbt , TRUE);
+ BUNappend(cstablestat->sbat,sbt , TRUE);
+ BUNappend(cstablestat->obat,obt , TRUE);
+ printf(" Extra object value ==> To PSO \n");
+ continue;
+ }
+ }
tmpTableType =
csPropTypes[tblIdx].lstPropTypes[tmpColIdx].TableTypes[(int)objType];
@@ -6066,42 +6149,33 @@ str RDFdistTriplesToCSs(int *ret, bat *s
continue;
}
- if (p == 0){
- lastColIdx = tmpColIdx;
- lasttblIdx = tblIdx;
- }
-
- /* New column. Finish with lastTblIdx and lastColIdx. Note:
This lastColIdx is
- * the position of the prop in a final CS. Not the exact colIdx
in MAINTBL or TYPETBL
- * */
- if (tmpColIdx != lastColIdx || lasttblIdx != tblIdx){
- //Insert missing values for all columns of this
property in this table
-
- fillMissingvaluesAll(cstablestat, csPropTypes,
lasttblIdx, lastColIdx, lastSubjId);
-
- lastColIdx = tmpColIdx;
- lasttblIdx = tblIdx;
- initArray(tmplastInsertedS, (MULTIVALUES + 1), 0);
-
- }
+
+
if (tmpTableType == MAINTBL){
curBat =
cstablestat->lstcstable[tblIdx].colBats[tmpColIdx];
- tmplastInsertedS[(int)objType] =
cstablestat->lastInsertedS[tblIdx][tmpColIdx];
printf(" tmpColIdx = %d \n",tmpColIdx);
}
else{ //tmpTableType == TYPETBL
tmpColExIdx =
csPropTypes[tblIdx].lstPropTypes[tmpColIdx].colIdxes[(int)objType];
curBat =
cstablestat->lstcstableEx[tblIdx].colBats[tmpColExIdx];
- tmplastInsertedS[(int)objType] =
cstablestat->lastInsertedSEx[tblIdx][tmpColExIdx];
- printf(" tmpColIdx = %d \n",tmpColExIdx);
- }
+ printf(" tmpColExIdx = %d \n",tmpColExIdx);
+ }
+
+
+ tmplastInsertedS =
cstablestat->lastInsertedS[tblIdx][tmpColIdx];
//TODO: Check last subjectId for this prop. If the subjectId is
not continuous, insert NIL
+ /*
if (tmpSoid > (tmplastInsertedS[(int)objType] + 1)){
printf("Fill begin from tmplastInsertedS[%d] = "BUNFMT"
to " BUNFMT "\n", (int)objType, tmplastInsertedS[(int)objType],tmpSoid-1);
fillMissingvalues(curBat,
tmplastInsertedS[(int)objType] + 1, tmpSoid-1);
}
+ */
+
+ fillMissingValueByNils(cstablestat, csPropTypes, tblIdx,
tmpColIdx, tmpColExIdx, tmpTableType, tmplastInsertedS + 1, tmpSoid);
+
+ //fillMissingvalues(tmpBat, BATcount(tmpBat),
lastSubjId[tblIdx]);
getRealValue(&realObjValue, *obt, objType, mi, mbat);
if (objType == STRING) printf("Value returned by getRealValue
is %s \n", (char*)realObjValue);
@@ -6112,12 +6186,8 @@ str RDFdistTriplesToCSs(int *ret, bat *s
// tmpColIdx, *pbt,
tmpSoid, *obt);
//Update last inserted S
- if (tmpTableType == MAINTBL){
- cstablestat->lastInsertedS[tblIdx][tmpColIdx] = tmpSoid;
- }
- else{ //tmpTableType == TYPETBL
- cstablestat->lastInsertedSEx[tblIdx][tmpColExIdx] =
tmpSoid;
- }
+ cstablestat->lastInsertedS[tblIdx][tmpColIdx] = tmpSoid;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list