Changeset: 5cda6eb377a2 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=5cda6eb377a2
Modified Files:
monetdb5/extras/rdf/Makefile.ag
monetdb5/extras/rdf/rdfalgebra.c
monetdb5/extras/rdf/rdfcommon.c
monetdb5/extras/rdf/rdfcommon.h
monetdb5/extras/rdf/rdfdump.c
monetdb5/extras/rdf/rdfdump.h
monetdb5/extras/rdf/rdfschema.c
sql/backends/monet5/sql_rdf.c
Branch: rdf
Log Message:
More information needs to be stored for exception handling
diffs (truncated from 588 to 300 lines):
diff --git a/monetdb5/extras/rdf/Makefile.ag b/monetdb5/extras/rdf/Makefile.ag
--- a/monetdb5/extras/rdf/Makefile.ag
+++ b/monetdb5/extras/rdf/Makefile.ag
@@ -22,6 +22,10 @@ INCLUDES = ../../modules/atoms ../../mod
../../../common/stream \
../../../gdk \
../../../sql/include \
+ ../../../sql/backends/monet5 \
+ ../../../sql/server \
+ ../../../sql/common \
+ ../../../sql/storage \
$(raptor_CFLAGS)
MTSAFE
diff --git a/monetdb5/extras/rdf/rdfalgebra.c b/monetdb5/extras/rdf/rdfalgebra.c
--- a/monetdb5/extras/rdf/rdfalgebra.c
+++ b/monetdb5/extras/rdf/rdfalgebra.c
@@ -235,7 +235,9 @@ str RDFmultiway_merge_outerjoins(int np,
for (i = 0; i < np; i++){
r_obats[i] = BATnew(TYPE_void, TYPE_oid, estimate, TRANSIENT);
-
+
+ //assert(BATcount(sbats[i]) > 0);
+ //
//Keep the cursor to the first element of each input sbats
sbatCursors[i] = (oid *) Tloc(sbats[i], BUNfirst(sbats[i]));
obatCursors[i] = (oid *) Tloc(obats[i], BUNfirst(obats[i]));
@@ -245,9 +247,16 @@ str RDFmultiway_merge_outerjoins(int np,
//has first element of an array (pointing to the first element of each
sbat)
harr = (MinHeapNode*)malloc(sizeof(MinHeapNode) * np);
for (i = 0; i < np; i++){
- harr[i].element = sbatCursors[i][0]; //Store the first element
- harr[i].i = i; //index of array
- harr[i].j = 1; //Index of next element to be stored from array
+ if (BATcount(sbats[i]) != 0){
+ harr[i].element = sbatCursors[i][0]; //Store the first
element
+ harr[i].i = i; //index of array
+ harr[i].j = 1; //Index of next element to be stored
from array
+ } else {
+ harr[i].element = INT_MAX; //Store the INT_MAX in case
of empty BATs
+ harr[i].i = i; //index of array
+ harr[i].j = 1; //Index of next element to be stored
from array
+ }
+
}
hp = (MinHeap *) malloc(sizeof(MinHeap));
diff --git a/monetdb5/extras/rdf/rdfcommon.c b/monetdb5/extras/rdf/rdfcommon.c
--- a/monetdb5/extras/rdf/rdfcommon.c
+++ b/monetdb5/extras/rdf/rdfcommon.c
@@ -28,6 +28,9 @@ void copyIntSet(int* dest, int* orig, in
memcpy(dest, orig, len * sizeof(int));
}
+void copybatSet(bat* dest, bat* orig, int len){ /* copy len bat ids from orig into dest; dest must have room for len entries */
+	memcpy(dest, orig, len * sizeof(bat)); /* size by the element type (bat), not int, so the copy stays correct if bat's width differs */
+}
void initCharArray(char* inputArr, int num, char defaultValue){
int i;
@@ -325,3 +328,15 @@ void appendIntArrayToBat(BAT *b, int* in
BATsetcount(b, (BUN) (b->batCount + num));
}
}
+
+
+void appendbatArrayToBat(BAT *b, bat* inArray, int num){ /* append num bat ids from inArray to the tail of b; no-op when num <= 0 */
+	if (num > 0){
+		BUN r = BUNlast(b);
+		if (r + num > b->batCapacity){ /* grow by at least num: a fixed smallbatsz step is too small when num > smallbatsz and the memcpy below would overrun */
+			BATextend(b, b->batCapacity + (num > smallbatsz ? num : smallbatsz)); /* NOTE(review): result is not checked here - confirm how extension failures should be reported */
+		}
+		memcpy(Tloc(b, BUNlast(b)), inArray, sizeof(bat) * num); /* bulk copy straight into the tail storage */
+		BATsetcount(b, (BUN) (b->batCount + num)); /* publish the new element count after the raw copy */
+	}
+}
diff --git a/monetdb5/extras/rdf/rdfcommon.h b/monetdb5/extras/rdf/rdfcommon.h
--- a/monetdb5/extras/rdf/rdfcommon.h
+++ b/monetdb5/extras/rdf/rdfcommon.h
@@ -28,6 +28,8 @@
rdf_export void copyOidSet(oid* dest, oid* orig, int len);
rdf_export void copyIntSet(int* dest, int* orig, int len);
+rdf_export void copybatSet(bat *dest, bat* orig, int len);
+
rdf_export void initCharArray(char* inputArr, int num, char defaultValue);
rdf_export void initArray(oid* inputArr, int num, oid defaultValue);
rdf_export void initIntArray(int* inputArr, int num, oid defaultValue);
@@ -41,5 +43,6 @@ rdf_export void get_sorted_distinct_set(
rdf_export void appendArrayToBat(BAT *b, BUN* inArray, int num);
rdf_export void appendIntArrayToBat(BAT *b, int* inArray, int num);
+rdf_export void appendbatArrayToBat(BAT *b, bat* inArray, int num);
#endif /* _RDFCOMMON_H_ */
diff --git a/monetdb5/extras/rdf/rdfdump.c b/monetdb5/extras/rdf/rdfdump.c
--- a/monetdb5/extras/rdf/rdfdump.c
+++ b/monetdb5/extras/rdf/rdfdump.c
@@ -29,11 +29,12 @@
#include "rdfdump.h"
#include "bat5.h"
#include "rdfcommon.h"
-
+#include "sql_rdf.h"
static csdumBATdef csdumBatdefs[N_CSDUM_BAT] = {
{csd_tblId, "tblIdBat_dump", TYPE_void, TYPE_int},
{csd_tblname, "tblnameBat_dump", TYPE_void, TYPE_oid},
+ {csd_tblsname, "tblsnameBat_dump", TYPE_void, TYPE_str},
{csd_csId, "csIdBat_dump", TYPE_void, TYPE_int},
{csd_freq, "freqBat_dump", TYPE_void, TYPE_int},
{csd_coverage, "coverageBat_dump", TYPE_void, TYPE_int},
@@ -41,6 +42,8 @@ static csdumBATdef csdumBatdefs[N_CSDUM_
{csd_fullP, "fullPBat_dump", TYPE_void, TYPE_oid},
{csd_cOffset, "cOffsetBat_dump", TYPE_void, TYPE_oid},
{csd_fullC, "fullCBat_dump", TYPE_void, TYPE_oid},
+ {csd_fullC_batIds, "fullC_batIds_dump", TYPE_void, TYPE_int}, //BAT
ids for each column
+ {csd_fullC_name, "fullC_name_dump", TYPE_void, TYPE_str}, //Name
of each column
{csd_isMV, "isMVBat_dump", TYPE_void, TYPE_int}, // 0 indicating
single-valued column, otherwise > 0
// the value is
the number of column in MVtable
{csd_cname, "cIdxBat_dump", TYPE_void, TYPE_int} //Index of the
col in the table
@@ -102,10 +105,12 @@ void commitCSDump(CSDump *csdump){
}
static
-void dumpCS(CSDump *csdump, int _freqId, int _tblId, CS cs, CStable cstbl){
+void dumpCS(CSDump *csdump, int _freqId, int _tblId, CS cs, CStable cstbl,
BATiter mapi, BAT *mbat){
BUN offset, offsetc;
int tblId, freqId, freq, cov;
oid tblname;
+ str tblsname;
+ bat *lstColbat = NULL;
int *lstIsMV;
int i;
@@ -120,6 +125,11 @@ void dumpCS(CSDump *csdump, int _freqId,
BUNappend(csdump->dumpBats[csd_tblname], &tblname, TRUE);
+ tblsname = (str) GDKmalloc(sizeof(char) * 100);
+
+ getTblSQLname(tblsname, tblId, 0, tblname, mapi, mbat);
+ BUNappend(csdump->dumpBats[csd_tblsname], tblsname, TRUE);
+
BUNappend(csdump->dumpBats[csd_csId], &freqId, TRUE);
BUNappend(csdump->dumpBats[csd_freq], &freq, TRUE);
@@ -138,17 +148,25 @@ void dumpCS(CSDump *csdump, int _freqId,
/* Add list of multi-valued indication to csd_isMV bat*/
lstIsMV = (int *) malloc(sizeof(int) * cstbl.numCol);
+ lstColbat = (bat *) malloc(sizeof(bat) * cstbl.numCol);
+
for (i = 0; i < cstbl.numCol; i++){
+ str tmpColName = (char *) malloc(sizeof(char) * 100);
lstIsMV[i] = cstbl.lstMVTables[i].numCol;
+ lstColbat[i] = cstbl.colBats[i]->batCacheid;
+ getColSQLname(tmpColName, i, -1, cstbl.lstProp[i], mapi, mbat);
+ BUNappend(csdump->dumpBats[csd_fullC_name], tmpColName, TRUE);
}
+
+ appendIntArrayToBat(csdump->dumpBats[csd_isMV], lstIsMV, cstbl.numCol);
+ appendbatArrayToBat(csdump->dumpBats[csd_fullC_batIds], lstColbat,
cstbl.numCol);
- appendIntArrayToBat(csdump->dumpBats[csd_isMV], lstIsMV, cstbl.numCol);
free(lstIsMV);
}
-void dumpFreqCSs(CStableStat* cstablestat, CSset *freqCSset){
+void dumpFreqCSs(CStableStat* cstablestat, CSset *freqCSset, BATiter mapi, BAT
*mbat){
int i, numTables;
int freqId;
int is_already_built = 0;
@@ -169,7 +187,7 @@ void dumpFreqCSs(CStableStat* cstablesta
for (i = 0; i < numTables; i++){
freqId = cstablestat->lstfreqId[i];
assert(freqId != -1);
- dumpCS(csdump, freqId, i, freqCSset->items[freqId],
cstablestat->lstcstable[i]);
+ dumpCS(csdump, freqId, i, freqCSset->items[freqId],
cstablestat->lstcstable[i], mapi, mbat);
}
commitCSDump(csdump);
@@ -214,11 +232,13 @@ void freeCSDump(CSDump *csdump){
static
-SimpleCS *create_simpleCS(int tblId, oid tblname, int freqId, int numP, oid*
lstProp, int numC, oid* lstCol, int* lstIsMV, int sup, int cov){
+SimpleCS *create_simpleCS(int tblId, oid tblname, str tblsname, int freqId,
int numP, oid* lstProp, int numC, oid* lstCol, bat *lstColbat, str *lstColname,
int* lstIsMV, int sup, int cov){
SimpleCS *cs;
cs = (SimpleCS *) malloc(sizeof(SimpleCS));
cs->tblId = tblId;
cs->tblname = tblname;
+ cs->tblsname = GDKstrdup(tblsname);
+
cs->freqId = freqId;
cs->numP = numP;
@@ -229,6 +249,11 @@ SimpleCS *create_simpleCS(int tblId, oid
cs->lstCol = (oid *) malloc(sizeof(oid) * numC);
copyOidSet(cs->lstCol, lstCol, numC);
+ cs->lstColbat = (bat *) malloc(sizeof(bat) * numC);
+ copybatSet(cs->lstColbat, lstColbat, numC);
+
+ cs->lstColname = lstColname;
+
cs->lstIsMV = (int *) malloc(sizeof(int) * numC);
copyIntSet(cs->lstIsMV, lstIsMV, numC);
@@ -242,6 +267,14 @@ void free_simpleCS(SimpleCS *cs){
if (cs->lstProp) free(cs->lstProp);
if (cs->lstCol) free(cs->lstCol);
if (cs->lstIsMV) free(cs->lstIsMV);
+ if (cs->lstColname){
+ int i;
+ for (i = 0; i < cs->numC; i++){
+ GDKfree(cs->lstColname[i]);
+ }
+ GDKfree(cs->lstColname);
+ }
+ GDKfree(cs->tblsname);
free(cs);
}
@@ -254,16 +287,25 @@ SimpleCS* read_a_cs_from_csdump(int pos,
int *tblId, *freqId, *freq, *coverage;
oid *tblname;
oid *lstProp = NULL, *lstCol = NULL;
+ bat *lstColbat = NULL;
int *lstIsMV = NULL;
+ str *lstColname = NULL;
+ BATiter cname_mapi, tblsname_mapi;
+ int i;
SimpleCS *cs;
+ str tblsname;
tblId = (int *) Tloc(csdump->dumpBats[csd_tblId], pos);
assert(*tblId == pos);
tblname = (oid *) Tloc(csdump->dumpBats[csd_tblname], pos);
+
+ tblsname_mapi = bat_iterator(csdump->dumpBats[csd_tblsname]);
+ tblsname = (str) BUNtail(tblsname_mapi, pos);
+
freqId = (int *) Tloc(csdump->dumpBats[csd_csId], pos);
freq = (int *) Tloc(csdump->dumpBats[csd_freq], pos);
@@ -296,8 +338,21 @@ SimpleCS* read_a_cs_from_csdump(int pos,
lstCol = (oid *)Tloc(csdump->dumpBats[csd_fullC], *offsetC);
lstIsMV = (int *)Tloc(csdump->dumpBats[csd_isMV], *offsetC);
+
+ lstColbat = (bat *)Tloc(csdump->dumpBats[csd_fullC_batIds], *offsetC);
- cs = create_simpleCS(*tblId, *tblname, *freqId, numP, lstProp, numC,
lstCol, lstIsMV, *freq, *coverage);
+ lstColname = (str *)malloc(sizeof(str) * numC);
+
+ cname_mapi = bat_iterator(csdump->dumpBats[csd_fullC_name]);
+
+ for (i = 0; i < numC; i++){
+ str tmpStr = (str) BUNtail(cname_mapi,
BUNfirst(csdump->dumpBats[csd_fullC_name]) + (BUN) (*offsetC + i));
+
+ lstColname[i] = GDKstrdup(tmpStr);
+
+ }
+
+ cs = create_simpleCS(*tblId, *tblname, tblsname, *freqId, numP,
lstProp, numC, lstCol, lstColbat, lstColname, lstIsMV, *freq, *coverage);
return cs;
}
@@ -332,7 +387,7 @@ void print_simpleCSset(SimpleCSset *csse
for (i = 0; i < num; i++){
SimpleCS *cs = csset->items[i];
- printf("Simple CS: %d [TblId: %d] [FreqId: %d] [Support: %d]
[Coverage: %d]\n", i, cs->tblId, cs->freqId, cs->sup, cs->cov);
+ printf("Simple CS: %d [TblId: %d] [Name: %s] [FreqId: %d]
[Support: %d] [Coverage: %d]\n", i, cs->tblId, cs->tblsname, cs->freqId,
cs->sup, cs->cov);
printf(" Props: ");
for (j = 0; j < cs->numP; j++){
printf(" " BUNFMT, cs->lstProp[j]);
@@ -341,7 +396,7 @@ void print_simpleCSset(SimpleCSset *csse
printf(" Cols: ");
for (j = 0; j < cs->numC; j++){
- printf(" " BUNFMT " (isMV: %d) ",
cs->lstCol[j],cs->lstIsMV[j]);
+ printf(" " BUNFMT " (Name: %s) (isMV: %d) ",
cs->lstCol[j], cs->lstColname[j], cs->lstIsMV[j]);
}
printf("\n");
}
@@ -391,6 +446,25 @@ int getColIdx_from_oid(int tblId, Simple
return -1;
}
+str getColumnName(SimpleCSset *csset, int tblId, int colId){
+
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list