Changeset: c81f96c6f3ae for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=c81f96c6f3ae
Modified Files:
monetdb5/extras/rdf/Makefile.ag
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
sql/backends/monet5/sql.mx
sql/scripts/30_rdf.sql
Branch: rdf
Log Message:
Add sql procedure for handling rdf reorganizing
diffs (truncated from 370 to 300 lines):
diff --git a/monetdb5/extras/rdf/Makefile.ag b/monetdb5/extras/rdf/Makefile.ag
--- a/monetdb5/extras/rdf/Makefile.ag
+++ b/monetdb5/extras/rdf/Makefile.ag
@@ -21,6 +21,7 @@ INCLUDES = ../../modules/atoms ../../mod
../../../common/options \
../../../common/stream \
../../../gdk \
+ ../../../sql/include \
$(raptor_CFLAGS)
MTSAFE
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -2901,10 +2901,10 @@ RDFextractCSwithTypes(int *ret, bat *sba
- BBPreclaim(sbat);
- BBPreclaim(pbat);
- BBPreclaim(obat);
- BBPreclaim(mbat);
+ BBPunfix(sbat->batCacheid);
+ BBPunfix(pbat->batCacheid);
+ BBPunfix(obat->batCacheid);
+ BBPunfix(mbat->batCacheid);
free (subjSubCSMap);
free (csFreqMap);
@@ -3079,15 +3079,16 @@ str triplesubsort(BAT **sbat, BAT **pbat
}
static
-CStable* initCStablesAndIdxMapping(CSset* freqCSset, int* csTblIdxMapping,
int* mfreqIdxTblIdxMapping, int* mTblIdxFreqIdxMapping){
+CStableStat* initCStablesAndIdxMapping(CSset* freqCSset, int* csTblIdxMapping,
int* mfreqIdxTblIdxMapping, int* mTblIdxFreqIdxMapping){
int i, k;
CS cs;
- CStable* cstable;
+ CStableStat* cstablestat;
int tmpParentidx;
int tmpNumProp;
-
- cstable = (CStable *) malloc (sizeof (CStable));
+ //str *schema = "rdfro";
+
+ cstablestat = (CStableStat *) malloc (sizeof (CStableStat));
// Get the number of tables
k = 0;
@@ -3099,19 +3100,20 @@ CStable* initCStablesAndIdxMapping(CSset
}
}
- // allocate memory space for cstable
- cstable->numTables = k;
- cstable->lstbatid = (bat**) malloc(sizeof (bat*) * k);
- cstable->numPropPerTable = (int*) malloc(sizeof (int) * k);
- cstable->lastInsertedS = (oid*) malloc(sizeof(oid) * k);
-
+ // allocate memory space for cstablestat
+ cstablestat->numTables = k;
+ cstablestat->lstbatid = (bat**) malloc(sizeof (bat*) * k);
+ cstablestat->numPropPerTable = (int*) malloc(sizeof (int) * k);
+ cstablestat->lastInsertedS = (oid*) malloc(sizeof(oid) * k);
+ //cstablestat->cstable = (CStable*) malloc(sizeof(CStable) * k);
k = 0;
for (i = 0; i < freqCSset->numCSadded; i++){
if (freqCSset->items[i].parentFreqIdx == -1){ // Only use the
maximum or merge CS
tmpNumProp = freqCSset->items[i].numProp;
- cstable->numPropPerTable[k] = tmpNumProp;
- cstable->lstbatid[k] = (bat*) malloc (sizeof(bat) *
tmpNumProp);
+ cstablestat->numPropPerTable[k] = tmpNumProp;
+ cstablestat->lstbatid[k] = (bat*) malloc (sizeof(bat) *
tmpNumProp);
+
k++;
}
}
@@ -3137,21 +3139,68 @@ CStable* initCStablesAndIdxMapping(CSset
}
- return cstable;
+ return cstablestat;
}
-str RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid,
PropStat* propStat, CStable *cstable){
+/*
+ * Release a CStableStat and the arrays it owns: every per-table bat-id
+ * array stored in lstbatid, then lstbatid itself, lastInsertedS,
+ * numPropPerTable and finally the structure.
+ *
+ * Like free(), a NULL cstablestat is accepted and ignored. A NULL
+ * lstbatid is skipped instead of being dereferenced.
+ */
+static
+void freeCStableStat(CStableStat* cstablestat){
+ int i;
+
+ if (cstablestat == NULL)
+  return;
+
+ if (cstablestat->lstbatid != NULL){
+  for (i = 0; i < cstablestat->numTables; i++){
+   free(cstablestat->lstbatid[i]);
+  }
+  free(cstablestat->lstbatid);
+ }
+ free(cstablestat->lastInsertedS);
+ free(cstablestat->numPropPerTable);
+ free(cstablestat);
+}
+
+/*
+ * Create one new BAT for every entry of the posting list ptl and store it
+ * at setofBats[ptl.lstIdx[i]].
+ *
+ * setofBats  array indexed by the values found in ptl.lstIdx; the caller
+ *            owns the array and the BATs placed in it
+ * ptl        posting list; its first numAdded entries of lstIdx are used
+ * HeadType   head type passed to BATnew
+ * TailType   tail type passed to BATnew
+ *
+ * Returns MAL_SUCCEED when every BAT was created. If BATnew fails, the
+ * BATs created earlier in this call are reclaimed, their slots are reset
+ * to NULL and an RDF exception is returned.
+ */
+static str
+creatPBats(BAT** setofBats, Postinglist ptl, int HeadType, int TailType){
+ int i, j;
+ int numbat;
+
+ numbat = ptl.numAdded;
+
+ for (i = 0; i < numbat; i++){
+  // only a small initial capacity is requested for each BAT
+  setofBats[ptl.lstIdx[i]] = BATnew(HeadType, TailType, smallbatsz);
+  if (setofBats[ptl.lstIdx[i]] == NULL){
+   // undo the work of this call so the caller is not left with
+   // a half-initialised set of BATs
+   for (j = 0; j < i; j++){
+    BBPreclaim(setofBats[ptl.lstIdx[j]]);
+    setofBats[ptl.lstIdx[j]] = NULL;
+   }
+   throw(RDF, "rdf.creatPBats", "Could not create BAT for a property");
+  }
+ }
+
+ return MAL_SUCCEED;
+}
+
+/*
+static str
+savePBats(BAT** setofBats, Postinglist ptl, CStableStat* cstablestat){
+ int i;
+ int numbat;
+
+ numbat = ptl.numAdded;
+
+ for (i = 0; i < numbat; i++){
+ //store to cstablestat
+ cstablestat->lstbatid[ptl.lstIdx[i]][ptl.lstInvertIdx[i]] =
setofBats[ptl.lstIdx[i]]->batCacheid;
+
+ //remove completely
+ BBPreclaim(setofBats[ptl.lstIdx[i]]) ;
+ }
+
+ return MAL_SUCCEED;
+}
+*/
+
+str RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid,
PropStat* propStat, CStableStat *cstablestat){
BAT *sbat = NULL, *pbat = NULL, *obat = NULL;
BATiter si,pi,oi;
BUN p,q;
oid *pbt, *sbt, *obt;
oid lastP, lastS;
- int freqid;
+ int tblIdx;
BUN ppos;
BAT** setofBats = NULL;
- int* csIdBatidxmap; //small map for each p
if ((sbat = BATdescriptor(*sbatid)) == NULL) {
throw(MAL, "rdf.RDFdistTriplesToCSs", RUNTIME_OBJECT_MISSING);
@@ -3172,32 +3221,29 @@ str RDFdistTriplesToCSs(int *ret, bat *s
lastP = BUN_NONE;
- printf("Created cstable with %d tables \n", cstable->numTables);
+ printf("Created cstablestat with %d tables \n", cstablestat->numTables);
+
+ setofBats = (BAT**)malloc(sizeof(BAT*) * cstablestat->numTables);
BATloop(pbat, p, q){
pbt = (oid *) BUNtloc(pi, p);
sbt = (oid *) BUNtloc(si, p);
obt = (oid *) BUNtloc(oi, p);
if (*pbt != lastP){
- if (csIdBatidxmap == NULL){
- csIdBatidxmap = (int *) malloc(sizeof(int) * 1);
- }
-
//Get number of BATs for this p
ppos = BUNfnd(BATmirror(propStat->pBat),pbt);
if (ppos == BUN_NONE)
throw(RDF, "rdf.RDFdistTriplesToCSs", "This
prop must be in propStat bat");
- if (setofBats != NULL)
- free(setofBats);
//init set of BATs containing this property
- setofBats = (BAT**) malloc(sizeof(BAT*) *
propStat->plCSidx[ppos].numAdded);
-
+ if (creatPBats(setofBats, propStat->plCSidx[ppos],
TYPE_void, TYPE_oid) != MAL_SUCCEED){
+ throw(RDF, "rdf.RDFdistTriplesToCSs", "Problem
in creating set of bats for a P");
+ }
- freqid = getTblidFromSoid(*sbt);
- printf("Table for prop " BUNFMT " | obj " BUNFMT "is %d
\n",*pbt, *obt, freqid);
-
+ tblIdx = getTblidFromSoid(*sbt);
+ printf("Table for prop " BUNFMT " | obj " BUNFMT "is %d
\n",*pbt, *obt, tblIdx);
+
lastP = *pbt;
}
else if (*sbt != lastS){
@@ -3238,7 +3284,7 @@ RDFreorganize(int *ret, bat *sbatid, bat
int *mTblIdxFreqIdxMapping; /* Invert of
mfreqIdxTblIdxMapping */
PropStat *propStat;
int numdistinctMCS = 0;
- CStable *cstable;
+ CStableStat *cstablestat;
freqCSset = initCSset();
@@ -3258,10 +3304,10 @@ RDFreorganize(int *ret, bat *sbatid, bat
initIntArray(mTblIdxFreqIdxMapping , freqCSset->numCSadded, -1);
//Mapping from from CSId to TableIdx
- cstable = initCStablesAndIdxMapping(freqCSset, csTblIdxMapping,
mfreqIdxTblIdxMapping, mTblIdxFreqIdxMapping);
-
- lastSubjId = (oid *) malloc (sizeof(oid) * cstable->numTables);
- initArray(lastSubjId, cstable->numTables, 0);
+ cstablestat = initCStablesAndIdxMapping(freqCSset, csTblIdxMapping,
mfreqIdxTblIdxMapping, mTblIdxFreqIdxMapping);
+
+ lastSubjId = (oid *) malloc (sizeof(oid) * cstablestat->numTables);
+ initArray(lastSubjId, cstablestat->numTables, 0);
if ((sbat = BATdescriptor(*sbatid)) == NULL) {
throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
@@ -3374,14 +3420,15 @@ RDFreorganize(int *ret, bat *sbatid, bat
printPropStat(propStat);
- if (RDFdistTriplesToCSs(ret, &sNewBat->batCacheid,
&pNewBat->batCacheid, &oNewBat->batCacheid, propStat, cstable) != MAL_SUCCEED){
+ if (RDFdistTriplesToCSs(ret, &sNewBat->batCacheid,
&pNewBat->batCacheid, &oNewBat->batCacheid, propStat, cstablestat) !=
MAL_SUCCEED){
throw(RDF, "rdf.RDFreorganize", "Problem in distributing
triples to BATs using CSs");
}
freeCSset(freqCSset);
free(subjCSMap);
free(csTblIdxMapping);
-
+ freeCStableStat(cstablestat);
+
BBPreclaim(lmap);
BBPreclaim(rmap);
BBPreclaim(sbat);
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -20,6 +20,8 @@
#ifndef _RDFSCHEMA_H_
#define _RDFSCHEMA_H_
+#include <sql_catalog.h>
+
rdf_export str
RDFSchemaExplore(int *ret, str *tbname, str *clname);
@@ -174,16 +176,24 @@ typedef struct CSmergeRel{
int numAllocation;
} CSmergeRel;
+/*
typedef struct CStable {
- bat** lstbatid;
- int numTables;
- int* numPropPerTable;
- //int* freqIdx; //Idx of the corresponding freqCS for a table
- oid* lastInsertedS;
+ BAT** colBats;
} CStable;
+*/
+
+typedef struct CStableStat { /* Bookkeeping for the set of CS tables; built by initCStablesAndIdxMapping. */
+ bat** lstbatid; /* lstbatid[t]: array of bat ids for table t, allocated with numPropPerTable[t] slots */
+ int numTables; /* number of tables, i.e. the length of the per-table arrays in this struct */
+ int* numPropPerTable; /* numPropPerTable[t]: number of properties of table t */
+ //int* freqIdx; //Idx of the corresponding freqCS for a
table
+ oid* lastInsertedS; /* one oid per table; presumably the last subject inserted into that table -- TODO confirm */
+ //sql_schema* schema;
+ //CStable* cstable;
+} CStableStat;
rdf_export str
-RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, PropStat*
propStat, CStable *cstable);
+RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, PropStat*
propStat, CStableStat *cstablestat);
#endif /* _RDFSCHEMA_H_ */
diff --git a/sql/backends/monet5/sql.mx b/sql/backends/monet5/sql.mx
--- a/sql/backends/monet5/sql.mx
+++ b/sql/backends/monet5/sql.mx
@@ -1274,6 +1274,12 @@ permutations of SPO and a mapping) in th
rdf.graph We can then query with SQL queries the RDF triple storeby quering
tables gid_spo, gid_pso etc., where gid is looked up in rdf.graph"
+pattern rdfreorganize (schema:str, table:str, threshold:int)
+address SQLrdfreorganize
+comment "This procedure calls a function from the rdf module to extract the
relational schema
+from rdf triples. It then reorganizes the rdf triples table by clustering
according
+to the subject. "
+
command prelude()
address SQLprelude;
@@ -1431,6 +1437,7 @@ sql5_export str SQLoctopusBind(Client cn
sql5_export str SQLoctopusBinddbat(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
InstrPtr pci);
sql5_export str SQLargRecord(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
InstrPtr pci);
sql5_export str SQLrdfShred(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
InstrPtr pci);
+sql5_export str SQLrdfreorganize(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
InstrPtr pci);
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list