Changeset: 2f08774fbc54 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=2f08774fbc54
Modified Files:
monetdb5/extras/rdf/rdfparams.c
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
sql/backends/monet5/sql_rdf.c
Branch: rdf
Log Message:
Modify the way of generating sql table/col names
diffs (truncated from 335 to 300 lines):
diff --git a/monetdb5/extras/rdf/rdfparams.c b/monetdb5/extras/rdf/rdfparams.c
--- a/monetdb5/extras/rdf/rdfparams.c
+++ b/monetdb5/extras/rdf/rdfparams.c
@@ -46,7 +46,7 @@ void createDefaultParamsFile(void){
fprintf(paramFile, "upperboundNumTables 1000\n");
//fprintf(paramFile, "simTfidfThreshold 0.75");
fprintf(paramFile, "minTableSize 1000\n");
- fprintf(paramFile, "infreqTypeThreshold 0.1\n");
+ fprintf(paramFile, "infreqTypeThreshold 0.05\n");
fprintf(paramFile, "infreqPropThreshold 0.05\n");
fclose(paramFile);
}
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -3951,8 +3951,8 @@ void buildLabelStat(LabelStat *labelStat
numDummy++;
}
- //printf("Total number of distinct labels in Top%d is %d \n", k,
labelStat->numLabeladded);
- //printf("Number of DUMMY freqCS: %d \n",numDummy);
+ printf("Total number of distinct labels in Top%d is %d \n", k,
labelStat->numLabeladded);
+ printf("Number of DUMMY freqCS: %d \n",numDummy);
//Build list of FreqCS
labelStat->freqIdList = (int**) malloc(sizeof(int*) *
labelStat->numLabeladded);
for (i =0; i < labelStat->numLabeladded; i++){
@@ -6702,6 +6702,27 @@ str getOrigObt(oid *obt, oid *origObt, B
}
#endif
+/*
+ * Returns the first encoded subject id of table tblIdx: the value
+ * (tblIdx + 1), taken as a BUN, shifted left by the number of bits in
+ * a BUN minus NBITS_FOR_CSID.
+ */
+static oid
+getFirstEncodedSubjId(int tblIdx)
+{
+	const BUN tblTag = (BUN)(tblIdx + 1);
+
+	return tblTag << (sizeof(BUN)*8 - NBITS_FOR_CSID);
+}
+
+/*
+ * Creates the encoded subject BAT of table tblIdx.
+ *
+ * The BAT is void/void with count num; its seqbase is 0 and the seqbase
+ * of its mirror is getFirstEncodedSubjId(tblIdx), so it contains the
+ * sequential numbers from getFirstEncodedSubjId(tblIdx) to
+ * getFirstEncodedSubjId(tblIdx) + num.
+ *
+ * Returns the new BAT, or NULL when BATnew() fails to allocate it.
+ */
+BAT *
+createEncodedSubjBat(int tblIdx, int num)
+{
+	BAT *subjBat = BATnew(TYPE_void, TYPE_void, num + 1);
+
+	/* Do not set count/seqbase on a BAT that could not be created */
+	if (subjBat == NULL)
+		return NULL;
+
+	BATsetcount(subjBat, num);
+	BATseqbase(subjBat, 0);
+	BATseqbase(BATmirror(subjBat), getFirstEncodedSubjId(tblIdx));
+
+	return subjBat;
+}
+
#if NO_OUTPUTFILE == 0
static
char getObjTypeFromBATtype(int battype){
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -294,6 +294,10 @@ typedef struct SubCSSet{
#define REMOVE_SMALL_TABLE 1 /* Remove SMALL but NOT dimension
table*/
+#define APPENDSUBJECTCOLUMN 1 // The subject column actually doesn't
need to be included into the relational table
+ // However, for creating the foreign
key relationship, we add this column and
+ // mark it as a primary key
+
typedef struct CSset{
CS* items;
@@ -527,5 +531,9 @@ isCSTable(CS item, oid name);
rdf_export str
printTKNZStringFromOid(oid id);
+rdf_export BAT*
+createEncodedSubjBat(int tblIdx, int num);
+
+
#endif /* _RDFSCHEMA_H_ */
diff --git a/sql/backends/monet5/sql_rdf.c b/sql/backends/monet5/sql_rdf.c
--- a/sql/backends/monet5/sql_rdf.c
+++ b/sql/backends/monet5/sql_rdf.c
@@ -542,6 +542,80 @@ SQLrdfShred(Client cntxt, MalBlkPtr mb,
#endif /* HAVE_RAPTOR */
}
+/*
+ * Writes the SQL name of table tblIdx into tmptbname: the name returned
+ * by getTblName() for the table, followed by "<tblIdx>" for the default
+ * table (isExTbl == 0) or "ex<tblIdx>" otherwise.
+ *
+ * The write is not bounded, so tmptbname must be large enough to hold
+ * the resulting name.
+ */
+static void
+getTblSQLname(char *tmptbname, int tblIdx, int isExTbl, CStableStat *cstablestat, BATiter mapi, BAT *mbat)
+{
+	char idxSuffix[20];
+	str tblBase;
+
+	if (isExTbl != 0) {
+		sprintf(idxSuffix, "ex%d", tblIdx);
+	} else {
+		sprintf(idxSuffix, "%d", tblIdx);
+	}
+
+	getTblName(&tblBase, cstablestat->lstcstable[tblIdx].tblname, mapi, mbat);
+
+	/* base name first, index suffix appended right behind it */
+	sprintf(tmptbname, "%s", tblBase);
+	strcat(tmptbname, idxSuffix);
+
+	GDKfree(tblBase);
+}
+
+/*
+ * Writes the SQL name of column colIdx of table tblIdx into tmpcolname:
+ * the name returned by getTblName() for the column's property, followed
+ * by "<colIdx>" for a default column (colType == -1) or by
+ * "<colIdx>type<colType>" for an ex-type column.
+ *
+ * The write into tmpcolname is not bounded, so the caller's buffer must
+ * be large enough to hold the resulting name.
+ */
+static void
+getColSQLname(char *tmpcolname, int tblIdx, int colIdx, int colType, CStableStat *cstablestat, BATiter mapi, BAT *mbat)
+{
+	str baseColName;
+	/* "%dtype%d" can need up to 26 characters plus the terminator,
+	 * which did not fit in the former 20-byte buffer */
+	char tmpstr[32];
+
+	if (colType == -1)
+		snprintf(tmpstr, sizeof(tmpstr), "%d", colIdx);
+	else
+		snprintf(tmpstr, sizeof(tmpstr), "%dtype%d", colIdx, colType);
+
+	getTblName(&baseColName, cstablestat->lstcstable[tblIdx].lstProp[colIdx], mapi, mbat);
+	sprintf(tmpcolname, "%s", baseColName);
+	strcat(tmpcolname, tmpstr);
+
+	GDKfree(baseColName);
+}
+
+/*
+ * Writes the SQL name of the multi-values table for column colIdx of
+ * table tblIdx into tmpmvtbname, formatted as
+ * "mv<table name><tblIdx>_<column name><colIdx>", where both names are
+ * the ones returned by getTblName().
+ *
+ * The write is not bounded, so tmpmvtbname must be large enough to hold
+ * the resulting name.
+ */
+static void
+getMvTblSQLname(char *tmpmvtbname, int tblIdx, int colIdx, CStableStat *cstablestat, BATiter mapi, BAT *mbat)
+{
+	str tblBase;
+	str colBase;
+
+	getTblName(&tblBase, cstablestat->lstcstable[tblIdx].tblname, mapi, mbat);
+	getTblName(&colBase, cstablestat->lstcstable[tblIdx].lstProp[colIdx], mapi, mbat);
+
+	sprintf(tmpmvtbname, "mv%s%d_%s%d", tblBase, tblIdx, colBase, colIdx);
+
+	/* both names were handed out by getTblName() and are released here */
+	GDKfree(colBase);
+	GDKfree(tblBase);
+}
+
+/*
+static
+addFKs(CStableStat* cstablestat, CSPropTypes *csPropTypes){
+ FILE *fout;
+ char filename[100];
+ int i;
+ char fromTbl[100];
+ char fromTblCol[100];
+ char toTbl[100];
+ char toTblCol[100];
+ int refTblId;
+
+ strcpy(filename, "fkCreate.sql");
+ fout = fopen(filename, "wt");
+ for (i = 0; i < cstablestat->numTables; i++){
+ for(j = 0; j < csPropTypes[i].numProp; j++){
+ if (csPropTypes[i].lstPropTypes[j].isFKProp == 1){
+ refTblId =
csPropTypes[i].lstPropTypes[j].refTblId;
+ }
+ }
+ }
+ fclose(fout);
+
+}
+*/
/* Re-organize triple table by using clustering storage
* CALL rdf_reorganize('schema','tablename', 1);
@@ -560,13 +634,12 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
sql_schema *sch;
int ret = 0;
CStableStat *cstablestat;
- str baseTblName;
char tmptbname[100];
- char tmpstr[20];
+ char tmpmvtbname[100];
char tmptbnameex[100];
//char tmpviewname[100];
- str baseColName;
char tmpcolname[100];
+ char tmpmvcolname[100];
//char viewcommand[500];
sql_subtype tpe;
sql_subtype tpes[50];
@@ -710,50 +783,46 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
for (i = 0; i < cstablestat->numTables; i++){
//printf("creating table %d \n", i);
- sprintf(tmpstr, "%d",i);
- getTblName(&baseTblName, cstablestat->lstcstable[i].tblname,
mapi, mbat);
- sprintf(tmptbname, "%s", baseTblName);
- strcat(tmptbname,tmpstr);
+ getTblSQLname(tmptbname, i, 0, cstablestat, mapi, mbat);
printf("Table %d:|| %s ||\n",i, tmptbname);
cstables[i] = mvc_create_table(m, sch, tmptbname, tt_table, 0,
SQL_PERSIST, 0, 3);
- GDKfree(baseTblName);
totalNoTablesCreated++;
//Multivalues tables for each column
csmvtables[i] = (sql_table **)malloc(sizeof(sql_table*) *
cstablestat->numPropPerTable[i]);
+ #if APPENDSUBJECTCOLUMN
+ mvc_create_column(m, cstables[i], "subject", &tpes[TYPE_oid]);
+ #endif
for (j = 0; j < cstablestat->numPropPerTable[i]; j++){
//TODO: Use propertyId from Propstat
- sprintf(tmpstr, "%d",j);
- getTblName(&baseColName,
cstablestat->lstcstable[i].lstProp[j], mapi, mbat);
- sprintf(tmpcolname, "%s", baseColName);
- strcat(tmpcolname,tmpstr);
- //sprintf(tmpcolname,
"col"BUNFMT,(cstablestat->lstcstable[i].lstProp[j]));
+ getColSQLname(tmpcolname, i, j, -1, cstablestat, mapi,
mbat);
+
tmpbat = cstablestat->lstcstable[i].colBats[j];
mvc_create_column(m, cstables[i], tmpcolname,
&tpes[tmpbat->ttype]);
- GDKfree(baseColName);
//For multi-values table
tmpNumMVCols =
cstablestat->lstcstable[i].lstMVTables[j].numCol;
if (tmpNumMVCols != 0){
- sprintf(tmptbname, "mvtable%dp%d",i,j);
- csmvtables[i][j] = mvc_create_table(m, sch,
tmptbname, tt_table, 0, SQL_PERSIST, 0, 3);
+ getMvTblSQLname(tmpmvtbname, i, j, cstablestat,
mapi, mbat);
+ csmvtables[i][j] = mvc_create_table(m, sch,
tmpmvtbname, tt_table, 0, SQL_PERSIST, 0, 3);
totalNoTablesCreated++;
//One column for key
- sprintf(tmpcolname, "mvCol%dt%dpKey",i,j);
+ sprintf(tmpcolname, "mvKey");
tmpbat =
cstablestat->lstcstable[i].lstMVTables[j].keyBat;
mvc_create_column(m, csmvtables[i][j],
tmpcolname, &tpes[tmpbat->ttype]);
//Value columns
for (k = 0; k < tmpNumMVCols; k++){
- sprintf(tmpcolname,
"mvCol%dt%dp%dc",i,j,k);
+ getColSQLname(tmpmvcolname, i, j, k,
cstablestat, mapi, mbat);
+
tmpbat =
cstablestat->lstcstable[i].lstMVTables[j].mvBats[k];
- mvc_create_column(m, csmvtables[i][j],
tmpcolname, &tpes[tmpbat->ttype]);
+ mvc_create_column(m, csmvtables[i][j],
tmpmvcolname, &tpes[tmpbat->ttype]);
}
}
@@ -767,20 +836,17 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
// Add non-default type table
if (cstablestat->lstcstableEx[i].numCol != 0){
- sprintf(tmpstr, "ex%d",i);
- getTblName(&baseTblName,
cstablestat->lstcstable[i].tblname, mapi, mbat);
- sprintf(tmptbnameex, "%s", baseTblName);
- strcat(tmptbnameex,tmpstr);
+ getTblSQLname(tmptbnameex, i, 1, cstablestat, mapi,
mbat);
printf("TableEx %d: || %s || \n",i, tmptbnameex);
cstablesEx[i] = mvc_create_table(m, sch, tmptbnameex,
tt_table, 0,
SQL_PERSIST, 0, 3);
- GDKfree(baseTblName);
totalNoTablesCreated++;
totalNoExTables++;
for (j = 0; j < cstablestat->lstcstableEx[i].numCol;
j++){
//TODO: Use propertyId from Propstat
- sprintf(tmpcolname,
"colex%dtype%d",cstablestat->lstcstableEx[i].mainTblColIdx[j],
(int)(cstablestat->lstcstableEx[i].colTypes[j]));
+ getColSQLname(tmpcolname, i,
cstablestat->lstcstableEx[i].mainTblColIdx[j],
(int)(cstablestat->lstcstableEx[i].colTypes[j]), cstablestat, mapi, mbat);
+
tmpbat =
cstablestat->lstcstableEx[i].colBats[j];
mvc_create_column(m, cstablesEx[i], tmpcolname,
&tpes[tmpbat->ttype]);
}
@@ -789,15 +855,19 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
#endif
-
+ #if APPENDSUBJECTCOLUMN
+ {
+ BAT* subjBat =
createEncodedSubjBat(i,BATcount(cstablestat->lstcstable[i].colBats[0]));
+ store_funcs.append_col(m->session->tr,
+ mvc_bind_column(m,
cstables[i],"subject"),
+ subjBat, TYPE_bat);
+ BBPreclaim(subjBat);
+ }
+ #endif
for (j = 0; j < cstablestat->numPropPerTable[i]; j++){
//TODO: Use propertyId from Propstat
- sprintf(tmpstr, "%d",j);
- getTblName(&baseColName,
cstablestat->lstcstable[i].lstProp[j], mapi, mbat);
- sprintf(tmpcolname, "%s", baseColName);
- strcat(tmpcolname,tmpstr);
- //sprintf(tmpcolname,
"col"BUNFMT,(cstablestat->lstcstable[i].lstProp[j]));
+ getColSQLname(tmpcolname, i, j, -1, cstablestat, mapi,
mbat);
tmpbat = cstablestat->lstcstable[i].colBats[j];
@@ -807,14 +877,12 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
mvc_bind_column(m,
cstables[i],tmpcolname ),
tmpbat, TYPE_bat);
- GDKfree(baseColName);
//For multi-values table
tmpNumMVCols =
cstablestat->lstcstable[i].lstMVTables[j].numCol;
if (tmpNumMVCols != 0){
- sprintf(tmptbname, "mvtable%dp%d",i,j);
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list