Changeset: 2f08774fbc54 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=2f08774fbc54
Modified Files:
        monetdb5/extras/rdf/rdfparams.c
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
        sql/backends/monet5/sql_rdf.c
Branch: rdf
Log Message:

Modify the way of generating sql table/col names


diffs (truncated from 335 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdfparams.c b/monetdb5/extras/rdf/rdfparams.c
--- a/monetdb5/extras/rdf/rdfparams.c
+++ b/monetdb5/extras/rdf/rdfparams.c
@@ -46,7 +46,7 @@ void createDefaultParamsFile(void){
        fprintf(paramFile, "upperboundNumTables 1000\n");
        //fprintf(paramFile, "simTfidfThreshold 0.75");
        fprintf(paramFile, "minTableSize 1000\n");
-       fprintf(paramFile, "infreqTypeThreshold 0.1\n");
+       fprintf(paramFile, "infreqTypeThreshold 0.05\n");
        fprintf(paramFile, "infreqPropThreshold 0.05\n");
        fclose(paramFile); 
 }
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -3951,8 +3951,8 @@ void buildLabelStat(LabelStat *labelStat
                        numDummy++;
        }
        
-       //printf("Total number of distinct labels in Top%d is %d \n", k, 
labelStat->numLabeladded);
-       //printf("Number of DUMMY freqCS: %d \n",numDummy);
+       printf("Total number of distinct labels in Top%d is %d \n", k, 
labelStat->numLabeladded);
+       printf("Number of DUMMY freqCS: %d \n",numDummy);
        //Build list of FreqCS
        labelStat->freqIdList = (int**) malloc(sizeof(int*) * 
labelStat->numLabeladded);
        for (i =0; i < labelStat->numLabeladded; i++){
@@ -6702,6 +6702,27 @@ str getOrigObt(oid *obt, oid *origObt, B
 }
 #endif
 
+static
+oid getFirstEncodedSubjId(int tblIdx){
+       
+       return (BUN)(tblIdx + 1) << (sizeof(BUN)*8 - NBITS_FOR_CSID);
+}
+
+//Encoded subject BAT contains 
+//sequential numbers from getFirstEncodedSubjId()
+//to getFirstEncodedSubjId() + numberofelements 
+
+BAT* createEncodedSubjBat(int tblIdx, int num){
+       BAT* subjBat = NULL; 
+       
+       subjBat = BATnew(TYPE_void, TYPE_void , num + 1);
+       BATsetcount(subjBat,num);
+       BATseqbase(subjBat, 0);
+       BATseqbase(BATmirror(subjBat), getFirstEncodedSubjId(tblIdx));
+
+       return subjBat; 
+}
+
 #if NO_OUTPUTFILE == 0
 static
 char getObjTypeFromBATtype(int battype){
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -294,6 +294,10 @@ typedef struct SubCSSet{
 
 #define REMOVE_SMALL_TABLE     1       /* Remove SMALL but NOT dimension 
table*/
 
+#define APPENDSUBJECTCOLUMN    1       // The subject column actually doesn't 
need to be included into the relational table
+                                       // However, for creating the foreign 
key relationship, we add this column and 
+                                       // mark it as a primary key
+
 
 typedef struct CSset{
        CS* items;
@@ -527,5 +531,9 @@ isCSTable(CS item, oid name);
 rdf_export str
 printTKNZStringFromOid(oid id);
 
+rdf_export BAT*
+createEncodedSubjBat(int tblIdx, int num);
+
+
 
 #endif /* _RDFSCHEMA_H_ */
diff --git a/sql/backends/monet5/sql_rdf.c b/sql/backends/monet5/sql_rdf.c
--- a/sql/backends/monet5/sql_rdf.c
+++ b/sql/backends/monet5/sql_rdf.c
@@ -542,6 +542,80 @@ SQLrdfShred(Client cntxt, MalBlkPtr mb, 
 #endif /* HAVE_RAPTOR */
 }
 
+static
+void getTblSQLname(char *tmptbname, int tblIdx, int isExTbl, CStableStat 
*cstablestat, BATiter mapi, BAT *mbat){
+       str     baseTblName;
+       char    tmpstr[20]; 
+
+       if (isExTbl ==0) 
+               sprintf(tmpstr, "%d",tblIdx);
+       else //isExTbl == 1
+               sprintf(tmpstr, "ex%d",tblIdx);
+
+       getTblName(&baseTblName, cstablestat->lstcstable[tblIdx].tblname, mapi, 
mbat); 
+       sprintf(tmptbname, "%s", baseTblName);
+       strcat(tmptbname,tmpstr);
+
+       GDKfree(baseTblName);
+}
+
+//If colType == -1, ==> default col
+//If not, it is a ex-type column
+static
+void getColSQLname(char *tmpcolname, int tblIdx, int colIdx, int colType, 
CStableStat *cstablestat, BATiter mapi, BAT *mbat){
+       str baseColName;
+       char    tmpstr[20];
+
+       if (colType == -1) sprintf(tmpstr, "%d",colIdx);
+       else 
+               sprintf(tmpstr, "%dtype%d",colIdx, colType); 
+       getTblName(&baseColName, 
cstablestat->lstcstable[tblIdx].lstProp[colIdx], mapi, mbat);
+       sprintf(tmpcolname, "%s", baseColName);
+       strcat(tmpcolname,tmpstr); 
+
+
+       GDKfree(baseColName);
+}
+
+static
+void getMvTblSQLname(char *tmpmvtbname, int tblIdx, int colIdx, CStableStat 
*cstablestat, BATiter mapi, BAT *mbat){
+       str baseTblName;
+       str baseColName; 
+
+       getTblName(&baseTblName, cstablestat->lstcstable[tblIdx].tblname, mapi, 
mbat);
+       getTblName(&baseColName, 
cstablestat->lstcstable[tblIdx].lstProp[colIdx], mapi, mbat);
+
+       sprintf(tmpmvtbname, "mv%s%d_%s%d", baseTblName, tblIdx, baseColName, 
colIdx);
+
+       GDKfree(baseTblName);
+       GDKfree(baseColName);
+}
+
+/*
+static
+addFKs(CStableStat* cstablestat, CSPropTypes *csPropTypes){
+       FILE            *fout;
+       char            filename[100];
+       int             i;
+       char            fromTbl[100]; 
+       char            fromTblCol[100]; 
+       char            toTbl[100];
+       char            toTblCol[100]; 
+       int             refTblId; 
+
+       strcpy(filename, "fkCreate.sql");
+       fout = fopen(filename, "wt");
+       for (i = 0; i < cstablestat->numTables; i++){
+               for(j = 0; j < csPropTypes[i].numProp; j++){
+                       if (csPropTypes[i].lstPropTypes[j].isFKProp == 1){
+                               refTblId = 
csPropTypes[i].lstPropTypes[j].refTblId;                                     
+                       }
+               }
+       }
+       fclose(fout);   
+
+}
+*/
 
 /* Re-organize triple table by using clustering storage
  * CALL rdf_reorganize('schema','tablename', 1);
@@ -560,13 +634,12 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
        sql_schema *sch; 
        int ret = 0; 
        CStableStat *cstablestat; 
-       str     baseTblName;
        char    tmptbname[100]; 
-       char    tmpstr[20]; 
+       char    tmpmvtbname[100];
        char    tmptbnameex[100];
        //char  tmpviewname[100]; 
-       str     baseColName;
        char    tmpcolname[100]; 
+       char    tmpmvcolname[100];
        //char  viewcommand[500];
        sql_subtype tpe;        
        sql_subtype tpes[50];
@@ -710,50 +783,46 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
        for (i = 0; i < cstablestat->numTables; i++){
                //printf("creating table %d \n", i);
 
-               sprintf(tmpstr, "%d",i);
-               getTblName(&baseTblName, cstablestat->lstcstable[i].tblname, 
mapi, mbat); 
-               sprintf(tmptbname, "%s", baseTblName);
-               strcat(tmptbname,tmpstr);
+               getTblSQLname(tmptbname, i, 0, cstablestat, mapi, mbat);
                printf("Table %d:||  %s ||\n",i, tmptbname);
 
                cstables[i] = mvc_create_table(m, sch, tmptbname, tt_table, 0,
                                   SQL_PERSIST, 0, 3);
-               GDKfree(baseTblName);
                totalNoTablesCreated++;
                //Multivalues tables for each column
                csmvtables[i] = (sql_table **)malloc(sizeof(sql_table*) * 
cstablestat->numPropPerTable[i]);
                
+               #if APPENDSUBJECTCOLUMN
+               mvc_create_column(m, cstables[i], "subject",  &tpes[TYPE_oid]);
+               #endif
                for (j = 0; j < cstablestat->numPropPerTable[i]; j++){
 
                        //TODO: Use propertyId from Propstat
-                       sprintf(tmpstr, "%d",j);
-                       getTblName(&baseColName, 
cstablestat->lstcstable[i].lstProp[j], mapi, mbat);
-                       sprintf(tmpcolname, "%s", baseColName);
-                       strcat(tmpcolname,tmpstr); 
-                       //sprintf(tmpcolname, 
"col"BUNFMT,(cstablestat->lstcstable[i].lstProp[j]));
+                       getColSQLname(tmpcolname, i, j, -1, cstablestat, mapi, 
mbat);
+
 
                        tmpbat = cstablestat->lstcstable[i].colBats[j];
 
                        mvc_create_column(m, cstables[i], tmpcolname,  
&tpes[tmpbat->ttype]);
                        
-                       GDKfree(baseColName);
                        //For multi-values table
                        tmpNumMVCols = 
cstablestat->lstcstable[i].lstMVTables[j].numCol;
                        if (tmpNumMVCols != 0){
-                               sprintf(tmptbname, "mvtable%dp%d",i,j);
-                               csmvtables[i][j] = mvc_create_table(m, sch, 
tmptbname, tt_table, 0, SQL_PERSIST, 0, 3); 
+                               getMvTblSQLname(tmpmvtbname, i, j, cstablestat, 
mapi, mbat);
+                               csmvtables[i][j] = mvc_create_table(m, sch, 
tmpmvtbname, tt_table, 0, SQL_PERSIST, 0, 3); 
                                totalNoTablesCreated++;
 
                                //One column for key
-                               sprintf(tmpcolname, "mvCol%dt%dpKey",i,j);
+                               sprintf(tmpcolname, "mvKey");
                                tmpbat = 
cstablestat->lstcstable[i].lstMVTables[j].keyBat;
                                mvc_create_column(m, csmvtables[i][j], 
tmpcolname,  &tpes[tmpbat->ttype]);
 
                                //Value columns 
                                for (k = 0; k < tmpNumMVCols; k++){
-                                       sprintf(tmpcolname, 
"mvCol%dt%dp%dc",i,j,k);
+                                       getColSQLname(tmpmvcolname, i, j, k, 
cstablestat, mapi, mbat);
+
                                        tmpbat = 
cstablestat->lstcstable[i].lstMVTables[j].mvBats[k];
-                                       mvc_create_column(m, csmvtables[i][j], 
tmpcolname,  &tpes[tmpbat->ttype]);
+                                       mvc_create_column(m, csmvtables[i][j], 
tmpmvcolname,  &tpes[tmpbat->ttype]);
                                }
 
                        }
@@ -767,20 +836,17 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
                // Add non-default type table
                if (cstablestat->lstcstableEx[i].numCol != 0){  
 
-                       sprintf(tmpstr, "ex%d",i);
-                       getTblName(&baseTblName, 
cstablestat->lstcstable[i].tblname, mapi, mbat); 
-                       sprintf(tmptbnameex, "%s", baseTblName);
-                       strcat(tmptbnameex,tmpstr);
+                       getTblSQLname(tmptbnameex, i, 1, cstablestat, mapi, 
mbat);
                        printf("TableEx %d: || %s || \n",i, tmptbnameex);
 
                        cstablesEx[i] = mvc_create_table(m, sch, tmptbnameex, 
tt_table, 0,
                                           SQL_PERSIST, 0, 3);
-                       GDKfree(baseTblName);
                        totalNoTablesCreated++;
                        totalNoExTables++;
                        for (j = 0; j < cstablestat->lstcstableEx[i].numCol; 
j++){
                                //TODO: Use propertyId from Propstat
-                               sprintf(tmpcolname, 
"colex%dtype%d",cstablestat->lstcstableEx[i].mainTblColIdx[j], 
(int)(cstablestat->lstcstableEx[i].colTypes[j]));
+                               getColSQLname(tmpcolname, i, 
cstablestat->lstcstableEx[i].mainTblColIdx[j], 
(int)(cstablestat->lstcstableEx[i].colTypes[j]), cstablestat, mapi, mbat);
+
                                tmpbat = 
cstablestat->lstcstableEx[i].colBats[j];
                                mvc_create_column(m, cstablesEx[i], tmpcolname, 
 &tpes[tmpbat->ttype]);                         
                        }
@@ -789,15 +855,19 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
 
                #endif
 
-
+               #if APPENDSUBJECTCOLUMN
+               {
+                       BAT* subjBat = 
createEncodedSubjBat(i,BATcount(cstablestat->lstcstable[i].colBats[0]));
+                       store_funcs.append_col(m->session->tr,
+                                       mvc_bind_column(m, 
cstables[i],"subject"), 
+                                       subjBat, TYPE_bat);
+                       BBPreclaim(subjBat);
+               }
+               #endif
                for (j = 0; j < cstablestat->numPropPerTable[i]; j++){
 
                        //TODO: Use propertyId from Propstat
-                       sprintf(tmpstr, "%d",j);
-                       getTblName(&baseColName, 
cstablestat->lstcstable[i].lstProp[j], mapi, mbat);
-                       sprintf(tmpcolname, "%s", baseColName);
-                       strcat(tmpcolname,tmpstr); 
-                       //sprintf(tmpcolname, 
"col"BUNFMT,(cstablestat->lstcstable[i].lstProp[j]));
+                       getColSQLname(tmpcolname, i, j, -1, cstablestat, mapi, 
mbat);
 
                        tmpbat = cstablestat->lstcstable[i].colBats[j];
 
@@ -807,14 +877,12 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
                                        mvc_bind_column(m, 
cstables[i],tmpcolname ), 
                                        tmpbat, TYPE_bat);
 
-                       GDKfree(baseColName);
                        //For multi-values table
                        tmpNumMVCols = 
cstablestat->lstcstable[i].lstMVTables[j].numCol;
                        if (tmpNumMVCols != 0){
-                               sprintf(tmptbname, "mvtable%dp%d",i,j);
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to