Changeset: 0138dd320f44 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0138dd320f44
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
        sql/backends/monet5/sql.mx
Branch: rdf
Log Message:

Create FKs by filtering relationships between merged CSs


diffs (truncated from 416 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -1746,7 +1746,7 @@ str printmergeCSSet(CSset *freqCSset, in
                        fprintf(fout, "\n");
                        for (j = 0; j < cs.numProp; j++){
                                takeOid(cs.lstProp[j], &propStr);       
-                               fprintf(fout,"          %s\n", propStr);
+                               fprintf(fout,"PropId: "BUNFMT"  --->    %s\n", 
cs.lstProp[j],  propStr);
                                GDKfree(propStr);
                        }
                        fprintf(fout, "\n");
@@ -3671,7 +3671,6 @@ static void getStatisticCSsBySupports(BA
        //free(csPropNum); 
 }
 
-
 static void getStatisticFinalCSs(CSset *freqCSset, BAT *sbat, int 
freqThreshold, int curNumMergeCS, oid* mergeCSFreqCSMap){
 
        //int   *csPropNum; 
@@ -3680,7 +3679,7 @@ static void getStatisticFinalCSs(CSset *
        int     i,j ; 
        char    filename[100];
        char    tmpStr[20];
-       int     maxNumtriple; 
+       int     maxNumtriple = 0; 
        int     minNumtriple = INT_MAX; 
        int     numMergeCS = 0; 
        int     totalCoverage = 0; 
@@ -4395,7 +4394,11 @@ str RDFExtractCSPropTypes(int *ret, BAT 
                obt = (oid *) BUNtloc(oi, p); 
                /* Check type of object */
                objType = getObjType(*obt);     /* Get two bits 63th, 62nd from 
object oid */
-       
+               
+               if (objType == BLANKNODE){      //BLANKNODE object values will 
be stored in the same column with URI object values      
+                       objType = URI; 
+               }
+
                pbt = (oid *) BUNtloc(pi, p);
 
                if (curP == *pbt){
@@ -4970,35 +4973,55 @@ CSrel* generateCsRelBetweenMergeFreqSet(
 }
 
 
-/* Create a new data structure to store relationships including merged CS */
-/*
+/* Refine the relationship between mergeCS in order to create FK relationship 
between tables */
+
 static
-CSrel* getRefinedCsRelSet(CSrel *csrelFreqSet, CSset *freqCSset){
+CSrel* getFKBetweenTableSet(CSrel *csrelFreqSet, CSset *freqCSset, 
CSPropTypes* csPropTypes, int* mfreqIdxTblIdxMapping, int numTables){
        int     i,j;
-       int     numFreqCS = freqCSset->numOrigFreqCS; 
        int     from, to;
+       int     toFreqId; 
        CSrel   rel;
        CSrel*  refinedCsRel;
-       int     numRel = freqCSset->numCSadded;
-       
-       refinedCsRel = initCSrelset(numRel);
+       int     propIdx;        //Index of prop in list of props for each FreqCS
+       int     numRel = freqCSset->numCSadded; 
+
+       refinedCsRel = initCSrelset(numTables);
 
        for (i = 0; i < numRel; ++i) {
-               if (csrelFreqSet[i].numRef == 0) continue; // ignore CS without 
relations
+               if (csrelFreqSet[i].numRef == 0 || freqCSset->items[i].coverage 
> MINIMUM_TABLE_SIZE) continue; // ignore CS without relations
+               assert(freqCSset->items[i].parentFreqIdx == -1);
                rel = csrelFreqSet[i];
+               from = mfreqIdxTblIdxMapping[i];
+               assert(from < numTables);
                // update the 'from' value
-               from = i;
-               assert(freqCSset[from].parentFreqIdx == -1);
                for (j = 0; j < rel.numRef; ++j) {
-                       to = rel.lstRefFreqIdx[j];
-                       assert(freqCSset->items[to].parentFreqIdx == -1);
+                       toFreqId = rel.lstRefFreqIdx[j];
+                       assert(freqCSset->items[toFreqId].parentFreqIdx == -1);
                        // add relation to new data structure
+
+                       //Compare with prop coverage from csproptype    
+                       if (rel.lstCnt[j]  < freqCSset->items[toFreqId].support 
* MIN_FK_FREQUENCY)     continue; 
+
+                       to = mfreqIdxTblIdxMapping[toFreqId]; 
+                       
+                       //printf("Pass all basic conditions \n"); 
+
+                       //Compare with the property coverage from csPropTypes
+                       propIdx = 0; 
+                       while (csPropTypes[from].lstPropTypes[propIdx].prop != 
rel.lstPropId[j]){
+                               propIdx++;
+                       }
+                       assert(propIdx < freqCSset->items[i].numProp);
+                       
+                       if (csPropTypes[from].lstPropTypes[propIdx].propCover * 
MIN_FK_PROPCOVERAGE > rel.lstCnt[j]) continue; 
+                       
+                       assert(to < numTables);
                        addReltoCSRelWithFreq(from, to, rel.lstPropId[j], 
rel.lstCnt[j], rel.lstBlankCnt[j], &refinedCsRel[from]);
                }
        }
        return refinedCsRel;
 }
-*/
+
 static
 CSrel* generateCsRelToMergeFreqSet(CSrel *csrelFreqSet, CSset *freqCSset){
        int     i,j;
@@ -5036,7 +5059,7 @@ CSrel* generateCsRelToMergeFreqSet(CSrel
 }
 
 static
-void printCSRel(CSset *freqCSset, CSrel *csRelMergeFreqSet, int freqThreshold){
+str printCSRel(CSset *freqCSset, CSrel *csRelMergeFreqSet, int freqThreshold){
        FILE    *fout2,*fout2filter;
        char    filename2[100];
        char    tmpStr[20];
@@ -5044,6 +5067,8 @@ void printCSRel(CSset *freqCSset, CSrel 
        int             i,j, k;
        int             freq;
        int     *mfreqIdxTblIdxMapping;
+       char*   schema = "rdf";
+       int     ret; 
 
        strcpy(filename2, "csRelationshipBetweenMergeFreqCS");
        sprintf(tmpStr, "%d", freqThreshold);
@@ -5065,6 +5090,12 @@ void printCSRel(CSset *freqCSset, CSrel 
                }
        }
        
+
+       if (TKNZRopen (NULL, &schema) != MAL_SUCCEED) {
+               throw(RDF, "printCSrel",
+                               "could not open the tokenizer\n");
+       }
+
        fprintf(fout2filter, "TblIdx: (Frequency) --> TblIdx (Property) (#of 
References) (#of blanknodes),...");        
        for (i = 0; i < freqCSset->numCSadded; i++){
                if (csRelMergeFreqSet[i].numRef != 0){  //Only print CS with FK
@@ -5096,10 +5127,58 @@ void printCSRel(CSset *freqCSset, CSrel 
                }
        }
 
+       TKNZRclose(&ret);
        fclose(fout2);
        fclose(fout2filter);
        free(mfreqIdxTblIdxMapping);
 
+       return MAL_SUCCEED; 
+}
+
+
+static
+str printFKs(CSrel *csRelFinalFKs, int freqThreshold, int numTables){
+       FILE    *fout;
+       char    filename[100];
+       char    tmpStr[20];
+       str     propStr;
+       int     i,j;
+       char*   schema = "rdf";
+       int     ret; 
+
+       strcpy(filename, "FKRelationship");
+       sprintf(tmpStr, "%d", freqThreshold);
+       strcat(filename, tmpStr);
+       strcat(filename, ".txt");
+
+       fout = fopen(filename,"wt");
+
+       if (TKNZRopen (NULL, &schema) != MAL_SUCCEED) {
+               throw(RDF, "printFKs",
+                               "could not open the tokenizer\n");
+       }
+
+       for (i = 0; i < numTables; i++){
+               if (csRelFinalFKs[i].numRef != 0){      //Only print CS with FK
+                       fprintf(fout, "FK "BUNFMT ": ", 
csRelFinalFKs[i].origFreqIdx);
+                       for (j = 0; j < csRelFinalFKs[i].numRef; j++){
+                               #if SHOWPROPERTYNAME
+                               takeOid(csRelFinalFKs[i].lstPropId[j], 
&propStr);
+                               fprintf(fout, BUNFMT "(P:" BUNFMT " - %s) 
(%d)(Blank:%d) ", csRelFinalFKs[i].lstRefFreqIdx[j], 
csRelFinalFKs[i].lstPropId[j], propStr, csRelFinalFKs[i].lstCnt[j], 
csRelFinalFKs[i].lstBlankCnt[j]);
+                               GDKfree(propStr);
+                               #else
+                               fprintf(fout, BUNFMT "(P:" BUNFMT ") 
(%d)(Blank:%d) ", 
csRelFinalFKs[i].lstRefFreqIdx[j],csRelFinalFKs[i].lstPropId[j], 
csRelFinalFKs[i].lstCnt[j], csRelFinalFKs[i].lstBlankCnt[j]);
+                               #endif
+
+                       }
+                       fprintf(fout, "\n");
+               }
+       }
+
+       TKNZRclose(&ret);
+       fclose(fout);
+
+       return MAL_SUCCEED; 
 }
 
 // for storing ontology data
@@ -5447,7 +5526,7 @@ RDFextractPfromPSO(int *ret, bat *pbatid
 }
 
 static 
-BAT* getOriginalOBat(BAT *obat){
+BAT* getOriginalUriOBat(BAT *obat){
        BAT*    origobat; 
        BATiter oi; 
        BUN     p,q; 
@@ -5461,8 +5540,8 @@ BAT* getOriginalOBat(BAT *obat){
 
                obt = (oid *) BUNtloc(oi, p); 
                /* Check type of object */
-               objType = (char) ((*obt) >> (sizeof(BUN)*8 - 4))  &  7 ;        
/* Get two bits 63th, 62nd from object oid */
-       
+               objType = getObjType(*obt); 
+
                if (objType == URI || objType == BLANKNODE){
                        *obt = (*obt) - ((oid)objType << (sizeof(BUN)*8 - 4));
                }
@@ -5880,7 +5959,7 @@ void getRealValue(void **returnValue, oi
        str     objStr; 
        str     datetimeStr; 
        BUN     bun;    
-       BUN     maxObjectURIOid =  ((oid)1 << (sizeof(BUN)*8 - NBITS_FOR_CSID - 
1)); //Base on getTblIdxFromS
+       BUN     maxObjectURIOid =  ((oid)1 << (sizeof(BUN)*8 - NBITS_FOR_CSID - 
1)) - 1; //Base on getTblIdxFromS
        float   realFloat; 
        int     realInt; 
        oid     realUri;
@@ -5888,11 +5967,14 @@ void getRealValue(void **returnValue, oi
        //printf("objOid = " BUNFMT " \n",objOid);
        if (objType == URI || objType == BLANKNODE){
                objOid = objOid - ((oid)objType << (sizeof(BUN)*8 - 4));
-
+               
                if (objOid < maxObjectURIOid){
-                       takeOid(objOid, &objStr); 
+                       //takeOid(objOid, &objStr);             //TODO: Do we 
need to get URI string???
                        //printf("From tokenizer URI object value: "BUNFMT " 
(str: %s) \n", objOid, objStr);
                }
+               //else, this object value refers to a subject oid
+               //IDEA: Modify the function for calculating new subject Id:
+               //==> subjectID = TBLID ... tmpSoid ....              
        }
        else{
                objOid = objOid - (objType*2 + 1) *  RDF_MIN_LITERAL;   /* Get 
the real objOid from Map or Tokenizer */ 
@@ -5915,25 +5997,28 @@ void getRealValue(void **returnValue, oi
                        datetimeStr = getDateTimeFromRDFString(objStr);
                        if (*returnValue != NULL) free(*returnValue);
                        *returnValue = (char *)malloc(sizeof(char) * 
strlen(datetimeStr) + 1); 
-                       memcpy(*returnValue,datetimeStr,sizeof(char) * 
strlen(objStr) + 1);
+                       memcpy(*returnValue,datetimeStr,sizeof(char) * 
strlen(datetimeStr) + 1);
                        //printf("A datetime object value: %s \n",(char 
*)(*returnValue));
                        break; 
                case INTEGER:
                        //printf("Full object value: %s \n",objStr);
                        realInt = getIntFromRDFString(objStr);
-                       //printf("A INTEGER object value: %i \n",realInt);
+                       (*returnValue) = (int*)malloc(sizeof(int));
                        *(int*)(*returnValue) = realInt;
+                       //printf("A INTEGER object value: %d 
\n",*(int*)*returnValue);
                        break; 
                case FLOAT:
                        //printf("Full object value: %s \n",objStr);
                        realFloat = getFloatFromRDFString(objStr);
-                       //printf("A FLOAT object value: %f \n",realFloat);
+                       (*returnValue) = (float *)malloc(sizeof(float));
                        *(float*)(*returnValue) = realFloat;
+                       //printf("A FLOAT object value: %f \n", 
*(float*)*returnValue);
                        break; 
                default: //URI or BLANK NODE            
-                       //printf("A URI object value: " BUNFMT " \n", objOid);
+                       (*returnValue) = (oid*)malloc(sizeof(oid));
                        realUri = objOid;
                        *(oid*)(*returnValue) = realUri;
+                       //printf("A URI object value: " BUNFMT " \n", 
*(oid*)*returnValue);
        }
 
 }
@@ -6058,6 +6143,7 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                }
 
                objType = getObjType(*obt); 
+               assert (objType != BLANKNODE);
 
                tmpColIdx = tmpTblIdxPropIdxMap[tblIdx]; 
 
@@ -6111,6 +6197,9 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                        
                        }
 
+                       free(realObjValue); 
+                       realObjValue = NULL; 
+
                        if (numMultiValues == 0){       
                                //In search the position of the first value 
                                //to the correcponding column in the MAINTBL
@@ -6184,7 +6273,10 @@ str RDFdistTriplesToCSs(int *ret, bat *s
                //if (objType == STRING) printf("Value returned by getRealValue 
is %s \n", (char*)realObjValue);
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to