Changeset: e78930413d8d for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e78930413d8d
Modified Files:
        monetdb5/extras/rdf/rdf_shredder.c
        monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:

Map each CSid in the relationship to its maximum super CSid.

Then, group all the same super CSids in the relationships from one cs.


diffs (truncated from 327 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdf_shredder.c b/monetdb5/extras/rdf/rdf_shredder.c
--- a/monetdb5/extras/rdf/rdf_shredder.c
+++ b/monetdb5/extras/rdf/rdf_shredder.c
@@ -297,8 +297,16 @@ tripleHandler(void* user_data, const rap
        BAT **graph = pdata->graph;
 
        if (pdata->error > pdata->lasterror){
-               printf("Incorrect or wrong syntax triple %s \n ", pdata->errorMsg);
+               unsigned char* objStr;
+               int objLen; 
+               //printf("[Incorrect or wrong syntax triple] %s \n ", pdata->errorMsg);
                pdata->lasterror = pdata->error; 
+               objStr = raptor_term_to_string(triple->object);
+               objLen =  strlen((const char*)objStr);
+               //printf("Object: %s %d \n", objStr, objLen);
+               if (objLen == 2) 
+                       printf("EMPTY OBJECT STRING \n");
+               free(objStr); 
        }
        else{
                if (triple->subject->type == RAPTOR_TERM_TYPE_URI
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -127,6 +127,97 @@ CSrel* creataCSrel(oid csoid){
        return csrel; 
 }
 
+
+static 
+void addReltoCSRel(oid origCSoid, oid refCSoid, CSrel *csrel)
+{
+       void *_tmp; 
+       void *_tmp2; 
+
+       int i = 0; 
+
+       assert (origCSoid == csrel->origCSoid);
+
+       while (i < csrel->numRef){
+               if (refCSoid == csrel->lstRefCSoid[i]){
+                       //Existing
+                       break; 
+               }
+               i++;
+       }
+       
+       if (i != csrel->numRef){ 
+               csrel->lstCnt[i]++; 
+               return; 
+       }
+       else{   // New Ref
+       
+               if(csrel->numRef == csrel->numAllocation) 
+               { 
+                       csrel->numAllocation += INIT_NUM_CSREL; 
+                       
+                       _tmp = realloc(csrel->lstRefCSoid, (csrel->numAllocation * sizeof(oid)));
+                       _tmp2 = realloc(csrel->lstCnt, (csrel->numAllocation * sizeof(int)));
+
+                       if (!_tmp || !_tmp2){
+                               fprintf(stderr, "ERROR: Couldn't realloc memory!\n");
+                       }
+                       csrel->lstRefCSoid = (oid*)_tmp;
+                       csrel->lstCnt = (int*)_tmp2; 
+               }
+
+               csrel->lstRefCSoid[csrel->numRef] = refCSoid;
+               csrel->lstCnt[csrel->numRef] = 1; 
+               csrel->numRef++;
+       }
+}
+
+
+static 
+void addReltoCSRelWithFreq(oid origCSoid, oid refCSoid, int freq, CSrel *csrel)
+{
+       void *_tmp; 
+       void *_tmp2; 
+
+       int i = 0; 
+
+       assert (origCSoid == csrel->origCSoid);
+
+       while (i < csrel->numRef){
+               if (refCSoid == csrel->lstRefCSoid[i]){
+                       //Existing
+                       break; 
+               }
+               i++;
+       }
+       
+       if (i != csrel->numRef){ 
+               csrel->lstCnt[i] = csrel->lstCnt[i] + freq; 
+               return; 
+       }
+       else{   // New Ref
+       
+               if(csrel->numRef == csrel->numAllocation) 
+               { 
+                       csrel->numAllocation += INIT_NUM_CSREL; 
+                       
+                       _tmp = realloc(csrel->lstRefCSoid, (csrel->numAllocation * sizeof(oid)));
+                       _tmp2 = realloc(csrel->lstCnt, (csrel->numAllocation * sizeof(int)));
+
+                       if (!_tmp || !_tmp2){
+                               fprintf(stderr, "ERROR: Couldn't realloc memory!\n");
+                       }
+                       csrel->lstRefCSoid = (oid*)_tmp;
+                       csrel->lstCnt = (int*)_tmp2; 
+               }
+
+               csrel->lstRefCSoid[csrel->numRef] = refCSoid;
+               csrel->lstCnt[csrel->numRef] = freq; 
+               csrel->numRef++;
+       }
+}
+
+
 static 
 CSrel* initCSrelset(oid numCSrel){
        oid i; 
@@ -200,6 +291,54 @@ void printCSrelSet(CSrel *csrelSet, char
        
 }
 
+/*
+ * Show the relationship from each CS to maximumFreqCSs
+ * */
+
+static 
+void printCSrelWithMaxSet(oid* csSuperCSMap, CSrel *csrelWithMaxSet, CSrel *csrelSet, char *csFreqMap, BAT* freqBat, int num, int freqThreshold){
+
+       int     i; 
+       int     j; 
+       int     *freq; 
+       FILE    *fout; 
+       char    filename[100];
+       char    tmpStr[20];
+
+       strcpy(filename, "csRelatioinshipWithMaxFreqCS");
+       sprintf(tmpStr, "%d", freqThreshold);
+       strcat(filename, tmpStr);
+       strcat(filename, ".txt");
+
+       fout = fopen(filename,"wt"); 
+
+       // Merge the relationship
+       for (i = 0; i < num; i++){
+               if (csrelSet[i].numRef != 0){
+                       for (j = 0; j < csrelSet[i].numRef; j++){               
+                               if (csSuperCSMap[csrelSet[i].lstRefCSoid[j]] != BUN_NONE){
+                                       addReltoCSRelWithFreq(csrelSet[i].origCSoid, csSuperCSMap[csrelSet[i].lstRefCSoid[j]], csrelSet[i].lstCnt[j], &csrelWithMaxSet[i]);
+                               }
+                       }
+               }
+       }
+
+       for (i = 0; i < num; i++){
+               if (csrelWithMaxSet[i].numRef != 0){    //Only print CS with FK
+                       fprintf(fout, "Relationship %d: ", i);
+                       freq  = (int *) Tloc(freqBat, i);
+                       fprintf(fout, "CS " BUNFMT " (Freq: %d, isFreq: %d) --> ", csrelWithMaxSet[i].origCSoid, *freq, csFreqMap[i]);
+                       for (j = 0; j < csrelWithMaxSet[i].numRef; j++){
+                               fprintf(fout, BUNFMT " (%d) ", csrelWithMaxSet[i].lstRefCSoid[j],csrelWithMaxSet[i].lstCnt[j]); 
+                       }       
+                       fprintf(fout, "\n");
+               }
+       }
+
+
+       fclose(fout);
+}
+
 static 
 void printSubCSInformation(SubCSSet *subcsset, int num, char isWriteTofile, int freqThreshold){
 
@@ -382,51 +521,7 @@ oid addSubCS(char *buff, int numP, int c
 
 }
 
-static 
-void addReltoCSRel(oid origCSoid, oid refCSoid, CSrel *csrel)
-{
-       void *_tmp; 
-       void *_tmp2; 
-
-       int i = 0; 
-
-       assert (origCSoid == csrel->origCSoid);
-
-       while (i < csrel->numRef){
-               if (refCSoid == csrel->lstRefCSoid[i]){
-                       //Existing
-                       break; 
-               }
-               i++;
-       }
-       
-       if (i != csrel->numRef){ 
-               csrel->lstCnt[i]++; 
-               return; 
-       }
-       else{   // New Ref
-       
-               if(csrel->numRef == csrel->numAllocation) 
-               { 
-                       csrel->numAllocation += INIT_NUM_CSREL; 
-                       
-                       _tmp = realloc(csrel->lstRefCSoid, (csrel->numAllocation * sizeof(oid)));
-                       _tmp2 = realloc(csrel->lstCnt, (csrel->numAllocation * sizeof(int)));
-
-                       if (!_tmp || !_tmp2){
-                               fprintf(stderr, "ERROR: Couldn't realloc memory!\n");
-                       }
-                       csrel->lstRefCSoid = (oid*)_tmp;
-                       csrel->lstCnt = (int*)_tmp2; 
-               }
-
-               csrel->lstRefCSoid[csrel->numRef] = refCSoid;
-               csrel->lstCnt[csrel->numRef] = 1; 
-               csrel->numRef++;
-       }
-}
-
-static 
+static
 void freeCSset(CSset *csSet){
        int i;
        for(i = 0; i < csSet->numCSadded; i ++){
@@ -647,7 +742,12 @@ oid putaCStoHash(CSBats *csBats, oid* ke
        if (bun == BUN_NONE) {
                csId = *csoid; 
                addNewCS(csBats, &csKey, key, csoid, num);
-               //assert(csId != BUN_NONE);
+               
+               //Handle the case when freqThreshold == 1 
+               if (isStoreFreqCS ==1 && freqThreshold == 1){
+                       freqCS = creatCS(csId, num, key);               
+                       addCStoSet(freqCSset, *freqCS);
+               }
        }
        else{
                //printf("Same HashKey: ");     
@@ -659,6 +759,12 @@ oid putaCStoHash(CSBats *csBats, oid* ke
                        // New CS
                        csId = *csoid;
                        addNewCS(csBats, &csKey, key, csoid, num);
+                       
+                       //Handle the case when freqThreshold == 1 
+                       if (isStoreFreqCS ==1 && freqThreshold == 1){
+                               freqCS = creatCS(csId, num, key);               
+                               addCStoSet(freqCSset, *freqCS);
+                       }
 
                }
                else{
@@ -866,7 +972,7 @@ static void getStatisticCSsBySize(map_t 
 */
 
 
-static void getStatisticCSsBySupports(BAT *pOffsetBat, BAT *freqBat, BAT *fullPBat, char isWriteToFile, int freqThreshold){
+static void getStatisticCSsBySupports(BAT *pOffsetBat, BAT *freqBat, BAT *fullPBat, oid* csSuperCSMap, char isWriteToFile, int freqThreshold){
 
        //int   *csPropNum; 
        //int   *csFreq; 
@@ -885,7 +991,7 @@ static void getStatisticCSsBySupports(BA
        strcat(filename, ".txt");
 
        fout = fopen(filename,"wt"); 
-       fprintf(fout, " csId  #Prop   #frequency \n"); 
+       fprintf(fout, " csId  #Prop   #frequency maxCSid\n"); 
 
        pi = bat_iterator(pOffsetBat);
        freqi = bat_iterator(freqBat);
@@ -905,9 +1011,9 @@ static void getStatisticCSsBySupports(BA
 
                // Output the result 
                if (isWriteToFile == 0)
-                       printf(BUNFMT "  %d  %d \n", p, numP, *freq); 
+                       printf(BUNFMT "  %d  %d " BUNFMT "\n", p, numP, *freq, csSuperCSMap[p]); 
                else 
-                       fprintf(fout, BUNFMT " %d  %d \n", p, numP, *freq); 
+                       fprintf(fout, BUNFMT " %d  %d " BUNFMT "\n", p, numP, *freq, csSuperCSMap[p]); 
        }
 
        fclose(fout); 
@@ -1185,6 +1291,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
        int             maxNumPwithDup = 0; 
        char            *csFreqMap; 
        CSrel           *csrelSet;
+       CSrel           *csrelWithMaxFreqSet;
        SubCSSet        *csSubCSMap; 
        oid             *csSuperCSMap;  
 
@@ -1244,6 +1351,8 @@ RDFextractCSwithTypes(int *ret, bat *sba
 
        csrelSet = initCSrelset(maxCSoid + 1);
 
+       csrelWithMaxFreqSet = initCSrelset(maxCSoid + 1);
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to