Changeset: 8d7c61e2b191 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=8d7c61e2b191
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Fix the bug caused by using the int type instead of oid.


diffs (truncated from 326 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -35,7 +35,7 @@ RDFSchemaExplore(int *ret, str *tbname, 
        return MAL_SUCCEED;
 }
 
-static void copyIntSet(int* dest, int* orig, int len){
+static void copyOidSet(oid* dest, oid* orig, int len){
        int i; 
        for (i = 0; i < len; i++){
                dest[i] = orig[i];
@@ -69,7 +69,7 @@ void freeCSset(CSset *csSet){
                free(csSet->items[i].lstProp);
        }
        free(csSet->items);
-       free(csSet);
+       //free(csSet);          // DUC: NEED TO RECHECK
 }
 
 static 
@@ -91,16 +91,16 @@ void freeCS(CS *cs){
 */
 
 static 
-CS* creatCS(int subId, int numP, int* buff){
+CS* creatCS(oid subId, int numP, oid* buff){
        CS *cs = malloc(sizeof(CS)); 
-       cs->lstProp =  (int*) malloc(sizeof(int) * numP);
+       cs->lstProp =  (oid*) malloc(sizeof(oid) * numP);
        
        if (cs->lstProp == NULL){
                printf("Malloc failed. at %d", numP);
                exit(-1); 
        }
 
-       copyIntSet(cs->lstProp, buff, numP); 
+       copyOidSet(cs->lstProp, buff, numP); 
        cs->subIdx = subId;
        cs->numProp = numP; 
        cs->numAllocation = numP; 
@@ -114,8 +114,9 @@ CS* creatCS(int subId, int numP, int* bu
  * Rely on djb2 http://www.cse.yorku.ca/~oz/hash.html
  *
  */
-static unsigned int RDF_hash_intlist(int* key, int num){
-       unsigned int hashCode = 5381u; 
+static oid RDF_hash_oidlist(oid* key, int num){
+       //unsigned int hashCode = 5381u; 
+       oid  hashCode = 5381u;
        int i; 
 
        for (i = 0; i < num; i++){
@@ -127,33 +128,33 @@ static unsigned int RDF_hash_intlist(int
 }
 
 static 
-void appendArrayToBat(BAT *b, int* inArray, int num){
-       int i; 
+void appendArrayToBat(BAT *b, BUN* inArray, int num){
+       //int i; 
        BUN r = BUNlast(b);
        if (r + num < b->batCapacity){
                BATextend(b, smallbatsz); 
        }
-       for (i = 0; i < num; i++){
-               memcpy(Tloc(b, BUNlast(b)), inArray, sizeof(int) * num); 
-       }
+       //for (i = 0; i < num; i++){
+       memcpy(Tloc(b, BUNlast(b)), inArray, sizeof(BUN) * num); 
+       //}
        BATsetcount(b, (BUN) (b->batCount + num)); 
        
 }
 
 static 
-void checkCSduplication(BAT* pOffsetBat, BAT* fullPBat, BUN pos, int* key, int 
numK){
-       BUN *offset; 
-       BUN *offset2; 
+void checkCSduplication(BAT* pOffsetBat, BAT* fullPBat, BUN pos, oid* key, int 
numK){
+       oid *offset; 
+       oid *offset2; 
        int numP; 
        int i; 
        BUN *existvalue; 
 
-       offset = (BUN *) Tloc(pOffsetBat, pos); 
+       offset = (oid *) Tloc(pOffsetBat, pos); 
        if ((pos + 1) < pOffsetBat->batCount){
-               offset2 = (BUN *)Tloc(pOffsetBat, pos + 1);
+               offset2 = (oid *)Tloc(pOffsetBat, pos + 1);
        }
        else{
-               offset2 = malloc(sizeof(BUN)); 
+               offset2 = malloc(sizeof(oid)); 
                *offset2 = BUNlast(fullPBat); 
        }
 
@@ -165,9 +166,10 @@ void checkCSduplication(BAT* pOffsetBat,
                return; 
        }
        else{
-               existvalue = (BUN *)Tloc(fullPBat, *offset);    
+               existvalue = (oid *)Tloc(fullPBat, *offset);    
                for (i = 0; i < numP; i++){
-                       if (key[i] != (int)*existvalue++) {
+                       //if (key[i] != (int)*existvalue++) {
+                       if (key[i] != existvalue[i]) {
                                printf("No duplication \n");
                                return;
                        }       
@@ -185,15 +187,16 @@ void checkCSduplication(BAT* pOffsetBat,
  *
  * */
 static 
-void putaCStoHash(BAT* hsKeyBat, BAT* pOffsetBat, BAT* fullPBat, oid subjId, 
int* key, int num, 
+int putaCStoHash(BAT* hsKeyBat, BAT* pOffsetBat, BAT* fullPBat, oid subjId, 
oid* key, int num, 
                oid *csoid, char isStoreFreqCS, int freqThreshold, CSset 
**freqCSset){
-       int     csKey; 
+       BUN     csKey; 
        int     freq = 0; 
        CS      *freqCS; 
        BUN     bun; 
        BUN     offset; 
+       oid     csId;           /* Id of the characteristic set */
 
-       csKey = RDF_hash_intlist(key, num);
+       csKey = RDF_hash_oidlist(key, num);
        bun = BUNfnd(BATmirror(hsKeyBat),(ptr) &csKey);
        if (bun == BUN_NONE) {
                if (hsKeyBat->T->hash && BATcount(hsKeyBat) > 4 * 
hsKeyBat->T->hash->mask) {
@@ -202,8 +205,10 @@ void putaCStoHash(BAT* hsKeyBat, BAT* pO
                }
                hsKeyBat = BUNappend(hsKeyBat, (ptr) &csKey, TRUE);
 
+               
+               csId = *csoid;
                (*csoid)++;
-
+               
                offset = BUNlast(fullPBat);
                /* Add list of p to fullPBat and pOffsetBat*/
                BUNappend(pOffsetBat, &offset , TRUE);
@@ -212,7 +217,10 @@ void putaCStoHash(BAT* hsKeyBat, BAT* pO
        }
        else{
                printf("This CS exists \n");    
+               csId = bun; 
                /* Check whether it is really an duplication (same hashvalue 
but different list of */
+               BATprint(pOffsetBat);
+               BATprint(fullPBat);
                checkCSduplication(pOffsetBat, fullPBat, bun, key, num );
 
                if (isStoreFreqCS == 1){        /* Store the frequent CS to the 
CSset*/
@@ -224,13 +232,14 @@ void putaCStoHash(BAT* hsKeyBat, BAT* pO
                }
        }
 
+       return csId;
 }
 
 /* Return 1 if sorted arr2[] is a subset of sorted arr1[] 
  * arr1 has m members, arr2 has n members
  * */
 
-static int isSubset(int* arr1, int* arr2, int m, int n)
+static int isSubset(oid* arr1, oid* arr2, int m, int n)
 {
        int i = 0, j = 0;
         
@@ -333,6 +342,7 @@ static void putPtoHash(map_t pmap, int k
        }
 }
 
+/*
 static void getTopFreqCSs(map_t csmap, int threshold){
        int count;
        hashmap_map* m; 
@@ -344,6 +354,7 @@ static void getTopFreqCSs(map_t csmap, i
        return;
 
 }
+*/
 
 /*
 static void getStatisticCSsBySize(map_t csmap, int maximumNumP){
@@ -368,6 +379,7 @@ static void getStatisticCSsBySize(map_t 
 }
 */
 
+/*
 static void getStatisticCSsBySupports(map_t csmap, int maxSupport, char 
isWriteToFile, char isCummulative){
 
        int* statCS; 
@@ -383,7 +395,7 @@ static void getStatisticCSsBySupports(ma
        else 
                hashmap_statistic_CSbysupport(csmap, statCS, maxSupport); 
 
-       /* Output the result */
+       // Output the result 
        
        if (isWriteToFile == 0){
                printf(" --- Number of CS per support (Max = %d)--- \n", 
maxSupport);
@@ -408,6 +420,7 @@ static void getStatisticCSsBySupports(ma
 
        free(statCS); 
 }
+*/
 
 /*
  * Get the refer CS 
@@ -467,8 +480,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
        oid     curP;           /* current Property oid */
        oid     CSoid = 0;      /* Characteristic set oid */
        int     numP;           /* Number of properties for current S */
-       map_t   csMap;          
-       int*    buff;    
+       oid*    buff;    
        int     INIT_PROPERTY_NUM = 5000; 
        int     maxNumProp = 0; 
        CSset   *freqCSset;     /* Set of frequent CSs */
@@ -478,9 +490,12 @@ RDFextractCSwithTypes(int *ret, bat *sba
        //BAT   *hsValueBat;
        BAT     *pOffsetBat;    /* BAT storing the offset for set of 
properties, refer to fullPBat */
        BAT     *fullPBat;      /* Stores all set of properties */
-       //int   *subjCSMap;     /* Store the correspoinding CS Id for each 
subject */
+
+       oid     *subjCSMap;     /* Store the correspoinding CS Id for each 
subject */
+       BUN     *maxSoid;       
+       oid     returnCSid; 
        
-       buff = (int *) malloc (sizeof(int) * INIT_PROPERTY_NUM);
+       buff = (oid *) malloc (sizeof(oid) * INIT_PROPERTY_NUM);
 
        if ((sbat = BATdescriptor(*sbatid)) == NULL) {
                throw(MAL, "rdf.RDFextractCSwithTypes", RUNTIME_OBJECT_MISSING);
@@ -495,6 +510,9 @@ RDFextractCSwithTypes(int *ret, bat *sba
        if ((obat = BATdescriptor(*obatid)) == NULL) {
                throw(MAL, "rdf.RDFextractCSwithTypes", RUNTIME_OBJECT_MISSING);
        }
+
+       maxSoid = (BUN *) Tloc(sbat, BUNlast(sbat));
+       subjCSMap = (oid *) malloc (sizeof(oid) * (*maxSoid)); 
        
        si = bat_iterator(sbat); 
        pi = bat_iterator(pbat); 
@@ -502,8 +520,8 @@ RDFextractCSwithTypes(int *ret, bat *sba
 
        hsKeyBat = BATnew(TYPE_void, TYPE_int, smallbatsz);
        //hsValueBat = BATnew(TYPE_void, TYPE_int, smallbatsz);
-       pOffsetBat = BATnew(TYPE_void, TYPE_int, smallbatsz);
-       fullPBat = BATnew(TYPE_void, TYPE_int, smallbatsz);
+       pOffsetBat = BATnew(TYPE_void, TYPE_oid, smallbatsz);
+       fullPBat = BATnew(TYPE_void, TYPE_oid, smallbatsz);
 
        if (hsKeyBat == NULL) {
                throw(MAL, "rdf.RDFextractCSwithTypes", "Error in BAT 
creation");
@@ -511,7 +529,6 @@ RDFextractCSwithTypes(int *ret, bat *sba
        BATseqbase(hsKeyBat, 0);
 
        /* Init a hashmap */
-       csMap = hashmap_new(); 
        freqCSset = initCSset();
 
        numP = 0;
@@ -522,7 +539,9 @@ RDFextractCSwithTypes(int *ret, bat *sba
                sbt = (oid *) BUNtloc(si, p);           
                if (*sbt != curS){
                        if (p != 0){    /* Not the first S */
-                               putaCStoHash(hsKeyBat, pOffsetBat, fullPBat, 
curS, buff, numP, &CSoid, 1, *freqThreshold, &freqCSset); 
+                               returnCSid = putaCStoHash(hsKeyBat, pOffsetBat, 
fullPBat, curS, buff, numP, &CSoid, 1, *freqThreshold, &freqCSset); 
+
+                               subjCSMap[*sbt] = returnCSid;                   
        
 
                                if (numP > maxNumProp) 
                                        maxNumProp = numP; 
@@ -558,7 +577,10 @@ RDFextractCSwithTypes(int *ret, bat *sba
        }
        
        /*put the last CS */
-       putaCStoHash(hsKeyBat, pOffsetBat, fullPBat, curS, buff, numP, &CSoid, 
1, *freqThreshold, &freqCSset ); 
+       returnCSid = putaCStoHash(hsKeyBat, pOffsetBat, fullPBat, curS, buff, 
numP, &CSoid, 1, *freqThreshold, &freqCSset ); 
+
+       subjCSMap[*sbt] = returnCSid;                           
+
 
        if (numP > maxNumProp) 
                maxNumProp = numP; 
@@ -567,13 +589,13 @@ RDFextractCSwithTypes(int *ret, bat *sba
 
        /*get the statistic */
 
-       getTopFreqCSs(csMap,*freqThreshold);
+       //getTopFreqCSs(csMap,*freqThreshold);
 
        getMaximumFreqCSs(freqCSset); 
 
        //getStatisticCSsBySize(csMap,maxNumProp); 
 
-       getStatisticCSsBySupports(csMap, 5000, 1, 0);
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to