Changeset: af9eec714439 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=af9eec714439
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:

Only store the relationships between freqCS


diffs (132 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -3101,7 +3101,6 @@ str RDFgetRefCounts(int *ret, BAT *sbat,
        oid             curP;           /* current Property oid */
        int             numP;           /* Number of properties for current S */
        oid*            buff;    
-       oid             tmpCSid; 
 
        char            objType;
        oid             realObjOid;     
@@ -3128,9 +3127,9 @@ str RDFgetRefCounts(int *ret, BAT *sbat,
                /* Look at the referenced CS Id using subjCSMap */
                if (objType == URI || objType == BLANKNODE){
                        realObjOid = (*obt) - ((oid) objType << (sizeof(BUN)*8 
- 4));
-                       tmpCSid = subjCSMap[realObjOid];
-                       if (realObjOid <= maxSoid && tmpCSid != BUN_NONE){
-                               refCount[tmpCSid]++; 
+
+                       if (realObjOid <= maxSoid && subjCSMap[realObjOid] != 
BUN_NONE){
+                               refCount[subjCSMap[realObjOid]]++; 
                        }
 
                }
@@ -3154,14 +3153,12 @@ str RDFgetRefCounts(int *ret, BAT *sbat,
 #if NEEDSUBCS
 static 
 str RDFrelationships(int *ret, BAT *sbat, BATiter si, BATiter pi, BATiter oi,  
-               oid *subjCSMap, oid *subjSubCSMap, SubCSSet *csSubCSSet, CSrel 
*csrelSet, BUN maxSoid, int maxNumPwithDup){
+               oid *subjCSMap, oid *subjSubCSMap, SubCSSet *csSubCSSet, CSrel 
*csrelSet, BUN maxSoid, int maxNumPwithDup,int *csIdFreqIdxMap){
 #else
 static
 str RDFrelationships(int *ret, BAT *sbat, BATiter si, BATiter pi, BATiter oi,
-               oid *subjCSMap, CSrel *csrelSet, BUN maxSoid, int 
maxNumPwithDup){
+               oid *subjCSMap, CSrel *csrelSet, BUN maxSoid, int 
maxNumPwithDup,int *csIdFreqIdxMap){
 #endif 
-
-
        BUN             p, q; 
        oid             *sbt = 0, *obt, *pbt;
        oid             curS;           /* current Subject oid */
@@ -3174,7 +3171,8 @@ str RDFrelationships(int *ret, BAT *sbat
        char*           buffTypes; 
        oid             realObjOid;     
        char            isBlankNode; 
-       oid             curP; 
+       oid             curP;
+
 
        if (BATcount(sbat) == 0) {
                throw(RDF, "rdf.RDFrelationships", "sbat must not be empty");
@@ -3190,6 +3188,7 @@ str RDFrelationships(int *ret, BAT *sbat
 
        BATloop(sbat, p, q){
                sbt = (oid *) BUNtloc(si, p);           
+               if ( csIdFreqIdxMap[subjCSMap[*sbt]] == -1) continue; /* Do not 
consider infrequentCS */
                if (*sbt != curS){
                        #if NEEDSUBCS
                        if (p != 0){    /* Not the first S */
@@ -3205,18 +3204,19 @@ str RDFrelationships(int *ret, BAT *sbat
                        curP = 0; 
                }
                                
+               pbt = (oid *) BUNtloc(pi, p);
+
                obt = (oid *) BUNtloc(oi, p); 
                /* Check type of object */
                objType = getObjType(*obt);
-       
-               pbt = (oid *) BUNtloc(pi, p);
 
                /* Look at the referenced CS Id using subjCSMap */
                isBlankNode = 0;
                if (objType == URI || objType == BLANKNODE){
                        realObjOid = (*obt) - ((oid) objType << (sizeof(BUN)*8 
- 4));
 
-                       if (realObjOid <= maxSoid && subjCSMap[realObjOid] != 
BUN_NONE){
+                       /* Only consider references to freqCS */        
+                       if (realObjOid <= maxSoid && subjCSMap[realObjOid] != 
BUN_NONE && csIdFreqIdxMap[subjCSMap[realObjOid]] != -1){
                                if (objType == BLANKNODE) isBlankNode = 1;
                                addReltoCSRel(subjCSMap[*sbt], 
subjCSMap[realObjOid], *pbt, &csrelSet[subjCSMap[*sbt]], isBlankNode);
                        }
@@ -3251,6 +3251,8 @@ str RDFrelationships(int *ret, BAT *sbat
        return MAL_SUCCEED; 
 }
 
+
+
 static
 str addHighRefCSsToFreqCS(BAT *pOffsetBat, BAT *freqBat, BAT *coverageBat, BAT 
*fullPBat, 
                int* refCount, CSset *freqCSset, int *csIdFreqIdxMap, int 
numCS, int threshold){
@@ -3670,7 +3672,11 @@ RDFextractCSwithTypes(int *ret, bat *sba
                
        curT = clock(); 
        printf (" ----- Exploring all CSs took  %f seconds.\n", ((float)(curT - 
tmpLastT))/CLOCKS_PER_SEC);
+               
+       printf("Max CS oid: " BUNFMT "\n", *maxCSoid);
+       printf("Max Number of P per CS (with/without duplication): %d / %d \n", 
maxNumProp, *maxNumPwithDup);
        printf("Number of freqCSs found by frequency: %d \n", 
freqCSset->numCSadded);
+
        tmpLastT = curT;                
        
        /* Phase 2: Get the references count for each CS. Add frequent one to 
freqCSset */
@@ -3686,15 +3692,8 @@ RDFextractCSwithTypes(int *ret, bat *sba
        printf (" ----- Counting references and adding highly referred CS's 
took  %f seconds.\n", ((float)(curT - tmpLastT))/CLOCKS_PER_SEC);
        printf("Number of freqCSs after considering # references: %d \n", 
freqCSset->numCSadded);
        tmpLastT = curT;
-
        
-       //Phase 2: Check the relationship       
-
-       printf("Max CS oid: " BUNFMT "\n", *maxCSoid);
-
-       printf("Max Number of P (considering duplicated P): %d \n", 
*maxNumPwithDup);
-
-
+       //Phase 3: Check the relationship       
 
        csrelSet = initCSrelset(*maxCSoid + 1);
        
@@ -3706,9 +3705,9 @@ RDFextractCSwithTypes(int *ret, bat *sba
 
        csSubCSSet = initCS_SubCSSets(*maxCSoid +1); 
 
-       RDFrelationships(ret, sbat, si, pi, oi, *subjCSMap, subjSubCSMap, 
csSubCSSet, csrelSet, *maxSoid, *maxNumPwithDup);
+       RDFrelationships(ret, sbat, si, pi, oi, *subjCSMap, subjSubCSMap, 
csSubCSSet, csrelSet, *maxSoid, *maxNumPwithDup, csIdFreqIdxMap);
        #else
-       RDFrelationships(ret, sbat, si, pi, oi, *subjCSMap, csrelSet, *maxSoid, 
*maxNumPwithDup);
+       RDFrelationships(ret, sbat, si, pi, oi, *subjCSMap, csrelSet, *maxSoid, 
*maxNumPwithDup, csIdFreqIdxMap);
        #endif
 
        curT = clock(); 
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to