Changeset: 7e39d2305140 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=7e39d2305140
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Compute the support of each property in a CS.

This is for presentation purposes, specifically for coloring the properties.


diffs (194 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -791,6 +791,20 @@ void genCSPropTypesColIdx(CSPropTypes* c
 
 }
 
+#if     COLORINGPROP
+static 
+void updatePropSupport(CSPropTypes* csPropTypes, int numMergedCS, CSset* 
freqCSset){
+       int i, j; 
+       int freqId; 
+       for (i = 0; i < numMergedCS; i++){
+               freqId = csPropTypes[i].freqCSId; 
+               freqCSset->items[freqId].lstPropSupport = (int*) malloc 
(sizeof(int) * freqCSset->items[freqId].numProp);
+               for (j = 0; j < freqCSset->items[freqId].numProp; j++){
+                       freqCSset->items[freqId].lstPropSupport[j] = 
csPropTypes[i].lstPropTypes[j].propFreq; 
+               }
+       }
+}
+#endif /* #if COLORINGPROP */
 static 
 void printCSPropTypes(CSPropTypes* csPropTypes, int numMergedCS, CSset* 
freqCSset, int freqThreshold){
        char filename[100]; 
@@ -831,8 +845,9 @@ void printCSPropTypes(CSPropTypes* csPro
                                numMVColsFilter++;
                        }
 
-                       fprintf(fout, "  P " BUNFMT "(%d | cov:%d | Null: %d | 
Single: %d | Multi: %d) \n", 
-                                       csPropTypes[i].lstPropTypes[j].prop, 
csPropTypes[i].lstPropTypes[j].defaultType,csPropTypes[i].lstPropTypes[j].propCover,
+                       fprintf(fout, "  P " BUNFMT "(%d | freq: %d | cov:%d | 
Null: %d | Single: %d | Multi: %d) \n", 
+                                       csPropTypes[i].lstPropTypes[j].prop, 
csPropTypes[i].lstPropTypes[j].defaultType,
+                                       
csPropTypes[i].lstPropTypes[j].propFreq, 
csPropTypes[i].lstPropTypes[j].propCover,
                                        csPropTypes[i].lstPropTypes[j].numNull, 
csPropTypes[i].lstPropTypes[j].numSingleType, 
csPropTypes[i].lstPropTypes[j].numMVType);
                        fprintf(fout, "         ");
                        for (k = 0; k < csPropTypes[i].lstPropTypes[j].numType; 
k++){
@@ -1074,6 +1089,10 @@ void freeCSset(CSset *csSet){
        int i;
        for(i = 0; i < csSet->numCSadded; i ++){
                free(csSet->items[i].lstProp);
+               #if COLORINGPROP
+               if (csSet->items[i].lstPropSupport != NULL)
+                       free(csSet->items[i].lstPropSupport);
+               #endif
 
        }
 
@@ -1141,6 +1160,9 @@ CS* creatCS(oid csId, int numP, oid* buf
 #endif 
 {
        CS *cs = (CS*)malloc(sizeof(CS)); 
+       #if COLORINGPROP
+       cs->lstPropSupport = NULL; 
+       #endif
        cs->lstProp =  (oid*) malloc(sizeof(oid) * numP);
        
        if (cs->lstProp == NULL){
@@ -3400,7 +3422,7 @@ int       ontmetadataCount = 0;
 
 /* Extract CS from SPO triples table */
 str
-RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat 
*mapbatid, int *freqThreshold, void *_freqCSset, oid **subjCSMap, oid 
*maxCSoid, char **subjdefaultMap,int *maxNumPwithDup){
+RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat 
*mapbatid, int *freqThreshold, void *_freqCSset, oid **subjCSMap, oid 
*maxCSoid, char **subjdefaultMap,int *maxNumPwithDup, CSmergeRel 
**csRelBetweenMergeFreqSet){
 
        BAT             *sbat = NULL, *pbat = NULL, *obat = NULL, *mbat = NULL; 
        BATiter         si, pi, oi;     /*iterator for BAT of s,p,o columns in 
spo table */
@@ -3414,7 +3436,6 @@ RDFextractCSwithTypes(int *ret, bat *sba
        CSrel           *csrelSet;
        CSrel           *csrelToMaxFreqSet, *csrelFromMaxFreqSet;
        CSrel           *csrelBetweenMaxFreqSet; 
-       CSmergeRel      *csRelBetweenMergeFreqSet;
        SubCSSet        *csSubCSSet; 
 
        int*            csIdFreqIdxMap; /* Map a CSId to a freqIdx. Should be 
removed in the future .... */
@@ -3575,12 +3596,12 @@ RDFextractCSwithTypes(int *ret, bat *sba
        curT = clock(); 
        printf (" ----- Merging Frequent CSs took  %f seconds.\n", 
((float)(curT - tmpLastT))/CLOCKS_PER_SEC);
        tmpLastT = curT;                
-       
-
-       csRelBetweenMergeFreqSet = (CSmergeRel *) malloc (sizeof(CSmergeRel) * 
freqCSset->numCSadded);
-       initCsRelBetweenMergeFreqSet(csRelBetweenMergeFreqSet, 
freqCSset->numCSadded);
-       generateCsRelBetweenMergeFreqSet(csRelBetweenMergeFreqSet, 
csrelBetweenMaxFreqSet, *maxCSoid + 1, csIdFreqIdxMap, freqCSset);
-       printCSmergeRel(freqCSset, csRelBetweenMergeFreqSet, *freqThreshold);
+
+
+       *csRelBetweenMergeFreqSet = (CSmergeRel *) malloc (sizeof(CSmergeRel) * 
freqCSset->numCSadded);
+       initCsRelBetweenMergeFreqSet(*csRelBetweenMergeFreqSet, 
freqCSset->numCSadded);
+       generateCsRelBetweenMergeFreqSet(*csRelBetweenMergeFreqSet, 
csrelBetweenMaxFreqSet, *maxCSoid + 1, csIdFreqIdxMap, freqCSset);
+       printCSmergeRel(freqCSset, *csRelBetweenMergeFreqSet, *freqThreshold);
 
        printmergeCSSet(freqCSset, *freqThreshold);
        //getStatisticCSsBySize(csMap,maxNumProp); 
@@ -3604,7 +3625,6 @@ RDFextractCSwithTypes(int *ret, bat *sba
        freeCS_SubCSMapSet(csSubCSSet, *maxCSoid + 1); 
 
        free(csIdFreqIdxMap); 
-       freeMergeCSrelset(csRelBetweenMergeFreqSet,freqCSset->numCSadded);
        freeCSrelSet(csrelSet, *maxCSoid + 1); 
        freeCSrelSet(csrelToMaxFreqSet, *maxCSoid + 1); 
        freeCSrelSet(csrelFromMaxFreqSet, *maxCSoid + 1);
@@ -4285,11 +4305,11 @@ RDFreorganize(int *ret, CStableStat *cst
        //CStableStat   *cstablestat;
        char            *subjdefaultMap = NULL; /* Specify whether this subject 
contains default value or not. This array may be large */
        CSPropTypes     *csPropTypes; 
-
+       CSmergeRel      *csRelBetweenMergeFreqSet = NULL;
 
        freqCSset = initCSset();
 
-       if (RDFextractCSwithTypes(ret, sbatid, pbatid, obatid, mapbatid, 
freqThreshold, freqCSset,&subjCSMap, &maxCSoid, &subjdefaultMap, 
&maxNumPwithDup) != MAL_SUCCEED){
+       if (RDFextractCSwithTypes(ret, sbatid, pbatid, obatid, mapbatid, 
freqThreshold, freqCSset,&subjCSMap, &maxCSoid, &subjdefaultMap, 
&maxNumPwithDup, &csRelBetweenMergeFreqSet) != MAL_SUCCEED){
                throw(RDF, "rdf.RDFreorganize", "Problem in extracting CSs");
        } 
 
@@ -4334,6 +4354,11 @@ RDFreorganize(int *ret, CStableStat *cst
        RDFExtractCSPropTypes(ret, sbat, si, pi, oi, subjCSMap, 
csTblIdxMapping, csPropTypes, maxNumPwithDup);
        genCSPropTypesColIdx(csPropTypes, numTables, freqCSset);
        printCSPropTypes(csPropTypes, numTables, freqCSset, *freqThreshold);
+       
+       #if COLORINGPROP
+       /* Update list of support for properties in freqCSset */
+       updatePropSupport(csPropTypes, numTables, freqCSset);
+       #endif
 
        // Init CStableStat
        initCStables(cstablestat, freqCSset, csPropTypes, numTables);
@@ -4347,6 +4372,7 @@ RDFreorganize(int *ret, CStableStat *cst
                free(mfreqIdxTblIdxMapping);
                free(mTblIdxFreqIdxMapping);
                freeCSPropTypes(csPropTypes,numTables);
+               
freeMergeCSrelset(csRelBetweenMergeFreqSet,freqCSset->numCSadded);
 
                return MAL_SUCCEED;
        }
@@ -4455,6 +4481,7 @@ RDFreorganize(int *ret, CStableStat *cst
        }
                
 
+       freeMergeCSrelset(csRelBetweenMergeFreqSet,freqCSset->numCSadded);
        freeCSPropTypes(csPropTypes,numTables);
        freeCSset(freqCSset); 
        free(subjCSMap); 
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -31,8 +31,7 @@ RDFextractCS(int *ret, bat *sbatid, bat 
 rdf_export str
 RDFextractPfromPSO(int *ret, bat *pbatid, bat *sbatid); 
 
-rdf_export str 
-RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat 
*mapbatid, int *freqThreshold, void *freqCSset, oid **subjCSMap, oid *maxCSoid, 
char **subjdefaultMap,int *maxNumPwithDup);
+
 
 typedef enum{
        EXPLOREONLY, 
@@ -95,6 +94,8 @@ typedef struct PropStat {
 
 #define STAT_ANALYZE 1 // Only use for collecting the statistic on the number 
of multi/null/single-valued prop
 
+#define COLORINGPROP 1 // Only use for coloring property in schema 
representation. 
+
 
 #define USE_LABEL_FINDING_MAXCS        0       // Use the labels received from 
labeling process for finding maxCS 
 #define USE_LABEL_FOR_MERGING  0       // Use the labels received from 
labeling process for finding mergeCS
@@ -103,13 +104,16 @@ typedef struct CS
 {
        oid     csId;           //Id of the CS
        oid*    lstProp;        //List of properties' Ids
+       #if     COLORINGPROP    
+       int*    lstPropSupport;  //Number of subjects that the object value for 
this is not null
+       #endif
        int     numProp;
        int     numAllocation;
        //char  isSubset; 
        int     parentFreqIdx;  //Index of the parent in freqCSset
        #if STOREFULLCS
-       oid     subject;        //A subject
-       oid*    lstObj;         //List of sample objects
+       oid     subject;         //A subject
+       oid*    lstObj;          //List of sample objects
        #endif
        
        char    type; 
@@ -255,6 +259,9 @@ typedef struct CSPropTypes {
 rdf_export str
 RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, PropStat* 
propStat, CStableStat *cstablestat, CSPropTypes *csPropTypes, oid* lastSubjId);
 
+rdf_export str 
+RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat 
*mapbatid, int *freqThreshold, void *freqCSset, oid **subjCSMap, oid *maxCSoid, 
char **subjdefaultMap,int *maxNumPwithDup,CSmergeRel 
**csRelBetweenMergeFreqSet);
+
 rdf_export str
 RDFreorganize(int *ret, CStableStat *cstablestat, bat *sbatid, bat *pbatid, 
bat *obatid, bat *mapbatid, int *freqThreshold, int *mode);
 
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to