Changeset: 3146cb37862d for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=3146cb37862d
Modified Files:
monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:
Add function for computing the metric for the refined schema
diffs (130 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -7888,6 +7888,115 @@ void computeMetricsQ(CSset *freqCSset){
free(weight);
}
+
+
+/*
+ * Compute and print the performance metric Q of the refined schema, i.e. of
+ * the final tables after
+ *   (a) removing the member freqCSs whose number of props is below
+ *       LOTSOFNULL_SUBJECT_THRESHOLD times the number of frequent props of
+ *       their table, and
+ *   (b) removing the infrequent props of each table.
+ *
+ * For every table i:
+ *   fillRatio = refinedFills / (numFreqProps * refinedSupport)
+ *   refRatio  = numInRef / totalInRef
+ *   weight    = coverage * (fillRatio + refRatio)
+ * and finally Q = sum(weight) / (sum(coverage) * numTables).
+ *
+ * freqCSset              set of freqCSs (original and merged ones)
+ * csPropTypes            per-table prop information, indexed by table index
+ * mfreqIdxTblIdxMapping  freqCS index -> table index, -1 if it has no table
+ * mTblIdxFreqIdxMapping  table index -> index of its final freqCS
+ * numTables              number of final tables
+ *
+ * The metric is only printed; nothing is returned and the inputs are not
+ * modified.
+ */
+static
+void computeMetricsQForRefinedTable(CSset *freqCSset, CSPropTypes *csPropTypes, int *mfreqIdxTblIdxMapping, int *mTblIdxFreqIdxMapping, int numTables){
+	CS cs;
+	int totalCov = 0;
+	float Q = 0.0;
+	int i, j;
+	int tmpFinalFreqIdx, tmpTblIdx, tmpPropIdx;
+	int tmpNumFreqProps;
+	int *numRefinedFills = NULL;
+	int *numRefinedSupport = NULL;
+
+	//Without tables there is nothing to measure. This also avoids
+	//malloc(0) and the division by numTables at the end.
+	if (numTables <= 0){
+		printf("Refined Table: no table, performance metric Q is not computed \n");
+		return;
+	}
+
+	numRefinedFills = (int*)malloc(sizeof(int) * numTables);
+	numRefinedSupport = (int*)malloc(sizeof(int) * numTables);
+	if (numRefinedFills == NULL || numRefinedSupport == NULL){
+		fprintf(stderr, "computeMetricsQForRefinedTable: out of memory\n");
+		free(numRefinedFills);	//free(NULL) is a no-op
+		free(numRefinedSupport);
+		return;
+	}
+
+	//At the beginning, each table has the fill/support of its final freqCS
+	for (i = 0; i < numTables; i++){
+		numRefinedFills[i] = freqCSset->items[mTblIdxFreqIdxMapping[i]].numFill;
+		numRefinedSupport[i] = freqCSset->items[mTblIdxFreqIdxMapping[i]].support;
+	}
+
+	//Apply LOTSOFNULL_SUBJECT_THRESHOLD:
+	//check which original freqCSs have a small number of props
+	//--> they will be removed from the final table.
+	for (i = 0; i < freqCSset->numOrigFreqCS; i++){
+		//Follow the parent links up to the final (merged) freqCS
+		tmpFinalFreqIdx = i;
+		while (freqCSset->items[tmpFinalFreqIdx].parentFreqIdx != -1){
+			tmpFinalFreqIdx = freqCSset->items[tmpFinalFreqIdx].parentFreqIdx;
+		}
+
+		//This mergedCS does not become the final table, because of e.g., small size
+		if (mfreqIdxTblIdxMapping[tmpFinalFreqIdx] == -1) continue;
+
+		tmpTblIdx = mfreqIdxTblIdxMapping[tmpFinalFreqIdx];
+		tmpNumFreqProps = csPropTypes[tmpTblIdx].numProp - csPropTypes[tmpTblIdx].numInfreqProp;
+
+		if (freqCSset->items[i].numProp < tmpNumFreqProps * LOTSOFNULL_SUBJECT_THRESHOLD){
+			//This CS will be removed
+			//Number of props of CS i that are frequent in the table (init: all)
+			int numFreqPropsInCS = freqCSset->items[i].numProp;
+
+			//Check the number of infrequent props that exist in that CS.
+			//Since they will finally be removed by removing the
+			//infrequent props from the final table, the reduction of
+			//numFill caused by these props must not be counted
+			//when removing this freqCS i.
+			//NOTE(review): tmpPropIdx is never reset inside the loop, so
+			//this presumably relies on both prop lists being sorted in
+			//the same order -- confirm.
+			tmpPropIdx = 0;
+			for (j = 0; j < freqCSset->items[i].numProp; j++){
+				oid checkProp = freqCSset->items[i].lstProp[j];
+				//Look for prop j in the prop list of the table
+				while (tmpPropIdx < csPropTypes[tmpTblIdx].numProp && csPropTypes[tmpTblIdx].lstPropTypes[tmpPropIdx].prop != checkProp){
+					tmpPropIdx++;
+				}
+
+				if (tmpPropIdx == csPropTypes[tmpTblIdx].numProp) break; //No more check
+
+				//Found the index of the prop, check if it is infrequent
+				if (isInfrequentProp(csPropTypes[tmpTblIdx].lstPropTypes[tmpPropIdx], freqCSset->items[tmpFinalFreqIdx])){
+					numFreqPropsInCS--;
+				}
+			}
+
+			numRefinedSupport[tmpTblIdx] = numRefinedSupport[tmpTblIdx] - freqCSset->items[i].support;
+			numRefinedFills[tmpTblIdx] = numRefinedFills[tmpTblIdx] - (freqCSset->items[i].support * numFreqPropsInCS);
+		}
+	}
+
+	for (i = 0; i < numTables; i++){
+		//Only the values of the current table are needed, so plain
+		//scalars are used instead of per-table arrays.
+		float fillRatio;
+		float refRatio;
+
+		tmpFinalFreqIdx = mTblIdxFreqIdxMapping[i];
+		cs = freqCSset->items[tmpFinalFreqIdx];
+
+		//Reduce the number of fills when removing infrequent props
+		for (j = 0; j < csPropTypes[i].numProp; j++){
+			if (isInfrequentProp(csPropTypes[i].lstPropTypes[j], cs)){
+				numRefinedFills[i] = numRefinedFills[i] - csPropTypes[i].lstPropTypes[j].propFreq;
+			}
+		}
+		tmpNumFreqProps = csPropTypes[i].numProp - csPropTypes[i].numInfreqProp;
+		assert(tmpNumFreqProps > 0);
+		assert(numRefinedSupport[i] > 0);
+		fillRatio = (float) numRefinedFills[i] / ((float)tmpNumFreqProps * numRefinedSupport[i]);
+		assert(fillRatio > 0);
+
+		//No incoming reference at all: the ref ratio is 0 instead of 0/0
+		if (freqCSset->totalInRef > 0)
+			refRatio = (float) cs.numInRef / freqCSset->totalInRef;
+		else
+			refRatio = 0.0;
+
+		//Weight of table i
+		Q += (float) cs.coverage * (fillRatio + refRatio);
+		totalCov += cs.coverage;
+	}
+	printf("Refined Table: Performance metric Q = (weighting %f)/(totalCov %d * numTbl %d) \n", Q, totalCov, numTables);
+
+	//Guard the normalization against a zero total coverage
+	if (totalCov > 0)
+		Q = Q/((float)totalCov * numTables);
+	else
+		Q = 0.0;
+
+	printf("==> Performance metric Q = %f \n", Q);
+
+	free(numRefinedFills);
+	free(numRefinedSupport);
+
+}
#endif
@@ -9753,6 +9862,10 @@ RDFreorganize(int *ret, CStableStat *cst
#if NO_OUTPUTFILE == 0
printFinalStructure(cstablestat, csPropTypes, numTables,*freqThreshold,
mapbatid);
#endif
+
+ #if STORE_PERFORMANCE_METRIC_INFO
+ computeMetricsQForRefinedTable(freqCSset,
csPropTypes,mfreqIdxTblIdxMapping,mTblIdxFreqIdxMapping,numTables);
+ #endif
if (*mode == EXPLOREONLY){
printf("Only explore the schema information \n");
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list