Changeset: 7e39d2305140 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=7e39d2305140
Modified Files:
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Compute the support of each property in a CS.
This is for presentation purposes, specifically for coloring the properties.
diffs (194 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -791,6 +791,20 @@ void genCSPropTypesColIdx(CSPropTypes* c
}
+#if COLORINGPROP
+static
+void updatePropSupport(CSPropTypes* csPropTypes, int numMergedCS, CSset*
freqCSset){
+ int i, j;
+ int freqId;
+ for (i = 0; i < numMergedCS; i++){
+ freqId = csPropTypes[i].freqCSId;
+ freqCSset->items[freqId].lstPropSupport = (int*) malloc
(sizeof(int) * freqCSset->items[freqId].numProp);
+ for (j = 0; j < freqCSset->items[freqId].numProp; j++){
+ freqCSset->items[freqId].lstPropSupport[j] =
csPropTypes[i].lstPropTypes[j].propFreq;
+ }
+ }
+}
+#endif /* #if COLORINGPROP */
static
void printCSPropTypes(CSPropTypes* csPropTypes, int numMergedCS, CSset*
freqCSset, int freqThreshold){
char filename[100];
@@ -831,8 +845,9 @@ void printCSPropTypes(CSPropTypes* csPro
numMVColsFilter++;
}
- fprintf(fout, " P " BUNFMT "(%d | cov:%d | Null: %d |
Single: %d | Multi: %d) \n",
- csPropTypes[i].lstPropTypes[j].prop,
csPropTypes[i].lstPropTypes[j].defaultType,csPropTypes[i].lstPropTypes[j].propCover,
+ fprintf(fout, " P " BUNFMT "(%d | freq: %d | cov:%d |
Null: %d | Single: %d | Multi: %d) \n",
+ csPropTypes[i].lstPropTypes[j].prop,
csPropTypes[i].lstPropTypes[j].defaultType,
+
csPropTypes[i].lstPropTypes[j].propFreq,
csPropTypes[i].lstPropTypes[j].propCover,
csPropTypes[i].lstPropTypes[j].numNull,
csPropTypes[i].lstPropTypes[j].numSingleType,
csPropTypes[i].lstPropTypes[j].numMVType);
fprintf(fout, " ");
for (k = 0; k < csPropTypes[i].lstPropTypes[j].numType;
k++){
@@ -1074,6 +1089,10 @@ void freeCSset(CSset *csSet){
int i;
for(i = 0; i < csSet->numCSadded; i ++){
free(csSet->items[i].lstProp);
+ #if COLORINGPROP
+ if (csSet->items[i].lstPropSupport != NULL)
+ free(csSet->items[i].lstPropSupport);
+ #endif
}
@@ -1141,6 +1160,9 @@ CS* creatCS(oid csId, int numP, oid* buf
#endif
{
CS *cs = (CS*)malloc(sizeof(CS));
+ #if COLORINGPROP
+ cs->lstPropSupport = NULL;
+ #endif
cs->lstProp = (oid*) malloc(sizeof(oid) * numP);
if (cs->lstProp == NULL){
@@ -3400,7 +3422,7 @@ int ontmetadataCount = 0;
/* Extract CS from SPO triples table */
str
-RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat
*mapbatid, int *freqThreshold, void *_freqCSset, oid **subjCSMap, oid
*maxCSoid, char **subjdefaultMap,int *maxNumPwithDup){
+RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat
*mapbatid, int *freqThreshold, void *_freqCSset, oid **subjCSMap, oid
*maxCSoid, char **subjdefaultMap,int *maxNumPwithDup, CSmergeRel
**csRelBetweenMergeFreqSet){
BAT *sbat = NULL, *pbat = NULL, *obat = NULL, *mbat = NULL;
BATiter si, pi, oi; /*iterator for BAT of s,p,o columns in
spo table */
@@ -3414,7 +3436,6 @@ RDFextractCSwithTypes(int *ret, bat *sba
CSrel *csrelSet;
CSrel *csrelToMaxFreqSet, *csrelFromMaxFreqSet;
CSrel *csrelBetweenMaxFreqSet;
- CSmergeRel *csRelBetweenMergeFreqSet;
SubCSSet *csSubCSSet;
int* csIdFreqIdxMap; /* Map a CSId to a freqIdx. Should be
removed in the future .... */
@@ -3575,12 +3596,12 @@ RDFextractCSwithTypes(int *ret, bat *sba
curT = clock();
printf (" ----- Merging Frequent CSs took %f seconds.\n",
((float)(curT - tmpLastT))/CLOCKS_PER_SEC);
tmpLastT = curT;
-
-
- csRelBetweenMergeFreqSet = (CSmergeRel *) malloc (sizeof(CSmergeRel) *
freqCSset->numCSadded);
- initCsRelBetweenMergeFreqSet(csRelBetweenMergeFreqSet,
freqCSset->numCSadded);
- generateCsRelBetweenMergeFreqSet(csRelBetweenMergeFreqSet,
csrelBetweenMaxFreqSet, *maxCSoid + 1, csIdFreqIdxMap, freqCSset);
- printCSmergeRel(freqCSset, csRelBetweenMergeFreqSet, *freqThreshold);
+
+
+ *csRelBetweenMergeFreqSet = (CSmergeRel *) malloc (sizeof(CSmergeRel) *
freqCSset->numCSadded);
+ initCsRelBetweenMergeFreqSet(*csRelBetweenMergeFreqSet,
freqCSset->numCSadded);
+ generateCsRelBetweenMergeFreqSet(*csRelBetweenMergeFreqSet,
csrelBetweenMaxFreqSet, *maxCSoid + 1, csIdFreqIdxMap, freqCSset);
+ printCSmergeRel(freqCSset, *csRelBetweenMergeFreqSet, *freqThreshold);
printmergeCSSet(freqCSset, *freqThreshold);
//getStatisticCSsBySize(csMap,maxNumProp);
@@ -3604,7 +3625,6 @@ RDFextractCSwithTypes(int *ret, bat *sba
freeCS_SubCSMapSet(csSubCSSet, *maxCSoid + 1);
free(csIdFreqIdxMap);
- freeMergeCSrelset(csRelBetweenMergeFreqSet,freqCSset->numCSadded);
freeCSrelSet(csrelSet, *maxCSoid + 1);
freeCSrelSet(csrelToMaxFreqSet, *maxCSoid + 1);
freeCSrelSet(csrelFromMaxFreqSet, *maxCSoid + 1);
@@ -4285,11 +4305,11 @@ RDFreorganize(int *ret, CStableStat *cst
//CStableStat *cstablestat;
char *subjdefaultMap = NULL; /* Specify whether this subject
contains default value or not. This array may be large */
CSPropTypes *csPropTypes;
-
+ CSmergeRel *csRelBetweenMergeFreqSet = NULL;
freqCSset = initCSset();
- if (RDFextractCSwithTypes(ret, sbatid, pbatid, obatid, mapbatid,
freqThreshold, freqCSset,&subjCSMap, &maxCSoid, &subjdefaultMap,
&maxNumPwithDup) != MAL_SUCCEED){
+ if (RDFextractCSwithTypes(ret, sbatid, pbatid, obatid, mapbatid,
freqThreshold, freqCSset,&subjCSMap, &maxCSoid, &subjdefaultMap,
&maxNumPwithDup, &csRelBetweenMergeFreqSet) != MAL_SUCCEED){
throw(RDF, "rdf.RDFreorganize", "Problem in extracting CSs");
}
@@ -4334,6 +4354,11 @@ RDFreorganize(int *ret, CStableStat *cst
RDFExtractCSPropTypes(ret, sbat, si, pi, oi, subjCSMap,
csTblIdxMapping, csPropTypes, maxNumPwithDup);
genCSPropTypesColIdx(csPropTypes, numTables, freqCSset);
printCSPropTypes(csPropTypes, numTables, freqCSset, *freqThreshold);
+
+ #if COLORINGPROP
+ /* Update list of support for properties in freqCSset */
+ updatePropSupport(csPropTypes, numTables, freqCSset);
+ #endif
// Init CStableStat
initCStables(cstablestat, freqCSset, csPropTypes, numTables);
@@ -4347,6 +4372,7 @@ RDFreorganize(int *ret, CStableStat *cst
free(mfreqIdxTblIdxMapping);
free(mTblIdxFreqIdxMapping);
freeCSPropTypes(csPropTypes,numTables);
+
freeMergeCSrelset(csRelBetweenMergeFreqSet,freqCSset->numCSadded);
return MAL_SUCCEED;
}
@@ -4455,6 +4481,7 @@ RDFreorganize(int *ret, CStableStat *cst
}
+ freeMergeCSrelset(csRelBetweenMergeFreqSet,freqCSset->numCSadded);
freeCSPropTypes(csPropTypes,numTables);
freeCSset(freqCSset);
free(subjCSMap);
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -31,8 +31,7 @@ RDFextractCS(int *ret, bat *sbatid, bat
rdf_export str
RDFextractPfromPSO(int *ret, bat *pbatid, bat *sbatid);
-rdf_export str
-RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat
*mapbatid, int *freqThreshold, void *freqCSset, oid **subjCSMap, oid *maxCSoid,
char **subjdefaultMap,int *maxNumPwithDup);
+
typedef enum{
EXPLOREONLY,
@@ -95,6 +94,8 @@ typedef struct PropStat {
#define STAT_ANALYZE 1 // Only use for collecting the statistic on the number
of multi/null/single-valued prop
+#define COLORINGPROP 1 // Only use for coloring property in schema
representation.
+
#define USE_LABEL_FINDING_MAXCS 0 // Use the labels received from
labeling process for finding maxCS
#define USE_LABEL_FOR_MERGING 0 // Use the labels received from
labeling process for finding mergeCS
@@ -103,13 +104,16 @@ typedef struct CS
{
oid csId; //Id of the CS
oid* lstProp; //List of properties' Ids
+ #if COLORINGPROP
+ int* lstPropSupport; //Number of subjects that the object value for
this is not null
+ #endif
int numProp;
int numAllocation;
//char isSubset;
int parentFreqIdx; //Index of the parent in freqCSset
#if STOREFULLCS
- oid subject; //A subject
- oid* lstObj; //List of sample objects
+ oid subject; //A subject
+ oid* lstObj; //List of sample objects
#endif
char type;
@@ -255,6 +259,9 @@ typedef struct CSPropTypes {
rdf_export str
RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, PropStat*
propStat, CStableStat *cstablestat, CSPropTypes *csPropTypes, oid* lastSubjId);
+rdf_export str
+RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat
*mapbatid, int *freqThreshold, void *freqCSset, oid **subjCSMap, oid *maxCSoid,
char **subjdefaultMap,int *maxNumPwithDup,CSmergeRel
**csRelBetweenMergeFreqSet);
+
rdf_export str
RDFreorganize(int *ret, CStableStat *cstablestat, bat *sbatid, bat *pbatid,
bat *obatid, bat *mapbatid, int *freqThreshold, int *mode);
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list