Changeset: d2027f421e77 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d2027f421e77
Modified Files:
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Remove the use of subCS.
diffs (truncated from 306 to 300 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -48,6 +48,7 @@ static void copyOidSet(oid* dest, oid* o
}
+#if NEEDSUBCS
static void copyTypesSet(char* dest, char* orig, int len){
memcpy(dest, orig, len * sizeof(char));
}
@@ -71,6 +72,13 @@ static oid RDF_hash_Tyleslist(char* type
return hashCode;
}
+static void initCharArray(char* inputArr, int num, char defaultValue){
+ int i;
+ for (i = 0; i < num; i++){
+ inputArr[i] = defaultValue;
+ }
+}
+#endif /* if NEEDSUBCS */
/*
static void printArray(oid* inputArr, int num){
int i;
@@ -111,12 +119,7 @@ static void initcsIdFreqIdxMap(int* inpu
-static void initCharArray(char* inputArr, int num, char defaultValue){
- int i;
- for (i = 0; i < num; i++){
- inputArr[i] = defaultValue;
- }
-}
+
static
void addCStoSet(CSset *csSet, CS item)
@@ -558,7 +561,7 @@ str printCSrelWithMaxSet(CSset *freqCSse
return MAL_SUCCEED;
}
-
+#if NEEDSUBCS
static
void setdefaultSubCSs(SubCSSet *subcsset, int num, BAT *sbat, oid
*subjSubCSMap,oid *subjCSMap, char *subjdefaultMap){
@@ -602,6 +605,9 @@ void setdefaultSubCSs(SubCSSet *subcsset
}
}
+#endif
+
+#if NEEDSUBCS
static
void printSubCSInformation(SubCSSet *subcsset, BAT* freqBat, int num, char
isWriteTofile, int freqThreshold){
@@ -678,6 +684,8 @@ void printSubCSInformation(SubCSSet *sub
}
}
+#endif /* NEEDSUBCS */
+
static char
getObjType(oid objOid){
char objType = (char) (objOid >> (sizeof(BUN)*8 - 4)) & 7 ;
@@ -941,6 +949,7 @@ void freeCSPropTypes(CSPropTypes* csProp
GDKfree(csPropTypes);
}
+#if NEEDSUBCS
static
SubCS* creatSubCS(oid subCSId, int numP, char* buff, oid subCSsign){
SubCS *subcs = (SubCS*) malloc(sizeof(SubCS));
@@ -1076,6 +1085,7 @@ oid addSubCS(char *buff, int numP, int c
return subCSId;
}
+#endif /*if NEEDSUBCS*/
static
void freeCSset(CSset *csSet){
@@ -3141,9 +3151,16 @@ str RDFgetRefCounts(int *ret, BAT *sbat,
return MAL_SUCCEED;
}
+#if NEEDSUBCS
static
str RDFrelationships(int *ret, BAT *sbat, BATiter si, BATiter pi, BATiter oi,
oid *subjCSMap, oid *subjSubCSMap, SubCSSet *csSubCSSet, CSrel
*csrelSet, BUN maxSoid, int maxNumPwithDup){
+#else
+static
+str RDFrelationships(int *ret, BAT *sbat, BATiter si, BATiter pi, BATiter oi,
+ oid *subjCSMap, CSrel *csrelSet, BUN maxSoid, int
maxNumPwithDup){
+#endif
+
BUN p, q;
oid *sbt = 0, *obt, *pbt;
@@ -3151,7 +3168,9 @@ str RDFrelationships(int *ret, BAT *sbat
//oid CSoid = 0; /* Characteristic set oid */
int numPwithDup; /* Number of properties for current S */
char objType;
+ #if NEEDSUBCS
oid returnSubCSid;
+ #endif
char* buffTypes;
oid realObjOid;
char isBlankNode;
@@ -3172,6 +3191,7 @@ str RDFrelationships(int *ret, BAT *sbat
BATloop(sbat, p, q){
sbt = (oid *) BUNtloc(si, p);
if (*sbt != curS){
+ #if NEEDSUBCS
if (p != 0){ /* Not the first S */
returnSubCSid = addSubCS(buffTypes,
numPwithDup, subjCSMap[curS], csSubCSSet);
@@ -3179,6 +3199,7 @@ str RDFrelationships(int *ret, BAT *sbat
subjSubCSMap[curS] = returnSubCSid;
}
+ #endif
curS = *sbt;
numPwithDup = 0;
curP = 0;
@@ -3217,9 +3238,11 @@ str RDFrelationships(int *ret, BAT *sbat
}
}
+ #if NEEDSUBCS
/* Check for the last CS */
returnSubCSid = addSubCS(buffTypes, numPwithDup, subjCSMap[*sbt],
csSubCSSet);
subjSubCSMap[*sbt] = returnSubCSid;
+ #endif
free (buffTypes);
@@ -3560,20 +3583,24 @@ int ontmetadataCount = 0;
/* Extract CS from SPO triples table */
str
-RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat
*mapbatid, int *freqThreshold, void *_freqCSset, oid **subjCSMap, oid
*maxCSoid, char **subjdefaultMap,int *maxNumPwithDup, CSlabel** labels,
CSmergeRel **csRelBetweenMergeFreqSet){
+RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, bat
*mapbatid, int *freqThreshold, void *_freqCSset, oid **subjCSMap, oid
*maxCSoid, int *maxNumPwithDup, CSlabel** labels, CSmergeRel
**csRelBetweenMergeFreqSet){
BAT *sbat = NULL, *pbat = NULL, *obat = NULL, *mbat = NULL;
BATiter si, pi, oi; /*iterator for BAT of s,p,o columns in
spo table */
CSBats *csBats;
- oid *subjSubCSMap; /* Store the corresponding CS sub Id
for each subject */
BUN *maxSoid;
int maxNumProp = 0;
CSrel *csrelSet;
CSrel *csrelToMaxFreqSet, *csrelFromMaxFreqSet;
CSrel *csrelBetweenMaxFreqSet;
+
+ #if NEEDSUBCS
SubCSSet *csSubCSSet;
+ oid *subjSubCSMap; /* Store the corresponding CS sub Id
for each subject */
+ char *subjdefaultMap = NULL; /* Specify whether this subject
contains default value or not. This array may be large */
+ #endif
int *refCount; /* Count the number of references to
each CS */
@@ -3626,11 +3653,9 @@ RDFextractCSwithTypes(int *ret, bat *sba
assert(*maxSoid != BUN_NONE);
*subjCSMap = (oid *) malloc (sizeof(oid) * ((*maxSoid) + 1));
- subjSubCSMap = (oid *) malloc (sizeof(oid) * ((*maxSoid) + 1));
- *subjdefaultMap = (char *) malloc (sizeof(char) * ((*maxSoid) + 1));
-
initArray(*subjCSMap, (*maxSoid) + 1, BUN_NONE);
- initCharArray(*subjdefaultMap,(*maxSoid) + 1, 0);
+
+
tmpLastT = clock();
@@ -3645,6 +3670,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
curT = clock();
printf (" ----- Exploring all CSs took %f seconds.\n", ((float)(curT -
tmpLastT))/CLOCKS_PER_SEC);
+ printf("Number of freqCSs found by frequency: %d \n",
freqCSset->numCSadded);
tmpLastT = curT;
/* Phase 2: Get the references count for each CS. Add frequent one to
freqCSset */
@@ -3656,6 +3682,10 @@ RDFextractCSwithTypes(int *ret, bat *sba
initIntArray(refCount, (*maxCSoid + 1), 0);
RDFgetRefCounts(ret, sbat, si, pi,oi, *subjCSMap, maxNumProp, *maxSoid,
refCount);
addHighRefCSsToFreqCS(csBats->pOffsetBat, csBats->freqBat,
csBats->coverageBat, csBats->fullPBat, refCount, freqCSset, csIdFreqIdxMap,
*maxCSoid + 1, 2* (*freqThreshold));
+ curT = clock();
+ printf (" ----- Counting references and adding highly referred CS's
took %f seconds.\n", ((float)(curT - tmpLastT))/CLOCKS_PER_SEC);
+ printf("Number of freqCSs after considering # references: %d \n",
freqCSset->numCSadded);
+ tmpLastT = curT;
//Phase 2: Check the relationship
@@ -3664,16 +3694,22 @@ RDFextractCSwithTypes(int *ret, bat *sba
printf("Max Number of P (considering duplicated P): %d \n",
*maxNumPwithDup);
- printf("Number of freqCSs: %d \n", freqCSset->numCSadded);
-
-
csrelSet = initCSrelset(*maxCSoid + 1);
+
+ #if NEEDSUBCS
+ subjSubCSMap = (oid *) malloc (sizeof(oid) * ((*maxSoid) + 1));
+ subjdefaultMap = (char *) malloc (sizeof(char) * ((*maxSoid) + 1));
+
+ initCharArray(subjdefaultMap,(*maxSoid) + 1, 0);
csSubCSSet = initCS_SubCSSets(*maxCSoid +1);
RDFrelationships(ret, sbat, si, pi, oi, *subjCSMap, subjSubCSMap,
csSubCSSet, csrelSet, *maxSoid, *maxNumPwithDup);
+ #else
+ RDFrelationships(ret, sbat, si, pi, oi, *subjCSMap, csrelSet, *maxSoid,
*maxNumPwithDup);
+ #endif
curT = clock();
printf (" ----- Exploring subCSs and FKs took %f seconds.\n",
((float)(curT - tmpLastT))/CLOCKS_PER_SEC);
@@ -3682,9 +3718,10 @@ RDFextractCSwithTypes(int *ret, bat *sba
printCSrelSet(csrelSet,csIdFreqIdxMap, csBats->freqBat, *maxCSoid + 1,
1, *freqThreshold);
- setdefaultSubCSs(csSubCSSet,*maxCSoid + 1, sbat, subjSubCSMap,
*subjCSMap, *subjdefaultMap);
-
+ #if NEEDSUBCS
+ setdefaultSubCSs(csSubCSSet,*maxCSoid + 1, sbat, subjSubCSMap,
*subjCSMap, subjdefaultMap);
printSubCSInformation(csSubCSSet, csBats->freqBat, *maxCSoid + 1, 1,
*freqThreshold);
+ #endif
printf("Number of frequent CSs is: %d \n", freqCSset->numCSadded);
@@ -3761,11 +3798,13 @@ RDFextractCSwithTypes(int *ret, bat *sba
BBPunfix(mbat->batCacheid);
freeOntoUsageTree(ontoUsageTree);
- free (subjSubCSMap);
free (superCSFreqCSMap);
free (superCSMergeMaxCSMap);
-
+
+ #if NEEDSUBCS
+ free (subjSubCSMap);
freeCS_SubCSMapSet(csSubCSSet, *maxCSoid + 1);
+ #endif
free(csIdFreqIdxMap);
freeCSrelSet(csrelSet, *maxCSoid + 1);
@@ -4446,14 +4485,13 @@ RDFreorganize(int *ret, CStableStat *cst
int numdistinctMCS = 0;
int maxNumPwithDup = 0;
//CStableStat *cstablestat;
- char *subjdefaultMap = NULL; /* Specify whether this subject
contains default value or not. This array may be large */
CSPropTypes *csPropTypes;
CSlabel *labels, *labels2;
CSmergeRel *csRelBetweenMergeFreqSet = NULL;
freqCSset = initCSset();
- if (RDFextractCSwithTypes(ret, sbatid, pbatid, obatid, mapbatid,
freqThreshold, freqCSset,&subjCSMap, &maxCSoid, &subjdefaultMap,
&maxNumPwithDup, &labels, &csRelBetweenMergeFreqSet) != MAL_SUCCEED){
+ if (RDFextractCSwithTypes(ret, sbatid, pbatid, obatid, mapbatid,
freqThreshold, freqCSset,&subjCSMap, &maxCSoid, &maxNumPwithDup, &labels,
&csRelBetweenMergeFreqSet) != MAL_SUCCEED){
throw(RDF, "rdf.RDFreorganize", "Problem in extracting CSs");
}
@@ -4516,7 +4554,6 @@ RDFreorganize(int *ret, CStableStat *cst
freeMergeCSrelset(csRelBetweenMergeFreqSet,freqCSset->numCSadded);
freeCSset(freqCSset);
free(subjCSMap);
- free(subjdefaultMap);
free(csTblIdxMapping);
free(mfreqIdxTblIdxMapping);
free(mTblIdxFreqIdxMapping);
@@ -4643,7 +4680,6 @@ RDFreorganize(int *ret, CStableStat *cst
freePropStat(propStat);
//freeCStableStat(cstablestat);
//
- if (subjdefaultMap != NULL) free(subjdefaultMap);
BBPreclaim(lmap);
BBPreclaim(rmap);
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -97,6 +97,8 @@ typedef struct PropStat {
#define COLORINGPROP 1 // Only use for coloring property in schema
representation.
+#define NEEDSUBCS 0 // We actually do not need to use SubCS as the idea of
default subCS is not used. But it is still good
+ // for collecting the statistical information (For
reporting/writing)
#define USE_LABEL_FINDING_MAXCS 0 // Use the labels received from
labeling process for finding maxCS
#define USE_LABEL_FOR_MERGING 0 // Use the labels received from
labeling process for finding mergeCS
@@ -261,7 +263,7 @@ rdf_export str
RDFdistTriplesToCSs(int *ret, bat *sbatid, bat *pbatid, bat *obatid, PropStat*
propStat, CStableStat *cstablestat, CSPropTypes *csPropTypes, oid* lastSubjId);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list