Changeset: 56cf319b0599 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=56cf319b0599
Modified Files:
monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:
Add the second phase of RDF schema exploration (relationship exploration)
diffs (121 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -620,10 +620,10 @@ void freeCSBats(CSBats *csBats){
static
-str RDFassignCSId(int *ret, BAT *sbat, BATiter si, BATiter pi, BATiter oi,
CSset *freqCSset, int *freqThreshold, CSBats* csBats, oid *subjCSMap){
+str RDFassignCSId(int *ret, BAT *sbat, BATiter si, BATiter pi, CSset
*freqCSset, int *freqThreshold, CSBats* csBats, oid *subjCSMap){
BUN p, q;
- oid *sbt, *pbt, *obt;
+ oid *sbt, *pbt;
oid curS; /* current Subject oid */
oid curP; /* current Property oid */
oid CSoid = 0; /* Characteristic set oid */
@@ -631,7 +631,6 @@ str RDFassignCSId(int *ret, BAT *sbat, B
oid* buff;
int INIT_PROPERTY_NUM = 5000;
int maxNumProp = 0;
- oid objType;
oid returnCSid;
buff = (oid *) malloc (sizeof(oid) * INIT_PROPERTY_NUM);
@@ -670,13 +669,83 @@ str RDFassignCSId(int *ret, BAT *sbat, B
curP = *pbt;
}
+ }
+
+ /*put the last CS */
+ returnCSid = putaCStoHash(csBats, curS, buff, numP, &CSoid, 1,
*freqThreshold, freqCSset );
+
+ subjCSMap[curS] = returnCSid;
+
+ if (numP > maxNumProp)
+ maxNumProp = numP;
+
+ free (buff);
+
+ *ret = 1;
+
+ return MAL_SUCCEED;
+}
+
+static
+str RDFrelationships(int *ret, BAT *sbat, BATiter si, BATiter pi, BATiter oi,
CSset *freqCSset,
+ int *freqThreshold, CSBats* csBats, oid *subjCSMap, BUN
maxSoid){
+
+ BUN p, q;
+ oid *sbt, *pbt, *obt;
+ oid curS; /* current Subject oid */
+ oid curP; /* current Property oid */
+ oid CSoid = 0; /* Characteristic set oid */
+ int numP; /* Number of properties for current S */
+ oid* buff;
+ int INIT_PROPERTY_NUM = 5000;
+ int maxNumProp = 0;
+ oid objType;
+ oid returnCSid;
+
+ buff = (oid *) malloc (sizeof(oid) * INIT_PROPERTY_NUM);
+
+ numP = 0;
+ curP = 0;
+ curS = 0;
+
+ BATloop(sbat, p, q){
+ sbt = (oid *) BUNtloc(si, p);
+ if (*sbt != curS){
+ if (p != 0){ /* Not the first S */
+ returnCSid = putaCStoHash(csBats, curS, buff,
numP, &CSoid, 1, *freqThreshold, freqCSset);
+
+ subjCSMap[curS] = returnCSid;
+
+ if (numP > maxNumProp)
+ maxNumProp = numP;
+ }
+ curS = *sbt;
+ curP = 0;
+ numP = 0;
+ }
+
+ pbt = (oid *) BUNtloc(pi, p);
+
+ if (numP > INIT_PROPERTY_NUM){
+ throw(MAL, "rdf.RDFextractCS", "# of properties is
greater than INIT_PROPERTY_NUM");
+ exit(-1);
+ }
+
+ if (curP != *pbt){ /* Multi values property */
+ buff[numP] = *pbt;
+ numP++;
+ curP = *pbt;
+ }
+
obt = (oid *) BUNtloc(oi, p);
/* Check type of object */
objType = ((*obt) >> (sizeof(BUN)*8 - 3)) & 3 ; /* Get
two bits 63th, 62nd from object oid */
/* Look at sbat*/
if (objType == URI){
- //getReferCS(sbat, pbat, obt);
+ if (*obt <= maxSoid && subjCSMap[*obt] != BUN_NONE){
+ printf(" CS " BUNFMT " refer to CS " BUNFMT "
\n",*sbt, subjCSMap[*obt]);
+ }
}
}
@@ -736,7 +805,10 @@ RDFextractCSwithTypes(int *ret, bat *sba
initArray(subjCSMap, (*maxSoid), BUN_NONE);
//Phase 1: Assign an ID for each CS
- RDFassignCSId(ret, sbat, si, pi, oi, freqCSset, freqThreshold, csBats,
subjCSMap);
+ RDFassignCSId(ret, sbat, si, pi, freqCSset, freqThreshold, csBats,
subjCSMap);
+
+ //Phase 2: Check the relationship
+ RDFrelationships(ret, sbat, si, pi, oi, freqCSset, freqThreshold,
csBats, subjCSMap, *maxSoid);
printf("Number of frequent CSs is: %d \n", freqCSset->numCSadded);
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list