Changeset: 68033018c163 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=68033018c163
Modified Files:
monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:
Check whether a subject and its redirect subject share the same type
diffs (156 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -5475,6 +5475,7 @@ str RDFassignCSId(int *ret, BAT *sbat, B
#endif
#endif
+ BBPreclaim(typeBat);
#if EXTRAINFO_FROM_RDFTYPE
freePropStat(ontPropStat);
freeTFIDFInfo(tfidfInfos, numOntClass);
@@ -5501,6 +5502,7 @@ str RDFcheckWrongTypeSubject(BAT *sbat,
BUN p, q;
oid *sbt, *pbt, *obt;
oid curS; /* current Subject oid */
+ oid redirectS; /* Subject that are redirected from the curS*/
oid curP; /* current Property oid */
int numP; /* Number of properties for current S */
int numPwithDup = 0;
@@ -5535,9 +5537,15 @@ str RDFcheckWrongTypeSubject(BAT *sbat,
BUN bun, bunprop;
oid prop;
+ oid *subjTypeMap = NULL;
+ oid *maxSoid;
+
+
(void) mTblIdxFreqIdxMapping;
(void) numTables;
+
+
#if EXTRAINFO_FROM_RDFTYPE
numOntClass = BATcount(ontmetaBat);
ontPropStat = initPropStat();
@@ -5576,6 +5584,13 @@ str RDFcheckWrongTypeSubject(BAT *sbat,
}
#endif
+ maxSoid = (BUN *) Tloc(sbat, BUNlast(sbat) - 1);
+
+ assert(*maxSoid != BUN_NONE);
+
+ subjTypeMap = (oid *) malloc (sizeof(oid) * ((*maxSoid) + 1));
+ initArray(subjCSMap, (*maxSoid) + 1, BUN_NONE);
+
BATloop(sbat, p, q){
sbt = (oid *) BUNtloc(si, p);
if (*sbt != curS){
@@ -5585,6 +5600,10 @@ str RDFcheckWrongTypeSubject(BAT *sbat,
//Only check for subject that have type
value
markedName = rdftypeOntologyValues[0];
+
+ assert(markedName != BUN_NONE);
+ subjTypeMap[curS] = markedName;
+
bun = BUNfnd(labelStat->labelBat,
&markedName);
if (bun != BUN_NONE){ //There is
table to compare
int freqId =
csFreqCSMapping[subjCSMap[curS]];
@@ -5687,10 +5706,64 @@ str RDFcheckWrongTypeSubject(BAT *sbat,
/*put the last CS */
//TODO: Check the last subject, copy from above
- //printf("subjCSMap[" BUNFMT "]=" BUNFMT " (CSoid = " BUNFMT ") \n",
curS, returnCSid, CSoid);
+
+ //Run again and check if a subj and it pageRedirect has the same type
+ {
+ char* schema = "rdf";
+ int ret = 0;
+ oid redirectAttributeOid = BUN_NONE;
+ char* redirectAttributes =
"<http://dbpedia.org/ontology/wikiPageRedirects>";
+
+ if (TKNZRopen (NULL, &schema) != MAL_SUCCEED) {
+ throw(RDF, "rdf.rdfschema",
+ "could not open the tokenizer\n");
+ }
+
+ TKNZRappend(&redirectAttributeOid,&redirectAttributes);
+
+ assert(redirectAttributeOid != BUN_NONE);
+
+ printf("<http://dbpedia.org/ontology/wikiPageRedirects> id is
"BUNFMT"\n",redirectAttributeOid);
+
+ BATloop(sbat, p, q){
+ sbt = (oid *) BUNtloc(si, p);
+ //Only check for subject that have type
value
+ pbt = (oid *) BUNtloc(pi, p);
+
+ if (*pbt == redirectAttributeOid && subjTypeMap[*sbt] !=
BUN_NONE){ //Check redirect value
+ obt = (oid *) BUNtloc(oi, p);
+ redirectS = *obt;
+ if (redirectS < *maxSoid){
+ if (subjTypeMap[redirectS] != BUN_NONE){
+ if (subjTypeMap[*sbt] !=
subjTypeMap[redirectS]){
+ str curSstr;
+ str redirectSstr;
+ str curStype;
+ str redirecttype;
+ takeOid(*sbt, &curSstr);
+ takeOid(redirectS,
&redirectSstr);
+ takeOid(subjTypeMap[*sbt],
&curStype);
+
takeOid(subjTypeMap[redirectS],&redirecttype);
+ printf("Subject %s [Type: %s]
redirects to %s [Type: %s] \n",
+
curSstr,curStype,redirectSstr,redirecttype);
+ GDKfree(curSstr);
+ GDKfree(redirectSstr);
+ GDKfree(curStype);
+ GDKfree(redirecttype);
+ }
+
+ }
+ }
+ }
+ }
+
+ TKNZRclose(&ret);
+ }
free (buff);
+ free(subjTypeMap);
+
#if EXTRAINFO_FROM_RDFTYPE
freePropStat(ontPropStat);
freeTFIDFInfo(tfidfInfos, numOntClass);
@@ -5701,6 +5774,9 @@ str RDFcheckWrongTypeSubject(BAT *sbat,
free(rdftypeSpecificLevels);
free(rdftypeOntClassPos);
+ BBPreclaim(typeBat);
+
+
return MAL_SUCCEED;
}
@@ -10388,7 +10464,10 @@ RDFreorganize(int *ret, CStableStat *cst
throw(MAL, "rdf.RDFextractCSwithTypes", RUNTIME_OBJECT_MISSING);
}
+
+
labelStat = initLabelStat();
+
#if USING_FINALTABLE
buildLabelStatForTable(labelStat, numTables, cstablestat);
#else
@@ -10471,7 +10550,7 @@ RDFreorganize(int *ret, CStableStat *cst
if (tblIdx != -1){
freqIdx = csFreqCSMapping[subjCSMap[*sbt]];
if (freqCSset->items[freqIdx].numProp <
cstablestat->lstcstable[tblIdx].numCol * LOTSOFNULL_SUBJECT_THRESHOLD){
- printf("Subject " BUNFMT " is removed from
table %d with %d cols \n",*sbt,tblIdx, cstablestat->lstcstable[tblIdx].numCol);
+ //printf("Subject " BUNFMT " is removed from
table %d with %d cols \n",*sbt,tblIdx, cstablestat->lstcstable[tblIdx].numCol);
isLotsNullSubj[*sbt] = 1;
tblIdx = -1;
numSubjRemoved++;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list