Changeset: 373ab6c925c8 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=373ab6c925c8 Modified Files: monetdb5/extras/rdf/rdfschema.c monetdb5/extras/rdf/rdfschema.h Branch: rdf Log Message:
Modify S1: Merge CS's only if their common name comes from an ontology class name. diffs (114 lines): diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -37,6 +37,17 @@ #define SHOWPROPERTYNAME 1 + +// for storing ontology data +oid **ontattributes = NULL; +int ontattributesCount = 0; +oid **ontmetadata = NULL; +int ontmetadataCount = 0; +BAT *ontmetaBat = NULL; +OntClass *ontclassSet = NULL; +int totalNumberOfTriples = 0; +int acceptableTableSize = 0; + str RDFSchemaExplore(int *ret, str *tbname, str *clname) { @@ -3970,6 +3981,17 @@ void doMerge(CSset *freqCSset, int ruleN } + +#if ONLY_MERGE_ONTOLOGYBASEDNAME_CS_S1 +static +char isOntologyName(oid valueOid){ + BUN ontClassPos = BUN_NONE; + ontClassPos = BUNfnd(BATmirror(ontmetaBat), &valueOid); + if (ontClassPos == BUN_NONE) return 0; + else return 1; +} +#endif + #if USE_LABEL_FOR_MERGING static str mergeFreqCSByS1(CSset *freqCSset, CSlabel** labels, oid *mergecsId, oid** ontmetadata, int ontmetadataCount,bat *mapbatid){ @@ -3989,8 +4011,12 @@ str mergeFreqCSByS1(CSset *freqCSset, CS LabelStat *labelStat = NULL; oid *name; + #if ONLY_MERGE_URINAME_CS_S1 + ObjectType objType; + #endif + #if ONLY_MERGE_ONTOLOGYBASEDNAME_CS_S1 - ObjectType objType; + char isOntName = 0; #endif #if OUTPUT_FREQID_PER_LABEL @@ -4035,6 +4061,13 @@ str mergeFreqCSByS1(CSset *freqCSset, CS for (i = 0; i < labelStat->numLabeladded; i++){ name = (oid*) Tloc(labelStat->labelBat, i); #if ONLY_MERGE_ONTOLOGYBASEDNAME_CS_S1 + isOntName = isOntologyName(*name); + if (isOntName != 1){ + printf("Name "BUNFMT" is not an ontology name \n", *name); + continue; + } + #endif + #if ONLY_MERGE_URINAME_CS_S1 objType = getObjType(*name); if (objType != URI) continue; #endif @@ -5155,15 +5188,7 @@ BAT* generateTablesForEvaluating(CSset * #endif -// for storing ontology data -oid **ontattributes = NULL; -int ontattributesCount = 0; -oid **ontmetadata = 
NULL; -int ontmetadataCount = 0; -BAT *ontmetaBat = NULL; -OntClass *ontclassSet = NULL; -int totalNumberOfTriples = 0; -int acceptableTableSize = 0; + static BAT* buildTypeOidBat(void){ @@ -5202,6 +5227,9 @@ BAT* buildTypeOidBat(void){ return typeBat; } + + + #if EXTRAINFO_FROM_RDFTYPE //Check whether a prop is a rdf:type prop static @@ -5221,6 +5249,8 @@ char isTypeAttribute(oid propId, BAT* ty //-1: Not an ontology class //Higher number --> more specific + + static int getOntologySpecificLevel(oid valueOid, BUN *ontClassPos){ diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h --- a/monetdb5/extras/rdf/rdfschema.h +++ b/monetdb5/extras/rdf/rdfschema.h @@ -145,6 +145,7 @@ typedef struct PropStat { #define NOT_MERGE_DIMENSIONCS 1 /* Default: 1, 0: Is for example data */ #define NOT_MERGE_DIMENSIONCS_IN_S1 0 /* Whether we should merge dimension CSs in S1 */ #define ONLY_MERGE_ONTOLOGYBASEDNAME_CS_S1 1 /* Only merge CS's whose name comes from an ontology class*/ +#define ONLY_MERGE_URINAME_CS_S1 0 /* Only merge CS's whose name is an URI */ #define FILTER_INFREQ_FK_FOR_IR 1 /* We filter out all the dirty references from a CS */ #define FILTER_THRESHOLD_FK_FOR_IR 0.1 /* The FK that their frequency < FILTER_THRESHOLD_FK_FOR_IR * FreqCS's frequency */ _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list