Changeset: bd25ee565cab for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=bd25ee565cab
Modified Files:
	monetdb5/extras/rdf/rdflabels.c
	monetdb5/extras/rdf/rdflabels.h
	monetdb5/extras/rdf/rdfontologyload.c
	monetdb5/extras/rdf/rdfontologyload.h
	monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:
Add the code for building ontology hierarchy information. diffs (truncated from 381 to 300 lines): diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c --- a/monetdb5/extras/rdf/rdflabels.c +++ b/monetdb5/extras/rdf/rdflabels.c @@ -2074,12 +2074,15 @@ void removeDuplicatedCandidates(CSlabel #if USE_TABLE_NAME /* For one CS: Choose the best table name out of all collected candidates (ontology, type, fk). */ static -void getTableName(CSlabel* label, int csIdx, int typeAttributesCount, TypeAttributesFreq*** typeAttributesHistogram, int** typeAttributesHistogramCount, TypeStat* typeStat, int typeStatCount, oid** result, int* resultCount, IncidentFKs* links, oid** ontmetadata, int ontmetadataCount) { +void getTableName(CSlabel* label, int csIdx, int typeAttributesCount, TypeAttributesFreq*** typeAttributesHistogram, int** typeAttributesHistogramCount, TypeStat* typeStat, int typeStatCount, oid** result, int* resultCount, IncidentFKs* links, oid** ontmetadata, int ontmetadataCount, BAT *ontmetaBat, OntClass *ontclassSet) { int i, j, k; oid *tmpList; int tmpListCount; char nameFound = 0; + BUN ontClassPos; //Position of ontology in the ontmetaBat + oid typeOid; + // --- ONTOLOGY --- // add all ontology candidates to list of candidates if (resultCount[csIdx] >= 1) { @@ -2170,6 +2173,23 @@ void getTableName(CSlabel* label, int cs tmpListCount = 0; for (i = 0; i < typeAttributesCount; ++i) { if (typeAttributesHistogramCount[csIdx][i] == 0) continue; + /* //TODO: Uncomment this path + for (j = 0; j < typeAttributesHistogramCount[csIdx][i]; j++){ + str typelabel; + typeOid = typeAttributesHistogram[csIdx][i][j].value; + printf("FreqCS %d : Type[%d][%d][oid] = " BUNFMT, csIdx, i,j, typeOid); + ontClassPos = BUNfnd(BATmirror(ontmetaBat), &typeOid); + if (ontClassPos != BUN_NONE){ + takeOid(typeOid,&typelabel); + assert(ontclassSet[ontClassPos].cOid == typeOid); + printf(" --> class %s | Index = %d |Specific level: %d \n", typelabel, (int)ontClassPos, 
ontclassSet[ontClassPos].hierDepth); + GDKfree(typelabel); + } + else{ + printf(" --> No class \n"); + } + } + */ if (typeAttributesHistogram[csIdx][i][0].percent < TYPE_FREQ_THRESHOLD) continue; // sorted tmpList = (oid *) realloc(tmpList, sizeof(oid) * (tmpListCount + 1)); if (!tmpList) fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); @@ -2303,14 +2323,14 @@ CSlabel* initLabels(CSset *freqCSset) { #if USE_TABLE_NAME /* Creates the final result of the labeling: table name and attribute names. */ static -void getAllLabels(CSlabel* labels, CSset* freqCSset, int typeAttributesCount, TypeAttributesFreq*** typeAttributesHistogram, int** typeAttributesHistogramCount, TypeStat* typeStat, int typeStatCount, oid** result, int* resultCount, IncidentFKs* links, oid** ontmetadata, int ontmetadataCount) { +void getAllLabels(CSlabel* labels, CSset* freqCSset, int typeAttributesCount, TypeAttributesFreq*** typeAttributesHistogram, int** typeAttributesHistogramCount, TypeStat* typeStat, int typeStatCount, oid** result, int* resultCount, IncidentFKs* links, oid** ontmetadata, int ontmetadataCount, BAT *ontmetaBat, OntClass *ontclassSet) { int i, j; for (i = 0; i < freqCSset->numCSadded; ++i) { CS cs = (CS) freqCSset->items[i]; // get table name - getTableName(&labels[i], i, typeAttributesCount, typeAttributesHistogram, typeAttributesHistogramCount, typeStat, typeStatCount, result, resultCount, links, ontmetadata, ontmetadataCount); + getTableName(&labels[i], i, typeAttributesCount, typeAttributesHistogram, typeAttributesHistogramCount, typeStat, typeStatCount, result, resultCount, links, ontmetadata, ontmetadataCount, ontmetaBat, ontclassSet); // copy attribute oids (names) labels[i].numProp = cs.numProp; @@ -2636,7 +2656,7 @@ void freeOntologyLookupResult(oid** onto } /* Creates labels for all CS (without a parent). 
*/ -CSlabel* createLabels(CSset* freqCSset, CSrel* csrelSet, int num, BAT *sbat, BATiter si, BATiter pi, BATiter oi, oid *subjCSMap, int *csIdFreqIdxMap, oid** ontattributes, int ontattributesCount, oid** ontmetadata, int ontmetadataCount, OntoUsageNode** ontoUsageTree) { +CSlabel* createLabels(CSset* freqCSset, CSrel* csrelSet, int num, BAT *sbat, BATiter si, BATiter pi, BATiter oi, oid *subjCSMap, int *csIdFreqIdxMap, oid** ontattributes, int ontattributesCount, oid** ontmetadata, int ontmetadataCount, OntoUsageNode** ontoUsageTree, BAT *ontmetaBat, OntClass *ontclassSet) { #if USE_TYPE_NAMES char* typeAttributes[] = { "<http://ogp.me/ns#type>", @@ -2713,7 +2733,7 @@ CSlabel* createLabels(CSset* freqCSset, // Assigning Names labels = initLabels(freqCSset); #if USE_TABLE_NAME - getAllLabels(labels, freqCSset, typeAttributesCount, typeAttributesHistogram, typeAttributesHistogramCount, typeStat, typeStatCount, ontologyLookupResult, ontologyLookupResultCount, links, ontmetadata, ontmetadataCount); + getAllLabels(labels, freqCSset, typeAttributesCount, typeAttributesHistogram, typeAttributesHistogramCount, typeStat, typeStatCount, ontologyLookupResult, ontologyLookupResultCount, links, ontmetadata, ontmetadataCount, ontmetaBat, ontclassSet); if (typeStatCount > 0) free(typeStat); #endif diff --git a/monetdb5/extras/rdf/rdflabels.h b/monetdb5/extras/rdf/rdflabels.h --- a/monetdb5/extras/rdf/rdflabels.h +++ b/monetdb5/extras/rdf/rdflabels.h @@ -21,6 +21,7 @@ #define _RDFLABELS_H_ #include "rdfschema.h" +#include "rdfontologyload.h" // Counts the occurances of type attribute values typedef struct TypeAttributesFreq { @@ -107,7 +108,8 @@ rdf_export void getStringBetweenQuotes(str* out, str in); rdf_export CSlabel* -createLabels(CSset* freqCSset, CSrel* csrelSet, int num, BAT *sbat, BATiter si, BATiter pi, BATiter oi, oid *subjCSMap, int *csIdFreqIdxMap, oid** ontattributes, int ontattributesCount, oid** ontmetadata, int ontmetadataCount, OntoUsageNode** ontoUsageTree); 
+createLabels(CSset* freqCSset, CSrel* csrelSet, int num, BAT *sbat, BATiter si, BATiter pi, BATiter oi, oid *subjCSMap, int *csIdFreqIdxMap, oid** ontattributes, int ontattributesCount, oid** ontmetadata, + int ontmetadataCount, OntoUsageNode** ontoUsageTree, BAT *ontmetaBat, OntClass *ontclassSet); rdf_export void exportLabels(CSlabel* labels, CSset* freqCSset, CSrel* csRelBetweenMergeFreqSet, int freqThreshold, int* mTblIdxFreqIdxMapping,int* mfreqIdxTblIdxMapping,int numTables); diff --git a/monetdb5/extras/rdf/rdfontologyload.c b/monetdb5/extras/rdf/rdfontologyload.c --- a/monetdb5/extras/rdf/rdfontologyload.c +++ b/monetdb5/extras/rdf/rdfontologyload.c @@ -134,45 +134,6 @@ tripleHandler(void* user_data, const rap return; } - -/* -static -OntClass* creatOC(oid ocId, oid subClassofId, int numP, oid* buff) -{ - OntClass *oc = (OntClass*)malloc(sizeof(OntClass)); - oc->lstProp = (oid*) malloc(sizeof(oid) * numP); - - if (oc->lstProp == NULL){ - printf("Malloc failed. at %d", numP); - exit(-1); - } - - copyOidSet(oc->lstProp, buff, numP); - oc->ocId = ocId; - oc->subclassof = subClassofId; - oc->numProp = numP; - oc->numAllocation = numP; - return oc; -} - -str -RDFOntologyRead(int *ret, bat *ontcBatid, bat *ontaBatid, OntClassset* ontclassset){ - BAT* ontcBat; // BAT for ontology classes - BAT* ontaBat; // BAT for ontology - if ((ontcBat = BATdescriptor(*ontcBatid)) == NULL) { - throw(MAL, "rdf.RDFOntologyParser", RUNTIME_OBJECT_MISSING); - } - - if ((ontaBat = BATdescriptor(*ontaBatid)) == NULL) { - BBPreleaseref(ontcBat->batCacheid); - throw(MAL, "rdf.RDFOntologyParser", RUNTIME_OBJECT_MISSING); - } - - *ret = 1; - return MAL_SUCCEED; -} - -*/ str RDFOntologyParser(int *xret, str *location, str *schema){ @@ -268,6 +229,144 @@ extern oid **ontattributes; extern int ontattributesCount; extern oid **ontmetadata; extern int ontmetadataCount; +extern BAT *ontmetaBat; //To lookup which ontology class based on the class oid +extern OntClass *ontclassSet; //Store the 
class Idx & superClass Idxes + +static +int getDepth(int Idx, OntClass *tmpontclassSet){ + int maxDepth = 0; + int i; + int tmpDepth = 0; + + if (tmpontclassSet[Idx].hierDepth != -1) return tmpontclassSet[Idx].hierDepth; //Computed already + + if (tmpontclassSet[Idx].numsc == 0) { + tmpontclassSet[Idx].hierDepth = 0; //There is no super class for this class + return 0; + }else{ + for (i = 0; i < tmpontclassSet[Idx].numsc; i++){ + tmpDepth = 1 + getDepth(tmpontclassSet[Idx].scIdxes[i],tmpontclassSet); + if (tmpDepth > maxDepth) maxDepth = tmpDepth; + } + + return maxDepth; + } + + return maxDepth; +} + +static +str buildOntologyClassesInfo(oid **ontmetadat, int ontmetadataCount){ + + int i; + oid classOid; //The class Oid comes from + oid scOid; + int classIdx; + int scIdx; + + BUN tmpBun = BUN_NONE; + int numClass; + BATiter ontmetaBati; + BUN p,q; + oid *tmpOid; + int* _tmpIdxes; + OntClass *tmpontclassSet = NULL; + //Read all ontmetadata and store them in the ontmetaBat + + ontmetaBat = BATnew(TYPE_void, TYPE_oid, ontmetadataCount); + BATseqbase(ontmetaBat, 0); + (void)BATprepareHash(BATmirror(ontmetaBat)); + if (!(ontmetaBat->T->hash)){ + throw(RDF, "buildOntologyClassesInfo", "Cannot allocate the hash for Bat"); + } + for (i = 0; i < ontmetadataCount; i++){ + classOid = ontmetadat[0][i]; + assert(classOid != BUN_NONE); + + tmpBun = BUNfnd(BATmirror(ontmetaBat),&classOid); + if (tmpBun == BUN_NONE){ //If it is a new class + if (BUNappend(ontmetaBat,&classOid, TRUE) == NULL) + throw(RDF, "buildOntologyClassesInfo", "Cannot insert to ontmetaBat"); + } + + scOid = ontmetadat[1][i]; + + if (scOid != BUN_NONE){ //The superClass oid is there + tmpBun = BUNfnd(BATmirror(ontmetaBat),&scOid); + if (tmpBun == BUN_NONE){ //If it is a new class + if (BUNappend(ontmetaBat, &scOid, TRUE) == NULL) + throw(RDF, "buildOntologyClassesInfo", "Cannot insert to ontmetaBat"); + } + } + } + + numClass = BATcount(ontmetaBat); + printf("Number of ontology classes added: %d\n", 
numClass); + + + // Add metadata and hierarchy information + tmpontclassSet = (OntClass *) malloc(sizeof(OntClass) * numClass); + ontmetaBati = bat_iterator(ontmetaBat); + i = 0; + + (void) tmpOid; + (void) q; + (void) p; + (void) ontmetaBati; + + + BATloop(ontmetaBat, p, q){ + tmpOid = (oid *) BUNtloc(ontmetaBati,p); + + tmpontclassSet[i].cOid = *tmpOid; + + //Init other info + tmpontclassSet[i].scIdxes = (int *) malloc(sizeof(int) * NUMSC_PER_ONTCLASS); + tmpontclassSet[i].numsc = 0; + tmpontclassSet[i].numAllocation = NUMSC_PER_ONTCLASS; + tmpontclassSet[i].hierDepth = -1; + + i++; + } + + + //Add sc + for (i = 0; i < ontmetadataCount; i++){ + //Get index + classOid = ontmetadat[0][i]; + scOid = ontmetadat[1][i]; + tmpBun = BUNfnd(BATmirror(ontmetaBat), &classOid); + assert(tmpBun != BUN_NONE); + + classIdx = (int) (tmpBun); + + if (scOid == BUN_NONE) continue; + else{ + tmpBun = BUNfnd(BATmirror(ontmetaBat), &scOid); + assert(tmpBun != BUN_NONE); + scIdx = (int) (tmpBun); + + //Add scIdx to tmpontclassSet[classIdx] + if (tmpontclassSet[classIdx].numsc == tmpontclassSet[classIdx].numAllocation){ + tmpontclassSet[classIdx].numAllocation += NUMSC_PER_ONTCLASS; + _tmpIdxes = realloc(tmpontclassSet[classIdx].scIdxes, sizeof(int) * tmpontclassSet[classIdx].numAllocation); + if (!_tmpIdxes) fprintf(stderr, "ERROR: Couldn't realloc memory!\n"); + + tmpontclassSet[classIdx].scIdxes = (int*)_tmpIdxes; + } + tmpontclassSet[classIdx].scIdxes[tmpontclassSet[classIdx].numsc] = scIdx; + tmpontclassSet[classIdx].numsc++; + } + } + + //Get the hierarchy depth information + for (i = 0; i < numClass; i++){ + tmpontclassSet[i].hierDepth = getDepth(i,tmpontclassSet); + } + + ontclassSet = tmpontclassSet; + return MAL_SUCCEED; +} str RDFloadsqlontologies(int *ret, bat *auriid, bat *aattrid, bat *muriid, bat *msuperid){ @@ -325,6 +424,7 @@ RDFloadsqlontologies(int *ret, bat *auri ontattributes[1] = malloc(sizeof(str) * auriCount); // attr if (!ontattributes[0] || !ontattributes[1]) 
fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
+
 BATloop(auri, p, q){
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list