Changeset: bd25ee565cab for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=bd25ee565cab
Modified Files:
        monetdb5/extras/rdf/rdflabels.c
        monetdb5/extras/rdf/rdflabels.h
        monetdb5/extras/rdf/rdfontologyload.c
        monetdb5/extras/rdf/rdfontologyload.h
        monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:

Add the code for building ontology hierarchy information.


diffs (truncated from 381 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c
--- a/monetdb5/extras/rdf/rdflabels.c
+++ b/monetdb5/extras/rdf/rdflabels.c
@@ -2074,12 +2074,15 @@ void removeDuplicatedCandidates(CSlabel 
 #if USE_TABLE_NAME
 /* For one CS: Choose the best table name out of all collected candidates 
(ontology, type, fk). */
 static
-void getTableName(CSlabel* label, int csIdx,  int typeAttributesCount, 
TypeAttributesFreq*** typeAttributesHistogram, int** 
typeAttributesHistogramCount, TypeStat* typeStat, int typeStatCount, oid** 
result, int* resultCount, IncidentFKs* links, oid** ontmetadata, int 
ontmetadataCount) {
+void getTableName(CSlabel* label, int csIdx,  int typeAttributesCount, 
TypeAttributesFreq*** typeAttributesHistogram, int** 
typeAttributesHistogramCount, TypeStat* typeStat, int typeStatCount, oid** 
result, int* resultCount, IncidentFKs* links, oid** ontmetadata, int 
ontmetadataCount, BAT *ontmetaBat, OntClass *ontclassSet) {
        int             i, j, k;
        oid             *tmpList;
        int             tmpListCount;
        char            nameFound = 0;
 
+       BUN             ontClassPos;    //Position of ontology in the ontmetaBat
+       oid             typeOid;        
+
        // --- ONTOLOGY ---
        // add all ontology candidates to list of candidates
        if (resultCount[csIdx] >= 1) {
@@ -2170,6 +2173,23 @@ void getTableName(CSlabel* label, int cs
        tmpListCount = 0;
        for (i = 0; i < typeAttributesCount; ++i) {
                if (typeAttributesHistogramCount[csIdx][i] == 0) continue;
+               /*   //TODO: Uncomment this path
+               for (j = 0; j < typeAttributesHistogramCount[csIdx][i]; j++){
+                       str typelabel; 
+                       typeOid = typeAttributesHistogram[csIdx][i][j].value;
+                       printf("FreqCS %d : Type[%d][%d][oid] = " BUNFMT, 
csIdx, i,j, typeOid);
+                       ontClassPos = BUNfnd(BATmirror(ontmetaBat), &typeOid); 
+                       if (ontClassPos != BUN_NONE){
+                               takeOid(typeOid,&typelabel);
+                               assert(ontclassSet[ontClassPos].cOid == 
typeOid); 
+                               printf(" --> class %s | Index = %d |Specific 
level: %d \n", typelabel, (int)ontClassPos, ontclassSet[ontClassPos].hierDepth);
+                               GDKfree(typelabel);
+                       }
+                       else{
+                               printf(" --> No class \n");     
+                       }
+               }
+               */
                if (typeAttributesHistogram[csIdx][i][0].percent < 
TYPE_FREQ_THRESHOLD) continue; // sorted
                tmpList = (oid *) realloc(tmpList, sizeof(oid) * (tmpListCount 
+ 1));
                if (!tmpList) fprintf(stderr, "ERROR: Couldn't realloc 
memory!\n");
@@ -2303,14 +2323,14 @@ CSlabel* initLabels(CSset *freqCSset) {
 #if USE_TABLE_NAME
 /* Creates the final result of the labeling: table name and attribute names. */
 static
-void getAllLabels(CSlabel* labels, CSset* freqCSset,  int typeAttributesCount, 
TypeAttributesFreq*** typeAttributesHistogram, int** 
typeAttributesHistogramCount, TypeStat* typeStat, int typeStatCount, oid** 
result, int* resultCount, IncidentFKs* links, oid** ontmetadata, int 
ontmetadataCount) {
+void getAllLabels(CSlabel* labels, CSset* freqCSset,  int typeAttributesCount, 
TypeAttributesFreq*** typeAttributesHistogram, int** 
typeAttributesHistogramCount, TypeStat* typeStat, int typeStatCount, oid** 
result, int* resultCount, IncidentFKs* links, oid** ontmetadata, int 
ontmetadataCount, BAT *ontmetaBat, OntClass *ontclassSet) {
        int             i, j;
 
        for (i = 0; i < freqCSset->numCSadded; ++i) {
                CS cs = (CS) freqCSset->items[i];
 
                // get table name
-               getTableName(&labels[i], i,  typeAttributesCount, 
typeAttributesHistogram, typeAttributesHistogramCount, typeStat, typeStatCount, 
result, resultCount, links, ontmetadata, ontmetadataCount);
+               getTableName(&labels[i], i,  typeAttributesCount, 
typeAttributesHistogram, typeAttributesHistogramCount, typeStat, typeStatCount, 
result, resultCount, links, ontmetadata, ontmetadataCount, ontmetaBat, 
ontclassSet);
 
                // copy attribute oids (names)
                labels[i].numProp = cs.numProp;
@@ -2636,7 +2656,7 @@ void freeOntologyLookupResult(oid** onto
 }
 
 /* Creates labels for all CS (without a parent). */
-CSlabel* createLabels(CSset* freqCSset, CSrel* csrelSet, int num, BAT *sbat, 
BATiter si, BATiter pi, BATiter oi, oid *subjCSMap, int *csIdFreqIdxMap, oid** 
ontattributes, int ontattributesCount, oid** ontmetadata, int ontmetadataCount, 
OntoUsageNode** ontoUsageTree) {
+CSlabel* createLabels(CSset* freqCSset, CSrel* csrelSet, int num, BAT *sbat, 
BATiter si, BATiter pi, BATiter oi, oid *subjCSMap, int *csIdFreqIdxMap, oid** 
ontattributes, int ontattributesCount, oid** ontmetadata, int ontmetadataCount, 
OntoUsageNode** ontoUsageTree, BAT *ontmetaBat, OntClass *ontclassSet) {
 #if USE_TYPE_NAMES
        char*           typeAttributes[] = {
                                "<http://ogp.me/ns#type>",
@@ -2713,7 +2733,7 @@ CSlabel* createLabels(CSset* freqCSset, 
        // Assigning Names
        labels = initLabels(freqCSset);
 #if USE_TABLE_NAME
-       getAllLabels(labels, freqCSset, typeAttributesCount, 
typeAttributesHistogram, typeAttributesHistogramCount, typeStat, typeStatCount, 
ontologyLookupResult, ontologyLookupResultCount, links, ontmetadata, 
ontmetadataCount);
+       getAllLabels(labels, freqCSset, typeAttributesCount, 
typeAttributesHistogram, typeAttributesHistogramCount, typeStat, typeStatCount, 
ontologyLookupResult, ontologyLookupResultCount, links, ontmetadata, 
ontmetadataCount, ontmetaBat, ontclassSet);
        if (typeStatCount > 0) free(typeStat);
 #endif
 
diff --git a/monetdb5/extras/rdf/rdflabels.h b/monetdb5/extras/rdf/rdflabels.h
--- a/monetdb5/extras/rdf/rdflabels.h
+++ b/monetdb5/extras/rdf/rdflabels.h
@@ -21,6 +21,7 @@
 #define _RDFLABELS_H_
 
 #include "rdfschema.h"
+#include "rdfontologyload.h"
 
 // Counts the occurances of type attribute values
 typedef struct TypeAttributesFreq {
@@ -107,7 +108,8 @@ rdf_export void
 getStringBetweenQuotes(str* out, str in);
 
 rdf_export CSlabel*
-createLabels(CSset* freqCSset, CSrel* csrelSet, int num, BAT *sbat, BATiter 
si, BATiter pi, BATiter oi, oid *subjCSMap, int *csIdFreqIdxMap, oid** 
ontattributes, int ontattributesCount, oid** ontmetadata, int ontmetadataCount, 
OntoUsageNode** ontoUsageTree);
+createLabels(CSset* freqCSset, CSrel* csrelSet, int num, BAT *sbat, BATiter 
si, BATiter pi, BATiter oi, oid *subjCSMap, int *csIdFreqIdxMap, oid** 
ontattributes, int ontattributesCount, oid** ontmetadata, 
+               int ontmetadataCount, OntoUsageNode** ontoUsageTree, BAT 
*ontmetaBat, OntClass *ontclassSet);
 
 rdf_export void
 exportLabels(CSlabel* labels, CSset* freqCSset, CSrel* 
csRelBetweenMergeFreqSet, int freqThreshold, int* mTblIdxFreqIdxMapping,int* 
mfreqIdxTblIdxMapping,int numTables);
diff --git a/monetdb5/extras/rdf/rdfontologyload.c 
b/monetdb5/extras/rdf/rdfontologyload.c
--- a/monetdb5/extras/rdf/rdfontologyload.c
+++ b/monetdb5/extras/rdf/rdfontologyload.c
@@ -134,45 +134,6 @@ tripleHandler(void* user_data, const rap
        return; 
 }
 
-
-/*
-static 
-OntClass* creatOC(oid ocId, oid subClassofId, int numP, oid* buff)
-{
-       OntClass *oc = (OntClass*)malloc(sizeof(OntClass)); 
-       oc->lstProp =  (oid*) malloc(sizeof(oid) * numP);
-       
-       if (oc->lstProp == NULL){
-               printf("Malloc failed. at %d", numP);
-               exit(-1); 
-       }
-
-       copyOidSet(oc->lstProp, buff, numP); 
-       oc->ocId = ocId;
-       oc->subclassof = subClassofId; 
-       oc->numProp = numP; 
-       oc->numAllocation = numP; 
-       return oc; 
-}
-
-str
-RDFOntologyRead(int *ret, bat *ontcBatid, bat *ontaBatid, OntClassset* 
ontclassset){
-       BAT* ontcBat;   // BAT for ontology classes 
-       BAT* ontaBat;   // BAT for ontology 
-       if ((ontcBat = BATdescriptor(*ontcBatid)) == NULL) {
-               throw(MAL, "rdf.RDFOntologyParser", RUNTIME_OBJECT_MISSING);
-       }
-
-       if ((ontaBat = BATdescriptor(*ontaBatid)) == NULL) {
-               BBPreleaseref(ontcBat->batCacheid);
-               throw(MAL, "rdf.RDFOntologyParser", RUNTIME_OBJECT_MISSING);
-       }
-
-       *ret = 1; 
-       return MAL_SUCCEED; 
-}
-
-*/
 str
 RDFOntologyParser(int *xret, str *location, str *schema){
 
@@ -268,6 +229,144 @@ extern oid **ontattributes;
 extern int ontattributesCount;
 extern oid **ontmetadata;
 extern int ontmetadataCount;
+extern BAT *ontmetaBat;        //To lookup which ontology class based on the 
class oid
+extern OntClass *ontclassSet;  //Store the class Idx & superClass Idxes
+
+/*
+ * Return the hierarchy depth of the ontology class at index Idx in
+ * tmpontclassSet: 0 for a class without super classes, otherwise
+ * 1 + the largest depth among its super classes (scIdxes[0..numsc-1]).
+ *
+ * The result is cached in tmpontclassSet[Idx].hierDepth, where -1 means
+ * "not computed yet". Caching here (and not only in the caller) keeps a
+ * class that is reachable through several super-class paths from being
+ * re-walked once per path.
+ *
+ * While a class is being processed its hierDepth is temporarily set to
+ * DEPTH_IN_PROGRESS. A super-class link that points back to such a class
+ * closes a cycle and is skipped, so cyclic input cannot recurse forever;
+ * for classes on a cycle the resulting depth depends on the visiting order.
+ */
+static 
+int getDepth(int Idx, OntClass *tmpontclassSet){
+       enum { DEPTH_UNKNOWN = -1, DEPTH_IN_PROGRESS = -2 };
+       int maxDepth = 0;
+       int i; 
+       int scIdx;
+       int tmpDepth;
+
+       if (tmpontclassSet[Idx].hierDepth != DEPTH_UNKNOWN
+           && tmpontclassSet[Idx].hierDepth != DEPTH_IN_PROGRESS)
+               return tmpontclassSet[Idx].hierDepth;   //Computed already
+
+       tmpontclassSet[Idx].hierDepth = DEPTH_IN_PROGRESS;
+
+       //With numsc == 0 the loop body never runs and the depth stays 0
+       for (i = 0; i < tmpontclassSet[Idx].numsc; i++){
+               scIdx = tmpontclassSet[Idx].scIdxes[i];
+
+               //Back-edge of a cycle: this super class is still on the call stack
+               if (tmpontclassSet[scIdx].hierDepth == DEPTH_IN_PROGRESS) continue;
+
+               tmpDepth = 1 + getDepth(scIdx, tmpontclassSet);
+               if (tmpDepth > maxDepth) maxDepth = tmpDepth; 
+       }
+
+       tmpontclassSet[Idx].hierDepth = maxDepth;
+       return maxDepth; 
+}
+
+/*
+ * Free the scIdxes arrays of the first `count` entries of `set`, then the
+ * array itself. Used to undo a partially built class set on failure.
+ */
+static
+void freePartialOntClassSet(OntClass *set, int count){
+       int     k;
+
+       for (k = 0; k < count; k++){
+               free(set[k].scIdxes);
+       }
+       free(set);
+}
+
+/*
+ * Build the ontology class lookup structures from the metadata table.
+ *
+ * ontmetadat[0][i] holds a class oid and ontmetadat[1][i] the oid of one of
+ * its super classes, or BUN_NONE when there is none, for
+ * i in [0, ontmetadataCount). Note that the parameter ontmetadataCount
+ * shadows the file-scope variable of the same name.
+ *
+ * On success:
+ *   - the global ontmetaBat is a void-headed BAT (seqbase 0) holding every
+ *     distinct class / super-class oid once, so the position of an oid is
+ *     its class index;
+ *   - the global ontclassSet is a malloc'ed array with one OntClass per
+ *     position in ontmetaBat, carrying the class oid, the indexes of its
+ *     super classes and its hierarchy depth (see getDepth).
+ *
+ * Returns MAL_SUCCEED, or an RDF exception when the BAT, its hash or any
+ * memory allocation fails. On failure ontclassSet is left untouched and
+ * everything malloc'ed here is released; ontmetaBat keeps pointing to the
+ * (possibly partially filled) BAT.
+ */
+static 
+str buildOntologyClassesInfo(oid **ontmetadat, int ontmetadataCount){
+
+       int     i; 
+       oid     classOid; //Class oid read from ontmetadat[0][i]
+       oid     scOid;    //Super-class oid read from ontmetadat[1][i]
+       int     classIdx; 
+       int     scIdx; 
+
+       BUN     tmpBun = BUN_NONE;
+       int     numClass;
+       BATiter ontmetaBati; 
+       BUN     p,q;
+       oid     *tmpOid;
+       int*    _tmpIdxes; 
+       OntClass *tmpontclassSet = NULL;
+       //Read all ontmetadata and store them in the ontmetaBat
+       
+       ontmetaBat = BATnew(TYPE_void, TYPE_oid, ontmetadataCount);
+       if (ontmetaBat == NULL){
+               throw(RDF, "buildOntologyClassesInfo", "Cannot create ontmetaBat");
+       }
+       BATseqbase(ontmetaBat, 0);
+       (void)BATprepareHash(BATmirror(ontmetaBat));
+       if (!(ontmetaBat->T->hash)){
+               throw(RDF, "buildOntologyClassesInfo", "Cannot allocate the hash for Bat");
+       }
+       for (i = 0; i < ontmetadataCount; i++){
+               classOid = ontmetadat[0][i];
+               assert(classOid != BUN_NONE); 
+
+               tmpBun = BUNfnd(BATmirror(ontmetaBat),&classOid);
+               if (tmpBun == BUN_NONE){        //If it is a new class
+                       if (BUNappend(ontmetaBat,&classOid, TRUE) == NULL)    
+                               throw(RDF, "buildOntologyClassesInfo", "Cannot insert to ontmetaBat");
+               } 
+
+               scOid = ontmetadat[1][i];
+
+               if (scOid != BUN_NONE){ //The superClass oid is there
+                       tmpBun = BUNfnd(BATmirror(ontmetaBat),&scOid);  
+                       if (tmpBun == BUN_NONE){        //If it is a new class
+                               if (BUNappend(ontmetaBat, &scOid, TRUE) == NULL)    
+                                       throw(RDF, "buildOntologyClassesInfo", "Cannot insert to ontmetaBat");
+                       } 
+               }
+       }
+
+       numClass = (int) BATcount(ontmetaBat);        
+       printf("Number of ontology classes added: %d\n", numClass);
+       
+
+       // Add metadata  and hierarchy information
+       tmpontclassSet = (OntClass *) malloc(sizeof(OntClass) * numClass);
+       //malloc(0) may legitimately return NULL, so only a non-empty request can fail
+       if (numClass > 0 && tmpontclassSet == NULL){
+               throw(RDF, "buildOntologyClassesInfo", "Cannot allocate memory for the ontology class set");
+       }
+       ontmetaBati = bat_iterator(ontmetaBat);
+       i = 0;
+
+       //NOTE(review): these casts look redundant since all four variables
+       //are used below -- confirm before removing them
+       (void) tmpOid;
+       (void) q;
+       (void) p;
+       (void) ontmetaBati;
+       
+
+       BATloop(ontmetaBat, p, q){
+               tmpOid = (oid *) BUNtloc(ontmetaBati,p);
+               
+               tmpontclassSet[i].cOid = *tmpOid;
+
+               //Init other info
+               tmpontclassSet[i].scIdxes = (int *) malloc(sizeof(int) * NUMSC_PER_ONTCLASS);
+               if (tmpontclassSet[i].scIdxes == NULL){
+                       //Only entries [0, i) own a scIdxes array at this point
+                       freePartialOntClassSet(tmpontclassSet, i);
+                       throw(RDF, "buildOntologyClassesInfo", "Cannot allocate memory for the super class indexes");
+               }
+               tmpontclassSet[i].numsc = 0;
+               tmpontclassSet[i].numAllocation = NUMSC_PER_ONTCLASS;
+               tmpontclassSet[i].hierDepth = -1;
+
+               i++;
+       }
+       
+
+       //Add sc
+       for (i = 0; i < ontmetadataCount; i++){
+               //Get index
+               classOid = ontmetadat[0][i];
+               scOid = ontmetadat[1][i];
+               tmpBun = BUNfnd(BATmirror(ontmetaBat), &classOid);
+               assert(tmpBun != BUN_NONE);
+
+               classIdx = (int) (tmpBun); 
+
+               if (scOid == BUN_NONE) continue; 
+
+               tmpBun = BUNfnd(BATmirror(ontmetaBat), &scOid);
+               assert(tmpBun != BUN_NONE);
+               scIdx = (int) (tmpBun); 
+
+               //Add scIdx to tmpontclassSet[classIdx], growing its array when full
+               if (tmpontclassSet[classIdx].numsc == tmpontclassSet[classIdx].numAllocation){
+                       _tmpIdxes = realloc(tmpontclassSet[classIdx].scIdxes, sizeof(int) * (tmpontclassSet[classIdx].numAllocation + NUMSC_PER_ONTCLASS));
+                       if (!_tmpIdxes){
+                               //A failed realloc leaves the old array valid, so every
+                               //entry still owns a scIdxes array that must be freed
+                               freePartialOntClassSet(tmpontclassSet, numClass);
+                               throw(RDF, "buildOntologyClassesInfo", "Couldn't realloc memory for the super class indexes");
+                       }
+
+                       tmpontclassSet[classIdx].scIdxes = (int*)_tmpIdxes;
+                       tmpontclassSet[classIdx].numAllocation += NUMSC_PER_ONTCLASS;
+               }
+               tmpontclassSet[classIdx].scIdxes[tmpontclassSet[classIdx].numsc] = scIdx;
+               tmpontclassSet[classIdx].numsc++;
+       }
+
+       //Get the hierarchy depth information 
+       for (i = 0; i < numClass; i++){
+               tmpontclassSet[i].hierDepth = getDepth(i,tmpontclassSet);
+       }
+
+       ontclassSet = tmpontclassSet;
+       return MAL_SUCCEED; 
+}
 
 str
 RDFloadsqlontologies(int *ret, bat *auriid, bat *aattrid, bat *muriid, bat 
*msuperid){
@@ -325,6 +424,7 @@ RDFloadsqlontologies(int *ret, bat *auri
        ontattributes[1] = malloc(sizeof(str) * auriCount); // attr
        if (!ontattributes[0] || !ontattributes[1]) fprintf(stderr, "ERROR: 
Couldn't malloc memory!\n");
 
+
        BATloop(auri, p, q){
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to