Changeset: d928f17e12f0 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d928f17e12f0
Modified Files:
	monetdb5/extras/rdf/rdflabels.c
	monetdb5/extras/rdf/rdflabels.h
	monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:
Use patch from Linnea for removing infrequent props in diagram diffs (202 lines): diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c --- a/monetdb5/extras/rdf/rdflabels.c +++ b/monetdb5/extras/rdf/rdflabels.c @@ -1158,13 +1158,15 @@ void createOntologyLookupResult(oid** re * Call GraphViz to create the graphic: "dot -Tpdf -O UMLxxx.dot" to create "UMLxxx.dot.pdf" */ static -void printUML2(CSset *freqCSset, CSlabel* labels, int freqThreshold, CSrel *csRelMergeFreqSet, BATiter mapi, BAT *mbat) { - int i, j; +void printUML2(CStableStat *cstablestat, CSPropTypes* csPropTypes, int freqThreshold, CSrel *csRelMergeFreqSet, BATiter mapi, BAT *mbat, int numTables, int* mTblIdxFreqIdxMapping, int* csTblIdxMapping, CSset* freqCSset) { + int i, j, k; FILE *fout; char filename[20], tmp[10]; int smallest = -1, biggest = -1; + (void) csTblIdxMapping; + strcpy(filename, "UML"); sprintf(tmp, "%d", freqThreshold); strcat(filename, tmp); @@ -1178,38 +1180,37 @@ void printUML2(CSset *freqCSset, CSlabel fprintf(fout, "node [shape=\"none\"];\n\n"); // find biggest and smallest table - for (i = 0; i < freqCSset->numCSadded; ++i) { - CS cs = (CS) freqCSset->items[i]; - if (!isCSTable(cs,labels[i].name)) continue; // ignore + for (i = 0; i < numTables; ++i) { + int csIdx = mTblIdxFreqIdxMapping[i]; // set first values - if (smallest == -1) smallest = i; - if (biggest == -1) biggest = i; + if (smallest == -1) smallest = csIdx; + if (biggest == -1) biggest = csIdx; - if (cs.coverage < freqCSset->items[smallest].coverage) smallest = i; - if (cs.coverage > freqCSset->items[biggest].coverage) biggest = i; + if (freqCSset->items[csIdx].coverage < freqCSset->items[smallest].coverage) smallest = csIdx; + if (freqCSset->items[csIdx].coverage > freqCSset->items[biggest].coverage) biggest = csIdx; } // for each table - for (i = 0; i < freqCSset->numCSadded; ++i) { + for (i = 0; i < numTables; ++i) { + int csIdx = mTblIdxFreqIdxMapping[i]; int width; str labelStrEscaped = 
NULL; - CS cs = (CS) freqCSset->items[i]; - if (!isCSTable(cs, labels[i].name)) continue; // ignore + if(!isCSTable(freqCSset->items[csIdx], cstablestat->lstcstable[i].tblname)) continue; // ignore small tables // print table header // set table width between 300 (smallest coverage) and 600 (biggest coverage) px, using log10 logarithm - width = (int) ((300 + 300 * (log10(freqCSset->items[i].coverage) - log10(freqCSset->items[smallest].coverage)) / (log10(freqCSset->items[biggest].coverage) - log10(freqCSset->items[smallest].coverage))) + 0.5); - fprintf(fout, "\"" BUNFMT "\" [\n", cs.csId); + width = (int) ((300 + 300 * (log10(freqCSset->items[csIdx].coverage) - log10(freqCSset->items[smallest].coverage)) / (log10(freqCSset->items[biggest].coverage) - log10(freqCSset->items[smallest].coverage))) + 0.5); + fprintf(fout, "\"%d\" [\n", csIdx); fprintf(fout, "label = <<TABLE BORDER=\"0\" CELLBORDER=\"1\" CELLSPACING=\"0\">\n"); - getTblName(&labelStrEscaped, labels[i].name, mapi, mbat); - fprintf(fout, "<TR><TD WIDTH=\"%d\"><B>%s (#triples: %d, #tuples: %d)</B></TD></TR>\n", width, labelStrEscaped, cs.coverage, cs.support); + getTblName(&labelStrEscaped, cstablestat->lstcstable[i].tblname, mapi, mbat); + fprintf(fout, "<TR><TD WIDTH=\"%d\"><B>%s (#triples: %d, #tuples: %d)</B></TD></TR>\n", width, labelStrEscaped, freqCSset->items[csIdx].coverage, freqCSset->items[csIdx].support); GDKfree(labelStrEscaped); // print columns - for (j = 0; j < cs.numProp; ++j) { + for (j = 0; j < csPropTypes[i].numProp; ++j) { str propStr; str tmpStr; char *propStrEscaped = NULL; @@ -1218,16 +1219,20 @@ void printUML2(CSset *freqCSset, CSlabel #endif str color; - takeOid(cs.lstProp[j], &tmpStr); +#if REMOVE_INFREQ_PROP + if (csPropTypes[i].lstPropTypes[j].defColIdx == -1) continue; // ignore infrequent props +#endif + + takeOid(freqCSset->items[csIdx].lstProp[j], &tmpStr); // assign color (the more tuples the property occurs in, the darker) - if ((1.0 * cs.lstPropSupport[j])/cs.support > 
0.8) { + if ((1.0 * freqCSset->items[csIdx].lstPropSupport[j])/freqCSset->items[csIdx].support > 0.8) { color = "#5555FF"; - } else if ((1.0 * cs.lstPropSupport[j])/cs.support > 0.6) { + } else if ((1.0 * freqCSset->items[csIdx].lstPropSupport[j])/freqCSset->items[csIdx].support > 0.6) { color = "#7777FF"; - } else if ((1.0 * cs.lstPropSupport[j])/cs.support > 0.4) { + } else if ((1.0 * freqCSset->items[csIdx].lstPropSupport[j])/freqCSset->items[csIdx].support > 0.4) { color = "#9999FF"; - } else if ((1.0 * cs.lstPropSupport[j])/cs.support > 0.2) { + } else if ((1.0 * freqCSset->items[csIdx].lstPropSupport[j])/freqCSset->items[csIdx].support > 0.2) { color = "#BBBBFF"; } else { color = "#DDDDFF"; @@ -1241,10 +1246,10 @@ void printUML2(CSset *freqCSset, CSlabel escapeURI(propStrEscaped); #if USE_SHORT_NAMES getPropNameShort(&propStrShort, propStr); - fprintf(fout, "<TR><TD BGCOLOR=\"%s\" PORT=\"%s\">%s (%d%%)</TD></TR>\n", color, propStrEscaped, propStrShort, (100 * cs.lstPropSupport[j])/cs.support); + fprintf(fout, "<TR><TD BGCOLOR=\"%s\" PORT=\"%s\">%s (%d%%)</TD></TR>\n", color, propStrEscaped, propStrShort, (100 * freqCSset->items[csIdx].lstPropSupport[j])/freqCSset->items[csIdx].support); GDKfree(propStrShort); #else - fprintf(fout, "<TR><TD BGCOLOR=\"%s\" PORT=\"%s\">%s (%d%%)</TD></TR>\n", color, propStrEscaped, propStrEscaped, (100 * cs.lstPropSupport[j])/cs.support); + fprintf(fout, "<TR><TD BGCOLOR=\"%s\" PORT=\"%s\">%s (%d%%)</TD></TR>\n", color, propStrEscaped, propStrEscaped, (100 * freqCSset->items[csIdx].lstPropSupport[j])/freqCSset->items[csIdx].support); #endif GDKfree(propStr); @@ -1257,9 +1262,13 @@ void printUML2(CSset *freqCSset, CSlabel } // for each foreign key relationship - for (i = 0; i < freqCSset->numCSadded; ++i) { - int from = i; + for (i = 0; i < numTables; ++i) { + int csIdx = mTblIdxFreqIdxMapping[i]; + int from = csIdx; CSrel rel = csRelMergeFreqSet[from]; + + if(!isCSTable(freqCSset->items[csIdx], 
cstablestat->lstcstable[i].tblname)) continue; // ignore small tables + if (!isCSTable(freqCSset->items[from], 0)) continue; for (j = 0; j < rel.numRef; ++j) { int to = rel.lstRefFreqIdx[j]; @@ -1271,9 +1280,16 @@ void printUML2(CSset *freqCSset, CSlabel char *propStrShort = NULL; #endif - if (!isCSTable(freqCSset->items[to], 0)) continue; + if (!isCSTable(freqCSset->items[to], cstablestat->lstcstable[csTblIdxMapping[to]].tblname)) continue; // ignore small tables if (rel.lstCnt[j] < freqCSset->items[to].support * MIN_FK_FREQUENCY) continue; +#if REMOVE_INFREQ_PROP + // find prop + k = 0; + while (freqCSset->items[csIdx].lstProp[k] != prop) ++k; + if (csPropTypes[i].lstPropTypes[k].defColIdx == -1) continue; // ignore infrequent props +#endif + takeOid(prop, &tmpStr); // escape column names @@ -1285,10 +1301,10 @@ void printUML2(CSset *freqCSset, CSlabel #if USE_SHORT_NAMES getPropNameShort(&propStrShort, propStr); - fprintf(fout, "\""BUNFMT"\":\"%s\" -> \""BUNFMT"\" [label=\"%s\"];\n", freqCSset->items[from].csId, propStrEscaped, freqCSset->items[to].csId, propStrShort); // print foreign keys to dot file + fprintf(fout, "\"%d\":\"%s\" -> \"%d\" [label=\"%s\"];\n", from, propStrEscaped, to, propStrShort); // print foreign keys to dot file GDKfree(propStrShort); #else - fprintf(fout, "\""BUNFMT"\":\"%s\" -> \""BUNFMT"\" [label=\"%s\"];\n", freqCSset->items[from].csId, propStrEscaped, freqCSset->items[to].csId, propStrEscaped); // print foreign keys to dot file + fprintf(fout, "\"%d\":\"%s\" -> \"%d\" [label=\"%s\"];\n", from, propStrEscaped, to, propStrEscaped); // print foreign keys to dot file #endif GDKfree(propStr); @@ -2530,7 +2546,7 @@ void freeLabels(CSlabel* labels, CSset* GDKfree(labels); } -void exportLabels(CSlabel* labels, CSset* freqCSset, CSrel* csRelMergeFreqSet, int freqThreshold, BATiter mapi, BAT *mbat) { +void exportLabels(CSset* freqCSset, CSrel* csRelMergeFreqSet, int freqThreshold, BATiter mapi, BAT *mbat, CStableStat* cstablestat, CSPropTypes 
*csPropTypes, int numTables, int* mTblIdxFreqIdxMapping, int* csTblIdxMapping) { int **relationMetadataCount; Relation ***relationMetadata; @@ -2549,7 +2565,7 @@ void exportLabels(CSlabel* labels, CSset // Print and Export printf("exportLabels: printUML \n"); - printUML2(freqCSset, labels, freqThreshold, csRelMergeFreqSet, mapi, mbat); + printUML2(cstablestat, csPropTypes, freqThreshold, csRelMergeFreqSet, mapi, mbat, numTables, mTblIdxFreqIdxMapping, csTblIdxMapping, freqCSset); printf("exportLabels: Done \n"); freeRelationMetadata(relationMetadata, freqCSset); freeRelationMetadataCount(relationMetadataCount, freqCSset->numCSadded); diff --git a/monetdb5/extras/rdf/rdflabels.h b/monetdb5/extras/rdf/rdflabels.h --- a/monetdb5/extras/rdf/rdflabels.h +++ b/monetdb5/extras/rdf/rdflabels.h @@ -119,7 +119,7 @@ createLabels(CSset* freqCSset, CSrel* cs int ontmetadataCount, OntoUsageNode** ontoUsageTree, BAT *ontmetaBat, OntClass *ontclassSet); rdf_export void -exportLabels(CSlabel* labels, CSset* freqCSset, CSrel* csRelBetweenMergeFreqSet, int freqThreshold, BATiter mapi, BAT *mbat); +exportLabels(CSset* freqCSset, CSrel* csRelBetweenMergeFreqSet, int freqThreshold, BATiter mapi, BAT *mbat, CStableStat* cstablestat, CSPropTypes *csPropTypes, int numTables, int* mTblIdxFreqIdxMapping, int* csTblIdxMapping); rdf_export str updateLabel(int ruleNumber, CSset *freqCSset, CSlabel **labels, int newCS, int mergeCSFreqId, int freqCS1, int freqCS2, oid name, int isType, int isOnto, int isFK, oid **ontmetadata, int ontmetadataCount, int *lstFreqId, int numIds); diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -10864,7 +10864,7 @@ RDFreorganize(int *ret, CStableStat *cst printf("Start exporting labels \n"); #if EXPORT_LABEL - exportLabels(labels, freqCSset, csRelMergeFreqSet, *freqThreshold, mi, mbat); + exportLabels(freqCSset, csRelMergeFreqSet, *freqThreshold, mi, mbat, 
cstablestat, csPropTypes, numTables, mTblIdxFreqIdxMapping, csTblIdxMapping); #endif curT = clock(); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list