Changeset: cbde82c8ce68 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=cbde82c8ce68
Modified Files:
monetdb5/extras/rdf/rdfretrieval.c
monetdb5/extras/rdf/rdfretrieval.h
monetdb5/extras/rdf/rdfschema.c
sql/backends/monet5/sql.mx
sql/scripts/30_rdf.sql
Branch: rdf
Log Message:
SQL procedure to create a subschema
diffs (truncated from 727 to 300 lines):
diff --git a/monetdb5/extras/rdf/rdfretrieval.c
b/monetdb5/extras/rdf/rdfretrieval.c
--- a/monetdb5/extras/rdf/rdfretrieval.c
+++ b/monetdb5/extras/rdf/rdfretrieval.c
@@ -24,65 +24,25 @@
#include "rdflabels.h"
static
-char** initAdjacencyMatrix(int csCount) {
- char **matrix = NULL; // matrix[from][to]
- int i, j;
-
- matrix = (char **) malloc(sizeof(char *) * csCount);
- if (!matrix) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
-
- for (i = 0; i < csCount; ++i) {
- matrix[i] = (char *) malloc(sizeof(char *) * csCount);
- if (!matrix) fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
-
- for (j = 0; j < csCount; ++j) {
- matrix[i][j] = 0;
- }
+int edgeExists(long int from, long int to, long int* adjacency_from, long int*
adjacency_to, int adjacencyCount) {
+ int i;
+ for (i = 0; i < adjacencyCount; ++i) {
+ if (adjacency_from[i] == from && adjacency_to[i] == to) return
1;
}
-
- return matrix;
+ return 0;
}
static
-void createAdjacencyMatrix(char** matrix, CSset* freqCSset, CSmergeRel*
csRelBetweenMergeFreqSet) {
- int i, j, k;
-
- for (i = 0; i < freqCSset->numCSadded; ++i) {
- if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
-
- for (j = 0; j < freqCSset->items[i].numProp; ++j) { // propNo
in CS order
- // check foreign key frequency
- int sum = 0;
- for (k = 0; k < csRelBetweenMergeFreqSet[i].numRef;
++k) {
- if (csRelBetweenMergeFreqSet[i].lstPropId[k] ==
freqCSset->items[i].lstProp[j]) {
- sum +=
csRelBetweenMergeFreqSet[i].lstCnt[k];
- }
- }
-
- for (k = 0; k < csRelBetweenMergeFreqSet[i].numRef;
++k) { // propNo in CSrel
- if (csRelBetweenMergeFreqSet[i].lstPropId[k] ==
freqCSset->items[i].lstProp[j]) {
- int to =
csRelBetweenMergeFreqSet[i].lstRefFreqIdx[k];
- if (i == to) continue; // ignore self
references
- if ((int) (100.0 *
csRelBetweenMergeFreqSet[i].lstCnt[k] / sum + 0.5) < FK_FREQ_THRESHOLD)
continue; // foreign key is not frequent enough
- matrix[i][to] = 1;
- }
- }
- }
- }
-}
-
-static
-NodeStat* initNodeStats(CSset* freqCSset) {
+NodeStat* initNodeStats1(long int* table_freq, int tableCount) {
NodeStat* nodeStats = NULL;
int i;
- nodeStats = (NodeStat *) malloc(sizeof(NodeStat) *
freqCSset->numCSadded);
+ nodeStats = (NodeStat *) malloc(sizeof(NodeStat) * tableCount);
if (!nodeStats) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
- for (i = 0; i < freqCSset->numCSadded; ++i) {
- if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
- nodeStats[i].origWeight = freqCSset->items[i].support;
- nodeStats[i].weight = freqCSset->items[i].support; // weight =
origWeight
+ for (i = 0; i < tableCount; ++i) {
+ nodeStats[i].origWeight = table_freq[i];
+ nodeStats[i].weight = table_freq[i]; // weight = origWeight
nodeStats[i].steps = -1;
nodeStats[i].predecessor = -1;
}
@@ -91,30 +51,11 @@ NodeStat* initNodeStats(CSset* freqCSset
}
static
-NodeStat* initNodeStats23(CSset* freqCSset) {
- NodeStat* nodeStats = NULL;
- int i;
-
- nodeStats = (NodeStat *) malloc(sizeof(NodeStat) *
freqCSset->numCSadded);
- if (!nodeStats) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
-
- for (i = 0; i < freqCSset->numCSadded; ++i) {
- if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
- nodeStats[i].origWeight = freqCSset->items[i].support;
- nodeStats[i].weight = 0;
- nodeStats[i].steps = -1; // not used
- nodeStats[i].predecessor = 0; // not used
- }
-
- return nodeStats;
-}
-
-static
-void bfs1(int root, CSset* freqCSset, char** adjacencyMatrix, int* queue, int*
visited, int* isInQueue, int* queuePosition, int* queueLength, NodeStat*
nodeStats) {
+void bfs1(int root, long int* table_id, int tableCount, long int*
adjacency_from, long int* adjacency_to, int adjacencyCount, int* queue, int*
visited, int* isInQueue, int* queuePosition, int* queueLength, NodeStat*
nodeStats) {
int i;
- for (i = 0; i < freqCSset->numCSadded; ++i) {
- if (adjacencyMatrix[root][i]) {
+ for (i = 0; i < tableCount; ++i) {
+ if (edgeExists(table_id[root], table_id[i], adjacency_from,
adjacency_to, adjacencyCount)) {
if (nodeStats[i].steps == -1) {
// no previous path to this node
nodeStats[i].weight = nodeStats[root].weight +
nodeStats[i].origWeight;
@@ -160,7 +101,7 @@ void bfs1(int root, CSset* freqCSset, ch
if (!visited[i] && !isInQueue[i]) {
// add to queue
- queue[((*queueLength + *queuePosition) %
freqCSset->numCSadded)] = i;
+ queue[((*queueLength + *queuePosition) %
tableCount)] = i;
*queueLength += 1;
isInQueue[i] = 1;
}
@@ -169,26 +110,26 @@ void bfs1(int root, CSset* freqCSset, ch
}
if (*queueLength > 0) {
- visited[queue[(*queuePosition % freqCSset->numCSadded)]] = 1;
- isInQueue[queue[(*queuePosition % freqCSset->numCSadded)]] = 0;
+ visited[queue[(*queuePosition % tableCount)]] = 1;
+ isInQueue[queue[(*queuePosition % tableCount)]] = 0;
*queuePosition += 1;
*queueLength -= 1;
- bfs1(queue[((*queuePosition + freqCSset->numCSadded - 1) %
freqCSset->numCSadded)], freqCSset, adjacencyMatrix, queue, visited, isInQueue,
queuePosition, queueLength, nodeStats);
+ bfs1(queue[((*queuePosition + tableCount - 1) % tableCount)],
table_id, tableCount, adjacency_from, adjacency_to, adjacencyCount, queue,
visited, isInQueue, queuePosition, queueLength, nodeStats);
}
}
static
-void addNode1(char** adjacencyMatrix, NodeStat* nodeStats, CSset* freqCSset,
int root, char initial) {
- int queue[freqCSset->numCSadded]; // cyclic array
- int visited[freqCSset->numCSadded];
- int isInQueue[freqCSset->numCSadded];
+void addNode1(long int* adjacency_from, long int* adjacency_to, int
adjacencyCount, NodeStat* nodeStats, long int* table_id, int tableCount, int
root, char initial) {
+ int queue[tableCount]; // cyclic array
+ int visited[tableCount];
+ int isInQueue[tableCount];
int queuePosition; // next element in queue to view at
int queueLength;
int pathId, pathIdTmp;
int i;
// init
- for (i = 0; i < freqCSset->numCSadded; ++i) {
+ for (i = 0; i < tableCount; ++i) {
queue[i] = -1;
visited[i] = 0;
isInQueue[i] = 0;
@@ -227,11 +168,11 @@ void addNode1(char** adjacencyMatrix, No
assert(steps == i);
}
- bfs1(root, freqCSset, adjacencyMatrix, queue, visited, isInQueue,
&queuePosition, &queueLength, nodeStats);
+ bfs1(root, table_id, tableCount, adjacency_from, adjacency_to,
adjacencyCount, queue, visited, isInQueue, &queuePosition, &queueLength,
nodeStats);
}
-int* retrieval1(int root, int numNodesMax, int* numNodesActual, CSset*
freqCSset, CSmergeRel* csRelBetweenMergeFreqSet) {
- char **adjacencyMatrix = NULL;
+static
+int* retrieval1(int root, int numNodesMax, int* numNodesActual, long int*
table_id, str* table_name, long int* table_freq, int tableCount, long int*
adjacency_from, long int* adjacency_to, int adjacencyCount) {
NodeStat *nodeStats = NULL;
int numNodes;
int *chosenNodes = NULL;
@@ -245,21 +186,18 @@ int* retrieval1(int root, int numNodesMa
chosenNodes = (int *) malloc(sizeof(int) * numNodesMax);
if (!chosenNodes) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
- adjacencyMatrix = initAdjacencyMatrix(freqCSset->numCSadded);
- createAdjacencyMatrix(adjacencyMatrix, freqCSset,
csRelBetweenMergeFreqSet);
- nodeStats = initNodeStats(freqCSset);
+ nodeStats = initNodeStats1(table_freq, tableCount);
numNodes = 1;
// add root node
- addNode1(adjacencyMatrix, nodeStats, freqCSset, root, 1);
+ addNode1(adjacency_from, adjacency_to, adjacencyCount, nodeStats,
table_id, tableCount, root, 1);
// add nodes
while (numNodes < numNodesMax) {
// get top node (highest fraction (weight/steps))
int top = -1;
- for (i = 0; i < freqCSset->numCSadded; ++i) {
+ for (i = 0; i < tableCount; ++i) {
int topWeight, testWeight;
- if (freqCSset->items[i].parentFreqIdx != -1) continue;
// ignore
if (nodeStats[i].steps == -1) continue; // non-reachable
if (nodeStats[i].steps == 0) continue; // already chosen
if (numNodes + nodeStats[i].steps > numNodesMax)
continue; // path too long
@@ -275,36 +213,30 @@ int* retrieval1(int root, int numNodesMa
}
if (top == -1) break; // not enough nodes found
numNodes += nodeStats[top].steps;
- addNode1(adjacencyMatrix, nodeStats, freqCSset, top, 0); // add
node(s)
+ addNode1(adjacency_from, adjacency_to, adjacencyCount,
nodeStats, table_id, tableCount, top, 0); // add node(s)
}
// store list of chosen nodes
printf("SUBSCHEMA:\n");
j = 0; // counter for chosenNodes[]
- for (i = 0; i < freqCSset->numCSadded; ++i) {
- if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
+ for (i = 0; i < tableCount; ++i) {
if (nodeStats[i].steps != 0) continue; // non-chosen
chosenNodes[j++] = i;
- printf("CS "BUNFMT"\n", freqCSset->items[i].csId);
+ printf("CS %s\n", table_name[i]);
}
// statistics
- for (i = 0; i < freqCSset->numCSadded; ++i) {
- if (freqCSset->items[i].parentFreqIdx != -1) continue; // ignore
+ for (i = 0; i < tableCount; ++i) {
csCount += 1;
- sumSubjects += freqCSset->items[i].support;
+ sumSubjects += table_freq[i];
if (nodeStats[i].steps == 0) {
- sumChosenSubjects += freqCSset->items[i].support;
+ sumChosenSubjects += table_freq[i];
}
}
printf("COVERAGE:\n");
printf("%d out of %d (%f %%) using %d out of %d tables (%f %%)\n",
sumChosenSubjects, sumSubjects, (100.00 * sumChosenSubjects) / sumSubjects,
numNodes, csCount, (100.00 * numNodes) / csCount);
// free
- for (i = 0; i < freqCSset->numCSadded; ++i) {
- free(adjacencyMatrix[i]);
- }
- free(adjacencyMatrix);
free(nodeStats);
*numNodesActual = numNodes;
@@ -312,7 +244,25 @@ int* retrieval1(int root, int numNodesMa
}
static
-void assignWeightToChildren2(char** adjacencyMatrix, NodeStat* nodeStats,
CSset* freqCSset, int root) {
+NodeStat* initNodeStats23(long int* table_freq, int tableCount) {
+ NodeStat* nodeStats = NULL;
+ int i;
+
+ nodeStats = (NodeStat *) malloc(sizeof(NodeStat) * tableCount);
+ if (!nodeStats) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
+
+ for (i = 0; i < tableCount; ++i) {
+ nodeStats[i].origWeight = table_freq[i];
+ nodeStats[i].weight = 0;
+ nodeStats[i].steps = -1; // not used
+ nodeStats[i].predecessor = 0; // not used
+ }
+
+ return nodeStats;
+}
+
+static
+void assignWeightToChildren2(long int* adjacency_from, long int* adjacency_to,
int adjacencyCount, NodeStat* nodeStats, long int* table_id, int tableCount,
int root) {
int i, j;
// mark root as a "chosen node"
@@ -320,12 +270,12 @@ void assignWeightToChildren2(char** adja
nodeStats[root].weight = 0;
// set summed weight for children
- for (i = 0; i < freqCSset->numCSadded; ++i) {
- if (adjacencyMatrix[root][i]) {
+ for (i = 0; i < tableCount; ++i) {
+ if (edgeExists(table_id[root], table_id[i], adjacency_from,
adjacency_to, adjacencyCount)) {
if (nodeStats[i].steps == 0) continue; // already in
list
nodeStats[i].weight = 0;
- for (j = 0; j < freqCSset->numCSadded; ++j) {
- if (adjacencyMatrix[i][j]) {
+ for (j = 0; j < tableCount; ++j) {
+ if (edgeExists(table_id[i], table_id[j],
adjacency_from, adjacency_to, adjacencyCount)) {
if (nodeStats[j].steps == 0) continue;
// already in list
nodeStats[i].weight +=
nodeStats[j].origWeight;
}
@@ -335,8 +285,8 @@ void assignWeightToChildren2(char** adja
}
}
-int* retrieval2(int root, int numNodesMax, int* numNodesActual, CSset*
freqCSset, CSmergeRel* csRelBetweenMergeFreqSet) {
- char **adjacencyMatrix = NULL;
+static
+int* retrieval2(int root, int numNodesMax, int* numNodesActual, long int*
table_id, str* table_name, long int* table_freq, int tableCount, long int*
adjacency_from, long int* adjacency_to, int adjacencyCount) {
NodeStat *nodeStats = NULL;
int numNodes;
int *chosenNodes = NULL;
@@ -350,20 +300,18 @@ int* retrieval2(int root, int numNodesMa
chosenNodes = (int *) malloc(sizeof(int) * numNodesMax);
if (!chosenNodes) fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
- adjacencyMatrix = initAdjacencyMatrix(freqCSset->numCSadded);
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list