Changeset: 47eb36345602 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=47eb36345602
Added Files:
monetdb5/extras/rdf/rdfminheap.c
monetdb5/extras/rdf/rdfminheap.h
Modified Files:
monetdb5/extras/rdf/Makefile.ag
monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:
Implement multi-way merging for the list of properties.
Using a min Heap for this multi-way merging.
diffs (truncated from 402 to 300 lines):
diff --git a/monetdb5/extras/rdf/Makefile.ag b/monetdb5/extras/rdf/Makefile.ag
--- a/monetdb5/extras/rdf/Makefile.ag
+++ b/monetdb5/extras/rdf/Makefile.ag
@@ -32,7 +32,7 @@ lib__rdf = {
#MODULE
NOINST
#DIR = libdir/monetdb5
- SOURCES = rdf.h rdfschema.h rdflabels.h rdfretrieval.h rdfparser.h
rdfparser.c rdfontologyload.h rdfontologyload.c rdf_shredder.c rdfalgebra.c
rdfschema.c rdflabels.c rdfretrieval.c
+ SOURCES = rdf.h rdfschema.h rdfminheap.h rdfminheap.c rdflabels.h
rdfretrieval.h rdfparser.h rdfparser.c rdfontologyload.h rdfontologyload.c
rdf_shredder.c rdfalgebra.c rdfschema.c rdflabels.c rdfretrieval.c
#SEP = _
# LIBS = ./hashmap/librdfhash
diff --git a/monetdb5/extras/rdf/rdfminheap.c b/monetdb5/extras/rdf/rdfminheap.c
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/rdf/rdfminheap.c
@@ -0,0 +1,211 @@
+/*
+ * The contents of this file are subject to the MonetDB Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.monetdb.org/Legal/MonetDBLicense
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * The Original Code is the MonetDB Database System.
+ *
+ * The Initial Developer of the Original Code is CWI.
+ * Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
+ * Copyright August 2008-2013 MonetDB B.V.
+ * All Rights Reserved.
+ */
+
+#include "monetdb_config.h"
+#include "rdf.h"
+#include "tokenizer.h"
+#include <math.h>
+#include "rdfminheap.h"
+
+
+//to get index of left child of node at index i
+int leftchild(int i) {
+ return (2*i + 1);
+}
+
+//to get index of right child of node at index i
+int rightchild(int i) {
+ return (2*i + 2);
+}
+
+//to get the root
+MinHeapNode getMin(MinHeap *minHeap) {
+ return minHeap->harr[0];
+}
+
+// A utility function to swap two elements
+void swap(MinHeapNode *x, MinHeapNode *y){
+ MinHeapNode temp = *x; *x = *y; *y = temp;
+}
+
+
+//to replace root with new node x and heapify() new root
+void replaceMin(MinHeap *minHeap, MinHeapNode x){
+ minHeap->harr[0] = x;
+ MinHeapify(minHeap, 0);
+}
+
+// This method assumes that the subtrees are already heapified
+void MinHeapify(MinHeap *minHeap, int i)
+{
+ int l = leftchild(i);
+ int r = rightchild(i);
+ int smallest = i;
+ if (l < minHeap->heap_size && minHeap->harr[l].element <
minHeap->harr[i].element)
+ smallest = l;
+ if (r < minHeap->heap_size && minHeap->harr[r].element <
minHeap->harr[smallest].element)
+ smallest = r;
+ if (smallest != i){
+ swap(&minHeap->harr[i], &minHeap->harr[smallest]);
+ MinHeapify(minHeap, smallest);
+ }
+}
+
+void initMinHeap(MinHeap *minHeap, MinHeapNode *a, int size)
+{
+ int i;
+ minHeap->heap_size = size;
+ minHeap->harr = a; // store address of array
+ i = (minHeap->heap_size - 1)/2;
+ while (i >= 0)
+ {
+ MinHeapify(minHeap, i);
+ i--;
+ }
+}
+
+
+/* This function takes an array of arrays as an argument and
+All arrays are assumed to be sorted. It merges them together
+and prints the final sorted output. */
+
+int *mergeKArrays(int **arr, int k)
+{
+ int i;
+ int count;
+ int n = 4;
+ int *output = (int*) malloc (sizeof(int) *n*k); //To store output array
+ MinHeap *hp;
+
+ //Create a min heap with k heap nodes. Every heap node
+ //has first element of an array
+ MinHeapNode *harr = (MinHeapNode*)malloc(sizeof(MinHeapNode) * k);
+ for (i = 0; i < k; i++)
+ {
+ harr[i].element = arr[i][0]; //Store the first element
+ harr[i].i = i; //index of array
+ harr[i].j = 1; //Index of next element to be stored from array
+ }
+
+ hp = (MinHeap *) malloc(sizeof(MinHeap));
+ initMinHeap(hp, harr, k); //Create the heap
+
+ //Now one by one get the minimum element from min
+ //heap and replace it with next element of its array
+ for (count = 0; count < n*k; count++)
+ {
+ //Get the minimum element and store it in output
+ MinHeapNode root = getMin(hp);
+ output[count] = root.element;
+
+ //Find the next elelement that will replace current
+ //root of heap. The next element belongs to same
+ //array as the current root.
+ if (root.j < n)
+ {
+ root.element = arr[root.i][root.j];
+ root.j += 1;
+ }
+ //If root was the last element of its array
+ else root.element = INT_MAX; //INT_MAX is for infinite
+
+ //Replace root with next element of array
+ replaceMin(hp, root);
+ }
+
+ return output;
+}
+
+/* Merge multi property lists in freqCSset. This is used for forming mergeCS */
+#define INIT_MERGELIST_SIZE 1000
+
+oid* mergeMultiPropList(CSset *freqCSset, int *freqIdList, int k, int
*numCombinedP)
+{
+ int i;
+ //int j;
+ MinHeap *hp;
+ int freqIdx;
+ MinHeapNode *harr;
+
+ oid *output = (oid *) malloc(sizeof(oid) * INIT_MERGELIST_SIZE); //
Output array
+
+ /*
+ printf("Input list: \n");
+ for (i = 0; i < k; i++){
+ printf(" List %d: ",i);
+ freqIdx = freqIdList[i];
+ for (j = 0; j < freqCSset->items[freqIdx].numProp; j++){
+ printf(" "BUNFMT,
freqCSset->items[freqIdx].lstProp[j]);
+ }
+ printf("\n");
+ }
+ */
+
+ //Create a min heap with k heap nodes. Every heap node
+ //has first element of an array
+ harr = (MinHeapNode*)malloc(sizeof(MinHeapNode) * k);
+ for (i = 0; i < k; i++)
+ {
+ harr[i].element = freqCSset->items[freqIdList[i]].lstProp[0];
//Store the first element
+ harr[i].i = i; //index of array
+ harr[i].j = 1; //Index of next element to be stored from array
+ }
+
+ hp = (MinHeap *) malloc(sizeof(MinHeap));
+ initMinHeap(hp, harr, k); //Create the heap
+
+ //Now one by one get the minimum element from min
+ //heap and replace it with next element of its array
+ *numCombinedP = 0;
+ while (1)
+ {
+ //Get the minimum element and store it in output
+ MinHeapNode root = getMin(hp);
+ if (root.element == INT_MAX) break;
+
+ if (output[*numCombinedP - 1] != root.element){ //Only
append the distinct prop to combined list
+ output[*numCombinedP] = root.element;
+ (*numCombinedP)++;
+ }
+
+ //Find the next elelement that will replace current
+ //root of heap. The next element belongs to same
+ //array as the current root.
+ freqIdx = freqIdList[root.i];
+ if (root.j < freqCSset->items[freqIdx].numProp)
+ {
+ root.element =
freqCSset->items[freqIdx].lstProp[root.j];
+ root.j += 1;
+ }
+ //If root was the last element of its array
+ else root.element = INT_MAX; //INT_MAX is for infinite
+
+ //Replace root with next element of array
+ replaceMin(hp, root);
+ }
+
+ /*
+ printf("Output merge propList (%d) : \n", *numCombinedP);
+ for (i = 0; i < *numCombinedP; i++){
+ printf(BUNFMT " ",output[i]);
+ }
+ printf("\n");
+ */
+ return output;
+}
diff --git a/monetdb5/extras/rdf/rdfminheap.h b/monetdb5/extras/rdf/rdfminheap.h
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/rdf/rdfminheap.h
@@ -0,0 +1,67 @@
+/*
+ * The contents of this file are subject to the MonetDB Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.monetdb.org/Legal/MonetDBLicense
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * The Original Code is the MonetDB Database System.
+ *
+ * The Initial Developer of the Original Code is CWI.
+ * Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
+ * Copyright August 2008-2013 MonetDB B.V.
+ * All Rights Reserved.
+ */
+
+#ifndef _MINHEAP_H_
+#define _MINHEAP_H_
+
+#include "rdfschema.h"
+
+//A min heap node
+typedef struct MinHeapNode
+{
+ oid element; //The element to be stored
+ int i; //index of the array from which the element is taken
+ int j; //index of the next element to be picked from array
+} MinHeapNode;
+
+
+//A struct for Min Heap
+typedef struct MinHeap
+{
+ MinHeapNode *harr; //pointer to array of elements in heap
+ int heap_size; //size of min heap
+} MinHeap;
+
+//to get index of left child of node at index i
+rdf_export int leftchild(int i);
+
+//to get index of right child of node at index i
+rdf_export int rightchild(int i);
+
+//to get the root
+rdf_export MinHeapNode getMin(MinHeap *minHeap);
+
+// A utility function to swap two elements
+rdf_export void swap(MinHeapNode *x, MinHeapNode *y);
+
+
+// This method assumes that the subtrees are already heapified
+rdf_export void MinHeapify(MinHeap *minHeap, int i);
+
+//to replace root with new node x and heapify() new root
+rdf_export void replaceMin(MinHeap *minHeap, MinHeapNode x);
+
+
+rdf_export void initMinHeap(MinHeap *minHeap, MinHeapNode *a, int size);
+
+rdf_export int* mergeKArrays(int **arr, int k);
+
+rdf_export oid* mergeMultiPropList(CSset *freqCSset, int *freqIdList, int num,
int *numCombinedP); /* Merge the lstProp from multi freqCSs */
+
+#endif /* _MINHEAP_H_ */
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list