Changeset: 47eb36345602 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=47eb36345602
Added Files:
        monetdb5/extras/rdf/rdfminheap.c
        monetdb5/extras/rdf/rdfminheap.h
Modified Files:
        monetdb5/extras/rdf/Makefile.ag
        monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:

Implement multi-way merging for the list of properties.

Using a min Heap for this multi-way merging.


diffs (truncated from 402 to 300 lines):

diff --git a/monetdb5/extras/rdf/Makefile.ag b/monetdb5/extras/rdf/Makefile.ag
--- a/monetdb5/extras/rdf/Makefile.ag
+++ b/monetdb5/extras/rdf/Makefile.ag
@@ -32,7 +32,7 @@ lib__rdf = {
        #MODULE
        NOINST
        #DIR = libdir/monetdb5
-       SOURCES = rdf.h rdfschema.h rdflabels.h rdfretrieval.h rdfparser.h 
rdfparser.c rdfontologyload.h rdfontologyload.c rdf_shredder.c rdfalgebra.c 
rdfschema.c rdflabels.c rdfretrieval.c 
+       SOURCES = rdf.h rdfschema.h rdfminheap.h rdfminheap.c rdflabels.h 
rdfretrieval.h rdfparser.h rdfparser.c rdfontologyload.h rdfontologyload.c 
rdf_shredder.c rdfalgebra.c rdfschema.c rdflabels.c rdfretrieval.c  
 
        #SEP = _
        # LIBS =  ./hashmap/librdfhash  
diff --git a/monetdb5/extras/rdf/rdfminheap.c b/monetdb5/extras/rdf/rdfminheap.c
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/rdf/rdfminheap.c
@@ -0,0 +1,211 @@
+/*
+ * The contents of this file are subject to the MonetDB Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.monetdb.org/Legal/MonetDBLicense
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * The Original Code is the MonetDB Database System.
+ *
+ * The Initial Developer of the Original Code is CWI.
+ * Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
+ * Copyright August 2008-2013 MonetDB B.V.
+ * All Rights Reserved.
+ */
+
+#include "monetdb_config.h"
+#include "rdf.h"
+#include "tokenizer.h"
+#include <math.h>
+#include "rdfminheap.h"
+
+
+//to get index of left child of node at index i
+int leftchild(int i) { 
+       return (2*i + 1); 
+}
+
+//to get index of right child of node at index i
+int rightchild(int i) { 
+       return (2*i + 2); 
+}
+
+//to get the root
+MinHeapNode getMin(MinHeap *minHeap) { 
+       return minHeap->harr[0];
+}
+
+// A utility function to swap two elements
+void swap(MinHeapNode *x, MinHeapNode *y){
+       MinHeapNode temp = *x;  *x = *y;  *y = temp;
+}
+
+
+//to replace root with new node x and heapify() new root
+void replaceMin(MinHeap *minHeap, MinHeapNode x){ 
+       minHeap->harr[0] = x;  
+       MinHeapify(minHeap, 0); 
+}
+
+// This method assumes that the subtrees are already heapified
+void MinHeapify(MinHeap *minHeap, int i)
+{
+       int l = leftchild(i);
+       int r = rightchild(i);
+       int smallest = i;
+       if (l < minHeap->heap_size && minHeap->harr[l].element < 
minHeap->harr[i].element)
+               smallest = l;
+       if (r < minHeap->heap_size && minHeap->harr[r].element < 
minHeap->harr[smallest].element)
+               smallest = r;
+       if (smallest != i){
+               swap(&minHeap->harr[i], &minHeap->harr[smallest]);
+               MinHeapify(minHeap, smallest);
+       }
+}
+
+void initMinHeap(MinHeap *minHeap, MinHeapNode *a, int size)
+{
+       int i; 
+       minHeap->heap_size = size;
+       minHeap->harr = a;  // store address of array
+       i = (minHeap->heap_size - 1)/2;
+       while (i >= 0)
+       {
+               MinHeapify(minHeap, i);
+               i--;
+       }
+}
+ 
+
+/* This function takes an array of arrays as an argument and
+All arrays are assumed to be sorted. It merges them together
+and prints the final sorted output. */
+
+int *mergeKArrays(int **arr, int k)
+{
+       int i;
+       int count; 
+       int n = 4; 
+       int *output = (int*) malloc (sizeof(int) *n*k); //To store output array
+       MinHeap *hp;
+
+       //Create a min heap with k heap nodes.  Every heap node
+       //has first element of an array
+       MinHeapNode *harr = (MinHeapNode*)malloc(sizeof(MinHeapNode) * k);
+       for (i = 0; i < k; i++)
+       {
+               harr[i].element = arr[i][0]; //Store the first element
+               harr[i].i = i; //index of array
+               harr[i].j = 1; //Index of next element to be stored from array
+       }
+
+       hp = (MinHeap *) malloc(sizeof(MinHeap)); 
+       initMinHeap(hp, harr, k);  //Create the heap
+
+       //Now one by one get the minimum element from min
+       //heap and replace it with next element of its array
+       for (count = 0; count < n*k; count++)
+       {
+               //Get the minimum element and store it in output
+               MinHeapNode root = getMin(hp);
+               output[count] = root.element;
+
+               //Find the next elelement that will replace current
+               //root of heap. The next element belongs to same
+               //array as the current root.
+               if (root.j < n)
+               {
+                       root.element = arr[root.i][root.j];
+                       root.j += 1;
+               }
+               //If root was the last element of its array
+               else root.element =  INT_MAX; //INT_MAX is for infinite
+
+               //Replace root with next element of array
+               replaceMin(hp, root);
+       }
+
+       return output;
+}
+
+/* Merge multi property lists in freqCSset. This is used for forming mergeCS */
+#define INIT_MERGELIST_SIZE 1000 
+
+oid* mergeMultiPropList(CSset *freqCSset, int *freqIdList, int k, int 
*numCombinedP)
+{
+       int i;
+       //int j;
+       MinHeap *hp;
+       int freqIdx; 
+       MinHeapNode *harr;
+
+       oid *output = (oid *) malloc(sizeof(oid) * INIT_MERGELIST_SIZE); // 
Output array
+
+       /*
+       printf("Input list: \n");
+       for (i = 0; i < k; i++){
+               printf(" List %d: ",i);
+               freqIdx = freqIdList[i];
+               for (j = 0; j < freqCSset->items[freqIdx].numProp; j++){
+                       printf("  "BUNFMT, 
freqCSset->items[freqIdx].lstProp[j]);
+               }
+               printf("\n");
+       }
+       */
+
+       //Create a min heap with k heap nodes.  Every heap node
+       //has first element of an array
+       harr = (MinHeapNode*)malloc(sizeof(MinHeapNode) * k);
+       for (i = 0; i < k; i++)
+       {
+               harr[i].element = freqCSset->items[freqIdList[i]].lstProp[0]; 
//Store the first element
+               harr[i].i = i; //index of array
+               harr[i].j = 1; //Index of next element to be stored from array
+       }
+
+       hp = (MinHeap *) malloc(sizeof(MinHeap)); 
+       initMinHeap(hp, harr, k);  //Create the heap
+
+       //Now one by one get the minimum element from min
+       //heap and replace it with next element of its array
+       *numCombinedP = 0;
+       while (1)
+       {
+               //Get the minimum element and store it in output
+               MinHeapNode root = getMin(hp);
+               if (root.element == INT_MAX) break; 
+               
+               if (output[*numCombinedP - 1] != root.element){         //Only 
append the distinct prop to combined list
+                       output[*numCombinedP] = root.element;
+                       (*numCombinedP)++;
+               }
+
+               //Find the next elelement that will replace current
+               //root of heap. The next element belongs to same
+               //array as the current root.
+               freqIdx = freqIdList[root.i]; 
+               if (root.j < freqCSset->items[freqIdx].numProp)
+               {
+                       root.element = 
freqCSset->items[freqIdx].lstProp[root.j];
+                       root.j += 1;
+               }
+               //If root was the last element of its array
+               else root.element =  INT_MAX; //INT_MAX is for infinite
+
+               //Replace root with next element of array
+               replaceMin(hp, root);
+       }
+       
+       /*
+       printf("Output merge propList (%d) : \n", *numCombinedP);
+       for (i = 0; i < *numCombinedP; i++){
+               printf(BUNFMT "  ",output[i]);
+       }
+       printf("\n");
+       */
+       return output;
+}
diff --git a/monetdb5/extras/rdf/rdfminheap.h b/monetdb5/extras/rdf/rdfminheap.h
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/rdf/rdfminheap.h
@@ -0,0 +1,67 @@
+/*
+ * The contents of this file are subject to the MonetDB Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.monetdb.org/Legal/MonetDBLicense
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * The Original Code is the MonetDB Database System.
+ *
+ * The Initial Developer of the Original Code is CWI.
+ * Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
+ * Copyright August 2008-2013 MonetDB B.V.
+ * All Rights Reserved.
+ */
+
+#ifndef _MINHEAP_H_
+#define _MINHEAP_H_
+
+#include "rdfschema.h"
+
+//A min heap node
+typedef struct MinHeapNode
+{
+       oid element;    //The element to be stored
+       int i;          //index of the array from which the element is taken
+       int j;          //index of the next element to be picked from array
+} MinHeapNode;
+
+
+//A struct for Min Heap
+typedef struct MinHeap
+{
+       MinHeapNode *harr;  //pointer to array of elements in heap
+       int heap_size;  //size of min heap
+} MinHeap; 
+
+//to get index of left child of node at index i
+rdf_export int leftchild(int i);
+
+//to get index of right child of node at index i
+rdf_export int rightchild(int i);
+
+//to get the root
+rdf_export MinHeapNode getMin(MinHeap *minHeap);
+
+// A utility function to swap two elements
+rdf_export void swap(MinHeapNode *x, MinHeapNode *y);
+
+
+// This method assumes that the subtrees are already heapified
+rdf_export void MinHeapify(MinHeap *minHeap, int i);
+
+//to replace root with new node x and heapify() new root
+rdf_export void replaceMin(MinHeap *minHeap, MinHeapNode x);
+
+
+rdf_export void initMinHeap(MinHeap *minHeap, MinHeapNode *a, int size);
+ 
+rdf_export int* mergeKArrays(int **arr, int k);
+
+rdf_export oid* mergeMultiPropList(CSset *freqCSset, int *freqIdList, int num, 
int *numCombinedP);  /* Merge the lstProp from multi freqCSs */
+
+#endif /* _MINHEAP_H_ */
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to