Changeset: 660366c5d68d for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=660366c5d68d
Modified Files:
        monetdb5/extras/rdf/hashmap/hashmap.c
        monetdb5/extras/rdf/hashmap/hashmap.h
        monetdb5/extras/rdf/rdf_shredder.mx
        monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:

Modify the log handler for Raptor parser


diffs (217 lines):

diff --git a/monetdb5/extras/rdf/hashmap/hashmap.c b/monetdb5/extras/rdf/hashmap/hashmap.c
--- a/monetdb5/extras/rdf/hashmap/hashmap.c
+++ b/monetdb5/extras/rdf/hashmap/hashmap.c
@@ -13,7 +13,8 @@
 /* We need to keep keys and values */
 typedef struct _hashmap_element{
        int* key;
-       int num;        
+       int num; 
+       int freq;       
        int in_use;
        any_t data;
 } hashmap_element;
@@ -171,6 +172,7 @@ int hashmap_put(map_t in, int* key, int 
        m->data[index].key = key;
        m->data[index].num = num; 
        m->data[index].in_use = 1;
+       m->data[index].freq = 1; 
        m->size++; 
 
        return MAP_OK;
@@ -179,7 +181,7 @@ int hashmap_put(map_t in, int* key, int 
 /*
  * Get your pointer out of the hashmap with a key
  */
-int hashmap_get(map_t in, int* key, int num,  any_t *arg){
+int hashmap_get(map_t in, int* key, int num,  any_t *arg, char isUpdateFrequency, int *retfreq){
        int curr;
        int i;
        hashmap_map* m;
@@ -193,14 +195,17 @@ int hashmap_get(map_t in, int* key, int 
        /* Linear probing, if necessary */
        for(i = 0; i<MAX_CHAIN_LENGTH; i++){
 
-        int in_use = m->data[curr].in_use;
-        if (in_use == 1){
-            if ((m->data[curr].num == num) && (intsetcmp(m->data[curr].key,key,num)==0)){
-                *arg = (m->data[curr].data);
-                return MAP_OK;
-            }
+               int in_use = m->data[curr].in_use;
+               if (in_use == 1){
+                       if ((m->data[curr].num == num) && (intsetcmp(m->data[curr].key,key,num)==0)){
+                               *arg = (m->data[curr].data);
+                               if (isUpdateFrequency == 1){
+                                       m->data[curr].freq++;
+                                       *retfreq = m->data[curr].freq; 
+                               }
+                               return MAP_OK;
+                       }
                }
-
                curr = (curr + 1) % m->table_size;
        }
 
@@ -210,6 +215,8 @@ int hashmap_get(map_t in, int* key, int 
        return MAP_MISSING;
 }
 
+
+
 /*
  * Iterate the function parameter over each element in the hashmap.  The
  * additional any_t argument is passed to the function as its first
@@ -239,6 +246,37 @@ int hashmap_iterate(map_t in, PFany f, a
 }
 
 /*
+ * This function is used for filtering the CSs only:
+ *
+ * Iterate over the hashmap and show the values of
+ * all hash elements whose frequency value is > freqthreshold
+ *
+ * */
+
+int hashmap_iterate_threshold(map_t in, int freqthreshold){
+       /* Returns MAP_MISSING when the map is empty, MAP_OK otherwise. */
+       int i;
+
+       /* Cast the map_t argument to the hashmap_map structure */
+       hashmap_map* m = (hashmap_map*) in;
+
+       /* On empty hashmap, return immediately */
+       if (hashmap_length(m) <= 0)
+               return MAP_MISSING;     
+
+       /* Full scan of every slot in the table (not a probe sequence) */
+       for(i = 0; i< m->table_size; i++)
+               if(m->data[i].in_use != 0) {
+                       if (m->data[i].freq > freqthreshold){
+                               /* Placeholder: no action is taken yet for entries above the threshold. */
+                               /* Commented-out draft: any_t data = (any_t) (m->data[i].data); */
+                       }
+               }
+
+    return MAP_OK;
+}
+
+/*
  * Remove an element with that key from the map
  */
 int hashmap_remove(map_t in, int* key, int num){
@@ -261,6 +299,7 @@ int hashmap_remove(map_t in, int* key, i
                 /* Blank out the fields */
                 m->data[curr].in_use = 0;
                m->data[curr].num = 0; 
+               m->data[curr].freq = 1; 
                 m->data[curr].data = NULL;
                 m->data[curr].key = NULL;
 
diff --git a/monetdb5/extras/rdf/hashmap/hashmap.h b/monetdb5/extras/rdf/hashmap/hashmap.h
--- a/monetdb5/extras/rdf/hashmap/hashmap.h
+++ b/monetdb5/extras/rdf/hashmap/hashmap.h
@@ -51,6 +51,15 @@ extern map_t hashmap_new(void);
 extern int hashmap_iterate(map_t in, PFany f, any_t item);
 
 /*
+ * This function is used for filtering the CSs only:
+ *
+ * Iterate over the hashmap and show the values of
+ * all hash elements whose frequency value is > freqthreshold
+ *
+ * */
+extern int hashmap_iterate_threshold(map_t in, int freqthreshold);
+
+/*
  * Add an element to the hashmap. Return MAP_OK or MAP_OMEM.
  */
 extern int hashmap_put(map_t in, int* key, int num,  any_t value);
@@ -58,7 +67,7 @@ extern int hashmap_put(map_t in, int* ke
 /*
  * Get an element from the hashmap. Return MAP_OK or MAP_MISSING.
  */
-extern int hashmap_get(map_t in, int* key, int num, any_t *arg);
+extern int hashmap_get(map_t in, int* key, int num, any_t *arg, char isUpdateFreq, int *retfreq);
 
 /*
  * Remove an element from the hashmap. Return MAP_OK or MAP_MISSING.
diff --git a/monetdb5/extras/rdf/rdf_shredder.mx b/monetdb5/extras/rdf/rdf_shredder.mx
--- a/monetdb5/extras/rdf/rdf_shredder.mx
+++ b/monetdb5/extras/rdf/rdf_shredder.mx
@@ -108,20 +108,20 @@ raptor_parser_parse_abort (@1->rparser);
 
 @= rdf_parser_handler
 static void
-@1Handler (void *user_data, raptor_locator* locator,
-               const char *message)
+@1Handler (void *user_data, raptor_log_message* message)
 {
-       parserData *pdata = (parserData *) user_data;
-       pdata->@1Msg = GDKstrdup(message);
+       parserData *pdata = ((parserData *) user_data);
+       pdata->@1Msg = GDKstrdup(message->text);
        mnstr_printf(GDKout, "rdflib: @1:%s\n", pdata->@1Msg);
        pdata->@1++;
 
        /* check for a valid locator object and only then use it */
-       if (locator != NULL) {
-               pdata->line = locator->line;
-               pdata->column = locator->column;
-       } else {
-       }
+       if (message->locator != NULL) {
+               pdata->line = message->locator->line;
+               pdata->column = message->locator->column;
+               mnstr_printf(GDKout, "rdflib: @1: at line %d column %d\n", pdata->line, pdata->column);
+       } 
+       
 }
 
 @
@@ -563,6 +563,7 @@ RDFParser (BAT **graph, str *location, s
        @:set_handlers(world, rparser, pdata)@
 
        //raptor_parser_set_option(rparser, 0); //MDPHAM: CHECK FOR THIS SETTING
+       raptor_parser_set_option(rparser, RAPTOR_OPTION_SCANNING);
 
        /* Parse URI or local file. */
        ret = URLisaURL(&isURI, location);
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -44,11 +44,12 @@ static void putCStoHash(map_t csmap, int
        oid     *getCSoid; 
        oid     *putCSoid; 
        int     err; 
-       int* cs; 
+       int*    cs; 
+       int     freq; 
 
        cs = (int*) malloc(sizeof(int) * num);
        copyIntSet(cs, buff, num); 
-       if (hashmap_get(csmap, cs, num,(void**)(&getCSoid)) != MAP_OK){
+       if (hashmap_get(csmap, cs, num,(void**)(&getCSoid),1, &freq) != MAP_OK){
                putCSoid = malloc(sizeof(oid)); 
                *putCSoid = *csoid; 
 
@@ -60,12 +61,18 @@ static void putCStoHash(map_t csmap, int
                (*csoid)++; 
        }
        else{
-               printf("The key %d exists in the hashmap \n", (int) *getCSoid);
+               printf("The key %d exists in the hashmap with freq %d \n", (int) *getCSoid, freq);
                free(cs); 
 
        }
 }
 
+/*
+static void getTopFreqCSs(map_t csmap){
+       
+}
+*/
+
 str
 RDFextractCS(int *ret, bat *sbatid, bat *pbatid){
        BUN     p, q; 
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to