Changeset: 802c6a1cd0ba for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=802c6a1cd0ba
Modified Files:
monetdb5/extras/rdf/hashmap/hashmap.c
monetdb5/extras/rdf/hashmap/hashmap.h
monetdb5/extras/rdf/rdf_shredder.mx
monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:
Add function for checking the number of frequent CSs
diffs (213 lines):
diff --git a/monetdb5/extras/rdf/hashmap/hashmap.c b/monetdb5/extras/rdf/hashmap/hashmap.c
--- a/monetdb5/extras/rdf/hashmap/hashmap.c
+++ b/monetdb5/extras/rdf/hashmap/hashmap.c
@@ -10,22 +10,7 @@
#define INITIAL_SIZE (256)
#define MAX_CHAIN_LENGTH (8)
-/* We need to keep keys and values */
-typedef struct _hashmap_element{
- int* key;
- int num;
- int freq;
- int in_use;
- any_t data;
-} hashmap_element;
-/* A hashmap has some maximum size and current size,
- * as well as the data to hold. */
-typedef struct _hashmap_map{
- int table_size;
- int size;
- hashmap_element *data;
-} hashmap_map;
/*
* Return an empty hashmap, or NULL on failure.
@@ -256,10 +241,11 @@ int hashmap_iterate(map_t in, PFany f, a
int hashmap_iterate_threshold(map_t in, int freqthreshold){
int i;
+ int count = 0;
/* Cast the hashmap */
hashmap_map* m = (hashmap_map*) in;
-
+
/* On empty hashmap, return immediately */
if (hashmap_length(m) <= 0)
return MAP_MISSING;
@@ -269,11 +255,11 @@ int hashmap_iterate_threshold(map_t in,
if(m->data[i].in_use != 0) {
if (m->data[i].freq > freqthreshold){
//any_t data = (any_t) (m->data[i].data);
-
+ count++;
}
}
- return MAP_OK;
+ return count;
}
/*
diff --git a/monetdb5/extras/rdf/hashmap/hashmap.h b/monetdb5/extras/rdf/hashmap/hashmap.h
--- a/monetdb5/extras/rdf/hashmap/hashmap.h
+++ b/monetdb5/extras/rdf/hashmap/hashmap.h
@@ -36,6 +36,23 @@ typedef int (*PFany)(any_t, any_t);
*/
typedef any_t map_t;
+/* We need to keep keys and values */
+typedef struct _hashmap_element{
+ int* key;
+ int num;
+ int freq;
+ int in_use;
+ any_t data;
+} hashmap_element;
+
+/* A hashmap has some maximum size and current size,
+ * as well as the data to hold. */
+typedef struct _hashmap_map{
+ int table_size;
+ int size;
+ hashmap_element *data;
+} hashmap_map;
+
/*
* Return an empty hashmap. Returns NULL if empty.
*/
diff --git a/monetdb5/extras/rdf/rdf_shredder.mx b/monetdb5/extras/rdf/rdf_shredder.mx
--- a/monetdb5/extras/rdf/rdf_shredder.mx
+++ b/monetdb5/extras/rdf/rdf_shredder.mx
@@ -563,7 +563,7 @@ RDFParser (BAT **graph, str *location, s
@:set_handlers(world, rparser, pdata)@
//raptor_parser_set_option(rparser, 0); //MDPHAM: CHECK FOR THIS SETTING
- raptor_parser_set_option(rparser, RAPTOR_OPTION_SCANNING);
+ //raptor_parser_set_option(rparser, RAPTOR_OPTION_SCANNING);
/* Parse URI or local file. */
ret = URLisaURL(&isURI, location);
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -48,6 +48,11 @@ static void putCStoHash(map_t csmap, int
int freq;
cs = (int*) malloc(sizeof(int) * num);
+ if (cs==NULL){
+ printf("Malloc failed. at %d", num);
+ exit(-1);
+ }
+
copyIntSet(cs, buff, num);
if (hashmap_get(csmap, cs, num,(void**)(&getCSoid),1, &freq) != MAP_OK){
putCSoid = malloc(sizeof(oid));
@@ -56,22 +61,30 @@ static void putCStoHash(map_t csmap, int
err = hashmap_put(csmap, cs, num, putCSoid);
assert(err == MAP_OK);
- printf("Put CS %d into hashmap \n", (int) *putCSoid);
+ //printf("Put CS %d into hashmap \n", (int) *putCSoid);
(*csoid)++;
}
else{
- printf("The key %d exists in the hashmap with freq %d \n",
(int) *getCSoid, freq);
+ //printf("The key %d exists in the hashmap with freq %d \n",
(int) *getCSoid, freq);
free(cs);
}
}
-/*
-static void getTopFreqCSs(map_t csmap){
-
+
+static void getTopFreqCSs(map_t csmap, int threshold){
+ int count;
+ hashmap_map* m;
+ count = hashmap_iterate_threshold(csmap, threshold);
+ m = (hashmap_map *) csmap;
+ printf("Threshold: %d \n ", threshold);
+ printf("Number of frequent CSs %d / Number of CSs %d (Table size: %d)
\n" , count, m->size, m->table_size);
+
+ return;
+
}
-*/
+
str
RDFextractCS(int *ret, bat *sbatid, bat *pbatid){
@@ -80,11 +93,12 @@ RDFextractCS(int *ret, bat *sbatid, bat
BATiter si, pi; /*iterator for BAT of s,p columns in spo table
*/
oid *bt, *pbt;
oid curS; /* current Subject oid */
+ oid curP; /* current Property oid */
oid CSoid = 0; /* Characteristic set oid */
int numP; /* Number of properties for current S */
map_t csMap;
int* buff;
- int INIT_PROPERTY_NUM = 256;
+ int INIT_PROPERTY_NUM = 50000;
buff = (int *) malloc (sizeof(int) * INIT_PROPERTY_NUM);
@@ -100,27 +114,50 @@ RDFextractCS(int *ret, bat *sbatid, bat
/* Init a hashmap */
csMap = hashmap_new();
- numP = 0;
+ numP = 0;
+ curP = 0;
BATloop(sbat, p, q){
bt = (oid *) BUNtloc(si, p);
if (*bt != curS){
if (p != 0){ /* Not the first S */
putCStoHash(csMap, buff, numP, &CSoid);
+
}
curS = *bt;
+ curP = 0;
numP = 0;
}
pbt = (oid *) BUNtloc(pi, p);
- buff[numP] = *pbt;
- numP++;
- printf("Travel sbat at %d value: %d , for pbat: %d \n", (int)
p, (int) *bt, (int) *pbt);
+
+ if (numP > INIT_PROPERTY_NUM){
+ printf("# of properties %d is greater than
INIT_PROPERTY_NUM at CS %d property %d \n", numP, (int)CSoid, (int)*pbt);
+ exit(-1);
+ }
+
+ if (curP != *pbt){
+ buff[numP] = *pbt;
+ numP++;
+ curP = *pbt;
+ }
+ //printf("Travel sbat at %d value: %d , for pbat: %d \n",
(int) p, (int) *bt, (int) *pbt);
}
/*put the last CS */
putCStoHash(csMap, buff, numP, &CSoid);
+ /*get the statistic */
+
+ getTopFreqCSs(csMap,20);
+
+ getTopFreqCSs(csMap,10);
+
+ getTopFreqCSs(csMap,5);
+
+ getTopFreqCSs(csMap,2);
+
+
BBPreclaim(sbat);
BBPreclaim(pbat);
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list