Changeset: 6c62d2d5ac29 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6c62d2d5ac29
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Remove the use of prefix tree since it doesn't help in efficiently finding 
maximum FreqCSs.


diffs (163 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -1995,20 +1995,121 @@ void getMaximumFreqCSs(CSset *freqCSset,
 
 }
 
-static 
+static int 
+trie_insertwithFreqCS(struct trie_node* root, oid* key, int key_len, int val, 
CSset *freqCSset)
+{
+
+       int i, found_child;
+       int     returnvalue; 
+
+       struct trie_node* curr_node = NULL;
+       struct trie_node* new_node = NULL;
+       struct trie_node* iter = NULL;
+
+       /*
+       printf("Insert: \n");
+       for (i = 0; i < key_len; i++){
+               printf(BUNFMT " ", key[i]);
+       }
+       printf("\n");
+       */
+
+
+       /* Negative values nor NULL keys are allowed */
+       if(val < 0 || root == NULL)
+               return -1; 
+
+       curr_node = root;
+
+       /* iterates over all key's elements. For each one,
+       tries to advance in the trie reusing the path. When 
+       the reusable part ends, start to add new nodes.
+       */
+
+       for(i=0; i <= key_len; i++){ 
+               if(i == key_len){
+                       returnvalue = curr_node->value;
+                       if(curr_node->children != NULL){
+                               // Go to the leaf of this path
+                               while(curr_node->children != NULL)
+                                       curr_node = curr_node->children; 
+                               
+
+                               freqCSset->items[val].parentFreqIdx = 
curr_node->value;
+                       }
+                       return returnvalue;
+               }
+       
+               found_child = 0; 
+               for(iter=curr_node->children; iter != NULL; iter=iter->right)
+               { 
+                       if(iter->key == key[i])
+                       {
+                               found_child = 1;
+                               curr_node = iter;
+                               if (iter->value != TRIE_PATH){
+                                       
freqCSset->items[iter->value].parentFreqIdx = val; 
+                               }
+                               break;
+                       }
+               }
+
+               /* Adds a new node on the trie */
+               if(!found_child){       
+                       new_node = malloc(sizeof(struct trie_node));
+                       new_node->key = key[i];
+                       /*If we are in the end of key, this node should get the
+                       value*/
+                       new_node->value = i == key_len-1 ? val:TRIE_PATH;       
+                       new_node->parent = curr_node;
+                       new_node->children = NULL;              // DUC: CHECK. 
Force the children of newnode to be NULL
+                       /*Updates the children linked list*/
+                       new_node->left = NULL;
+                       new_node->right = curr_node->children;  
+                       if(curr_node->children != NULL)
+                               curr_node->children->left = new_node;
+       
+                       curr_node->children = new_node;         
+       
+                       /*Next loop iteration consider the new node*/
+                       curr_node = new_node; 
+               }
+               else {
+                       if(i == key_len -1)
+                               assert(curr_node->key == key[i]);
+                               //curr_node->key = key[i];
+               }
+       }
+
+       return 1;
+}
+
 void createTreeForCSset(CSset *freqCSset){
        struct trie_node* root;
        int     i;      
        int     numFreqCS;
+       int     numMaxCS = 0; 
+       int     numLeaf = 0; 
+
        
        printf("Build tree from frequent CSs \n");
        init_trie(&root);
        numFreqCS = freqCSset->numCSadded;
 
        for(i = 0; i < numFreqCS; i++){
-               trie_insert(root,freqCSset->items[i].lstProp, 
freqCSset->items[i].numProp,i);
+               trie_insertwithFreqCS(root,freqCSset->items[i].lstProp, 
freqCSset->items[i].numProp,i,freqCSset);
        }
-       
+
+       count_trieleaf(&root, &numLeaf);
+       printf("Num of leaf is %d \n", numLeaf);
+
+       for(i = 0; i < numFreqCS; i++){
+               if (freqCSset->items[i].parentFreqIdx == -1){
+                       numMaxCS++;
+               }
+       }
+               
+       printf("Num max freqCSs explored from prefix tree: %d \n", numMaxCS);
        delete_trie(&root);
 }
 
@@ -3059,13 +3160,13 @@ RDFextractCSwithTypes(int *ret, bat *sba
 
        printf("Number of frequent CSs is: %d \n", freqCSset->numCSadded);
 
-       createTreeForCSset(freqCSset); 
+       //createTreeForCSset(freqCSset);        // DOESN'T HELP --> REMOVE
+       
+       //curT = clock(); 
+       //printf (" -----       Create tree for frequent CSs took  %f 
seconds.\n", ((float)(curT - tmpLastT))/CLOCKS_PER_SEC);
+       //tmpLastT = curT;              
+
        /*get the statistic */
-       
-       curT = clock(); 
-       printf (" ----- Create tree for frequent CSs took  %f seconds.\n", 
((float)(curT - tmpLastT))/CLOCKS_PER_SEC);
-       tmpLastT = curT;                
-       
        //getTopFreqCSs(csMap,*freqThreshold);
 
        getMaximumFreqCSs(freqCSset, csBats->coverageBat,  csBats->freqBat, 
*maxCSoid + 1, &numMaxCSs); 
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -219,8 +219,10 @@ RDFreorganize(int *ret, CStableStat *cst
 rdf_export void
 freeCStableStat(CStableStat *cstablestat); 
 
-
 rdf_export void
 printPropStat(PropStat *propstat, int isPrintToFile); 
 
+rdf_export void 
+createTreeForCSset(CSset *freqCSset);
+
 #endif /* _RDFSCHEMA_H_ */
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to