Changeset: 72505c66484c for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=72505c66484c
Modified Files:
        monetdb5/extras/rdf/rdf_shredder.c
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
        monetdb5/extras/rdf/rdftypes.c
        sql/backends/monet5/sql_rdf.c
        sql/server/rel_optimizer.c
Branch: rdf
Log Message:

Avoid cross product + handle xsd:long in the input rdf data.

- Re-order join with Atom "True" in the join predicate

- The ID in the LDBC data generator is a Long value.


diffs (257 lines):

diff --git a/monetdb5/extras/rdf/rdf_shredder.c 
b/monetdb5/extras/rdf/rdf_shredder.c
--- a/monetdb5/extras/rdf/rdf_shredder.c
+++ b/monetdb5/extras/rdf/rdf_shredder.c
@@ -247,7 +247,8 @@ getObjectType_and_Value(unsigned char* o
                }
                else if ((pos = strstr((str) endpart, "XMLSchema#float>")) != 
NULL 
                                || (pos = strstr((str) endpart, 
"XMLSchema#double>")) != NULL  
-                               || (pos = strstr((str) endpart, 
"XMLSchema#decimal>")) != NULL){
+                               || (pos = strstr((str) endpart, 
"XMLSchema#decimal>")) != NULL
+                               || (pos = strstr((str) endpart, 
"XMLSchema#long>")) != NULL){
                        obType = DOUBLE;
                        subLen = (int) (pos - (str)objStr - 28);
                        valuepart = substring((char*)objStr, 2 , subLen);
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -4469,7 +4469,12 @@ void mergeCSByS4(CSset *freqCSset, CSlab
 
                          if (simscore > simTfidfThreshold && 
(existDiscriminatingProp || isSameLabel)){
                          #else 
-                         if (simscore > simTfidfThreshold && 
existDiscriminatingProp){   
+                         if (
+                               #if MERGE_SAME_PROP_CS
+                               simscore > SIM_SAME_PROP_THRESHOLD || 
+                               #endif
+                               (simscore > simTfidfThreshold && 
existDiscriminatingProp)){       
+
                          //if (simscore > simTfidfThreshold){    
                          #endif
                        #else   
@@ -5213,39 +5218,6 @@ float similarityScoreWithOntologyClass(o
        return  ((float) sumXY);
 }
 
-#if COUNT_PERCENTAGE_ONTO_PROP_USED
-
-static 
-void countNumOverlapProp(oid* arr1, oid* arr2, int m, int n, 
-               int *numOverlap){
-       
-       int i = 0, j = 0;
-       int numCommon = 0; 
-
-       i = 0;
-       j = 0;
-       while( i < n && j < m )
-       {
-               if( arr1[j] < arr2[i] ){
-                       j++;
-
-               }
-               else if( arr1[j] == arr2[i] )
-               {
-                       j++;
-                       i++;
-                       numCommon++;
-
-               }
-               else if( arr1[j] > arr2[i] )
-                       i++;
-       }
-       
-       *numOverlap = numCommon;
-
-}
-#endif
-       
 static
 void getBestRdfTypeValue(oid *buff, int numP, oid *rdftypeOntologyValues, char 
*rdftypeSelectedValues, char *rdftypeSpecificLevels, BUN *rdftypeOntClassPos, 
int *numTypeValues, int maxSpecificLevel, TFIDFInfo *tfidfInfos){
        int i, j, k;
@@ -5339,6 +5311,40 @@ void getBestRdfTypeValue(oid *buff, int 
 
 
 #if COUNT_PERCENTAGE_ONTO_PROP_USED
+
+static 
+void countNumOverlapProp(oid* arr1, oid* arr2, int m, int n, 
+               int *numOverlap){
+       
+       int i = 0, j = 0;
+       int numCommon = 0; 
+
+       i = 0;
+       j = 0;
+       while( i < n && j < m )
+       {
+               if( arr1[j] < arr2[i] ){
+                       j++;
+
+               }
+               else if( arr1[j] == arr2[i] )
+               {
+                       j++;
+                       i++;
+                       numCommon++;
+
+               }
+               else if( arr1[j] > arr2[i] )
+                       i++;
+       }
+       
+       *numOverlap = numCommon;
+
+}
+#endif
+       
+
+#if COUNT_PERCENTAGE_ONTO_PROP_USED
 /*
  * If the name of the CS comes from an ontology class, 
  * ontology contribution for the CS is computed as:
@@ -5521,7 +5527,7 @@ str RDFassignCSId(int *ret, BAT *sbat, B
 
        first = 0; 
        last = BATcount(sbat) -1; 
-       
+       printf("Number of triples %d\n", last);
        for (p = first; p <= last; p++){
                sbt = sbatCursor[p];
                if (sbt != curS){
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -162,6 +162,9 @@ typedef struct PsoPropStat {
                                                //URI should be ok.
 #define        ONLY_MERGE_URINAME_CS_S1 1              /* Only merge CS's 
whose name is an URI */
 
+#define MERGE_SAME_PROP_CS 1
+#define SIM_SAME_PROP_THRESHOLD 0.9999         /* It should exactly be 1.0, 
however, the float multiplication may loss the precision */
+
 #define FILTER_INFREQ_FK_FOR_IR        1               /* We filter out all 
the dirty references from a CS */
 //#define FILTER_THRESHOLD_FK_FOR_IR   0.1     /* The FK that their frequency 
< FILTER_THRESHOLD_FK_FOR_IR * FreqCS's frequency */     
 //                                             //Replaced by 
INFREQ_TYPE_THRESHOLD as a reference can be considered as a type of the object 
value
diff --git a/monetdb5/extras/rdf/rdftypes.c b/monetdb5/extras/rdf/rdftypes.c
--- a/monetdb5/extras/rdf/rdftypes.c
+++ b/monetdb5/extras/rdf/rdftypes.c
@@ -98,7 +98,6 @@ char isInt(char *input, int len){
                return 0;
 }
 
-
 char isDouble(char *input, int len){
        
        int     i = 0;
@@ -590,20 +589,20 @@ encodeValueInOid(ValPtr vrPtrRealValue, 
 static ObjectType getObjType_fromValRec(ValRecord v){
        ObjectType objT; 
        switch (v.vtype){
-               case TYPE_bit:
                case TYPE_bte:
                case TYPE_sht:
                case TYPE_int:
                case TYPE_wrd:
-               case TYPE_lng:
                        objT = INTEGER; 
                        break; 
                case TYPE_oid: 
                        objT = URI; 
                        break;
+               case TYPE_lng:
                case TYPE_dbl:
                case TYPE_flt:                  
                        objT = DOUBLE;
+                       break; 
                case TYPE_str:          //Have not handle this case
                        assert(0); 
                default: 
@@ -612,7 +611,42 @@ static ObjectType getObjType_fromValRec(
 
        return objT; 
 }
+//Set the value for the new type from the old value
+static void set_Val_of_new_type(ValPtr v, ObjectType objT){
+       
+       if (objT == INTEGER){
+               switch (v->vtype){
+                       case TYPE_bte:
+                               v->val.ival = (int) v->val.btval;
+                               break;
+                       case TYPE_sht:
+                               v->val.ival = (int) v->val.shval;
+                               break;
+                       case TYPE_int:                          
+                               break; 
+                       default: 
+                               assert(0); 
+               }
+               v->vtype = TYPE_int; 
+       } else if (objT == DOUBLE) {
+       
+               switch (v->vtype){
+                       case TYPE_lng:
+                               v->val.dval = (double) v->val.lval;
+                               break;          
+                       case TYPE_flt:                  
+                               v->val.dval = (double) v->val.fval;             
        
+                               break; 
+                       case TYPE_dbl:
+                               break;
+                       default: 
+                               assert(0);
+               }
+               v->vtype = TYPE_dbl;                            
+       } else 
+               assert(0);
 
+}
 
 void get_encodedOid_from_atom(atom *at, oid *ret){
        ValRecord vrec = at->data; 
@@ -626,7 +660,13 @@ void get_encodedOid_from_atom(atom *at, 
                *ret = (oid)(vrec.val.lval); 
                return; 
        }
-       encodeValueInOid(&vrec, objT, ret); 
+       
+       if (objT == INTEGER || objT == DOUBLE){
+               set_Val_of_new_type(&vrec, objT); 
+               encodeValueInOid(&vrec, objT, ret); 
+               return;
+       }
+
 }
 
 void 
diff --git a/sql/backends/monet5/sql_rdf.c b/sql/backends/monet5/sql_rdf.c
--- a/sql/backends/monet5/sql_rdf.c
+++ b/sql/backends/monet5/sql_rdf.c
@@ -1587,6 +1587,8 @@ static void refine_BAT_with_possible_tbl
        
        *retsbat = r_sbat; 
        *retobat = r_obat; 
+       
+       return; 
 
    bunins_failed:
        fprintf(stderr, "refine_BAT_with_possible_tblId: Failed in fast 
inserting\n");
diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c
--- a/sql/server/rel_optimizer.c
+++ b/sql/server/rel_optimizer.c
@@ -5966,6 +5966,16 @@ rel_simplify_predicates(int *changes, mv
                                if (flag)
                                        break;
                        }
+                       if (is_atom(e->type) && !e->l && !e->r) { /* numbered 
variable */
+                               atom *a = sql->args[e->flag];
+                               int flag = a->data.val.bval;
+
+                               /* remove simple select true expressions */
+                               if (flag) {
+                                       sql->caching = 0;
+                                       break;
+                               }
+                       }
                        if (e->type == e_cmp && get_cmp(e) == cmp_equal) {
                                sql_exp *l = e->l;
                                sql_exp *r = e->r;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to