Changeset: 72505c66484c for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=72505c66484c
Modified Files:
monetdb5/extras/rdf/rdf_shredder.c
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
monetdb5/extras/rdf/rdftypes.c
sql/backends/monet5/sql_rdf.c
sql/server/rel_optimizer.c
Branch: rdf
Log Message:
Avoid cross product + handle xsd:long in the input rdf data.
- Re-order joins with Atom "True" in the join predicate
- The ID in the LDBC data generator is a long value.
diffs (257 lines):
diff --git a/monetdb5/extras/rdf/rdf_shredder.c
b/monetdb5/extras/rdf/rdf_shredder.c
--- a/monetdb5/extras/rdf/rdf_shredder.c
+++ b/monetdb5/extras/rdf/rdf_shredder.c
@@ -247,7 +247,8 @@ getObjectType_and_Value(unsigned char* o
}
else if ((pos = strstr((str) endpart, "XMLSchema#float>")) !=
NULL
|| (pos = strstr((str) endpart,
"XMLSchema#double>")) != NULL
- || (pos = strstr((str) endpart,
"XMLSchema#decimal>")) != NULL){
+ || (pos = strstr((str) endpart,
"XMLSchema#decimal>")) != NULL
+ || (pos = strstr((str) endpart,
"XMLSchema#long>")) != NULL){
obType = DOUBLE;
subLen = (int) (pos - (str)objStr - 28);
valuepart = substring((char*)objStr, 2 , subLen);
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -4469,7 +4469,12 @@ void mergeCSByS4(CSset *freqCSset, CSlab
if (simscore > simTfidfThreshold &&
(existDiscriminatingProp || isSameLabel)){
#else
- if (simscore > simTfidfThreshold &&
existDiscriminatingProp){
+ if (
+ #if MERGE_SAME_PROP_CS
+ simscore > SIM_SAME_PROP_THRESHOLD ||
+ #endif
+ (simscore > simTfidfThreshold &&
existDiscriminatingProp)){
+
//if (simscore > simTfidfThreshold){
#endif
#else
@@ -5213,39 +5218,6 @@ float similarityScoreWithOntologyClass(o
return ((float) sumXY);
}
-#if COUNT_PERCENTAGE_ONTO_PROP_USED
-
-static
-void countNumOverlapProp(oid* arr1, oid* arr2, int m, int n,
- int *numOverlap){
-
- int i = 0, j = 0;
- int numCommon = 0;
-
- i = 0;
- j = 0;
- while( i < n && j < m )
- {
- if( arr1[j] < arr2[i] ){
- j++;
-
- }
- else if( arr1[j] == arr2[i] )
- {
- j++;
- i++;
- numCommon++;
-
- }
- else if( arr1[j] > arr2[i] )
- i++;
- }
-
- *numOverlap = numCommon;
-
-}
-#endif
-
static
void getBestRdfTypeValue(oid *buff, int numP, oid *rdftypeOntologyValues, char
*rdftypeSelectedValues, char *rdftypeSpecificLevels, BUN *rdftypeOntClassPos,
int *numTypeValues, int maxSpecificLevel, TFIDFInfo *tfidfInfos){
int i, j, k;
@@ -5339,6 +5311,40 @@ void getBestRdfTypeValue(oid *buff, int
#if COUNT_PERCENTAGE_ONTO_PROP_USED
+
+static
+void countNumOverlapProp(oid* arr1, oid* arr2, int m, int n,
+ int *numOverlap){
+
+ int i = 0, j = 0;
+ int numCommon = 0;
+
+ i = 0;
+ j = 0;
+ while( i < n && j < m )
+ {
+ if( arr1[j] < arr2[i] ){
+ j++;
+
+ }
+ else if( arr1[j] == arr2[i] )
+ {
+ j++;
+ i++;
+ numCommon++;
+
+ }
+ else if( arr1[j] > arr2[i] )
+ i++;
+ }
+
+ *numOverlap = numCommon;
+
+}
+#endif
+
+
+#if COUNT_PERCENTAGE_ONTO_PROP_USED
/*
* If the name of the CS comes from an ontology class,
* ontology contribution for the CS is computed as:
@@ -5521,7 +5527,7 @@ str RDFassignCSId(int *ret, BAT *sbat, B
first = 0;
last = BATcount(sbat) -1;
-
+ printf("Number of triples %d\n", last);
for (p = first; p <= last; p++){
sbt = sbatCursor[p];
if (sbt != curS){
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -162,6 +162,9 @@ typedef struct PsoPropStat {
//URI should be ok.
#define ONLY_MERGE_URINAME_CS_S1 1 /* Only merge CS's
whose name is an URI */
+#define MERGE_SAME_PROP_CS 1
+#define SIM_SAME_PROP_THRESHOLD 0.9999 /* It should exactly be 1.0,
however, the float multiplication may loss the precision */
+
#define FILTER_INFREQ_FK_FOR_IR 1 /* We filter out all
the dirty references from a CS */
//#define FILTER_THRESHOLD_FK_FOR_IR 0.1 /* The FK that their frequency
< FILTER_THRESHOLD_FK_FOR_IR * FreqCS's frequency */
// //Replaced by
INFREQ_TYPE_THRESHOLD as a reference can be considered as a type of the object
value
diff --git a/monetdb5/extras/rdf/rdftypes.c b/monetdb5/extras/rdf/rdftypes.c
--- a/monetdb5/extras/rdf/rdftypes.c
+++ b/monetdb5/extras/rdf/rdftypes.c
@@ -98,7 +98,6 @@ char isInt(char *input, int len){
return 0;
}
-
char isDouble(char *input, int len){
int i = 0;
@@ -590,20 +589,20 @@ encodeValueInOid(ValPtr vrPtrRealValue,
static ObjectType getObjType_fromValRec(ValRecord v){
ObjectType objT;
switch (v.vtype){
- case TYPE_bit:
case TYPE_bte:
case TYPE_sht:
case TYPE_int:
case TYPE_wrd:
- case TYPE_lng:
objT = INTEGER;
break;
case TYPE_oid:
objT = URI;
break;
+ case TYPE_lng:
case TYPE_dbl:
case TYPE_flt:
objT = DOUBLE;
+ break;
case TYPE_str: //Have not handle this case
assert(0);
default:
@@ -612,7 +611,42 @@ static ObjectType getObjType_fromValRec(
return objT;
}
+//Set the value for the new type from the old value
+static void set_Val_of_new_type(ValPtr v, ObjectType objT){
+
+ if (objT == INTEGER){
+ switch (v->vtype){
+ case TYPE_bte:
+ v->val.ival = (int) v->val.btval;
+ break;
+ case TYPE_sht:
+ v->val.ival = (int) v->val.shval;
+ break;
+ case TYPE_int:
+ break;
+ default:
+ assert(0);
+ }
+ v->vtype = TYPE_int;
+ } else if (objT == DOUBLE) {
+
+ switch (v->vtype){
+ case TYPE_lng:
+ v->val.dval = (double) v->val.lval;
+ break;
+ case TYPE_flt:
+ v->val.dval = (double) v->val.fval;
+ break;
+ case TYPE_dbl:
+ break;
+ default:
+ assert(0);
+ }
+ v->vtype = TYPE_dbl;
+ } else
+ assert(0);
+}
void get_encodedOid_from_atom(atom *at, oid *ret){
ValRecord vrec = at->data;
@@ -626,7 +660,13 @@ void get_encodedOid_from_atom(atom *at,
*ret = (oid)(vrec.val.lval);
return;
}
- encodeValueInOid(&vrec, objT, ret);
+
+ if (objT == INTEGER || objT == DOUBLE){
+ set_Val_of_new_type(&vrec, objT);
+ encodeValueInOid(&vrec, objT, ret);
+ return;
+ }
+
}
void
diff --git a/sql/backends/monet5/sql_rdf.c b/sql/backends/monet5/sql_rdf.c
--- a/sql/backends/monet5/sql_rdf.c
+++ b/sql/backends/monet5/sql_rdf.c
@@ -1587,6 +1587,8 @@ static void refine_BAT_with_possible_tbl
*retsbat = r_sbat;
*retobat = r_obat;
+
+ return;
bunins_failed:
fprintf(stderr, "refine_BAT_with_possible_tblId: Failed in fast
inserting\n");
diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c
--- a/sql/server/rel_optimizer.c
+++ b/sql/server/rel_optimizer.c
@@ -5966,6 +5966,16 @@ rel_simplify_predicates(int *changes, mv
if (flag)
break;
}
+ if (is_atom(e->type) && !e->l && !e->r) { /* numbered
variable */
+ atom *a = sql->args[e->flag];
+ int flag = a->data.val.bval;
+
+ /* remove simple select true expressions */
+ if (flag) {
+ sql->caching = 0;
+ break;
+ }
+ }
if (e->type == e_cmp && get_cmp(e) == cmp_equal) {
sql_exp *l = e->l;
sql_exp *r = e->r;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list