Changeset: cf71a2dd1319 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=cf71a2dd1319
Modified Files:
monetdb5/extras/rdf/rdf_shredder.c
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Fix a bug in rdf_shredder (when assigning the value of objType) that also
caused problems when exploring the types of CS properties.
diffs (221 lines):
diff --git a/monetdb5/extras/rdf/rdf_shredder.c
b/monetdb5/extras/rdf/rdf_shredder.c
--- a/monetdb5/extras/rdf/rdf_shredder.c
+++ b/monetdb5/extras/rdf/rdf_shredder.c
@@ -158,7 +158,7 @@ rdf_BUNappend_unq_ForObj(parserData* pda
/* Add the type here by changing 2 bits at position 62, 63 of
oid */
*bun |= (BUN)objType << (sizeof(BUN)*8 - 4);
-
+
//b = BUNappend(b, (ptr) (str)objStr, TRUE);
b = BUNins(b, (ptr) bun, (ptr) (str)objStr, TRUE);
@@ -235,7 +235,7 @@ char isInt(char *input){
static ObjectType
getObjectType(unsigned char* objStr, BUN *realNumValue){
- ObjectType obType;
+ ObjectType obType = STRING;
unsigned char* endpart;
char* valuepart;
const char* pos = NULL;
@@ -275,6 +275,8 @@ getObjectType(unsigned char* objStr, BUN
//printf("%s: String \n", objStr);
}
}
+ else
+ obType = STRING;
return obType;
}
@@ -362,7 +364,7 @@ tripleHandler(void* user_data, const rap
ObjectType objType = STRING;
objStr = raptor_term_to_string(triple->object);
objType = getObjectType(objStr, &realNumValue);
-
+
rdf_BUNappend_unq_ForObj(pdata, graph[MAP_LEX],
(str)objStr, objType, &bun);
rdf_BUNappend(pdata, graph[O_sort], &bun);
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -722,7 +722,21 @@ CSPropTypes* initCSPropTypes(CSset* freq
return csPropTypes;
}
-
+static
+void printCSPropTypes(CSPropTypes* csPropTypes, int numMergedCS, CSset*
freqCSset){
+ int i, j, k;
+
+ for (i = 0; i < numMergedCS; i++){
+ printf("MergedCS %d (Freq: %d): \n", i,
freqCSset->items[csPropTypes[i].freqCSId].support);
+ for(j = 0; j < csPropTypes[i].numProp; j++){
+ printf(" P " BUNFMT " : ",
csPropTypes[i].lstPropTypes[j].prop);
+ for (k = 0; k < csPropTypes[i].lstPropTypes[j].numType;
k++){
+ printf(" Type %d (%d) | ", k,
csPropTypes[i].lstPropTypes[j].lstFreq[k]);
+ }
+ printf("\n");
+ }
+ }
+}
/*
* Add types of properties
* Note that the property list is sorted by prop's oids
@@ -734,9 +748,11 @@ void addPropTypes(char *buffTypes, oid*
int i,j;
int tblId = csTblIdxMapping[csId];
+ //printf("Add %d prop from CS %d to table %d \n", numP, csId, tblId);
j = 0;
if (tblId != -1){
for (i = 0; i < numP; i++){
+ //printf(" P: " BUNFMT " Type: %d ", buffP[i],
buffTypes[i]);
while (csPropTypes[tblId].lstPropTypes[j].prop !=
buffP[i]){
j++;
}
@@ -745,6 +761,7 @@ void addPropTypes(char *buffTypes, oid*
}
}
+ //printf("\n");
}
static
@@ -4202,61 +4219,62 @@ RDFreorganize(int *ret, CStableStat *cst
throw(RDF, "rdf.RDFreorganize", "Problem in extracting CSs");
}
+
+
+ printf("Start re-organizing triple store for " BUNFMT " CSs \n",
maxCSoid);
+
+ csTblIdxMapping = (int *) malloc (sizeof (int) * (maxCSoid + 1));
+ initIntArray(csTblIdxMapping, (maxCSoid + 1), -1);
+
+ mfreqIdxTblIdxMapping = (int *) malloc (sizeof (int) *
freqCSset->numCSadded);
+ initIntArray(mfreqIdxTblIdxMapping , freqCSset->numCSadded, -1);
+
+ mTblIdxFreqIdxMapping = (int *) malloc (sizeof (int) *
freqCSset->numCSadded); // A little bit reduntdant space
+ initIntArray(mTblIdxFreqIdxMapping , freqCSset->numCSadded, -1);
+
+ //Mapping from from CSId to TableIdx
+ initCSTableIdxMapping(freqCSset, csTblIdxMapping,
mfreqIdxTblIdxMapping, mTblIdxFreqIdxMapping);
+
+ // Init CStableStat
+ initCStables(cstablestat, freqCSset);
+
+ if ((sbat = BATdescriptor(*sbatid)) == NULL) {
+ throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
+ }
+
+ if ((obat = BATdescriptor(*obatid)) == NULL) {
+ BBPreleaseref(sbat->batCacheid);
+ throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
+ }
+
+ if ((pbat = BATdescriptor(*pbatid)) == NULL) {
+ BBPreleaseref(sbat->batCacheid);
+ BBPreleaseref(obat->batCacheid);
+ throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
+ }
+
+ si = bat_iterator(sbat);
+ pi = bat_iterator(pbat);
+ oi = bat_iterator(obat);
+
+ /* Get possible types of each property in a table (i.e., mergedCS) */
+ csPropTypes = initCSPropTypes(freqCSset, cstablestat->numTables);
+ RDFExtractCSPropTypes(ret, sbat, si, pi, oi, subjCSMap,
csTblIdxMapping, csPropTypes, maxNumPwithDup);
+ printCSPropTypes(csPropTypes,cstablestat->numTables, freqCSset);
+
if (*mode == EXPLOREONLY){
printf("Only explore the schema information \n");
freeCSset(freqCSset);
free(subjCSMap);
free(subjdefaultMap);
+ free(csTblIdxMapping);
+ free(mfreqIdxTblIdxMapping);
+ free(mTblIdxFreqIdxMapping);
return MAL_SUCCEED;
}
-
- printf("Start re-organizing triple store for " BUNFMT " CSs \n",
maxCSoid);
-
- csTblIdxMapping = (int *) malloc (sizeof (int) * (maxCSoid + 1));
- initIntArray(csTblIdxMapping, (maxCSoid + 1), -1);
-
- mfreqIdxTblIdxMapping = (int *) malloc (sizeof (int) *
freqCSset->numCSadded);
- initIntArray(mfreqIdxTblIdxMapping , freqCSset->numCSadded, -1);
-
- mTblIdxFreqIdxMapping = (int *) malloc (sizeof (int) *
freqCSset->numCSadded); // A little bit reduntdant space
- initIntArray(mTblIdxFreqIdxMapping , freqCSset->numCSadded, -1);
-
- //Mapping from from CSId to TableIdx
- initCSTableIdxMapping(freqCSset, csTblIdxMapping,
mfreqIdxTblIdxMapping, mTblIdxFreqIdxMapping);
-
- // Init CStableStat
- initCStables(cstablestat, freqCSset);
-
-
- lastSubjId = (oid *) malloc (sizeof(oid) * cstablestat->numTables);
- initArray(lastSubjId, cstablestat->numTables, -1);
-
- lastSubjIdEx = (oid *) malloc (sizeof(oid) * cstablestat->numTables);
- initArray(lastSubjIdEx, cstablestat->numTables, -1);
-
- if ((sbat = BATdescriptor(*sbatid)) == NULL) {
- throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
- }
-
- if ((obat = BATdescriptor(*obatid)) == NULL) {
- BBPreleaseref(sbat->batCacheid);
- throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
- }
-
- if ((pbat = BATdescriptor(*pbatid)) == NULL) {
- BBPreleaseref(sbat->batCacheid);
- BBPreleaseref(obat->batCacheid);
- throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
- }
-
- si = bat_iterator(sbat);
- pi = bat_iterator(pbat);
- oi = bat_iterator(obat);
-
- /* Get possible types of each property in a table (i.e., mergedCS) */
- csPropTypes = initCSPropTypes(freqCSset, cstablestat->numTables);
- RDFExtractCSPropTypes(ret, sbat, si, pi, oi, subjCSMap,
csTblIdxMapping, csPropTypes, maxNumPwithDup);
+
+
sNewBat = BATnew(TYPE_void, TYPE_oid, BATcount(sbat));
@@ -4281,6 +4299,11 @@ RDFreorganize(int *ret, CStableStat *cst
BATseqbase(rmap, 0);
+ lastSubjId = (oid *) malloc (sizeof(oid) * cstablestat->numTables);
+ initArray(lastSubjId, cstablestat->numTables, -1);
+
+ lastSubjIdEx = (oid *) malloc (sizeof(oid) * cstablestat->numTables);
+ initArray(lastSubjIdEx, cstablestat->numTables, -1);
printf("Re-assigning Subject oids ... ");
lastS = -1;
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -88,8 +88,8 @@ typedef struct PropStat {
#define FULL_PROP_STAT 1 // Only use for showing the statistic on all
properties / all CSs. (Default should be 0)
-#define USE_LABEL_FINDING_MAXCS 1 // Use the labels received from
labeling process for finding maxCS
-#define USE_LABEL_FOR_MERGING 1 // Use the labels received from
labeling process for finding mergeCS
+#define USE_LABEL_FINDING_MAXCS 0 // Use the labels received from
labeling process for finding maxCS
+#define USE_LABEL_FOR_MERGING 0 // Use the labels received from
labeling process for finding mergeCS
typedef struct CS
{
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list