Changeset: 1f98cd0cb212 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=1f98cd0cb212
Modified Files:
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
sql/backends/monet5/sql.mx
Branch: rdf
Log Message:
Generate type-specific relational tables and derive a name for each table.
Known issue: the names assigned to the tables may still be problematic.
diffs (truncated from 443 to 300 lines):
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -4548,6 +4548,45 @@ void getObjStr(BAT *mapbat, BATiter mapi
}
+
+/*
+ * Build the SQL table name for a table label.
+ *
+ * name   : output buffer owned by the caller; at most 50 bytes are written
+ *          (49 characters plus the terminating NUL), and the result is
+ *          always NUL-terminated.
+ * nameId : oid of the label, or BUN_NONE when the table has no label.
+ *
+ * The label string is obtained with takeOid() and shortened with
+ * getPropNameShort(); this assumes the Tokenizer is already opened.
+ * "NONAME" is returned when there is no label, when the label cannot be
+ * retrieved, when the short label is empty, or when it contains '.' or '-'
+ * (WEBCRAWL specific problem with table names such as a.jpg, b.png, ...).
+ * Anything from the first '(' onwards is removed from the name.
+ */
+void getTblName(char *name, oid nameId){
+	enum { TBLNAME_BUFSIZE = 50 };	/* bytes written to name, including the NUL */
+	str canStr = NULL;
+	str canStrShort = NULL;
+	char *pch;
+	size_t len;
+
+	/* Default result; only replaced below by a usable label */
+	strcpy(name, "NONAME");
+
+	if (nameId == BUN_NONE) return;
+
+	takeOid(nameId, &canStr);
+	if (canStr == NULL) return;
+
+	getPropNameShort(&canStrShort, canStr);
+	GDKfree(canStr);
+	if (canStrShort == NULL) return;
+
+	/* Labels containing '.' or '-' are rejected and keep the default name */
+	if (strpbrk(canStrShort, ".-") == NULL){
+		/* Remove (...) characters from table name */
+		pch = strchr(canStrShort, '(');
+		if (pch != NULL) *pch = '\0';
+
+		len = strlen(canStrShort);
+		if (len > 0){
+			/* Truncate over-long labels but always terminate the result */
+			if (len >= TBLNAME_BUFSIZE) len = TBLNAME_BUFSIZE - 1;
+			memcpy(name, canStrShort, len);
+			name[len] = '\0';
+		}
+	}
+
+	GDKfree(canStrShort);
+}
+
+
static
str printSampleData(CSSample *csSample, CSset *freqCSset, BAT *mbat, int num){
@@ -4931,6 +4970,35 @@ CSrel* generateCsRelBetweenMergeFreqSet(
}
+/* Create a new data structure to store relationships including merged CS */
+/*
+static
+CSrel* getRefinedCsRelSet(CSrel *csrelFreqSet, CSset *freqCSset){
+ int i,j;
+ int numFreqCS = freqCSset->numOrigFreqCS;
+ int from, to;
+ CSrel rel;
+ CSrel* refinedCsRel;
+ int numRel = freqCSset->numCSadded;
+
+ refinedCsRel = initCSrelset(numRel);
+
+ for (i = 0; i < numRel; ++i) {
+ if (csrelFreqSet[i].numRef == 0) continue; // ignore CS without
relations
+ rel = csrelFreqSet[i];
+ // update the 'from' value
+ from = i;
+ assert(freqCSset[from].parentFreqIdx == -1);
+ for (j = 0; j < rel.numRef; ++j) {
+ to = rel.lstRefFreqIdx[j];
+ assert(freqCSset->items[to].parentFreqIdx == -1);
+ // add relation to new data structure
+ addReltoCSRelWithFreq(from, to, rel.lstPropId[j],
rel.lstCnt[j], rel.lstBlankCnt[j], &refinedCsRel[from]);
+ }
+ }
+ return refinedCsRel;
+}
+*/
static
CSrel* generateCsRelToMergeFreqSet(CSrel *csrelFreqSet, CSset *freqCSset){
int i,j;
@@ -5263,8 +5331,13 @@ RDFextractCSwithTypes(int *ret, bat *sba
//Finally, re-create mergeFreqSet
+
*csRelMergeFreqSet = generateCsRelBetweenMergeFreqSet(csrelSet,
freqCSset);
printCSRel(freqCSset, *csRelMergeFreqSet, *freqThreshold);
+
+ curT = clock();
+ printf ("Get the final relationships between mergeCS took %f.
\n",((float)(curT - tmpLastT))/CLOCKS_PER_SEC);
+ tmpLastT = curT;
printmergeCSSet(freqCSset, *freqThreshold);
//getStatisticCSsBySize(csMap,maxNumProp);
@@ -5458,7 +5531,7 @@ str triplesubsort(BAT **sbat, BAT **pbat
}
static
-void initCStables(CStableStat* cstablestat, CSset* freqCSset, CSPropTypes
*csPropTypes, int numTables){
+void initCStables(CStableStat* cstablestat, CSset* freqCSset, CSPropTypes
*csPropTypes, int numTables, CSlabel *labels, int *mTblIdxFreqIdxMapping){
int i,j, k;
int tmpNumDefaultCol;
@@ -5509,11 +5582,13 @@ void initCStables(CStableStat* cstablest
cstablestat->lstcstable[i].lstMVTables = (CSMVtableEx *)
malloc(sizeof(CSMVtableEx) * tmpNumDefaultCol); // TODO: Only allocate memory
for multi-valued columns
cstablestat->lstcstable[i].lstProp = (oid*)malloc(sizeof(oid) *
tmpNumDefaultCol);
cstablestat->lstcstable[i].colTypes = (ObjectType
*)malloc(sizeof(ObjectType) * tmpNumDefaultCol);
+ cstablestat->lstcstable[i].tblname =
labels[mTblIdxFreqIdxMapping[i]].name;
#if CSTYPE_TABLE == 1
tmpNumExCol = csPropTypes[i].numNonDefTypes;
cstablestat->lastInsertedSEx[i] = (oid*) malloc(sizeof(oid) *
tmpNumExCol);
cstablestat->lstcstableEx[i].numCol = tmpNumExCol;
cstablestat->lstcstableEx[i].colBats =
(BAT**)malloc(sizeof(BAT*) * tmpNumExCol);
+ cstablestat->lstcstableEx[i].tblname =
labels[mTblIdxFreqIdxMapping[i]].name;
#endif
for(j = 0; j < tmpNumDefaultCol; j++){
@@ -5780,33 +5855,33 @@ void getRealValue(void **returnValue, oi
switch (objType)
{
case STRING:
- printf("A String object value: %s \n",objStr);
+ //printf("A String object value: %s \n",objStr);
if (*returnValue != NULL) free(*returnValue);
*returnValue = (char *)malloc(sizeof(char) *
strlen(objStr) + 1);
memcpy(*returnValue,objStr, sizeof(char) *
strlen(objStr) + 1);
- printf("A String value of returnValue: %s \n", (char
*)(*returnValue));
+ //printf("A String value of returnValue: %s \n", (char
*)(*returnValue));
break;
case DATETIME:
datetimeStr = getDateTimeFromRDFString(objStr);
if (*returnValue != NULL) free(*returnValue);
*returnValue = (char *)malloc(sizeof(char) *
strlen(datetimeStr) + 1);
memcpy(*returnValue,datetimeStr,sizeof(char) *
strlen(objStr) + 1);
- printf("A datetime object value: %s \n",(char
*)(*returnValue));
+ //printf("A datetime object value: %s \n",(char
*)(*returnValue));
break;
case INTEGER:
- printf("Full object value: %s \n",objStr);
+ //printf("Full object value: %s \n",objStr);
realInt = getIntFromRDFString(objStr);
- printf("A INTEGER object value: %i \n",realInt);
+ //printf("A INTEGER object value: %i \n",realInt);
*(int*)(*returnValue) = realInt;
break;
case FLOAT:
- printf("Full object value: %s \n",objStr);
+ //printf("Full object value: %s \n",objStr);
realFloat = getFloatFromRDFString(objStr);
- printf("A FLOAT object value: %f \n",realFloat);
+ //printf("A FLOAT object value: %f \n",realFloat);
*(float*)(*returnValue) = realFloat;
break;
default: //URI or BLANK NODE
- printf("A URI object value: " BUNFMT " \n", objOid);
+ //printf("A URI object value: " BUNFMT " \n", objOid);
realUri = objOid;
*(oid*)(*returnValue) = realUri;
}
@@ -5944,12 +6019,11 @@ str RDFdistTriplesToCSs(int *ret, bat *s
assert(objType != MULTIVALUES); //TODO: Remove
this
tmpMVColIdx =
csPropTypes[tblIdx].lstPropTypes[tmpColIdx].colIdxes[(int)objType];
tmpBat =
cstablestat->lstcstable[tblIdx].colBats[tmpColIdx];
- BATprint(tmpBat);
- BATprint(tmpmvBat);
getRealValue(&realObjValue, *obt, objType, mi, mbat);
for (i = 0; i <
cstablestat->lstcstable[tblIdx].lstMVTables[tmpColIdx].numCol; i++){
tmpmvBat =
cstablestat->lstcstable[tblIdx].lstMVTables[tmpColIdx].mvBats[i];
+ BATprint(tmpmvBat);
if (i == tmpMVColIdx){
// TODO: If i != 0, try to cast to
default value
BUNappend(tmpmvBat, (ptr) realObjValue,
TRUE);
@@ -6141,7 +6215,7 @@ RDFreorganize(int *ret, CStableStat *cst
mfreqIdxTblIdxMapping = (int *) malloc (sizeof (int) *
freqCSset->numCSadded);
initIntArray(mfreqIdxTblIdxMapping , freqCSset->numCSadded, -1);
- mTblIdxFreqIdxMapping = (int *) malloc (sizeof (int) *
freqCSset->numCSadded); // A little bit reduntdant space
+ mTblIdxFreqIdxMapping = (int *) malloc (sizeof (int) *
freqCSset->numCSadded); // TODO: little bit reduntdant space
initIntArray(mTblIdxFreqIdxMapping , freqCSset->numCSadded, -1);
//Mapping from from CSId to TableIdx
@@ -6192,7 +6266,7 @@ RDFreorganize(int *ret, CStableStat *cst
tmpLastT = curT;
// Init CStableStat
- initCStables(cstablestat, freqCSset, csPropTypes, numTables);
+ initCStables(cstablestat, freqCSset, csPropTypes, numTables, labels,
mTblIdxFreqIdxMapping);
// Summarize the statistics
curNumMergeCS = countNumberMergeCS(freqCSset);
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -178,7 +178,7 @@ typedef struct SubCSSet{
} SubCSSet;
//#define INIT_NUM_CS 9999 // workaround
-#define INIT_NUM_CS 100 // workaround
+#define INIT_NUM_CS 500 // workaround
#define SIM_THRESHOLD 0.6
#define SIM_TFIDF_THRESHOLD 0.55
#define IMPORTANCE_THRESHOLD 0.01
@@ -248,6 +248,7 @@ typedef struct CStable {
CSMVtableEx *lstMVTables;
int numCol;
oid* lstProp;
+ oid tblname; /* Label of the table */
} CStable;
@@ -255,6 +256,7 @@ typedef struct CStableEx { /* For non-d
BAT** colBats;
ObjectType* colTypes;
int numCol;
+ oid tblname; /* Label of the table */
} CStableEx;
@@ -328,6 +330,9 @@ rdf_export str
RDFreorganize(int *ret, CStableStat *cstablestat, bat *sbatid, bat *pbatid,
bat *obatid, bat *mapbatid, int *freqThreshold, int *mode);
rdf_export void
+getTblName(char *name, oid nameId);
+
+rdf_export void
freeCStableStat(CStableStat *cstablestat);
rdf_export void
diff --git a/sql/backends/monet5/sql.mx b/sql/backends/monet5/sql.mx
--- a/sql/backends/monet5/sql.mx
+++ b/sql/backends/monet5/sql.mx
@@ -7672,11 +7672,14 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
int ret = 0;
CStableStat *cstablestat;
char tmptbname[100];
+ char tmpstr[20];
char tmptbnameex[100];
//char tmpviewname[100];
char tmpcolname[100];
//char viewcommand[500];
- sql_subtype tpe;
+ sql_subtype tpe;
+ sql_subtype tpes[50];
+
sql_table **cstables;
sql_table ***csmvtables; //table for storing multi-values
#if CSTYPE_TABLE == 1
@@ -7692,7 +7695,8 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
str msg;
BAT *sbat, *pbat, *obat, *mbat;
-
+ BAT *tmpbat;
+
rethrow("sql.rdfShred", msg, getSQLContext(cntxt, mb, &m, NULL));
if ((sch = mvc_bind_schema(m, *schema)) == NULL)
@@ -7752,8 +7756,41 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
printf("Starting creating SQL Table -- \n");
+
+
+ sql_find_subtype(&tpes[TYPE_oid], "oid", 31 , 0);
+ printf("Tpes %d Type name is: %s \n", TYPE_oid,
tpes[TYPE_oid].type->sqlname);
+
+ sql_find_subtype(&tpes[TYPE_str], "varchar", 500 , 0);
+ printf("Tpes %d Type name is: %s \n", TYPE_str,
tpes[TYPE_str].type->sqlname);
+
+ sql_find_subtype(&tpes[TYPE_dbl], "double", 53 , 0);
+ printf("Tpes %d Type name is: %s \n", TYPE_dbl,
tpes[TYPE_dbl].type->sqlname);
+
+ sql_find_subtype(&tpes[TYPE_int], "int", 9 , 0);
+ printf("Tpes %d Type name is: %s \n", TYPE_int,
tpes[TYPE_int].type->sqlname);
+
+ sql_find_subtype(&tpes[TYPE_flt], "real", 23, 0);
+ printf("Tpes %d Type name is: %s \n", TYPE_flt,
tpes[TYPE_flt].type->sqlname);
+
+ /*
+ sql_find_subtype(&tpe, "float", 0 , 0);
+ printf("Test Type name is: %s \n", tpe.type->sqlname);
+ sql_find_subtype(&tpe, "int", 9 , 0);
+ printf("Test Type name is: %s \n", tpe.type->sqlname);
+ sql_find_subtype(&tpe, "oid", 31 , 0);
+ printf("Test Type name is: %s \n", tpe.type->sqlname);
+ */
+
for (i = 0; i < cstablestat->numTables; i++){
- sprintf(tmptbname, "cstable%d",i);
+ printf("creating table %d \n", i);
+ //sprintf(tmptbname, "cstable%d",i);
+
+ sprintf(tmpstr, "%d",i);
+ getTblName(tmptbname, cstablestat->lstcstable[i].tblname);
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list