Changeset: e78930413d8d for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e78930413d8d
Modified Files:
monetdb5/extras/rdf/rdf_shredder.c
monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:
Map each CSid in a relationship to its maximum super CSid.
Then group identical super CSids within the relationships originating from one CS.
diffs (truncated from 327 to 300 lines):
diff --git a/monetdb5/extras/rdf/rdf_shredder.c b/monetdb5/extras/rdf/rdf_shredder.c
--- a/monetdb5/extras/rdf/rdf_shredder.c
+++ b/monetdb5/extras/rdf/rdf_shredder.c
@@ -297,8 +297,16 @@ tripleHandler(void* user_data, const rap
BAT **graph = pdata->graph;
if (pdata->error > pdata->lasterror){
- printf("Incorrect or wrong syntax triple %s \n ",
pdata->errorMsg);
+ unsigned char* objStr;
+ int objLen;
+ //printf("[Incorrect or wrong syntax triple] %s \n ",
pdata->errorMsg);
pdata->lasterror = pdata->error;
+ objStr = raptor_term_to_string(triple->object);
+ objLen = strlen((const char*)objStr);
+ //printf("Object: %s %d \n", objStr, objLen);
+ if (objLen == 2)
+ printf("EMPTY OBJECT STRING \n");
+ free(objStr);
}
else{
if (triple->subject->type == RAPTOR_TERM_TYPE_URI
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -127,6 +127,97 @@ CSrel* creataCSrel(oid csoid){
return csrel;
}
+
+static
+void addReltoCSRel(oid origCSoid, oid refCSoid, CSrel *csrel)
+{
+ void *_tmp;
+ void *_tmp2;
+
+ int i = 0;
+
+ assert (origCSoid == csrel->origCSoid);
+
+ while (i < csrel->numRef){
+ if (refCSoid == csrel->lstRefCSoid[i]){
+ //Existing
+ break;
+ }
+ i++;
+ }
+
+ if (i != csrel->numRef){
+ csrel->lstCnt[i]++;
+ return;
+ }
+ else{ // New Ref
+
+ if(csrel->numRef == csrel->numAllocation)
+ {
+ csrel->numAllocation += INIT_NUM_CSREL;
+
+ _tmp = realloc(csrel->lstRefCSoid,
(csrel->numAllocation * sizeof(oid)));
+ _tmp2 = realloc(csrel->lstCnt, (csrel->numAllocation *
sizeof(int)));
+
+ if (!_tmp || !_tmp2){
+ fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
+ }
+ csrel->lstRefCSoid = (oid*)_tmp;
+ csrel->lstCnt = (int*)_tmp2;
+ }
+
+ csrel->lstRefCSoid[csrel->numRef] = refCSoid;
+ csrel->lstCnt[csrel->numRef] = 1;
+ csrel->numRef++;
+ }
+}
+
+
+static
+void addReltoCSRelWithFreq(oid origCSoid, oid refCSoid, int freq, CSrel *csrel)
+{
+ void *_tmp;
+ void *_tmp2;
+
+ int i = 0;
+
+ assert (origCSoid == csrel->origCSoid);
+
+ while (i < csrel->numRef){
+ if (refCSoid == csrel->lstRefCSoid[i]){
+ //Existing
+ break;
+ }
+ i++;
+ }
+
+ if (i != csrel->numRef){
+ csrel->lstCnt[i] = csrel->lstCnt[i] + freq;
+ return;
+ }
+ else{ // New Ref
+
+ if(csrel->numRef == csrel->numAllocation)
+ {
+ csrel->numAllocation += INIT_NUM_CSREL;
+
+ _tmp = realloc(csrel->lstRefCSoid,
(csrel->numAllocation * sizeof(oid)));
+ _tmp2 = realloc(csrel->lstCnt, (csrel->numAllocation *
sizeof(int)));
+
+ if (!_tmp || !_tmp2){
+ fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
+ }
+ csrel->lstRefCSoid = (oid*)_tmp;
+ csrel->lstCnt = (int*)_tmp2;
+ }
+
+ csrel->lstRefCSoid[csrel->numRef] = refCSoid;
+ csrel->lstCnt[csrel->numRef] = freq;
+ csrel->numRef++;
+ }
+}
+
+
static
CSrel* initCSrelset(oid numCSrel){
oid i;
@@ -200,6 +291,54 @@ void printCSrelSet(CSrel *csrelSet, char
}
+/*
+ * Show the relationship from each CS to maximumFreqCSs
+ * */
+
+static
+void printCSrelWithMaxSet(oid* csSuperCSMap, CSrel *csrelWithMaxSet, CSrel
*csrelSet, char *csFreqMap, BAT* freqBat, int num, int freqThreshold){
+
+ int i;
+ int j;
+ int *freq;
+ FILE *fout;
+ char filename[100];
+ char tmpStr[20];
+
+ strcpy(filename, "csRelatioinshipWithMaxFreqCS");
+ sprintf(tmpStr, "%d", freqThreshold);
+ strcat(filename, tmpStr);
+ strcat(filename, ".txt");
+
+ fout = fopen(filename,"wt");
+
+ // Merge the relationship
+ for (i = 0; i < num; i++){
+ if (csrelSet[i].numRef != 0){
+ for (j = 0; j < csrelSet[i].numRef; j++){
+ if (csSuperCSMap[csrelSet[i].lstRefCSoid[j]] !=
BUN_NONE){
+
addReltoCSRelWithFreq(csrelSet[i].origCSoid,
csSuperCSMap[csrelSet[i].lstRefCSoid[j]], csrelSet[i].lstCnt[j],
&csrelWithMaxSet[i]);
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < num; i++){
+ if (csrelWithMaxSet[i].numRef != 0){ //Only print CS with FK
+ fprintf(fout, "Relationship %d: ", i);
+ freq = (int *) Tloc(freqBat, i);
+ fprintf(fout, "CS " BUNFMT " (Freq: %d, isFreq: %d) -->
", csrelWithMaxSet[i].origCSoid, *freq, csFreqMap[i]);
+ for (j = 0; j < csrelWithMaxSet[i].numRef; j++){
+ fprintf(fout, BUNFMT " (%d) ",
csrelWithMaxSet[i].lstRefCSoid[j],csrelWithMaxSet[i].lstCnt[j]);
+ }
+ fprintf(fout, "\n");
+ }
+ }
+
+
+ fclose(fout);
+}
+
static
void printSubCSInformation(SubCSSet *subcsset, int num, char isWriteTofile,
int freqThreshold){
@@ -382,51 +521,7 @@ oid addSubCS(char *buff, int numP, int c
}
-static
-void addReltoCSRel(oid origCSoid, oid refCSoid, CSrel *csrel)
-{
- void *_tmp;
- void *_tmp2;
-
- int i = 0;
-
- assert (origCSoid == csrel->origCSoid);
-
- while (i < csrel->numRef){
- if (refCSoid == csrel->lstRefCSoid[i]){
- //Existing
- break;
- }
- i++;
- }
-
- if (i != csrel->numRef){
- csrel->lstCnt[i]++;
- return;
- }
- else{ // New Ref
-
- if(csrel->numRef == csrel->numAllocation)
- {
- csrel->numAllocation += INIT_NUM_CSREL;
-
- _tmp = realloc(csrel->lstRefCSoid,
(csrel->numAllocation * sizeof(oid)));
- _tmp2 = realloc(csrel->lstCnt, (csrel->numAllocation *
sizeof(int)));
-
- if (!_tmp || !_tmp2){
- fprintf(stderr, "ERROR: Couldn't realloc
memory!\n");
- }
- csrel->lstRefCSoid = (oid*)_tmp;
- csrel->lstCnt = (int*)_tmp2;
- }
-
- csrel->lstRefCSoid[csrel->numRef] = refCSoid;
- csrel->lstCnt[csrel->numRef] = 1;
- csrel->numRef++;
- }
-}
-
-static
+static
void freeCSset(CSset *csSet){
int i;
for(i = 0; i < csSet->numCSadded; i ++){
@@ -647,7 +742,12 @@ oid putaCStoHash(CSBats *csBats, oid* ke
if (bun == BUN_NONE) {
csId = *csoid;
addNewCS(csBats, &csKey, key, csoid, num);
- //assert(csId != BUN_NONE);
+
+ //Handle the case when freqThreshold == 1
+ if (isStoreFreqCS ==1 && freqThreshold == 1){
+ freqCS = creatCS(csId, num, key);
+ addCStoSet(freqCSset, *freqCS);
+ }
}
else{
//printf("Same HashKey: ");
@@ -659,6 +759,12 @@ oid putaCStoHash(CSBats *csBats, oid* ke
// New CS
csId = *csoid;
addNewCS(csBats, &csKey, key, csoid, num);
+
+ //Handle the case when freqThreshold == 1
+ if (isStoreFreqCS ==1 && freqThreshold == 1){
+ freqCS = creatCS(csId, num, key);
+ addCStoSet(freqCSset, *freqCS);
+ }
}
else{
@@ -866,7 +972,7 @@ static void getStatisticCSsBySize(map_t
*/
-static void getStatisticCSsBySupports(BAT *pOffsetBat, BAT *freqBat, BAT
*fullPBat, char isWriteToFile, int freqThreshold){
+static void getStatisticCSsBySupports(BAT *pOffsetBat, BAT *freqBat, BAT
*fullPBat, oid* csSuperCSMap, char isWriteToFile, int freqThreshold){
//int *csPropNum;
//int *csFreq;
@@ -885,7 +991,7 @@ static void getStatisticCSsBySupports(BA
strcat(filename, ".txt");
fout = fopen(filename,"wt");
- fprintf(fout, " csId #Prop #frequency \n");
+ fprintf(fout, " csId #Prop #frequency maxCSid\n");
pi = bat_iterator(pOffsetBat);
freqi = bat_iterator(freqBat);
@@ -905,9 +1011,9 @@ static void getStatisticCSsBySupports(BA
// Output the result
if (isWriteToFile == 0)
- printf(BUNFMT " %d %d \n", p, numP, *freq);
+ printf(BUNFMT " %d %d " BUNFMT "\n", p, numP, *freq,
csSuperCSMap[p]);
else
- fprintf(fout, BUNFMT " %d %d \n", p, numP, *freq);
+ fprintf(fout, BUNFMT " %d %d " BUNFMT "\n", p, numP,
*freq, csSuperCSMap[p]);
}
fclose(fout);
@@ -1185,6 +1291,7 @@ RDFextractCSwithTypes(int *ret, bat *sba
int maxNumPwithDup = 0;
char *csFreqMap;
CSrel *csrelSet;
+ CSrel *csrelWithMaxFreqSet;
SubCSSet *csSubCSMap;
oid *csSuperCSMap;
@@ -1244,6 +1351,8 @@ RDFextractCSwithTypes(int *ret, bat *sba
csrelSet = initCSrelset(maxCSoid + 1);
+ csrelWithMaxFreqSet = initCSrelset(maxCSoid + 1);
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list