Changeset: ecccfc50d79d for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=ecccfc50d79d
Modified Files:
monetdb5/extras/rdf/rdflabels.c
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:
Add functions for creating the set of freqIds per label (S1)
diffs (171 lines):
diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c
--- a/monetdb5/extras/rdf/rdflabels.c
+++ b/monetdb5/extras/rdf/rdflabels.c
@@ -2407,6 +2407,11 @@ CSlabel* createLabels(CSset* freqCSset,
}
str updateLabel(int ruleNumber, CSlabel *labels, int mergeCSFreqId, int
freqCS1, int freqCS2){
+ (void) ruleNumber; /* Stub: no label update is implemented yet. */
+ (void) labels; /* Each (void) cast marks a parameter as deliberately unused. */
+ (void) mergeCSFreqId;
+ (void) freqCS1;
+ (void) freqCS2;
return MAL_SUCCEED; /* Unconditionally reports success. */
}
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -3077,6 +3077,103 @@ str getReferCS(BAT *sbat, BAT *pbat, oid
}
*/
+/*
+ * Allocate and initialise an empty LabelStat.
+ *
+ * The label BAT is created with a void head and a str tail, sized for
+ * INIT_DISTINCT_LABEL entries, and a hash is prepared on it so that labels
+ * can be looked up by name.  lstCount gets room for INIT_DISTINCT_LABEL
+ * counters; freqIdList stays NULL until buildLabelStat() fills it.
+ *
+ * Returns the new LabelStat, or NULL when any allocation fails.  On failure
+ * everything acquired so far is released, so the caller owns nothing.
+ * A successful result is released with freeLabelStat().
+ */
+static
+LabelStat* initLabelStat(void){
+	LabelStat *labelStat = (LabelStat*) malloc(sizeof(LabelStat));
+
+	if (labelStat == NULL)
+		return NULL;
+
+	/* Put every field in a known state before anything can fail. */
+	labelStat->lstCount = NULL;
+	labelStat->freqIdList = NULL;
+	labelStat->numLabeladded = 0;
+	labelStat->numAllocation = INIT_DISTINCT_LABEL;
+
+	labelStat->labelBat = BATnew(TYPE_void, TYPE_str, INIT_DISTINCT_LABEL);
+	if (labelStat->labelBat == NULL){
+		free(labelStat);
+		return NULL;
+	}
+	(void)BATprepareHash(BATmirror(labelStat->labelBat));
+	if (!(labelStat->labelBat->T->hash)){
+		/* No hash: give back the BAT and the struct instead of leaking them. */
+		BBPreclaim(labelStat->labelBat);
+		free(labelStat);
+		return NULL;
+	}
+	labelStat->lstCount = (int*)malloc(sizeof(int) * INIT_DISTINCT_LABEL);
+	if (labelStat->lstCount == NULL){
+		BBPreclaim(labelStat->labelBat);
+		free(labelStat);
+		return NULL;
+	}
+
+	return labelStat;
+}
+
+/*
+ * Fill labelStat with, for every distinct label name other than "DUMMY",
+ * the list of indices i in [0, freqCSset->numCSadded) for which
+ * labels[i].name equals that name.
+ *
+ * Pass 1 collects the distinct names in labelStat->labelBat and counts their
+ * occurrences in lstCount (indexed by the name's position in the BAT).
+ * Pass 2 allocates freqIdList[k] with exactly lstCount[k] slots and fills
+ * them, reusing lstCount[k] as the fill cursor, so that lstCount ends up
+ * holding the per-label counts again.
+ *
+ * The function has no way to report an error to its caller.  On an
+ * allocation failure it prints a message to stderr and returns early; the
+ * structure is left in a state that freeLabelStat() can release (unfilled
+ * freqIdList entries are NULL), but its contents are then incomplete.
+ * A NULL labelStat, e.g. from a failed initLabelStat(), is ignored.
+ */
+static
+void buildLabelStat(LabelStat *labelStat, CSlabel *labels, CSset *freqCSset){
+	int i;
+	BUN bun;
+	int *grown;
+	int **idLists;
+	BAT *appended;
+	int freqIdx;
+
+	if (labelStat == NULL || labelStat->labelBat == NULL || labelStat->lstCount == NULL)
+		return;
+
+	//Preparation: collect the distinct labels and count their occurrences
+	for (i = 0; i < freqCSset->numCSadded; i++){
+		if (strcmp(labels[i].name,"DUMMY") == 0)
+			continue;
+
+		bun = BUNfnd(BATmirror(labelStat->labelBat),(ptr)labels[i].name);
+		if (bun != BUN_NONE) {
+			labelStat->lstCount[bun]++;
+			continue;
+		}
+
+		/*New string*/
+		/* Secure the counter slot before touching the BAT, so that a failed
+		 * realloc can never leave a label in the BAT without a counter. */
+		if (labelStat->numLabeladded == labelStat->numAllocation){
+			grown = realloc(labelStat->lstCount,
+					((labelStat->numAllocation + INIT_DISTINCT_LABEL) * sizeof(int)));
+			if (!grown){
+				/* lstCount is still the old, valid block here. */
+				fprintf(stderr, "ERROR: Couldn't realloc memory!\n");
+				return;
+			}
+			labelStat->lstCount = grown;
+			labelStat->numAllocation += INIT_DISTINCT_LABEL;
+		}
+
+		/* Rebuild the hash with a larger table once it gets crowded. */
+		if (labelStat->labelBat->T->hash && BATcount(labelStat->labelBat) > 4 * labelStat->labelBat->T->hash->mask) {
+			HASHdestroy(labelStat->labelBat);
+			BAThash(BATmirror(labelStat->labelBat), 2*BATcount(labelStat->labelBat));
+		}
+
+		appended = BUNappend(labelStat->labelBat, (ptr) (str)labels[i].name, TRUE);
+		if (appended == NULL){
+			/* NOTE(review): assumes BUNappend leaves the original BAT valid
+			 * when it fails, so the old pointer is kept for freeLabelStat()
+			 * -- confirm against gdk. */
+			fprintf(stderr, "ERROR: Couldn't append label to labelBat!\n");
+			return;
+		}
+		labelStat->labelBat = appended;
+
+		labelStat->lstCount[labelStat->numLabeladded] = 1;
+		labelStat->numLabeladded++;
+	}
+
+	printf("Total number of distinct labels is %d \n", labelStat->numLabeladded);
+	if (labelStat->numLabeladded == 0)
+		return;		/* nothing to list; freqIdList stays NULL */
+
+	//Build list of FreqCS
+	/* calloc: entries not yet allocated stay NULL, so a partial failure
+	 * below is still safe to hand to freeLabelStat(). */
+	idLists = (int**) calloc(labelStat->numLabeladded, sizeof(int*));
+	if (idLists == NULL){
+		fprintf(stderr, "ERROR: Couldn't allocate memory for freqIdList!\n");
+		return;
+	}
+	labelStat->freqIdList = idLists;
+
+	for (i = 0; i < labelStat->numLabeladded; i++){
+		idLists[i] = (int*)malloc(sizeof(int) * labelStat->lstCount[i]);
+		if (idLists[i] == NULL){
+			fprintf(stderr, "ERROR: Couldn't allocate memory for freqIdList!\n");
+			return;
+		}
+		//reset the lstCount, it is reused below as the fill position
+		labelStat->lstCount[i] = 0;
+	}
+
+	for (i = 0; i < freqCSset->numCSadded; i++){
+		if (strcmp(labels[i].name,"DUMMY") == 0)
+			continue;
+
+		bun = BUNfnd(BATmirror(labelStat->labelBat),(ptr) labels[i].name);
+		if (bun == BUN_NONE) {
+			fprintf(stderr, "All the name should be stored already!\n");
+			continue;
+		}
+		freqIdx = labelStat->lstCount[bun];
+		labelStat->freqIdList[bun][freqIdx] = i;
+		labelStat->lstCount[bun]++;
+	}
+}
+/*
+ * Release everything owned by labelStat: each per-label id list, the
+ * freqIdList array itself, the lstCount array, the label BAT and finally
+ * the structure.
+ *
+ * Accepts NULL (the result of a failed initLabelStat()) and does nothing
+ * in that case.  The pointer must not be used after this call.
+ */
+static
+void freeLabelStat(LabelStat *labelStat){
+	int i;
+
+	if (labelStat == NULL)
+		return;
+
+	if (labelStat->freqIdList != NULL){
+		for (i = 0; i < labelStat->numLabeladded;i++){
+			free(labelStat->freqIdList[i]);
+		}
+		free(labelStat->freqIdList);
+	}
+	free(labelStat->lstCount);
+	if (labelStat->labelBat != NULL)
+		BBPreclaim(labelStat->labelBat);
+	free(labelStat);
+}
+
+
@@ -3824,6 +3921,8 @@ RDFextractCSwithTypes(int *ret, bat *sba
clock_t curT;
clock_t tmpLastT;
OntoUsageNode *ontoUsageTree = NULL;
+ LabelStat *labelStat = NULL;
+
if ((sbat = BATdescriptor(*sbatid)) == NULL) {
throw(MAL, "rdf.RDFextractCSwithTypes", RUNTIME_OBJECT_MISSING);
@@ -3949,6 +4048,20 @@ RDFextractCSwithTypes(int *ret, bat *sba
tmpLastT = curT;
+ labelStat = initLabelStat();
+ buildLabelStat(labelStat, *labels, freqCSset);
+ freeLabelStat(labelStat);
+ /*
+ {
+ str tknzLabel = "cslabel";
+ if (TKNZRopen (NULL, &tknzLabel) != MAL_SUCCEED) {
+ throw(RDF, "RDFextractCSwithTypes", "could not open the
tokenizer\n");
+ }
+
+ TKNZRclose(ret);
+ }
+ */
+
/*S4: Merge two CS's having the subset-superset relationship */
getMaximumFreqCSs(freqCSset, *labels, csBats->coverageBat,
csBats->freqBat, *maxCSoid + 1, &numMaxCSs);
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -214,6 +214,15 @@ typedef struct CSrelSum{
int **freqIdList;
} CSrelSum;
+#define INIT_DISTINCT_LABEL 400 /* Initial labelBat capacity and lstCount slot count; lstCount grows by this many slots at a time */
+typedef struct LabelStat{ /* For each distinct label name, the list of freqIds that carry it */
+ BAT *labelBat; /* Distinct label names (str tail); a name's position indexes lstCount and freqIdList */
+ int *lstCount; /* Number of items per name */
+ int **freqIdList; /* freqIdList[k] holds the freqCS indices whose label is the k-th name */
+ int numLabeladded; /* Number of distinct labels stored so far */
+ int numAllocation; /* Number of int slots currently allocated for lstCount */
+} LabelStat;
+
typedef struct CStable {
BAT** colBats;
ObjectType* colTypes;
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list