Changeset: d2508be0d720 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d2508be0d720
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Add prop support in the output sample data


diffs (265 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -4951,6 +4951,7 @@ str initFullSampleData(CSSampleExtend *c
                assert(tmpNumCols > 0); 
                        
                csSampleEx[i].lstProp = (oid*)malloc(sizeof(oid) * tmpNumCols); 
+               csSampleEx[i].lstPropSupport = (int*)malloc(sizeof(int) * 
tmpNumCols); 
                csSampleEx[i].lstIsInfrequentProp = (char*)malloc(sizeof(char) 
* tmpNumCols); 
                csSampleEx[i].lstIsMVCol = (char*)malloc(sizeof(char) * 
tmpNumCols); 
                csSampleEx[i].colBats = (BAT**)malloc(sizeof(BAT*) * 
tmpNumCols);
@@ -4962,6 +4963,7 @@ str initFullSampleData(CSSampleExtend *c
                        #endif
                        colIdx++;
                        csSampleEx[i].lstProp[colIdx] = 
csPropTypes[i].lstPropTypes[j].prop;
+                       csSampleEx[i].lstPropSupport[colIdx] = 
csPropTypes[i].lstPropTypes[j].propFreq;
                        
                        csSampleEx[i].colBats[colIdx] = BATnew(TYPE_void, 
cstablestat->lstcstable[i].colBats[colIdx]->ttype , NUM_SAMPLE_INSTANCE + 1);
 
@@ -5062,6 +5064,7 @@ void freeSampleExData(CSSampleExtend *cs
        int i, j; 
        for (i = 0; i < numCand; i++){
                free(csSampleEx[i].lstProp);
+               free(csSampleEx[i].lstPropSupport);
                free(csSampleEx[i].lstIsInfrequentProp);
                free(csSampleEx[i].lstIsMVCol);
                free(csSampleEx[i].candidates); 
@@ -5113,10 +5116,8 @@ void getObjStr(BAT *mapbat, BATiter mapi
 
        *retObjType = objType; 
 
-
-
-
-}
+}
+
 
 //Assume Tokenizer is openned 
 //
@@ -5446,25 +5447,22 @@ str printSampleData(CSSample *csSample, 
        return MAL_SUCCEED;
 }
 
-#if 0
 static 
-str printFullSampleData(CSSampleExtend *csSampleEx, CSset *freqCSset, BAT 
*mbat, int num, int sampleVersion){
+str printFullSampleData(CSSampleExtend *csSampleEx, int num){
 
        int     i,j, k; 
        FILE    *fout, *fouttb, *foutis; 
        char    filename[100], filename2[100], filename3[100];
-       char    tmpStr[20], tmpStr2[20], tmpStr3[20];
        int     ret;
 
        str     propStr; 
        str     subjStr; 
        char*   schema = "rdf";
-       CSSample        sample; 
-       CS              freqCS; 
-       char    objType = 0; 
+       CSSampleExtend  sample; 
        str     objStr;         
-       oid     objOid = BUN_NONE; 
-       BATiter mapi;
+       oid     *objOid = NULL; 
+       float   *objFlt = NULL; 
+       int     *objInt = NULL; 
        str     canStr; 
        char    isTitle = 0; 
        char    isUrl = 0;
@@ -5475,7 +5473,8 @@ str printFullSampleData(CSSampleExtend *
        char    isEmail = 0; 
        char    isCountry = 0; 
        char    isLocality = 0;
-       BAT     *lmap = NULL, *rmap = NULL
+       BAT     *tmpBat = NULL; 
+       BATiter tmpi; 
 #if USE_SHORT_NAMES
        str     propStrShort = NULL;
        char    *pch; 
@@ -5483,8 +5482,6 @@ str printFullSampleData(CSSampleExtend *
 
 
 
-       mapi = bat_iterator(mbat);
-
        if (TKNZRopen (NULL, &schema) != MAL_SUCCEED) {
                throw(RDF, "rdf.rdfschema",
                                "could not open the tokenizer\n");
@@ -5505,8 +5502,7 @@ str printFullSampleData(CSSampleExtend *
        foutis = fopen(filename3,"wt");
 
        for (i = 0; i < num; i++){
-               sample = csSample[i];
-               freqCS = freqCSset->items[sample.freqIdx];
+               sample = csSampleEx[i];
                fprintf(fout,"Sample table %d Candidates: ", i);
                for (j = 0; j < (int)sample.candidateCount; j++){
                        //fprintf(fout,"  "  BUNFMT,sample.candidates[j]);
@@ -5564,6 +5560,7 @@ str printFullSampleData(CSSampleExtend *
                isImage = 0;
                isSite = 0; 
                for (j = 0; j < sample.numProp; j++){
+                       if (sample.lstIsInfrequentProp[j] == 1) continue; 
 #if USE_SHORT_NAMES
                        propStrShort = NULL;
 #endif
@@ -5623,15 +5620,11 @@ str printFullSampleData(CSSampleExtend *
                
                //List of support
                for (j = 0; j < sample.numProp; j++){
-                       if (sampleVersion > 1){         //Do not consider 
infreq Prop 
-                               if (isInfrequentSampleProp(freqCS, j)) 
continue; 
-                               fprintf(fout,";%d", freqCS.lstPropSupport[j]);
-                       }
-                       else{
-                               fprintf(fout,";%d", freqCS.support);
-                       }
+                       if (sample.lstIsInfrequentProp[j] == 1) continue;
+                       fprintf(fout,";%d", sample.lstPropSupport[j]);
                }
                fprintf(fout, "\n");
+       
                
                fprintf(foutis, "echo \"");
                //All the instances 
@@ -5651,42 +5644,76 @@ str printFullSampleData(CSSampleExtend *
                        GDKfree(subjStr); 
                        
                        for (j = 0; j < sample.numProp; j++){
-                               if (sampleVersion > 1){         //Do not 
consider infreq Prop 
-                                       if (isInfrequentSampleProp(freqCS, j)) 
continue; 
-                               }
-                               objOid = sample.lstObj[j][k];
-                               if (objOid == BUN_NONE){
-                                       fprintf(fout,";NULL");
-                                       fprintf(foutis,"|NULL");
-                               }
-                               else{
-                                       objStr = NULL;
-                                       getObjStr(mbat, mapi, objOid, &objStr, 
&objType);
-                                       if (objType == URI || objType == 
BLANKNODE){
-#if USE_SHORT_NAMES
-                                               str objStrShort = NULL;
-                                               getPropNameShort(&objStrShort, 
objStr);
-                                               fprintf(fout,";<%s>", 
objStrShort);
-                                               fprintf(foutis,"|<%s>", 
objStrShort);
-                                               GDKfree(objStrShort);
-#else
-                                               fprintf(fout,";%s", objStr);
-#endif
-                                               GDKfree(objStr);
-                                       } else {
-                                               str betweenQuotes;
-                                               
getStringBetweenQuotes(&betweenQuotes, objStr);
-                                               fprintf(fout,";%s", 
betweenQuotes);
-                                               pch = strstr 
(betweenQuotes,"\\");
-                                               if (pch != NULL) *pch = '\0';   
//Remove \ characters from table name
-                                               fprintf(foutis,"|%s", 
betweenQuotes);
-                                               GDKfree(betweenQuotes);
+                               if (sample.lstIsInfrequentProp[j] == 1) 
continue;                       
+
+                               tmpBat = sample.colBats[j];
+                               tmpi = bat_iterator(tmpBat);
+                               
+                               if (tmpBat->ttype == TYPE_oid){ //URI or BLANK 
NODE  or MVCol
+                                       objOid = (oid *) BUNtail(tmpi, k);
+                                       if (*objOid == oid_nil){
+                                               fprintf(fout,";NULL");
+                                               fprintf(foutis,"|NULL");
+                                       }
+                                       else{
+                                               if (sample.lstIsMVCol[j] == 1){ 
//
+                                                       
fprintf(fout,";<"BUNFMT">",*objOid);
+                                               }
+                                               else{
+                                                       str objStrShort = NULL;
+                                                       takeOid(*objOid, 
&objStr);
+                                                       
getPropNameShort(&objStrShort, objStr);
+
+                                                       fprintf(fout,";<%s>", 
objStrShort);
+                                                       fprintf(foutis,"|<%s>", 
objStrShort);
+                                                       GDKfree(objStrShort);
+                                                       GDKfree(objStr);
+
+
+                                               }
                                        }
                                }
+                               else if (tmpBat->ttype == TYPE_flt){
+                                       objFlt = (float *) BUNtail(tmpi, k); 
+                                       if (*objFlt == flt_nil){
+                                               fprintf(fout,";NULL");
+                                               fprintf(foutis,"|NULL");
+                                       } 
+                                       else{
+                                               fprintf(fout,";%f", *objFlt);
+                                               fprintf(foutis,"|%f", *objFlt);
+
+                                       }
+                               }
+                               else if (tmpBat->ttype == TYPE_int){
+                                       objInt = (int *) BUNtail(tmpi, k);
+                                       if (*objInt == int_nil){
+                                               fprintf(fout,";NULL");
+                                               fprintf(foutis,"|NULL");
+                                       }
+                                       else{
+                                               fprintf(fout,";%d", *objInt);
+                                               fprintf(foutis,"|%d", *objInt);
+                                       }
+                               
+                               }
+                               else{ //tmpBat->ttype == TYPE_str
+                                       objStr = NULL; 
+                                       objStr = BUNtail(tmpi, k);
+                                       if (strcmp(objStr, str_nil) == 0){
+                                               fprintf(fout,";NULL");
+                                               fprintf(foutis,"|NULL");
+                                       }
+                                       else{
+                                               fprintf(fout,";%s", objStr);
+                                               fprintf(foutis,"| %s", objStr);
+                                       }
+                               }
+
+
                        }
                        fprintf(fout, "\n");
                        fprintf(foutis, "\n");
-
                }
 
                fprintf(fout, "\n");
@@ -5733,7 +5760,6 @@ str printFullSampleData(CSSampleExtend *
        return MAL_SUCCEED;
 }
 
-#endif
 
 static 
 str RDFExtractSampleData(int *ret, BAT *sbat, BATiter si, BATiter pi, BATiter 
oi,  
@@ -6199,7 +6225,7 @@ str getFullSampleData(CStableStat* cstab
        
        initFullSampleData(csSampleEx, mTblIdxFreqIdxMapping, labels, 
cstablestat, csPropTypes, freqCSset, numTables, lmapbatid, rmapbatid);
 
-       //printFullSampleData(csSampleEx, mbat, numTables);
+       printFullSampleData(csSampleEx, numTables);
        
        freeSampleExData(csSampleEx, numTables);
 
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -401,7 +401,8 @@ typedef struct CSSampleExtend{
        oid     *candidates;
        oid     candidateCount; 
        int     numProp; 
-       oid     *lstProp; 
+       oid     *lstProp;
+       int     *lstPropSupport; 
        char    *lstIsInfrequentProp;
        char    *lstIsMVCol; 
        oid     *lstSubjOid;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to