Changeset: b5e6d838b9c2 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=b5e6d838b9c2
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:

Fix the output sample data script for Webcrawl-specific problems


diffs (135 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -4465,6 +4465,15 @@ str printSampleData(CSSample *csSample, 
        oid     objOid = BUN_NONE; 
        BATiter mapi;
        str     canStr; 
+       char    isTitle = 0; 
+       char    isUrl = 0;
+       char    isType = 0;
+       char    isDescription = 0;
+       char    isImage = 0; 
+       char    isSite = 0;
+       char    isEmail = 0; 
+       char    isCountry = 0; 
+       char    isLocality = 0;
 #if USE_SHORT_NAMES
        str     propStrShort = NULL;
        char    *pch; 
@@ -4517,18 +4526,36 @@ str printSampleData(CSSample *csSample, 
                        str canStrShort = NULL;
                        takeOid(sample.name, &canStr);
                        getPropNameShort(&canStrShort, canStr);
-                       pch = strstr (canStrShort,"(");
-                       if (pch != NULL) *pch = '\0';   //Remove (...) 
characters from table name
-                       fprintf(fouttb,"CREATE TABLE %s \n(\n",  canStrShort);
+
+                       if (strstr (canStrShort,".") != NULL || 
+                               strcmp(canStrShort,"") == 0 || 
+                               strstr(canStrShort,"-") != NULL ){      // 
WEBCRAWL specific problem with Table name a.jpg, b.png....
+                               fprintf(fouttb,"CREATE TABLE tbSample%d \n 
(\n", i);                    
+                       }
+                       else if (strcmp(canStrShort,"page") == 0){
+                               fprintf(fouttb,"CREATE TABLE %s%d \n(\n",  
canStrShort, i);
+                       }
+                       else {
+                               pch = strstr (canStrShort,"(");
+                               if (pch != NULL) *pch = '\0';   //Remove (...) 
characters from table name
+                               fprintf(fouttb,"CREATE TABLE %s \n(\n",  
canStrShort);
+                       }
+
                        GDKfree(canStrShort);
                        GDKfree(canStr);
                }
-               else
+               else 
                        fprintf(fouttb,"CREATE TABLE tbSample%d \n (\n", i);
 
                //List of columns
                fprintf(fout,"Subject");
                fprintf(fouttb,"SubjectCol string");
+               isTitle = 0; 
+               isUrl = 0;
+               isType = 0; 
+               isDescription = 0; 
+               isImage = 0;
+               isSite = 0; 
                for (j = 0; j < sample.numProp; j++){
                        if (freqCS.lstPropSupport[j] * 100 < freqCS.support * 
SAMPLE_FILTER_THRESHOLD) continue; 
 #if USE_SHORT_NAMES
@@ -4538,11 +4565,47 @@ str printSampleData(CSSample *csSample, 
 #if USE_SHORT_NAMES
                        getPropNameShort(&propStrShort, propStr);
                        fprintf(fout,";%s", propStrShort);
-                       if (strcmp(propStrShort,"type") == 0 || 
strcmp(propStrShort,"position") == 0 ||
-                                       strcmp(propStrShort,"order") == 0)
+
+                       pch = strstr (propStrShort,"-");
+                       if (pch != NULL) *pch = '\0';   //Remove - characters 
from prop  //WEBCRAWL specific problem
+
+                       if ((strcmp(propStrShort,"type") == 0 && isType == 1)|| 
+                                       strcmp(propStrShort,"position") == 0 ||
+                                       strcmp(propStrShort,"order") == 0 || 
+                                       (strcmp(propStrShort,"title") == 0 && 
isTitle == 1) ||
+                                       (strcmp(propStrShort,"url") == 0 && 
isUrl == 1) ||
+                                       (strcmp(propStrShort,"description") == 
0 && isDescription == 1) ||
+                                       (strcmp(propStrShort,"site_name") == 0 
&& isSite == 1) ||
+                                       (strcmp(propStrShort,"image") == 0 && 
isImage == 1)  ||
+                                       (strcmp(propStrShort,"email") == 0 && 
isEmail == 1)  ||
+                                       (strcmp(propStrShort,"country") == 0 && 
isCountry == 1)  ||
+                                       (strcmp(propStrShort,"locality") == 0 
&& isLocality == 1)  ||
+                                       strcmp(propStrShort,"fbmladmins") == 0 
||
+                                       strcmp(propStrShort,"latitude") == 0 ||
+                                       strcmp(propStrShort,"fbmlapp_id") == 0 
||
+                                       strcmp(propStrShort,"locale") == 0 ||
+                                       strcmp(propStrShort,"longitude") == 0 ||
+                                       strcmp(propStrShort,"phone_number") == 
0 ||
+                                       strcmp(propStrShort,"postal") == 0 ||
+                                       strcmp(propStrShort,"street") == 0 ||
+                                       strcmp(propStrShort,"region") == 0 ||
+                                       strcmp(propStrShort,"fax_number") == 0 
||
+                                       strcmp(propStrShort,"app_id") == 0 
+                                       )
                                fprintf(fouttb,",\n%s_%d 
string",propStrShort,j);
                        else
                                fprintf(fouttb,",\n%s string",propStrShort);
+
+                       if (strcmp(propStrShort,"title") == 0) isTitle = 1; 
//WEBCRAWL specific problem, duplicate title
+                       if (strcmp(propStrShort,"url") == 0) isUrl = 1; 
//WEBCRAWL specific problem, duplicate url
+                       if (strcmp(propStrShort,"type") == 0) isType = 1; 
//WEBCRAWL specific problem, duplicate type
+                       if (strcmp(propStrShort,"description") == 0) 
isDescription = 1; //WEBCRAWL specific problem, duplicate description
+                       if (strcmp(propStrShort,"image") == 0) isImage = 1; 
//WEBCRAWL specific problem, duplicate image
+                       if (strcmp(propStrShort,"site_name") == 0) isSite = 1; 
//WEBCRAWL specific problem, duplicate site_name
+                       if (strcmp(propStrShort,"email") == 0) isEmail = 1; 
//WEBCRAWL specific problem, duplicate email                
+                       if (strcmp(propStrShort,"country") == 0) isCountry = 1; 
//WEBCRAWL specific problem, duplicate country
+                       if (strcmp(propStrShort,"locality") == 0) isLocality = 
1; //WEBCRAWL specific problem, duplicate locality
+
                        GDKfree(propStrShort);
 #else
                        fprintf(fout,";%s", propStr);
@@ -4620,9 +4683,21 @@ str printSampleData(CSSample *csSample, 
                        str canStrShort = NULL;
                        takeOid(sample.name, &canStr);
                        getPropNameShort(&canStrShort, canStr);
-                       pch = strstr (canStrShort,"(");
-                       if (pch != NULL) *pch = '\0';   //Remove (...) 
characters from table name
-                       fprintf(foutis, "echo \"COPY %d RECORDS INTO %s FROM 
'ABSOLUTEPATH/tmp.txt'     USING DELIMITERS '|', '\\n'; \" > tmpload.sql \n", 
sample.numInstances, canStrShort);
+
+                       if (strstr (canStrShort,".") != NULL || 
+                               strcmp(canStrShort,"") == 0 || 
+                               strstr(canStrShort,"-") != NULL ){      // 
WEBCRAWL specific problem with Table name a.jpg, b.png....
+                               fprintf(foutis, "echo \"COPY %d RECORDS INTO 
tbSample%d FROM 'ABSOLUTEPATH/tmp.txt'     USING DELIMITERS '|', '\\n'; \" > 
tmpload.sql \n", sample.numInstances, i);
+                       }
+                       else if (strcmp(canStrShort,"page") == 0){
+                               fprintf(foutis, "echo \"COPY %d RECORDS INTO 
%s%d FROM 'ABSOLUTEPATH/tmp.txt'     USING DELIMITERS '|', '\\n'; \" > 
tmpload.sql \n", sample.numInstances, canStrShort, i);
+                       }
+                       else{
+
+                               pch = strstr (canStrShort,"(");
+                               if (pch != NULL) *pch = '\0';   //Remove (...) 
characters from table name
+                               fprintf(foutis, "echo \"COPY %d RECORDS INTO %s 
FROM 'ABSOLUTEPATH/tmp.txt'     USING DELIMITERS '|', '\\n'; \" > tmpload.sql 
\n", sample.numInstances, canStrShort);
+                       }
                        fprintf(foutis, "mclient < tmpload.sql \n");
                        GDKfree(canStrShort);
                        GDKfree(canStr);
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to