Hi

I took Sven's hstore patch and added a new functionality.
It is now possible to request an arbitrary number of extra hstore columns, that contains all tags that start with the column name.

eg: osm2pgsql --hstore-column "name:" --hstore-column "wikipedia:"

will add two new columns "name:" and "wikipedia:". When a node or a
way contains a name:de=Deutschland tag, the "name:"-column will contain an hstore entry 'de'=>'Deutschland'.

If no name:-Tag is present, the column is set to NULL. That way one can easily look for translated objects by toing a "name:" IS NOT NULL query.

What kind of Index would you suggest for such a IS NOT NULL query? A conditional index on osm_id WHERE "name:" IS NOT NULL?


I wrote the relevant code into a single function and also relocated the hstore code into a seperate function and documented both. Can someone with more C experience please check the patch because I'm not sure I did all pointer logic correct.

I did successfully test it on the berlin geofabrik extract and will try to run a planet import during the next week (need to talk about that on Maps-l ;).

The attached patch is against r21892 of the osm repo.

Special thanks to Sven who did the real hard work.
Peter
Index: output.h
===================================================================
--- output.h    (revision 21892)
+++ output.h    (working copy)
@@ -29,6 +29,8 @@
   const char *expire_tiles_filename;   /* File name to output expired tiles 
list to */
   int enable_hstore; /* add an additional hstore column with objects key/value 
pairs */
   int enable_multi; /* Output multi-geometries intead of several simple 
geometries */
+  char** hstore_columns; /* list of columns that should be written into their 
own hstore column */
+  int n_hstore_columns; /* number of hstore columns */
 };
 
 struct output_t {
Index: osm2pgsql.c
===================================================================
--- osm2pgsql.c (revision 21892)
+++ osm2pgsql.c (working copy)
@@ -552,6 +552,9 @@
     fprintf(stderr, "              \t\tThis includes the username, userid, 
timestamp and version.\n"); 
     fprintf(stderr, "              \t\tNote: this option also requires 
additional entries in your style file.\n"); 
     fprintf(stderr, "   -k|--hstore\t\tGenerate an additional hstore 
(key/value) column to  postgresql tables\n");
+    fprintf(stderr, "   -z|--hstore-column\tGenerate an additional hstore 
(key/value) column to containing all tags\n");
+    fprintf(stderr, "                     \tthat start with the specified 
string, eg --hstore-column \"name:\" will\n");
+    fprintf(stderr, "                     \tproduce an extra hstore column 
that contains all name:xx tags\n");
     fprintf(stderr, "   -G|--multi-geometry\t\tGenerate multi-geometry 
features in postgresql tables.\n");
     fprintf(stderr, "   -h|--help\t\tHelp information.\n");
     fprintf(stderr, "   -v|--verbose\t\tVerbose output.\n");
@@ -660,6 +663,8 @@
     const char *style = OSM2PGSQL_DATADIR "/default.style";
     const char *temparg;
     const char *output_backend = "pgsql";
+    const char **hstore_columns = NULL;
+    int n_hstore_columns = 0;
     int cache = 800;
     struct output_options options;
     PGconn *sql_conn;
@@ -693,12 +698,13 @@
             {"expire-output", 1, 0, 'o'},
             {"output",   1, 0, 'O'},
             {"extra-attributes", 0, 0, 'x'},
-           {"hstore", 0, 0, 'k'},
+            {"hstore", 0, 0, 'k'},
+            {"hstore-column", 1, 0, 'z'},
             {"multi-geometry", 0, 0, 'G'},
             {0, 0, 0, 0}
         };
 
-        c = getopt_long (argc, argv, 
"ab:cd:hlmMp:suvU:WH:P:i:E:C:S:e:o:O:xkG", long_options, &option_index);
+        c = getopt_long (argc, argv, 
"ab:cd:hlmMp:suvU:WH:P:i:E:C:S:e:o:O:xkG:z:", long_options, &option_index);
         if (c == -1)
             break;
 
@@ -732,6 +738,11 @@
            case 'O': output_backend = optarg; break;
             case 'x': extra_attributes=1; break;
            case 'k': enable_hstore=1; break;
+            case 'z': 
+                n_hstore_columns++;
+                hstore_columns = (const char**)realloc(hstore_columns, 
sizeof(&n_hstore_columns) * n_hstore_columns);
+                hstore_columns[n_hstore_columns-1] = optarg;
+                break;
            case 'G': enable_multi=1; break;
             case 'h': long_usage_bool=1; break;
             case '?':
@@ -804,6 +815,8 @@
     options.expire_tiles_filename = expire_tiles_filename;
     options.enable_multi = enable_multi;
     options.enable_hstore = enable_hstore;
+    options.hstore_columns = hstore_columns;
+    options.n_hstore_columns = n_hstore_columns;
 
     if (strcmp("pgsql", output_backend) == 0) {
       out = &out_pgsql;
@@ -848,6 +861,9 @@
     
     free(nds);
     free(members);
+    
+    // free the column pointer buffer
+    free(hstore_columns);
 
     project_exit();
     text_exit();
Index: keyvals.c
===================================================================
--- keyvals.c   (revision 21892)
+++ keyvals.c   (working copy)
@@ -287,25 +287,30 @@
 */
 void keyval2hstore(char *hstring, struct keyval *tags)
 {
+  keyval2hstore_manual(hstring, tags->key, tags->value);
+}
+
+void keyval2hstore_manual(char *hstring, char *key, char *value)
+{
   static char* str=NULL;
   static size_t stlen=0;
   size_t len;
  
-  len=strlen(tags->value);
+  len=strlen(value);
   if (len>stlen) {
     stlen=len;
     str=realloc(str,1+stlen*2);
   }
 
-  len=strlen(tags->key);
+  len=strlen(key);
   if (len>stlen) {
     stlen=len;
     str=realloc(str,1+stlen*2);
   }
 
-  escape4hstore(str,tags->key);  
+  escape4hstore(str,key);  
   hstring+=sprintf(hstring,"\"%s\"=>",str);
-  escape4hstore(str,tags->value);
+  escape4hstore(str,value);
   sprintf(hstring,"\"%s\"",str);  
 }
 
Index: keyvals.h
===================================================================
--- keyvals.h   (revision 21892)
+++ keyvals.h   (working copy)
@@ -30,4 +30,5 @@
 void updateItem(struct keyval *head, const char *name, const char *value);
 void cloneList( struct keyval *target, struct keyval *source );
 void keyval2hstore(char *hstring, struct keyval *tags);
+void keyval2hstore_manual(char *hstring, char *key, char *value);
 #endif
Index: output-pgsql.c
===================================================================
--- output-pgsql.c      (revision 21892)
+++ output-pgsql.c      (working copy)
@@ -436,7 +436,127 @@
     }
 }
 
+static void write_hstore(enum table_id table, struct keyval *tags)
+{
+    // sql buffer
+    static size_t sqllen = 2048;
+    static char *sql;
+    sql = malloc(sqllen);
+    
+    // a clone of the tags pointer
+    struct keyval *xtags = tags;
+    
+    // while this tags has a follow-up..
+    while (xtags->next->key != NULL)
+    {
+        /*
+          hstore ASCII representation looks like
+          "<key>"=>"<value>"
+          
+          we need at least strlen(key)+strlen(value)+6+'\0' bytes
+          in theory any single character could also be escaped
+          thus we need an additional factor of 2.
+          The maximum lenght of a single hstore element is thus
+          calcuated as follows:
+        */
+        size_t hlen=2 * (strlen(xtags->next->key) + 
strlen(xtags->next->value)) + 7;
+        
+        // if the sql buffer is too small
+        if (hlen > sqllen) {
+            sqllen = hlen;
+            sql = realloc(sql, sqllen);
+        }
+        
+        // pack the tag with its value into the hstore
+        keyval2hstore(sql, xtags->next);
+        copy_to_table(table, sql);
+        
+        // update the tag-pointer to point to the next tag
+        xtags = xtags->next;
+        
+        // if the tag has a follow up, add a comma to the end
+        if (xtags->next->key != NULL)
+            copy_to_table(table, ",");
+    }
+    
+    // finish the hstore column by placing a TAB into the data stream
+    copy_to_table(table, "\t");
+}
 
+// write an hstore column to the database
+static void write_hstore_columns(enum table_id table, struct keyval *tags)
+{
+    // sql buffer
+    static size_t sqllen = 2048;
+    static char *sql;
+    sql = malloc(sqllen);
+    
+    // the index of the current hstore column
+    int i_hstore_column;
+    
+    // iterate over all configured hstore colums in the options
+    for(i_hstore_column = 0; i_hstore_column < Options->n_hstore_columns; 
i_hstore_column++)
+    {
+        // did this node have a tag that matched the current hstore column
+        int found = 0;
+        
+        // a clone of the tags pointer
+        struct keyval *xtags = tags;
+        
+        // while this tags has a follow-up..
+        while (xtags->next->key != NULL) {
+            
+            // check if the tags' key starts with the name of the hstore column
+            char *pos = strstr(xtags->next->key, 
Options->hstore_columns[i_hstore_column]);
+            
+            // and if it does..
+            if(pos == xtags->next->key)
+            {
+                // remember we found one
+                found=1;
+                
+                // generate the short key name
+                char *shortkey = xtags->next->key + 
strlen(Options->hstore_columns[i_hstore_column]);
+                
+                // calculate the size needed for this hstore entry
+                size_t hlen=2*(strlen(shortkey)+strlen(xtags->next->value))+7;
+                
+                // if the sql buffer is too small
+                if (hlen > sqllen) {
+                    // resize it
+                    sqllen=hlen;
+                    sql=realloc(sql,sqllen);
+                }
+                
+                // and pack the shortkey with its value into the hstore
+                keyval2hstore_manual(sql, shortkey, xtags->next->value);
+                copy_to_table(table, sql);
+                
+                // update the tag-pointer to point to the next tag
+                xtags=xtags->next;
+                
+                // if the tag has a follow up, add a comma to the end
+                if (xtags->next->key != NULL)
+                    copy_to_table(table, ",");
+            }
+            else
+            {
+                // update the tag-pointer to point to the next tag
+                xtags=xtags->next;
+            }
+        }
+        
+        // if no matching tag has been found, write a NULL
+        if(!found)
+            copy_to_table(table, "\\N");
+        
+        // finish the hstore column by placing a TAB into the data stream
+        copy_to_table(table, "\t");
+    }
+    // all hstore-columns have now been written
+}
+
+
 /* example from: pg_dump -F p -t planet_osm gis
 COPY planet_osm (osm_id, name, place, landuse, leisure, "natural", man_made, 
waterway, highway, railway, amenity, tourism, learning, building, bridge, 
layer, way) FROM stdin;
 17959841        \N      \N      \N      \N      \N      \N      \N      
bus_stop        \N      \N      \N      \N      \N      \N    -\N      
0101000020E610000030CCA462B6C3D4BF92998C9B38E04940
@@ -484,36 +604,13 @@
         copy_to_table(t_point, "\t");
     }
     
-    if (Options->enable_hstore) {
-      while (tags->next->key != NULL) {
-       size_t hlen;
-       /*
-         hstore ASCII representation looks like
-         "<key>"=>"<value>"
-
-         we need at least strlen(key)+strlen(value)+6+'\0' bytes
-         in theory any single character could also be escaped
-         thus we need an additional factor of 2.
-         The maximum lenght of a single hstore element is thus
-         calcuated as follows:
-       */
-       hlen=2*(strlen(tags->next->key)+strlen(tags->next->value))+7;
-       if (hlen > sqllen) {
-         sqllen=hlen;
-         sql=realloc(sql,sqllen);
-       }
-       keyval2hstore(sql,tags->next);
-       tags=tags->next;
-       if (tags->next->key != NULL) {
-          copy_to_table(t_point, sql);
-         copy_to_table(t_point, ",");
-       } else {
-         copy_to_table(t_point, sql);
-       }
-      }
-      copy_to_table(t_point, "\t");
-    }
- 
+    // hstore columns
+    write_hstore_columns(t_point, tags);
+    
+    // check if a regular hstore is requested
+    if (Options->enable_hstore)
+        write_hstore(t_point, tags);
+    
     sprintf(sql, "SRID=%d;POINT(%.15g %.15g)", SRID, node_lon, node_lat);
     copy_to_table(t_point, sql);
     copy_to_table(t_point, "\n");
@@ -555,37 +652,14 @@
             copy_to_table(table, sql);
             copy_to_table(table, "\t");
     }
-
-    if (Options->enable_hstore) {
-      while (tags->next->key != NULL) {
-       size_t hlen;
-       /*
-         hstore ASCII representation looks like
-         "<key>"=>"<value>"
-
-         we need at least strlen(key)+strlen(value)+6+'\0' bytes
-         in theory any single character could also be escaped
-         thus we need an additional factor of 2.
-         The maximum lenght of a single hstore element is thus
-         calcuated as follows:
-       */
-       hlen=2*(strlen(tags->next->key)+strlen(tags->next->value))+7;
-       if (hlen > sqllen) {
-         sqllen=hlen;
-         //sql=realloc(sql,sqllen);
-       }
-       keyval2hstore(sql,tags->next);
-       tags=tags->next;
-       if (tags->next->key != NULL) {
-          copy_to_table(table, sql);
-         copy_to_table(table, ",");
-       } else {
-         copy_to_table(table, sql);
-       }
-      }
-      copy_to_table(table, "\t");
-    }
-
+    
+    // hstore columns
+    write_hstore_columns(table, tags);
+    
+    // check if a regular hstore is requested
+    if (Options->enable_hstore)
+        write_hstore(table, tags);
+    
     sprintf(sql, "SRID=%d;", SRID);
     copy_to_table(table, sql);
     copy_to_table(table, wkt);
@@ -1104,6 +1178,13 @@
                 }
                 strcat(sql, tmp);
             }
+            int i_hstore_column;
+            for(i_hstore_column = 0; i_hstore_column < 
Options->n_hstore_columns; i_hstore_column++)
+            {
+                strcat(sql, ",\"");
+                strcat(sql, Options->hstore_columns[i_hstore_column]);
+                strcat(sql, "\" hstore ");
+            }
             if (Options->enable_hstore) {
                 strcat(sql, ",tags hstore );\n");
            } else {
@@ -1169,6 +1250,14 @@
             strcat(sql, tmp);
         }
 
+        int i_hstore_column;
+        for(i_hstore_column = 0; i_hstore_column < Options->n_hstore_columns; 
i_hstore_column++)
+        {
+            strcat(sql, ",\"");
+            strcat(sql, Options->hstore_columns[i_hstore_column]);
+            strcat(sql, "\" ");
+        }
+    
        if (Options->enable_hstore) strcat(sql,",tags");
 
        tables[i].columns = strdup(sql);
_______________________________________________
dev mailing list
[email protected]
http://lists.openstreetmap.org/listinfo/dev

Reply via email to