A while back, there was a push to make COPY gzip-aware. That didn't happen,
but COPY FROM PROGRAM did, and it scratches the same itch.

I have a similar need, but with file_fdw foreign tables. I have .csv.gz
files downloaded to the server, but those CSVs have 100+ columns in them,
and in this case I only really care about a half dozen of those columns.
I'd like to avoid:
- the overhead of writing the uncompressed file to disk and then
immediately re-reading it
- writing unwanted columns to a temp/work table via COPY, and then
immediately re-reading them
- multicorn fdw because it ends up making a python string out of all data
cells
- a csv parsing tool like csvtool or mlr, because they output another CSV
which must be reparsed from scratch

Since file_fdw leverages COPY, it seemed like it would be easy to add the
FROM PROGRAM feature to file_fdw. I began asking questions on #postgresql
IRC, only to discover that Adam Gomaa ( akgo...@gmail.com ) had already
written such a thing, but hadn't submitted it. Attached is a small rework
of his patch, along with documentation.

NOTE: The regression test includes unix commands in the program option. I
figured that wouldn't work for win32 systems, so I checked to see what the
regression tests do to test COPY FROM PROGRAM...and I couldn't find any. So
I guess the test exists as a proof of concept that will get excised before
final commit.
diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index bc4d2d7..97df270 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -59,6 +59,7 @@ struct FileFdwOption
 static const struct FileFdwOption valid_options[] = {
        /* File options */
        {"filename", ForeignTableRelationId},
+       {"program", ForeignTableRelationId},
 
        /* Format options */
        /* oids option is not supported */
@@ -86,9 +87,10 @@ static const struct FileFdwOption valid_options[] = {
 typedef struct FileFdwPlanState
 {
        char       *filename;           /* file to read */
-       List       *options;            /* merged COPY options, excluding 
filename */
-       BlockNumber pages;                      /* estimate of file's physical 
size */
-       double          ntuples;                /* estimate of number of rows 
in file */
+       char       *program;            /* program to read output from */
+       List       *options;            /* merged COPY options, excluding 
filename and program */
+       BlockNumber pages;                      /* estimate of file or program 
output's physical size */
+       double          ntuples;                /* estimate of number of rows 
in file or program output */
 } FileFdwPlanState;
 
 /*
@@ -97,8 +99,9 @@ typedef struct FileFdwPlanState
 typedef struct FileFdwExecutionState
 {
        char       *filename;           /* file to read */
-       List       *options;            /* merged COPY options, excluding 
filename */
-       CopyState       cstate;                 /* state of reading file */
+       char       *program;            /* program to read output from */
+       List       *options;            /* merged COPY options, excluding 
filename and program */
+       CopyState       cstate;                 /* state of reading file or 
program */
 } FileFdwExecutionState;
 
 /*
@@ -139,7 +142,9 @@ static bool fileIsForeignScanParallelSafe(PlannerInfo 
*root, RelOptInfo *rel,
  */
 static bool is_valid_option(const char *option, Oid context);
 static void fileGetOptions(Oid foreigntableid,
-                          char **filename, List **other_options);
+                                                  char **filename,
+                                                  char **program,
+                                                  List **other_options);
 static List *get_file_fdw_attribute_options(Oid relid);
 static bool check_selective_binary_conversion(RelOptInfo *baserel,
                                                                  Oid 
foreigntableid,
@@ -189,6 +194,7 @@ file_fdw_validator(PG_FUNCTION_ARGS)
        List       *options_list = untransformRelOptions(PG_GETARG_DATUM(0));
        Oid                     catalog = PG_GETARG_OID(1);
        char       *filename = NULL;
+       char       *program = NULL;
        DefElem    *force_not_null = NULL;
        DefElem    *force_null = NULL;
        List       *other_options = NIL;
@@ -196,16 +202,17 @@ file_fdw_validator(PG_FUNCTION_ARGS)
 
        /*
         * Only superusers are allowed to set options of a file_fdw foreign 
table.
-        * This is because the filename is one of those options, and we don't 
want
-        * non-superusers to be able to determine which file gets read.
+        * This is because the filename or program string are two of those
+        * options, and we don't want non-superusers to be able to determine 
which
+        * file gets read or what command is run.
         *
         * Putting this sort of permissions check in a validator is a bit of a
         * crock, but there doesn't seem to be any other place that can enforce
         * the check more cleanly.
         *
-        * Note that the valid_options[] array disallows setting filename at any
-        * options level other than foreign table --- otherwise there'd still 
be a
-        * security hole.
+        * Note that the valid_options[] array disallows setting filename and
+        * program at any options level other than foreign table --- otherwise
+        * there'd still be a security hole.
         */
        if (catalog == ForeignTableRelationId && !superuser())
                ereport(ERROR,
@@ -247,7 +254,7 @@ file_fdw_validator(PG_FUNCTION_ARGS)
                }
 
                /*
-                * Separate out filename and column-specific options, since
+                * Separate out filename, program, and column-specific options, 
since
                 * ProcessCopyOptions won't accept them.
                 */
 
@@ -257,9 +264,26 @@ file_fdw_validator(PG_FUNCTION_ARGS)
                                ereport(ERROR,
                                                (errcode(ERRCODE_SYNTAX_ERROR),
                                                 errmsg("conflicting or 
redundant options")));
+                       if (program)
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_SYNTAX_ERROR),
+                                                errmsg("conflicting or 
redundant options")));
                        filename = defGetString(def);
                }
 
+               else if (strcmp(def->defname, "program") == 0)
+               {
+                       if (filename)
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_SYNTAX_ERROR),
+                                                errmsg("conflicting or 
redundant options")));
+                       if (program)
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_SYNTAX_ERROR),
+                                                errmsg("conflicting or 
redundant options")));
+                       program = defGetString(def);
+               }
+
                /*
                 * force_not_null is a boolean option; after validation we can 
discard
                 * it - it will be retrieved later in 
get_file_fdw_attribute_options()
@@ -296,12 +320,13 @@ file_fdw_validator(PG_FUNCTION_ARGS)
        ProcessCopyOptions(NULL, true, other_options);
 
        /*
-        * Filename option is required for file_fdw foreign tables.
+        * Either filename or program option is required for file_fdw foreign
+        * tables.
         */
-       if (catalog == ForeignTableRelationId && filename == NULL)
+       if (catalog == ForeignTableRelationId && filename == NULL && program == 
NULL)
                ereport(ERROR,
                                
(errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED),
-                                errmsg("filename is required for file_fdw 
foreign tables")));
+                                errmsg("either filename or program is required 
for file_fdw foreign tables")));
 
        PG_RETURN_VOID();
 }
@@ -326,12 +351,13 @@ is_valid_option(const char *option, Oid context)
 /*
  * Fetch the options for a file_fdw foreign table.
  *
- * We have to separate out "filename" from the other options because
- * it must not appear in the options list passed to the core COPY code.
+ * We have to separate out "filename" and "program" from the other options
+ * because it must not appear in the options list passed to the core COPY
+ * code.
  */
 static void
 fileGetOptions(Oid foreigntableid,
-                          char **filename, List **other_options)
+                          char **filename, char **program, List 
**other_options)
 {
        ForeignTable *table;
        ForeignServer *server;
@@ -359,9 +385,10 @@ fileGetOptions(Oid foreigntableid,
        options = list_concat(options, 
get_file_fdw_attribute_options(foreigntableid));
 
        /*
-        * Separate out the filename.
+        * Separate out the filename or program.
         */
        *filename = NULL;
+       *program = NULL;
        prev = NULL;
        foreach(lc, options)
        {
@@ -373,6 +400,12 @@ fileGetOptions(Oid foreigntableid,
                        options = list_delete_cell(options, lc, prev);
                        break;
                }
+               else if (strcmp(def->defname, "program") == 0)
+               {
+                       *program = defGetString(def);
+                       options = list_delete_cell(options, lc, prev);
+                       break;
+               }
                prev = lc;
        }
 
@@ -380,8 +413,8 @@ fileGetOptions(Oid foreigntableid,
         * The validator should have checked that a filename was included in the
         * options, but check again, just in case.
         */
-       if (*filename == NULL)
-               elog(ERROR, "filename is required for file_fdw foreign tables");
+       if (*filename == NULL && *program == NULL)
+               elog(ERROR, "either filename or program is required for 
file_fdw foreign tables");
 
        *other_options = options;
 }
@@ -475,12 +508,13 @@ fileGetForeignRelSize(PlannerInfo *root,
        FileFdwPlanState *fdw_private;
 
        /*
-        * Fetch options.  We only need filename at this point, but we might as
-        * well get everything and not need to re-fetch it later in planning.
+        * Fetch options.  We only need filename (or program) at this point, but
+        * we might as well get everything and not need to re-fetch it later in
+        * planning.
         */
        fdw_private = (FileFdwPlanState *) palloc(sizeof(FileFdwPlanState));
        fileGetOptions(foreigntableid,
-                                  &fdw_private->filename, 
&fdw_private->options);
+                                  &fdw_private->filename, 
&fdw_private->program, &fdw_private->options);
        baserel->fdw_private = (void *) fdw_private;
 
        /* Estimate relation size */
@@ -583,20 +617,24 @@ static void
 fileExplainForeignScan(ForeignScanState *node, ExplainState *es)
 {
        char       *filename;
+       char       *program;
        List       *options;
 
        /* Fetch options --- we only need filename at this point */
        fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
-                                  &filename, &options);
+                                  &filename, &program, &options);
 
-       ExplainPropertyText("Foreign File", filename, es);
+       if(filename)
+               ExplainPropertyText("Foreign File", filename, es);
+       else
+               ExplainPropertyText("Foreign Program", program, es);
 
        /* Suppress file size if we're not showing cost details */
        if (es->costs)
        {
                struct stat stat_buf;
 
-               if (stat(filename, &stat_buf) == 0)
+               if (filename && (stat(filename, &stat_buf) == 0))
                        ExplainPropertyLong("Foreign File Size", (long) 
stat_buf.st_size,
                                                                es);
        }
@@ -611,6 +649,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
 {
        ForeignScan *plan = (ForeignScan *) node->ss.ps.plan;
        char       *filename;
+       char       *program;
        List       *options;
        CopyState       cstate;
        FileFdwExecutionState *festate;
@@ -623,7 +662,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
 
        /* Fetch options of foreign table */
        fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
-                                  &filename, &options);
+                                  &filename, &program, &options);
 
        /* Add any options from the plan (currently only convert_selectively) */
        options = list_concat(options, plan->fdw_private);
@@ -632,11 +671,18 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
         * Create CopyState from FDW options.  We always acquire all columns, so
         * as to match the expected ScanTupleSlot signature.
         */
-       cstate = BeginCopyFrom(node->ss.ss_currentRelation,
-                                                  filename,
-                                                  false,
-                                                  NIL,
-                                                  options);
+       if(filename)
+               cstate = BeginCopyFrom(node->ss.ss_currentRelation,
+                                                          filename,
+                                                          false,
+                                                          NIL,
+                                                          options);
+       else
+               cstate = BeginCopyFrom(node->ss.ss_currentRelation,
+                                                          program,
+                                                          true,
+                                                          NIL,
+                                                          options);
 
        /*
         * Save state in node->fdw_state.  We must save enough information to 
call
@@ -644,6 +690,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
         */
        festate = (FileFdwExecutionState *) 
palloc(sizeof(FileFdwExecutionState));
        festate->filename = filename;
+       festate->program = program;
        festate->options = options;
        festate->cstate = cstate;
 
@@ -705,11 +752,19 @@ fileReScanForeignScan(ForeignScanState *node)
 
        EndCopyFrom(festate->cstate);
 
-       festate->cstate = BeginCopyFrom(node->ss.ss_currentRelation,
-                                                                       
festate->filename,
-                                                                       false,
-                                                                       NIL,
-                                                                       
festate->options);
+       if(festate->filename)
+               festate->cstate = BeginCopyFrom(node->ss.ss_currentRelation,
+                                                                               
festate->filename,
+                                                                               
false,
+                                                                               
NIL,
+                                                                               
festate->options);
+       else
+               festate->cstate = BeginCopyFrom(node->ss.ss_currentRelation,
+                                                                               
festate->program,
+                                                                               
true,
+                                                                               
NIL,
+                                                                               
festate->options);
+
 }
 
 /*
@@ -736,11 +791,19 @@ fileAnalyzeForeignTable(Relation relation,
                                                BlockNumber *totalpages)
 {
        char       *filename;
+       char       *program;
        List       *options;
        struct stat stat_buf;
 
        /* Fetch options of foreign table */
-       fileGetOptions(RelationGetRelid(relation), &filename, &options);
+       fileGetOptions(RelationGetRelid(relation), &filename, &program, 
&options);
+
+       /*
+        * If this is a program instead of a file, just return false to skip
+        * analyzing the table.
+        */
+       if (program)
+               return false;
 
        /*
         * Get size of the file.  (XXX if we fail here, would it be better to 
just
@@ -914,9 +977,11 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel,
 
        /*
         * Get size of the file.  It might not be there at plan time, though, in
-        * which case we have to use a default estimate.
+        * which case we have to use a default estimate.  We also have to fall
+        * back to the default if using a program as the input.
         */
-       if (stat(fdw_private->filename, &stat_buf) < 0)
+       if (fdw_private->filename == NULL ||
+               stat(fdw_private->filename, &stat_buf) < 0)
                stat_buf.st_size = 10 * BLCKSZ;
 
        /*
@@ -1034,6 +1099,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
        bool       *nulls;
        bool            found;
        char       *filename;
+       char       *program;
        List       *options;
        CopyState       cstate;
        ErrorContextCallback errcallback;
@@ -1048,12 +1114,15 @@ file_acquire_sample_rows(Relation onerel, int elevel,
        nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
 
        /* Fetch options of foreign table */
-       fileGetOptions(RelationGetRelid(onerel), &filename, &options);
+       fileGetOptions(RelationGetRelid(onerel), &filename, &program, &options);
 
        /*
         * Create CopyState from FDW options.
         */
-       cstate = BeginCopyFrom(onerel, filename, false, NIL, options);
+       if(filename)
+               cstate = BeginCopyFrom(onerel, filename, false, NIL, options);
+       else
+               cstate = BeginCopyFrom(onerel, program, true, NIL, options);
 
        /*
         * Use per-tuple memory context to prevent leak of memory used to read
diff --git a/contrib/file_fdw/input/file_fdw.source 
b/contrib/file_fdw/input/file_fdw.source
index 35db4af..207f8e2 100644
--- a/contrib/file_fdw/input/file_fdw.source
+++ b/contrib/file_fdw/input/file_fdw.source
@@ -97,6 +97,17 @@ SELECT * FROM text_csv;
 ALTER FOREIGN TABLE text_csv ALTER COLUMN word1 OPTIONS (force_null 'true');
 ALTER FOREIGN TABLE text_csv ALTER COLUMN word3 OPTIONS (force_not_null 
'true');
 
+-- program tests
+CREATE FOREIGN TABLE text_tsv_program (
+    word1 text OPTIONS (force_not_null 'true'),
+    word2 text OPTIONS (force_not_null 'off'),
+    word3 text OPTIONS (force_null 'true'),
+    word4 text OPTIONS (force_null 'off')
+) SERVER file_server
+OPTIONS (format 'csv', program 'sed -e ''s/,/\t/g'' 
@abs_srcdir@/data/text.csv', null 'NULL', delimiter e'\t');
+\pset null _null_
+SELECT * FROM text_tsv_program;
+
 -- force_not_null is not allowed to be specified at any foreign object level:
 ALTER FOREIGN DATA WRAPPER file_fdw OPTIONS (ADD force_not_null '*'); -- ERROR
 ALTER SERVER file_server OPTIONS (ADD force_not_null '*'); -- ERROR
diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml
index d3b39aa..f36a3c5 100644
--- a/doc/src/sgml/file-fdw.sgml
+++ b/doc/src/sgml/file-fdw.sgml
@@ -27,7 +27,26 @@
 
    <listitem>
     <para>
-     Specifies the file to be read.  Required.  Must be an absolute path name.
+     Specifies the file to be read.  Must be an absolute path name.
+     Either <literal>filename</literal> or <literal>program</literal> must be
+     specified.  They are mutually exclusive.
+    </para>
+   </listitem>
+  </varlistentry>
+
+  <varlistentry>
+   <term><literal>program</literal></term>
+
+   <listitem>
+    <para>
+     Specifies the command to executed.
+     Note that the command is invoked by the shell, so if you need to pass any
+     arguments to shell command that come from an untrusted source, you must
+     be careful to strip or escape any special characters that might have a
+     special meaning for the shell. For security reasons, it is best to use a
+     fixed command string, or at least avoid passing any user input in it.
+     Either <literal>program</literal> or <literal>filename</literal> must be
+     specified.  They are mutually exclusive.
     </para>
    </listitem>
   </varlistentry>
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to