Hi there,

To enable file_fdw to estimate the costs of scanning a CSV file more
accurately, I would like to propose a new FDW callback routine,
AnalyzeForeignTable, which allows the ANALYZE command to collect
statistics on a foreign table, and a corresponding file_fdw function,
fileAnalyzeForeignTable. Attached is my WIP patch.

Here's a summary of the implementation:

void AnalyzeForeignTable (Relation relation, VacuumStmt *vacstmt, int elevel);
This is a new FDW callback routine that collects statistics on a foreign
table and stores the results in the pg_class and pg_statistic system
catalogs. It is called when the ANALYZE command is executed. (ANALYZE
must be run manually, because autovacuum does not analyze foreign
tables.)
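
For reference, an FDW opts in simply by setting the new FdwRoutine field
in its handler function; this is the file_fdw change from the patch below,
abbreviated:

Datum
file_fdw_handler(PG_FUNCTION_ARGS)
{
    FdwRoutine *fdwroutine = makeNode(FdwRoutine);

    /* ... existing scan callbacks (PlanForeignScan etc.) ... */

    /* New: lets ANALYZE collect statistics on the foreign table */
    fdwroutine->AnalyzeForeignTable = fileAnalyzeForeignTable;

    PG_RETURN_POINTER(fdwroutine);
}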

static void fileAnalyzeForeignTable(Relation relation, VacuumStmt *vacstmt, int elevel);
This new file_fdw function collects and stores the same statistics on
CSV file data as are collected for a local table (except index-related
statistics), by sequentially scanning the CSV file and acquiring sample
rows using Vitter's reservoir-sampling algorithm. (This can be
time-consuming for a large file.)
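
In outline, the sampling loop is the same reservoir algorithm that
acquire_sample_rows() uses in analyze.c, but driven by NextCopyFrom()
instead of a heap scan. A simplified sketch of file_acquire_sample_rows()
(declarations, the error-context callback, and NOT NULL checking omitted;
see the patch for the full version):

/* simplified sketch; see file_acquire_sample_rows() in the patch */
rstate = init_selection_state(targrows);
while (NextCopyFrom(cstate, NULL, values, nulls, NULL))
{
    tuple = heap_form_tuple(tupDesc, values, nulls);

    if (numrows < targrows)
        rows[numrows++] = heap_copytuple(tuple);    /* fill the reservoir */
    else
    {
        /* Vitter's Algorithm Z: skip rows, then replace a random entry */
        if (rowstoskip < 0)
            rowstoskip = get_next_S(samplerows, targrows, &rstate);
        if (rowstoskip <= 0)
        {
            int k = (int) (targrows * random_fract());

            heap_freetuple(rows[k]);
            rows[k] = heap_copytuple(tuple);
        }
        rowstoskip -= 1;
    }
    samplerows += 1;
    heap_freetuple(tuple);
}
*totalrows = samplerows;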

estimate_costs() (more precisely, clauselist_selectivity() called from
estimate_costs()) estimates baserel->rows using the statistics stored in
the pg_statistic system catalog. If no statistics are available,
estimate_costs() falls back to the default estimate, as in
PostgreSQL 9.1.
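
Concretely, once relpages/reltuples have been set by ANALYZE,
estimate_costs() scales the stored tuple density by the current file
size, and only falls back to the width-based guess for a never-analyzed
table; condensed from the patch:

if (baserel->pages > 0)
{
    /* scale the tuple density from the last ANALYZE by the current size */
    double density = baserel->tuples / (double) baserel->pages;

    ntuples = clamp_row_est(density * (double) pages);
}
else
{
    /* never analyzed: fall back to the width-based default estimate */
    tuple_width = MAXALIGN(baserel->width) + MAXALIGN(sizeof(HeapTupleHeaderData));
    ntuples = clamp_row_est((double) stat_buf.st_size / (double) tuple_width);
}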

The results below demonstrate the effectiveness of this patch. The test
was run on a single core of a 3.00GHz Intel Xeon CPU with 8GB of RAM;
configuration settings were at their defaults except for work_mem = 256MB.
As the results show, the optimizer selects a good plan once the foreign
tables have been analyzed.

I appreciate your comments and suggestions.

[sample csv file data]
postgres=# COPY (SELECT s.a, repeat('a', 100) FROM generate_series(1, 5000000) AS s(a)) TO '/home/pgsql/sample_csv_data1.csv' (FORMAT csv, DELIMITER ',');
COPY 5000000
postgres=# COPY (SELECT (random()*10000)::int, repeat('b', 100) FROM generate_series(1, 5000000)) TO '/home/pgsql/sample_csv_data2.csv' (FORMAT csv, DELIMITER ',');
COPY 5000000

[Unpatched]
postgres=# CREATE FOREIGN TABLE tab1 (aid INTEGER, msg text) SERVER file_fs OPTIONS (filename '/home/pgsql/sample_csv_data1.csv', format 'csv', delimiter ',');
CREATE FOREIGN TABLE
postgres=# CREATE FOREIGN TABLE tab2 (aid INTEGER, msg text) SERVER file_fs OPTIONS (filename '/home/pgsql/sample_csv_data2.csv', format 'csv', delimiter ',');
CREATE FOREIGN TABLE
postgres=# SELECT count(*) FROM tab1;
  count
---------
 5000000
(1 row)

postgres=# SELECT count(*) FROM tab2;
  count
---------
 5000000
(1 row)

postgres=# EXPLAIN ANALYZE SELECT count(*) FROM tab1, tab2 WHERE tab1.aid >= 0 AND tab1.aid <= 10000 AND tab1.aid = tab2.aid;
                                                                 QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------
 Aggregate  (cost=128859182.29..128859182.30 rows=1 width=0) (actual time=27321.304..27321.304 rows=1 loops=1)
   ->  Merge Join  (cost=5787102.68..111283426.33 rows=7030302383 width=0) (actual time=22181.428..26736.194 rows=4999745 loops=1)
         Merge Cond: (tab1.aid = tab2.aid)
         ->  Sort  (cost=1857986.37..1858198.83 rows=84983 width=4) (actual time=5964.282..5965.958 rows=10000 loops=1)
               Sort Key: tab1.aid
               Sort Method: quicksort  Memory: 853kB
               ->  Foreign Scan on tab1  (cost=0.00..1851028.44 rows=84983 width=4) (actual time=0.071..5962.382 rows=10000 loops=1)
                     Filter: ((aid >= 0) AND (aid <= 10000))
                     Foreign File: /home/pgsql/sample_csv_data1.csv
                     Foreign File Size: 543888896
         ->  Materialize  (cost=3929116.30..4011842.29 rows=16545197 width=4) (actual time=16216.953..19550.846 rows=5000000 loops=1)
               ->  Sort  (cost=3929116.30..3970479.30 rows=16545197 width=4) (actual time=16216.947..18418.684 rows=5000000 loops=1)
                     Sort Key: tab2.aid
                     Sort Method: external merge  Disk: 68424kB
                     ->  Foreign Scan on tab2  (cost=0.00..1719149.70 rows=16545197 width=4) (actual time=0.081..6059.630 rows=5000000 loops=1)
                           Foreign File: /home/pgsql/sample_csv_data2.csv
                           Foreign File Size: 529446313
 Total runtime: 27350.673 ms
(18 rows)

[Patched]
postgres=# CREATE FOREIGN TABLE tab1 (aid INTEGER, msg text) SERVER file_fs OPTIONS (filename '/home/pgsql/sample_csv_data1.csv', format 'csv', delimiter ',');
CREATE FOREIGN TABLE
postgres=# CREATE FOREIGN TABLE tab2 (aid INTEGER, msg text) SERVER file_fs OPTIONS (filename '/home/pgsql/sample_csv_data2.csv', format 'csv', delimiter ',');
CREATE FOREIGN TABLE
postgres=# ANALYZE VERBOSE tab1;
INFO:  analyzing "public.tab1"
INFO:  "tab1": scanned, containing 5000000 rows; 30000 rows in sample
ANALYZE
postgres=# ANALYZE VERBOSE tab2;
INFO:  analyzing "public.tab2"
INFO:  "tab2": scanned, containing 5000000 rows; 30000 rows in sample
ANALYZE
postgres=# EXPLAIN ANALYZE SELECT count(*) FROM tab1, tab2 WHERE tab1.aid >= 0 AND tab1.aid <= 10000 AND tab1.aid = tab2.aid;
                                                            QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------
 Aggregate  (cost=1282725.25..1282725.26 rows=1 width=0) (actual time=15114.325..15114.325 rows=1 loops=1)
   ->  Hash Join  (cost=591508.50..1271157.90 rows=4626940 width=0) (actual time=5964.449..14526.822 rows=4999745 loops=1)
         Hash Cond: (tab2.aid = tab1.aid)
         ->  Foreign Scan on tab2  (cost=0.00..564630.00 rows=5000000 width=4) (actual time=0.070..6253.257 rows=5000000 loops=1)
               Foreign File: /home/pgsql/sample_csv_data2.csv
               Foreign File Size: 529446313
         ->  Hash  (cost=591393.00..591393.00 rows=9240 width=4) (actual time=5964.346..5964.346 rows=10000 loops=1)
               Buckets: 1024  Batches: 1  Memory Usage: 352kB
               ->  Foreign Scan on tab1  (cost=0.00..591393.00 rows=9240 width=4) (actual time=0.066..5962.222 rows=10000 loops=1)
                     Filter: ((aid >= 0) AND (aid <= 10000))
                     Foreign File: /home/pgsql/sample_csv_data1.csv
                     Foreign File Size: 543888896
 Total runtime: 15114.480 ms
(13 rows)

Best regards,
Etsuro Fujita
diff -crNB original/postgresql-9.1beta1/contrib/file_fdw/file_fdw.c changed/postgresql-9.1beta1/contrib/file_fdw/file_fdw.c
*** original/postgresql-9.1beta1/contrib/file_fdw/file_fdw.c    2011-04-28 06:17:22.000000000 +0900
--- changed/postgresql-9.1beta1/contrib/file_fdw/file_fdw.c     2011-09-12 15:19:28.000000000 +0900
***************
*** 15,29 ****
--- 15,41 ----
  #include <sys/stat.h>
  #include <unistd.h>
  
+ #include "access/htup.h"
  #include "access/reloptions.h"
+ #include "access/transam.h"
  #include "catalog/pg_foreign_table.h"
  #include "commands/copy.h"
+ #include "commands/dbcommands.h"
  #include "commands/defrem.h"
  #include "commands/explain.h"
+ #include "commands/vacuum.h"
  #include "foreign/fdwapi.h"
  #include "foreign/foreign.h"
  #include "miscadmin.h"
  #include "optimizer/cost.h"
+ #include "optimizer/plancat.h"
+ #include "pgstat.h"
+ #include "parser/parse_relation.h"
+ #include "utils/attoptcache.h"
+ #include "utils/guc.h"
+ #include "utils/lsyscache.h"
+ #include "utils/memutils.h"
+ /* #include "utils/pg_rusage.h" */
  
  PG_MODULE_MAGIC;
  
***************
*** 101,106 ****
--- 113,119 ----
  static TupleTableSlot *fileIterateForeignScan(ForeignScanState *node);
  static void fileReScanForeignScan(ForeignScanState *node);
  static void fileEndForeignScan(ForeignScanState *node);
+ static void fileAnalyzeForeignTable(Relation relation, VacuumStmt *vacstmt, int elevel);
  
  /*
   * Helper functions
***************
*** 111,117 ****
  static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
                           const char *filename,
                           Cost *startup_cost, Cost *total_cost);
! 
  
  /*
   * Foreign-data wrapper handler function: return a struct with pointers
--- 124,131 ----
  static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
                           const char *filename,
                           Cost *startup_cost, Cost *total_cost);
! static void file_do_analyze_rel(Relation relation, VacuumStmt *vacstmt, int elevel, const char *filename, CopyState cstate);
! static int  file_acquire_sample_rows(Relation onerel, int elevel, CopyState cstate, HeapTuple *rows, int targrows, double *totalrows);
  
  /*
   * Foreign-data wrapper handler function: return a struct with pointers
***************
*** 128,133 ****
--- 142,148 ----
        fdwroutine->IterateForeignScan = fileIterateForeignScan;
        fdwroutine->ReScanForeignScan = fileReScanForeignScan;
        fdwroutine->EndForeignScan = fileEndForeignScan;
+       fdwroutine->AnalyzeForeignTable = fileAnalyzeForeignTable;
  
        PG_RETURN_POINTER(fdwroutine);
  }
***************
*** 464,469 ****
--- 479,509 ----
  }
  
  /*
+  * fileAnalyzeForeignTable
+  *            Analyze table
+  */
+ static void
+ fileAnalyzeForeignTable(Relation relation, VacuumStmt *vacstmt, int elevel)
+ {
+       char       *filename;
+       List       *options;
+       CopyState       cstate;
+ 
+       /* Fetch options of foreign table */
+       fileGetOptions(RelationGetRelid(relation), &filename, &options);
+ 
+       /*
+        * Create CopyState from FDW options.  We always acquire all columns, so
+        * as to match the expected ScanTupleSlot signature.
+        */
+       cstate = BeginCopyFrom(relation, filename, NIL, options);
+ 
+       file_do_analyze_rel(relation, vacstmt, elevel, filename, cstate);
+ 
+       EndCopyFrom(cstate);
+ }
+ 
+ /*
   * Estimate costs of scanning a foreign table.
   */
  static void
***************
*** 473,479 ****
  {
        struct stat stat_buf;
        BlockNumber pages;
!       int                     tuple_width;
        double          ntuples;
        double          nrows;
        Cost            run_cost = 0;
--- 513,520 ----
  {
        struct stat stat_buf;
        BlockNumber pages;
!       BlockNumber     relpages;
!       double          reltuples;
        double          ntuples;
        double          nrows;
        Cost            run_cost = 0;
***************
*** 493,508 ****
        if (pages < 1)
                pages = 1;
  
!       /*
!        * Estimate the number of tuples in the file.  We back into this estimate
!        * using the planner's idea of the relation width; which is bogus if not
!        * all columns are being read, not to mention that the text representation
!        * of a row probably isn't the same size as its internal representation.
!        * FIXME later.
!        */
!       tuple_width = MAXALIGN(baserel->width) + MAXALIGN(sizeof(HeapTupleHeaderData));
  
!       ntuples = clamp_row_est((double) stat_buf.st_size / (double) tuple_width);
  
        /*
         * Now estimate the number of rows returned by the scan after applying the
--- 534,565 ----
        if (pages < 1)
                pages = 1;
  
!       relpages = baserel->pages;
!       reltuples = baserel->tuples;
! 
!       if (relpages > 0)
!       {
!               double          density;
! 
!               density = reltuples / (double) relpages;
! 
!               ntuples = clamp_row_est(density * (double) pages);
!       }
!       else
!       {
!               int             tuple_width;
  
!               /*
!                * Estimate the number of tuples in the file.  We back into this estimate
!                * using the planner's idea of the relation width; which is bogus if not
!                * all columns are being read, not to mention that the text representation
!                * of a row probably isn't the same size as its internal representation.
!                * FIXME later.
!                */
!               tuple_width = MAXALIGN(baserel->width) + MAXALIGN(sizeof(HeapTupleHeaderData));
! 
!               ntuples = clamp_row_est((double) stat_buf.st_size / (double) tuple_width);
!       }
  
        /*
         * Now estimate the number of rows returned by the scan after applying the
***************
*** 534,536 ****
--- 591,960 ----
        run_cost += cpu_per_tuple * ntuples;
        *total_cost = *startup_cost + run_cost;
  }
+ 
+ /*
+  * file_do_analyze_rel() -- analyze one foreign table
+  */
+ static void
+ file_do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, int elevel, const char *filename, CopyState cstate)
+ {
+       int                     i,
+                               attr_cnt,
+                               tcnt,
+                               numrows = 0,
+                               targrows;
+       double          totalrows = 0;
+       HeapTuple  *rows;
+       struct stat     stat_buf;
+       BlockNumber     pages;
+       VacAttrStats **vacattrstats;
+       MemoryContext caller_context;
+       MemoryContext anl_context;
+ 
+       ereport(elevel,
+                       (errmsg("analyzing \"%s.%s\"",
+                                       get_namespace_name(RelationGetNamespace(onerel)),
+                                       RelationGetRelationName(onerel))));
+ 
+       /*
+        * Set up a working context so that we can easily free whatever junk gets
+        * created.
+        */
+       anl_context = AllocSetContextCreate(CurrentMemoryContext,
+                                                                               "Analyze",
+                                                                               ALLOCSET_DEFAULT_MINSIZE,
+                                                                               ALLOCSET_DEFAULT_INITSIZE,
+                                                                               ALLOCSET_DEFAULT_MAXSIZE);
+       caller_context = MemoryContextSwitchTo(anl_context);
+ 
+       /*
+        * Switch to the table owner's userid, so that any index functions are run
+        * as that user.  Also lock down security-restricted operations and
+        * arrange to make GUC variable changes local to this command.
+        */
+       /*
+       GetUserIdAndSecContext(&save_userid, &save_sec_context);
+       SetUserIdAndSecContext(onerel->rd_rel->relowner,
+                                                  save_sec_context | SECURITY_RESTRICTED_OPERATION);
+       save_nestlevel = NewGUCNestLevel();
+       */
+ 
+       /*
+        * Determine which columns to analyze
+        *
+        * Note that system attributes are never analyzed.
+        */
+       if (vacstmt->va_cols != NIL)
+       {
+               ListCell           *le;
+ 
+               vacattrstats = (VacAttrStats **) palloc(list_length(vacstmt->va_cols) *
+                                                                                               sizeof(VacAttrStats *));
+               tcnt = 0;
+               foreach(le, vacstmt->va_cols)
+               {
+                       char       *col = strVal(lfirst(le));
+ 
+                       i = attnameAttNum(onerel, col, false);
+                       if (i == InvalidAttrNumber)
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_UNDEFINED_COLUMN),
+                                                errmsg("column \"%s\" of relation \"%s\" does not exist",
+                                                               col, RelationGetRelationName(onerel))));
+                       vacattrstats[tcnt] = examine_attribute(onerel, i, NULL, anl_context);
+                       if (vacattrstats[tcnt] != NULL)
+                               tcnt++;
+               }
+               attr_cnt = tcnt;
+       }
+       else
+       {
+               attr_cnt = onerel->rd_att->natts;
+               vacattrstats = (VacAttrStats **) palloc(attr_cnt * sizeof(VacAttrStats *));
+               tcnt = 0;
+               for (i = 1; i <= attr_cnt; i++)
+               {
+                       vacattrstats[tcnt] = examine_attribute(onerel, i, NULL, anl_context);
+                       if (vacattrstats[tcnt] != NULL)
+                               tcnt++;
+               }
+               attr_cnt = tcnt;
+       }
+ 
+       /*
+        * Quit if no analyzable columns.
+        */
+       if (attr_cnt <= 0)
+               goto cleanup;
+ 
+       /*
+        * Determine how many rows we need to sample, using the worst case from
+        * all analyzable columns.      We use a lower bound of 100 rows to avoid
+        * possible overflow in Vitter's algorithm.
+        */
+       targrows = 100;
+       for (i = 0; i < attr_cnt; i++)
+       {
+               if (targrows < vacattrstats[i]->minrows)
+                       targrows = vacattrstats[i]->minrows;
+       }
+ 
+       /*
+        * Acquire the sample rows
+        */
+       rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
+       numrows = file_acquire_sample_rows(onerel, elevel, cstate, rows, targrows, &totalrows);
+ 
+       /*
+        * Compute the statistics.      Temporary results during the calculations for
+        * each column are stored in a child context.  The calc routines are
+        * responsible to make sure that whatever they store into the VacAttrStats
+        * structure is allocated in anl_context.
+        */
+       if (numrows > 0)
+       {
+               MemoryContext col_context, old_context;
+ 
+               col_context = AllocSetContextCreate(anl_context,
+                                                                                       "Analyze Column",
+                                                                                       ALLOCSET_DEFAULT_MINSIZE,
+                                                                                       ALLOCSET_DEFAULT_INITSIZE,
+                                                                                       ALLOCSET_DEFAULT_MAXSIZE);
+               old_context = MemoryContextSwitchTo(col_context);
+ 
+               for (i = 0; i < attr_cnt; i++)
+               {
+                       VacAttrStats       *stats = vacattrstats[i];
+                       AttributeOpts      *aopt = get_attribute_options(onerel->rd_id, stats->attr->attnum);
+ 
+                       stats->rows = rows;
+                       stats->tupDesc = onerel->rd_att;
+                       (*stats->compute_stats) (stats,
+                                                                        std_fetch_func,
+                                                                        numrows,
+                                                                        totalrows);
+ 
+                       /*
+                        * If the appropriate flavor of the n_distinct option is
+                        * specified, override with the corresponding value.
+                        */
+                       if (aopt != NULL)
+                       {
+                               float8          n_distinct = aopt->n_distinct;
+ 
+                               if (n_distinct != 0.0)
+                                       stats->stadistinct = n_distinct;
+                       }
+ 
+                       MemoryContextResetAndDeleteChildren(col_context);
+               }
+ 
+               MemoryContextSwitchTo(old_context);
+               MemoryContextDelete(col_context);
+ 
+               /*
+                * Emit the completed stats rows into pg_statistic, replacing any
+                * previous statistics for the target columns.  (If there are stats in
+                * pg_statistic for columns we didn't process, we leave them alone.)
+                */
+               update_attstats(onerel->rd_id, false, attr_cnt, vacattrstats);
+       }
+ 
+       /*
+        * Get size of the file.  It might not be there at plan time, though, in
+        * which case we have to use a default estimate.
+        */
+       if (stat(filename, &stat_buf) < 0)
+               stat_buf.st_size = 10 * BLCKSZ;
+ 
+       /*
+        * Convert size to pages for use in I/O cost estimate below.
+        */
+       pages = (stat_buf.st_size + (BLCKSZ - 1)) / BLCKSZ;
+       if (pages < 1)
+               pages = 1;
+ 
+       /*
+        * Update pages/tuples stats in pg_class.
+        */
+       vac_update_relstats(onerel, pages, totalrows, false, InvalidTransactionId);
+ 
+       /*
+        * Report ANALYZE to the stats collector, too; likewise, tell it to adopt
+        * these numbers only if we're not inside a VACUUM that got a better
+        * number.      However, a call with inh = true shouldn't reset the stats.
+        */
+       pgstat_report_analyze(onerel, true, totalrows, 0);
+ 
+       /* We skip to here if there were no analyzable columns */
+ cleanup:
+ 
+       /* Restore current context and release memory */
+       MemoryContextSwitchTo(caller_context);
+       MemoryContextDelete(anl_context);
+       anl_context = NULL;
+ }
+ 
+ /*
+  * file_acquire_sample_rows -- acquire a random sample of rows from the table
+  *
+  * Selected rows are returned in the caller-allocated array rows[], which
+  * must have at least targrows entries.
+  * The actual number of rows selected is returned as the function result.
+  * We also count the number of rows in the table, and return it into *totalrows.
+  *
+  * The returned list of tuples is in order by physical position in the table.
+  * (We will rely on this later to derive correlation estimates.)
+  */
+ static int
+ file_acquire_sample_rows(Relation onerel, int elevel, CopyState cstate, HeapTuple *rows, int targrows, double *totalrows)
+ {
+       int                     numrows = 0;
+       double          samplerows = 0;  /* total # rows collected */
+       double          rowstoskip = -1; /* -1 means not set yet */
+       double          rstate;
+       HeapTuple       tuple;
+       TupleDesc       tupDesc;
+       TupleConstr *constr;
+       int                     natts;
+       int                     attrChk;
+       Datum           *values;
+       bool            *nulls;
+       bool            found;
+       bool            sample_it = false;
+       BlockNumber     blknum;
+       OffsetNumber offnum;
+       ErrorContextCallback errcontext;
+ 
+       Assert(onerel);
+       Assert(targrows > 0);
+ 
+       tupDesc = RelationGetDescr(onerel);
+       constr = tupDesc->constr;
+       natts = tupDesc->natts;
+       values = (Datum *) palloc(tupDesc->natts * sizeof(Datum));
+       nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
+ 
+       /* Prepare for sampling rows */
+       rstate = init_selection_state(targrows);
+ 
+       for (;;)
+       {
+               sample_it = true;
+ 
+               /* Set up callback to identify error line number. */
+               errcontext.callback = CopyFromErrorCallback;
+               errcontext.arg = (void *) cstate;
+               errcontext.previous = error_context_stack;
+               error_context_stack = &errcontext;
+ 
+               found = NextCopyFrom(cstate, NULL, values, nulls, NULL);
+ 
+               /* Remove error callback. */
+               error_context_stack = errcontext.previous;
+ 
+               if (!found)
+                       break;
+ 
+               tuple = heap_form_tuple(tupDesc, values, nulls);
+ 
+               if (constr && constr->has_not_null)
+               {
+                       for (attrChk = 1; attrChk <= natts; attrChk++)
+                       {
+                               if (onerel->rd_att->attrs[attrChk - 1]->attnotnull &&
+                                       heap_attisnull(tuple, attrChk))
+                               {
+                                       sample_it = false;
+                                       break;
+                               }
+                       }
+               }
+ 
+               if (!sample_it)
+               {
+                       heap_freetuple(tuple);
+                       continue;
+               }
+ 
+               /*
+                * The first targrows sample rows are simply copied into the
+                * reservoir. Then we start replacing tuples in the sample
+                * until we reach the end of the relation.      This algorithm is
+                * from Jeff Vitter's paper (see full citation below). It
+                * works by repeatedly computing the number of tuples to skip
+                * before selecting a tuple, which replaces a randomly chosen
+                * element of the reservoir (current set of tuples).  At all
+                * times the reservoir is a true random sample of the tuples
+                * we've passed over so far, so when we fall off the end of
+                * the relation we're done.
+                */
+               if (numrows < targrows)
+               {
+                       blknum = (BlockNumber) samplerows / MaxOffsetNumber;
+                       offnum = (OffsetNumber) samplerows % MaxOffsetNumber + 1;
+                       ItemPointerSet(&tuple->t_self, blknum, offnum);
+                       rows[numrows++] = heap_copytuple(tuple);
+               }
+               else
+               {
+                       /*
+                        * t in Vitter's paper is the number of records already
+                        * processed.  If we need to compute a new S value, we
+                        * must use the not-yet-incremented value of samplerows as
+                        * t.
+                        */
+                       if (rowstoskip < 0)
+                               rowstoskip = get_next_S(samplerows, targrows, &rstate);
+ 
+                       if (rowstoskip <= 0)
+                       {
+                               /*
+                                * Found a suitable tuple, so save it, replacing one
+                                * old tuple at random
+                                */
+                               int k = (int) (targrows * random_fract());
+ 
+                               Assert(k >= 0 && k < targrows);
+                               heap_freetuple(rows[k]);
+ 
+                               blknum = (BlockNumber) samplerows / MaxOffsetNumber;
+                               offnum = (OffsetNumber) samplerows % MaxOffsetNumber + 1;
+                               ItemPointerSet(&tuple->t_self, blknum, offnum);
+                               rows[k] = heap_copytuple(tuple);
+                       }
+ 
+                       rowstoskip -= 1;
+               }
+ 
+               samplerows += 1;
+               heap_freetuple(tuple);
+       }
+ 
+       /*
+        * If we didn't find as many tuples as we wanted then we're done. No sort
+        * is needed, since they're already in order.
+        *
+        * Otherwise we need to sort the collected tuples by position
+        * (itempointer). It's not worth worrying about corner cases where the
+        * tuples are already sorted.
+        */
+       if (numrows == targrows)
+               qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
+ 
+       *totalrows = samplerows;
+ 
+       pfree(values);
+       pfree(nulls);
+ 
+       /*
+        * Emit some interesting relation info
+        */
+       ereport(elevel,
+                       (errmsg("\"%s\": scanned, "
+                                       "containing %d rows; "
+                                       "%d rows in sample",
+                                       RelationGetRelationName(onerel), (int) samplerows, numrows)));
+ 
+       return numrows;
+ }
diff -crNB original/postgresql-9.1beta1/contrib/file_fdw/input/file_fdw.source changed/postgresql-9.1beta1/contrib/file_fdw/input/file_fdw.source
*** original/postgresql-9.1beta1/contrib/file_fdw/input/file_fdw.source 2011-04-28 06:17:22.000000000 +0900
--- changed/postgresql-9.1beta1/contrib/file_fdw/input/file_fdw.source  2011-09-04 19:29:23.000000000 +0900
***************
*** 94,99 ****
--- 94,104 ----
  EXECUTE st(100);
  DEALLOCATE st;
  
+ -- statistics collection tests
+ ANALYZE agg_csv;
+ SELECT relpages, reltuples FROM pg_class WHERE relname = 'agg_csv';
+ SELECT * FROM pg_stats WHERE tablename = 'agg_csv';
+ 
  -- tableoid
  SELECT tableoid::regclass, b FROM agg_csv;
  
diff -crNB original/postgresql-9.1beta1/contrib/file_fdw/output/file_fdw.source changed/postgresql-9.1beta1/contrib/file_fdw/output/file_fdw.source
*** original/postgresql-9.1beta1/contrib/file_fdw/output/file_fdw.source        2011-04-28 06:17:22.000000000 +0900
--- changed/postgresql-9.1beta1/contrib/file_fdw/output/file_fdw.source 2011-09-04 19:31:15.000000000 +0900
***************
*** 141,146 ****
--- 141,161 ----
  (1 row)
  
  DEALLOCATE st;
+ -- statistics collection tests
+ ANALYZE agg_csv;
+ SELECT relpages, reltuples FROM pg_class WHERE relname = 'agg_csv';
+  relpages | reltuples 
+ ----------+-----------
+         1 |         3
+ (1 row)
+ 
+ SELECT * FROM pg_stats WHERE tablename = 'agg_csv';
+  schemaname | tablename | attname | inherited | null_frac | avg_width | n_distinct | most_common_vals | most_common_freqs |    histogram_bounds     | correlation 
+ ------------+-----------+---------+-----------+-----------+-----------+------------+------------------+-------------------+-------------------------+-------------
+  public     | agg_csv   | a       | f         |         0 |         2 |         -1 |                  |                   | {0,42,100}              |        -0.5
+  public     | agg_csv   | b       | f         |         0 |         4 |         -1 |                  |                   | {0.09561,99.097,324.78} |         0.5
+ (2 rows)
+ 
  -- tableoid
  SELECT tableoid::regclass, b FROM agg_csv;
   tableoid |    b    
diff -crNB original/postgresql-9.1beta1/src/backend/commands/analyze.c changed/postgresql-9.1beta1/src/backend/commands/analyze.c
*** original/postgresql-9.1beta1/src/backend/commands/analyze.c 2011-04-28 06:17:22.000000000 +0900
--- changed/postgresql-9.1beta1/src/backend/commands/analyze.c  2011-09-12 13:21:04.000000000 +0900
***************
*** 24,35 ****
--- 24,38 ----
  #include "catalog/index.h"
  #include "catalog/indexing.h"
  #include "catalog/namespace.h"
+ #include "catalog/pg_class.h"
  #include "catalog/pg_collation.h"
  #include "catalog/pg_inherits_fn.h"
  #include "catalog/pg_namespace.h"
  #include "commands/dbcommands.h"
  #include "commands/vacuum.h"
  #include "executor/executor.h"
+ #include "foreign/foreign.h"
+ #include "foreign/fdwapi.h"
  #include "miscadmin.h"
  #include "nodes/nodeFuncs.h"
  #include "parser/parse_oper.h"
***************
*** 94,114 ****
                                        AnlIndexData *indexdata, int nindexes,
                                        HeapTuple *rows, int numrows,
                                        MemoryContext col_context);
! static VacAttrStats *examine_attribute(Relation onerel, int attnum,
!                                 Node *index_expr);
  static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
                                        int targrows, double *totalrows, double *totaldeadrows);
! static double random_fract(void);
! static double init_selection_state(int n);
! static double get_next_S(double t, int n, double *stateptr);
! static int    compare_rows(const void *a, const void *b);
  static int acquire_inherited_sample_rows(Relation onerel,
                                                          HeapTuple *rows, int targrows,
                                                          double *totalrows, double *totaldeadrows);
! static void update_attstats(Oid relid, bool inh,
!                               int natts, VacAttrStats **vacattrstats);
! static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
! static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
  
  static bool std_typanalyze(VacAttrStats *stats);
  
--- 97,117 ----
                                        AnlIndexData *indexdata, int nindexes,
                                        HeapTuple *rows, int numrows,
                                        MemoryContext col_context);
! /* static VacAttrStats *examine_attribute(Relation onerel, int attnum, */
! /*                              Node *index_expr); */
  static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
                                        int targrows, double *totalrows, double *totaldeadrows);
! /* static double random_fract(void); */
! /* static double init_selection_state(int n); */
! /* static double get_next_S(double t, int n, double *stateptr); */
! /* static int compare_rows(const void *a, const void *b); */
  static int acquire_inherited_sample_rows(Relation onerel,
                                                          HeapTuple *rows, int targrows,
                                                          double *totalrows, double *totaldeadrows);
! /* static void update_attstats(Oid relid, bool inh, */
! /*                            int natts, VacAttrStats **vacattrstats); */
! /* static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); */
! /* static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); */
  
  static bool std_typanalyze(VacAttrStats *stats);
  
***************
*** 129,134 ****
--- 132,138 ----
                        BufferAccessStrategy bstrategy, bool update_reltuples)
  {
        Relation        onerel;
+       MemoryContext caller_context;
  
        /* Set up static variables */
        if (vacstmt->options & VACOPT_VERBOSE)
***************
*** 196,202 ****
         * Check that it's a plain table; we used to do this in get_rel_oids() but
         * seems safer to check after we've locked the relation.
         */
!       if (onerel->rd_rel->relkind != RELKIND_RELATION)
        {
                /* No need for a WARNING if we already complained during VACUUM */
                if (!(vacstmt->options & VACOPT_VACUUM))
--- 200,207 ----
         * Check that it's a plain table; we used to do this in get_rel_oids() but
         * seems safer to check after we've locked the relation.
         */
!       if (!(onerel->rd_rel->relkind == RELKIND_RELATION ||
!                 onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE))
        {
                /* No need for a WARNING if we already complained during VACUUM */
                if (!(vacstmt->options & VACOPT_VACUUM))
***************
*** 238,250 ****
        /*
         * Do the normal non-recursive ANALYZE.
         */
!       do_analyze_rel(onerel, vacstmt, update_reltuples, false);
  
!       /*
!        * If there are child tables, do recursive ANALYZE.
!        */
!       if (onerel->rd_rel->relhassubclass)
!               do_analyze_rel(onerel, vacstmt, false, true);
  
        /*
         * Close source relation now, but keep lock so that no one deletes it
--- 243,272 ----
        /*
         * Do the normal non-recursive ANALYZE.
         */
!       if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
!       {
!               ForeignDataWrapper      *wrapper; 
!               ForeignServer           *server;
!               ForeignTable            *table;
!               FdwRoutine                      *fdwroutine;
! 
!               table = GetForeignTable(RelationGetRelid(onerel));
!               server = GetForeignServer(table->serverid);
!               wrapper = GetForeignDataWrapper(server->fdwid);
!               fdwroutine = GetFdwRoutine(wrapper->fdwhandler);
  
!               fdwroutine->AnalyzeForeignTable(onerel, vacstmt, elevel);
!       }
!       else
!       {
!               do_analyze_rel(onerel, vacstmt, update_reltuples, false);
! 
!               /*
!                * If there are child tables, do recursive ANALYZE.
!                */
!               if (onerel->rd_rel->relhassubclass)
!                       do_analyze_rel(onerel, vacstmt, false, true);
!       }
  
        /*
         * Close source relation now, but keep lock so that no one deletes it
***************
*** 354,360 ****
                                                (errcode(ERRCODE_UNDEFINED_COLUMN),
                                        errmsg("column \"%s\" of relation \"%s\" does not exist",
                                                   col, RelationGetRelationName(onerel))));
!                       vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
                        if (vacattrstats[tcnt] != NULL)
                                tcnt++;
                }
--- 376,382 ----
                                                (errcode(ERRCODE_UNDEFINED_COLUMN),
                                        errmsg("column \"%s\" of relation \"%s\" does not exist",
                                                   col, RelationGetRelationName(onerel))));
!                       vacattrstats[tcnt] = examine_attribute(onerel, i, NULL, anl_context);
                        if (vacattrstats[tcnt] != NULL)
                                tcnt++;
                }
***************
*** 368,374 ****
                tcnt = 0;
                for (i = 1; i <= attr_cnt; i++)
                {
!                       vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
                        if (vacattrstats[tcnt] != NULL)
                                tcnt++;
                }
--- 390,396 ----
                tcnt = 0;
                for (i = 1; i <= attr_cnt; i++)
                {
!                       vacattrstats[tcnt] = examine_attribute(onerel, i, NULL, anl_context);
                        if (vacattrstats[tcnt] != NULL)
                                tcnt++;
                }
***************
*** 423,429 ****
                                                indexkey = (Node *) lfirst(indexpr_item);
                                                indexpr_item = lnext(indexpr_item);
                                                thisdata->vacattrstats[tcnt] =
!                                                       examine_attribute(Irel[ind], i + 1, indexkey);
                                                if (thisdata->vacattrstats[tcnt] != NULL)
                                                {
                                                        tcnt++;
--- 445,451 ----
                                                indexkey = (Node *) lfirst(indexpr_item);
                                                indexpr_item = lnext(indexpr_item);
                                                thisdata->vacattrstats[tcnt] =
!                                                       examine_attribute(Irel[ind], i + 1, indexkey, anl_context);
                                                if (thisdata->vacattrstats[tcnt] != NULL)
                                                {
                                                        tcnt++;
***************
*** 825,832 ****
   * If index_expr isn't NULL, then we're trying to analyze an expression index,
   * and index_expr is the expression tree representing the column's data.
   */
! static VacAttrStats *
! examine_attribute(Relation onerel, int attnum, Node *index_expr)
  {
        Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
        HeapTuple       typtuple;
--- 847,854 ----
   * If index_expr isn't NULL, then we're trying to analyze an expression index,
   * and index_expr is the expression tree representing the column's data.
   */
! VacAttrStats *
! examine_attribute(Relation onerel, int attnum, Node *index_expr, MemoryContext anl_context)
  {
        Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
        HeapTuple       typtuple;
***************
*** 1272,1278 ****
  }
  
  /* Select a random value R uniformly distributed in (0 - 1) */
! static double
  random_fract(void)
  {
        return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
--- 1294,1300 ----
  }
  
  /* Select a random value R uniformly distributed in (0 - 1) */
! double
  random_fract(void)
  {
        return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
***************
*** 1292,1305 ****
   * determines the number of records to skip before the next record is
   * processed.
   */
! static double
  init_selection_state(int n)
  {
        /* Initial value of W (for use when Algorithm Z is first applied) */
        return exp(-log(random_fract()) / n);
  }
  
! static double
  get_next_S(double t, int n, double *stateptr)
  {
        double          S;
--- 1314,1327 ----
   * determines the number of records to skip before the next record is
   * processed.
   */
! double
  init_selection_state(int n)
  {
        /* Initial value of W (for use when Algorithm Z is first applied) */
        return exp(-log(random_fract()) / n);
  }
  
! double
  get_next_S(double t, int n, double *stateptr)
  {
        double          S;
***************
*** 1384,1390 ****
  /*
   * qsort comparator for sorting rows[] array
   */
! static int
  compare_rows(const void *a, const void *b)
  {
        HeapTuple       ha = *(HeapTuple *) a;
--- 1406,1412 ----
  /*
   * qsort comparator for sorting rows[] array
   */
! int
  compare_rows(const void *a, const void *b)
  {
        HeapTuple       ha = *(HeapTuple *) a;
***************
*** 1578,1584 ****
   *            ANALYZE the same table concurrently.  Presently, we lock that out
   *            by taking a self-exclusive lock on the relation in analyze_rel().
   */
! static void
  update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
  {
        Relation        sd;
--- 1600,1606 ----
   *            ANALYZE the same table concurrently.  Presently, we lock that out
   *            by taking a self-exclusive lock on the relation in analyze_rel().
   */
! void
  update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
  {
        Relation        sd;
***************
*** 1712,1718 ****
   * This exists to provide some insulation between compute_stats routines
   * and the actual storage of the sample data.
   */
! static Datum
  std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
  {
        int                     attnum = stats->tupattnum;
--- 1734,1740 ----
   * This exists to provide some insulation between compute_stats routines
   * and the actual storage of the sample data.
   */
! Datum
  std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
  {
        int                     attnum = stats->tupattnum;
***************
*** 1728,1734 ****
   * We have not bothered to construct index tuples, instead the data is
   * just in Datum arrays.
   */
! static Datum
  ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
  {
        int                     i;
--- 1750,1756 ----
   * We have not bothered to construct index tuples, instead the data is
   * just in Datum arrays.
   */
! Datum
  ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
  {
        int                     i;
diff -crNB original/postgresql-9.1beta1/src/include/commands/vacuum.h changed/postgresql-9.1beta1/src/include/commands/vacuum.h
*** original/postgresql-9.1beta1/src/include/commands/vacuum.h  2011-04-28 06:17:22.000000000 +0900
--- changed/postgresql-9.1beta1/src/include/commands/vacuum.h   2011-09-12 15:03:51.000000000 +0900
***************
*** 130,137 ****
  
  
  /* GUC parameters */
! extern PGDLLIMPORT int default_statistics_target;             /* PGDLLIMPORT for
!                                                                                                * PostGIS */
  extern int    vacuum_freeze_min_age;
  extern int    vacuum_freeze_table_age;
  
--- 130,137 ----
  
  
  /* GUC parameters */
! extern PGDLLIMPORT int default_statistics_target;     /* PGDLLIMPORT for
!                                                                                        * PostGIS */
  extern int    vacuum_freeze_min_age;
  extern int    vacuum_freeze_table_age;
  
***************
*** 161,166 ****
  
  /* in commands/analyze.c */
  extern void analyze_rel(Oid relid, VacuumStmt *vacstmt,
!                       BufferAccessStrategy bstrategy, bool update_reltuples);
  
  #endif   /* VACUUM_H */
--- 161,175 ----
  
  /* in commands/analyze.c */
  extern void analyze_rel(Oid relid, VacuumStmt *vacstmt,
!                                               BufferAccessStrategy bstrategy, bool update_reltuples);
! extern VacAttrStats * examine_attribute(Relation onerel, int attnum, Node *index_expr,
!                                                                               MemoryContext anl_context);
! extern double random_fract(void);
! extern double init_selection_state(int n);
! extern double get_next_S(double t, int n, double *stateptr);
! extern int    compare_rows(const void *a, const void *b);
! extern void update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats);
! extern Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
! extern Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
  
  #endif   /* VACUUM_H */
diff -crNB original/postgresql-9.1beta1/src/include/foreign/fdwapi.h changed/postgresql-9.1beta1/src/include/foreign/fdwapi.h
*** original/postgresql-9.1beta1/src/include/foreign/fdwapi.h   2011-04-28 06:17:22.000000000 +0900
--- changed/postgresql-9.1beta1/src/include/foreign/fdwapi.h    2011-09-12 15:08:31.000000000 +0900
***************
*** 12,19 ****
--- 12,21 ----
  #ifndef FDWAPI_H
  #define FDWAPI_H
  
+ #include "foreign/foreign.h"
  #include "nodes/execnodes.h"
  #include "nodes/relation.h"
+ #include "utils/rel.h"
  
  /* To avoid including explain.h here, reference ExplainState thus: */
  struct ExplainState;
***************
*** 68,73 ****
--- 69,77 ----
  
  typedef void (*EndForeignScan_function) (ForeignScanState *node);
  
+ typedef void (*AnalyzeForeignTable_function) (Relation relation,
+                                                                                         VacuumStmt *vacstmt,
+                                                                                         int elevel);
  
  /*
   * FdwRoutine is the struct returned by a foreign-data wrapper's handler
***************
*** 88,93 ****
--- 92,98 ----
        IterateForeignScan_function IterateForeignScan;
        ReScanForeignScan_function ReScanForeignScan;
        EndForeignScan_function EndForeignScan;
+       AnalyzeForeignTable_function AnalyzeForeignTable;
  } FdwRoutine;
  
  