Andres Freund <and...@anarazel.de> writes: > On 2022-02-23 00:51:24 +0100, Tomas Vondra wrote: >> And here's the slightly simplified patch, without the part adding the >> unnecessary GetServerVersion() function.
> Doesn't apply anymore: http://cfbot.cputube.org/patch_37_3535.log > Marked as waiting-on-author. Here's a rebased version that should at least pass regression tests. I've not reviewed it in any detail, but: * I'm not really on board with defaulting to SYSTEM sample method, and definitely not on board with not allowing any other choice. We don't know enough about the situation in a remote table to be confident that potentially-nonrandom sampling is OK. So personally I'd default to BERNOULLI, which is more reliable and seems plenty fast enough given your upthread results. It could be an idea to extend the sample option to be like "sample [ = methodname ]", if you want more flexibility, but I'd be happy leaving that for another time. * The code depending on reltuples is broken in recent server versions, and might give useless results in older ones too (if reltuples = relpages = 0). Ideally we'd retrieve all of reltuples, relpages, and pg_relation_size(rel), and do the same calculation the planner does. Not sure if pg_relation_size() exists far enough back though. * Copying-and-pasting all of deparseAnalyzeSql (twice!) seems pretty bletcherous. Why not call that and then add a WHERE clause to its result, or just add some parameters to it so it can do that itself? * More attention to updating relevant comments would be appropriate, eg here you've not bothered to fix the adjacent falsified comment: /* We've retrieved all living tuples from foreign server. */ - *totalrows = astate.samplerows; + if (do_sample) + *totalrows = reltuples; + else + *totalrows = astate.samplerows; * Needs docs obviously. I'm not sure if the existing regression testing covers the new code adequately or if we need more cases. Having said that much, I'm going to leave it in Waiting on Author state. regards, tom lane
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c index 8f4d8a5022..cdf12e53e8 100644 --- a/contrib/postgres_fdw/deparse.c +++ b/contrib/postgres_fdw/deparse.c @@ -2297,6 +2297,31 @@ deparseAnalyzeSizeSql(StringInfo buf, Relation rel) appendStringInfo(buf, "::pg_catalog.regclass) / %d", BLCKSZ); } +/* + * Construct SELECT statement to acquire the number of rows in given relation. + * + * We just need an estimate here, so consult pg_class.reltuples instead + * of doing anything as expensive as COUNT(*). + * + * Note: Maybe this should compare relpages and current relation size + * and adjust reltuples accordingly, like the planner does? + * XXX: it needs some work anyway, because it won't do anything sane + * for a never-analyzed table with reltuples = -1. + */ +void +deparseAnalyzeTuplesSql(StringInfo buf, Relation rel) +{ + StringInfoData relname; + + /* We'll need the remote relation name as a literal. */ + initStringInfo(&relname); + deparseRelation(&relname, rel); + + appendStringInfoString(buf, "SELECT reltuples FROM pg_catalog.pg_class WHERE oid = "); + deparseStringLiteral(buf, relname.data); + appendStringInfoString(buf, "::pg_catalog.regclass"); +} + /* * Construct SELECT statement to acquire sample rows of given relation. * @@ -2358,6 +2383,135 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs) deparseRelation(buf, rel); } +/* + * Construct SELECT statement to acquire sample rows of given relation, + * by sampling a fraction of the table using TABLESAMPLE SYSTEM. + * + * SELECT command is appended to buf, and list of columns retrieved + * is returned to *retrieved_attrs. + * + * Note: We could allow selecting system/bernoulli, and maybe even the + * optional TSM modules (especially tsm_system_rows would help). + */ +void +deparseAnalyzeTableSampleSql(StringInfo buf, Relation rel, List **retrieved_attrs, double sample_frac) +{ + Oid relid = RelationGetRelid(rel); + TupleDesc tupdesc = RelationGetDescr(rel); + int i; + char *colname; + List *options; + ListCell *lc; + bool first = true; + + *retrieved_attrs = NIL; + + appendStringInfoString(buf, "SELECT "); + for (i = 0; i < tupdesc->natts; i++) + { + /* Ignore dropped columns. */ + if (TupleDescAttr(tupdesc, i)->attisdropped) + continue; + + if (!first) + appendStringInfoString(buf, ", "); + first = false; + + /* Use attribute name or column_name option. */ + colname = NameStr(TupleDescAttr(tupdesc, i)->attname); + options = GetForeignColumnOptions(relid, i + 1); + + foreach(lc, options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "column_name") == 0) + { + colname = defGetString(def); + break; + } + } + + appendStringInfoString(buf, quote_identifier(colname)); + + *retrieved_attrs = lappend_int(*retrieved_attrs, i + 1); + } + + /* Don't generate bad syntax for zero-column relation. */ + if (first) + appendStringInfoString(buf, "NULL"); + + /* + * Construct FROM clause + */ + appendStringInfoString(buf, " FROM "); + deparseRelation(buf, rel); + appendStringInfo(buf, " TABLESAMPLE SYSTEM(%f)", (100.0 * sample_frac)); +} + +/* + * Construct SELECT statement to acquire sample rows of given relation, + * by sampling a fraction of the table using random(). + * + * SELECT command is appended to buf, and list of columns retrieved + * is returned to *retrieved_attrs. + */ +void +deparseAnalyzeLegacySampleSql(StringInfo buf, Relation rel, List **retrieved_attrs, double sample_frac) +{ + Oid relid = RelationGetRelid(rel); + TupleDesc tupdesc = RelationGetDescr(rel); + int i; + char *colname; + List *options; + ListCell *lc; + bool first = true; + + *retrieved_attrs = NIL; + + appendStringInfoString(buf, "SELECT "); + for (i = 0; i < tupdesc->natts; i++) + { + /* Ignore dropped columns. */ + if (TupleDescAttr(tupdesc, i)->attisdropped) + continue; + + if (!first) + appendStringInfoString(buf, ", "); + first = false; + + /* Use attribute name or column_name option. */ + colname = NameStr(TupleDescAttr(tupdesc, i)->attname); + options = GetForeignColumnOptions(relid, i + 1); + + foreach(lc, options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "column_name") == 0) + { + colname = defGetString(def); + break; + } + } + + appendStringInfoString(buf, quote_identifier(colname)); + + *retrieved_attrs = lappend_int(*retrieved_attrs, i + 1); + } + + /* Don't generate bad syntax for zero-column relation. */ + if (first) + appendStringInfoString(buf, "NULL"); + + /* + * Construct FROM clause + */ + appendStringInfoString(buf, " FROM "); + deparseRelation(buf, rel); + appendStringInfo(buf, " WHERE random() < %f", sample_frac); +} + /* * Construct a simple "TRUNCATE rel" statement */ diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 5f2ef88cf3..482c21032b 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -9532,7 +9532,7 @@ DO $d$ END; $d$; ERROR: invalid option "password" -HINT: Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, sslsni, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size, async_capable, parallel_commit, keep_connections +HINT: Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, sslcrldir, sslsni, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, truncatable, fetch_size, batch_size, async_capable, parallel_commit, keep_connections, sample CONTEXT: SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')" PL/pgSQL function inline_code_block line 3 at EXECUTE -- If we add a password for our user mapping instead, we should get a different diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c index 572591a558..3749d4701a 100644 --- a/contrib/postgres_fdw/option.c +++ b/contrib/postgres_fdw/option.c @@ -122,7 +122,8 @@ postgres_fdw_validator(PG_FUNCTION_ARGS) strcmp(def->defname, "truncatable") == 0 || strcmp(def->defname, "async_capable") == 0 || strcmp(def->defname, "parallel_commit") == 0 || - strcmp(def->defname, "keep_connections") == 0) + strcmp(def->defname, "keep_connections") == 0 || + strcmp(def->defname, "sample") == 0) { /* these accept only boolean values */ (void) defGetBoolean(def); @@ -254,6 +255,10 @@ InitPgFdwOptions(void) {"keep_connections", ForeignServerRelationId, false}, {"password_required", UserMappingRelationId, false}, + /* sampling is available on both server and table */ + {"sample", ForeignServerRelationId, false}, + {"sample", ForeignTableRelationId, false}, + /* * sslcert and sslkey are in fact libpq options, but we repeat them * here to allow them to appear in both foreign server context (when diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 955a428e3d..cd2088e2ae 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -4961,6 +4961,68 @@ postgresAnalyzeForeignTable(Relation relation, return true; } +/* + * postgresCountTuplesForForeignTable + * Count tuples in foreign table (just get pg_class.reltuples). + * + * Note: It's unclear how accurate reltuples is, maybe size that using + * relpages and simple assumptions (1 tuples per page, ...)? Using + * tsm_system_rows wold make this somewhat unnecessary. + */ +static double +postgresCountTuplesForForeignTable(Relation relation) +{ + ForeignTable *table; + UserMapping *user; + PGconn *conn; + StringInfoData sql; + PGresult *volatile res = NULL; + double reltuples = -1; + + /* + * Now we have to get the number of pages. It's annoying that the ANALYZE + * API requires us to return that now, because it forces some duplication + * of effort between this routine and postgresAcquireSampleRowsFunc. But + * it's probably not worth redefining that API at this point. + */ + + /* + * Get the connection to use. We do the remote access as the table's + * owner, even if the ANALYZE was started by some other user. + */ + table = GetForeignTable(RelationGetRelid(relation)); + user = GetUserMapping(relation->rd_rel->relowner, table->serverid); + conn = GetConnection(user, false, NULL); + + /* + * Construct command to get page count for relation. + */ + initStringInfo(&sql); + deparseAnalyzeTuplesSql(&sql, relation); + + /* In what follows, do not risk leaking any PGresults. */ + PG_TRY(); + { + res = pgfdw_exec_query(conn, sql.data, NULL); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pgfdw_report_error(ERROR, res, conn, false, sql.data); + + if (PQntuples(res) != 1 || PQnfields(res) != 1) + elog(ERROR, "unexpected result from deparseAnalyzeSizeSql query"); + reltuples = strtod(PQgetvalue(res, 0, 0), NULL); + } + PG_FINALLY(); + { + if (res) + PQclear(res); + } + PG_END_TRY(); + + ReleaseConnection(conn); + + return reltuples; +} + /* * Acquire a random sample of rows from foreign table managed by postgres_fdw. * @@ -4991,6 +5053,12 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, unsigned int cursor_number; StringInfoData sql; PGresult *volatile res = NULL; + ListCell *lc; + int server_version_num; + bool do_sample = true; /* enabled by default */ + bool use_tablesample = true; + double sample_frac = -1.0; + double reltuples; /* Initialize workspace state */ astate.rel = relation; @@ -5018,20 +5086,81 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, user = GetUserMapping(relation->rd_rel->relowner, table->serverid); conn = GetConnection(user, false, NULL); + /* We'll need server version, so fetch it now. */ + server_version_num = PQserverVersion(conn); + + /* disable tablesample on old remote servers */ + if (server_version_num < 95000) + use_tablesample = false; + + /* + * Should we use TABLESAMPLE to collect the remote sample? + */ + foreach(lc, server->options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "sample") == 0) + { + do_sample = defGetBoolean(def); + break; + } + } + foreach(lc, table->options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "sample") == 0) + { + do_sample = defGetBoolean(def); + break; + } + } + + if (do_sample) + { + reltuples = postgresCountTuplesForForeignTable(relation); + + if ((reltuples <= 0) || (targrows >= reltuples)) + do_sample = false; + + sample_frac = targrows / reltuples; + + /* Let's sample a bit more, we'll reduce the sample locally. */ + sample_frac *= 1.25; + + /* Sanity checks. */ + sample_frac = Min(1.0, Max(0.0, sample_frac)); + + /* + * When sampling too large fraction, just read everything. + * + * XXX It's not clear where exactly the threshold is, with slow + * network it may be cheaper to sample even 90%. + */ + if (sample_frac > 0.5) + do_sample = false; + } + /* * Construct cursor that retrieves whole rows from remote. */ cursor_number = GetCursorNumber(conn); initStringInfo(&sql); appendStringInfo(&sql, "DECLARE c%u CURSOR FOR ", cursor_number); - deparseAnalyzeSql(&sql, relation, &astate.retrieved_attrs); + + if (do_sample && use_tablesample) + deparseAnalyzeTableSampleSql(&sql, relation, &astate.retrieved_attrs, sample_frac); + else if (do_sample) + deparseAnalyzeLegacySampleSql(&sql, relation, &astate.retrieved_attrs, sample_frac); + else + deparseAnalyzeSql(&sql, relation, &astate.retrieved_attrs); /* In what follows, do not risk leaking any PGresults. */ PG_TRY(); { char fetch_sql[64]; int fetch_size; - ListCell *lc; res = pgfdw_exec_query(conn, sql.data, NULL); if (PQresultStatus(res) != PGRES_COMMAND_OK) @@ -5119,7 +5248,10 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, *totaldeadrows = 0.0; /* We've retrieved all living tuples from foreign server. */ - *totalrows = astate.samplerows; + if (do_sample) + *totalrows = reltuples; + else + *totalrows = astate.samplerows; /* * Emit some interesting relation info diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h index 21f2b20ce8..b0d9cf4298 100644 --- a/contrib/postgres_fdw/postgres_fdw.h +++ b/contrib/postgres_fdw/postgres_fdw.h @@ -211,8 +211,15 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root, List *returningList, List **retrieved_attrs); extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel); +extern void deparseAnalyzeTuplesSql(StringInfo buf, Relation rel); extern void deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs); +extern void deparseAnalyzeTableSampleSql(StringInfo buf, Relation rel, + List **retrieved_attrs, + double sample_frac); +extern void deparseAnalyzeLegacySampleSql(StringInfo buf, Relation rel, + List **retrieved_attrs, + double sample_frac); extern void deparseTruncateSql(StringInfo buf, List *rels, DropBehavior behavior,