This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 760eb4f2f IMPALA-13066: Extend SHOW CREATE TABLE to include stats and
partitions
760eb4f2f is described below
commit 760eb4f2fa3f7470ae097067d2320dfde6e50ac6
Author: Arnab Karmakar <[email protected]>
AuthorDate: Mon Oct 13 17:26:13 2025 +0530
IMPALA-13066: Extend SHOW CREATE TABLE to include stats and partitions
Adds a new WITH STATS option to the SHOW CREATE TABLE statement to
emit additional SQL statements for recreating table statistics and
partitions.
When specified, Impala outputs:
- Base CREATE TABLE statement.
- ALTER TABLE ... SET TBLPROPERTIES for table-level stats.
- ALTER TABLE ... SET COLUMN STATS for all non-partition columns,
restoring column stats.
- For partitioned tables:
- ALTER TABLE ... ADD PARTITION statements to recreate partitions.
- Per-partition ALTER TABLE ... PARTITION (...) SET TBLPROPERTIES
to restore partition-level stats.
Partition output is limited by the SHOW_CREATE_TABLE_PARTITION_LIMIT query
option (default 1000). Setting SHOW_CREATE_TABLE_PARTITION_LIMIT=0 includes
all partitions; if a nonzero limit is exceeded, a warning is emitted in the
output.
Tests added to verify correctness of emitted statements. Default
behavior of SHOW CREATE TABLE remains unchanged for compatibility.
Change-Id: I87950ae9d9bb73cb2a435cf5bcad076df1570dc2
Reviewed-on: http://gerrit.cloudera.org:8080/23536
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
be/src/service/client-request-state.cc | 12 +-
be/src/service/frontend.cc | 13 +-
be/src/service/frontend.h | 3 +-
be/src/service/query-options.cc | 7 +
be/src/service/query-options.h | 3 +-
common/thrift/Frontend.thrift | 6 +
common/thrift/ImpalaService.thrift | 4 +
common/thrift/Query.thrift | 3 +
docs/topics/impala_show.xml | 23 +-
fe/src/main/cup/sql-parser.cup | 2 +
.../impala/analysis/ShowCreateTableStmt.java | 24 +-
.../org/apache/impala/analysis/ToSqlUtils.java | 452 ++++++++++++++++---
.../java/org/apache/impala/service/Frontend.java | 6 +
.../org/apache/impala/service/JniFrontend.java | 23 +-
.../QueryTest/show-create-table-with-stats.test | 485 +++++++++++++++++++++
tests/metadata/test_show_create_table.py | 150 ++++++-
16 files changed, 1148 insertions(+), 68 deletions(-)
diff --git a/be/src/service/client-request-state.cc
b/be/src/service/client-request-state.cc
index e6206ccef..ef9147c83 100644
--- a/be/src/service/client-request-state.cc
+++ b/be/src/service/client-request-state.cc
@@ -568,8 +568,16 @@ Status ClientRequestState::ExecLocalCatalogOp(
}
case TCatalogOpType::SHOW_CREATE_TABLE: {
string response;
-
RETURN_IF_ERROR(frontend_->ShowCreateTable(catalog_op.show_create_table_params,
- &response));
+ bool with_stats = false;
+ if (catalog_op.__isset.show_create_table_with_stats) {
+ with_stats = catalog_op.show_create_table_with_stats;
+ }
+ int32_t partition_limit =
query_options().show_create_table_partition_limit; // Default value
+ if (catalog_op.__isset.show_create_table_partition_limit) {
+ partition_limit = catalog_op.show_create_table_partition_limit;
+ }
+ RETURN_IF_ERROR(frontend_->ShowCreateTable(
+ catalog_op.show_create_table_params, with_stats, partition_limit,
&response));
SetResultSet(vector<string>(1, response));
return Status::OK();
}
diff --git a/be/src/service/frontend.cc b/be/src/service/frontend.cc
index cdd4d2224..3ac463db3 100644
--- a/be/src/service/frontend.cc
+++ b/be/src/service/frontend.cc
@@ -220,8 +220,17 @@ Status Frontend::DescribeTable(const TDescribeTableParams&
params,
return JniUtil::CallJniMethod(fe_, describe_table_id_, tparams, response);
}
-Status Frontend::ShowCreateTable(const TTableName& table_name, string*
response) {
- return JniUtil::CallJniMethod(fe_, show_create_table_id_, table_name,
response);
+Status Frontend::ShowCreateTable(const TTableName& table_name, bool with_stats,
+ int32_t show_create_table_partition_limit, string* response) {
+ // Build a small struct to pass both pieces since the JNI method expects a
single arg.
+ // Reuse TCatalogOpRequest fields: set show_create_table_params and
+ // show_create_table_with_stats.
+ TCatalogOpRequest req;
+ req.op_type = TCatalogOpType::SHOW_CREATE_TABLE;
+ req.__set_show_create_table_params(table_name);
+ req.__set_show_create_table_with_stats(with_stats);
+
req.__set_show_create_table_partition_limit(show_create_table_partition_limit);
+ return JniUtil::CallJniMethod(fe_, show_create_table_id_, req, response);
}
Status Frontend::ShowCreateFunction(const TGetFunctionsParams& params, string*
response) {
diff --git a/be/src/service/frontend.h b/be/src/service/frontend.h
index 05df93929..85eef03ec 100644
--- a/be/src/service/frontend.h
+++ b/be/src/service/frontend.h
@@ -166,7 +166,8 @@ class Frontend {
/// Returns (in the output parameter) a string containing the CREATE TABLE
command that
/// creates the table specified in the params.
- Status ShowCreateTable(const TTableName& table_name, std::string* response);
+ Status ShowCreateTable(const TTableName& table_name, bool with_stats,
+ int32_t show_create_table_partition_limit, std::string* response);
/// Returns (in the output parameter) a string containing the CREATE
FUNCTION command that
/// creates the function specified in the params.
diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index e66c6bc8a..09e4b3132 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -1425,6 +1425,13 @@ Status impala::SetQueryOption(TImpalaQueryOptions::type
option, const string& va
query_options->__set_tuple_cache_budget_bytes_per_executor(mem_spec_val.value);
break;
}
+ case TImpalaQueryOptions::SHOW_CREATE_TABLE_PARTITION_LIMIT: {
+ int32_t int32_t_val = 0;
+ RETURN_IF_ERROR(QueryOptionParser::ParseAndCheckNonNegative<int32_t>(
+ option, value, &int32_t_val));
+ query_options->__set_show_create_table_partition_limit(int32_t_val);
+ break;
+ }
default:
string key = to_string(option);
if (IsRemovedQueryOption(key)) {
diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h
index 09d0e951c..c89fecbe7 100644
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -51,7 +51,7 @@ typedef std::unordered_map<string,
beeswax::TQueryOptionLevel::type>
// plus one. Thus, the second argument to the DCHECK has to be updated every
// time we add or remove a query option to/from the enum TImpalaQueryOptions.
constexpr unsigned NUM_QUERY_OPTIONS =
- TImpalaQueryOptions::TUPLE_CACHE_BUDGET_BYTES_PER_EXECUTOR + 1;
+ TImpalaQueryOptions::SHOW_CREATE_TABLE_PARTITION_LIMIT + 1;
#define QUERY_OPTS_TABLE
\
DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(), NUM_QUERY_OPTIONS);
\
REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded,
ABORT_ON_DEFAULT_LIMIT_EXCEEDED) \
@@ -386,6 +386,7 @@ constexpr unsigned NUM_QUERY_OPTIONS =
TUPLE_CACHE_REQUIRED_COST_REDUCTION_FACTOR, TQueryOptionLevel::ADVANCED)
\
TUPLE_CACHE_EXEMPT_QUERY_OPT_FN(tuple_cache_budget_bytes_per_executor,
\
TUPLE_CACHE_BUDGET_BYTES_PER_EXECUTOR, TQueryOptionLevel::ADVANCED)
\
+ QUERY_OPT_FN(show_create_table_partition_limit,
SHOW_CREATE_TABLE_PARTITION_LIMIT, TQueryOptionLevel::REGULAR) \
;
/// Enforce practical limits on some query options to avoid undesired query
state.
diff --git a/common/thrift/Frontend.thrift b/common/thrift/Frontend.thrift
index 5426ba7e6..23c12b806 100644
--- a/common/thrift/Frontend.thrift
+++ b/common/thrift/Frontend.thrift
@@ -530,6 +530,12 @@ struct TCatalogOpRequest {
// Parameters for DESCRIBE HISTORY
19: optional TDescribeHistoryParams describe_history_params
+
+ // Options for SHOW CREATE TABLE
+ 20: optional bool show_create_table_with_stats
+
+ // Partition limit for SHOW CREATE TABLE WITH STATS
+ 21: optional i32 show_create_table_partition_limit
}
// Query options type
diff --git a/common/thrift/ImpalaService.thrift
b/common/thrift/ImpalaService.thrift
index 635816b8c..b5753733a 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -1069,6 +1069,10 @@ enum TImpalaQueryOptions {
// for a given query execution. A higher value caches more aggressively. A
lower
// value reduces caching and thus overhead.
TUPLE_CACHE_BUDGET_BYTES_PER_EXECUTOR = 197
+
+ // Maximum number of partitions to show in SHOW CREATE TABLE WITH STATS.
+ // 0 means no limit. Default is 1000.
+ SHOW_CREATE_TABLE_PARTITION_LIMIT = 198
}
// The summary of a DML statement.
diff --git a/common/thrift/Query.thrift b/common/thrift/Query.thrift
index 82a094af8..09be199dc 100644
--- a/common/thrift/Query.thrift
+++ b/common/thrift/Query.thrift
@@ -808,6 +808,9 @@ struct TQueryOptions {
// See comment in ImpalaService.thrift (defaults to 100MB)
198: optional i64 tuple_cache_budget_bytes_per_executor = 104857600;
+
+ // See comment in ImpalaService.thrift
+ 199: optional i32 show_create_table_partition_limit = 1000
}
// Impala currently has three types of sessions: Beeswax, HiveServer2 and
external
diff --git a/docs/topics/impala_show.xml b/docs/topics/impala_show.xml
index c332e0ecc..12ea08383 100644
--- a/docs/topics/impala_show.xml
+++ b/docs/topics/impala_show.xml
@@ -40,7 +40,7 @@ under the License.
SHOW SCHEMAS [[LIKE] '<varname>pattern</varname>'] - an alias for SHOW
DATABASES
SHOW TABLES [IN <varname>database_name</varname>] [[LIKE]
'<varname>pattern</varname>']
<ph rev="1.2.0">SHOW [AGGREGATE | ANALYTIC] FUNCTIONS [IN
<varname>database_name</varname>] [[LIKE] '<varname>pattern</varname>']</ph>
-<ph rev="1.2.1">SHOW CREATE TABLE
[<varname>database_name</varname>].<varname>table_name</varname></ph>
+<ph rev="1.2.1">SHOW CREATE TABLE
[<varname>database_name</varname>].<varname>table_name</varname> [WITH
STATS]</ph>
<ph rev="1.2.1">SHOW CREATE VIEW
[<varname>database_name</varname>].<varname>view_name</varname></ph>
<ph rev="1.2.1">SHOW TABLE STATS
[<varname>database_name</varname>.]<varname>table_name</varname></ph>
<ph rev="1.2.1">SHOW COLUMN STATS
[<varname>database_name</varname>.]<varname>table_name</varname></ph>
@@ -762,6 +762,27 @@ show tables like '*dim*|t*';
to change things such as the database name, <codeph>LOCATION</codeph>
field, and so on that might be
different on the destination system.
</p>
+ <p>
+ Optionally, you can include <codeph>WITH STATS</codeph> to append
additional statements that recreate
+ table statistics and partitions. For filesystem-backed, Iceberg, and
Kudu tables, Impala emits:
+ </p>
+ <ul>
+ <li>Table-level <codeph>ALTER TABLE ... SET TBLPROPERTIES</codeph>
statements to set properties like
+ <codeph>numRows</codeph> and
<codeph>STATS_GENERATED_VIA_STATS_TASK</codeph></li>
+ <li><codeph>ALTER TABLE ... SET COLUMN STATS</codeph> statements for
all non-partition columns,
+ including statistics like <codeph>numDVs</codeph>,
<codeph>numNulls</codeph>, <codeph>maxSize</codeph>,
+ <codeph>avgSize</codeph>, <codeph>numTrues</codeph>, and
<codeph>numFalses</codeph></li>
+ <li>For partitioned tables: <codeph>ALTER TABLE ... ADD
PARTITION</codeph> statements to recreate partitions</li>
+ <li>For partitioned tables: Per-partition <codeph>ALTER TABLE ...
PARTITION (...) SET TBLPROPERTIES</codeph>
+ statements to set partition-level properties like
<codeph>numRows</codeph>, <codeph>numFiles</codeph>,
+ and <codeph>totalSize</codeph></li>
+ </ul>
+ <p>
+ The number of partitions included in the output is controlled by the
<codeph>SHOW_CREATE_TABLE_PARTITION_LIMIT</codeph>
+ query option, which defaults to 1000. Set
<codeph>SHOW_CREATE_TABLE_PARTITION_LIMIT=0</codeph> to include all partitions,
+ or set it to a smaller value to limit the output. If the partition
limit is exceeded, a warning is
+ emitted in the output.
+ </p>
<p>
If you specify a view name in the <codeph>SHOW CREATE TABLE</codeph>,
it returns a <codeph>CREATE VIEW</codeph> statement with column names
diff --git a/fe/src/main/cup/sql-parser.cup b/fe/src/main/cup/sql-parser.cup
index 109334ff4..5c9f7e51f 100755
--- a/fe/src/main/cup/sql-parser.cup
+++ b/fe/src/main/cup/sql-parser.cup
@@ -3208,6 +3208,8 @@ show_pattern ::=
show_create_tbl_stmt ::=
KW_SHOW KW_CREATE show_create_tbl_object_type:object_type table_name:table
{: RESULT = new ShowCreateTableStmt(table, object_type); :}
+ | KW_SHOW KW_CREATE show_create_tbl_object_type:object_type table_name:table
KW_WITH KW_STATS
+ {: RESULT = new ShowCreateTableStmt(table, object_type, true); :}
;
show_create_tbl_object_type ::=
diff --git
a/fe/src/main/java/org/apache/impala/analysis/ShowCreateTableStmt.java
b/fe/src/main/java/org/apache/impala/analysis/ShowCreateTableStmt.java
index 8cc102140..3c47934a8 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ShowCreateTableStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ShowCreateTableStmt.java
@@ -33,7 +33,10 @@ import com.google.common.base.Preconditions;
* "CREATE TABLE ..." string that re-creates the table or the "CREATE VIEW ..."
* string that re-creates the view as appropriate.
*
- * Syntax: SHOW CREATE (TABLE|VIEW) <table or view>
+ * If WITH STATS is specified, the output will include the stats and
+ * partitions of the table.
+ *
+ * Syntax: SHOW CREATE (TABLE|VIEW) <table or view> [WITH STATS]
*/
public class ShowCreateTableStmt extends StatementBase implements
SingleTableStmt {
private TableName tableName_;
@@ -41,10 +44,22 @@ public class ShowCreateTableStmt extends StatementBase
implements SingleTableStm
// The object type keyword used, e.g. TABLE or VIEW, needed to output
matching SQL.
private final TCatalogObjectType objectType_;
+ // Whether to include stats and partitions in the generated output.
+ private final boolean withStats_;
+
public ShowCreateTableStmt(TableName table, TCatalogObjectType objectType) {
Preconditions.checkNotNull(table);
this.tableName_ = table;
this.objectType_ = objectType;
+ this.withStats_ = false;
+ }
+
+ public ShowCreateTableStmt(TableName table, TCatalogObjectType objectType,
+ boolean withStats) {
+ Preconditions.checkNotNull(table);
+ this.tableName_ = table;
+ this.objectType_ = objectType;
+ this.withStats_ = withStats;
}
@Override
@@ -52,7 +67,10 @@ public class ShowCreateTableStmt extends StatementBase
implements SingleTableStm
@Override
public String toSql(ToSqlOptions options) {
- return "SHOW CREATE " + objectType_.name() + " " + tableName_;
+ StringBuilder sb = new StringBuilder();
+ sb.append("SHOW CREATE ").append(objectType_.name()).append("
").append(tableName_);
+ if (withStats_) sb.append(" WITH STATS");
+ return sb.toString();
}
@Override
@@ -88,4 +106,6 @@ public class ShowCreateTableStmt extends StatementBase
implements SingleTableStm
params.setDb_name(tableName_.getDb());
return params;
}
+
+ public boolean withStats() { return withStats_; }
}
diff --git a/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java
b/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java
index 2c0c60c8b..545b27de4 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java
@@ -18,6 +18,7 @@
package org.apache.impala.analysis;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
@@ -36,8 +37,10 @@ import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.ql.parse.HiveLexer;
import org.apache.impala.catalog.CatalogException;
import org.apache.impala.catalog.Column;
+import org.apache.impala.catalog.ColumnStats;
import org.apache.impala.catalog.DataSourceTable;
import org.apache.impala.catalog.FeDataSourceTable;
+import org.apache.impala.catalog.FeFsPartition;
import org.apache.impala.catalog.FeFsTable;
import org.apache.impala.catalog.FeHBaseTable;
import org.apache.impala.catalog.FeIcebergTable;
@@ -47,10 +50,12 @@ import org.apache.impala.catalog.FeView;
import org.apache.impala.catalog.Function;
import org.apache.impala.catalog.HdfsCompression;
import org.apache.impala.catalog.HdfsFileFormat;
+import org.apache.impala.catalog.HdfsPartition;
import org.apache.impala.catalog.IcebergColumn;
import org.apache.impala.catalog.IcebergTable;
import org.apache.impala.catalog.KuduColumn;
import org.apache.impala.catalog.KuduTable;
+import org.apache.impala.catalog.PrunablePartition;
import org.apache.impala.catalog.RowFormat;
import org.apache.impala.catalog.Table;
import org.apache.impala.catalog.paimon.FePaimonTable;
@@ -104,6 +109,22 @@ public class ToSqlUtils {
FeTable.LAST_MODIFIED_TIME,
FeTable.NUM_ROWS);
+ // Internal Iceberg metadata table properties to remove from iceberg table
+ @VisibleForTesting
+ protected static final ImmutableSet<String> HIDDEN_ICEBERG_TABLE_PROPERTIES =
+ ImmutableSet.of(
+ IcebergTable.KEY_STORAGE_HANDLER,
+ IcebergTable.METADATA_LOCATION,
+ IcebergTable.PREVIOUS_METADATA_LOCATION,
+ IcebergTable.CURRENT_SCHEMA,
+ IcebergTable.SNAPSHOT_COUNT,
+ IcebergTable.CURRENT_SNAPSHOT_ID,
+ IcebergTable.CURRENT_SNAPSHOT_SUMMARY,
+ IcebergTable.CURRENT_SNAPSHOT_TIMESTAMP_MS,
+ IcebergTable.DEFAULT_PARTITION_SPEC,
+ IcebergTable.UUID
+ );
+
/**
* Removes all hidden properties from the given 'tblProperties' map.
*/
@@ -112,6 +133,15 @@ public class ToSqlUtils {
for (String key: HIDDEN_TABLE_PROPERTIES) tblProperties.remove(key);
}
+ /**
+ * Removes all hidden Iceberg table properties from the given
'tblProperties' map.
+ */
+ @VisibleForTesting
+ protected static void removeHiddenIcebergTableProperties(
+ Map<String, String> tblProperties) {
+ for (String key: HIDDEN_ICEBERG_TABLE_PROPERTIES)
tblProperties.remove(key);
+ }
+
/**
* Removes all hidden Kudu properties from the given 'tblProperties' map.
*/
@@ -121,6 +151,73 @@ public class ToSqlUtils {
tblProperties.remove(KuduTable.KEY_TABLE_NAME);
}
+ /**
+ * Centralized filtering/masking of table properties for output. Applies a
common
+ * baseline of removals and then table-format specific rules.
+ * Populates two output maps: one for CREATE TABLE (more filtered) and one
for
+ * ALTER TABLE SET TBLPROPERTIES (less filtered, keeps more properties).
+ *
+ * @param rawProps The raw HMS properties to filter
+ * @param table The table
+ * @param msTbl The metastore table
+ * @param createTableProps Output map for CREATE TABLE properties (more
filtered)
+ * @param alterTableProps Output map for ALTER TABLE properties (less
filtered)
+ */
+ private static void filterTblProperties(Map<String, String> rawProps,
FeTable table,
+ org.apache.hadoop.hive.metastore.api.Table msTbl,
+ Map<String, String> createTableProps, Map<String, String>
alterTableProps) {
+ if (rawProps == null || rawProps.isEmpty()) return;
+
+ // Start with all properties in a single map
+ Map<String, String> commonProps = Maps.newLinkedHashMap();
+ commonProps.putAll(rawProps);
+
+ // Common internal property we never show in either output
+ commonProps.remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
+
+ // Table-format specific filtering (applies to both maps)
+ if (table instanceof FeKuduTable) {
+ // Remove storage handler and internal ids
+ commonProps.remove(KuduTable.KEY_STORAGE_HANDLER);
+ String kuduTableName = rawProps.get(KuduTable.KEY_TABLE_NAME);
+ if (kuduTableName != null &&
+ KuduUtil.isDefaultKuduTableName(kuduTableName,
+ table.getDb().getName(), table.getName())) {
+ commonProps.remove(KuduTable.KEY_TABLE_NAME);
+ }
+ commonProps.remove(KuduTable.KEY_TABLE_ID);
+ } else if (table instanceof FeIcebergTable) {
+ // Hide Iceberg internal metadata properties
+ removeHiddenIcebergTableProperties(commonProps);
+ } else if (table instanceof FePaimonTable) {
+ // Hide Paimon internals
+ commonProps.remove(CoreOptions.PRIMARY_KEY.key());
+ commonProps.remove(CoreOptions.PARTITION.key());
+ commonProps.remove(PaimonUtil.STORAGE_HANDLER);
+ commonProps.remove(CatalogOpExecutor.CAPABILITIES_KEY);
+ if (msTbl != null && PaimonUtil.isSynchronizedTable(msTbl)) {
+ commonProps.remove("TRANSLATED_TO_EXTERNAL");
+ commonProps.remove(Table.TBL_PROP_EXTERNAL_TABLE_PURGE);
+ }
+ } else if (table instanceof FeDataSourceTable) {
+ // Mask external JDBC sensitive properties (case-insensitively)
+ Set<String> keysToBeMasked =
DataSourceTable.getJdbcTblPropertyMaskKeys();
+ for (String key : keysToBeMasked) {
+ if (commonProps.containsKey(key)) commonProps.put(key, "******");
+ String lower = key.toLowerCase();
+ if (commonProps.containsKey(lower)) commonProps.put(lower, "******");
+ }
+ }
+
+ // Duplicate the common properties into alterTableProps (less filtered)
+ alterTableProps.putAll(commonProps);
+
+ // Duplicate into createTableProps and remove additional properties
+ // that are materialized elsewhere in CREATE TABLE DDL
+ createTableProps.putAll(commonProps);
+ removeHiddenTableProperties(createTableProps);
+ }
+
/**
* Returns the list of sort columns from 'properties' or 'null' if
'properties' doesn't
* contain 'sort.columns'.
@@ -360,20 +457,44 @@ public class ToSqlUtils {
* table.
*/
public static String getCreateTableSql(FeTable table) throws
CatalogException {
+ return renderCreateTableSql(table).createSql;
+ }
+ // Holder for CREATE TABLE SQL and filtered properties to avoid recomputation
+ private static final class CreateSqlArtifacts {
+ final String createSql;
+ final Map<String, String> createTableProperties; // Properties for CREATE
TABLE
+ final Map<String, String> alterTableProperties; // Properties for ALTER
TABLE
+
+ private CreateSqlArtifacts(String createSql, Map<String, String>
+ createTableProperties, Map<String, String> alterTableProperties) {
+ this.createSql = createSql;
+ this.createTableProperties = createTableProperties;
+ this.alterTableProperties = alterTableProperties;
+ }
+ }
+
+ private static CreateSqlArtifacts renderCreateTableSql(FeTable table)
+ throws CatalogException {
Preconditions.checkNotNull(table);
- if (table instanceof FeView) return getCreateViewSql((FeView)table);
- org.apache.hadoop.hive.metastore.api.Table msTable =
table.getMetaStoreTable();
- // Use a LinkedHashMap to preserve the ordering of the table properties.
- Map<String, String> properties =
Maps.newLinkedHashMap(msTable.getParameters());
- if (properties.containsKey(Table.TBL_PROP_LAST_DDL_TIME)) {
- properties.remove(Table.TBL_PROP_LAST_DDL_TIME);
+ if (table instanceof FeView) {
+ return new CreateSqlArtifacts(getCreateViewSql((FeView)table),
+ Maps.newLinkedHashMap(), Maps.newLinkedHashMap());
}
+ org.apache.hadoop.hive.metastore.api.Table msTable =
table.getMetaStoreTable();
+ // Raw HMS props (preserve ordering)
+ Map<String, String> rawProps =
Maps.newLinkedHashMap(msTable.getParameters());
+ rawProps.remove(Table.TBL_PROP_LAST_DDL_TIME);
boolean isExternal = Table.isExternalTable(msTable);
- List<String> sortColsSql = getSortColumns(properties);
- TSortingOrder sortingOrder =
TSortingOrder.valueOf(getSortingOrder(properties));
- String comment = properties.get("comment");
- removeHiddenTableProperties(properties);
+ // Values derived from raw props before filtering
+ List<String> sortColsSql = getSortColumns(rawProps);
+ TSortingOrder sortingOrder =
TSortingOrder.valueOf(getSortingOrder(rawProps));
+ String comment = rawProps.get("comment");
+
+ // Filter properties into two maps: one for CREATE TABLE, one for ALTER
TABLE
+ Map<String, String> createTableProps = Maps.newLinkedHashMap();
+ Map<String, String> alterTableProps = Maps.newLinkedHashMap();
+ filterTblProperties(rawProps, table, msTable, createTableProps,
alterTableProps);
List<String> colsSql = new ArrayList<>();
List<String> partitionColsSql = new ArrayList<>();
@@ -411,18 +532,6 @@ public class ToSqlUtils {
format = HdfsFileFormat.KUDU;
// Kudu tables cannot use the Hive DDL syntax for the storage handler
storageHandlerClassName = null;
- properties.remove(KuduTable.KEY_STORAGE_HANDLER);
- String kuduTableName = properties.get(KuduTable.KEY_TABLE_NAME);
- // Remove the hidden table property 'kudu.table_name' for a synchronized
Kudu table.
- if (kuduTableName != null &&
- KuduUtil.isDefaultKuduTableName(kuduTableName,
- table.getDb().getName(), table.getName())) {
- properties.remove(KuduTable.KEY_TABLE_NAME);
- }
- // Remove the hidden table property 'kudu.table_id'.
- properties.remove(KuduTable.KEY_TABLE_ID);
- // Internal property, should not be exposed to the user.
- properties.remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
isPrimaryKeyUnique = kuduTable.isPrimaryKeyUnique();
if (KuduTable.isSynchronizedTable(msTable)) {
@@ -441,18 +550,6 @@ public class ToSqlUtils {
} else if (table instanceof FeFsTable) {
if (table instanceof FeIcebergTable) {
storageHandlerClassName = null;
- // Internal properties, should not be exposed to the user.
- properties.remove(IcebergTable.KEY_STORAGE_HANDLER);
- properties.remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
- properties.remove(IcebergTable.METADATA_LOCATION);
- properties.remove(IcebergTable.PREVIOUS_METADATA_LOCATION);
- properties.remove(IcebergTable.CURRENT_SCHEMA);
- properties.remove(IcebergTable.SNAPSHOT_COUNT);
- properties.remove(IcebergTable.CURRENT_SNAPSHOT_ID);
- properties.remove(IcebergTable.CURRENT_SNAPSHOT_SUMMARY);
- properties.remove(IcebergTable.CURRENT_SNAPSHOT_TIMESTAMP_MS);
- properties.remove(IcebergTable.DEFAULT_PARTITION_SPEC);
- properties.remove(IcebergTable.UUID);
// Fill "PARTITIONED BY SPEC" part if the Iceberg table is partitioned.
FeIcebergTable feIcebergTable= (FeIcebergTable)table;
@@ -482,14 +579,8 @@ public class ToSqlUtils {
format = HdfsFileFormat.fromHdfsInputFormatClass(inputFormat, serDeLib);
storageHandlerClassName = null;
isPrimaryKeyUnique = true;
- properties.remove(CoreOptions.PRIMARY_KEY.key());
- properties.remove(CoreOptions.PARTITION.key());
- properties.remove(PaimonUtil.STORAGE_HANDLER);
- properties.remove(CatalogOpExecutor.CAPABILITIES_KEY);
// for synchronized table, show sql like a managed table
if (PaimonUtil.isSynchronizedTable(msTable)) {
- properties.remove("TRANSLATED_TO_EXTERNAL");
- properties.remove(Table.TBL_PROP_EXTERNAL_TABLE_PURGE);
if ((location != null)
&& location.toLowerCase().endsWith(table.getName().toLowerCase()))
{
location = null;
@@ -520,26 +611,287 @@ public class ToSqlUtils {
throw new CatalogException("Could not get primary key/foreign keys
sql.", e);
}
} else if (table instanceof FeDataSourceTable) {
- // Mask sensitive table properties for external JDBC table.
- Set<String> keysToBeMasked =
DataSourceTable.getJdbcTblPropertyMaskKeys();
- for (String key : properties.keySet()) {
- if (keysToBeMasked.contains(key.toLowerCase())) {
- properties.put(key, "******");
- }
- }
+ // masking handled by filterTblProperties(); nothing to do for CREATE
}
HdfsUri tableLocation = location == null ? null : new HdfsUri(location);
- return getCreateTableSql(
+ String sql = getCreateTableSql(
table.getDb().getName(), table.getName(), comment, colsSql,
partitionColsSql,
isPrimaryKeyUnique, primaryKeySql, foreignKeySql,
kuduPartitionByParams,
- new Pair<>(sortColsSql, sortingOrder), properties, serdeParameters,
+ new Pair<>(sortColsSql, sortingOrder), createTableProps,
serdeParameters,
isExternal, false, rowFormat, format, compression,
storageHandlerClassName, tableLocation, icebergPartitions, bucketInfo);
+ return new CreateSqlArtifacts(sql, createTableProps, alterTableProps);
+ }
+
+ /**
+ * Returns the SHOW CREATE TABLE output with WITH STATS, which includes the
base
+ * CREATE statement, followed by ALTER statements to set
+ * table properties and column stats where available, and partition
statements.
+ * @param table The table to generate SQL for
+ * @param partitionLimit Maximum number of partitions to output (0 = no
limit)
+ */
+ public static String getCreateTableWithStatsSql(FeTable table, int
partitionLimit)
+ throws CatalogException {
+ StringBuilder out = new StringBuilder();
+ StringBuilder warnings = new StringBuilder();
+
+ // CREATE statement (and get both filtered property maps)
+ CreateSqlArtifacts artifacts = renderCreateTableSql(table);
+ out.append(artifacts.createSql).append(";\n\n");
+
+ // Use the pre-filtered ALTER TABLE properties (less filtered, keeps more
properties)
+ org.apache.hadoop.hive.metastore.api.Table msTbl =
table.getMetaStoreTable();
+ Map<String, String> allTblProps = Maps.newLinkedHashMap(
+ artifacts.alterTableProperties);
+
+ // Add STATS_GENERATED_VIA_STATS_TASK if present in HMS parameters
+ if (msTbl != null && msTbl.getParameters() != null) {
+ String statsGenerated =
msTbl.getParameters().get("STATS_GENERATED_VIA_STATS_TASK");
+ if (statsGenerated != null) {
+ allTblProps.put("STATS_GENERATED_VIA_STATS_TASK", statsGenerated);
+ }
+ }
+
+ // Emit explicit table-level TBLPROPERTIES
+ if (!allTblProps.isEmpty()) {
+ out.append("ALTER TABLE ")
+ .append(getIdentSql(table.getDb().getName())).append('.')
+ .append(getIdentSql(table.getName())).append(' ')
+ .append("SET TBLPROPERTIES ")
+ .append(propertyMapToSql(allTblProps))
+ .append(";\n\n");
+ }
+
+ // Column stats for non-partition columns
+ if (appendColumnStatsStatements(table, out)) {
+ out.append("\n");
+ }
+
+ // Add partition information if this is a partitioned table
+ appendPartitionStatements(table, partitionLimit, out, warnings);
+
+ // Append warnings at the end if any
+ if (warnings.length() > 0) {
+ out.append(warnings);
+ }
+
+ return out.toString();
+ }
+
+ /**
+ * Appends ALTER TABLE ... SET COLUMN STATS statements for all non-partition
columns
+ * that have statistics.
+ * @param table The table to generate column stats for
+ * @param out StringBuilder to append the statements to
+ * @return true if any column stats were appended, false otherwise
+ */
+ private static boolean appendColumnStatsStatements(FeTable table,
StringBuilder out) {
+ int numClusterCols = table.getNumClusteringCols();
+ boolean hasColumnStats = false;
+
+ for (int i = numClusterCols; i < table.getColumns().size(); i++) {
+ Column c = table.getColumns().get(i);
+ ColumnStats s = c.getStats();
+ if (s == null) continue;
+ boolean isFixed = c.getType() != null && c.getType().isFixedLengthType();
+
+ List<String> kvs = new ArrayList<>();
+ // Always include NDV and nulls (may be -1 for unknown)
+ kvs.add("'numDVs'='" + s.getNumDistinctValues() + "'");
+ kvs.add("'numNulls'='" + s.getNumNulls() + "'");
+ // Include size stats only for variable-length types
+ if (!isFixed) {
+ kvs.add("'maxSize'='" + s.getMaxSize() + "'");
+ double avg = s.getAvgSize();
+ String avgStr = (Math.rint(avg) == avg) ?
+ Long.toString((long) avg) : Double.toString(avg);
+ kvs.add("'avgSize'='" + avgStr + "'");
+ }
+ // Include boolean-specific counts (may be -1 for non-boolean types)
+ kvs.add("'numTrues'='" + s.getNumTrues() + "'");
+ kvs.add("'numFalses'='" + s.getNumFalses() + "'");
+
+ hasColumnStats = true;
+ out.append("ALTER TABLE ")
+ .append(getIdentSql(table.getDb().getName())).append('.')
+ .append(getIdentSql(table.getName())).append(' ')
+ .append("SET COLUMN STATS ")
+ .append(getIdentSql(c.getName())).append(" (")
+ .append(Joiner.on(", ").join(kvs)).append(");\n");
+ }
+
+ return hasColumnStats;
+ }
+
+ /**
+ * Appends partition-related statements (ADD PARTITION and partition
properties)
+ * for partitioned tables.
+ * @param table The table to generate partition statements for
+ * @param partitionLimit Maximum number of partitions to output (0 = no
limit)
+ * @param out StringBuilder to append the partition statements to
+ * @param warnings StringBuilder to append warnings to
+ */
+ private static void appendPartitionStatements(FeTable table, int
partitionLimit,
+ StringBuilder out, StringBuilder warnings) {
+ if (!(table instanceof FeFsTable)) return;
+
+ FeFsTable fsTable = (FeFsTable) table;
+ Collection<? extends PrunablePartition> partitions =
fsTable.getPartitions();
+ int numClusterCols = table.getNumClusteringCols();
+
+ if (partitions == null || partitions.isEmpty() || numClusterCols == 0)
return;
+
+ // Optimization: First sort lightweight PrunablePartition objects,
+ // then load only the required number of full FeFsPartition objects
+ List<PrunablePartition> allPartitionRefs = new ArrayList<>(partitions);
+
+ // Sort using the same comparison logic (compares partition values)
+ Collections.sort(allPartitionRefs, (p1, p2) ->
+ HdfsPartition.comparePartitionKeyValues(
+ p1.getPartitionValues(), p2.getPartitionValues()));
+
+ // Determine how many partitions to output
+ int totalPartitions = allPartitionRefs.size();
+ int partitionsToOutput = (partitionLimit > 0 && partitionLimit <
totalPartitions)
+ ? partitionLimit : totalPartitions;
+
+ // Load only the partitions we need (not all of them!)
+ List<Long> partitionIdsToLoad = new ArrayList<>(partitionsToOutput);
+ for (int i = 0; i < partitionsToOutput; i++) {
+ partitionIdsToLoad.add(allPartitionRefs.get(i).getId());
+ }
+ List<? extends FeFsPartition> sortedPartitions = fsTable.loadPartitions(
+ partitionIdsToLoad);
+
+ if (sortedPartitions.isEmpty()) return;
+
+ // Generate ADD PARTITION statements
+ appendAddPartitionStatements(table, sortedPartitions, out);
+
+ // Generate per-partition TBLPROPERTIES
+ appendPartitionPropertiesStatements(table, sortedPartitions, out);
+
+ // Add warning if partitions were skipped
+ if (partitionLimit > 0 && totalPartitions > partitionLimit) {
+ int skipped = totalPartitions - partitionLimit;
+ warnings.append("-- WARNING about partial output\n");
+ warnings.append("-- WARNING: Emitted ").append(partitionLimit)
+ .append(" of ").append(totalPartitions)
+ .append(" partitions (show_create_table_partition_limit=")
+ .append(partitionLimit)
+ .append("). ").append(skipped).append(" partitions skipped.\n")
+ .append("-- To export more partitions, re-run with ")
+ .append("SHOW_CREATE_TABLE_PARTITION_LIMIT=<n>.\n");
+ }
+ }
+
+ /**
+ * Appends ALTER TABLE ... ADD PARTITION statements for the given partitions.
+ * @param table The table the partitions belong to
+ * @param sortedPartitions List of partitions to generate ADD PARTITION
statements for
+ * @param out StringBuilder to append the statements to
+ */
+ private static void appendAddPartitionStatements(FeTable table,
+ List<? extends FeFsPartition> sortedPartitions, StringBuilder out) {
+ int numClusterCols = table.getNumClusteringCols();
+
+ for (FeFsPartition partition : sortedPartitions) {
+ out.append("ALTER TABLE ")
+ .append(getIdentSql(table.getDb().getName())).append('.')
+ .append(getIdentSql(table.getName())).append(' ')
+ .append("ADD PARTITION (");
+
+ // Add partition key-value pairs
+ List<LiteralExpr> partitionValues = partition.getPartitionValues();
+ List<String> partitionCols = new ArrayList<>();
+ for (int j = 0; j < numClusterCols; j++) {
+ Column col = table.getColumns().get(j);
+ LiteralExpr value = partitionValues.get(j);
+ partitionCols.add(getIdentSql(col.getName()) + "=" + value.toSql());
+ }
+ out.append(Joiner.on(", ").join(partitionCols));
+ out.append(")");
+
+ // Add LOCATION if available
+ String location = partition.getLocation();
+ if (location != null && !location.isEmpty()) {
+ out.append(" LOCATION '").append(location).append("'");
+ }
+ out.append(";\n");
+ }
+ out.append("\n");
+ }
+
+ /**
+ * Appends ALTER TABLE ... PARTITION ... SET TBLPROPERTIES statements for
partitions
+ * that have properties to set.
+ * @param table The table the partitions belong to
+ * @param sortedPartitions List of partitions to generate property
statements for
+ * @param out StringBuilder to append the statements to
+ */
+ private static void appendPartitionPropertiesStatements(FeTable table,
+ List<? extends FeFsPartition> sortedPartitions, StringBuilder out) {
+ int numClusterCols = table.getNumClusteringCols();
+ boolean hasPartitionProps = false;
+
+ for (FeFsPartition partition : sortedPartitions) {
+ // Get partition statistics
+ Map<String, String> partitionProps = Maps.newLinkedHashMap();
+ long partNumRows = partition.getNumRows();
+ if (partNumRows >= 0) {
+ partitionProps.put("numRows", Long.toString(partNumRows));
+ }
+
+ // Add additional properties from HMS parameters if present
+ Map<String, String> hmsParams = partition.getParameters();
+ if (hmsParams != null) {
+ if (hmsParams.containsKey("STATS_GENERATED_VIA_STATS_TASK")) {
+ partitionProps.put("STATS_GENERATED_VIA_STATS_TASK",
+ hmsParams.get("STATS_GENERATED_VIA_STATS_TASK"));
+ }
+
+ // Add NUM_FILES if available
+ if (hmsParams.containsKey(FeFsTable.NUM_FILES)) {
+ partitionProps.put(FeFsTable.NUM_FILES,
hmsParams.get(FeFsTable.NUM_FILES));
+ }
+
+ // Add TOTAL_SIZE if available
+ if (hmsParams.containsKey(FeFsTable.TOTAL_SIZE)) {
+ partitionProps.put(FeFsTable.TOTAL_SIZE,
hmsParams.get(FeFsTable.TOTAL_SIZE));
+ }
+ }
+
+ if (!partitionProps.isEmpty()) {
+ hasPartitionProps = true;
+
+ out.append("ALTER TABLE ")
+ .append(getIdentSql(table.getDb().getName())).append('.')
+ .append(getIdentSql(table.getName())).append(' ')
+ .append("PARTITION (");
+
+ // Add partition key-value pairs
+ List<LiteralExpr> partitionValues = partition.getPartitionValues();
+ List<String> partitionCols = new ArrayList<>();
+ for (int j = 0; j < numClusterCols; j++) {
+ Column col = table.getColumns().get(j);
+ LiteralExpr value = partitionValues.get(j);
+ partitionCols.add(getIdentSql(col.getName()) + "=" + value.toSql());
+ }
+ out.append(Joiner.on(", ").join(partitionCols));
+ out.append(") SET TBLPROPERTIES ")
+ .append(propertyMapToSql(partitionProps))
+ .append(";\n");
+ }
+ }
+
+ if (hasPartitionProps) {
+ out.append("\n");
+ }
}
/**
- * Returns a "CREATE TABLE" string that creates the table with the specified
properties.
+ * Returns a "CREATE TABLE" string that creates the table with the specified properties.
* The tableName must not be null. If columnsSql is null, the schema syntax
will
* not be generated.
*/
diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java
b/fe/src/main/java/org/apache/impala/service/Frontend.java
index 5f16a8819..98cc02978 100644
--- a/fe/src/main/java/org/apache/impala/service/Frontend.java
+++ b/fe/src/main/java/org/apache/impala/service/Frontend.java
@@ -679,6 +679,12 @@ public class Frontend {
} else if (analysis.isShowCreateTableStmt()) {
ddl.op_type = TCatalogOpType.SHOW_CREATE_TABLE;
ddl.setShow_create_table_params(analysis.getShowCreateTableStmt().toThrift());
+
ddl.setShow_create_table_with_stats(analysis.getShowCreateTableStmt().withStats());
+ // Pass show_create_table_partition_limit from query options (default to
1000 if not set)
+ int partitionLimit =
result.query_options.isSetShow_create_table_partition_limit()
+ ? result.query_options.getShow_create_table_partition_limit() :
+ (new TQueryOptions()).getShow_create_table_partition_limit();
+ ddl.setShow_create_table_partition_limit(partitionLimit);
metadata.setColumns(Arrays.asList(
new TColumn("result", Type.STRING.toThrift())));
} else if (analysis.isShowCreateFunctionStmt()) {
diff --git a/fe/src/main/java/org/apache/impala/service/JniFrontend.java
b/fe/src/main/java/org/apache/impala/service/JniFrontend.java
index 3aecb33ba..f2b0264bf 100644
--- a/fe/src/main/java/org/apache/impala/service/JniFrontend.java
+++ b/fe/src/main/java/org/apache/impala/service/JniFrontend.java
@@ -89,6 +89,7 @@ import org.apache.impala.thrift.TResultSet;
import org.apache.impala.thrift.TSessionState;
import org.apache.impala.thrift.TShowFilesParams;
import org.apache.impala.thrift.TShowGrantPrincipalParams;
+import org.apache.impala.thrift.TCatalogOpRequest;
import org.apache.impala.thrift.TShowRolesParams;
import org.apache.impala.thrift.TShowStatsOp;
import org.apache.impala.thrift.TShowStatsParams;
@@ -564,13 +565,25 @@ public class JniFrontend {
/**
* Returns a SQL DDL string for creating the specified table.
*/
- public String showCreateTable(byte[] thriftTableName)
+ public String showCreateTable(byte[] thriftParams)
throws ImpalaException {
Preconditions.checkNotNull(frontend_);
- TTableName params = new TTableName();
- JniUtil.deserializeThrift(protocolFactory_, params, thriftTableName);
- return ToSqlUtils.getCreateTableSql(frontend_.getCatalog().getTable(
- params.getDb_name(), params.getTable_name()));
+ TCatalogOpRequest req = new TCatalogOpRequest();
+ JniUtil.deserializeThrift(protocolFactory_, req, thriftParams);
+ Preconditions.checkState(req.isSetShow_create_table_params());
+ TTableName tname = req.getShow_create_table_params();
+ FeTable table = frontend_.getCatalog().getTable(tname.getDb_name(),
+ tname.getTable_name());
+ boolean withStats = req.isSetShow_create_table_with_stats()
+ && req.show_create_table_with_stats;
+ if (withStats) {
+ // Get show_create_table_partition_limit from request, default to 1000
if not set
+ int partitionLimit = req.isSetShow_create_table_partition_limit() ?
+ req.getShow_create_table_partition_limit() : 1000;
+ return ToSqlUtils.getCreateTableWithStatsSql(table, partitionLimit);
+ } else {
+ return ToSqlUtils.getCreateTableSql(table);
+ }
}
/**
diff --git
a/testdata/workloads/functional-query/queries/QueryTest/show-create-table-with-stats.test
b/testdata/workloads/functional-query/queries/QueryTest/show-create-table-with-stats.test
new file mode 100644
index 000000000..bd09639ce
--- /dev/null
+++
b/testdata/workloads/functional-query/queries/QueryTest/show-create-table-with-stats.test
@@ -0,0 +1,485 @@
+====
+---- CREATE_TABLE
+# Unpartitioned HDFS table with table and column stats
+CREATE TABLE t_unpart_stats (
+ c1 STRING,
+ c2 INT
+)
+STORED AS PARQUET;
+ALTER TABLE t_unpart_stats SET TBLPROPERTIES('numRows'='10', 'numFiles'='10',
'STATS_GENERATED_VIA_STATS_TASK'='true');
+ALTER TABLE t_unpart_stats SET COLUMN STATS c1
('numDVs'='2','numNulls'='0','maxSize'='4','avgSize'='4');
+---- RESULTS-HIVE-3
+CREATE EXTERNAL TABLE show_create_table_test_db.t_unpart_stats (
+ c1 STRING,
+ c2 INT
+)
+STORED AS PARQUET
+LOCATION '$$location_uri$$'
+TBLPROPERTIES ('OBJCAPABILITIES'='EXTREAD,EXTWRITE', 'STATS_GENERATED'='TASK',
'STATS_GENERATED_VIA_STATS_TASK'='true', 'TRANSLATED_TO_EXTERNAL'='TRUE',
'external.table.purge'='TRUE', 'impala.lastComputeStatsTime'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_unpart_stats SET TBLPROPERTIES
('EXTERNAL'='TRUE', 'OBJCAPABILITIES'='EXTREAD,EXTWRITE',
'STATS_GENERATED'='TASK', 'STATS_GENERATED_VIA_STATS_TASK'='true',
'TRANSLATED_TO_EXTERNAL'='TRUE', 'external.table.purge'='TRUE',
'impala.events.catalogServiceId'='<NUM>',
'impala.events.catalogVersion'='<NUM>', 'impala.lastComputeStatsTime'='<NUM>',
'numFiles'='<NUM>', 'numRows'='<NUM>', 'totalSize'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_unpart_stats SET COLUMN STATS c1
('numDVs'='0', 'numNulls'='0', 'maxSize'='0', 'avgSize'='0', 'numTrues'='-1',
'numFalses'='-1');
+ALTER TABLE show_create_table_test_db.t_unpart_stats SET COLUMN STATS c2
('numDVs'='0', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+====
+---- CREATE_TABLE
+# Partitioned HDFS table with partition and table stats
+CREATE TABLE t_part_stats (
+ c1 STRING,
+ c2 INT
+)
+PARTITIONED BY (p INT)
+STORED AS PARQUET
+TBLPROPERTIES ('transactional'='false');
+ALTER TABLE t_part_stats ADD PARTITION(p=1);
+ALTER TABLE t_part_stats SET TBLPROPERTIES('numRows'='100');
+ALTER TABLE t_part_stats PARTITION(p=1) SET TBLPROPERTIES('numRows'='5',
'STATS_GENERATED_VIA_STATS_TASK'='true');
+ALTER TABLE t_part_stats SET COLUMN STATS c1
('numDVs'='3','numNulls'='0','maxSize'='4','avgSize'='4');
+---- RESULTS-HIVE-3
+CREATE EXTERNAL TABLE show_create_table_test_db.t_part_stats (
+ c1 STRING,
+ c2 INT
+)
+PARTITIONED BY (
+ p INT
+)
+STORED AS PARQUET
+LOCATION '$$location_uri$$'
+TBLPROPERTIES ('OBJCAPABILITIES'='EXTREAD,EXTWRITE', 'STATS_GENERATED'='TASK',
'TRANSLATED_TO_EXTERNAL'='TRUE', 'external.table.purge'='TRUE',
'impala.lastComputeStatsTime'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_part_stats SET TBLPROPERTIES
('EXTERNAL'='TRUE', 'OBJCAPABILITIES'='EXTREAD,EXTWRITE',
'STATS_GENERATED'='TASK', 'TRANSLATED_TO_EXTERNAL'='TRUE',
'external.table.purge'='TRUE', 'impala.events.catalogServiceId'='<NUM>',
'impala.events.catalogVersion'='<NUM>', 'impala.lastComputeStatsTime'='<NUM>',
'numRows'='<NUM>', 'totalSize'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_part_stats SET COLUMN STATS c1
('numDVs'='0', 'numNulls'='0', 'maxSize'='0', 'avgSize'='0', 'numTrues'='-1',
'numFalses'='-1');
+ALTER TABLE show_create_table_test_db.t_part_stats SET COLUMN STATS c2
('numDVs'='0', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE show_create_table_test_db.t_part_stats ADD PARTITION (p=1)
LOCATION '$$location_uri$$/p=1';
+ALTER TABLE show_create_table_test_db.t_part_stats PARTITION (p=1) SET
TBLPROPERTIES ('STATS_GENERATED_VIA_STATS_TASK'='true', 'numFiles'='<NUM>',
'numRows'='<NUM>', 'totalSize'='<NUM>');
+====
+---- CREATE_TABLE
+# Iceberg table should not have ADD PARTITION or numRows in WITH STATS output
+CREATE TABLE ice_with_stats (
+ i INT,
+ s STRING
+)
+STORED AS ICEBERG;
+INSERT INTO ice_with_stats VALUES (1, 'a');
+---- RESULTS-HIVE-3
+CREATE EXTERNAL TABLE show_create_table_test_db.ice_with_stats (
+ i INT NULL,
+ s STRING NULL
+)
+STORED AS ICEBERG
+LOCATION '$$location_uri$$'
+TBLPROPERTIES ('OBJCAPABILITIES'='EXTREAD,EXTWRITE',
'engine.hive.enabled'='true', 'external.table.purge'='TRUE',
'impala.computeStatsSnapshotIds'='<NUM>',
'impala.lastComputeStatsTime'='<NUM>', 'table_type'='ICEBERG',
'write.format.default'='parquet');
+ALTER TABLE show_create_table_test_db.ice_with_stats SET TBLPROPERTIES
('EXTERNAL'='TRUE', 'OBJCAPABILITIES'='EXTREAD,EXTWRITE',
'engine.hive.enabled'='true', 'external.table.purge'='TRUE',
'impala.computeStatsSnapshotIds'='<NUM>',
'impala.events.catalogServiceId'='<NUM>',
'impala.events.catalogVersion'='<NUM>', 'impala.lastComputeStatsTime'='<NUM>',
'numFiles'='<NUM>', 'numRows'='<NUM>', 'table_type'='ICEBERG',
'totalSize'='<NUM>', 'write.format.default'='parquet');
+ALTER TABLE show_create_table_test_db.ice_with_stats SET COLUMN STATS i
('numDVs'='1', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE show_create_table_test_db.ice_with_stats SET COLUMN STATS s
('numDVs'='1', 'numNulls'='0', 'maxSize'='1', 'avgSize'='1', 'numTrues'='-1',
'numFalses'='-1');
+====
+---- CREATE_TABLE
+# HDFS table without any stats
+CREATE TABLE t_no_stats (
+ c1 INT
+)
+PARTITIONED BY (p INT)
+STORED AS PARQUET;
+ALTER TABLE t_no_stats ADD PARTITION(p=1);
+---- RESULTS-HIVE-3
+CREATE EXTERNAL TABLE show_create_table_test_db.t_no_stats (
+ c1 INT
+)
+PARTITIONED BY (
+ p INT
+)
+STORED AS PARQUET
+LOCATION '$$location_uri$$'
+TBLPROPERTIES ('OBJCAPABILITIES'='EXTREAD,EXTWRITE', 'STATS_GENERATED'='TASK',
'TRANSLATED_TO_EXTERNAL'='TRUE', 'external.table.purge'='TRUE',
'impala.lastComputeStatsTime'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_no_stats SET TBLPROPERTIES
('EXTERNAL'='TRUE', 'OBJCAPABILITIES'='EXTREAD,EXTWRITE',
'STATS_GENERATED'='TASK', 'TRANSLATED_TO_EXTERNAL'='TRUE',
'external.table.purge'='TRUE', 'impala.events.catalogServiceId'='<NUM>',
'impala.events.catalogVersion'='<NUM>', 'impala.lastComputeStatsTime'='<NUM>',
'numRows'='<NUM>', 'totalSize'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_no_stats SET COLUMN STATS c1
('numDVs'='0', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE show_create_table_test_db.t_no_stats ADD PARTITION (p=1) LOCATION
'$$location_uri$$/p=1';
+ALTER TABLE show_create_table_test_db.t_no_stats PARTITION (p=1) SET
TBLPROPERTIES ('numFiles'='<NUM>', 'numRows'='<NUM>', 'totalSize'='<NUM>');
+====
+---- CREATE_TABLE
+# HDFS table with column stats only
+CREATE TABLE t_only_col_stats (
+ c1 INT,
+ c2 STRING
+)
+STORED AS PARQUET;
+ALTER TABLE t_only_col_stats SET COLUMN STATS c2
('numDVs'='9','numNulls'='1','maxSize'='9','avgSize'='3');
+---- RESULTS-HIVE-3
+CREATE EXTERNAL TABLE show_create_table_test_db.t_only_col_stats (
+ c1 INT,
+ c2 STRING
+)
+STORED AS PARQUET
+LOCATION '$$location_uri$$'
+TBLPROPERTIES ('OBJCAPABILITIES'='EXTREAD,EXTWRITE', 'STATS_GENERATED'='TASK',
'TRANSLATED_TO_EXTERNAL'='TRUE', 'external.table.purge'='TRUE',
'impala.lastComputeStatsTime'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_only_col_stats SET TBLPROPERTIES
('EXTERNAL'='TRUE', 'OBJCAPABILITIES'='EXTREAD,EXTWRITE',
'STATS_GENERATED'='TASK', 'TRANSLATED_TO_EXTERNAL'='TRUE',
'external.table.purge'='TRUE', 'impala.events.catalogServiceId'='<NUM>',
'impala.events.catalogVersion'='<NUM>', 'impala.lastComputeStatsTime'='<NUM>',
'numRows'='<NUM>', 'totalSize'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_only_col_stats SET COLUMN STATS c1
('numDVs'='0', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE show_create_table_test_db.t_only_col_stats SET COLUMN STATS c2
('numDVs'='0', 'numNulls'='0', 'maxSize'='0', 'avgSize'='0', 'numTrues'='-1',
'numFalses'='-1');
+====
+---- CREATE_TABLE
+# HDFS table with table stats only
+CREATE TABLE t_only_tbl_stats (
+ c1 INT
+)
+STORED AS PARQUET;
+ALTER TABLE t_only_tbl_stats SET TBLPROPERTIES('numRows'='77');
+---- RESULTS-HIVE-3
+CREATE EXTERNAL TABLE show_create_table_test_db.t_only_tbl_stats (
+ c1 INT
+)
+STORED AS PARQUET
+LOCATION '$$location_uri$$'
+TBLPROPERTIES ('OBJCAPABILITIES'='EXTREAD,EXTWRITE', 'STATS_GENERATED'='TASK',
'TRANSLATED_TO_EXTERNAL'='TRUE', 'external.table.purge'='TRUE',
'impala.lastComputeStatsTime'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_only_tbl_stats SET TBLPROPERTIES
('EXTERNAL'='TRUE', 'OBJCAPABILITIES'='EXTREAD,EXTWRITE',
'STATS_GENERATED'='TASK', 'TRANSLATED_TO_EXTERNAL'='TRUE',
'external.table.purge'='TRUE', 'impala.events.catalogServiceId'='<NUM>',
'impala.events.catalogVersion'='<NUM>', 'impala.lastComputeStatsTime'='<NUM>',
'numRows'='<NUM>', 'totalSize'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_only_tbl_stats SET COLUMN STATS c1
('numDVs'='0', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+====
+---- CREATE_TABLE
+# Unpartitioned HDFS table with custom properties but no stats
+CREATE TABLE t_unpart_custom_props (
+ name STRING,
+ age INT
+)
+STORED AS PARQUET
+TBLPROPERTIES('owner'='user', 'team'='data-eng');
+---- RESULTS-HIVE-3
+CREATE EXTERNAL TABLE show_create_table_test_db.t_unpart_custom_props (
+ name STRING,
+ age INT
+)
+STORED AS PARQUET
+LOCATION '$$location_uri$$'
+TBLPROPERTIES ('OBJCAPABILITIES'='EXTREAD,EXTWRITE', 'STATS_GENERATED'='TASK',
'TRANSLATED_TO_EXTERNAL'='TRUE', 'external.table.purge'='TRUE',
'impala.lastComputeStatsTime'='<NUM>', 'owner'='user', 'team'='data-eng');
+ALTER TABLE show_create_table_test_db.t_unpart_custom_props SET TBLPROPERTIES
('EXTERNAL'='TRUE', 'OBJCAPABILITIES'='EXTREAD,EXTWRITE',
'STATS_GENERATED'='TASK', 'TRANSLATED_TO_EXTERNAL'='TRUE',
'external.table.purge'='TRUE', 'impala.events.catalogServiceId'='<NUM>',
'impala.events.catalogVersion'='<NUM>', 'impala.lastComputeStatsTime'='<NUM>',
'numRows'='<NUM>', 'owner'='user', 'team'='data-eng', 'totalSize'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_unpart_custom_props SET COLUMN STATS
name ('numDVs'='0', 'numNulls'='0', 'maxSize'='0', 'avgSize'='0',
'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE show_create_table_test_db.t_unpart_custom_props SET COLUMN STATS
age ('numDVs'='0', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+====
+---- CREATE_TABLE
+# Partitioned HDFS table with partition-level stats only (tests
show_create_table_partition_limit warning)
+CREATE TABLE t_part_only_part_stats (
+ id INT,
+ value STRING
+)
+PARTITIONED BY (region STRING)
+STORED AS PARQUET;
+ALTER TABLE t_part_only_part_stats ADD PARTITION(region='US');
+ALTER TABLE t_part_only_part_stats ADD PARTITION(region='EU');
+ALTER TABLE t_part_only_part_stats PARTITION(region='US') SET
TBLPROPERTIES('numRows'='20', 'STATS_GENERATED_VIA_STATS_TASK'='true');
+ALTER TABLE t_part_only_part_stats PARTITION(region='EU') SET
TBLPROPERTIES('numRows'='30');
+---- RESULTS-HIVE-3
+CREATE EXTERNAL TABLE show_create_table_test_db.t_part_only_part_stats (
+ id INT,
+ value STRING
+)
+PARTITIONED BY (
+ region STRING
+)
+STORED AS PARQUET
+LOCATION '$$location_uri$$'
+TBLPROPERTIES ('OBJCAPABILITIES'='EXTREAD,EXTWRITE', 'STATS_GENERATED'='TASK',
'TRANSLATED_TO_EXTERNAL'='TRUE', 'external.table.purge'='TRUE',
'impala.lastComputeStatsTime'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_part_only_part_stats SET TBLPROPERTIES
('EXTERNAL'='TRUE', 'OBJCAPABILITIES'='EXTREAD,EXTWRITE',
'STATS_GENERATED'='TASK', 'TRANSLATED_TO_EXTERNAL'='TRUE',
'external.table.purge'='TRUE', 'impala.events.catalogServiceId'='<NUM>',
'impala.events.catalogVersion'='<NUM>', 'impala.lastComputeStatsTime'='<NUM>',
'numRows'='<NUM>', 'totalSize'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_part_only_part_stats SET COLUMN STATS
id ('numDVs'='0', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE show_create_table_test_db.t_part_only_part_stats SET COLUMN STATS
value ('numDVs'='0', 'numNulls'='0', 'maxSize'='0', 'avgSize'='0',
'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE show_create_table_test_db.t_part_only_part_stats ADD PARTITION
(region='EU') LOCATION '$$location_uri$$/region=EU';
+ALTER TABLE show_create_table_test_db.t_part_only_part_stats PARTITION
(region='EU') SET TBLPROPERTIES ('numFiles'='<NUM>', 'numRows'='<NUM>',
'totalSize'='<NUM>');
+-- WARNING about partial output
+-- WARNING: Emitted 1 of 2 partitions (show_create_table_partition_limit=1). 1
partitions skipped.
+-- To export more partitions, re-run with
SHOW_CREATE_TABLE_PARTITION_LIMIT=<n>.
+====
+---- CREATE_TABLE
+# Partitioned HDFS table with table, partition, and column stats, plus custom
props (tests show_create_table_partition_limit warning)
+CREATE TABLE t_part_full_combo (
+ item STRING,
+ price DOUBLE
+)
+PARTITIONED BY (category STRING)
+STORED AS PARQUET
+TBLPROPERTIES('createdBy'='qa_test');
+ALTER TABLE t_part_full_combo ADD PARTITION(category='electronics');
+ALTER TABLE t_part_full_combo ADD PARTITION(category='books');
+ALTER TABLE t_part_full_combo SET TBLPROPERTIES('numRows'='500');
+ALTER TABLE t_part_full_combo PARTITION(category='electronics') SET
TBLPROPERTIES('numRows'='200', 'STATS_GENERATED_VIA_STATS_TASK'='true');
+ALTER TABLE t_part_full_combo PARTITION(category='books') SET
TBLPROPERTIES('numRows'='300');
+ALTER TABLE t_part_full_combo SET COLUMN STATS item ('numDVs'='50',
'numNulls'='0', 'avgSize'='10');
+ALTER TABLE t_part_full_combo SET COLUMN STATS price ('numDVs'='100',
'numNulls'='5');
+---- RESULTS-HIVE-3
+CREATE EXTERNAL TABLE show_create_table_test_db.t_part_full_combo (
+ item STRING,
+ price DOUBLE
+)
+PARTITIONED BY (
+ category STRING
+)
+STORED AS PARQUET
+LOCATION '$$location_uri$$'
+TBLPROPERTIES ('OBJCAPABILITIES'='EXTREAD,EXTWRITE', 'STATS_GENERATED'='TASK',
'TRANSLATED_TO_EXTERNAL'='TRUE', 'createdBy'='qa_test',
'external.table.purge'='TRUE', 'impala.lastComputeStatsTime'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_part_full_combo SET TBLPROPERTIES
('EXTERNAL'='TRUE', 'OBJCAPABILITIES'='EXTREAD,EXTWRITE',
'STATS_GENERATED'='TASK', 'TRANSLATED_TO_EXTERNAL'='TRUE',
'createdBy'='qa_test', 'external.table.purge'='TRUE',
'impala.events.catalogServiceId'='<NUM>',
'impala.events.catalogVersion'='<NUM>', 'impala.lastComputeStatsTime'='<NUM>',
'numRows'='<NUM>', 'totalSize'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_part_full_combo SET COLUMN STATS item
('numDVs'='0', 'numNulls'='0', 'maxSize'='0', 'avgSize'='0', 'numTrues'='-1',
'numFalses'='-1');
+ALTER TABLE show_create_table_test_db.t_part_full_combo SET COLUMN STATS price
('numDVs'='0', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE show_create_table_test_db.t_part_full_combo ADD PARTITION
(category='books') LOCATION '$$location_uri$$/category=books';
+ALTER TABLE show_create_table_test_db.t_part_full_combo PARTITION
(category='books') SET TBLPROPERTIES ('numFiles'='<NUM>', 'numRows'='<NUM>',
'totalSize'='<NUM>');
+-- WARNING about partial output
+-- WARNING: Emitted 1 of 2 partitions (show_create_table_partition_limit=1). 1
partitions skipped.
+-- To export more partitions, re-run with
SHOW_CREATE_TABLE_PARTITION_LIMIT=<n>.
+====
+---- CREATE_TABLE
+# Unpartitioned HDFS table with TEXTFILE format and stats
+CREATE TABLE t_textfile_stats (
+ c1 INT,
+ c2 STRING
+)
+STORED AS TEXTFILE;
+ALTER TABLE t_textfile_stats SET TBLPROPERTIES('numRows'='25',
'STATS_GENERATED_VIA_STATS_TASK'='true');
+ALTER TABLE t_textfile_stats SET COLUMN STATS c2
('numDVs'='5','numNulls'='1','maxSize'='4','avgSize'='4');
+---- RESULTS-HIVE-3
+CREATE EXTERNAL TABLE show_create_table_test_db.t_textfile_stats (
+ c1 INT,
+ c2 STRING
+)
+STORED AS TEXTFILE
+LOCATION '$$location_uri$$'
+TBLPROPERTIES ('OBJCAPABILITIES'='EXTREAD,EXTWRITE', 'STATS_GENERATED'='TASK',
'STATS_GENERATED_VIA_STATS_TASK'='true', 'TRANSLATED_TO_EXTERNAL'='TRUE',
'external.table.purge'='TRUE', 'impala.lastComputeStatsTime'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_textfile_stats SET TBLPROPERTIES
('EXTERNAL'='TRUE', 'OBJCAPABILITIES'='EXTREAD,EXTWRITE',
'STATS_GENERATED'='TASK', 'STATS_GENERATED_VIA_STATS_TASK'='true',
'TRANSLATED_TO_EXTERNAL'='TRUE', 'external.table.purge'='TRUE',
'impala.events.catalogServiceId'='<NUM>',
'impala.events.catalogVersion'='<NUM>', 'impala.lastComputeStatsTime'='<NUM>',
'numRows'='<NUM>', 'totalSize'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_textfile_stats SET COLUMN STATS c1
('numDVs'='0', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE show_create_table_test_db.t_textfile_stats SET COLUMN STATS c2
('numDVs'='0', 'numNulls'='0', 'maxSize'='0', 'avgSize'='0', 'numTrues'='-1',
'numFalses'='-1');
+====
+---- CREATE_TABLE
+# Partitioned HDFS table with TEXTFILE format and all stats
+CREATE TABLE t_part_textfile_stats (
+ c1 STRING,
+ c2 INT
+)
+PARTITIONED BY (p1 INT, p2 STRING)
+STORED AS TEXTFILE;
+ALTER TABLE t_part_textfile_stats ADD PARTITION(p1=1, p2='a');
+ALTER TABLE t_part_textfile_stats SET TBLPROPERTIES('numRows'='50',
'numFiles'='75');
+ALTER TABLE t_part_textfile_stats PARTITION(p1=1, p2='a') SET
TBLPROPERTIES('numRows'='10', 'STATS_GENERATED_VIA_STATS_TASK'='true');
+ALTER TABLE t_part_textfile_stats SET COLUMN STATS c1
('numDVs'='7','numNulls'='2','maxSize'='10','avgSize'='5');
+---- RESULTS-HIVE-3
+CREATE EXTERNAL TABLE show_create_table_test_db.t_part_textfile_stats (
+ c1 STRING,
+ c2 INT
+)
+PARTITIONED BY (
+ p1 INT,
+ p2 STRING
+)
+STORED AS TEXTFILE
+LOCATION '$$location_uri$$'
+TBLPROPERTIES ('OBJCAPABILITIES'='EXTREAD,EXTWRITE', 'STATS_GENERATED'='TASK',
'TRANSLATED_TO_EXTERNAL'='TRUE', 'external.table.purge'='TRUE',
'impala.lastComputeStatsTime'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_part_textfile_stats SET TBLPROPERTIES
('EXTERNAL'='TRUE', 'OBJCAPABILITIES'='EXTREAD,EXTWRITE',
'STATS_GENERATED'='TASK', 'TRANSLATED_TO_EXTERNAL'='TRUE',
'external.table.purge'='TRUE', 'impala.events.catalogServiceId'='<NUM>',
'impala.events.catalogVersion'='<NUM>', 'impala.lastComputeStatsTime'='<NUM>',
'numFiles'='<NUM>', 'numRows'='<NUM>', 'totalSize'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_part_textfile_stats SET COLUMN STATS
c1 ('numDVs'='0', 'numNulls'='0', 'maxSize'='0', 'avgSize'='0',
'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE show_create_table_test_db.t_part_textfile_stats SET COLUMN STATS
c2 ('numDVs'='0', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE show_create_table_test_db.t_part_textfile_stats ADD PARTITION
(p1=1, p2='a') LOCATION '$$location_uri$$/p1=1/p2=a';
+ALTER TABLE show_create_table_test_db.t_part_textfile_stats PARTITION (p1=1,
p2='a') SET TBLPROPERTIES ('STATS_GENERATED_VIA_STATS_TASK'='true',
'numFiles'='<NUM>', 'numRows'='<NUM>', 'totalSize'='<NUM>');
+====
+---- CREATE_TABLE
+# Partitioned table with NULL partition values (tests
show_create_table_partition_limit warning)
+CREATE TABLE t_part_with_nulls (
+ c1 STRING
+)
+PARTITIONED BY (p1 INT, p2 STRING)
+STORED AS PARQUET;
+ALTER TABLE t_part_with_nulls ADD PARTITION(p1=NULL, p2='x');
+ALTER TABLE t_part_with_nulls ADD PARTITION(p1=1, p2=NULL);
+ALTER TABLE t_part_with_nulls SET TBLPROPERTIES('numRows'='20',
'STATS_GENERATED_VIA_STATS_TASK'='true');
+ALTER TABLE t_part_with_nulls PARTITION(p1=NULL, p2='x') SET
TBLPROPERTIES('numRows'='8', 'STATS_GENERATED_VIA_STATS_TASK'='true');
+---- RESULTS-HIVE-3
+CREATE EXTERNAL TABLE show_create_table_test_db.t_part_with_nulls (
+ c1 STRING
+)
+PARTITIONED BY (
+ p1 INT,
+ p2 STRING
+)
+STORED AS PARQUET
+LOCATION '$$location_uri$$'
+TBLPROPERTIES ('OBJCAPABILITIES'='EXTREAD,EXTWRITE', 'STATS_GENERATED'='TASK',
'STATS_GENERATED_VIA_STATS_TASK'='true', 'TRANSLATED_TO_EXTERNAL'='TRUE',
'external.table.purge'='TRUE', 'impala.lastComputeStatsTime'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_part_with_nulls SET TBLPROPERTIES
('EXTERNAL'='TRUE', 'OBJCAPABILITIES'='EXTREAD,EXTWRITE',
'STATS_GENERATED'='TASK', 'STATS_GENERATED_VIA_STATS_TASK'='true',
'TRANSLATED_TO_EXTERNAL'='TRUE', 'external.table.purge'='TRUE',
'impala.events.catalogServiceId'='<NUM>',
'impala.events.catalogVersion'='<NUM>', 'impala.lastComputeStatsTime'='<NUM>',
'numRows'='<NUM>', 'totalSize'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_part_with_nulls SET COLUMN STATS c1
('numDVs'='0', 'numNulls'='0', 'maxSize'='0', 'avgSize'='0', 'numTrues'='-1',
'numFalses'='-1');
+ALTER TABLE show_create_table_test_db.t_part_with_nulls ADD PARTITION
(p1=NULL, p2='x') LOCATION
'$$location_uri$$/p1=__HIVE_DEFAULT_PARTITION__/p2=x';
+ALTER TABLE show_create_table_test_db.t_part_with_nulls PARTITION (p1=NULL,
p2='x') SET TBLPROPERTIES ('STATS_GENERATED_VIA_STATS_TASK'='true',
'numFiles'='<NUM>', 'numRows'='<NUM>', 'totalSize'='<NUM>');
+-- WARNING about partial output
+-- WARNING: Emitted 1 of 2 partitions (show_create_table_partition_limit=1). 1
partitions skipped.
+-- To export more partitions, re-run with
SHOW_CREATE_TABLE_PARTITION_LIMIT=<n>.
+====
+---- CREATE_TABLE
+# Unpartitioned table without any stats
+CREATE TABLE t_unpart_no_stats (
+ c1 INT,
+ c2 STRING
+)
+STORED AS PARQUET;
+---- RESULTS-HIVE-3
+CREATE EXTERNAL TABLE show_create_table_test_db.t_unpart_no_stats (
+ c1 INT,
+ c2 STRING
+)
+STORED AS PARQUET
+LOCATION '$$location_uri$$'
+TBLPROPERTIES ('OBJCAPABILITIES'='EXTREAD,EXTWRITE', 'STATS_GENERATED'='TASK',
'TRANSLATED_TO_EXTERNAL'='TRUE', 'external.table.purge'='TRUE',
'impala.lastComputeStatsTime'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_unpart_no_stats SET TBLPROPERTIES
('EXTERNAL'='TRUE', 'OBJCAPABILITIES'='EXTREAD,EXTWRITE',
'STATS_GENERATED'='TASK', 'TRANSLATED_TO_EXTERNAL'='TRUE',
'external.table.purge'='TRUE', 'impala.events.catalogServiceId'='<NUM>',
'impala.events.catalogVersion'='<NUM>', 'impala.lastComputeStatsTime'='<NUM>',
'numRows'='<NUM>', 'totalSize'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_unpart_no_stats SET COLUMN STATS c1
('numDVs'='0', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE show_create_table_test_db.t_unpart_no_stats SET COLUMN STATS c2
('numDVs'='0', 'numNulls'='0', 'maxSize'='0', 'avgSize'='0', 'numTrues'='-1',
'numFalses'='-1');
+====
+---- CREATE_TABLE
+# Partitioned table without any stats (no partitions added)
+CREATE TABLE t_part_no_stats_no_parts (
+ c1 INT
+)
+PARTITIONED BY (p INT)
+STORED AS PARQUET;
+---- RESULTS-HIVE-3
+CREATE EXTERNAL TABLE show_create_table_test_db.t_part_no_stats_no_parts (
+ c1 INT
+)
+PARTITIONED BY (
+ p INT
+)
+STORED AS PARQUET
+LOCATION '$$location_uri$$'
+TBLPROPERTIES ('OBJCAPABILITIES'='EXTREAD,EXTWRITE', 'STATS_GENERATED'='TASK',
'TRANSLATED_TO_EXTERNAL'='TRUE', 'external.table.purge'='TRUE',
'impala.lastComputeStatsTime'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_part_no_stats_no_parts SET
TBLPROPERTIES ('EXTERNAL'='TRUE', 'OBJCAPABILITIES'='EXTREAD,EXTWRITE',
'STATS_GENERATED'='TASK', 'TRANSLATED_TO_EXTERNAL'='TRUE',
'external.table.purge'='TRUE', 'impala.events.catalogServiceId'='<NUM>',
'impala.events.catalogVersion'='<NUM>', 'impala.lastComputeStatsTime'='<NUM>',
'numRows'='<NUM>', 'totalSize'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_part_no_stats_no_parts SET COLUMN
STATS c1 ('numDVs'='0', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+====
+---- CREATE_TABLE
+# Test case with many partitions to verify show_create_table_partition_limit
warning
+CREATE TABLE t_many_partitions (
+ data_point STRING
+)
+PARTITIONED BY (year INT, month INT)
+STORED AS PARQUET;
+ALTER TABLE t_many_partitions ADD PARTITION(year=2024, month=1);
+ALTER TABLE t_many_partitions ADD PARTITION(year=2024, month=2);
+ALTER TABLE t_many_partitions ADD PARTITION(year=2024, month=3);
+ALTER TABLE t_many_partitions SET TBLPROPERTIES('numRows'='300');
+ALTER TABLE t_many_partitions PARTITION(year=2024, month=1) SET
TBLPROPERTIES('numRows'='100');
+ALTER TABLE t_many_partitions PARTITION(year=2024, month=2) SET
TBLPROPERTIES('numRows'='100');
+ALTER TABLE t_many_partitions PARTITION(year=2024, month=3) SET
TBLPROPERTIES('numRows'='100');
+ALTER TABLE t_many_partitions SET COLUMN STATS data_point
('numDVs'='50','numNulls'='0','maxSize'='20','avgSize'='10');
+---- RESULTS-HIVE-3
+CREATE EXTERNAL TABLE show_create_table_test_db.t_many_partitions (
+ data_point STRING
+)
+PARTITIONED BY (
+ year INT,
+ month INT
+)
+STORED AS PARQUET
+LOCATION '$$location_uri$$'
+TBLPROPERTIES ('OBJCAPABILITIES'='EXTREAD,EXTWRITE', 'STATS_GENERATED'='TASK',
'TRANSLATED_TO_EXTERNAL'='TRUE', 'external.table.purge'='TRUE',
'impala.lastComputeStatsTime'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_many_partitions SET TBLPROPERTIES
('EXTERNAL'='TRUE', 'OBJCAPABILITIES'='EXTREAD,EXTWRITE',
'STATS_GENERATED'='TASK', 'TRANSLATED_TO_EXTERNAL'='TRUE',
'external.table.purge'='TRUE', 'impala.events.catalogServiceId'='<NUM>',
'impala.events.catalogVersion'='<NUM>', 'impala.lastComputeStatsTime'='<NUM>',
'numRows'='<NUM>', 'totalSize'='<NUM>');
+ALTER TABLE show_create_table_test_db.t_many_partitions SET COLUMN STATS
data_point ('numDVs'='0', 'numNulls'='0', 'maxSize'='0', 'avgSize'='0',
'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE show_create_table_test_db.t_many_partitions ADD PARTITION
(`year`=2024, `month`=1) LOCATION '$$location_uri$$/year=2024/month=1';
+ALTER TABLE show_create_table_test_db.t_many_partitions PARTITION
(`year`=2024, `month`=1) SET TBLPROPERTIES ('numFiles'='<NUM>',
'numRows'='<NUM>', 'totalSize'='<NUM>');
+-- WARNING about partial output
+-- WARNING: Emitted 1 of 3 partitions (show_create_table_partition_limit=1). 2
partitions skipped.
+-- To export more partitions, re-run with
SHOW_CREATE_TABLE_PARTITION_LIMIT=<n>.
+====
+---- QUERY
+-- No need to add WITH STATS at the end of the query, as it is appended by the
test
+SHOW CREATE TABLE functional.alltypes_date_partition
+---- RESULTS-HIVE-3
+CREATE EXTERNAL TABLE functional.alltypes_date_partition (
+ id INT COMMENT 'Add a comment',
+ bool_col BOOLEAN,
+ tinyint_col TINYINT,
+ smallint_col SMALLINT,
+ int_col INT,
+ bigint_col BIGINT,
+ float_col FLOAT,
+ double_col DOUBLE,
+ string_col STRING,
+ timestamp_col TIMESTAMP
+)
+PARTITIONED BY (
+ date_col DATE
+)
+STORED AS TEXTFILE
+LOCATION '$$location_uri$$'
+TBLPROPERTIES ('OBJCAPABILITIES'='EXTREAD,EXTWRITE', 'STATS_GENERATED'='TASK',
'impala.lastComputeStatsTime'='<NUM>');
+ALTER TABLE functional.alltypes_date_partition SET TBLPROPERTIES
('EXTERNAL'='TRUE', 'OBJCAPABILITIES'='EXTREAD,EXTWRITE',
'STATS_GENERATED'='TASK', 'impala.events.catalogServiceId'='<NUM>',
'impala.events.catalogVersion'='<NUM>', 'impala.lastComputeStatsTime'='<NUM>',
'numRows'='<NUM>', 'totalSize'='<NUM>');
+ALTER TABLE functional.alltypes_date_partition SET COLUMN STATS id
('numDVs'='494', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE functional.alltypes_date_partition SET COLUMN STATS bool_col
('numDVs'='2', 'numNulls'='0', 'numTrues'='250', 'numFalses'='250');
+ALTER TABLE functional.alltypes_date_partition SET COLUMN STATS tinyint_col
('numDVs'='10', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE functional.alltypes_date_partition SET COLUMN STATS smallint_col
('numDVs'='10', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE functional.alltypes_date_partition SET COLUMN STATS int_col
('numDVs'='10', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE functional.alltypes_date_partition SET COLUMN STATS bigint_col
('numDVs'='10', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE functional.alltypes_date_partition SET COLUMN STATS float_col
('numDVs'='10', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE functional.alltypes_date_partition SET COLUMN STATS double_col
('numDVs'='10', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE functional.alltypes_date_partition SET COLUMN STATS string_col
('numDVs'='10', 'numNulls'='0', 'maxSize'='1', 'avgSize'='1', 'numTrues'='-1',
'numFalses'='-1');
+ALTER TABLE functional.alltypes_date_partition SET COLUMN STATS timestamp_col
('numDVs'='500', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE functional.alltypes_date_partition ADD PARTITION (date_col=DATE
'2009-01-01') LOCATION '$$location_uri$$/date_col=2009-01-01';
+ALTER TABLE functional.alltypes_date_partition PARTITION (date_col=DATE
'2009-01-01') SET TBLPROPERTIES ('numFiles'='<NUM>', 'numRows'='<NUM>',
'totalSize'='<NUM>');
+-- WARNING about partial output
+-- WARNING: Emitted 1 of 55 partitions (show_create_table_partition_limit=1).
54 partitions skipped.
+-- To export more partitions, re-run with
SHOW_CREATE_TABLE_PARTITION_LIMIT=<n>.
+====
+---- QUERY
+SHOW CREATE TABLE tpcds_parquet.store_sales
+---- RESULTS-HIVE-3
+CREATE EXTERNAL TABLE tpcds_parquet.store_sales (
+ ss_sold_time_sk INT,
+ ss_item_sk BIGINT,
+ ss_customer_sk INT,
+ ss_cdemo_sk INT,
+ ss_hdemo_sk INT,
+ ss_addr_sk INT,
+ ss_store_sk INT,
+ ss_promo_sk INT,
+ ss_ticket_number BIGINT,
+ ss_quantity INT,
+ ss_wholesale_cost DECIMAL(7,2),
+ ss_list_price DECIMAL(7,2),
+ ss_sales_price DECIMAL(7,2),
+ ss_ext_discount_amt DECIMAL(7,2),
+ ss_ext_sales_price DECIMAL(7,2),
+ ss_ext_wholesale_cost DECIMAL(7,2),
+ ss_ext_list_price DECIMAL(7,2),
+ ss_ext_tax DECIMAL(7,2),
+ ss_coupon_amt DECIMAL(7,2),
+ ss_net_paid DECIMAL(7,2),
+ ss_net_paid_inc_tax DECIMAL(7,2),
+ ss_net_profit DECIMAL(7,2),
+ PRIMARY KEY (ss_item_sk, ss_ticket_number),
+ FOREIGN KEY(ss_customer_sk) REFERENCES tpcds_parquet.customer(c_customer_sk),
+ FOREIGN KEY(ss_addr_sk) REFERENCES
tpcds_parquet.customer_address(ca_address_sk),
+ FOREIGN KEY(ss_cdemo_sk) REFERENCES
tpcds_parquet.customer_demographics(cd_demo_sk),
+ FOREIGN KEY(ss_sold_date_sk) REFERENCES tpcds_parquet.date_dim(d_date_sk),
+ FOREIGN KEY(ss_hdemo_sk) REFERENCES
tpcds_parquet.household_demographics(hd_demo_sk),
+ FOREIGN KEY(ss_item_sk) REFERENCES tpcds_parquet.item(i_item_sk),
+ FOREIGN KEY(ss_promo_sk) REFERENCES tpcds_parquet.promotion(p_promo_sk),
+ FOREIGN KEY(ss_store_sk) REFERENCES tpcds_parquet.store(s_store_sk),
+ FOREIGN KEY(ss_sold_time_sk) REFERENCES tpcds_parquet.time_dim(t_time_sk)
+)
+PARTITIONED BY (
+ ss_sold_date_sk INT
+)
+STORED AS PARQUET
+LOCATION '$$location_uri$$'
+TBLPROPERTIES ('OBJCAPABILITIES'='EXTREAD,EXTWRITE', 'STATS_GENERATED'='TASK',
'impala.lastComputeStatsTime'='<NUM>');
+ALTER TABLE tpcds_parquet.store_sales SET TBLPROPERTIES ('EXTERNAL'='TRUE',
'OBJCAPABILITIES'='EXTREAD,EXTWRITE', 'STATS_GENERATED'='TASK',
'impala.events.catalogServiceId'='<NUM>',
'impala.events.catalogVersion'='<NUM>', 'impala.lastComputeStatsTime'='<NUM>',
'numRows'='<NUM>', 'totalSize'='<NUM>');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_sold_time_sk
('numDVs'='46948', 'numNulls'='129637', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_item_sk
('numDVs'='17975', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_customer_sk
('numDVs'='90632', 'numNulls'='129752', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_cdemo_sk
('numDVs'='217860', 'numNulls'='129700', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_hdemo_sk
('numDVs'='7376', 'numNulls'='129847', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_addr_sk
('numDVs'='48600', 'numNulls'='129975', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_store_sk
('numDVs'='6', 'numNulls'='130034', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_promo_sk
('numDVs'='302', 'numNulls'='129484', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_ticket_number
('numDVs'='240553', 'numNulls'='0', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_quantity
('numDVs'='99', 'numNulls'='129996', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_wholesale_cost
('numDVs'='10196', 'numNulls'='130023', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_list_price
('numDVs'='20233', 'numNulls'='130003', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_sales_price
('numDVs'='19129', 'numNulls'='129666', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_ext_discount_amt
('numDVs'='215962', 'numNulls'='129838', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_ext_sales_price
('numDVs'='421485', 'numNulls'='130327', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_ext_wholesale_cost
('numDVs'='378585', 'numNulls'='130044', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_ext_list_price
('numDVs'='607652', 'numNulls'='129933', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_ext_tax
('numDVs'='77834', 'numNulls'='130410', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_coupon_amt
('numDVs'='215962', 'numNulls'='129838', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_net_paid
('numDVs'='474273', 'numNulls'='129397', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_net_paid_inc_tax
('numDVs'='617277', 'numNulls'='130022', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales SET COLUMN STATS ss_net_profit
('numDVs'='566882', 'numNulls'='130267', 'numTrues'='-1', 'numFalses'='-1');
+ALTER TABLE tpcds_parquet.store_sales ADD PARTITION (ss_sold_date_sk=NULL)
LOCATION '$$location_uri$$/ss_sold_date_sk=__HIVE_DEFAULT_PARTITION__';
+ALTER TABLE tpcds_parquet.store_sales PARTITION (ss_sold_date_sk=NULL) SET
TBLPROPERTIES ('numFiles'='<NUM>', 'numRows'='<NUM>', 'totalSize'='<NUM>');
+-- WARNING about partial output
+-- WARNING: Emitted 1 of 1824 partitions
(show_create_table_partition_limit=1). 1823 partitions skipped.
+-- To export more partitions, re-run with
SHOW_CREATE_TABLE_PARTITION_LIMIT=<n>.
+====
diff --git a/tests/metadata/test_show_create_table.py
b/tests/metadata/test_show_create_table.py
index b9dbd6bd2..027ff1287 100644
--- a/tests/metadata/test_show_create_table.py
+++ b/tests/metadata/test_show_create_table.py
@@ -94,6 +94,10 @@ class TestShowCreateTable(ImpalaTestSuite):
vector,
unique_database)
+ def test_show_create_table_with_stats(self, vector, unique_database):
+ self.__run_show_create_table_with_stats_test_case(
+ 'QueryTest/show-create-table-with-stats', vector, unique_database)
+
def __run_show_create_table_test_case(self, test_file_name, vector,
unique_db_name):
"""
Runs a show-create-table test file, containing the following sections:
@@ -156,6 +160,97 @@ class TestShowCreateTable(ImpalaTestSuite):
# drop the table
self.__exec(test_case.drop_table_sql)
+ def __run_show_create_table_with_stats_test_case(
+ self, test_file_name, vector, unique_db_name):
+ sections = self.load_query_test_file(
+ self.get_workload(), test_file_name, self.VALID_SECTION_NAMES)
+ for test_section in sections:
+ test_case = ShowCreateTableTestCase(test_section, test_file_name,
unique_db_name)
+ if not test_case.existing_table:
+ # create table in Impala (support multiple setup statements)
+ setup_sql = self.__replace_warehouse(test_case.create_table_sql)
+ for stmt in re.split(r";\s*", setup_sql.strip()):
+ if not stmt:
+ continue
+ self.__exec(stmt)
+
+ # Set SHOW_CREATE_TABLE_PARTITION_LIMIT=1 for WITH STATS queries to test
+ # partition limiting
+ self.__exec("SET SHOW_CREATE_TABLE_PARTITION_LIMIT=1")
+
+ # Check if the table is a Paimon table before running COMPUTE STATS.
+ # Paimon tables do not support 'COMPUTE STATS'.
+ is_paimon = False
+ if not test_case.existing_table and test_case.create_table_sql:
+ is_paimon = "STORED AS PAIMON" in test_case.create_table_sql.upper()
+
+ if not is_paimon and not test_case.existing_table:
+ # COMPUTE STATS before running SHOW CREATE TABLE WITH STATS
+ self.__exec("COMPUTE STATS " + test_case.table_name)
+
+ # execute "SHOW CREATE TABLE ... WITH STATS"; collect all statements
+ result = self.__exec(test_case.show_create_table_sql + " WITH STATS")
+ raw_rows = [row.strip() for row in result.data if row and row.strip()]
+
+ # Single row; split into statements
+ raw_sql = raw_rows[0]
+ raw_create_table_result = [s for s in re.split(r";\s*", raw_sql) if
s.strip()]
+ create_table_result = [self.__normalize(self.__mask_dynamic_values(s))
+ for s in raw_create_table_result]
+ location_source = raw_sql
+
+ if not test_case.existing_table:
+ # drop the table
+ self.__exec(test_case.drop_table_sql)
+
+ # Build expected statements list and compare per-statement
+ expected_sql = self.__replace_warehouse(self.__replace_uri(
+ test_case.expected_result,
+ self.__get_location_uri(location_source)
+ ))
+ expected_statements = [
+ self.__normalize(s)
+ for s in re.split(r";\s*", expected_sql.strip())
+ if s.strip()
+ ]
+
+ assert len(expected_statements) == len(create_table_result), \
+ ("Expected {} statements, got {}".format(
+ len(expected_statements), len(create_table_result)))
+ for exp_stmt, act_stmt in zip(expected_statements, create_table_result):
+ self.__compare_result(exp_stmt, act_stmt)
+
+ if test_case.existing_table:
+ continue
+
+ # Check for warnings in the normalized output.
+ # If warnings are present, the output is partial, and we must skip the
+ # reproducibility check as it's guaranteed to fail on the partial DDL.
+ has_warnings = any(s.strip().startswith('--') for s in
create_table_result)
+ if has_warnings:
+ continue
+
+ # recreate the table with the WITH STATS result from above (multiple
statements)
+ # Skip comment lines (warnings) when recreating
+ for stmt in raw_create_table_result:
+ if not stmt or stmt.strip().startswith('--'):
+ continue
+ self.__exec(stmt)
+ try:
+ # we should get the same WITH STATS result again
+ result = self.__exec(test_case.show_create_table_sql + " WITH STATS")
+ new_raw_rows = [row.strip() for row in result.data if row and
row.strip()]
+ new_raw_sql = new_raw_rows[0]
+ new_create_table_result = [
+ self.__normalize(self.__mask_dynamic_values(s))
+ for s in re.split(r";\s*", new_raw_sql)
+ if s.strip()
+ ]
+ assert create_table_result == new_create_table_result
+ finally:
+ # drop the table
+ self.__exec(test_case.drop_table_sql)
+
def __exec(self, sql_str):
return self.execute_query_expect_success(self.client, sql_str)
@@ -164,11 +259,29 @@ class TestShowCreateTable(ImpalaTestSuite):
if m is not None:
return m.group(1)
+ def __get_partition_properties(self, sql_str):
+ """ Extract properties from partition-level SET TBLPROPERTIES statements.
+ Handles statements like:
+ ALTER TABLE ... PARTITION (p=1) SET TBLPROPERTIES ('key'='value', ...)
+ """
+ return get_properties_map(sql_str, "SET TBLPROPERTIES",
self.FILTER_TBL_PROPERTIES)
+
def __compare_result(self, expected_sql, actual_sql):
""" Extract all properties """
- expected_tbl_props = self.__get_properties_map(expected_sql,
"TBLPROPERTIES")
- actual_tbl_props = self.__get_properties_map(actual_sql, "TBLPROPERTIES")
- assert expected_tbl_props == actual_tbl_props
+ # Partition-level properties use "SET TBLPROPERTIES" syntax,
+ # while table-level properties just use "TBLPROPERTIES"
+ if 'PARTITION' in expected_sql and 'SET TBLPROPERTIES' in expected_sql:
+ # For partition statements: ALTER TABLE ... PARTITION (...) SET
TBLPROPERTIES (...)
+ expected_tbl_props = self.__get_partition_properties(expected_sql)
+ actual_tbl_props = self.__get_partition_properties(actual_sql)
+ else:
+ # For regular table-level properties: CREATE TABLE ... TBLPROPERTIES
(...)
+ expected_tbl_props = self.__get_properties_map(expected_sql,
"TBLPROPERTIES")
+ actual_tbl_props = self.__get_properties_map(actual_sql, "TBLPROPERTIES")
+
+ assert expected_tbl_props == actual_tbl_props, \
+ ("TBLPROPERTIES mismatch:\nExpected: {} \nActual: {}".format(
+ expected_tbl_props, actual_tbl_props))
expected_serde_props = self.__get_properties_map(expected_sql,
"SERDEPROPERTIES")
actual_serde_props = self.__get_properties_map(actual_sql,
"SERDEPROPERTIES")
@@ -178,6 +291,29 @@ class TestShowCreateTable(ImpalaTestSuite):
actual_sql_filtered = self.__remove_properties_maps(actual_sql)
assert expected_sql_filtered == actual_sql_filtered
+ def __mask_dynamic_values(self, s):
+ """ Replace dynamic/volatile values with <NUM> placeholder for comparison.
+ This masks values that change between test runs or are system-generated:
+ - numFiles: file count (dynamic based on data ingestion)
+ - totalSize: total size in bytes (dynamic)
+ - impala.events.catalogVersion: catalog version number (increments)
+ - impala.events.catalogServiceId: UUID-like identifier (changes per
service)
+
+ Note: numRows is NOT masked because it's typically a known test value
+ """
+ # Mask dynamic file system properties
+ s = re.sub(r"('numFiles'\s*=\s*)'[0-9]+'", r"\1'<NUM>'", s)
+ s = re.sub(r"('totalSize'\s*=\s*)'[0-9]+'", r"\1'<NUM>'", s)
+ s = re.sub(r"('numRows'\s*=\s*)'[0-9]+'", r"\1'<NUM>'", s)
+
+ # Mask Impala event system properties (catalog version and service ID)
+ s = re.sub(r"('impala\.events\.catalogVersion'\s*=\s*)'[0-9]+'",
r"\1'<NUM>'", s)
+ s = re.sub(r"('impala\.events\.catalogServiceId'\s*=\s*)'[^']+'",
r"\1'<NUM>'", s)
+ s = re.sub(r"('impala\.lastComputeStatsTime'\s*=\s*)'[0-9]+'",
r"\1'<NUM>'", s)
+ s = re.sub(r"('impala\.computeStatsSnapshotIds'\s*=\s*)'[^']+'",
r"\1'<NUM>'", s)
+
+ return s
+
def __normalize(self, s):
""" Normalize the string to remove extra whitespaces and remove keys
from tblproperties and serdeproperties that we don't want
@@ -253,7 +389,13 @@ class ShowCreateTableTestCase(object):
assert name.find(".") == -1, 'Error in test file %s. Found unexpected %s '\
'name %s that is qualified with a database' % (table_type,
test_file_name, name)
self.table_name = test_db_name + '.' + name
- self.create_table_sql = self.create_table_sql.replace(name,
self.table_name, 1)
+ # Replace all occurrences of the unqualified table name with the qualified
name
+ # This is needed for test cases with multiple statements (e.g., CREATE +
ALTER)
+ self.create_table_sql = re.sub(
+ r'\b' + re.escape(name) + r'\b',
+ self.table_name,
+ self.create_table_sql
+ )
self.show_create_table_sql = 'show create %s %s' % (table_type,
self.table_name)
self.drop_table_sql = "drop %s %s" % (table_type, self.table_name)