okumin commented on code in PR #5106:
URL: https://github.com/apache/hive/pull/5106#discussion_r1510501245


##########
ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java:
##########
@@ -1600,26 +1600,35 @@ Table materializeCTE(String cteName, CTEClause cte) 
throws HiveException {
     LOG.info("{} will be materialized into {}", cteName, location);
     cte.source = analyzer;
 
-    ctx.addMaterializedTable(cteName, table, 
getMaterializedTableStats(analyzer.getSinkOp(), table));
+    ctx.addMaterializedTable(cteName, table, 
getMaterializedTableStats(analyzer.getSinkOp()));
 
     return table;
   }
 
-  static Statistics getMaterializedTableStats(Operator<?> sinkOp, Table table) 
{
+  protected Statistics getMaterializedTableStats(Operator<?> sinkOp) {
     final Statistics tableStats = sinkOp.getStatistics().clone();
-    final List<ColStatistics> sourceColStatsList = tableStats.getColumnStats();
-    final List<String> colNames = 
table.getCols().stream().map(FieldSchema::getName).collect(Collectors.toList());
-    if (sourceColStatsList.size() != colNames.size()) {
-      throw new IllegalStateException(String.format(
-          "The size of col stats must be equal to that of schema. Stats = %s, 
Schema = %s",
-          sourceColStatsList, colNames));
-    }
-    final List<ColStatistics> colStatsList = new 
ArrayList<>(sourceColStatsList.size());
-    for (int i = 0; i < sourceColStatsList.size(); i++) {
-      final ColStatistics colStats = sourceColStatsList.get(i);
-      // FileSinkOperator stores column stats with internal names such as 
"_col1"
-      colStats.setColumnName(colNames.get(i));
-      colStatsList.add(colStats);
+    if (tableStats.getColumnStatsState() == Statistics.State.NONE || 
sinkOp.getNumParent() == 0) {
+      return tableStats;
+    }
+
+    final List<String> parentColumnNames = 
sinkOp.getParentOperators().get(0).getSchema().getColumnNames();

Review Comment:
   While debugging with a remote debugger, I found that the order of the 
list returned by `Statistics#getColumnStats` is non-deterministic because the 
underlying data structure is a HashMap. That's why I changed how column names 
are mapped to their statistics.
   - https://github.com/apache/hive/blob/rel/release-4.0.0-beta-1/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java#L327C33-L327C44
   - https://github.com/apache/hive/blob/rel/release-4.0.0-beta-1/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java#L211



##########
ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java:
##########
@@ -1600,26 +1600,35 @@ Table materializeCTE(String cteName, CTEClause cte) 
throws HiveException {
     LOG.info("{} will be materialized into {}", cteName, location);
     cte.source = analyzer;
 
-    ctx.addMaterializedTable(cteName, table, 
getMaterializedTableStats(analyzer.getSinkOp(), table));
+    ctx.addMaterializedTable(cteName, table, 
getMaterializedTableStats(analyzer.getSinkOp()));
 
     return table;
   }
 
-  static Statistics getMaterializedTableStats(Operator<?> sinkOp, Table table) 
{
+  protected Statistics getMaterializedTableStats(Operator<?> sinkOp) {
     final Statistics tableStats = sinkOp.getStatistics().clone();
-    final List<ColStatistics> sourceColStatsList = tableStats.getColumnStats();
-    final List<String> colNames = 
table.getCols().stream().map(FieldSchema::getName).collect(Collectors.toList());
-    if (sourceColStatsList.size() != colNames.size()) {
-      throw new IllegalStateException(String.format(
-          "The size of col stats must be equal to that of schema. Stats = %s, 
Schema = %s",
-          sourceColStatsList, colNames));
-    }
-    final List<ColStatistics> colStatsList = new 
ArrayList<>(sourceColStatsList.size());
-    for (int i = 0; i < sourceColStatsList.size(); i++) {
-      final ColStatistics colStats = sourceColStatsList.get(i);
-      // FileSinkOperator stores column stats with internal names such as 
"_col1"
-      colStats.setColumnName(colNames.get(i));
-      colStatsList.add(colStats);
+    if (tableStats.getColumnStatsState() == Statistics.State.NONE || 
sinkOp.getNumParent() == 0) {
+      return tableStats;

Review Comment:
   This is the main change.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org
For additional commands, e-mail: gitbox-h...@hive.apache.org

Reply via email to