Github user wangyum commented on a diff in the pull request:
https://github.com/apache/spark/pull/22124#discussion_r211447728
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala ---
@@ -384,7 +384,12 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] {
     }
   }
-  def apply(plan: LogicalPlan): LogicalPlan = removeRedundantAliases(plan, AttributeSet.empty)
+  def apply(plan: LogicalPlan): LogicalPlan = {
+    plan match {
+      case c: Command => c
+      case _ => removeRedundantAliases(plan, AttributeSet.empty)
--- End diff --
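The hunk is cut off at the review anchor by the mail archive; restored with its presumable closing braces (an assumption, they are not part of the quoted diff), the guarded `apply` would read:
```scala
def apply(plan: LogicalPlan): LogicalPlan = {
  plan match {
    // A Command captures its output attributes (e.g. the outputColumns of
    // InsertIntoHadoopFsRelationCommand) at analysis time, so leave it untouched.
    case c: Command => c
    case _ => removeRedundantAliases(plan, AttributeSet.empty)
  }
}
```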
For example:
```scala
val path = "/tmp/spark/parquet"
val cnt = 30
spark.range(cnt).selectExpr("id as col1").write.mode("overwrite").parquet(path)
spark.sql(s"CREATE TABLE table1(col1 bigint) using parquet location '$path'")
spark.sql("create view view1 as select col1 from table1 where col1 > -20")
// The column name of table2 differs from the column name of view1 only in case.
spark.sql("create table table2 (COL1 BIGINT) using parquet")
// Query the view with the upper-case name so that the query's column name
// matches the target table's column name.
spark.sql("insert overwrite table table2 select COL1 from view1")
```
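A quick way to see the effect, without reading the whole trace below, is to inspect the command that planning produces (a sketch: it reuses the session above and reads the command's `outputColumns` field discussed at the end of this comment, so the cast may need adjusting on other branches):
```scala
import org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand

val qe = spark.sql("insert overwrite table table2 select COL1 from view1").queryExecution
// Analysis builds the command with output column COL1#6L...
val analyzed  = qe.analyzed.asInstanceOf[InsertIntoHadoopFsRelationCommand]
// ...but after the optimizer has run, the same field holds col1#5L.
val optimized = qe.optimizedPlan.asInstanceOf[InsertIntoHadoopFsRelationCommand]
println(analyzed.outputColumns)   // [COL1#6L]
println(optimized.outputColumns)  // [col1#5L]
```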
The rule-by-rule plan change trace:
```scala
=== Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences ===
!'Project ['id AS col1#2]                   Project [id#0L AS col1#2L]
 +- Range (0, 30, step=1, splits=Some(1))   +- Range (0, 30, step=1, splits=Some(1))
17:02:55.061 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.catalyst.analysis.CleanupAliases ===
Project [id#0L AS col1#2L]                  Project [id#0L AS col1#2L]
+- Range (0, 30, step=1, splits=Some(1))    +- Range (0, 30, step=1, splits=Some(1))
17:02:59.174 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.execution.datasources.DataSourceAnalysis ===
!'CreateTable `table1`, ErrorIfExists   CreateDataSourceTableCommand `table1`, false
17:02:59.909 WARN org.apache.hadoop.hive.metastore.ObjectStore: Failed to get database global_temp, returning NoSuchObjectException
17:03:00.094 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations ===
'Project ['col1]                       'Project ['col1]
+- 'Filter ('col1 > -20)               +- 'Filter ('col1 > -20)
!   +- 'UnresolvedRelation `table1`       +- 'SubqueryAlias `default`.`table1`
!                                            +- 'UnresolvedCatalogRelation `default`.`table1`, org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
17:03:00.254 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.execution.datasources.FindDataSourceTable ===
'Project ['col1]                                                                                                    'Project ['col1]
+- 'Filter ('col1 > -20)                                                                                            +- 'Filter ('col1 > -20)
!   +- 'SubqueryAlias `default`.`table1`                                                                               +- SubqueryAlias `default`.`table1`
!      +- 'UnresolvedCatalogRelation `default`.`table1`, org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe      +- Relation[col1#5L] parquet
17:03:00.267 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences ===
'Project ['col1]                          'Project ['col1]
!+- 'Filter ('col1 > -20)                 +- 'Filter (col1#5L > -20)
   +- SubqueryAlias `default`.`table1`       +- SubqueryAlias `default`.`table1`
      +- Relation[col1#5L] parquet              +- Relation[col1#5L] parquet
17:03:00.306 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.catalyst.analysis.TypeCoercion$ImplicitTypeCasts ===
'Project ['col1]                          'Project ['col1]
!+- 'Filter (col1#5L > -20)               +- Filter (col1#5L > cast(-20 as bigint))
   +- SubqueryAlias `default`.`table1`       +- SubqueryAlias `default`.`table1`
      +- Relation[col1#5L] parquet              +- Relation[col1#5L] parquet
17:03:00.309 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences ===
!'Project ['col1]                              Project [col1#5L]
 +- Filter (col1#5L > cast(-20 as bigint))     +- Filter (col1#5L > cast(-20 as bigint))
    +- SubqueryAlias `default`.`table1`           +- SubqueryAlias `default`.`table1`
       +- Relation[col1#5L] parquet                  +- Relation[col1#5L] parquet
17:03:00.314 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.catalyst.analysis.ResolveTimeZone ===
Project [col1#5L]                             Project [col1#5L]
+- Filter (col1#5L > cast(-20 as bigint))     +- Filter (col1#5L > cast(-20 as bigint))
   +- SubqueryAlias `default`.`table1`           +- SubqueryAlias `default`.`table1`
      +- Relation[col1#5L] parquet                  +- Relation[col1#5L] parquet
17:03:00.383 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.execution.datasources.DataSourceAnalysis ===
!'CreateTable `table2`, ErrorIfExists   CreateDataSourceTableCommand `table2`, false
17:03:00.729 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations ===
'Project ['col1]                       'Project ['col1]
+- 'Filter ('col1 > -20)               +- 'Filter ('col1 > -20)
!   +- 'UnresolvedRelation `table1`       +- 'SubqueryAlias `default`.`table1`
!                                            +- 'UnresolvedCatalogRelation `default`.`table1`, org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
17:03:00.730 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.execution.datasources.FindDataSourceTable ===
'Project ['col1]                                                                                                    'Project ['col1]
+- 'Filter ('col1 > -20)                                                                                            +- 'Filter ('col1 > -20)
!   +- 'SubqueryAlias `default`.`table1`                                                                               +- SubqueryAlias `default`.`table1`
!      +- 'UnresolvedCatalogRelation `default`.`table1`, org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe      +- Relation[col1#5L] parquet
17:03:00.731 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences ===
'Project ['col1]                          'Project ['col1]
!+- 'Filter ('col1 > -20)                 +- 'Filter (col1#5L > -20)
   +- SubqueryAlias `default`.`table1`       +- SubqueryAlias `default`.`table1`
      +- Relation[col1#5L] parquet              +- Relation[col1#5L] parquet
17:03:00.734 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.catalyst.analysis.TypeCoercion$ImplicitTypeCasts ===
'Project ['col1]                          'Project ['col1]
!+- 'Filter (col1#5L > -20)               +- Filter (col1#5L > cast(-20 as bigint))
   +- SubqueryAlias `default`.`table1`       +- SubqueryAlias `default`.`table1`
      +- Relation[col1#5L] parquet              +- Relation[col1#5L] parquet
17:03:00.735 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences ===
!'Project ['col1]                              Project [col1#5L]
 +- Filter (col1#5L > cast(-20 as bigint))     +- Filter (col1#5L > cast(-20 as bigint))
    +- SubqueryAlias `default`.`table1`           +- SubqueryAlias `default`.`table1`
       +- Relation[col1#5L] parquet                  +- Relation[col1#5L] parquet
17:03:00.737 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.catalyst.analysis.ResolveTimeZone ===
Project [col1#5L]                             Project [col1#5L]
+- Filter (col1#5L > cast(-20 as bigint))     +- Filter (col1#5L > cast(-20 as bigint))
   +- SubqueryAlias `default`.`table1`           +- SubqueryAlias `default`.`table1`
      +- Relation[col1#5L] parquet                  +- Relation[col1#5L] parquet
17:03:00.742 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations ===
'InsertIntoTable 'UnresolvedRelation `table2`, true, false   'InsertIntoTable 'UnresolvedRelation `table2`, true, false
 +- 'Project ['COL1]                                         +- 'Project ['COL1]
!   +- 'UnresolvedRelation `view1`                              +- SubqueryAlias `default`.`view1`
!                                                                  +- View (`default`.`view1`, [col1#6L])
!                                                                     +- Project [col1#5L]
!                                                                        +- Filter (col1#5L > cast(-20 as bigint))
!                                                                           +- SubqueryAlias `default`.`table1`
!                                                                              +- Relation[col1#5L] parquet
17:03:00.744 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences ===
'InsertIntoTable 'UnresolvedRelation `table2`, true, false   'InsertIntoTable 'UnresolvedRelation `table2`, true, false
!+- 'Project ['COL1]                                         +- Project [COL1#6L]
   +- SubqueryAlias `default`.`view1`                           +- SubqueryAlias `default`.`view1`
      +- View (`default`.`view1`, [col1#6L])                       +- View (`default`.`view1`, [col1#6L])
         +- Project [col1#5L]                                         +- Project [col1#5L]
            +- Filter (col1#5L > cast(-20 as bigint))                    +- Filter (col1#5L > cast(-20 as bigint))
               +- SubqueryAlias `default`.`table1`                          +- SubqueryAlias `default`.`table1`
                  +- Relation[col1#5L] parquet                                 +- Relation[col1#5L] parquet
17:03:00.768 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations ===
!'InsertIntoTable 'UnresolvedRelation `table2`, true, false   'InsertIntoTable 'UnresolvedCatalogRelation `default`.`table2`, org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe, true, false
 +- Project [COL1#6L]                                         +- Project [COL1#6L]
    +- SubqueryAlias `default`.`view1`                           +- SubqueryAlias `default`.`view1`
       +- View (`default`.`view1`, [col1#6L])                       +- View (`default`.`view1`, [col1#6L])
          +- Project [col1#5L]                                         +- Project [col1#5L]
             +- Filter (col1#5L > cast(-20 as bigint))                    +- Filter (col1#5L > cast(-20 as bigint))
                +- SubqueryAlias `default`.`table1`                          +- SubqueryAlias `default`.`table1`
                   +- Relation[col1#5L] parquet                                 +- Relation[col1#5L] parquet
17:03:00.852 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.execution.datasources.FindDataSourceTable ===
!'InsertIntoTable 'UnresolvedCatalogRelation `default`.`table2`, org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe, true, false   'InsertIntoTable Relation[COL1#7L] parquet, true, false
 +- Project [COL1#6L]                                         +- Project [COL1#6L]
    +- SubqueryAlias `default`.`view1`                           +- SubqueryAlias `default`.`view1`
       +- View (`default`.`view1`, [col1#6L])                       +- View (`default`.`view1`, [col1#6L])
          +- Project [col1#5L]                                         +- Project [col1#5L]
             +- Filter (col1#5L > cast(-20 as bigint))                    +- Filter (col1#5L > cast(-20 as bigint))
                +- SubqueryAlias `default`.`table1`                          +- SubqueryAlias `default`.`table1`
                   +- Relation[col1#5L] parquet                                 +- Relation[col1#5L] parquet
DataSourceStrategy 1:COL1#8L
DataSourceStrategy 2:COL1#6L
17:03:00.896 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.execution.datasources.DataSourceAnalysis ===
!'InsertIntoTable Relation[COL1#7L] parquet, true, false     InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(
!+- Project [COL1#6L]                                        Database: default
!   +- SubqueryAlias `default`.`view1`                       Table: table2
!      +- View (`default`.`view1`, [col1#6L])                Owner: yumwang
!         +- Project [col1#5L]                               Created Time: Mon Aug 20 17:03:00 PDT 2018
!            +- Filter (col1#5L > cast(-20 as bigint))       Last Access: Wed Dec 31 16:00:00 PST 1969
!               +- SubqueryAlias `default`.`table1`          Created By: Spark 2.4.0-SNAPSHOT
!                  +- Relation[col1#5L] parquet              Type: MANAGED
!                                                            Provider: parquet
!                                                            Table Properties: [transient_lastDdlTime=1534809780]
!                                                            Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2
!                                                            Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
!                                                            InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
!                                                            OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
!                                                            Storage Properties: [serialization.format=1]
!                                                            Schema: root
!                                                             |-- COL1: long (nullable = true)
!                                                            ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L]
!                                                            +- Project [COL1#6L]
!                                                               +- SubqueryAlias `default`.`view1`
!                                                                  +- View (`default`.`view1`, [col1#6L])
!                                                                     +- Project [col1#5L]
!                                                                        +- Filter (col1#5L > cast(-20 as bigint))
!                                                                           +- SubqueryAlias `default`.`table1`
!                                                                              +- Relation[col1#5L] parquet
17:03:00.916 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1:
=== Applying Rule org.apache.spark.sql.catalyst.analysis.AliasViewChild ===
InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(      InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(
Database: default                                            Database: default
Table: table2                                                Table: table2
Owner: yumwang                                               Owner: yumwang
Created Time: Mon Aug 20 17:03:00 PDT 2018                   Created Time: Mon Aug 20 17:03:00 PDT 2018
Last Access: Wed Dec 31 16:00:00 PST 1969                    Last Access: Wed Dec 31 16:00:00 PST 1969
Created By: Spark 2.4.0-SNAPSHOT                             Created By: Spark 2.4.0-SNAPSHOT
Type: MANAGED                                                Type: MANAGED
Provider: parquet                                            Provider: parquet
Table Properties: [transient_lastDdlTime=1534809780]         Table Properties: [transient_lastDdlTime=1534809780]
Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2      Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2
Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe      Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat      InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat      OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
Storage Properties: [serialization.format=1]                 Storage Properties: [serialization.format=1]
Schema: root                                                 Schema: root
 |-- COL1: long (nullable = true)                             |-- COL1: long (nullable = true)
), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L]      ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L]
+- Project [COL1#6L]                                         +- Project [COL1#6L]
   +- SubqueryAlias `default`.`view1`                           +- SubqueryAlias `default`.`view1`
      +- View (`default`.`view1`, [col1#6L])                       +- View (`default`.`view1`, [col1#6L])
!        +- Project [col1#5L]                                         +- Project [cast(col1#5L as bigint) AS col1#6L]
!           +- Filter (col1#5L > cast(-20 as bigint))                    +- Project [col1#5L]
!              +- SubqueryAlias `default`.`table1`                          +- Filter (col1#5L > cast(-20 as bigint))
!                 +- Relation[col1#5L] parquet                                 +- SubqueryAlias `default`.`table1`
!                                                                                 +- Relation[col1#5L] parquet
yumwang123:COL1#6L
17:03:00.949 WARN org.apache.spark.sql.internal.BaseSessionStateBuilder$$anon$2:
=== Applying Rule org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases ===
InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(      InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(
Database: default                                            Database: default
Table: table2                                                Table: table2
Owner: yumwang                                               Owner: yumwang
Created Time: Mon Aug 20 17:03:00 PDT 2018                   Created Time: Mon Aug 20 17:03:00 PDT 2018
Last Access: Wed Dec 31 16:00:00 PST 1969                    Last Access: Wed Dec 31 16:00:00 PST 1969
Created By: Spark 2.4.0-SNAPSHOT                             Created By: Spark 2.4.0-SNAPSHOT
Type: MANAGED                                                Type: MANAGED
Provider: parquet                                            Provider: parquet
Table Properties: [transient_lastDdlTime=1534809780]         Table Properties: [transient_lastDdlTime=1534809780]
Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2      Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2
Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe      Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat      InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat      OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
Storage Properties: [serialization.format=1]                 Storage Properties: [serialization.format=1]
Schema: root                                                 Schema: root
 |-- COL1: long (nullable = true)                             |-- COL1: long (nullable = true)
), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L]      ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L]
 +- Project [COL1#6L]                                        +- Project [COL1#6L]
!   +- SubqueryAlias `default`.`view1`                          +- View (`default`.`view1`, [col1#6L])
!      +- View (`default`.`view1`, [col1#6L])                      +- Project [cast(col1#5L as bigint) AS col1#6L]
!         +- Project [cast(col1#5L as bigint) AS col1#6L]             +- Project [col1#5L]
!            +- Project [col1#5L]                                        +- Filter (col1#5L > cast(-20 as bigint))
!               +- Filter (col1#5L > cast(-20 as bigint))                   +- Relation[col1#5L] parquet
!                  +- SubqueryAlias `default`.`table1`
!                     +- Relation[col1#5L] parquet
17:03:00.959 WARN org.apache.spark.sql.internal.BaseSessionStateBuilder$$anon$2:
=== Applying Rule org.apache.spark.sql.catalyst.analysis.EliminateView ===
InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(      InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(
Database: default                                            Database: default
Table: table2                                                Table: table2
Owner: yumwang                                               Owner: yumwang
Created Time: Mon Aug 20 17:03:00 PDT 2018                   Created Time: Mon Aug 20 17:03:00 PDT 2018
Last Access: Wed Dec 31 16:00:00 PST 1969                    Last Access: Wed Dec 31 16:00:00 PST 1969
Created By: Spark 2.4.0-SNAPSHOT                             Created By: Spark 2.4.0-SNAPSHOT
Type: MANAGED                                                Type: MANAGED
Provider: parquet                                            Provider: parquet
Table Properties: [transient_lastDdlTime=1534809780]         Table Properties: [transient_lastDdlTime=1534809780]
Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2      Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2
Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe      Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat      InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat      OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
Storage Properties: [serialization.format=1]                 Storage Properties: [serialization.format=1]
Schema: root                                                 Schema: root
 |-- COL1: long (nullable = true)                             |-- COL1: long (nullable = true)
), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L]      ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L]
 +- Project [COL1#6L]                                        +- Project [COL1#6L]
!   +- View (`default`.`view1`, [col1#6L])                      +- Project [cast(col1#5L as bigint) AS col1#6L]
!      +- Project [cast(col1#5L as bigint) AS col1#6L]             +- Project [col1#5L]
!         +- Project [col1#5L]                                        +- Filter (col1#5L > cast(-20 as bigint))
!            +- Filter (col1#5L > cast(-20 as bigint))                   +- Relation[col1#5L] parquet
!               +- Relation[col1#5L] parquet
17:03:00.975 WARN org.apache.spark.sql.internal.BaseSessionStateBuilder$$anon$2:
=== Applying Rule org.apache.spark.sql.catalyst.optimizer.ColumnPruning ===
InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(      InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(
Database: default                                            Database: default
Table: table2                                                Table: table2
Owner: yumwang                                               Owner: yumwang
Created Time: Mon Aug 20 17:03:00 PDT 2018                   Created Time: Mon Aug 20 17:03:00 PDT 2018
Last Access: Wed Dec 31 16:00:00 PST 1969                    Last Access: Wed Dec 31 16:00:00 PST 1969
Created By: Spark 2.4.0-SNAPSHOT                             Created By: Spark 2.4.0-SNAPSHOT
Type: MANAGED                                                Type: MANAGED
Provider: parquet                                            Provider: parquet
Table Properties: [transient_lastDdlTime=1534809780]         Table Properties: [transient_lastDdlTime=1534809780]
Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2      Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2
Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe      Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat      InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat      OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
Storage Properties: [serialization.format=1]                 Storage Properties: [serialization.format=1]
Schema: root                                                 Schema: root
 |-- COL1: long (nullable = true)                             |-- COL1: long (nullable = true)
), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L]      ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L]
!+- Project [COL1#6L]                                        +- Project [cast(col1#5L as bigint) AS col1#6L]
!   +- Project [cast(col1#5L as bigint) AS col1#6L]             +- Filter (col1#5L > cast(-20 as bigint))
!      +- Project [col1#5L]                                        +- Relation[col1#5L] parquet
!         +- Filter (col1#5L > cast(-20 as bigint))
!            +- Relation[col1#5L] parquet
17:03:00.980 WARN org.apache.spark.sql.internal.BaseSessionStateBuilder$$anon$2:
=== Applying Rule org.apache.spark.sql.catalyst.optimizer.ConstantFolding ===
InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(      InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(
Database: default                                            Database: default
Table: table2                                                Table: table2
Owner: yumwang                                               Owner: yumwang
Created Time: Mon Aug 20 17:03:00 PDT 2018                   Created Time: Mon Aug 20 17:03:00 PDT 2018
Last Access: Wed Dec 31 16:00:00 PST 1969                    Last Access: Wed Dec 31 16:00:00 PST 1969
Created By: Spark 2.4.0-SNAPSHOT                             Created By: Spark 2.4.0-SNAPSHOT
Type: MANAGED                                                Type: MANAGED
Provider: parquet                                            Provider: parquet
Table Properties: [transient_lastDdlTime=1534809780]         Table Properties: [transient_lastDdlTime=1534809780]
Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2      Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2
Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe      Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat      InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat      OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
Storage Properties: [serialization.format=1]                 Storage Properties: [serialization.format=1]
Schema: root                                                 Schema: root
 |-- COL1: long (nullable = true)                             |-- COL1: long (nullable = true)
), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L]      ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L]
 +- Project [cast(col1#5L as bigint) AS col1#6L]             +- Project [cast(col1#5L as bigint) AS col1#6L]
!   +- Filter (col1#5L > cast(-20 as bigint))                   +- Filter (col1#5L > -20)
       +- Relation[col1#5L] parquet                                +- Relation[col1#5L] parquet
17:03:01.047 WARN org.apache.spark.sql.internal.BaseSessionStateBuilder$$anon$2:
=== Applying Rule org.apache.spark.sql.catalyst.optimizer.SimplifyCasts ===
InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(      InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(
Database: default                                            Database: default
Table: table2                                                Table: table2
Owner: yumwang                                               Owner: yumwang
Created Time: Mon Aug 20 17:03:00 PDT 2018                   Created Time: Mon Aug 20 17:03:00 PDT 2018
Last Access: Wed Dec 31 16:00:00 PST 1969                    Last Access: Wed Dec 31 16:00:00 PST 1969
Created By: Spark 2.4.0-SNAPSHOT                             Created By: Spark 2.4.0-SNAPSHOT
Type: MANAGED                                                Type: MANAGED
Provider: parquet                                            Provider: parquet
Table Properties: [transient_lastDdlTime=1534809780]         Table Properties: [transient_lastDdlTime=1534809780]
Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2      Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2
Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe      Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat      InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat      OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
Storage Properties: [serialization.format=1]                 Storage Properties: [serialization.format=1]
Schema: root                                                 Schema: root
 |-- COL1: long (nullable = true)                             |-- COL1: long (nullable = true)
), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L]      ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L]
!+- Project [cast(col1#5L as bigint) AS col1#6L]             +- Project [col1#5L AS col1#6L]
    +- Filter (col1#5L > -20)                                   +- Filter (col1#5L > -20)
       +- Relation[col1#5L] parquet                                +- Relation[col1#5L] parquet
17:03:01.058 WARN org.apache.spark.sql.internal.BaseSessionStateBuilder$$anon$2:
=== Applying Rule org.apache.spark.sql.catalyst.optimizer.RemoveRedundantAliases ===
InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(      InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(
Database: default                                            Database: default
Table: table2                                                Table: table2
Owner: yumwang                                               Owner: yumwang
Created Time: Mon Aug 20 17:03:00 PDT 2018                   Created Time: Mon Aug 20 17:03:00 PDT 2018
Last Access: Wed Dec 31 16:00:00 PST 1969                    Last Access: Wed Dec 31 16:00:00 PST 1969
Created By: Spark 2.4.0-SNAPSHOT                             Created By: Spark 2.4.0-SNAPSHOT
Type: MANAGED                                                Type: MANAGED
Provider: parquet                                            Provider: parquet
Table Properties: [transient_lastDdlTime=1534809780]         Table Properties: [transient_lastDdlTime=1534809780]
Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2      Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2
Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe      Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat      InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat      OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
Storage Properties: [serialization.format=1]                 Storage Properties: [serialization.format=1]
Schema: root                                                 Schema: root
 |-- COL1: long (nullable = true)                             |-- COL1: long (nullable = true)
!), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L]      ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [col1#5L]
!+- Project [col1#5L AS col1#6L]                             +- Project [col1#5L]
    +- Filter (col1#5L > -20)                                   +- Filter (col1#5L > -20)
       +- Relation[col1#5L] parquet                                +- Relation[col1#5L] parquet
17:03:01.061 WARN org.apache.spark.sql.internal.BaseSessionStateBuilder$$anon$2:
=== Applying Rule org.apache.spark.sql.catalyst.optimizer.ColumnPruning ===
InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(      InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(
Database: default                                            Database: default
Table: table2                                                Table: table2
Owner: yumwang                                               Owner: yumwang
Created Time: Mon Aug 20 17:03:00 PDT 2018                   Created Time: Mon Aug 20 17:03:00 PDT 2018
Last Access: Wed Dec 31 16:00:00 PST 1969                    Last Access: Wed Dec 31 16:00:00 PST 1969
Created By: Spark 2.4.0-SNAPSHOT                             Created By: Spark 2.4.0-SNAPSHOT
Type: MANAGED                                                Type: MANAGED
Provider: parquet                                            Provider: parquet
Table Properties: [transient_lastDdlTime=1534809780]         Table Properties: [transient_lastDdlTime=1534809780]
Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2      Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2
Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe      Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat      InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat      OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
Storage Properties: [serialization.format=1]                 Storage Properties: [serialization.format=1]
Schema: root                                                 Schema: root
 |-- COL1: long (nullable = true)                             |-- COL1: long (nullable = true)
), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [col1#5L]      ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [col1#5L]
!+- Project [col1#5L]                                        +- Filter (col1#5L > -20)
!   +- Filter (col1#5L > -20)                                   +- Relation[col1#5L] parquet
!      +- Relation[col1#5L] parquet
17:03:01.116 WARN org.apache.spark.sql.internal.BaseSessionStateBuilder$$anon$2:
=== Applying Rule org.apache.spark.sql.catalyst.optimizer.InferFiltersFromConstraints ===
InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(      InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(
Database: default                                            Database: default
Table: table2                                                Table: table2
Owner: yumwang                                               Owner: yumwang
Created Time: Mon Aug 20 17:03:00 PDT 2018                   Created Time: Mon Aug 20 17:03:00 PDT 2018
Last Access: Wed Dec 31 16:00:00 PST 1969                    Last Access: Wed Dec 31 16:00:00 PST 1969
Created By: Spark 2.4.0-SNAPSHOT                             Created By: Spark 2.4.0-SNAPSHOT
Type: MANAGED                                                Type: MANAGED
Provider: parquet                                            Provider: parquet
Table Properties: [transient_lastDdlTime=1534809780]         Table Properties: [transient_lastDdlTime=1534809780]
Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2      Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2
Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe      Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat      InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat      OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
Storage Properties: [serialization.format=1]                 Storage Properties: [serialization.format=1]
Schema: root                                                 Schema: root
 |-- COL1: long (nullable = true)                             |-- COL1: long (nullable = true)
), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [col1#5L]      ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [col1#5L]
!+- Filter (col1#5L > -20)                                   +- Filter (isnotnull(col1#5L) && (col1#5L > -20))
    +- Relation[col1#5L] parquet                                +- Relation[col1#5L] parquet
queryExecution:
== Parsed Logical Plan ==
'InsertIntoTable 'UnresolvedRelation `table2`, true, false
+- 'Project ['COL1]
   +- 'UnresolvedRelation `view1`

== Analyzed Logical Plan ==
InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(
Database: default
Table: table2
Owner: yumwang
Created Time: Mon Aug 20 17:03:00 PDT 2018
Last Access: Wed Dec 31 16:00:00 PST 1969
Created By: Spark 2.4.0-SNAPSHOT
Type: MANAGED
Provider: parquet
Table Properties: [transient_lastDdlTime=1534809780]
Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2
Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
Storage Properties: [serialization.format=1]
Schema: root
 |-- COL1: long (nullable = true)
), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L]
+- Project [COL1#6L]
   +- SubqueryAlias `default`.`view1`
      +- View (`default`.`view1`, [col1#6L])
         +- Project [cast(col1#5L as bigint) AS col1#6L]
            +- Project [col1#5L]
               +- Filter (col1#5L > cast(-20 as bigint))
                  +- SubqueryAlias `default`.`table1`
                     +- Relation[col1#5L] parquet

== Optimized Logical Plan ==
InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(
Database: default
Table: table2
Owner: yumwang
Created Time: Mon Aug 20 17:03:00 PDT 2018
Last Access: Wed Dec 31 16:00:00 PST 1969
Created By: Spark 2.4.0-SNAPSHOT
Type: MANAGED
Provider: parquet
Table Properties: [transient_lastDdlTime=1534809780]
Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2
Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
Storage Properties: [serialization.format=1]
Schema: root
 |-- COL1: long (nullable = true)
), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [col1#5L]
+- Filter (isnotnull(col1#5L) && (col1#5L > -20))
   +- Relation[col1#5L] parquet

== Physical Plan ==
Execute InsertIntoHadoopFsRelationCommand InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(
Database: default
Table: table2
Owner: yumwang
Created Time: Mon Aug 20 17:03:00 PDT 2018
Last Access: Wed Dec 31 16:00:00 PST 1969
Created By: Spark 2.4.0-SNAPSHOT
Type: MANAGED
Provider: parquet
Table Properties: [transient_lastDdlTime=1534809780]
Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2
Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
Storage Properties: [serialization.format=1]
Schema: root
 |-- COL1: long (nullable = true)
), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [col1#5L]
+- *(1) Project [col1#5L]
   +- *(1) Filter (isnotnull(col1#5L) && (col1#5L > -20))
      +- *(1) FileScan parquet default.table1[col1#5L] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/tmp/spark/parquet], PartitionFilters: [], PushedFilters: [IsNotNull(col1), GreaterThan(col1,-20)], ReadSchema: struct<col1:bigint>
```
The three main changes are:
```scala
=== Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences ===
'InsertIntoTable 'UnresolvedRelation `table2`, true, false   'InsertIntoTable 'UnresolvedRelation `table2`, true, false
!+- 'Project ['COL1]                                         +- Project [COL1#6L]
   +- SubqueryAlias `default`.`view1`                           +- SubqueryAlias `default`.`view1`
      +- View (`default`.`view1`, [col1#6L])                       +- View (`default`.`view1`, [col1#6L])
         +- Project [col1#5L]                                         +- Project [col1#5L]
            +- Filter (col1#5L > cast(-20 as bigint))                    +- Filter (col1#5L > cast(-20 as bigint))
               +- SubqueryAlias `default`.`table1`                          +- SubqueryAlias `default`.`table1`
                  +- Relation[col1#5L] parquet                                 +- Relation[col1#5L] parquet
```
```scala
=== Applying Rule org.apache.spark.sql.catalyst.analysis.AliasViewChild ===
InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(      InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(
Database: default                                            Database: default
Table: table2                                                Table: table2
Owner: yumwang                                               Owner: yumwang
Created Time: Mon Aug 20 17:03:00 PDT 2018                   Created Time: Mon Aug 20 17:03:00 PDT 2018
Last Access: Wed Dec 31 16:00:00 PST 1969                    Last Access: Wed Dec 31 16:00:00 PST 1969
Created By: Spark 2.4.0-SNAPSHOT                             Created By: Spark 2.4.0-SNAPSHOT
Type: MANAGED                                                Type: MANAGED
Provider: parquet                                            Provider: parquet
Table Properties: [transient_lastDdlTime=1534809780]         Table Properties: [transient_lastDdlTime=1534809780]
Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2      Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2
Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe      Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat      InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat      OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
Storage Properties: [serialization.format=1]                 Storage Properties: [serialization.format=1]
Schema: root                                                 Schema: root
 |-- COL1: long (nullable = true)                             |-- COL1: long (nullable = true)
), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L]      ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L]
+- Project [COL1#6L]                                         +- Project [COL1#6L]
   +- SubqueryAlias `default`.`view1`                           +- SubqueryAlias `default`.`view1`
      +- View (`default`.`view1`, [col1#6L])                       +- View (`default`.`view1`, [col1#6L])
!        +- Project [col1#5L]                                         +- Project [cast(col1#5L as bigint) AS col1#6L]
!           +- Filter (col1#5L > cast(-20 as bigint))                    +- Project [col1#5L]
!              +- SubqueryAlias `default`.`table1`                          +- Filter (col1#5L > cast(-20 as bigint))
!                 +- Relation[col1#5L] parquet                                 +- SubqueryAlias `default`.`table1`
!                                                                                 +- Relation[col1#5L] parquet
```
```scala
=== Applying Rule org.apache.spark.sql.catalyst.optimizer.RemoveRedundantAliases ===
InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(      InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable(
Database: default                                            Database: default
Table: table2                                                Table: table2
Owner: yumwang                                               Owner: yumwang
Created Time: Mon Aug 20 17:03:00 PDT 2018                   Created Time: Mon Aug 20 17:03:00 PDT 2018
Last Access: Wed Dec 31 16:00:00 PST 1969                    Last Access: Wed Dec 31 16:00:00 PST 1969
Created By: Spark 2.4.0-SNAPSHOT                             Created By: Spark 2.4.0-SNAPSHOT
Type: MANAGED                                                Type: MANAGED
Provider: parquet                                            Provider: parquet
Table Properties: [transient_lastDdlTime=1534809780]         Table Properties: [transient_lastDdlTime=1534809780]
Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2      Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2
Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe      Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat      InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat      OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
Storage Properties: [serialization.format=1]                 Storage Properties: [serialization.format=1]
Schema: root                                                 Schema: root
 |-- COL1: long (nullable = true)                             |-- COL1: long (nullable = true)
!), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L]      ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [col1#5L]
!+- Project [col1#5L AS col1#6L]                             +- Project [col1#5L]
    +- Filter (col1#5L > -20)                                   +- Filter (col1#5L > -20)
       +- Relation[col1#5L] parquet                                +- Relation[col1#5L] parquet
```
We need `COL1#6L`, but after the optimizations above the command's [`outputColumns`](https://github.com/apache/spark/blob/9f558601e822b7596e4bcc141d5c91a5a8859628/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala#L59) has changed to `col1#5L`.
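The chain is visible in the trace: `AliasViewChild` introduces `cast(col1#5L as bigint) AS col1#6L`, `SimplifyCasts` reduces it to `col1#5L AS col1#6L`, and `RemoveRedundantAliases` then drops an `Alias` that merely renames an `Attribute` to its own name, rewriting references to the underlying attribute. A toy catalyst-level sketch of that last step (a hand-built plan, only for illustration; assumes `spark-catalyst` on the classpath):
```scala
import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference}
import org.apache.spark.sql.catalyst.optimizer.RemoveRedundantAliases
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Project}
import org.apache.spark.sql.types.LongType

val col1 = AttributeReference("col1", LongType)()                    // stands in for col1#5L
val plan = Project(Seq(Alias(col1, "col1")()), LocalRelation(col1))  // col1#5L AS col1#<new id>
// The alias renames the attribute to its own name, so the rule strips it and the
// plan's output becomes the child attribute itself -- a different exprId (and here,
// a different case) than the output the command captured at analysis time.
println(RemoveRedundantAliases(plan).output)
```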
