[
https://issues.apache.org/jira/browse/SPARK-39378?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17547918#comment-17547918
]
melin commented on SPARK-39378:
-------------------------------
The following two statements each fail with an error:
{code:java}
String sql = "CREATE OR REPLACE TABLE huaixin_rp.bigdata.parquet_orders_rp
USING PARQUET " +
"PARTITIONED BY (ds, type) " +
" select * from huaixin_rp.bigdata.test_orders_rp";
spark.sql(sql);{code}
Exception in thread "main" org.apache.spark.sql.AnalysisException: Unable to
infer schema for Parquet. It must be specified manually.
at
org.apache.spark.sql.errors.QueryCompilationErrors$.dataSchemaNotSpecifiedError(QueryCompilationErrors.scala:974)
at
org.apache.spark.sql.execution.datasources.v2.FileTable.$anonfun$dataSchema$5(FileTable.scala:72)
at scala.Option.getOrElse(Option.scala:189)
at
org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema$lzycompute(FileTable.scala:72)
at
org.apache.spark.sql.execution.datasources.v2.FileTable.dataSchema(FileTable.scala:64)
at
org.apache.spark.sql.execution.datasources.v2.FileTable.schema$lzycompute(FileTable.scala:82)
at
org.apache.spark.sql.execution.datasources.v2.FileTable.schema(FileTable.scala:80)
at
com.aloudata.spark.catalog.hadoop.StagedParquetTable.schema(StagedParquetTable.scala:42)
at
org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation$.create(DataSourceV2Relation.scala:175)
at
org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation$.create(DataSourceV2Relation.scala:183)
at
org.apache.spark.sql.execution.datasources.v2.DataSourceV2Strategy.invalidateCache(DataSourceV2Strategy.scala:84)
at
org.apache.spark.sql.execution.datasources.v2.DataSourceV2Strategy.$anonfun$apply$6(DataSourceV2Strategy.scala:197)
at
org.apache.spark.sql.execution.datasources.v2.DataSourceV2Strategy.$anonfun$apply$6$adapted(DataSourceV2Strategy.scala:197)
at
org.apache.spark.sql.execution.datasources.v2.AtomicReplaceTableAsSelectExec.run(WriteToDataSourceV2Exec.scala:208)
{code:java}
spark.sql("INSERT OVERWRITE TABLE huaixin_rp.bigdata.parquet_orders_rp " +
"PARTITION(ds=20220602, type='zz') select 5 as no, 'zz' as name, 10.0 as
price");{code}
Exception in thread "main" org.apache.spark.sql.AnalysisException: Table
parquet
hdfs://newns:8020/user/aloudata/warehouse/huaixin_rp/bigdata/parquet_orders_rp
does not support dynamic overwrite in batch mode.;
OverwritePartitionsDynamic RelationV2[no#27, name#28, price#29, ds#30, type#31]
parquet
hdfs://newns:8020/user/aloudata/warehouse/huaixin_rp/bigdata/parquet_orders_rp,
false
+- Project [ansi_cast(no#24 as string) AS no#34, name#25, ansi_cast(price#26 as
string) AS price#35, ds#32, type#33]
+- Project [no#24, name#25, price#26, ansi_cast(20220602 as int) AS ds#32,
ansi_cast(zz as string) AS type#33]
+- Project [5 AS no#24, zz AS name#25, 10.0 AS price#26]
+- OneRowRelation
at
org.apache.spark.sql.errors.QueryCompilationErrors$.unsupportedTableOperationError(QueryCompilationErrors.scala:801)
at
org.apache.spark.sql.errors.QueryCompilationErrors$.unsupportedDynamicOverwriteInBatchModeError(QueryCompilationErrors.scala:817)
at
org.apache.spark.sql.execution.datasources.v2.TableCapabilityCheck$.$anonfun$apply$1(TableCapabilityCheck.scala:54)
at
org.apache.spark.sql.execution.datasources.v2.TableCapabilityCheck$.$anonfun$apply$1$adapted(TableCapabilityCheck.scala:40)
at org.apache.spark.sql.catalyst.trees.TreeNode.foreach(TreeNode.scala:253)
at
org.apache.spark.sql.execution.datasources.v2.TableCapabilityCheck$.apply(TableCapabilityCheck.scala:40)
at
org.apache.spark.sql.execution.datasources.v2.TableCapabilityCheck$.apply(TableCapabilityCheck.scala:32)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$37(CheckAnalysis.scala:550)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$37$adapted(CheckAnalysis.scala:550)
> Customize metastore and catalog, use ParquetTable, Cannot directly use CTAS
> and Insert SQL
> -------------------------------------------------------------------------------------------
>
> Key: SPARK-39378
> URL: https://issues.apache.org/jira/browse/SPARK-39378
> Project: Spark
> Issue Type: Improvement
> Components: SQL
> Affects Versions: 3.4.0
> Reporter: melin
> Priority: Major
>
--
This message was sent by Atlassian Jira
(v8.20.7#820007)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]