[
https://issues.apache.org/jira/browse/HUDI-8853?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17911694#comment-17911694
]
Mansi Patel edited comment on HUDI-8853 at 1/9/25 11:02 PM:
------------------------------------------------------------
Reproduction steps:
{code:java}
spark-shell --jars /usr/lib/hudi/hudi-spark-bundle.jar \
--conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
--conf
"spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog"
\
--conf
"spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension"
{code}
{code:java}
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.functions._
import org.apache.hudi.DataSourceWriteOptions
import org.apache.hudi.DataSourceReadOptions
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.hudi.hive.MultiPartKeysValueExtractor
import org.apache.hudi.hive.HiveSyncConfig
import org.apache.hudi.sync.common.HoodieSyncConfig
// Create a DataFrame
val inputDF = Seq(
("100", "2015-01-01", "2015-01-01T13:51:39.340396Z"),
("101", "2015-01-01", "2015-01-01T12:14:58.597216Z"),
("102", "2015-01-01", "2015-01-01T13:51:40.417052Z"),
("103", "2015-01-01", "2015-01-01T13:51:40.519832Z"),
("104", "2015-01-02", "2015-01-01T12:15:00.512679Z"),
("105", "2015-01-02", "2015-01-01T13:51:42.248818Z")
).toDF("id", "creation_date", "last_update_time")
//Specify common DataSourceWriteOptions in the single hudiOptions variable
val hudiOptions = Map[String,String](
HoodieWriteConfig.TBL_NAME.key -> "mansipp_hudi_fgac_table2",
DataSourceWriteOptions.TABLE_TYPE.key -> "COPY_ON_WRITE",
DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY -> "id",
DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY -> "creation_date",
DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY -> "last_update_time",
DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY -> "true",
DataSourceWriteOptions.HIVE_TABLE_OPT_KEY -> "mansipp_hudi_fgac_table2",
DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY -> "creation_date",
HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key ->
"org.apache.hudi.hive.MultiPartKeysValueExtractor",
HoodieSyncConfig.META_SYNC_ENABLED.key -> "true",
HiveSyncConfig.HIVE_SYNC_MODE.key -> "hms",
HoodieSyncConfig.META_SYNC_TABLE_NAME.key -> "mansipp_hudi_fgac_table2",
HoodieSyncConfig.META_SYNC_PARTITION_FIELDS.key -> "creation_date"
)
// Write the DataFrame as a Hudi dataset
(inputDF.write
.format("hudi")
.options(hudiOptions)
.option(DataSourceWriteOptions.OPERATION_OPT_KEY,"insert")
.option("hoodie.schema.on.read.enable","true")
.mode(SaveMode.Overwrite)
.save("s3://mansipp-emr-dev/hudi/mansipp_hudi_fgac_table2/"))
{code}
{code:java}
spark.sql("ALTER TABLE mansipp_hudi_fgac_table2 DROP COLUMN creation_date");
spark.sql("ALTER TABLE mansipp_hudi_fgac_table2 REPLACE COLUMNS (id int, name
varchar(10), city string)");
spark.sql("ALTER TABLE mansipp_hudi_fgac_table2 RENAME COLUMN creation_date TO
creation_date_renamed"); {code}
was (Author: JIRAUSER300888):
Reproduction steps:
{code:java}
spark-shell --jars /usr/lib/hudi/hudi-spark-bundle.jar \
--conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
--conf
"spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog"
\
--conf
"spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension"
{code}
{code:java}
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.functions._
import org.apache.hudi.DataSourceWriteOptions
import org.apache.hudi.DataSourceReadOptions
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.hudi.hive.MultiPartKeysValueExtractor
import org.apache.hudi.hive.HiveSyncConfig
import org.apache.hudi.sync.common.HoodieSyncConfig
// Create a DataFrame
val inputDF = Seq(
("100", "2015-01-01", "2015-01-01T13:51:39.340396Z"),
("101", "2015-01-01", "2015-01-01T12:14:58.597216Z"),
("102", "2015-01-01", "2015-01-01T13:51:40.417052Z"),
("103", "2015-01-01", "2015-01-01T13:51:40.519832Z"),
("104", "2015-01-02", "2015-01-01T12:15:00.512679Z"),
("105", "2015-01-02", "2015-01-01T13:51:42.248818Z")
).toDF("id", "creation_date", "last_update_time")
//Specify common DataSourceWriteOptions in the single hudiOptions variable
val hudiOptions = Map[String,String](
HoodieWriteConfig.TBL_NAME.key -> "mansipp_hudi_fgac_table2",
DataSourceWriteOptions.TABLE_TYPE.key -> "COPY_ON_WRITE",
DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY -> "id",
DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY -> "creation_date",
DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY -> "last_update_time",
DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY -> "true",
DataSourceWriteOptions.HIVE_TABLE_OPT_KEY -> "mansipp_hudi_fgac_table2",
DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY -> "creation_date",
HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key ->
"org.apache.hudi.hive.MultiPartKeysValueExtractor",
HoodieSyncConfig.META_SYNC_ENABLED.key -> "true",
HiveSyncConfig.HIVE_SYNC_MODE.key -> "hms",
HoodieSyncConfig.META_SYNC_TABLE_NAME.key -> "mansipp_hudi_fgac_table2",
HoodieSyncConfig.META_SYNC_PARTITION_FIELDS.key -> "creation_date"
)
// Write the DataFrame as a Hudi dataset
(inputDF.write
.format("hudi")
.options(hudiOptions)
.option(DataSourceWriteOptions.OPERATION_OPT_KEY,"insert")
.option("hoodie.schema.on.read.enable","true")
.mode(SaveMode.Overwrite)
.save("s3://mansipp-emr-dev/hudi/mansipp_hudi_fgac_table2/"))
{code}
> Spark sql ALTER TABLE queries are failing on EMR
> ------------------------------------------------
>
> Key: HUDI-8853
> URL: https://issues.apache.org/jira/browse/HUDI-8853
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: Mansi Patel
> Priority: Major
>
> Some of the Spark SQL DDL queries are failing on EMR. The failing queries are
> listed below:
> 1. ALTER TABLE DROP COLUMN
> 2. ALTER TABLE REPLACE COLUMNS
> 3. ALTER TABLE RENAME COLUMN
> {code:java}
> scala> spark.sql("ALTER TABLE mansipp_hudi_fgac_table DROP COLUMN
> creation_date"); org.apache.spark.sql.AnalysisException:
> [UNSUPPORTED_FEATURE.TABLE_OPERATION] The feature is not supported: Table
> `spark_catalog`.`default`.`mansipp_hudi_fgac_table` does not support DROP
> COLUMN. Please check the current catalog and namespace to make sure the
> qualified table name is expected, and also check the catalog implementation
> which is configured by "spark.sql.catalog". at
> org.apache.spark.sql.errors.QueryCompilationErrors$.unsupportedTableOperationError(QueryCompilationErrors.scala:847)
> at
> org.apache.spark.sql.errors.QueryCompilationErrors$.unsupportedTableOperationError(QueryCompilationErrors.scala:837)
> at
> org.apache.spark.sql.catalyst.analysis.ResolveSessionCatalog$$anonfun$apply$1.applyOrElse(ResolveSessionCatalog.scala:110)
> {code}
> {code:java}
> scala> spark.sql("ALTER TABLE mansipp_hudi_fgac_table REPLACE COLUMNS (id
> int, name varchar(10), city string)");
> org.apache.spark.sql.AnalysisException: [UNSUPPORTED_FEATURE.TABLE_OPERATION]
> The feature is not supported: Table
> `spark_catalog`.`default`.`mansipp_hudi_fgac_table` does not support REPLACE
> COLUMNS. Please check the current catalog and namespace to make sure the
> qualified table name is expected, and also check the catalog implementation
> which is configured by "spark.sql.catalog".
> at
> org.apache.spark.sql.errors.QueryCompilationErrors$.unsupportedTableOperationError(QueryCompilationErrors.scala:847)
> at
> org.apache.spark.sql.errors.QueryCompilationErrors$.unsupportedTableOperationError(QueryCompilationErrors.scala:837)
> at
> org.apache.spark.sql.catalyst.analysis.ResolveSessionCatalog$$anonfun$apply$1.applyOrElse(ResolveSessionCatalog.scala:66)
> at
> org.apache.spark.sql.catalyst.analysis.ResolveSessionCatalog$$anonfun$apply$1.applyOrElse(ResolveSessionCatalog.scala:52)
> {code}
> {code:java}
> scala> spark.sql("ALTER TABLE mansipp_hudi_fgac_table RENAME COLUMN
> creation_date TO creation_date_renamed"); 25/01/09 00:38:42 WARN HiveConf:
> HiveConf of name hive.server2.thrift.url does not exist SLF4J: Failed to load
> class "org.slf4j.impl.StaticLoggerBinder". SLF4J: Defaulting to no-operation
> (NOP) logger implementation SLF4J: See
> http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
> org.apache.spark.sql.AnalysisException: [UNSUPPORTED_FEATURE.TABLE_OPERATION]
> The feature is not supported: Table
> `spark_catalog`.`default`.`mansipp_hudi_fgac_table` does not support RENAME
> COLUMN. Please check the current catalog and namespace to make sure the
> qualified table name is expected, and also check the catalog implementation
> which is configured by "spark.sql.catalog". at
> org.apache.spark.sql.errors.QueryCompilationErrors$.unsupportedTableOperationError(QueryCompilationErrors.scala:847)
> at org.apache.spark.sql.errors.QueryCompila
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)