zhangbutao commented on PR #5606:
URL: https://github.com/apache/hive/pull/5606#issuecomment-2677513059
Before merge this PR. i think we need to provide a doc to guide users how to
use this feature. Otherwise no one would know how to use this feature.
I tried the rest catalog with Spark, but found some issues. I am not sure i
missed some configuration:
1. HMS configuration
```
<property>
<name>hive.metastore.catalog.servlet.port</name>
<value>9088</value>
<description>iceberg rest catalog port</description>
</property>
<property>
<name>hive.metastore.catalog.servlet.auth</name>
<value>simple</value>
<description></description>
</property>
<property>
<name>hive.metastore.properties.servlet.auth</name>
<value>simple</value>
<description></description>
</property>
```
2. Use Spark to connect the HMS-Iceberg rest catalog:
```
spark-3.5.0-bin-hadoop3/bin/spark-sql \
--master local \
--deploy-mode client \
--conf
spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
\
--conf spark.sql.catalog.rest=org.apache.iceberg.spark.SparkCatalog \
--conf spark.sql.catalog.rest.type=rest \
--conf spark.sql.catalog.rest.uri=http://127.0.0.1:9088/iceberg/
```
**spark-sql (default)> use rest;**
```
25/02/24 14:06:37 WARN ErrorHandlers: Unable to parse error response
java.io.UncheckedIOException:
org.apache.iceberg.shaded.com.fasterxml.jackson.core.JsonParseException:
Unrecognized token 'Authentication': was expecting (JSON String, Number, Array,
Object or token 'null', 'true' or 'false')
at [Source: (String)"Authentication error: User header x-actor-username
missing in request
"; line: 1, column: 15]
at org.apache.iceberg.util.JsonUtil.parse(JsonUtil.java:101)
at
org.apache.iceberg.rest.responses.ErrorResponseParser.fromJson(ErrorResponseParser.java:71)
at
org.apache.iceberg.rest.ErrorHandlers$DefaultErrorHandler.parseResponse(ErrorHandlers.java:194)
at
org.apache.iceberg.rest.HTTPClient.throwFailure(HTTPClient.java:181)
at org.apache.iceberg.rest.HTTPClient.execute(HTTPClient.java:323)
at org.apache.iceberg.rest.HTTPClient.execute(HTTPClient.java:262)
at org.apache.iceberg.rest.HTTPClient.get(HTTPClient.java:358)
at
org.apache.iceberg.rest.RESTSessionCatalog.fetchConfig(RESTSessionCatalog.java:980)
at
org.apache.iceberg.rest.RESTSessionCatalog.initialize(RESTSessionCatalog.java:223)
at
org.apache.iceberg.rest.RESTCatalog.initialize(RESTCatalog.java:78)
at org.apache.iceberg.CatalogUtil.loadCatalog(CatalogUtil.java:256)
at
org.apache.iceberg.CatalogUtil.buildIcebergCatalog(CatalogUtil.java:310)
at
org.apache.iceberg.spark.SparkCatalog.buildIcebergCatalog(SparkCatalog.java:154)
at
org.apache.iceberg.spark.SparkCatalog.initialize(SparkCatalog.java:753)
at
org.apache.spark.sql.connector.catalog.Catalogs$.load(Catalogs.scala:65)
at
org.apache.spark.sql.connector.catalog.CatalogManager.$anonfun$catalog$1(CatalogManager.scala:54)
at scala.collection.mutable.HashMap.getOrElseUpdate(HashMap.scala:86)
at
org.apache.spark.sql.connector.catalog.CatalogManager.catalog(CatalogManager.scala:54)
at
org.apache.spark.sql.connector.catalog.LookupCatalog$CatalogAndNamespace$.unapply(LookupCatalog.scala:86)
at
org.apache.spark.sql.catalyst.analysis.ResolveCatalogs$$anonfun$apply$1.applyOrElse(ResolveCatalogs.scala:51)
at
org.apache.spark.sql.catalyst.analysis.ResolveCatalogs$$anonfun$apply$1.applyOrElse(ResolveCatalogs.scala:30)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$2(AnalysisHelper.scala:170)
at
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$1(AnalysisHelper.scala:170)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:323)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning(AnalysisHelper.scala:168)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning$(AnalysisHelper.scala:164)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsDownWithPruning(LogicalPlan.scala:32)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$4(AnalysisHelper.scala:175)
at
org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren(TreeNode.scala:1216)
at
org.apache.spark.sql.catalyst.trees.UnaryLike.mapChildren$(TreeNode.scala:1215)
at
org.apache.spark.sql.catalyst.plans.logical.SetCatalogAndNamespace.mapChildren(v2Commands.scala:941)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDownWithPruning$1(AnalysisHelper.scala:175)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:323)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning(AnalysisHelper.scala:168)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDownWithPruning$(AnalysisHelper.scala:164)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsDownWithPruning(LogicalPlan.scala:32)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsWithPruning(AnalysisHelper.scala:99)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsWithPruning$(AnalysisHelper.scala:96)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsWithPruning(LogicalPlan.scala:32)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperators(AnalysisHelper.scala:76)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperators$(AnalysisHelper.scala:75)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:32)
at
org.apache.spark.sql.catalyst.analysis.ResolveCatalogs.apply(ResolveCatalogs.scala:30)
at
org.apache.spark.sql.catalyst.analysis.ResolveCatalogs.apply(ResolveCatalogs.scala:27)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:222)
at
scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
at
scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
at scala.collection.immutable.List.foldLeft(List.scala:91)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:219)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:211)
at scala.collection.immutable.List.foreach(List.scala:431)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:211)
at
org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:240)
at
org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$execute$1(Analyzer.scala:236)
at
org.apache.spark.sql.catalyst.analysis.AnalysisContext$.withNewAnalysisContext(Analyzer.scala:187)
at
org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:236)
at
org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:202)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:182)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:89)
at
org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:182)
at
org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:223)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:330)
at
org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:222)
at
org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:77)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:138)
at
org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:219)
at
org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:546)
at
org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:219)
at
org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
at
org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:218)
at
org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:77)
at
org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:74)
at
org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:66)
at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)
at
org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
at
org.apache.spark.sql.SparkSession.$anonfun$sql$4(SparkSession.scala:691)
at
org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:682)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:713)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:744)
at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:651)
at
org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:68)
at
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:501)
at
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.$anonfun$processLine$1(SparkSQLCLIDriver.scala:619)
at
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.$anonfun$processLine$1$adapted(SparkSQLCLIDriver.scala:613)
at scala.collection.Iterator.foreach(Iterator.scala:943)
at scala.collection.Iterator.foreach$(Iterator.scala:943)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
at scala.collection.IterableLike.foreach(IterableLike.scala:74)
at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
at
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processLine(SparkSQLCLIDriver.scala:613)
at
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:310)
at
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at
org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:1029)
at
org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:194)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:217)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:91)
at
org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1120)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1129)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by:
org.apache.iceberg.shaded.com.fasterxml.jackson.core.JsonParseException:
Unrecognized token 'Authentication': was expecting (JSON String, Number, Array,
Object or token 'null', 'true' or 'false')
at [Source: (String)"Authentication error: User header x-actor-username
missing in request
"; line: 1, column: 15]
at
org.apache.iceberg.shaded.com.fasterxml.jackson.core.JsonParser._constructError(JsonParser.java:2477)
at
org.apache.iceberg.shaded.com.fasterxml.jackson.core.base.ParserMinimalBase._reportError(ParserMinimalBase.java:760)
at
org.apache.iceberg.shaded.com.fasterxml.jackson.core.json.ReaderBasedJsonParser._reportInvalidToken(ReaderBasedJsonParser.java:3041)
at
org.apache.iceberg.shaded.com.fasterxml.jackson.core.json.ReaderBasedJsonParser._handleOddValue(ReaderBasedJsonParser.java:2082)
at
org.apache.iceberg.shaded.com.fasterxml.jackson.core.json.ReaderBasedJsonParser.nextToken(ReaderBasedJsonParser.java:808)
at
org.apache.iceberg.shaded.com.fasterxml.jackson.databind.ObjectMapper._initForReading(ObjectMapper.java:4912)
at
org.apache.iceberg.shaded.com.fasterxml.jackson.databind.ObjectMapper._readMapAndClose(ObjectMapper.java:4818)
at
org.apache.iceberg.shaded.com.fasterxml.jackson.databind.ObjectMapper.readValue(ObjectMapper.java:3772)
at
org.apache.iceberg.shaded.com.fasterxml.jackson.databind.ObjectMapper.readValue(ObjectMapper.java:3740)
at org.apache.iceberg.util.JsonUtil.parse(JsonUtil.java:99)
... 107 more
25/02/24 14:06:37 ERROR SparkSQLDriver: Failed in [use rest]
```
**HMS error log:**
```
2025-02-24T14:00:51,196 ERROR [qtp43748853-45] metastore.ServletSecurity:
Authentication error:
org.apache.hadoop.hive.metastore.auth.HttpAuthenticationException: User
header x-actor-username missing in request
at
org.apache.hadoop.hive.metastore.ServletSecurity.extractUserName(ServletSecurity.java:219)
~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT]
at
org.apache.hadoop.hive.metastore.ServletSecurity.execute(ServletSecurity.java:182)
~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT]
at
org.apache.hadoop.hive.metastore.ServletSecurity$ProxyServlet.service(ServletSecurity.java:130)
~[hive-exec-4.1.0-SNAPSHOT.jar:4.1.0-SNAPSHOT]
at javax.servlet.http.HttpServlet.service(HttpServlet.java:790)
~[javax.servlet-api-3.1.0.jar:3.1.0]
at
org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:799)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:550)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.server.handler.gzip.GzipHandler.handle(GzipHandler.java:763)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:235)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1440)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:188)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:501)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:186)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1355)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:191)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at org.eclipse.jetty.server.Server.handle(Server.java:516)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.server.HttpChannel.lambda$handle$1(HttpChannel.java:487)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.server.HttpChannel.dispatch(HttpChannel.java:732)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:479)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:277)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311)
~[jetty-io-9.4.45.v20220203.jar:9.4.45.v20220203]
at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:105)
~[jetty-io-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.io.ChannelEndPoint$1.run(ChannelEndPoint.java:104)
~[jetty-io-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:338)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:315)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:173)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:137)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:883)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at
org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1034)
~[jetty-runner-9.4.45.v20220203.jar:9.4.45.v20220203]
at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_221]
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]