yuqi1129 opened a new issue, #6054:
URL: https://github.com/apache/gravitino/issues/6054
### What would you like to be improved?
py4j.protocol.Py4JJavaError: An error occurred while calling o69.csv.
: java.io.IOException: Error getting StorageObject: : bucket:
strato-iceberg, object: 1/
at
com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl.wrapException(GoogleCloudStorageImpl.java:1871)
at
com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl$12.onFailure(GoogleCloudStorageImpl.java:1634)
at
com.google.api.client.googleapis.batch.json.JsonBatchCallback.onFailure(JsonBatchCallback.java:54)
at
com.google.api.client.googleapis.batch.json.JsonBatchCallback.onFailure(JsonBatchCallback.java:50)
at
com.google.api.client.googleapis.batch.BatchUnparsedResponse.parseAndCallback(BatchUnparsedResponse.java:223)
at
com.google.api.client.googleapis.batch.BatchUnparsedResponse.parseNextResponse(BatchUnparsedResponse.java:155)
at
com.google.api.client.googleapis.batch.BatchRequest.execute(BatchRequest.java:253)
at
com.google.cloud.hadoop.gcsio.BatchHelper.lambda$flushPendingRequests$2(BatchHelper.java:219)
at
org.apache.gravitino.gcp.shaded.com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:131)
at
org.apache.gravitino.gcp.shaded.com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:76)
at
org.apache.gravitino.gcp.shaded.com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:82)
at
org.apache.gravitino.gcp.shaded.com.google.common.util.concurrent.DirectExecutorService.execute(DirectExecutorService.java:51)
at
java.util.concurrent.AbstractExecutorService.submit(AbstractExecutorService.java:134)
at
org.apache.gravitino.gcp.shaded.com.google.common.util.concurrent.AbstractListeningExecutorService.submit(AbstractListeningExecutorService.java:79)
at
org.apache.gravitino.gcp.shaded.com.google.common.util.concurrent.AbstractListeningExecutorService.submit(AbstractListeningExecutorService.java:37)
at
com.google.cloud.hadoop.gcsio.BatchHelper.flushPendingRequests(BatchHelper.java:217)
at
com.google.cloud.hadoop.gcsio.BatchHelper.flushIfPossible(BatchHelper.java:197)
at com.google.cloud.hadoop.gcsio.BatchHelper.flush(BatchHelper.java:230)
at
com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl.getItemInfos(GoogleCloudStorageImpl.java:1641)
at
com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem.getFileInfos(GoogleCloudStorageFileSystem.java:1202)
at
com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem.mkdirs(GoogleCloudStorageFileSystem.java:526)
at
com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.mkdirs(GoogleHadoopFileSystemBase.java:1392)
at
org.apache.gravitino.filesystem.hadoop.GravitinoVirtualFileSystem.mkdirs(GravitinoVirtualFileSystem.java:593)
at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:2388)
at
org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.setupJob(FileOutputCommitter.java:356)
at
org.apache.spark.internal.io.HadoopMapReduceCommitProtocol.setupJob(HadoopMapReduceCommitProtocol.scala:178)
at
org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:182)
at
org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:186)
at
org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
at
org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
at
org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
at
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
at
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
at
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
at
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
at
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
at
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
at
org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
at
org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
at
org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
at
org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
at
org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
at
org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
at
org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:355)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
at org.apache.spark.sql.DataFrameWriter.csv(DataFrameWriter.scala:839)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at
py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
at java.lang.Thread.run(Thread.java:750)
Caused by: java.io.IOException:
{"code":403,"errors":[{"domain":"global","message":"[email protected]
does not have storage.objects.get access to the Google Cloud Storage object.
Permission 'storage.objects.get' denied on resource (or it may not
exist).","reason":"forbidden"}],"message":"[email protected]
does not have storage.objects.get access to the Google Cloud Storage object.
Permission 'storage.objects.get' denied on resource (or it may not exist)."}
at
com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl$12.onFailure(GoogleCloudStorageImpl.java:1632)
... 67 more
### How should we improve?
1. Create a GCS fileset with credentials enabled.
2. Use Spark to access it:
```
spark = SparkSession.builder \
.appName("gcs_fielset_test") \
.config("spark.hadoop.fs.AbstractFileSystem.gvfs.impl",
"org.apache.gravitino.filesystem.hadoop.Gvfs") \
.config("spark.hadoop.fs.gvfs.impl",
"org.apache.gravitino.filesystem.hadoop.GravitinoVirtualFileSystem") \
.config("spark.hadoop.fs.gravitino.server.uri", "http://localhost:8090") \
.config("spark.hadoop.fs.gravitino.client.metalake", "test") \
.config("spark.driver.memory", "2g") \
.config("spark.driver.port", "2048") \
.getOrCreate()
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]