JonasJ-ap opened a new pull request, #5939:
URL: https://github.com/apache/iceberg/pull/5939
Fix the `NotSerializableException` thrown when `AssumeRoleAwsClientFactory`
is used to configure the `GlueCatalog` in a Spark shell.
Compiled iceberg-spark-runtime-3.1 and tested on Glue 3.0.
The following Spark script triggers the exception before the fix and
succeeds after the fix:
```
import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSession
import org.apache.iceberg.Table
import org.apache.iceberg.aws.glue.GlueCatalog
import org.apache.iceberg.catalog.Catalog
import org.apache.iceberg.aws.AssumeRoleAwsClientFactory
import org.apache.iceberg.catalog.TableIdentifier
import org.apache.iceberg.spark.actions.SparkActions
import scala.jdk.CollectionConverters._
/**
 * Reproduction job for the `NotSerializableException` seen when the `demo`
 * catalog is configured with `AssumeRoleAwsClientFactory`.
 *
 * Registers a catalog named `demo` (Iceberg `SparkCatalog` with `GlueCatalog`
 * as its implementation), creates the `demo.reviewsjonas` database if it is
 * missing, and writes Parquet input into a format-version 2 table there.
 */
object GlueApp {

  /**
   * Job entry point.
   *
   * @param sysArgs command-line arguments; not read by this script
   */
  def main(sysArgs: Array[String]): Unit = {
    // Constructed before the session builder runs, as in the original script.
    // NOTE(review): presumably `getOrCreate()` below reuses this context - confirm.
    val sparkContext: SparkContext = new SparkContext()

    // Every `spark.sql.catalog.demo.*` key configures the catalog named `demo`.
    val spark: SparkSession = SparkSession.builder
      .config("spark.sql.catalog.demo",
        "org.apache.iceberg.spark.SparkCatalog")
      .config("spark.sql.catalog.demo.warehouse",
        "s3://gluetestjonas/warehouse")
      .config("spark.sql.catalog.demo.catalog-impl",
        "org.apache.iceberg.aws.glue.GlueCatalog")
      // The assume-role client factory is the component under test.
      .config("spark.sql.catalog.demo.client.factory",
        "org.apache.iceberg.aws.AssumeRoleAwsClientFactory")
      .config("spark.sql.catalog.demo.client.assume-role.arn",
        "arn:aws:iam::481640105715:role/jonasjiang_gluejob2")
      .config("spark.sql.catalog.demo.client.assume-role.region",
        "us-east-1")
      .config("spark.sql.catalog.demo.client.assume-role.session-name",
        "mytestname")
      .getOrCreate()

    spark.sql("CREATE DATABASE IF NOT EXISTS demo.reviewsjonas")

    val bookReviewsLocation =
      "s3://amazon-reviews-pds/parquet/product_category=Books/*.parquet"
    val bookReviews = spark.read.parquet(bookReviewsLocation)

    // Write the input into the `demo` catalog; createOrReplace() makes the
    // script re-runnable against an existing table.
    bookReviews.writeTo("demo.reviewsjonas.book_reviews_session_name")
      .tableProperty("format-version", "2")
      .createOrReplace()

    // Optional read-back of the table written above, using SQL:
    // spark.sql("SELECT * FROM demo.reviewsjonas.book_reviews_session_name").show()
  }
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]