JonasJ-ap opened a new pull request, #5939:
URL: https://github.com/apache/iceberg/pull/5939

   Fix the `NotSerializableException` thrown when `AssumeRoleAwsClientFactory` is used to configure the `GlueCatalog` in a Spark shell.
   
   Compiled `iceberg-spark-runtime-3.1` and tested on Glue 3.0.
   The following Spark script triggers the exception before the fix and succeeds after it:
   ```scala
   import org.apache.spark.SparkContext
   import org.apache.spark.sql.SparkSession
   
   import org.apache.iceberg.Table
   import org.apache.iceberg.aws.glue.GlueCatalog
   import org.apache.iceberg.catalog.Catalog
   import org.apache.iceberg.aws.AssumeRoleAwsClientFactory
   import org.apache.iceberg.catalog.TableIdentifier
   import org.apache.iceberg.spark.actions.SparkActions
   
   import scala.jdk.CollectionConverters._
   
   object GlueApp {
     def main(sysArgs: Array[String]): Unit = {
       val sparkContext: SparkContext = new SparkContext()
   
       // Configure an Iceberg SparkCatalog backed by GlueCatalog, with AWS clients
       // created through AssumeRoleAwsClientFactory.
       val spark: SparkSession = SparkSession.builder
         .config("spark.sql.catalog.demo", "org.apache.iceberg.spark.SparkCatalog")
         .config("spark.sql.catalog.demo.warehouse", "s3://gluetestjonas/warehouse")
         .config("spark.sql.catalog.demo.catalog-impl", "org.apache.iceberg.aws.glue.GlueCatalog")
         .config("spark.sql.catalog.demo.client.factory", "org.apache.iceberg.aws.AssumeRoleAwsClientFactory")
         .config("spark.sql.catalog.demo.client.assume-role.arn", "arn:aws:iam::481640105715:role/jonasjiang_gluejob2")
         .config("spark.sql.catalog.demo.client.assume-role.region", "us-east-1")
         .config("spark.sql.catalog.demo.client.assume-role.session-name", "mytestname")
         .getOrCreate()
   
       spark.sql("CREATE DATABASE IF NOT EXISTS demo.reviewsjonas")
   
       // Write a public Parquet dataset into an Iceberg format-version 2 table;
       // this is the step that exercises serialization of the client factory.
       val book_reviews_location = "s3://amazon-reviews-pds/parquet/product_category=Books/*.parquet"
       val book_reviews = spark.read.parquet(book_reviews_location)
   
       book_reviews.writeTo("demo.reviewsjonas.book_reviews_session_name")
         .tableProperty("format-version", "2")
         .createOrReplace()
   
       // read using SQL
       // spark.sql("SELECT * FROM demo.reviews.book_reviews").show()
     }
   }
   ```
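   
   For context, the usual way to avoid this kind of `NotSerializableException` in Spark is to keep only serializable configuration (plain strings) as factory state and construct the AWS clients lazily after deserialization. The sketch below only illustrates that general pattern; the class name, constructor parameters, and method are hypothetical and are not the actual change made in this PR.
   
   ```scala
   import software.amazon.awssdk.regions.Region
   import software.amazon.awssdk.services.glue.GlueClient
   import software.amazon.awssdk.services.sts.StsClient
   import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider
   import software.amazon.awssdk.services.sts.model.AssumeRoleRequest
   
   // Only plain strings are stored as fields, so Spark can serialize the factory
   // with task closures without dragging an AWS client or client builder along.
   class LazyAssumeRoleGlueClientFactory(
       roleArn: String,
       sessionName: String,
       region: String) extends Serializable {
   
     // @transient + lazy: the client is never serialized; it is rebuilt from the
     // string configuration on whichever JVM first calls glueClient().
     @transient private lazy val glue: GlueClient = {
       val sts = StsClient.builder().region(Region.of(region)).build()
       val credentials = StsAssumeRoleCredentialsProvider.builder()
         .stsClient(sts)
         .refreshRequest(
           AssumeRoleRequest.builder()
             .roleArn(roleArn)
             .roleSessionName(sessionName)
             .build())
         .build()
       GlueClient.builder()
         .region(Region.of(region))
         .credentialsProvider(credentials)
         .build()
     }
   
     def glueClient(): GlueClient = glue
   }
   ```
   
   The key point is that only strings cross the serialization boundary; whether the real factory builds its clients in its initialization hook or on first use, no live SDK client needs to be part of the serialized state.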
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

