soumilshah1995 opened a new issue, #11257:
URL: https://github.com/apache/hudi/issues/11257
# Sample Code
```
# Guard the imports so a missing dependency prints a readable message
# instead of an unhandled traceback.
try:
    import os
    import sys
    import uuid
    import random

    import pandas as pd  # Import Pandas library for pretty printing
    import pyspark
    from pyspark import SparkConf, SparkContext
    from pyspark.sql import SparkSession
    from faker import Faker
    # The script calls datetime.now(); only the class import is needed.
    # (The original also did `import datetime` twice, which this shadowed.)
    from datetime import datetime

    print("Imports loaded ")
except ImportError as e:
    print("error", e)
# Hudi/Spark versions used to resolve the spark-submit bundle artifact.
HUDI_VERSION = '1.0.0-beta1'
SPARK_VERSION = '3.4'

os.environ["JAVA_HOME"] = "/opt/homebrew/opt/openjdk@11"

# NOTE(review): in the issue paste this f-string was broken across three
# lines (a SyntaxError); it must be a single string handed to pyspark-shell.
SUBMIT_ARGS = (
    f"--packages "
    f"org.apache.hudi:hudi-spark{SPARK_VERSION}-bundle_2.12:{HUDI_VERSION} "
    f"pyspark-shell"
)
os.environ["PYSPARK_SUBMIT_ARGS"] = SUBMIT_ARGS
os.environ['PYSPARK_PYTHON'] = sys.executable
# Spark session with the Hudi extension and Kryo serialization enabled.
spark = SparkSession.builder \
    .config('spark.serializer',
            'org.apache.spark.serializer.KryoSerializer') \
    .config('spark.sql.extensions',
            'org.apache.spark.sql.hudi.HoodieSparkSessionExtension') \
    .config('className', 'org.apache.hudi') \
    .config('spark.sql.hive.convertMetastoreParquet', 'false') \
    .getOrCreate()

# Parquet Modular Encryption settings: footer signed with key k1, the
# customer_id column encrypted with key k2.
# NOTE(review): the reported ClassNotFoundException suggests
# org.apache.parquet.crypto.keytools.mocks.InMemoryKMS is not on the
# driver/executor classpath (it ships in parquet-hadoop's test artifact,
# not the bundle) — confirm that jar is provided before blaming the config.
hadoop_conf = spark._jsc.hadoopConfiguration()
hadoop_conf.set("parquet.crypto.factory.class",
                "org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory")
hadoop_conf.set("parquet.encryption.kms.client.class",
                "org.apache.parquet.crypto.keytools.mocks.InMemoryKMS")
hadoop_conf.set("parquet.encryption.footer.key", "k1")
hadoop_conf.set("parquet.encryption.column.keys", "k2:customer_id")

# Single Faker instance shared by the data generators below.
# (`global` at module level is a no-op and was dropped.)
faker = Faker()
def get_customer_data(total_customers=2):
    """Generate ``total_customers`` fake customer records.

    Each record is a dict with a stable key order: customer_id (uuid4 str),
    name, state, city, email, created_at (ISO-8601 str), address, salary
    (int in [30000, 100000]). The stable ordering matters because the caller
    builds the DataFrame schema from ``customer_data[0].keys()``.
    """
    customers_array = []
    for _ in range(total_customers):
        customers_array.append({
            "customer_id": str(uuid.uuid4()),
            "name": faker.name(),
            "state": faker.state(),
            "city": faker.city(),
            "email": faker.email(),
            # isoformat() already returns a str; the extra __str__() was redundant.
            "created_at": datetime.now().isoformat(),
            # Fixed typo: the original key was "adqdress", which would have
            # produced a misspelled DataFrame column.
            "address": faker.address(),
            "salary": faker.random_int(min=30000, max=100000)
        })
    return customers_array
# Build the customer DataFrame from the generated records.
# (The original `global total_customers, order_data_sample_size` was a
# module-level no-op referencing an undefined name; removed.)
total_customers = 10000
customer_data = get_customer_data(total_customers=total_customers)

spark_df_customers = spark.createDataFrame(
    data=[tuple(record.values()) for record in customer_data],
    schema=list(customer_data[0].keys()),
)
spark_df_customers.show(1, truncate=False)
spark_df_customers.printSchema()
def write_to_hudi(spark_df,
                  table_name,
                  db_name,
                  method='upsert',
                  table_type='COPY_ON_WRITE',
                  recordkey='',
                  precombine='',
                  partition_fields='',
                  index_type='BLOOM'
                  ):
    """Append ``spark_df`` to a local Hudi table under a file:// path.

    Parameters mirror the common hoodie.datasource.write.* options:
    method (write operation), table_type (COPY_ON_WRITE / MERGE_ON_READ),
    recordkey / precombine / partition_fields (field names), and index_type.
    RECORD_INDEX additionally enables the metadata table, column stats,
    data skipping and an in-process lock provider.
    """
    # NOTE(review): in the issue paste this f-string was split across two
    # lines (a SyntaxError); rejoined here. The original path segment
    # "table_name{table_name}" (no '=') is preserved byte-for-byte.
    path = (
        "file:///Users/soumilshah/IdeaProjects/SparkProject/tem/"
        f"database={db_name}/table_name{table_name}"
    )

    hudi_options = {
        'hoodie.table.name': table_name,
        'hoodie.datasource.write.table.type': table_type,
        'hoodie.datasource.write.table.name': table_name,
        'hoodie.datasource.write.operation': method,
        'hoodie.datasource.write.recordkey.field': recordkey,
        'hoodie.datasource.write.precombine.field': precombine,
        "hoodie.datasource.write.partitionpath.field": partition_fields,
        "hoodie.index.type": index_type,
    }
    if index_type == 'RECORD_INDEX':
        hudi_options.update({
            "hoodie.enable.data.skipping": "true",
            "hoodie.metadata.enable": "true",
            "hoodie.metadata.index.column.stats.enable": "true",
            "hoodie.write.concurrency.mode": "optimistic_concurrency_control",
            "hoodie.write.lock.provider":
                "org.apache.hudi.client.transaction.lock.InProcessLockProvider",
            "hoodie.metadata.record.index.enable": "true"
        })

    print("\n")
    print(path)
    print("\n")

    spark_df.write.format("hudi"). \
        options(**hudi_options). \
        mode("append"). \
        save(path)
# Kick off the write: COPY_ON_WRITE upsert keyed on customer_id,
# de-duplicated by created_at, partitioned by state, with the default
# BLOOM index (the failure reproduces regardless of index type).
write_to_hudi(
    spark_df=spark_df_customers,
    db_name="default",
    table_name="customers",
    recordkey="customer_id",
    precombine="created_at",
    partition_fields="state",
    index_type="BLOOM"
)
```
# Error
```
24/05/19 11:01:48 ERROR SimpleExecutor: Failed consuming records
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not
instantiate KmsClient class: null
at
org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:350)
at
org.apache.parquet.crypto.keytools.KeyToolkit.lambda$getKmsClient$0(KeyToolkit.java:330)
at
java.base/java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1705)
at
org.apache.parquet.crypto.keytools.KeyToolkit.getKmsClient(KeyToolkit.java:329)
at
org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:80)
at
org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:109)
at
org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory.getFileEncryptionProperties(PropertiesDrivenCryptoFactory.java:134)
at
org.apache.parquet.hadoop.ParquetOutputFormat.createEncryptionProperties(ParquetOutputFormat.java:552)
at
org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:285)
at
org.apache.parquet.hadoop.ParquetWriter$Builder.build(ParquetWriter.java:680)
at
org.apache.hudi.io.storage.HoodieBaseParquetWriter.<init>(HoodieBaseParquetWriter.java:80)
at
org.apache.hudi.io.storage.HoodieAvroParquetWriter.<init>(HoodieAvroParquetWriter.java:54)
at
org.apache.hudi.io.storage.HoodieAvroFileWriterFactory.newParquetFileWriter(HoodieAvroFileWriterFactory.java:69)
at
org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriterByFormat(HoodieFileWriterFactory.java:80)
at
org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriter(HoodieFileWriterFactory.java:67)
at
org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:104)
at
org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:76)
at
org.apache.hudi.io.CreateHandleFactory.create(CreateHandleFactory.java:45)
at
org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:101)
at
org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:44)
at
org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:69)
at
org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:78)
at
org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:37)
at
org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:119)
at
scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:46)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at
org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
at
org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
at
org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552)
at
org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462)
at
org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526)
at
org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:326)
at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:328)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
at org.apache.spark.scheduler.Task.run(Task.scala:139)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.parquet.hadoop.BadConfigurationException: could not
instantiate class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS set in
job conf at parquet.encryption.kms.client.class
at
org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:40)
at
org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:342)
... 46 more
Caused by: java.lang.ClassNotFoundException: Class
org.apache.parquet.crypto.keytools.mocks.InMemoryKMS not found
at
org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2592)
at
org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:33)
... 47 more
24/05/19 11:01:48 ERROR SimpleExecutor: Failed consuming records
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not
instantiate KmsClient class: null
at
org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:350)
at
org.apache.parquet.crypto.keytools.KeyToolkit.lambda$getKmsClient$0(KeyToolkit.java:330)
at
java.base/java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1705)
at
org.apache.parquet.crypto.keytools.KeyToolkit.getKmsClient(KeyToolkit.java:329)
at
org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:80)
at
org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:109)
at
org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory.getFileEncryptionProperties(PropertiesDrivenCryptoFactory.java:134)
at
org.apache.parquet.hadoop.ParquetOutputFormat.createEncryptionProperties(ParquetOutputFormat.java:552)
at
org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:285)
at
org.apache.parquet.hadoop.ParquetWriter$Builder.build(ParquetWriter.java:680)
at
org.apache.hudi.io.storage.HoodieBaseParquetWriter.<init>(HoodieBaseParquetWriter.java:80)
at
org.apache.hudi.io.storage.HoodieAvroParquetWriter.<init>(HoodieAvroParquetWriter.java:54)
at
org.apache.hudi.io.storage.HoodieAvroFileWriterFactory.newParquetFileWriter(HoodieAvroFileWriterFactory.java:69)
at
org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriterByFormat(HoodieFileWriterFactory.java:80)
at
org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriter(HoodieFileWriterFactory.java:67)
at
org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:104)
at
org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:76)
at
org.apache.hudi.io.CreateHandleFactory.create(CreateHandleFactory.java:45)
at
org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:101)
at
org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:44)
at
org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:69)
at
org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:78)
at
org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:37)
at
org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:119)
at
scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:46)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at
org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
at
org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
at
org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552)
at
org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462)
at
org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526)
at
org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:326)
at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:328)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
at org.apache.spark.scheduler.Task.run(Task.scala:139)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.parquet.hadoop.BadConfigurationException: could not
instantiate class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS set in
job conf at parquet.encryption.kms.client.class
at
org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:40)
at
org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:342)
... 46 more
Caused by: java.lang.ClassNotFoundException: Class
org.apache.parquet.crypto.keytools.mocks.InMemoryKMS not found
at
org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2592)
at
org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:33)
... 47 more
24/05/19 11:01:48 WARN BlockManager: Putting block rdd_153_10 failed due to
exception java.lang.RuntimeException:
org.apache.hudi.exception.HoodieException:
org.apache.hudi.exception.HoodieException:
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate
KmsClient class: null.
24/05/19 11:01:48 WARN BlockManager: Putting block rdd_153_7 failed due to
exception java.lang.RuntimeException:
org.apache.hudi.exception.HoodieException:
org.apache.hudi.exception.HoodieException:
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate
KmsClient class: null.
24/05/19 11:01:48 WARN BlockManager: Block rdd_153_10 could not be removed
as it was not found on disk or in memory
24/05/19 11:01:48 WARN BlockManager: Block rdd_153_7 could not be removed as
it was not found on disk or in memory
24/05/19 11:01:48 ERROR SimpleExecutor: Failed consuming records
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not
instantiate KmsClient class: null
at
org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:350)
at
org.apache.parquet.crypto.keytools.KeyToolkit.lambda$getKmsClient$0(KeyToolkit.java:330)
at
java.base/java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1705)
at
org.apache.parquet.crypto.keytools.KeyToolkit.getKmsClient(KeyToolkit.java:329)
at
org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:80)
at
org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:109)
at
org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory.getFileEncryptionProperties(PropertiesDrivenCryptoFactory.java:134)
at
org.apache.parquet.hadoop.ParquetOutputFormat.createEncryptionProperties(ParquetOutputFormat.java:552)
at
org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:285)
at
org.apache.parquet.hadoop.ParquetWriter$Builder.build(ParquetWriter.java:680)
at
org.apache.hudi.io.storage.HoodieBaseParquetWriter.<init>(HoodieBaseParquetWriter.java:80)
at
org.apache.hudi.io.storage.HoodieAvroParquetWriter.<init>(HoodieAvroParquetWriter.java:54)
at
org.apache.hudi.io.storage.HoodieAvroFileWriterFactory.newParquetFileWriter(HoodieAvroFileWriterFactory.java:69)
at
org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriterByFormat(HoodieFileWriterFactory.java:80)
at
org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriter(HoodieFileWriterFactory.java:67)
at
org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:104)
at
org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:76)
at
org.apache.hudi.io.CreateHandleFactory.create(CreateHandleFactory.java:45)
at
org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:101)
at
org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:44)
at
org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:69)
at
org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:78)
at
org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:37)
at
org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:119)
at
scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:46)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at
org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
at
org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
at
org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552)
at
org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462)
at
org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526)
at
org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:326)
at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:328)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
at org.apache.spark.scheduler.Task.run(Task.scala:139)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.parquet.hadoop.BadConfigurationException: could not
instantiate class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS set in
job conf at parquet.encryption.kms.client.class
at
org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:40)
at
org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:342)
... 46 more
Caused by: java.lang.ClassNotFoundException: Class
org.apache.parquet.crypto.keytools.mocks.InMemoryKMS not found
at
org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2592)
at
org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:33)
... 47 more
24/05/19 11:01:48 WARN BlockManager: Putting block rdd_153_2 failed due to
exception java.lang.RuntimeException:
org.apache.hudi.exception.HoodieException:
org.apache.hudi.exception.HoodieException:
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate
KmsClient class: null.
24/05/19 11:01:48 WARN BlockManager: Block rdd_153_2 could not be removed as
it was not found on disk or in memory
24/05/19 11:01:48 ERROR SimpleExecutor: Failed consuming records
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not
instantiate KmsClient class: null
at
org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:350)
at
org.apache.parquet.crypto.keytools.KeyToolkit.lambda$getKmsClient$0(KeyToolkit.java:330)
at
java.base/java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1705)
at
org.apache.parquet.crypto.keytools.KeyToolkit.getKmsClient(KeyToolkit.java:329)
at
org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:80)
at
org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:109)
at
org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory.getFileEncryptionProperties(PropertiesDrivenCryptoFactory.java:134)
at
org.apache.parquet.hadoop.ParquetOutputFormat.createEncryptionProperties(ParquetOutputFormat.java:552)
at
org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:285)
at
org.apache.parquet.hadoop.ParquetWriter$Builder.build(ParquetWriter.java:680)
at
org.apache.hudi.io.storage.HoodieBaseParquetWriter.<init>(HoodieBaseParquetWriter.java:80)
at
org.apache.hudi.io.storage.HoodieAvroParquetWriter.<init>(HoodieAvroParquetWriter.java:54)
at
org.apache.hudi.io.storage.HoodieAvroFileWriterFactory.newParquetFileWriter(HoodieAvroFileWriterFactory.java:69)
at
org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriterByFormat(HoodieFileWriterFactory.java:80)
at
org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriter(HoodieFileWriterFactory.java:67)
at
org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:104)
at
org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:76)
at
org.apache.hudi.io.CreateHandleFactory.create(CreateHandleFactory.java:45)
at
org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:101)
at
org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:44)
at
org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:69)
at
org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:78)
at
org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:37)
at
org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:119)
at
scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:46)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at
org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
at
org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
at
org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552)
at
org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462)
at
org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526)
at
org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:326)
at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:328)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
at org.apache.spark.scheduler.Task.run(Task.scala:139)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.parquet.hadoop.BadConfigurationException: could not
instantiate class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS set in
job conf at parquet.encryption.kms.client.class
at
org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:40)
at
org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:342)
... 46 more
Caused by: java.lang.ClassNotFoundException: Class
org.apache.parquet.crypto.keytools.mocks.InMemoryKMS not found
at
org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2592)
at
org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:33)
... 47 more
24/05/19 11:01:48 ERROR SimpleExecutor: Failed consuming records
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not
instantiate KmsClient class: null
at
org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:350)
at
org.apache.parquet.crypto.keytools.KeyToolkit.lambda$getKmsClient$0(KeyToolkit.java:330)
at
java.base/java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1705)
at
org.apache.parquet.crypto.keytools.KeyToolkit.getKmsClient(KeyToolkit.java:329)
at
org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:80)
at
org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:109)
at
org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory.getFileEncryptionProperties(PropertiesDrivenCryptoFactory.java:134)
at
org.apache.parquet.hadoop.ParquetOutputFormat.createEncryptionProperties(ParquetOutputFormat.java:552)
at
org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:285)
at
org.apache.parquet.hadoop.ParquetWriter$Builder.build(ParquetWriter.java:680)
at
org.apache.hudi.io.storage.HoodieBaseParquetWriter.<init>(HoodieBaseParquetWriter.java:80)
at
org.apache.hudi.io.storage.HoodieAvroParquetWriter.<init>(HoodieAvroParquetWriter.java:54)
at
org.apache.hudi.io.storage.HoodieAvroFileWriterFactory.newParquetFileWriter(HoodieAvroFileWriterFactory.java:69)
at
org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriterByFormat(HoodieFileWriterFactory.java:80)
at
org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriter(HoodieFileWriterFactory.java:67)
at
org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:104)
at
org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:76)
at
org.apache.hudi.io.CreateHandleFactory.create(CreateHandleFactory.java:45)
at
org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:101)
at
org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:44)
at
org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:69)
at
org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:78)
at
org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:37)
at
org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:119)
at
scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:46)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at
org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
at
org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
at
org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552)
at
org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462)
at
org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526)
at
org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:326)
at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:328)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
at org.apache.spark.scheduler.Task.run(Task.scala:139)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.parquet.hadoop.BadConfigurationException: could not
instantiate class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS set in
job conf at parquet.encryption.kms.client.class
at
org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:40)
at
org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:342)
... 46 more
Caused by: java.lang.ClassNotFoundException: Class
org.apache.parquet.crypto.keytools.mocks.InMemoryKMS not found
at
org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2592)
at
org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:33)
... 47 more
24/05/19 11:01:48 WARN BlockManager: Putting block rdd_153_1 failed due to
exception java.lang.RuntimeException:
org.apache.hudi.exception.HoodieException:
org.apache.hudi.exception.HoodieException:
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate
KmsClient class: null.
24/05/19 11:01:48 WARN BlockManager: Putting block rdd_153_8 failed due to
exception java.lang.RuntimeException:
org.apache.hudi.exception.HoodieException:
org.apache.hudi.exception.HoodieException:
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate
KmsClient class: null.
24/05/19 11:01:48 WARN BlockManager: Block rdd_153_1 could not be removed as
it was not found on disk or in memory
24/05/19 11:01:48 ERROR SimpleExecutor: Failed consuming records
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not
instantiate KmsClient class: null
at
org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:350)
at
org.apache.parquet.crypto.keytools.KeyToolkit.lambda$getKmsClient$0(KeyToolkit.java:330)
at
java.base/java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1705)
at
org.apache.parquet.crypto.keytools.KeyToolkit.getKmsClient(KeyToolkit.java:329)
at
org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:80)
at
org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:109)
at
org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory.getFileEncryptionProperties(PropertiesDrivenCryptoFactory.java:134)
at
org.apache.parquet.hadoop.ParquetOutputFormat.createEncryptionProperties(ParquetOutputFormat.java:552)
at
org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:285)
at
org.apache.parquet.hadoop.ParquetWriter$Builder.build(ParquetWriter.java:680)
at
org.apache.hudi.io.storage.HoodieBaseParquetWriter.<init>(HoodieBaseParquetWriter.java:80)
at
org.apache.hudi.io.storage.HoodieAvroParquetWriter.<init>(HoodieAvroParquetWriter.java:54)
at
org.apache.hudi.io.storage.HoodieAvroFileWriterFactory.newParquetFileWriter(HoodieAvroFileWriterFactory.java:69)
at
org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriterByFormat(HoodieFileWriterFactory.java:80)
at
org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriter(HoodieFileWriterFactory.java:67)
at
org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:104)
at
org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:76)
at
org.apache.hudi.io.CreateHandleFactory.create(CreateHandleFactory.java:45)
at
org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:101)
at
org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:44)
at
org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:69)
at
org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:78)
at
org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:37)
at
org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:119)
at
scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:46)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at
org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
at
org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
at
org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552)
at
org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462)
at
org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526)
at
org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:326)
at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:328)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
at org.apache.spark.scheduler.Task.run(Task.scala:139)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.parquet.hadoop.BadConfigurationException: could not
instantiate class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS set in
job conf at parquet.encryption.kms.client.class
at
org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:40)
at
org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:342)
... 46 more
Caused by: java.lang.ClassNotFoundException: Class
org.apache.parquet.crypto.keytools.mocks.InMemoryKMS not found
at
org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2592)
at
org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:33)
... 47 more
24/05/19 11:01:48 WARN BlockManager: Putting block rdd_153_0 failed due to
exception java.lang.RuntimeException:
org.apache.hudi.exception.HoodieException:
org.apache.hudi.exception.HoodieException:
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate
KmsClient class: null.
24/05/19 11:01:48 WARN BlockManager: Block rdd_153_0 could not be removed as
it was not found on disk or in memory
24/05/19 11:01:48 ERROR SimpleExecutor: Failed consuming records
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not
instantiate KmsClient class: null
at
org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:350)
at
org.apache.parquet.crypto.keytools.KeyToolkit.lambda$getKmsClient$0(KeyToolkit.java:330)
at
java.base/java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1705)
at
org.apache.parquet.crypto.keytools.KeyToolkit.getKmsClient(KeyToolkit.java:329)
at
org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:80)
at
org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:109)
at
org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory.getFileEncryptionProperties(PropertiesDrivenCryptoFactory.java:134)
at
org.apache.parquet.hadoop.ParquetOutputFormat.createEncryptionProperties(ParquetOutputFormat.java:552)
at
org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:285)
at
org.apache.parquet.hadoop.ParquetWriter$Builder.build(ParquetWriter.java:680)
at
org.apache.hudi.io.storage.HoodieBaseParquetWriter.<init>(HoodieBaseParquetWriter.java:80)
at
org.apache.hudi.io.storage.HoodieAvroParquetWriter.<init>(HoodieAvroParquetWriter.java:54)
at
org.apache.hudi.io.storage.HoodieAvroFileWriterFactory.newParquetFileWriter(HoodieAvroFileWriterFactory.java:69)
at
org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriterByFormat(HoodieFileWriterFactory.java:80)
at
org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriter(HoodieFileWriterFactory.java:67)
at
org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:104)
at
org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:76)
at
org.apache.hudi.io.CreateHandleFactory.create(CreateHandleFactory.java:45)
at
org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:101)
at
org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:44)
at
org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:69)
at
org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:78)
at
org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:37)
at
org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:119)
at
scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:46)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at
org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
at
org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
at
org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552)
at
org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462)
at
org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526)
at
org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:326)
at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:328)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
at org.apache.spark.scheduler.Task.run(Task.scala:139)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.parquet.hadoop.BadConfigurationException: could not
instantiate class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS set in
job conf at parquet.encryption.kms.client.class
at
org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:40)
at
org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:342)
... 46 more
Caused by: java.lang.ClassNotFoundException: Class
org.apache.parquet.crypto.keytools.mocks.InMemoryKMS not found
at
org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2592)
at
org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:33)
... 47 more
24/05/19 11:01:48 WARN BlockManager: Putting block rdd_153_11 failed due to
exception java.lang.RuntimeException:
org.apache.hudi.exception.HoodieException:
org.apache.hudi.exception.HoodieException:
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate
KmsClient class: null.
24/05/19 11:01:48 WARN BlockManager: Block rdd_153_11 could not be removed
as it was not found on disk or in memory
24/05/19 11:01:48 WARN BlockManager: Block rdd_153_8 could not be removed as
it was not found on disk or in memory
24/05/19 11:01:48 ERROR SimpleExecutor: Failed consuming records
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not
instantiate KmsClient class: null
at
org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:350)
at
org.apache.parquet.crypto.keytools.KeyToolkit.lambda$getKmsClient$0(KeyToolkit.java:330)
at
java.base/java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1705)
at
org.apache.parquet.crypto.keytools.KeyToolkit.getKmsClient(KeyToolkit.java:329)
at
org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:80)
at
org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:109)
at
org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory.getFileEncryptionProperties(PropertiesDrivenCryptoFactory.java:134)
at
org.apache.parquet.hadoop.ParquetOutputFormat.createEncryptionProperties(ParquetOutputFormat.java:552)
at
org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:285)
at
org.apache.parquet.hadoop.ParquetWriter$Builder.build(ParquetWriter.java:680)
at
org.apache.hudi.io.storage.HoodieBaseParquetWriter.<init>(HoodieBaseParquetWriter.java:80)
at
org.apache.hudi.io.storage.HoodieAvroParquetWriter.<init>(HoodieAvroParquetWriter.java:54)
at
org.apache.hudi.io.storage.HoodieAvroFileWriterFactory.newParquetFileWriter(HoodieAvroFileWriterFactory.java:69)
at
org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriterByFormat(HoodieFileWriterFactory.java:80)
at
org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriter(HoodieFileWriterFactory.java:67)
at
org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:104)
at
org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:76)
at
org.apache.hudi.io.CreateHandleFactory.create(CreateHandleFactory.java:45)
at
org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:101)
at
org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:44)
at
org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:69)
at
org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:78)
at
org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:37)
at
org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:119)
at
scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:46)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at
org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
at
org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
at
org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552)
at
org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462)
at
org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526)
at
org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:326)
at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:328)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
at org.apache.spark.scheduler.Task.run(Task.scala:139)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.parquet.hadoop.BadConfigurationException: could not
instantiate class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS set in
job conf at parquet.encryption.kms.client.class
at
org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:40)
at
org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:342)
... 46 more
Caused by: java.lang.ClassNotFoundException: Class
org.apache.parquet.crypto.keytools.mocks.InMemoryKMS not found
at
org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2592)
at
org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:33)
... 47 more
24/05/19 11:01:48 WARN BlockManager: Putting block rdd_153_4 failed due to
exception java.lang.RuntimeException:
org.apache.hudi.exception.HoodieException:
org.apache.hudi.exception.HoodieException:
```
# REF
* https://hudi.apache.org/docs/encryption/
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]