[
https://issues.apache.org/jira/browse/HUDI-1204?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17181065#comment-17181065
]
Nishith Agarwal commented on HUDI-1204:
---------------------------------------
Apply the following changes to run the test suite in Docker:
diff --git a/packaging/hudi-integ-test-bundle/pom.xml
b/packaging/hudi-integ-test-bundle/pom.xml
index 1c53447..adb1447 100644
--- a/packaging/hudi-integ-test-bundle/pom.xml
+++ b/packaging/hudi-integ-test-bundle/pom.xml
@@ -74,9 +74,34 @@
<include>org.apache.hudi:hudi-utilities_${scala.binary.version}</include>
<include>org.apache.hudi:hudi-spark_${scala.binary.version}</include>
<include>org.apache.hudi:hudi-hive-sync</include>
+ <include>org.apache.hudi:hudi-sync-common</include>
<include>org.apache.hudi:hudi-hadoop-mr</include>
<include>org.apache.hudi:hudi-timeline-service</include>
<include>org.apache.hudi:hudi-integ-test</include>
+ <include>org.jetbrains.kotlin:kotlin-stdlib-jdk8</include>
+ <include>org.jetbrains.kotlin:kotlin-stdlib</include>
+ <include>org.jetbrains.kotlin:kotlin-stdlib-common</include>
+ <include>org.jetbrains:annotations</include>
+ <include>org.jetbrains.kotlin:kotlin-stdlib-jdk7</include>
+
+ <include>org.eclipse.jetty:jetty-server</include>
+ <include>org.eclipse.jetty:jetty-http</include>
+ <include>org.eclipse.jetty:jetty-util</include>
+ <include>org.eclipse.jetty:jetty-io</include>
+ <include>org.eclipse.jetty:jetty-webapp</include>
+ <include>org.eclipse.jetty:jetty-xml</include>
+ <include>org.eclipse.jetty:jetty-servlet</include>
+ <include>org.eclipse.jetty:jetty-security</include>
+ <include>org.eclipse.jetty.websocket:websocket-server</include>
+ <include>org.eclipse.jetty.websocket:websocket-common</include>
+ <include>org.eclipse.jetty.websocket:websocket-api</include>
+ <include>org.eclipse.jetty.websocket:websocket-client</include>
+ <include>org.eclipse.jetty:jetty-client</include>
+ <include>org.eclipse.jetty.websocket:websocket-servlet</include>
+ <include>org.mortbay.jetty:jetty</include>
+ <include>org.mortbay.jetty:jetty-util</include>
+
+ <include>org.rocksdb:rocksdbjni</include>
<include>com.beust:jcommander</include>
<include>com.twitter:bijection-avro_${scala.binary.version}</include>
<include>com.twitter:bijection-core_${scala.binary.version}</include>
@@ -89,6 +114,7 @@
<include>io.confluent:kafka-schema-registry-client</include>
<include>io.dropwizard.metrics:metrics-core</include>
<include>io.dropwizard.metrics:metrics-graphite</include>
+ <include>io.javalin:javalin</include>
<include>org.apache.spark:spark-streaming-kafka-0-10_${scala.binary.version}</include>
<include>org.apache.kafka:kafka_${scala.binary.version}</include>
<include>com.101tec:zkclient</include>
@@ -245,7 +271,7 @@
<dependency>
<groupId>io.javalin</groupId>
<artifactId>javalin</artifactId>
- <version>2.4.0</version>
+ <version>2.8.0</version>
</dependency>
<dependency>
@@ -276,7 +302,7 @@
<version>${project.version}</version>
<classifier>tests</classifier>
<type>test-jar</type>
- <scope>test</scope>
+ <scope>compile</scope>
</dependency>
<dependency>
@@ -323,6 +349,14 @@
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
<classifier>${hive.exec.classifier}</classifier>
+ <scope>compile</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>${hive.groupid}</groupId>
+ <artifactId>hive-metastore</artifactId>
+ <version>${hive.version}</version>
+ <scope>provided</scope>
</dependency>
<dependency>
@@ -387,6 +421,7 @@
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-utilities_${scala.binary.version}</artifactId>
<version>${project.version}</version>
+ <scope>compile</scope>
</dependency>
diff --git a/docker/demo/config/test-suite/test-source.properties
b/docker/demo/config/test-suite/test-source.properties
index 397f871..b34cb89 100644
--- a/docker/demo/config/test-suite/test-source.properties
+++ b/docker/demo/config/test-suite/test-source.properties
@@ -17,11 +17,11 @@ hoodie.datasource.write.recordkey.field=_row_key
hoodie.deltastreamer.source.dfs.root=/user/hive/warehouse/hudi-bench/input
hoodie.datasource.write.keygenerator.class=org.apache.hudi.ComplexKeyGenerator
hoodie.datasource.write.partitionpath.field=timestamp
-hoodie.deltastreamer.schemaprovider.source.schema.file=/var/hoodie/ws/docker/demo/config/bench/source.avsc
+hoodie.deltastreamer.schemaprovider.source.schema.file=/var/hoodie/ws/docker/demo/config/test-suite/source.avsc
hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/
hoodie.datasource.hive_sync.database=testdb
-hoodie.datasource.hive_sync.table=test_table
+hoodie.datasource.hive_sync.table=table1
hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.NonPartitionedExtractor
hoodie.datasource.hive_sync.assume_date_partitioning=true
hoodie.datasource.write.keytranslator.class=org.apache.hudi.DayBasedPartitionPathKeyTranslator
-hoodie.deltastreamer.schemaprovider.target.schema.file=/var/hoodie/ws/docker/demo/config/bench/source.avsc
\ No newline at end of file
+hoodie.deltastreamer.schemaprovider.target.schema.file=/var/hoodie/ws/docker/demo/config/test-suite/source.avsc
diff --git a/docker/demo/config/test-suite/complex-dag-cow.yaml
b/docker/demo/config/test-suite/complex-dag-cow.yaml
index 5a97688..9854ee3 100644
--- a/docker/demo/config/test-suite/complex-dag-cow.yaml
+++ b/docker/demo/config/test-suite/complex-dag-cow.yaml
@@ -17,7 +17,7 @@ first_insert:
config:
record_size: 70000
num_insert_partitions: 1
- repeat_count: 5
+ repeat_count: 1
num_records_insert: 1000
type: InsertNode
deps: none
@@ -25,7 +25,7 @@ second_insert:
config:
record_size: 70000
num_insert_partitions: 1
- repeat_count: 5
+ repeat_count: 1
num_records_insert: 10000
deps: first_insert
type: InsertNode
@@ -46,7 +46,7 @@ first_upsert:
record_size: 70000
num_insert_partitions: 1
num_records_insert: 300
- repeat_count: 5
+ repeat_count: 1
num_records_upsert: 100
num_upsert_partitions: 10
type: UpsertNode
@@ -65,9 +65,9 @@ first_hive_query:
prop3: "set hive.strict.checks.large.query=false"
prop4: "set hive.stats.autogather=false"
hive_queries:
- query1: "select count(*) from testdb1.table1 group by `_row_key` having
count(*) > 1"
+ query1: "select count(*) from testdb.table1 group by `_row_key` having
count(*) > 1"
result1: 0
- query2: "select count(*) from testdb1.table1"
+ query2: "select count(*) from testdb.table1"
result2: 22100000
type: HiveQueryNode
deps: first_hive_sync
@@ -76,7 +76,7 @@ second_upsert:
record_size: 70000
num_insert_partitions: 1
num_records_insert: 300
- repeat_count: 5
+ repeat_count: 1
num_records_upsert: 100
num_upsert_partitions: 10
type: UpsertNode
@@ -89,9 +89,9 @@ second_hive_query:
prop3: "set hive.strict.checks.large.query=false"
prop4: "set hive.stats.autogather=false"
hive_queries:
- query1: "select count(*) from testdb1.table1 group by `_row_key` having
count(*) > 1"
+ query1: "select count(*) from testdb.table1 group by `_row_key` having
count(*) > 1"
result1: 0
- query2: "select count(*) from testdb1.table1"
+ query2: "select count(*) from testdb.table1"
result2: 22100
type: HiveQueryNode
deps: second_upsert
curl -i
"https://repo1.maven.org/maven2/org/apache/hive/hive-common/2.3.1/hive-common-2.3.1.jar"
> hive-common-2.3.1.jar
curl -i
"https://repo1.maven.org/maven2/org/apache/hive/hive-exec/2.3.1/hive-exec-2.3.1-core.jar"
> hive-exec-2.3.1-core.jar
curl -i
"https://repo1.maven.org/maven2/org/apache/hive/hive-jdbc/2.3.1/hive-jdbc-2.3.1.jar"
> hive-jdbc-2.3.1.jar
curl -i
"https://repo1.maven.org/maven2/org/apache/hive/hive-llap-common/2.3.1/hive-llap-common-2.3.1.jar"
> hive-llap-common-2.3.1.jar
curl -i
"https://repo1.maven.org/maven2/org/apache/hive/hive-metastore/2.3.1/hive-metastore-2.3.1.jar"
> hive-metastore-2.3.1.jar
curl -i
"https://repo1.maven.org/maven2/org/apache/hive/hive-serde/2.3.1/hive-serde-2.3.1.jar"
> hive-serde-2.3.1.jar
curl -i
"https://repo1.maven.org/maven2/org/apache/hive/hive-service/2.3.1/hive-service-2.3.1.jar"
> hive-service-2.3.1.jar
curl -i
"https://repo1.maven.org/maven2/org/apache/hive/hive-service-rpc/2.3.1/hive-service-rpc-2.3.1.jar"
> hive-service-rpc-2.3.1.jar
curl -i
"https://repo1.maven.org/maven2/org/apache/hive/shims/hive-shims-0.23/2.3.1/hive-shims-0.23-2.3.1.jar"
> hive-shims-0.23-2.3.1.jar
curl -i
"https://repo1.maven.org/maven2/org/apache/hive/shims/hive-shims-common/2.3.1/hive-shims-common-2.3.1.jar"
> hive-shims-common-2.3.1.jar
curl -i
"https://repo1.maven.org/maven2/org/apache/hive/hive-storage-api/2.3.1/hive-storage-api-2.3.1.jar"
> hive-storage-api-2.3.1.jar
curl -i
"https://repo1.maven.org/maven2/org/apache/hive/hive-shims/2.3.1/hive-shims-2.3.1.jar"
> hive-shims-2.3.1.jar
curl -i
"https://repo1.maven.org/maven2/org/json/json/20090211/json-20090211.jar"
> json-20090211.jar
docker cp hive-common-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-jdbc-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-metastore-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-service-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-shims-0.23-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-shims-common-2.3.1.jar hive-shims-common-2.3.1.jar
docker cp hive-shims-common-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-exec-2.3.1-core.jar adhoc-2:/opt/spark/jars/
docker cp hive-llap-common-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-serde-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-service-rpc-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-shims-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-storage-api-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp json-20090211.jar adhoc-2:/opt/spark/jars/
docker cp
packaging/hudi-integ-test-bundle/target/hudi-integ-test-bundle-0.6.0-rc.jar
adhoc-2:/opt
docker cp
packaging/hudi-hive-sync-bundle/target/hudi-hive-sync-bundle-0.6.0-rc.jar
adhoc-2:/opt/
docker cp docker/demo/config/test-suite/complex-dag-cow.yaml
adhoc-2:/var/hoodie/ws/docker/demo/config/test-suite/
docker exec -it adhoc-2 /bin/bash
hdfs dfs -mkdir -p /var/hoodie/ws/docker/demo/config/test-suite/
hdfs dfs -copyFromLocal /var/hoodie/ws/docker/demo/config/test-suite/*
/var/hoodie/ws/docker/demo/config/test-suite/
spark-submit --jars /opt/hudi-hive-sync-bundle-0.6.1-SNAPSHOT.jar --packages
org.apache.spark:spark-avro_2.11:2.4.0 --conf spark.task.cpus=1 --conf
spark.executor.cores=1 --conf spark.task.maxFailures=100 --conf
spark.memory.fraction=0.4 --conf spark.rdd.compress=true --conf
spark.kryoserializer.buffer.max=2000m --conf
spark.serializer=org.apache.spark.serializer.KryoSerializer --conf
spark.memory.storageFraction=0.1 --conf spark.shuffle.service.enabled=true
--conf spark.sql.hive.convertMetastoreParquet=false --conf spark.ui.port=5555
--conf spark.driver.maxResultSize=12g --conf
spark.executor.heartbeatInterval=120s --conf spark.network.timeout=600s --conf
spark.eventLog.overwrite=true --conf spark.eventLog.enabled=true --conf
spark.yarn.max.executor.failures=10 --conf spark.sql.catalogImplementation=hive
--conf spark.sql.shuffle.partitions=1000 --conf
spark.driver.extraClassPath=hive-common-2.3.1.jar:hive-exec-2.3.1-core.jar:hive-jdbc-2.3.1.jar:hive-llap-common-2.3.1.jar:hive-metastore-2.3.1.jar:hive-serde-2.3.1.jar:hive-service-2.3.1.jar:hive-service-rpc-2.3.1.jar:hive-shims-0.23-2.3.1.jar:hive-shims-common-2.3.1.jar:hive-storage-api-2.3.1.jar:hive-shims-2.3.1.jar:spark-hive-thriftserver_2.12-3.0.0-preview2.jar:json-20090211.jar
--conf
spark.executor.extraClassPath=hive-common-2.3.1.jar:hive-exec-2.3.1-core.jar:hive-jdbc-2.3.1.jar:hive-llap-common-2.3.1.jar:hive-metastore-2.3.1.jar:hive-serde-2.3.1.jar:hive-service-2.3.1.jar:hive-service-rpc-2.3.1.jar:hive-shims-0.23-2.3.1.jar:hive-shims-common-2.3.1.jar:hive-storage-api-2.3.1.jar:hive-shims-2.3.1.jar:spark-hive-thriftserver_2.12-3.0.0-preview2.jar:json-20090211.jar
--class org.apache.hudi.integ.testsuite.HoodieTestSuiteJob
/opt/hudi-integ-test-bundle-0.6.1-SNAPSHOT.jar --source-ordering-field
timestamp --target-base-path /user/hive/warehouse/hudi-integ-test-suite/output
--input-base-path /user/hive/warehouse/hudi-integ-test-suite/input
--target-table test_table --props
/var/hoodie/ws/docker/demo/config/test-suite/test-source.properties
--schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider
--source-limit 300000000 --source-class
org.apache.hudi.utilities.sources.AvroDFSSource --input-file-size 125829120
--workload-yaml-path
/var/hoodie/ws/docker/demo/config/test-suite/complex-dag-cow-2.yaml
--workload-generator-classname
org.apache.hudi.integ.testsuite.dag.WorkflowDagGenerator --table-type
COPY_ON_WRITE --compact-scheduling-minshare 1 --hoodie-conf
"hoodie.deltastreamer.source.test.num_partitions=100" --hoodie-conf
"hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false"
--hoodie-conf "hoodie.deltastreamer.source.test.max_unique_records=100000000"
--hoodie-conf "hoodie.embed.timeline.server=false" --hoodie-conf
"hoodie.datasource.write.recordkey.field=_row_key" --hoodie-conf
"hoodie.deltastreamer.source.dfs.root=/user/hive/warehouse/hudi-integ-test-suite/input"
--hoodie-conf
"hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.ComplexKeyGenerator"
--hoodie-conf "hoodie.datasource.write.partitionpath.field=timestamp"
--hoodie-conf
"hoodie.deltastreamer.schemaprovider.source.schema.file=/var/hoodie/ws/docker/demo/config/test-suite/source.avsc"
--hoodie-conf "hoodie.datasource.hive_sync.assume_date_partitioning=false"
--hoodie-conf
"hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/"
--hoodie-conf "hoodie.datasource.hive_sync.database=testdb" --hoodie-conf
"hoodie.datasource.hive_sync.table=test_table" --hoodie-conf
"hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.NonPartitionedExtractor"
--hoodie-conf "hoodie.datasource.hive_sync.assume_date_partitioning=true"
--hoodie-conf
"hoodie.datasource.write.keytranslator.class=org.apache.hudi.DayBasedPartitionPathKeyTranslator"
--hoodie-conf
"hoodie.deltastreamer.schemaprovider.target.schema.file=/var/hoodie/ws/docker/demo/config/test-suite/source.avsc"
> NoClassDefFoundError with AbstractSyncTool while running HoodieTestSuiteJob
> ---------------------------------------------------------------------------
>
> Key: HUDI-1204
> URL: https://issues.apache.org/jira/browse/HUDI-1204
> Project: Apache Hudi
> Issue Type: Bug
> Components: Testing
> Affects Versions: 0.6.1
> Reporter: sivabalan narayanan
> Assignee: Nishith Agarwal
> Priority: Major
>
> I was trying to run HoodieTestSuiteJob in my local Docker setup and ran into
> a dependency issue.
>
> spark-submit --master local --class
> org.apache.hudi.integ.testsuite.HoodieTestSuiteJob --packages
> com.databricks:spark-avro_2.11:4.0.0
> /opt/hudi-integ-test-bundle-0.6.0-rc1.jar --source-ordering-field timestamp
> --target-base-path /user/hive/warehouse/hudi-test-suite/output
> --input-base-path /user/hive/warehouse/hudi-test-suite/input
> --target-table test_table --props file:///opt/test-source.properties
> --schemaprovider-class
> org.apache.hudi.utilities.schema.FilebasedSchemaProvider --source-class
> org.apache.hudi.utilities.sources.AvroDFSSource --input-file-size 12582912
> --workload-yaml-path
> /var/hoodie/ws/docker/demo/config/test-suite/complex-dag-cow.yaml
> --table-type COPY_ON_WRITE --workload-generator-classname yaml
>
> {code:java}
> 20/08/19 21:42:26 WARN NativeCodeLoader: Unable to load native-hadoop library
> for your platform... using builtin-java classes where applicable
> Exception in thread "main" java.lang.NoClassDefFoundError:
> org/apache/hudi/sync/common/AbstractSyncTool
> at java.lang.ClassLoader.defineClass1(Native Method)
> at java.lang.ClassLoader.defineClass(ClassLoader.java:763)
> at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
> at java.net.URLClassLoader.defineClass(URLClassLoader.java:468)
> at java.net.URLClassLoader.access$100(URLClassLoader.java:74)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:369)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:363)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:362)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at
> org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer$Config.<init>(HoodieDeltaStreamer.java:279)
> at
> org.apache.hudi.integ.testsuite.HoodieTestSuiteJob$HoodieTestSuiteConfig.<init>(HoodieTestSuiteJob.java:153)
> at
> org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.main(HoodieTestSuiteJob.java:114)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at
> org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
> at
> org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
> at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
> at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
> at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
> at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
> at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
> at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> Caused by: java.lang.ClassNotFoundException:
> org.apache.hudi.sync.common.AbstractSyncTool
> at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> ... 26 more
> {code}
> I tried adding hudi-sync-common as a dependency of hudi-utilities, but that
> didn't fix the issue.
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)