[ https://issues.apache.org/jira/browse/HUDI-1204?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17181065#comment-17181065 ]

Nishith Agarwal commented on HUDI-1204:
---------------------------------------

Perform the following changes to run the test suite in Docker:

diff --git a/packaging/hudi-integ-test-bundle/pom.xml b/packaging/hudi-integ-test-bundle/pom.xml
index 1c53447..adb1447 100644
--- a/packaging/hudi-integ-test-bundle/pom.xml
+++ b/packaging/hudi-integ-test-bundle/pom.xml
@@ -74,9 +74,34 @@
 <include>org.apache.hudi:hudi-utilities_${scala.binary.version}</include>
 <include>org.apache.hudi:hudi-spark_${scala.binary.version}</include>
 <include>org.apache.hudi:hudi-hive-sync</include>
+ <include>org.apache.hudi:hudi-sync-common</include>
 <include>org.apache.hudi:hudi-hadoop-mr</include>
 <include>org.apache.hudi:hudi-timeline-service</include>
 <include>org.apache.hudi:hudi-integ-test</include>
+ <include>org.jetbrains.kotlin:kotlin-stdlib-jdk8</include>
+ <include>org.jetbrains.kotlin:kotlin-stdlib</include>
+ <include>org.jetbrains.kotlin:kotlin-stdlib-common</include>
+ <include>org.jetbrains:annotations</include>
+ <include>org.jetbrains.kotlin:kotlin-stdlib-jdk7</include>
+
+ <include>org.eclipse.jetty:jetty-server</include>
+ <include>org.eclipse.jetty:jetty-http</include>
+ <include>org.eclipse.jetty:jetty-util</include>
+ <include>org.eclipse.jetty:jetty-io</include>
+ <include>org.eclipse.jetty:jetty-webapp</include>
+ <include>org.eclipse.jetty:jetty-xml</include>
+ <include>org.eclipse.jetty:jetty-servlet</include>
+ <include>org.eclipse.jetty:jetty-security</include>
+ <include>org.eclipse.jetty.websocket:websocket-server</include>
+ <include>org.eclipse.jetty.websocket:websocket-common</include>
+ <include>org.eclipse.jetty.websocket:websocket-api</include>
+ <include>org.eclipse.jetty.websocket:websocket-client</include>
+ <include>org.eclipse.jetty:jetty-client</include>
+ <include>org.eclipse.jetty.websocket:websocket-servlet</include>
+ <include>org.mortbay.jetty:jetty</include>
+ <include>org.mortbay.jetty:jetty-util</include>
+
+ <include>org.rocksdb:rocksdbjni</include>
 <include>com.beust:jcommander</include>
 <include>com.twitter:bijection-avro_${scala.binary.version}</include>
 <include>com.twitter:bijection-core_${scala.binary.version}</include>
@@ -89,6 +114,7 @@
 <include>io.confluent:kafka-schema-registry-client</include>
 <include>io.dropwizard.metrics:metrics-core</include>
 <include>io.dropwizard.metrics:metrics-graphite</include>
+ <include>io.javalin:javalin</include>
 <include>org.apache.spark:spark-streaming-kafka-0-10_${scala.binary.version}</include>
 <include>org.apache.kafka:kafka_${scala.binary.version}</include>
 <include>com.101tec:zkclient</include>
@@ -245,7 +271,7 @@
 <dependency>
 <groupId>io.javalin</groupId>
 <artifactId>javalin</artifactId>
- <version>2.4.0</version>
+ <version>2.8.0</version>
 </dependency>

<dependency>
@@ -276,7 +302,7 @@
 <version>${project.version}</version>
 <classifier>tests</classifier>
 <type>test-jar</type>
- <scope>test</scope>
+ <scope>compile</scope>
 </dependency>

<dependency>
@@ -323,6 +349,14 @@
 <artifactId>hive-exec</artifactId>
 <version>${hive.version}</version>
 <classifier>${hive.exec.classifier}</classifier>
+ <scope>compile</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>${hive.groupid}</groupId>
+ <artifactId>hive-metastore</artifactId>
+ <version>${hive.version}</version>
+ <scope>provided</scope>
 </dependency>

<dependency>
@@ -387,6 +421,7 @@
 <groupId>org.apache.hudi</groupId>
 <artifactId>hudi-utilities_${scala.binary.version}</artifactId>
 <version>${project.version}</version>
+ <scope>compile</scope>
 </dependency>
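
After these pom changes, the integ-test bundle has to be rebuilt so the new includes are shaded in. A minimal rebuild from the repo root (module flags are an assumption, adjust to your branch):

mvn clean package -DskipTests -pl packaging/hudi-integ-test-bundle -am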

 

diff --git a/docker/demo/config/test-suite/test-source.properties b/docker/demo/config/test-suite/test-source.properties
index 397f871..b34cb89 100644
--- a/docker/demo/config/test-suite/test-source.properties
+++ b/docker/demo/config/test-suite/test-source.properties
@@ -17,11 +17,11 @@ hoodie.datasource.write.recordkey.field=_row_key
 hoodie.deltastreamer.source.dfs.root=/user/hive/warehouse/hudi-bench/input
 hoodie.datasource.write.keygenerator.class=org.apache.hudi.ComplexKeyGenerator
 hoodie.datasource.write.partitionpath.field=timestamp
-hoodie.deltastreamer.schemaprovider.source.schema.file=/var/hoodie/ws/docker/demo/config/bench/source.avsc
+hoodie.deltastreamer.schemaprovider.source.schema.file=/var/hoodie/ws/docker/demo/config/test-suite/source.avsc
 hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/
 hoodie.datasource.hive_sync.database=testdb
-hoodie.datasource.hive_sync.table=test_table
+hoodie.datasource.hive_sync.table=table1
 hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.NonPartitionedExtractor
 hoodie.datasource.hive_sync.assume_date_partitioning=true
 hoodie.datasource.write.keytranslator.class=org.apache.hudi.DayBasedPartitionPathKeyTranslator
-hoodie.deltastreamer.schemaprovider.target.schema.file=/var/hoodie/ws/docker/demo/config/bench/source.avsc
\ No newline at end of file
+hoodie.deltastreamer.schemaprovider.target.schema.file=/var/hoodie/ws/docker/demo/config/test-suite/source.avsc
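
Both schema properties now point at the test-suite copy of source.avsc, so it is worth confirming that file exists before submitting; a quick check, assuming the adhoc-2 container used below:

docker exec adhoc-2 ls -l /var/hoodie/ws/docker/demo/config/test-suite/source.avsc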

 

diff --git a/docker/demo/config/test-suite/complex-dag-cow.yaml b/docker/demo/config/test-suite/complex-dag-cow.yaml
index 5a97688..9854ee3 100644
--- a/docker/demo/config/test-suite/complex-dag-cow.yaml
+++ b/docker/demo/config/test-suite/complex-dag-cow.yaml
@@ -17,7 +17,7 @@ first_insert:
 config:
 record_size: 70000
 num_insert_partitions: 1
- repeat_count: 5
+ repeat_count: 1
 num_records_insert: 1000
 type: InsertNode
 deps: none
@@ -25,7 +25,7 @@ second_insert:
 config:
 record_size: 70000
 num_insert_partitions: 1
- repeat_count: 5
+ repeat_count: 1
 num_records_insert: 10000
 deps: first_insert
 type: InsertNode
@@ -46,7 +46,7 @@ first_upsert:
 record_size: 70000
 num_insert_partitions: 1
 num_records_insert: 300
- repeat_count: 5
+ repeat_count: 1
 num_records_upsert: 100
 num_upsert_partitions: 10
 type: UpsertNode
@@ -65,9 +65,9 @@ first_hive_query:
 prop3: "set hive.strict.checks.large.query=false"
 prop4: "set hive.stats.autogather=false"
 hive_queries:
- query1: "select count(*) from testdb1.table1 group by `_row_key` having 
count(*) > 1"
+ query1: "select count(*) from testdb.table1 group by `_row_key` having 
count(*) > 1"
 result1: 0
- query2: "select count(*) from testdb1.table1"
+ query2: "select count(*) from testdb.table1"
 result2: 22100000
 type: HiveQueryNode
 deps: first_hive_sync
@@ -76,7 +76,7 @@ second_upsert:
 record_size: 70000
 num_insert_partitions: 1
 num_records_insert: 300
- repeat_count: 5
+ repeat_count: 1
 num_records_upsert: 100
 num_upsert_partitions: 10
 type: UpsertNode
@@ -89,9 +89,9 @@ second_hive_query:
 prop3: "set hive.strict.checks.large.query=false"
 prop4: "set hive.stats.autogather=false"
 hive_queries:
- query1: "select count(*) from testdb1.table1 group by `_row_key` having 
count(*) > 1"
+ query1: "select count(*) from testdb.table1 group by `_row_key` having 
count(*) > 1"
 result1: 0
- query2: "select count(*) from testdb1.table1"
+ query2: "select count(*) from testdb.table1"
 result2: 22100
 type: HiveQueryNode
 deps: second_upsert
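
A malformed DAG yaml only fails at submit time, so validating the edited file first can save a round trip; a sketch, assuming python with PyYAML is available on the host:

python -c "import yaml; yaml.safe_load(open('docker/demo/config/test-suite/complex-dag-cow.yaml'))"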

 

curl -L "https://repo1.maven.org/maven2/org/apache/hive/hive-common/2.3.1/hive-common-2.3.1.jar" -o hive-common-2.3.1.jar
curl -L "https://repo1.maven.org/maven2/org/apache/hive/hive-exec/2.3.1/hive-exec-2.3.1-core.jar" -o hive-exec-2.3.1-core.jar
curl -L "https://repo1.maven.org/maven2/org/apache/hive/hive-jdbc/2.3.1/hive-jdbc-2.3.1.jar" -o hive-jdbc-2.3.1.jar
curl -L "https://repo1.maven.org/maven2/org/apache/hive/hive-llap-common/2.3.1/hive-llap-common-2.3.1.jar" -o hive-llap-common-2.3.1.jar
curl -L "https://repo1.maven.org/maven2/org/apache/hive/hive-metastore/2.3.1/hive-metastore-2.3.1.jar" -o hive-metastore-2.3.1.jar
curl -L "https://repo1.maven.org/maven2/org/apache/hive/hive-serde/2.3.1/hive-serde-2.3.1.jar" -o hive-serde-2.3.1.jar
curl -L "https://repo1.maven.org/maven2/org/apache/hive/hive-service/2.3.1/hive-service-2.3.1.jar" -o hive-service-2.3.1.jar
curl -L "https://repo1.maven.org/maven2/org/apache/hive/hive-service-rpc/2.3.1/hive-service-rpc-2.3.1.jar" -o hive-service-rpc-2.3.1.jar
curl -L "https://repo1.maven.org/maven2/org/apache/hive/shims/hive-shims-0.23/2.3.1/hive-shims-0.23-2.3.1.jar" -o hive-shims-0.23-2.3.1.jar
curl -L "https://repo1.maven.org/maven2/org/apache/hive/shims/hive-shims-common/2.3.1/hive-shims-common-2.3.1.jar" -o hive-shims-common-2.3.1.jar
curl -L "https://repo1.maven.org/maven2/org/apache/hive/hive-storage-api/2.3.1/hive-storage-api-2.3.1.jar" -o hive-storage-api-2.3.1.jar
curl -L "https://repo1.maven.org/maven2/org/apache/hive/hive-shims/2.3.1/hive-shims-2.3.1.jar" -o hive-shims-2.3.1.jar

curl -L "https://repo1.maven.org/maven2/org/json/json/20090211/json-20090211.jar" -o json-20090211.jar
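
The downloads above can also be scripted; a sketch that fetches the same artifacts in a loop (hive-exec needs the core classifier and the two shims modules live under the shims/ group path, hence the separate commands):

V=2.3.1
BASE=https://repo1.maven.org/maven2/org/apache/hive
for a in hive-common hive-jdbc hive-llap-common hive-metastore hive-serde hive-service hive-service-rpc hive-storage-api hive-shims; do
  curl -L "$BASE/$a/$V/$a-$V.jar" -o "$a-$V.jar"
done
curl -L "$BASE/hive-exec/$V/hive-exec-$V-core.jar" -o "hive-exec-$V-core.jar"
curl -L "$BASE/shims/hive-shims-0.23/$V/hive-shims-0.23-$V.jar" -o "hive-shims-0.23-$V.jar"
curl -L "$BASE/shims/hive-shims-common/$V/hive-shims-common-$V.jar" -o "hive-shims-common-$V.jar"
curl -L "https://repo1.maven.org/maven2/org/json/json/20090211/json-20090211.jar" -o json-20090211.jar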

 

docker cp hive-common-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-jdbc-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-metastore-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-service-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-shims-0.23-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-shims-common-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-exec-2.3.1-core.jar adhoc-2:/opt/spark/jars/
docker cp hive-llap-common-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-serde-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-service-rpc-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-shims-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp hive-storage-api-2.3.1.jar adhoc-2:/opt/spark/jars/
docker cp json-20090211.jar adhoc-2:/opt/spark/jars/
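
To verify everything landed on the Spark classpath before submitting, list the copied jars (names per the copies above):

docker exec adhoc-2 ls /opt/spark/jars/ | grep -E 'hive-|json-20090211'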

 

docker cp packaging/hudi-integ-test-bundle/target/hudi-integ-test-bundle-0.6.0-rc.jar adhoc-2:/opt

docker cp packaging/hudi-hive-sync-bundle/target/hudi-hive-sync-bundle-0.6.0-rc.jar adhoc-2:/opt/

docker cp docker/demo/config/test-suite/complex-dag-cow.yaml adhoc-2:/var/hoodie/ws/docker/demo/config/test-suite/

 

docker exec -it adhoc-2 /bin/bash

hdfs dfs -mkdir -p /var/hoodie/ws/docker/demo/config/test-suite/

hdfs dfs -copyFromLocal /var/hoodie/ws/docker/demo/config/test-suite/* /var/hoodie/ws/docker/demo/config/test-suite/
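
A quick listing confirms the configs (properties file, avsc schema, dag yaml) are on DFS where the job expects them:

hdfs dfs -ls /var/hoodie/ws/docker/demo/config/test-suite/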

 

spark-submit \
  --jars /opt/hudi-hive-sync-bundle-0.6.1-SNAPSHOT.jar \
  --packages org.apache.spark:spark-avro_2.11:2.4.0 \
  --conf spark.task.cpus=1 \
  --conf spark.executor.cores=1 \
  --conf spark.task.maxFailures=100 \
  --conf spark.memory.fraction=0.4 \
  --conf spark.rdd.compress=true \
  --conf spark.kryoserializer.buffer.max=2000m \
  --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
  --conf spark.memory.storageFraction=0.1 \
  --conf spark.shuffle.service.enabled=true \
  --conf spark.sql.hive.convertMetastoreParquet=false \
  --conf spark.ui.port=5555 \
  --conf spark.driver.maxResultSize=12g \
  --conf spark.executor.heartbeatInterval=120s \
  --conf spark.network.timeout=600s \
  --conf spark.eventLog.overwrite=true \
  --conf spark.eventLog.enabled=true \
  --conf spark.yarn.max.executor.failures=10 \
  --conf spark.sql.catalogImplementation=hive \
  --conf spark.sql.shuffle.partitions=1000 \
  --conf spark.driver.extraClassPath=hive-common-2.3.1.jar:hive-exec-2.3.1-core.jar:hive-jdbc-2.3.1.jar:hive-llap-common-2.3.1.jar:hive-metastore-2.3.1.jar:hive-serde-2.3.1.jar:hive-service-2.3.1.jar:hive-service-rpc-2.3.1.jar:hive-shims-0.23-2.3.1.jar:hive-shims-common-2.3.1.jar:hive-storage-api-2.3.1.jar:hive-shims-2.3.1.jar:spark-hive-thriftserver_2.12-3.0.0-preview2.jar:json-20090211.jar \
  --conf spark.executor.extraClassPath=hive-common-2.3.1.jar:hive-exec-2.3.1-core.jar:hive-jdbc-2.3.1.jar:hive-llap-common-2.3.1.jar:hive-metastore-2.3.1.jar:hive-serde-2.3.1.jar:hive-service-2.3.1.jar:hive-service-rpc-2.3.1.jar:hive-shims-0.23-2.3.1.jar:hive-shims-common-2.3.1.jar:hive-storage-api-2.3.1.jar:hive-shims-2.3.1.jar:spark-hive-thriftserver_2.12-3.0.0-preview2.jar:json-20090211.jar \
  --class org.apache.hudi.integ.testsuite.HoodieTestSuiteJob \
  /opt/hudi-integ-test-bundle-0.6.1-SNAPSHOT.jar \
  --source-ordering-field timestamp \
  --target-base-path /user/hive/warehouse/hudi-integ-test-suite/output \
  --input-base-path /user/hive/warehouse/hudi-integ-test-suite/input \
  --target-table test_table \
  --props /var/hoodie/ws/docker/demo/config/test-suite/test-source.properties \
  --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider \
  --source-limit 300000000 \
  --source-class org.apache.hudi.utilities.sources.AvroDFSSource \
  --input-file-size 125829120 \
  --workload-yaml-path /var/hoodie/ws/docker/demo/config/test-suite/complex-dag-cow-2.yaml \
  --workload-generator-classname org.apache.hudi.integ.testsuite.dag.WorkflowDagGenerator \
  --table-type COPY_ON_WRITE \
  --compact-scheduling-minshare 1 \
  --hoodie-conf "hoodie.deltastreamer.source.test.num_partitions=100" \
  --hoodie-conf "hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false" \
  --hoodie-conf "hoodie.deltastreamer.source.test.max_unique_records=100000000" \
  --hoodie-conf "hoodie.embed.timeline.server=false" \
  --hoodie-conf "hoodie.datasource.write.recordkey.field=_row_key" \
  --hoodie-conf "hoodie.deltastreamer.source.dfs.root=/user/hive/warehouse/hudi-integ-test-suite/input" \
  --hoodie-conf "hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.ComplexKeyGenerator" \
  --hoodie-conf "hoodie.datasource.write.partitionpath.field=timestamp" \
  --hoodie-conf "hoodie.deltastreamer.schemaprovider.source.schema.file=/var/hoodie/ws/docker/demo/config/test-suite/source.avsc" \
  --hoodie-conf "hoodie.datasource.hive_sync.assume_date_partitioning=false" \
  --hoodie-conf "hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/" \
  --hoodie-conf "hoodie.datasource.hive_sync.database=testdb" \
  --hoodie-conf "hoodie.datasource.hive_sync.table=test_table" \
  --hoodie-conf "hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.NonPartitionedExtractor" \
  --hoodie-conf "hoodie.datasource.hive_sync.assume_date_partitioning=true" \
  --hoodie-conf "hoodie.datasource.write.keytranslator.class=org.apache.hudi.DayBasedPartitionPathKeyTranslator" \
  --hoodie-conf "hoodie.deltastreamer.schemaprovider.target.schema.file=/var/hoodie/ws/docker/demo/config/test-suite/source.avsc"

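Once the job finishes, the hive-sync result can be spot-checked against the counts the DAG's HiveQueryNodes assert; a sketch using the JDBC URL and table names from the configs above (assumes beeline is available inside the container):

beeline -u jdbc:hive2://hiveserver:10000/ -e "select count(*) from testdb.table1"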
 

> NoClassDefFoundError with AbstractSyncTool while running HoodieTestSuiteJob
> ---------------------------------------------------------------------------
>
>                 Key: HUDI-1204
>                 URL: https://issues.apache.org/jira/browse/HUDI-1204
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: Testing
>    Affects Versions: 0.6.1
>            Reporter: sivabalan narayanan
>            Assignee: Nishith Agarwal
>            Priority: Major
>
> I was trying to run HoodieTestSuiteJob in my local docker setup and ran into a
> dependency issue.
>  
> spark-submit --master local --class
> org.apache.hudi.integ.testsuite.HoodieTestSuiteJob --packages
> com.databricks:spark-avro_2.11:4.0.0
> /opt/hudi-integ-test-bundle-0.6.0-rc1.jar --source-ordering-field timestamp
> --target-base-path /user/hive/warehouse/hudi-test-suite/output
> --input-base-path /user/hive/warehouse/hudi-test-suite/input
> --target-table test_table --props file:///opt/test-source.properties
> --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider
> --source-class org.apache.hudi.utilities.sources.AvroDFSSource
> --input-file-size 12582912 --workload-yaml-path
> /var/hoodie/ws/docker/demo/config/test-suite/complex-dag-cow.yaml
> --table-type COPY_ON_WRITE --workload-generator-classname yaml
>  
> {code:java}
> 20/08/19 21:42:26 WARN NativeCodeLoader: Unable to load native-hadoop library 
> for your platform... using builtin-java classes where applicable
> Exception in thread "main" java.lang.NoClassDefFoundError: 
> org/apache/hudi/sync/common/AbstractSyncTool
> at java.lang.ClassLoader.defineClass1(Native Method)
> at java.lang.ClassLoader.defineClass(ClassLoader.java:763)
> at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
> at java.net.URLClassLoader.defineClass(URLClassLoader.java:468)
> at java.net.URLClassLoader.access$100(URLClassLoader.java:74)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:369)
> at java.net.URLClassLoader$1.run(URLClassLoader.java:363)
> at java.security.AccessController.doPrivileged(Native Method)
> at java.net.URLClassLoader.findClass(URLClassLoader.java:362)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at 
> org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer$Config.<init>(HoodieDeltaStreamer.java:279)
> at 
> org.apache.hudi.integ.testsuite.HoodieTestSuiteJob$HoodieTestSuiteConfig.<init>(HoodieTestSuiteJob.java:153)
> at 
> org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.main(HoodieTestSuiteJob.java:114)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at 
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at 
> org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
> at 
> org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
> at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
> at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
> at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
> at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
> at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
> at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> Caused by: java.lang.ClassNotFoundException: 
> org.apache.hudi.sync.common.AbstractSyncTool
> at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> ... 26 more
>  {code}
> I tried adding hudi-sync-common as a dependency to hudi-utilities, but that
> did not fix the issue.
>  


