[ https://issues.apache.org/jira/browse/HUDI-1204?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17181065#comment-17181065 ]
Nishith Agarwal commented on HUDI-1204: --------------------------------------- Perform the following changes to run the test suite in docker diff --git a/packaging/hudi-integ-test-bundle/pom.xml b/packaging/hudi-integ-test-bundle/pom.xml index 1c53447..adb1447 100644 --- a/packaging/hudi-integ-test-bundle/pom.xml +++ b/packaging/hudi-integ-test-bundle/pom.xml @@ -74,9 +74,34 @@ <include>org.apache.hudi:hudi-utilities_${scala.binary.version}</include> <include>org.apache.hudi:hudi-spark_${scala.binary.version}</include> <include>org.apache.hudi:hudi-hive-sync</include> + <include>org.apache.hudi:hudi-sync-common</include> <include>org.apache.hudi:hudi-hadoop-mr</include> <include>org.apache.hudi:hudi-timeline-service</include> <include>org.apache.hudi:hudi-integ-test</include> + <include>org.jetbrains.kotlin:kotlin-stdlib-jdk8</include> + <include>org.jetbrains.kotlin:kotlin-stdlib</include> + <include>org.jetbrains.kotlin:kotlin-stdlib-common</include> + <include>org.jetbrains:annotations</include> + <include>org.jetbrains.kotlin:kotlin-stdlib-jdk7</include> + + <include>org.eclipse.jetty:jetty-server</include> + <include>org.eclipse.jetty:jetty-http</include> + <include>org.eclipse.jetty:jetty-util</include> + <include>org.eclipse.jetty:jetty-io</include> + <include>org.eclipse.jetty:jetty-webapp</include> + <include>org.eclipse.jetty:jetty-xml</include> + <include>org.eclipse.jetty:jetty-servlet</include> + <include>org.eclipse.jetty:jetty-security</include> + <include>org.eclipse.jetty.websocket:websocket-server</include> + <include>org.eclipse.jetty.websocket:websocket-common</include> + <include>org.eclipse.jetty.websocket:websocket-api</include> + <include>org.eclipse.jetty.websocket:websocket-client</include> + <include>org.eclipse.jetty:jetty-client</include> + <include>org.eclipse.jetty.websocket:websocket-servlet</include> + <include>org.mortbay.jetty:jetty</include> + <include>org.mortbay.jetty:jetty-util</include> + + 
<include>org.rocksdb:rocksdbjni</include> <include>com.beust:jcommander</include> <include>com.twitter:bijection-avro_${scala.binary.version}</include> <include>com.twitter:bijection-core_${scala.binary.version}</include> @@ -89,6 +114,7 @@ <include>io.confluent:kafka-schema-registry-client</include> <include>io.dropwizard.metrics:metrics-core</include> <include>io.dropwizard.metrics:metrics-graphite</include> + <include>io.javalin:javalin</include> <include>org.apache.spark:spark-streaming-kafka-0-10_${scala.binary.version}</include> <include>org.apache.kafka:kafka_${scala.binary.version}</include> <include>com.101tec:zkclient</include> @@ -245,7 +271,7 @@ <dependency> <groupId>io.javalin</groupId> <artifactId>javalin</artifactId> - <version>2.4.0</version> + <version>2.8.0</version> </dependency> <dependency> @@ -276,7 +302,7 @@ <version>${project.version}</version> <classifier>tests</classifier> <type>test-jar</type> - <scope>test</scope> + <scope>compile</scope> </dependency> <dependency> @@ -323,6 +349,14 @@ <artifactId>hive-exec</artifactId> <version>${hive.version}</version> <classifier>${hive.exec.classifier}</classifier> + <scope>compile</scope> + </dependency> + + <dependency> + <groupId>${hive.groupid}</groupId> + <artifactId>hive-metastore</artifactId> + <version>${hive.version}</version> + <scope>provided</scope> </dependency> <dependency> @@ -387,6 +421,7 @@ <groupId>org.apache.hudi</groupId> <artifactId>hudi-utilities_${scala.binary.version}</artifactId> <version>${project.version}</version> + <scope>compile</scope> </dependency> diff --git a/docker/demo/config/test-suite/test-source.properties b/docker/demo/config/test-suite/test-source.properties index 397f871..b34cb89 100644 --- a/docker/demo/config/test-suite/test-source.properties +++ b/docker/demo/config/test-suite/test-source.properties @@ -17,11 +17,11 @@ hoodie.datasource.write.recordkey.field=_row_key hoodie.deltastreamer.source.dfs.root=/user/hive/warehouse/hudi-bench/input 
hoodie.datasource.write.keygenerator.class=org.apache.hudi.ComplexKeyGenerator hoodie.datasource.write.partitionpath.field=timestamp -hoodie.deltastreamer.schemaprovider.source.schema.file=/var/hoodie/ws/docker/demo/config/bench/source.avsc +hoodie.deltastreamer.schemaprovider.source.schema.file=/var/hoodie/ws/docker/demo/config/test-suite/source.avsc hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/ hoodie.datasource.hive_sync.database=testdb -hoodie.datasource.hive_sync.table=test_table +hoodie.datasource.hive_sync.table=table1 hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.NonPartitionedExtractor hoodie.datasource.hive_sync.assume_date_partitioning=true hoodie.datasource.write.keytranslator.class=org.apache.hudi.DayBasedPartitionPathKeyTranslator -hoodie.deltastreamer.schemaprovider.target.schema.file=/var/hoodie/ws/docker/demo/config/bench/source.avsc \ No newline at end of file +hoodie.deltastreamer.schemaprovider.target.schema.file=/var/hoodie/ws/docker/demo/config/test-suite/source.avsc diff --git a/docker/demo/config/test-suite/complex-dag-cow.yaml b/docker/demo/config/test-suite/complex-dag-cow.yaml index 5a97688..9854ee3 100644 --- a/docker/demo/config/test-suite/complex-dag-cow.yaml +++ b/docker/demo/config/test-suite/complex-dag-cow.yaml @@ -17,7 +17,7 @@ first_insert: config: record_size: 70000 num_insert_partitions: 1 - repeat_count: 5 + repeat_count: 1 num_records_insert: 1000 type: InsertNode deps: none @@ -25,7 +25,7 @@ second_insert: config: record_size: 70000 num_insert_partitions: 1 - repeat_count: 5 + repeat_count: 1 num_records_insert: 10000 deps: first_insert type: InsertNode @@ -46,7 +46,7 @@ first_upsert: record_size: 70000 num_insert_partitions: 1 num_records_insert: 300 - repeat_count: 5 + repeat_count: 1 num_records_upsert: 100 num_upsert_partitions: 10 type: UpsertNode @@ -65,9 +65,9 @@ first_hive_query: prop3: "set hive.strict.checks.large.query=false" prop4: "set 
hive.stats.autogather=false" hive_queries: - query1: "select count(*) from testdb1.table1 group by `_row_key` having count(*) > 1" + query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1" result1: 0 - query2: "select count(*) from testdb1.table1" + query2: "select count(*) from testdb.table1" result2: 22100000 type: HiveQueryNode deps: first_hive_sync @@ -76,7 +76,7 @@ second_upsert: record_size: 70000 num_insert_partitions: 1 num_records_insert: 300 - repeat_count: 5 + repeat_count: 1 num_records_upsert: 100 num_upsert_partitions: 10 type: UpsertNode @@ -89,9 +89,9 @@ second_hive_query: prop3: "set hive.strict.checks.large.query=false" prop4: "set hive.stats.autogather=false" hive_queries: - query1: "select count(*) from testdb1.table1 group by `_row_key` having count(*) > 1" + query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1" result1: 0 - query2: "select count(*) from testdb1.table1" + query2: "select count(*) from testdb.table1" result2: 22100 type: HiveQueryNode deps: second_upsert curl -i "https://repo1.maven.org/maven2/org/apache/hive/hive-common/2.3.1/hive-common-2.3.1.jar" > hive-common-2.3.1.jar curl -i "https://repo1.maven.org/maven2/org/apache/hive/hive-exec/2.3.1/hive-exec-2.3.1-core.jar" > hive-exec-2.3.1-core.jar curl -i "https://repo1.maven.org/maven2/org/apache/hive/hive-jdbc/2.3.1/hive-jdbc-2.3.1.jar" > hive-jdbc-2.3.1.jar curl -i "https://repo1.maven.org/maven2/org/apache/hive/hive-llap-common/2.3.1/hive-llap-common-2.3.1.jar" > hive-llap-common-2.3.1.jar curl -i "https://repo1.maven.org/maven2/org/apache/hive/hive-metastore/2.3.1/hive-metastore-2.3.1.jar" > hive-metastore-2.3.1.jar curl -i "https://repo1.maven.org/maven2/org/apache/hive/hive-serde/2.3.1/hive-serde-2.3.1.jar" > hive-serde-2.3.1.jar curl -i "https://repo1.maven.org/maven2/org/apache/hive/hive-service/2.3.1/hive-service-2.3.1.jar" > hive-service-2.3.1.jar curl -i 
"https://repo1.maven.org/maven2/org/apache/hive/hive-service-rpc/2.3.1/hive-service-rpc-2.3.1.jar" > hive-service-rpc-2.3.1.jar curl -i "https://repo1.maven.org/maven2/org/apache/hive/shims/hive-shims-0.23/2.3.1/hive-shims-0.23-2.3.1.jar" > hive-shims-0.23-2.3.1.jar curl -i "https://repo1.maven.org/maven2/org/apache/hive/shims/hive-shims-common/2.3.1/hive-shims-common-2.3.1.jar" > hive-shims-common-2.3.1.jar curl -i "https://repo1.maven.org/maven2/org/apache/hive/hive-storage-api/2.3.1/hive-storage-api-2.3.1.jar" > hive-storage-api-2.3.1.jar curl -i "https://repo1.maven.org/maven2/org/apache/hive/hive-shims/2.3.1/hive-shims-2.3.1.jar" > hive-shims-2.3.1.jar curl -i "https://repo1.maven.org/maven2/org/json/json/20090211/json-20090211.jar" > json-20090211.jar docker cp hive-common-2.3.1.jar adhoc-2:/opt/spark/jars/ docker cp hive-jdbc-2.3.1.jar adhoc-2:/opt/spark/jars/ docker cp hive-metastore-2.3.1.jar adhoc-2:/opt/spark/jars/ docker cp hive-service-2.3.1.jar adhoc-2:/opt/spark/jars/ docker cp hive-shims-0.23-2.3.1.jar adhoc-2:/opt/spark/jars/ docker cp hive-shims-common-2.3.1.jar hive-shims-common-2.3.1.jar docker cp hive-shims-common-2.3.1.jar adhoc-2:/opt/spark/jars/ docker cp hive-exec-2.3.1-core.jar adhoc-2:/opt/spark/jars/ docker cp hive-llap-common-2.3.1.jar adhoc-2:/opt/spark/jars/ docker cp hive-serde-2.3.1.jar adhoc-2:/opt/spark/jars/ docker cp hive-service-rpc-2.3.1.jar adhoc-2:/opt/spark/jars/ docker cp hive-shims-2.3.1.jar adhoc-2:/opt/spark/jars/ docker cp hive-storage-api-2.3.1.jar adhoc-2:/opt/spark/jars/ docker cp json-20090211.jar adhoc-2:/opt/spark/jars/ docker cp packaging/hudi-integ-test-bundle/target/hudi-integ-test-bundle-0.6.0-rc.jar adhoc-2:/opt docker cp packaging/hudi-hive-sync-bundle/target/hudi-hive-sync-bundle-0.6.0-rc.jar adhoc-2:/opt/ docker cp docker/demo/config/test-suite/complex-dag-cow.yaml adhoc-2:/var/hoodie/ws/docker/demo/config/test-suite/ docker exec 
-it adhoc-2 /bin/bash hdfs dfs -mkdir -p /var/hoodie/ws/docker/demo/config/test-suite/ hdfs dfs -copyFromLocal /var/hoodie/ws/docker/demo/config/test-suite/* /var/hoodie/ws/docker/demo/config/test-suite/ spark-submit --jars /opt/hudi-hive-sync-bundle-0.6.1-SNAPSHOT.jar --packages org.apache.spark:spark-avro_2.11:2.4.0 --conf spark.task.cpus=1 --conf spark.executor.cores=1 --conf spark.task.maxFailures=100 --conf spark.memory.fraction=0.4 --conf spark.rdd.compress=true --conf spark.kryoserializer.buffer.max=2000m --conf spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.memory.storageFraction=0.1 --conf spark.shuffle.service.enabled=true --conf spark.sql.hive.convertMetastoreParquet=false --conf spark.ui.port=5555 --conf spark.driver.maxResultSize=12g --conf spark.executor.heartbeatInterval=120s --conf spark.network.timeout=600s --conf spark.eventLog.overwrite=true --conf spark.eventLog.enabled=true --conf spark.yarn.max.executor.failures=10 --conf spark.sql.catalogImplementation=hive --conf spark.sql.shuffle.partitions=1000 --conf spark.driver.extraClassPath=hive-common-2.3.1.jar:hive-exec-2.3.1-core.jar:hive-jdbc-2.3.1.jar:hive-llap-common-2.3.1.jar:hive-metastore-2.3.1.jar:hive-serde-2.3.1.jar:hive-service-2.3.1.jar:hive-service-rpc-2.3.1.jar:hive-shims-0.23-2.3.1.jar:hive-shims-common-2.3.1.jar:hive-storage-api-2.3.1.jar:hive-shims-2.3.1.jar:spark-hive-thriftserver_2.12-3.0.0-preview2.jar:json-20090211.jar --conf spark.executor.extraClassPath=hive-common-2.3.1.jar:hive-exec-2.3.1-core.jar:hive-jdbc-2.3.1.jar:hive-llap-common-2.3.1.jar:hive-metastore-2.3.1.jar:hive-serde-2.3.1.jar:hive-service-2.3.1.jar:hive-service-rpc-2.3.1.jar:hive-shims-0.23-2.3.1.jar:hive-shims-common-2.3.1.jar:hive-storage-api-2.3.1.jar:hive-shims-2.3.1.jar:spark-hive-thriftserver_2.12-3.0.0-preview2.jar:json-20090211.jar --class org.apache.hudi.integ.testsuite.HoodieTestSuiteJob /opt/hudi-integ-test-bundle-0.6.1-SNAPSHOT.jar --source-ordering-field timestamp 
--target-base-path /user/hive/warehouse/hudi-integ-test-suite/output --input-base-path /user/hive/warehouse/hudi-integ-test-suite/input --target-table test_table --props /var/hoodie/ws/docker/demo/config/test-suite/test-source.properties --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider --source-limit 300000000 --source-class org.apache.hudi.utilities.sources.AvroDFSSource --input-file-size 125829120 --workload-yaml-path /var/hoodie/ws/docker/demo/config/test-suite/complex-dag-cow-2.yaml --workload-generator-classname org.apache.hudi.integ.testsuite.dag.WorkflowDagGenerator --table-type COPY_ON_WRITE --compact-scheduling-minshare 1 --hoodie-conf "hoodie.deltastreamer.source.test.num_partitions=100" --hoodie-conf "hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false" --hoodie-conf "hoodie.deltastreamer.source.test.max_unique_records=100000000" --hoodie-conf "hoodie.embed.timeline.server=false" --hoodie-conf "hoodie.datasource.write.recordkey.field=_row_key" --hoodie-conf "hoodie.deltastreamer.source.dfs.root=/user/hive/warehouse/hudi-integ-test-suite/input" --hoodie-conf "hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.ComplexKeyGenerator" --hoodie-conf "hoodie.datasource.write.partitionpath.field=timestamp" --hoodie-conf "hoodie.deltastreamer.schemaprovider.source.schema.file=/var/hoodie/ws/docker/demo/config/test-suite/source.avsc" --hoodie-conf "hoodie.datasource.hive_sync.assume_date_partitioning=false" --hoodie-conf "hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/" --hoodie-conf "hoodie.datasource.hive_sync.database=testdb" --hoodie-conf "hoodie.datasource.hive_sync.table=test_table" --hoodie-conf "hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.NonPartitionedExtractor" --hoodie-conf "hoodie.datasource.hive_sync.assume_date_partitioning=true" --hoodie-conf 
"hoodie.datasource.write.keytranslator.class=org.apache.hudi.DayBasedPartitionPathKeyTranslator" --hoodie-conf "hoodie.deltastreamer.schemaprovider.target.schema.file=/var/hoodie/ws/docker/demo/config/test-suite/source.avsc" > NoClassDefFoundError with AbstractSyncTool while running HoodieTestSuiteJob > --------------------------------------------------------------------------- > > Key: HUDI-1204 > URL: https://issues.apache.org/jira/browse/HUDI-1204 > Project: Apache Hudi > Issue Type: Bug > Components: Testing > Affects Versions: 0.6.1 > Reporter: sivabalan narayanan > Assignee: Nishith Agarwal > Priority: Major > > I was trying to run HoodieTestSuiteJob in my local docker set up and ran into > dep issue. > > spark-submit --master local --class > org.apache.hudi.integ.testsuite.HoodieTestSuiteJob --packages > com.databricks:spark-avro_2.11:4.0.0 > /opt/hudi-integ-test-bundle-0.6.0-rc1.jar --source-ordering-field timestamp > --target-base-path /user/hive/warehouse/hudi-test-suite/output > --input-base-path /user/hive/warehouse/hudi-test-suite/input > --target-table test_table --props file:///opt/test-source.properties > --schemaprovider-class > org.apache.hudi.utilities.schema.FilebasedSchemaProvider --source-class > org.apache.hudi.utilities.sources.AvroDFSSource --input-file-size 12582912 > --workload-yaml-path > /var/hoodie/ws/docker/demo/config/test-suite/complex-dag-cow.yaml > --table-type COPY_ON_WRITE --workload-generator-classname yaml > > {code:java} > 20/08/19 21:42:26 WARN NativeCodeLoader: Unable to load native-hadoop library > for your platform... 
using builtin-java classes where applicable > Exception in thread "main" java.lang.NoClassDefFoundError: > org/apache/hudi/sync/common/AbstractSyncTool > at java.lang.ClassLoader.defineClass1(Native Method) > at java.lang.ClassLoader.defineClass(ClassLoader.java:763) > at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142) > at java.net.URLClassLoader.defineClass(URLClassLoader.java:468) > at java.net.URLClassLoader.access$100(URLClassLoader.java:74) > at java.net.URLClassLoader$1.run(URLClassLoader.java:369) > at java.net.URLClassLoader$1.run(URLClassLoader.java:363) > at java.security.AccessController.doPrivileged(Native Method) > at java.net.URLClassLoader.findClass(URLClassLoader.java:362) > at java.lang.ClassLoader.loadClass(ClassLoader.java:424) > at java.lang.ClassLoader.loadClass(ClassLoader.java:357) > at > org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer$Config.<init>(HoodieDeltaStreamer.java:279) > at > org.apache.hudi.integ.testsuite.HoodieTestSuiteJob$HoodieTestSuiteConfig.<init>(HoodieTestSuiteJob.java:153) > at > org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.main(HoodieTestSuiteJob.java:114) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) > at > org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845) > at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161) > at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184) > at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86) > at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920) > at 
org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > Caused by: java.lang.ClassNotFoundException: > org.apache.hudi.sync.common.AbstractSyncTool > at java.net.URLClassLoader.findClass(URLClassLoader.java:382) > at java.lang.ClassLoader.loadClass(ClassLoader.java:424) > at java.lang.ClassLoader.loadClass(ClassLoader.java:357) > ... 26 more > {code} > I tried adding hudi-sync-common as dep to hudi-utilities, but didn't fix the > issue. > -- This message was sent by Atlassian Jira (v8.3.4#803005)