This is an automated email from the ASF dual-hosted git repository. vinoth pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git
The following commit(s) were added to refs/heads/master by this push: new 74ecc27 [HUDI-846][HUDI-848] Enable Incremental cleaning and embedded timeline-server by default (#1634) 74ecc27 is described below commit 74ecc27e920c70fa4598d8e5a696954203a5b127 Author: Balaji Varadarajan <varad...@uber.com> AuthorDate: Wed May 20 05:29:43 2020 -0700 [HUDI-846][HUDI-848] Enable Incremental cleaning and embedded timeline-server by default (#1634) --- .../apache/hudi/config/HoodieCompactionConfig.java | 2 +- .../org/apache/hudi/config/HoodieWriteConfig.java | 2 +- .../table/action/compact/TestHoodieCompactor.java | 9 ++++- hudi-hive-sync/pom.xml | 6 --- .../hudi/hive/testutils/HiveTestService.java | 1 + hudi-spark/pom.xml | 44 +++++++++++++++++++++- hudi-utilities/pom.xml | 7 +--- pom.xml | 9 +---- 8 files changed, 56 insertions(+), 24 deletions(-) diff --git a/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java b/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java index bb087a2..d135a81 100644 --- a/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java +++ b/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java @@ -96,7 +96,7 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig { private static final String DEFAULT_CLEANER_POLICY = HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name(); private static final String DEFAULT_AUTO_CLEAN = "true"; private static final String DEFAULT_INLINE_COMPACT = "false"; - private static final String DEFAULT_INCREMENTAL_CLEANER = "false"; + private static final String DEFAULT_INCREMENTAL_CLEANER = "true"; private static final String DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS = "1"; private static final String DEFAULT_CLEANER_FILE_VERSIONS_RETAINED = "3"; private static final String DEFAULT_CLEANER_COMMITS_RETAINED = "10"; diff --git a/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java index 11931c1..3f0f619 100644 --- a/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java +++ b/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java @@ -82,7 +82,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig { private static final String DEFAULT_FINALIZE_WRITE_PARALLELISM = DEFAULT_PARALLELISM; private static final String EMBEDDED_TIMELINE_SERVER_ENABLED = "hoodie.embed.timeline.server"; - private static final String DEFAULT_EMBEDDED_TIMELINE_SERVER_ENABLED = "false"; + private static final String DEFAULT_EMBEDDED_TIMELINE_SERVER_ENABLED = "true"; private static final String FAIL_ON_TIMELINE_ARCHIVING_ENABLED_PROP = "hoodie.fail.on.timeline.archiving"; private static final String DEFAULT_FAIL_ON_TIMELINE_ARCHIVING_ENABLED = "true"; diff --git a/hudi-client/src/test/java/org/apache/hudi/table/action/compact/TestHoodieCompactor.java b/hudi-client/src/test/java/org/apache/hudi/table/action/compact/TestHoodieCompactor.java index 0ebebed..9aec8ad 100644 --- a/hudi-client/src/test/java/org/apache/hudi/table/action/compact/TestHoodieCompactor.java +++ b/hudi-client/src/test/java/org/apache/hudi/table/action/compact/TestHoodieCompactor.java @@ -30,6 +30,9 @@ import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.HoodieTestUtils; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.HoodieInstant.State; +import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.config.HoodieIndexConfig; @@ -152,9 +155,13 @@ public class TestHoodieCompactor extends HoodieClientTestHarness { HoodieIndex index = new HoodieBloomIndex<>(config); updatedRecords = index.tagLocation(updatedRecordsRDD, jsc, table).collect(); - // Write them to corresponding avro logfiles + // Write them to corresponding avro logfiles. Also, set the state transition properly. HoodieTestUtils.writeRecordsToLogFiles(fs, metaClient.getBasePath(), HoodieTestDataGenerator.AVRO_SCHEMA_WITH_METADATA_FIELDS, updatedRecords); + metaClient.getActiveTimeline().transitionRequestedToInflight(new HoodieInstant(State.REQUESTED, + HoodieTimeline.DELTA_COMMIT_ACTION, newCommitTime), Option.empty()); + writeClient.commit(newCommitTime, jsc.emptyRDD(), Option.empty()); + metaClient.reloadActiveTimeline(); // Verify that all data file has one log file table = HoodieTable.create(config, hadoopConf); diff --git a/hudi-hive-sync/pom.xml b/hudi-hive-sync/pom.xml index 973cb3e..9d4b8e2 100644 --- a/hudi-hive-sync/pom.xml +++ b/hudi-hive-sync/pom.xml @@ -171,12 +171,6 @@ <scope>test</scope> </dependency> - <dependency> - <groupId>org.eclipse.jetty.aggregate</groupId> - <artifactId>jetty-all</artifactId> - <scope>test</scope> - </dependency> - <!-- Hadoop - Test --> <dependency> <groupId>org.apache.hadoop</groupId> diff --git a/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestService.java b/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestService.java index 47d9882..e3a70a8 100644 --- a/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestService.java +++ b/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestService.java @@ -105,6 +105,7 @@ public class HiveTestService { executorService = Executors.newSingleThreadExecutor(); tServer = startMetaStore(bindIP, metastorePort, serverConf); + serverConf.set("hive.in.test", "true"); hiveServer = startHiveServer(serverConf); String serverHostname; diff --git a/hudi-spark/pom.xml b/hudi-spark/pom.xml index 9bd07c0..6be663a 100644 --- a/hudi-spark/pom.xml +++ b/hudi-spark/pom.xml @@ -225,6 +225,12 @@ <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-core_${scala.binary.version}</artifactId> + <exclusions> + <exclusion> + <groupId>javax.servlet</groupId> + <artifactId>*</artifactId> + </exclusion> + </exclusions> </dependency> <dependency> <groupId>org.apache.spark</groupId> @@ -253,7 +259,17 @@ <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> - <scope>provided</scope> + <exclusions> + <exclusion> + <groupId>javax.servlet</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>javax.servlet.jsp</groupId> + <artifactId>*</artifactId> + </exclusion> + </exclusions> + <scope>provided</scope> </dependency> <!-- Hive --> @@ -277,16 +293,42 @@ <groupId>${hive.groupid}</groupId> <artifactId>hive-jdbc</artifactId> <version>${hive.version}</version> + <exclusions> + <exclusion> + <groupId>javax.servlet</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>javax.servlet.jsp</groupId> + <artifactId>*</artifactId> + </exclusion> + </exclusions> </dependency> <dependency> <groupId>${hive.groupid}</groupId> <artifactId>hive-metastore</artifactId> <version>${hive.version}</version> + <exclusions> + <exclusion> + <groupId>javax.servlet</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>javax.servlet.jsp</groupId> + <artifactId>*</artifactId> + </exclusion> + </exclusions> </dependency> <dependency> <groupId>${hive.groupid}</groupId> <artifactId>hive-common</artifactId> <version>${hive.version}</version> + <exclusions> + <exclusion> + <groupId>org.eclipse.jetty.orbit</groupId> + <artifactId>javax.servlet</artifactId> + </exclusion> + </exclusions> </dependency> <!-- Hoodie - Test --> diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml index 0b79414..360a3d4 100644 --- a/hudi-utilities/pom.xml +++ b/hudi-utilities/pom.xml @@ -86,13 +86,8 @@ <!-- Needs to be at the top to ensure we get the correct dependency versions for jetty-server --> <groupId>org.eclipse.jetty.aggregate</groupId> <artifactId>jetty-all</artifactId> - <scope>test</scope> - </dependency> - - <dependency> - <groupId>org.eclipse.jetty</groupId> - <artifactId>jetty-server</artifactId> <version>${jetty.version}</version> + <classifier>uber</classifier> <scope>test</scope> </dependency> diff --git a/pom.xml b/pom.xml index 9fe27d7..9bcb898 100644 --- a/pom.xml +++ b/pom.xml @@ -102,7 +102,7 @@ <scalatest.version>3.0.1</scalatest.version> <surefire-log4j.file>file://${project.basedir}/src/test/resources/log4j-surefire.properties</surefire-log4j.file> <thrift.version>0.12.0</thrift.version> - <jetty.version>7.6.0.v20120127</jetty.version> + <jetty.version>9.4.15.v20190215</jetty.version> <hbase.version>1.2.3</hbase.version> <codehaus-jackson.version>1.9.13</codehaus-jackson.version> <h2.version>1.4.199</h2.version> @@ -463,13 +463,6 @@ <artifactId>jersey-container-servlet-core</artifactId> <version>${glassfish.version}</version> </dependency> - <!-- Needed for running HiveServer for Tests --> - <dependency> - <groupId>org.eclipse.jetty.aggregate</groupId> - <artifactId>jetty-all</artifactId> - <scope>test</scope> - <version>${jetty.version}</version> - </dependency> <!-- Avro --> <dependency>