This is an automated email from the ASF dual-hosted git repository. sivabalan pushed a commit to branch release-0.5.3 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit b69bb18fdd3d5ab825d1f59024b10204890968ff Author: Balaji Varadarajan <[email protected]> AuthorDate: Wed May 20 05:29:43 2020 -0700 [HUDI-846][HUDI-848] Enable Incremental cleaning and embedded timeline-server by default (#1634) --- .../apache/hudi/config/HoodieCompactionConfig.java | 2 +- .../org/apache/hudi/config/HoodieWriteConfig.java | 2 +- .../hudi/table/compact/TestHoodieCompactor.java | 10 ++++- hudi-hive/pom.xml | 6 --- .../org/apache/hudi/hive/util/HiveTestService.java | 1 + hudi-spark/pom.xml | 44 +++++++++++++++++++++- hudi-utilities/pom.xml | 7 +--- pom.xml | 9 +---- 8 files changed, 57 insertions(+), 24 deletions(-) diff --git a/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java b/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java index 074ea78..5fa2b16 100644 --- a/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java +++ b/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java @@ -95,7 +95,7 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig { private static final String DEFAULT_CLEANER_POLICY = HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name(); private static final String DEFAULT_AUTO_CLEAN = "true"; private static final String DEFAULT_INLINE_COMPACT = "false"; - private static final String DEFAULT_INCREMENTAL_CLEANER = "false"; + private static final String DEFAULT_INCREMENTAL_CLEANER = "true"; private static final String DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS = "1"; private static final String DEFAULT_CLEANER_FILE_VERSIONS_RETAINED = "3"; private static final String DEFAULT_CLEANER_COMMITS_RETAINED = "10"; diff --git a/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java index 24984db..47c109a 100644 --- a/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java +++ b/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java @@ -79,7 +79,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig { private static final String DEFAULT_FINALIZE_WRITE_PARALLELISM = DEFAULT_PARALLELISM; private static final String EMBEDDED_TIMELINE_SERVER_ENABLED = "hoodie.embed.timeline.server"; - private static final String DEFAULT_EMBEDDED_TIMELINE_SERVER_ENABLED = "false"; + private static final String DEFAULT_EMBEDDED_TIMELINE_SERVER_ENABLED = "true"; private static final String FAIL_ON_TIMELINE_ARCHIVING_ENABLED_PROP = "hoodie.fail.on.timeline.archiving"; private static final String DEFAULT_FAIL_ON_TIMELINE_ARCHIVING_ENABLED = "true"; diff --git a/hudi-client/src/test/java/org/apache/hudi/table/compact/TestHoodieCompactor.java b/hudi-client/src/test/java/org/apache/hudi/table/compact/TestHoodieCompactor.java index 482d721..8fa55ec 100644 --- a/hudi-client/src/test/java/org/apache/hudi/table/compact/TestHoodieCompactor.java +++ b/hudi-client/src/test/java/org/apache/hudi/table/compact/TestHoodieCompactor.java @@ -27,8 +27,12 @@ import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.HoodieTestUtils; import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.HoodieTimeline; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.util.FSUtils; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.HoodieInstant.State; +import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodieMemoryConfig; @@ -148,9 +152,13 @@ public class TestHoodieCompactor extends HoodieClientTestHarness { HoodieIndex index = new HoodieBloomIndex<>(config); updatedRecords = index.tagLocation(updatedRecordsRDD, jsc, table).collect(); - // Write them to corresponding avro logfiles + // Write them to corresponding avro logfiles. Also, set the state transition properly. HoodieTestUtils.writeRecordsToLogFiles(fs, metaClient.getBasePath(), HoodieTestDataGenerator.AVRO_SCHEMA_WITH_METADATA_FIELDS, updatedRecords); + metaClient.getActiveTimeline().transitionRequestedToInflight(new HoodieInstant(State.REQUESTED, + HoodieTimeline.DELTA_COMMIT_ACTION, newCommitTime), Option.empty()); + writeClient.commit(newCommitTime, jsc.emptyRDD(), Option.empty()); + metaClient.reloadActiveTimeline(); // Verify that all data file has one log file metaClient = HoodieTableMetaClient.reload(metaClient); diff --git a/hudi-hive/pom.xml b/hudi-hive/pom.xml index 01e6ff1..5ad8708 100644 --- a/hudi-hive/pom.xml +++ b/hudi-hive/pom.xml @@ -153,12 +153,6 @@ <scope>test</scope> </dependency> - <dependency> - <groupId>org.eclipse.jetty.aggregate</groupId> - <artifactId>jetty-all</artifactId> - <scope>test</scope> - </dependency> - <!-- Hadoop - Test --> <dependency> <groupId>org.apache.hadoop</groupId> diff --git a/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java b/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java index 0cef82b..ee5b09a 100644 --- a/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java +++ b/hudi-hive/src/test/java/org/apache/hudi/hive/util/HiveTestService.java @@ -105,6 +105,7 @@ public class HiveTestService { executorService = Executors.newSingleThreadExecutor(); tServer = startMetaStore(bindIP, metastorePort, serverConf); + serverConf.set("hive.in.test", "true"); hiveServer = startHiveServer(serverConf); String serverHostname; diff --git a/hudi-spark/pom.xml b/hudi-spark/pom.xml index 501cd8f..6b90a69 100644 --- a/hudi-spark/pom.xml +++ b/hudi-spark/pom.xml @@ -220,6 +220,12 @@ <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-core_${scala.binary.version}</artifactId> + <exclusions> + <exclusion> + <groupId>javax.servlet</groupId> + <artifactId>*</artifactId> + </exclusion> + </exclusions> </dependency> <dependency> <groupId>org.apache.spark</groupId> @@ -248,7 +254,17 @@ <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> - <scope>provided</scope> + <exclusions> + <exclusion> + <groupId>javax.servlet</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>javax.servlet.jsp</groupId> + <artifactId>*</artifactId> + </exclusion> + </exclusions> + <scope>provided</scope> </dependency> <!-- Hive --> @@ -272,16 +288,42 @@ <groupId>${hive.groupid}</groupId> <artifactId>hive-jdbc</artifactId> <version>${hive.version}</version> + <exclusions> + <exclusion> + <groupId>javax.servlet</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>javax.servlet.jsp</groupId> + <artifactId>*</artifactId> + </exclusion> + </exclusions> </dependency> <dependency> <groupId>${hive.groupid}</groupId> <artifactId>hive-metastore</artifactId> <version>${hive.version}</version> + <exclusions> + <exclusion> + <groupId>javax.servlet</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>javax.servlet.jsp</groupId> + <artifactId>*</artifactId> + </exclusion> + </exclusions> </dependency> <dependency> <groupId>${hive.groupid}</groupId> <artifactId>hive-common</artifactId> <version>${hive.version}</version> + <exclusions> + <exclusion> + <groupId>org.eclipse.jetty.orbit</groupId> + <artifactId>javax.servlet</artifactId> + </exclusion> + </exclusions> </dependency> <!-- Hoodie - Test --> diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml index b8b71ac..6211311 100644 --- a/hudi-utilities/pom.xml +++ b/hudi-utilities/pom.xml @@ -82,13 +82,8 @@ <!-- Needs to be at the top to ensure we get the correct dependency versions for jetty-server --> <groupId>org.eclipse.jetty.aggregate</groupId> <artifactId>jetty-all</artifactId> - <scope>test</scope> - </dependency> - - <dependency> - <groupId>org.eclipse.jetty</groupId> - <artifactId>jetty-server</artifactId> <version>${jetty.version}</version> + <classifier>uber</classifier> <scope>test</scope> </dependency> diff --git a/pom.xml b/pom.xml index b49c653..8603024 100644 --- a/pom.xml +++ b/pom.xml @@ -101,7 +101,7 @@ <scalatest.version>3.0.1</scalatest.version> <surefire-log4j.file>file://${project.basedir}/src/test/resources/log4j-surefire.properties</surefire-log4j.file> <thrift.version>0.12.0</thrift.version> - <jetty.version>7.6.0.v20120127</jetty.version> + <jetty.version>9.4.15.v20190215</jetty.version> <hbase.version>1.2.3</hbase.version> <codehaus-jackson.version>1.9.13</codehaus-jackson.version> <h2.version>1.4.199</h2.version> @@ -456,13 +456,6 @@ <artifactId>jersey-container-servlet-core</artifactId> <version>${glassfish.version}</version> </dependency> - <!-- Needed for running HiveServer for Tests --> - <dependency> - <groupId>org.eclipse.jetty.aggregate</groupId> - <artifactId>jetty-all</artifactId> - <scope>test</scope> - <version>${jetty.version}</version> - </dependency> <!-- Avro --> <dependency>
