This is an automated email from the ASF dual-hosted git repository.
vinoth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 74ecc27 [HUDI-846][HUDI-848] Enable Incremental cleaning and embedded
timeline-server by default (#1634)
74ecc27 is described below
commit 74ecc27e920c70fa4598d8e5a696954203a5b127
Author: Balaji Varadarajan <[email protected]>
AuthorDate: Wed May 20 05:29:43 2020 -0700
[HUDI-846][HUDI-848] Enable Incremental cleaning and embedded
timeline-server by default (#1634)
---
.../apache/hudi/config/HoodieCompactionConfig.java | 2 +-
.../org/apache/hudi/config/HoodieWriteConfig.java | 2 +-
.../table/action/compact/TestHoodieCompactor.java | 9 ++++-
hudi-hive-sync/pom.xml | 6 ---
.../hudi/hive/testutils/HiveTestService.java | 1 +
hudi-spark/pom.xml | 44 +++++++++++++++++++++-
hudi-utilities/pom.xml | 7 +---
pom.xml | 9 +----
8 files changed, 56 insertions(+), 24 deletions(-)
diff --git
a/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java
b/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java
index bb087a2..d135a81 100644
---
a/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java
+++
b/hudi-client/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java
@@ -96,7 +96,7 @@ public class HoodieCompactionConfig extends
DefaultHoodieConfig {
private static final String DEFAULT_CLEANER_POLICY =
HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name();
private static final String DEFAULT_AUTO_CLEAN = "true";
private static final String DEFAULT_INLINE_COMPACT = "false";
- private static final String DEFAULT_INCREMENTAL_CLEANER = "false";
+ private static final String DEFAULT_INCREMENTAL_CLEANER = "true";
private static final String DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS = "1";
private static final String DEFAULT_CLEANER_FILE_VERSIONS_RETAINED = "3";
private static final String DEFAULT_CLEANER_COMMITS_RETAINED = "10";
diff --git
a/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
b/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
index 11931c1..3f0f619 100644
--- a/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
+++ b/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
@@ -82,7 +82,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
private static final String DEFAULT_FINALIZE_WRITE_PARALLELISM =
DEFAULT_PARALLELISM;
private static final String EMBEDDED_TIMELINE_SERVER_ENABLED =
"hoodie.embed.timeline.server";
- private static final String DEFAULT_EMBEDDED_TIMELINE_SERVER_ENABLED =
"false";
+ private static final String DEFAULT_EMBEDDED_TIMELINE_SERVER_ENABLED =
"true";
private static final String FAIL_ON_TIMELINE_ARCHIVING_ENABLED_PROP =
"hoodie.fail.on.timeline.archiving";
private static final String DEFAULT_FAIL_ON_TIMELINE_ARCHIVING_ENABLED =
"true";
diff --git
a/hudi-client/src/test/java/org/apache/hudi/table/action/compact/TestHoodieCompactor.java
b/hudi-client/src/test/java/org/apache/hudi/table/action/compact/TestHoodieCompactor.java
index 0ebebed..9aec8ad 100644
---
a/hudi-client/src/test/java/org/apache/hudi/table/action/compact/TestHoodieCompactor.java
+++
b/hudi-client/src/test/java/org/apache/hudi/table/action/compact/TestHoodieCompactor.java
@@ -30,6 +30,9 @@ import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.model.HoodieTestUtils;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.common.table.timeline.HoodieInstant.State;
+import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieIndexConfig;
@@ -152,9 +155,13 @@ public class TestHoodieCompactor extends
HoodieClientTestHarness {
HoodieIndex index = new HoodieBloomIndex<>(config);
updatedRecords = index.tagLocation(updatedRecordsRDD, jsc,
table).collect();
- // Write them to corresponding avro logfiles
+ // Write them to corresponding avro logfiles. Also, set the state
transition properly.
HoodieTestUtils.writeRecordsToLogFiles(fs, metaClient.getBasePath(),
HoodieTestDataGenerator.AVRO_SCHEMA_WITH_METADATA_FIELDS,
updatedRecords);
+ metaClient.getActiveTimeline().transitionRequestedToInflight(new
HoodieInstant(State.REQUESTED,
+ HoodieTimeline.DELTA_COMMIT_ACTION, newCommitTime), Option.empty());
+ writeClient.commit(newCommitTime, jsc.emptyRDD(), Option.empty());
+ metaClient.reloadActiveTimeline();
// Verify that all data file has one log file
table = HoodieTable.create(config, hadoopConf);
diff --git a/hudi-hive-sync/pom.xml b/hudi-hive-sync/pom.xml
index 973cb3e..9d4b8e2 100644
--- a/hudi-hive-sync/pom.xml
+++ b/hudi-hive-sync/pom.xml
@@ -171,12 +171,6 @@
<scope>test</scope>
</dependency>
- <dependency>
- <groupId>org.eclipse.jetty.aggregate</groupId>
- <artifactId>jetty-all</artifactId>
- <scope>test</scope>
- </dependency>
-
<!-- Hadoop - Test -->
<dependency>
<groupId>org.apache.hadoop</groupId>
diff --git
a/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestService.java
b/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestService.java
index 47d9882..e3a70a8 100644
---
a/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestService.java
+++
b/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestService.java
@@ -105,6 +105,7 @@ public class HiveTestService {
executorService = Executors.newSingleThreadExecutor();
tServer = startMetaStore(bindIP, metastorePort, serverConf);
+ serverConf.set("hive.in.test", "true");
hiveServer = startHiveServer(serverConf);
String serverHostname;
diff --git a/hudi-spark/pom.xml b/hudi-spark/pom.xml
index 9bd07c0..6be663a 100644
--- a/hudi-spark/pom.xml
+++ b/hudi-spark/pom.xml
@@ -225,6 +225,12 @@
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
@@ -253,7 +259,17 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
- <scope>provided</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet.jsp</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
+ <scope>provided</scope>
</dependency>
<!-- Hive -->
@@ -277,16 +293,42 @@
<groupId>${hive.groupid}</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet.jsp</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-metastore</artifactId>
<version>${hive.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet.jsp</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-common</artifactId>
<version>${hive.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.eclipse.jetty.orbit</groupId>
+ <artifactId>javax.servlet</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<!-- Hoodie - Test -->
diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml
index 0b79414..360a3d4 100644
--- a/hudi-utilities/pom.xml
+++ b/hudi-utilities/pom.xml
@@ -86,13 +86,8 @@
<!-- Needs to be at the top to ensure we get the correct dependency
versions for jetty-server -->
<groupId>org.eclipse.jetty.aggregate</groupId>
<artifactId>jetty-all</artifactId>
- <scope>test</scope>
- </dependency>
-
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-server</artifactId>
<version>${jetty.version}</version>
+ <classifier>uber</classifier>
<scope>test</scope>
</dependency>
diff --git a/pom.xml b/pom.xml
index 9fe27d7..9bcb898 100644
--- a/pom.xml
+++ b/pom.xml
@@ -102,7 +102,7 @@
<scalatest.version>3.0.1</scalatest.version>
<surefire-log4j.file>file://${project.basedir}/src/test/resources/log4j-surefire.properties</surefire-log4j.file>
<thrift.version>0.12.0</thrift.version>
- <jetty.version>7.6.0.v20120127</jetty.version>
+ <jetty.version>9.4.15.v20190215</jetty.version>
<hbase.version>1.2.3</hbase.version>
<codehaus-jackson.version>1.9.13</codehaus-jackson.version>
<h2.version>1.4.199</h2.version>
@@ -463,13 +463,6 @@
<artifactId>jersey-container-servlet-core</artifactId>
<version>${glassfish.version}</version>
</dependency>
- <!-- Needed for running HiveServer for Tests -->
- <dependency>
- <groupId>org.eclipse.jetty.aggregate</groupId>
- <artifactId>jetty-all</artifactId>
- <scope>test</scope>
- <version>${jetty.version}</version>
- </dependency>
<!-- Avro -->
<dependency>