Build failed in Jenkins: hudi-snapshot-deployment-0.5 #282

2020-05-18 Thread Apache Jenkins Server
See 


Changes:


--
[...truncated 2.38 KB...]
/home/jenkins/tools/maven/apache-maven-3.5.4/conf:
logging
settings.xml
toolchains.xml

/home/jenkins/tools/maven/apache-maven-3.5.4/conf/logging:
simplelogger.properties

/home/jenkins/tools/maven/apache-maven-3.5.4/lib:
aopalliance-1.0.jar
cdi-api-1.0.jar
cdi-api.license
commons-cli-1.4.jar
commons-cli.license
commons-io-2.5.jar
commons-io.license
commons-lang3-3.5.jar
commons-lang3.license
ext
guava-20.0.jar
guice-4.2.0-no_aop.jar
jansi-1.17.1.jar
jansi-native
javax.inject-1.jar
jcl-over-slf4j-1.7.25.jar
jcl-over-slf4j.license
jsr250-api-1.0.jar
jsr250-api.license
maven-artifact-3.5.4.jar
maven-artifact.license
maven-builder-support-3.5.4.jar
maven-builder-support.license
maven-compat-3.5.4.jar
maven-compat.license
maven-core-3.5.4.jar
maven-core.license
maven-embedder-3.5.4.jar
maven-embedder.license
maven-model-3.5.4.jar
maven-model-builder-3.5.4.jar
maven-model-builder.license
maven-model.license
maven-plugin-api-3.5.4.jar
maven-plugin-api.license
maven-repository-metadata-3.5.4.jar
maven-repository-metadata.license
maven-resolver-api-1.1.1.jar
maven-resolver-api.license
maven-resolver-connector-basic-1.1.1.jar
maven-resolver-connector-basic.license
maven-resolver-impl-1.1.1.jar
maven-resolver-impl.license
maven-resolver-provider-3.5.4.jar
maven-resolver-provider.license
maven-resolver-spi-1.1.1.jar
maven-resolver-spi.license
maven-resolver-transport-wagon-1.1.1.jar
maven-resolver-transport-wagon.license
maven-resolver-util-1.1.1.jar
maven-resolver-util.license
maven-settings-3.5.4.jar
maven-settings-builder-3.5.4.jar
maven-settings-builder.license
maven-settings.license
maven-shared-utils-3.2.1.jar
maven-shared-utils.license
maven-slf4j-provider-3.5.4.jar
maven-slf4j-provider.license
org.eclipse.sisu.inject-0.3.3.jar
org.eclipse.sisu.inject.license
org.eclipse.sisu.plexus-0.3.3.jar
org.eclipse.sisu.plexus.license
plexus-cipher-1.7.jar
plexus-cipher.license
plexus-component-annotations-1.7.1.jar
plexus-component-annotations.license
plexus-interpolation-1.24.jar
plexus-interpolation.license
plexus-sec-dispatcher-1.4.jar
plexus-sec-dispatcher.license
plexus-utils-3.1.0.jar
plexus-utils.license
slf4j-api-1.7.25.jar
slf4j-api.license
wagon-file-3.1.0.jar
wagon-file.license
wagon-http-3.1.0-shaded.jar
wagon-http.license
wagon-provider-api-3.1.0.jar
wagon-provider-api.license

/home/jenkins/tools/maven/apache-maven-3.5.4/lib/ext:
README.txt

/home/jenkins/tools/maven/apache-maven-3.5.4/lib/jansi-native:
freebsd32
freebsd64
linux32
linux64
osx
README.txt
windows32
windows64

/home/jenkins/tools/maven/apache-maven-3.5.4/lib/jansi-native/freebsd32:
libjansi.so

/home/jenkins/tools/maven/apache-maven-3.5.4/lib/jansi-native/freebsd64:
libjansi.so

/home/jenkins/tools/maven/apache-maven-3.5.4/lib/jansi-native/linux32:
libjansi.so

/home/jenkins/tools/maven/apache-maven-3.5.4/lib/jansi-native/linux64:
libjansi.so

/home/jenkins/tools/maven/apache-maven-3.5.4/lib/jansi-native/osx:
libjansi.jnilib

/home/jenkins/tools/maven/apache-maven-3.5.4/lib/jansi-native/windows32:
jansi.dll

/home/jenkins/tools/maven/apache-maven-3.5.4/lib/jansi-native/windows64:
jansi.dll
Finished /home/jenkins/tools/maven/apache-maven-3.5.4 Directory Listing :
Detected current version as: 
'HUDI_home=
0.6.0-SNAPSHOT'
[INFO] Scanning for projects...
[WARNING] 
[WARNING] Some problems were encountered while building the effective model for 
org.apache.hudi:hudi-spark_2.11:jar:0.6.0-SNAPSHOT
[WARNING] 'artifactId' contains an expression but should be a constant. @ 
org.apache.hudi:hudi-spark_${scala.binary.version}:[unknown-version], 

 line 26, column 15
[WARNING] 
[WARNING] Some problems were encountered while building the effective model for 
org.apache.hudi:hudi-timeline-service:jar:0.6.0-SNAPSHOT
[WARNING] 'build.plugins.plugin.(groupId:artifactId)' must be unique but found 
duplicate declaration of plugin org.jacoco:jacoco-maven-plugin @ 
org.apache.hudi:hudi-timeline-service:[unknown-version], 

 line 58, column 15
[WARNING] 
[WARNING] Some problems were encountered while building the effective model for 
org.apache.hudi:hudi-utilities_2.11:jar:0.6.0-SNAPSHOT
[WARNING] 'artifactId' contains an expression but should be a constant. @ 
org.apache.hudi:hudi-utilities_${scala.binary.version}:[unknown-version], 

 line 26, column 15
[WARNING] 
[WARNING] Some problems were encountered while building the effective model for 
org.apache.hudi:hudi-spark-bundle_2.11:jar:0.6.0-SNAPSHOT
[WARNING] 'artifactId' contains an expression but should be a constant. @ 

[jira] [Commented] (HUDI-648) Implement error log/table for Datasource/DeltaStreamer/WriteClient/Compaction writes

2020-05-18 Thread liujinhui (Jira)


[ 
https://issues.apache.org/jira/browse/HUDI-648?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17110824#comment-17110824
 ] 

liujinhui commented on HUDI-648:


Hello, are there any good ideas and design suggestions for this proposal? 
[~vinoth] [~rxu]

> Implement error log/table for Datasource/DeltaStreamer/WriteClient/Compaction 
> writes
> 
>
> Key: HUDI-648
> URL: https://issues.apache.org/jira/browse/HUDI-648
> Project: Apache Hudi (incubating)
>  Issue Type: New Feature
>  Components: DeltaStreamer, Spark Integration, Writer Core
>Reporter: Vinoth Chandar
>Priority: Major
>
> We would like a way to hand the erroring records from writing or compaction 
> back to the users, in a separate table or log. This needs to work generically 
> across all the different writer paths.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)
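
HUDI-648 above names the goal (hand failed records back to users in a separate log or
table, generically across writer paths), but no design is agreed in this thread and the
comment explicitly asks for suggestions. Purely to illustrate the kind of payload such an
error table might carry, a hypothetical sketch follows; every field name is invented and
nothing beyond the idea itself comes from the ticket.

```java
// Hypothetical sketch of one row in a side "error table"/log for failed records.
// None of these fields are specified in HUDI-648; they only illustrate the idea of
// handing failed records back to users across Datasource/DeltaStreamer/WriteClient/
// Compaction paths.
class ErrorRecord {
  String instantTime;    // commit/compaction instant that produced the failure
  String writerPath;     // e.g. "deltastreamer", "datasource", "compaction"
  String recordKey;      // key of the failed record, if it could be extracted
  String errorMessage;   // exception message or validation failure
  String rawRecordJson;  // original payload, preserved for replay or repair
}
```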


[GitHub] [incubator-hudi] codecov-commenter edited a comment on pull request #1640: [WIP] travis jvm fork issue

2020-05-18 Thread GitBox


codecov-commenter edited a comment on pull request #1640:
URL: https://github.com/apache/incubator-hudi/pull/1640#issuecomment-630496124


   # 
[Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=h1) 
Report
   > Merging 
[#1640](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=desc) 
into 
[master](https://codecov.io/gh/apache/incubator-hudi/commit/2600d2de8d9d963db920ff486032482a946fe3d6=desc)
 will **not change** coverage.
   > The diff coverage is `n/a`.
   
   [![Impacted file tree 
graph](https://codecov.io/gh/apache/incubator-hudi/pull/1640/graphs/tree.svg?width=650=150=pr=VTTXabwbs2)](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=tree)
   
   ```diff
   @@            Coverage Diff            @@
   ##             master    #1640   +/-   ##
   =========================================
     Coverage     16.59%   16.59%
     Complexity      798      798
   =========================================
     Files           344      344
     Lines         15160    15160
     Branches       1510     1510
   =========================================
     Hits           2516     2516
     Misses        12314    12314
     Partials        330      330
   ```
   
   
   
   --
   
   [Continue to review full report at 
Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=continue).
   > **Legend** - [Click here to learn 
more](https://docs.codecov.io/docs/codecov-delta)
   > `Δ = absolute  (impact)`, `ø = not affected`, `? = missing data`
   > Powered by 
[Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=footer).
 Last update 
[2600d2d...98cf454](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=lastupdated).
 Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments).
   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[incubator-hudi] branch master updated: [HUDI-858] Allow multiple operations to be executed within a single commit (#1633)

2020-05-18 Thread vbalaji
This is an automated email from the ASF dual-hosted git repository.

vbalaji pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git


The following commit(s) were added to refs/heads/master by this push:
 new e6f3bf1  [HUDI-858] Allow multiple operations to be executed within a 
single commit (#1633)
e6f3bf1 is described below

commit e6f3bf10cf2c62a1008b82765abdcd33cfd64c67
Author: Balaji Varadarajan 
AuthorDate: Mon May 18 19:27:24 2020 -0700

[HUDI-858] Allow multiple operations to be executed within a single commit 
(#1633)
---
 .../org/apache/hudi/config/HoodieWriteConfig.java  | 26 ++-
 .../action/commit/BaseCommitActionExecutor.java|  3 +-
 .../hudi/table/action/commit/BulkInsertHelper.java |  3 +-
 .../TestHoodieClientOnCopyOnWriteStorage.java  | 38 ++
 .../table/timeline/HoodieActiveTimeline.java   | 20 ++--
 5 files changed, 84 insertions(+), 6 deletions(-)
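
The commit adds a safety flag and a builder method for it. As a minimal usage sketch,
assuming the builder API shown in the diff below plus the standard newBuilder()/withPath()
calls (the base path here is illustrative only), a client could opt back into the 0.4.x
behavior like this:

```java
import org.apache.hudi.config.HoodieWriteConfig;

public class MultiWriteConfigExample {
  public static void main(String[] args) {
    // Only withAllowMultiWriteOnSameInstant(...) is introduced by the commit below;
    // the base path and the rest of the chain are a typical, illustrative configuration.
    HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
        .withPath("/tmp/hoodie/sample-table")     // illustrative base path
        .withAllowMultiWriteOnSameInstant(true)   // default is "false"
        .build();

    // Commit executors in this change consult the flag via the new getter:
    System.out.println(writeConfig.shouldAllowMultiWriteOnSameInstant());
  }
}
```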

diff --git 
a/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java 
b/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
index 0467657..11931c1 100644
--- a/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
+++ b/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
@@ -99,6 +99,20 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
   private static final String MAX_CONSISTENCY_CHECKS_PROP = 
"hoodie.consistency.check.max_checks";
   private static int DEFAULT_MAX_CONSISTENCY_CHECKS = 7;
 
+  /**
+   * HUDI-858 : There are users who had been directly using RDD APIs and have 
relied on a behavior in 0.4.x to allow
multiple write operations (upsert/bulk-insert/...) to be executed within a 
single commit.
+   *
+   * Given Hudi's commit protocol, these are generally unsafe operations and 
users need to handle failure scenarios. It
+   * only works with COW tables. Hudi 0.5.x had stopped this behavior.
+   *
+   * Given the importance of supporting such cases for the user's migration to 
0.5.x, we are proposing a safety flag
+   * (disabled by default) which will allow this old behavior.
+   */
+  private static final String ALLOW_MULTI_WRITE_ON_SAME_INSTANT =
+  "_.hoodie.allow.multi.write.on.same.instant";
+  private static final String DEFAULT_ALLOW_MULTI_WRITE_ON_SAME_INSTANT = 
"false";
+
   private ConsistencyGuardConfig consistencyGuardConfig;
 
   // Hoodie Write Client transparently rewrites File System View config when 
embedded mode is enabled
@@ -194,6 +208,10 @@ public class HoodieWriteConfig extends DefaultHoodieConfig 
{
 return Boolean.parseBoolean(props.getProperty(COMBINE_BEFORE_DELETE_PROP));
   }
 
+  public boolean shouldAllowMultiWriteOnSameInstant() {
+return 
Boolean.parseBoolean(props.getProperty(ALLOW_MULTI_WRITE_ON_SAME_INSTANT));
+  }
+
   public String getWriteStatusClassName() {
 return props.getProperty(HOODIE_WRITE_STATUS_CLASS_PROP);
   }
@@ -723,6 +741,11 @@ public class HoodieWriteConfig extends DefaultHoodieConfig 
{
   return this;
 }
 
+public Builder withAllowMultiWriteOnSameInstant(boolean allow) {
+  props.setProperty(ALLOW_MULTI_WRITE_ON_SAME_INSTANT, 
String.valueOf(allow));
+  return this;
+}
+
 public HoodieWriteConfig build() {
   // Check for mandatory properties
   setDefaultOnCondition(props, !props.containsKey(INSERT_PARALLELISM), 
INSERT_PARALLELISM, DEFAULT_PARALLELISM);
@@ -738,6 +761,8 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
   DEFAULT_COMBINE_BEFORE_UPSERT);
   setDefaultOnCondition(props, 
!props.containsKey(COMBINE_BEFORE_DELETE_PROP), COMBINE_BEFORE_DELETE_PROP,
   DEFAULT_COMBINE_BEFORE_DELETE);
+  setDefaultOnCondition(props, 
!props.containsKey(ALLOW_MULTI_WRITE_ON_SAME_INSTANT),
+  ALLOW_MULTI_WRITE_ON_SAME_INSTANT, 
DEFAULT_ALLOW_MULTI_WRITE_ON_SAME_INSTANT);
   setDefaultOnCondition(props, 
!props.containsKey(WRITE_STATUS_STORAGE_LEVEL), WRITE_STATUS_STORAGE_LEVEL,
   DEFAULT_WRITE_STATUS_STORAGE_LEVEL);
   setDefaultOnCondition(props, 
!props.containsKey(HOODIE_AUTO_COMMIT_PROP), HOODIE_AUTO_COMMIT_PROP,
@@ -778,7 +803,6 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
   // Ensure Layout Version is good
   new TimelineLayoutVersion(Integer.parseInt(layoutVersion));
 
-
   // Build WriteConfig at the end
   HoodieWriteConfig config = new HoodieWriteConfig(props);
   Objects.requireNonNull(config.getBasePath());
diff --git 
a/hudi-client/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java
 
b/hudi-client/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java
index a846de8..0717fd2 100644
--- 
a/hudi-client/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java
+++ 

[GitHub] [incubator-hudi] bvaradar merged pull request #1633: [HUDI-858] Allow multiple operations to be executed within a single commit

2020-05-18 Thread GitBox


bvaradar merged pull request #1633:
URL: https://github.com/apache/incubator-hudi/pull/1633


   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] codecov-commenter edited a comment on pull request #1640: [WIP] travis jvm fork issue

2020-05-18 Thread GitBox


codecov-commenter edited a comment on pull request #1640:
URL: https://github.com/apache/incubator-hudi/pull/1640#issuecomment-630496124


   # 
[Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=h1) 
Report
   > Merging 
[#1640](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=desc) 
into 
[master](https://codecov.io/gh/apache/incubator-hudi/commit/2600d2de8d9d963db920ff486032482a946fe3d6=desc)
 will **not change** coverage.
   > The diff coverage is `n/a`.
   
   [![Impacted file tree 
graph](https://codecov.io/gh/apache/incubator-hudi/pull/1640/graphs/tree.svg?width=650=150=pr=VTTXabwbs2)](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=tree)
   
   ```diff
   @@            Coverage Diff            @@
   ##             master    #1640   +/-   ##
   =========================================
     Coverage     16.59%   16.59%
     Complexity      798      798
   =========================================
     Files           344      344
     Lines         15160    15160
     Branches       1510     1510
   =========================================
     Hits           2516     2516
     Misses        12314    12314
     Partials        330      330
   ```
   
   
   
   --
   
   [Continue to review full report at 
Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=continue).
   > **Legend** - [Click here to learn 
more](https://docs.codecov.io/docs/codecov-delta)
   > `Δ = absolute  (impact)`, `ø = not affected`, `? = missing data`
   > Powered by 
[Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=footer).
 Last update 
[2600d2d...2b005ea](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=lastupdated).
 Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments).
   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] codecov-commenter edited a comment on pull request #1640: [WIP] travis jvm fork issue

2020-05-18 Thread GitBox


codecov-commenter edited a comment on pull request #1640:
URL: https://github.com/apache/incubator-hudi/pull/1640#issuecomment-630496124


   # 
[Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=h1) 
Report
   > Merging 
[#1640](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=desc) 
into 
[master](https://codecov.io/gh/apache/incubator-hudi/commit/2600d2de8d9d963db920ff486032482a946fe3d6=desc)
 will **decrease** coverage by `0.01%`.
   > The diff coverage is `n/a`.
   
   [![Impacted file tree 
graph](https://codecov.io/gh/apache/incubator-hudi/pull/1640/graphs/tree.svg?width=650=150=pr=VTTXabwbs2)](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=tree)
   
   ```diff
   @@             Coverage Diff              @@
   ##             master    #1640      +/-   ##
   =============================================
   - Coverage     16.59%   16.58%   -0.02%
   + Complexity      798      797       -1
   =============================================
     Files           344      344
     Lines         15160    15160
     Branches       1510     1510
   =============================================
   - Hits           2516     2514       -2
   - Misses        12314    12316       +2
     Partials        330      330
   ```
   
   
   | [Impacted 
Files](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=tree) | 
Coverage Δ | Complexity Δ | |
   |---|---|---|---|
   | 
[...apache/hudi/common/fs/HoodieWrapperFileSystem.java](https://codecov.io/gh/apache/incubator-hudi/pull/1640/diff?src=pr=tree#diff-aHVkaS1jb21tb24vc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvY29tbW9uL2ZzL0hvb2RpZVdyYXBwZXJGaWxlU3lzdGVtLmphdmE=)
 | `21.98% <0.00%> (-0.71%)` | `28.00% <0.00%> (-1.00%)` | |
   
   --
   
   [Continue to review full report at 
Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=continue).
   > **Legend** - [Click here to learn 
more](https://docs.codecov.io/docs/codecov-delta)
   > `Δ = absolute  (impact)`, `ø = not affected`, `? = missing data`
   > Powered by 
[Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=footer).
 Last update 
[2600d2d...e092b32](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=lastupdated).
 Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments).
   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] codecov-io commented on pull request #1640: [WIP] travis jvm fork issue

2020-05-18 Thread GitBox


codecov-io commented on pull request #1640:
URL: https://github.com/apache/incubator-hudi/pull/1640#issuecomment-630495938


   # 
[Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=h1) 
Report
   > Merging 
[#1640](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=desc) 
into 
[master](https://codecov.io/gh/apache/incubator-hudi/commit/2600d2de8d9d963db920ff486032482a946fe3d6=desc)
 will **not change** coverage.
   > The diff coverage is `n/a`.
   
   [![Impacted file tree 
graph](https://codecov.io/gh/apache/incubator-hudi/pull/1640/graphs/tree.svg?width=650=150=pr=VTTXabwbs2)](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=tree)
   
   ```diff
   @@            Coverage Diff            @@
   ##             master    #1640   +/-   ##
   =========================================
     Coverage     16.59%   16.59%
     Complexity      798      798
   =========================================
     Files           344      344
     Lines         15160    15160
     Branches       1510     1510
   =========================================
     Hits           2516     2516
     Misses        12314    12314
     Partials        330      330
   ```
   
   
   
   --
   
   [Continue to review full report at 
Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=continue).
   > **Legend** - [Click here to learn 
more](https://docs.codecov.io/docs/codecov-delta)
   > `Δ = absolute  (impact)`, `ø = not affected`, `? = missing data`
   > Powered by 
[Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=footer).
 Last update 
[2600d2d...30901b1](https://codecov.io/gh/apache/incubator-hudi/pull/1640?src=pr=lastupdated).
 Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments).
   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] bvaradar commented on pull request #1634: [HUDI-846][HUDI-848] Enable Incremental cleaning and embedded timeline-server by default

2020-05-18 Thread GitBox


bvaradar commented on pull request #1634:
URL: https://github.com/apache/incubator-hudi/pull/1634#issuecomment-630393889


   @vinothchandar : Addressed review comment.



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] bvaradar commented on pull request #1633: [HUDI-858] Allow multiple operations to be executed within a single commit

2020-05-18 Thread GitBox


bvaradar commented on pull request #1633:
URL: https://github.com/apache/incubator-hudi/pull/1633#issuecomment-630392894


   @leesf: the code coverage looks good now. 
   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] xushiyan opened a new pull request #1640: [WIP] remove -Xms

2020-05-18 Thread GitBox


xushiyan opened a new pull request #1640:
URL: https://github.com/apache/incubator-hudi/pull/1640


   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[incubator-hudi] branch master updated (459356e -> 2600d2d)

2020-05-18 Thread vbalaji
This is an automated email from the ASF dual-hosted git repository.

vbalaji pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git.


from 459356e  [HUDI-863] get decimal properties from derived spark DataType 
(#1596)
 add 2600d2d  [MINOR] Fix apache-rat violations (#1639)

No new revisions were added by this update.

Summary of changes:
 .../hudi/common/util/ObjectSizeCalculator.java | 32 --
 hudi-integ-test/pom.xml|  4 +++
 hudi-utilities/pom.xml |  4 +++
 .../exception/HoodieSnapshotExporterException.java | 18 
 .../hudi/utilities/sources/TestInputBatch.java | 18 
 pom.xml|  1 +
 6 files changed, 62 insertions(+), 15 deletions(-)



[GitHub] [incubator-hudi] bvaradar merged pull request #1639: [MINOR] Fix apache-rat violations

2020-05-18 Thread GitBox


bvaradar merged pull request #1639:
URL: https://github.com/apache/incubator-hudi/pull/1639


   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] bvaradar commented on pull request #1639: [MINOR] Fix apache-rat violations

2020-05-18 Thread GitBox


bvaradar commented on pull request #1639:
URL: https://github.com/apache/incubator-hudi/pull/1639#issuecomment-630352568


   @vinothchandar : Added rat check to hudi-integ-test. 



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] bvaradar commented on pull request #1633: [HUDI-858] Allow multiple operations to be executed within a single commit

2020-05-18 Thread GitBox


bvaradar commented on pull request #1633:
URL: https://github.com/apache/incubator-hudi/pull/1633#issuecomment-630316425


   @leesf : Looking at the code changes, I don't think it should cause such a 
loss. I had tried force-pushing to trigger another round of testing, but I don't 
see the code-coverage report getting recreated from the new round of tests. Any 
ideas?



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] codecov-io edited a comment on pull request #1633: [HUDI-858] Allow multiple operations to be executed within a single commit

2020-05-18 Thread GitBox


codecov-io edited a comment on pull request #1633:
URL: https://github.com/apache/incubator-hudi/pull/1633#issuecomment-629848790


   # 
[Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1633?src=pr=h1) 
Report
   > Merging 
[#1633](https://codecov.io/gh/apache/incubator-hudi/pull/1633?src=pr=desc) 
into 
[master](https://codecov.io/gh/apache/incubator-hudi/commit/57132f79bb2dad6cfb215480b435a778714a442d=desc)
 will **increase** coverage by `0.00%`.
   > The diff coverage is `41.17%`.
   
   [![Impacted file tree 
graph](https://codecov.io/gh/apache/incubator-hudi/pull/1633/graphs/tree.svg?width=650=150=pr=VTTXabwbs2)](https://codecov.io/gh/apache/incubator-hudi/pull/1633?src=pr=tree)
   
   ```diff
   @@            Coverage Diff             @@
   ##             master    #1633    +/-   ##
   ==========================================
     Coverage     16.61%   16.61%
   - Complexity      798      799      +1
   ==========================================
     Files           344      344
     Lines         15164    15176     +12
     Branches       1510     1512      +2
   ==========================================
   + Hits           2519     2521      +2
   - Misses        12315    12323      +8
   - Partials        330      332      +2
   ```
   
   
   | [Impacted 
Files](https://codecov.io/gh/apache/incubator-hudi/pull/1633?src=pr=tree) | 
Coverage Δ | Complexity Δ | |
   |---|---|---|---|
   | 
[...che/hudi/table/action/commit/BulkInsertHelper.java](https://codecov.io/gh/apache/incubator-hudi/pull/1633/diff?src=pr=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9CdWxrSW5zZXJ0SGVscGVyLmphdmE=)
 | `0.00% <0.00%> (ø)` | `0.00 <0.00> (ø)` | |
   | 
[...java/org/apache/hudi/config/HoodieWriteConfig.java](https://codecov.io/gh/apache/incubator-hudi/pull/1633/diff?src=pr=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvY29uZmlnL0hvb2RpZVdyaXRlQ29uZmlnLmphdmE=)
 | `42.25% <25.00%> (-0.30%)` | `48.00 <1.00> (+1.00)` | :arrow_down: |
   | 
[...di/common/table/timeline/HoodieActiveTimeline.java](https://codecov.io/gh/apache/incubator-hudi/pull/1633/diff?src=pr=tree#diff-aHVkaS1jb21tb24vc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvY29tbW9uL3RhYmxlL3RpbWVsaW5lL0hvb2RpZUFjdGl2ZVRpbWVsaW5lLmphdmE=)
 | `28.49% <44.44%> (+0.15%)` | `17.00 <1.00> (+1.00)` | |
   | 
[.../table/action/commit/BaseCommitActionExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1633/diff?src=pr=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9CYXNlQ29tbWl0QWN0aW9uRXhlY3V0b3IuamF2YQ==)
 | `46.01% <100.00%> (+0.48%)` | `14.00 <0.00> (ø)` | |
   | 
[...apache/hudi/common/fs/HoodieWrapperFileSystem.java](https://codecov.io/gh/apache/incubator-hudi/pull/1633/diff?src=pr=tree#diff-aHVkaS1jb21tb24vc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvY29tbW9uL2ZzL0hvb2RpZVdyYXBwZXJGaWxlU3lzdGVtLmphdmE=)
 | `21.98% <0.00%> (-0.71%)` | `28.00% <0.00%> (-1.00%)` | |
   
   --
   
   [Continue to review full report at 
Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1633?src=pr=continue).
   > **Legend** - [Click here to learn 
more](https://docs.codecov.io/docs/codecov-delta)
   > `Δ = absolute  (impact)`, `ø = not affected`, `? = missing data`
   > Powered by 
[Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1633?src=pr=footer).
 Last update 
[57132f7...47cd94c](https://codecov.io/gh/apache/incubator-hudi/pull/1633?src=pr=lastupdated).
 Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments).
   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] pratyakshsharma commented on pull request #1558: [HUDI-796]: added deduping logic for upserts case

2020-05-18 Thread GitBox


pratyakshsharma commented on pull request #1558:
URL: https://github.com/apache/incubator-hudi/pull/1558#issuecomment-630299786


   @hddong shared the logs with you over slack. 



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] pratyakshsharma commented on a change in pull request #1433: [HUDI-728]: Implement custom key generator

2020-05-18 Thread GitBox


pratyakshsharma commented on a change in pull request #1433:
URL: https://github.com/apache/incubator-hudi/pull/1433#discussion_r426735110



##
File path: 
hudi-spark/src/main/java/org/apache/hudi/keygen/TimestampBasedKeyGenerator.java
##
@@ -88,6 +86,13 @@ public TimestampBasedKeyGenerator(TypedProperties config) {
 
   @Override
   public HoodieKey getKey(GenericRecord record) {
+String recordKey = getRecordKey(record);

Review comment:
   My bad. TimestampBasedKeyGenerator is reusing the function getRecordKey() 
from SimpleKeyGenerator. So I guess we will keep it this way. 





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] pratyakshsharma commented on pull request #1433: [HUDI-728]: Implement custom key generator

2020-05-18 Thread GitBox


pratyakshsharma commented on pull request #1433:
URL: https://github.com/apache/incubator-hudi/pull/1433#issuecomment-630278010


   > Can you point me to exact commits where you addressed my last set of 
comments and commits where you pulled in the other PR?
   
   Sure. I addressed your last set of comments in - 
https://github.com/apache/incubator-hudi/pull/1433/commits/4ebd50561fa9d130118b84db501ca23600d2c9d6
   
   I pulled the other PR in - 
https://github.com/apache/incubator-hudi/pull/1433/commits/cfe94e4ac8995f5b25e4385e7446e7b86e88e4b6.
   
   Hope that helps. 



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] pratyakshsharma commented on a change in pull request #1433: [HUDI-728]: Implement custom key generator

2020-05-18 Thread GitBox


pratyakshsharma commented on a change in pull request #1433:
URL: https://github.com/apache/incubator-hudi/pull/1433#discussion_r426730912



##
File path: 
hudi-spark/src/main/java/org/apache/hudi/keygen/CustomKeyGenerator.java
##
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.keygen;
+
+import org.apache.hudi.DataSourceWriteOptions;
+import org.apache.hudi.common.model.HoodieKey;
+import org.apache.hudi.common.config.TypedProperties;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.exception.HoodieDeltaStreamerException;
+import org.apache.hudi.exception.HoodieKeyException;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * This is a generic implementation of KeyGenerator where users can configure 
record key as a single field or a combination of fields.
+ * Similarly partition path can be configured to have multiple fields or only 
one field. This class expects value for prop
+ * "hoodie.datasource.write.partitionpath.field" in a specific format. For 
example:
+ *
+ * properties.put("hoodie.datasource.write.partitionpath.field", 
"field1:PartitionKeyType1,field2:PartitionKeyType2").
+ *
+ * The complete partition path is created as <value for field1>/<value for field2> and so on.
+ *
+ * Few points to consider:
+ * 1. If you want to customize some partition path field on a timestamp basis, 
you can use field1:timestampBased
+ * 2. If you simply want to have the value of your configured field in the 
partition path, use field1:simple
+ * 3. If you want your table to be non partitioned, simply leave it as blank.
+ *
+ * RecordKey is internally generated using either SimpleKeyGenerator or 
ComplexKeyGenerator.
+ */
+public class CustomKeyGenerator extends KeyGenerator {
+
+  protected final List<String> recordKeyFields;
+  protected final List<String> partitionPathFields;
+  protected final TypedProperties properties;
+  private static final String DEFAULT_PARTITION_PATH_SEPARATOR = "/";
+  private static final String SPLIT_REGEX = ":";
+
+  /**
+   * Used as a part of config in CustomKeyGenerator.java.
+   */
+  public enum PartitionKeyType {
+SIMPLE, TIMESTAMP
+  }
+
+  public CustomKeyGenerator(TypedProperties props) {
+super(props);
+this.properties = props;
+this.recordKeyFields = 
Arrays.stream(props.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY()).split(",")).map(String::trim).collect(Collectors.toList());
+this.partitionPathFields =
+  
Arrays.stream(props.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY()).split(",")).map(String::trim).collect(Collectors.toList());
+  }
+
+  @Override
+  public HoodieKey getKey(GenericRecord record) {
+//call function to get the record key
+String recordKey = getRecordKey(record);
+//call function to get the partition key based on the type for that 
partition path field
+String partitionPath = getPartitionPath(record);
+return new HoodieKey(recordKey, partitionPath);
+  }
+
+  private String getPartitionPath(GenericRecord record) {
+if (partitionPathFields == null) {
+  throw new HoodieKeyException("Unable to find field names for partition 
path in cfg");
+}
+
+String partitionPathField;
+StringBuilder partitionPath = new StringBuilder();
+
+//Corresponds to no partition case
+if (partitionPathFields.size() == 1 && 
partitionPathFields.get(0).isEmpty()) {
+  return "";
+}
+for (String field : partitionPathFields) {
+  String[] fieldWithType = field.split(SPLIT_REGEX);
+  if (fieldWithType.length != 2) {
+throw new HoodieKeyException("Unable to find field names for partition 
path in proper format");
+  }
+
+  partitionPathField = fieldWithType[0];
+  PartitionKeyType keyType = 
PartitionKeyType.valueOf(fieldWithType[1].toUpperCase());
+  switch (keyType) {
+case SIMPLE:
+  partitionPath.append(new 
SimpleKeyGenerator(properties).getPartitionPath(record, partitionPathField));
+  break;
+case TIMESTAMP:
+  partitionPath.append(new 
TimestampBasedKeyGenerator(properties).getPartitionPath(record, 
partitionPathField));
+  break;
+

[GitHub] [incubator-hudi] pratyakshsharma commented on a change in pull request #1433: [HUDI-728]: Implement custom key generator

2020-05-18 Thread GitBox


pratyakshsharma commented on a change in pull request #1433:
URL: https://github.com/apache/incubator-hudi/pull/1433#discussion_r426724593



##
File path: 
hudi-spark/src/main/java/org/apache/hudi/keygen/CustomKeyGenerator.java
##
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.keygen;
+
+import org.apache.hudi.DataSourceWriteOptions;
+import org.apache.hudi.common.model.HoodieKey;
+import org.apache.hudi.common.config.TypedProperties;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.exception.HoodieDeltaStreamerException;
+import org.apache.hudi.exception.HoodieKeyException;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * This is a generic implementation of KeyGenerator where users can configure 
record key as a single field or a combination of fields.
+ * Similarly partition path can be configured to have multiple fields or only 
one field. This class expects value for prop
+ * "hoodie.datasource.write.partitionpath.field" in a specific format. For 
example:
+ *
+ * properties.put("hoodie.datasource.write.partitionpath.field", 
"field1:PartitionKeyType1,field2:PartitionKeyType2").
+ *
+ * The complete partition path is created as <value for field1>/<value for field2> and so on.
+ *
+ * Few points to consider:
+ * 1. If you want to customise some partition path field on a timestamp basis, 
you can use field1:timestampBased
+ * 2. If you simply want to have the value of your configured field in the 
partition path, use field1:simple
+ * 3. If you want your table to be non partitioned, simply leave it as blank.
+ *
+ * RecordKey is internally generated using either SimpleKeyGenerator or 
ComplexKeyGenerator.
+ */
+public class CustomKeyGenerator extends KeyGenerator {
+
+  protected final List<String> recordKeyFields;
+  protected final List<String> partitionPathFields;
+  protected final TypedProperties properties;
+  private static final String DEFAULT_PARTITION_PATH_SEPARATOR = "/";
+  private static final String SPLIT_REGEX = ":";
+
+  /**
+   * Used as a part of config in CustomKeyGenerator.java.
+   */
+  public enum PartitionKeyType {
+SIMPLE, TIMESTAMP
+  }
+
+  public CustomKeyGenerator(TypedProperties props) {
+super(props);
+this.properties = props;
+this.recordKeyFields = 
Arrays.stream(props.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY()).split(",")).map(String::trim).collect(Collectors.toList());
+this.partitionPathFields =
+  
Arrays.stream(props.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY()).split(",")).map(String::trim).collect(Collectors.toList());
+  }
+
+  @Override
+  public HoodieKey getKey(GenericRecord record) {
+//call function to get the record key
+String recordKey = getRecordKey(record);
+//call function to get the partition key based on the type for that 
partition path field
+String partitionPath = getPartitionPath(record);
+return new HoodieKey(recordKey, partitionPath);
+  }
+
+  public String getPartitionPath(GenericRecord record) {
+if (partitionPathFields == null) {
+  throw new HoodieKeyException("Unable to find field names for partition 
path in cfg");
+}
+
+String partitionPathField;
+StringBuilder partitionPath = new StringBuilder();
+
+//Corresponds to no partition case
+if (partitionPathFields.size() == 1 && 
partitionPathFields.get(0).isEmpty()) {

Review comment:
   > if users wants to not have any partitions, might as well not set it 
only
   
   Actually, in the constructor of CustomKeyGenerator, partitionPathFields gets 
initialised through a call to props.getString(), which internally checks if the key 
exists. So the user will always have to include this key. I am ok with both ways, 
so I chose to follow what Vinoth suggested. 
   
   > So, the list should be empty in that case in my understanding. correct me 
if I am wrong
   
   No, the list actually always contains one element, which is an empty string. I 
have verified this. :) 
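
   To make the configuration being discussed concrete: a minimal sketch, based only on
   the CustomKeyGenerator javadoc quoted above, of how the partition path property might
   be set. The field names and values are illustrative, and the record-key property name
   is assumed to be the standard datasource write option.

```java
import org.apache.hudi.common.config.TypedProperties;

public class CustomKeyGeneratorConfigSketch {
  public static void main(String[] args) {
    TypedProperties props = new TypedProperties();
    // Record key: a single field or a comma-separated combination of fields (assumed key name).
    props.put("hoodie.datasource.write.recordkey.field", "id");
    // Partition path: "<field>:<PartitionKeyType>" pairs, per the javadoc quoted above.
    props.put("hoodie.datasource.write.partitionpath.field", "country:simple,ts:timestamp");
    // The javadoc writes the second type as "field:timestampBased", while the quoted enum is
    // PartitionKeyType.TIMESTAMP, so the exact literal may differ; treat this value as a sketch.
    // Leaving the partition path value blank keeps the table non-partitioned, which matches
    // the single-empty-element behavior described in the comment above.
  }
}
```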





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL 

[GitHub] [incubator-hudi] pratyakshsharma commented on a change in pull request #1433: [HUDI-728]: Implement custom key generator

2020-05-18 Thread GitBox


pratyakshsharma commented on a change in pull request #1433:
URL: https://github.com/apache/incubator-hudi/pull/1433#discussion_r426715335



##
File path: 
hudi-spark/src/main/java/org/apache/hudi/keygen/TimestampBasedKeyGenerator.java
##
@@ -88,6 +86,13 @@ public TimestampBasedKeyGenerator(TypedProperties config) {
 
   @Override
   public HoodieKey getKey(GenericRecord record) {
+String recordKey = getRecordKey(record);

Review comment:
   @nsivabalan actually TimestampBasedKeyGenerator extending 
SimpleKeyGenerator has been there historically. I agree with your point; if you 
wish, I can refactor the code. 
   
   Let me actually go ahead and do it. 





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[jira] [Updated] (HUDI-913) Introduce HoodieEngineContext for hudi write client

2020-05-18 Thread wangxianghu (Jira)


 [ 
https://issues.apache.org/jira/browse/HUDI-913?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

wangxianghu updated HUDI-913:
-
Summary: Introduce HoodieEngineContext  for hudi write client  (was: 
Introduce HoodieEngineContext)

> Introduce HoodieEngineContext  for hudi write client
> 
>
> Key: HUDI-913
> URL: https://issues.apache.org/jira/browse/HUDI-913
> Project: Apache Hudi (incubating)
>  Issue Type: Sub-task
>Reporter: wangxianghu
>Assignee: wangxianghu
>Priority: Major
>
> Introduce HoodieEngineContext  as multi-engine execution environment



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HUDI-911) Introduce HoodieWriteOutput for hudi write client

2020-05-18 Thread wangxianghu (Jira)


 [ 
https://issues.apache.org/jira/browse/HUDI-911?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

wangxianghu updated HUDI-911:
-
Summary: Introduce HoodieWriteOutput for hudi write client  (was: Introduce 
HoodieWriteOutput)

> Introduce HoodieWriteOutput for hudi write client
> -
>
> Key: HUDI-911
> URL: https://issues.apache.org/jira/browse/HUDI-911
> Project: Apache Hudi (incubating)
>  Issue Type: Sub-task
>Reporter: wangxianghu
>Assignee: wangxianghu
>Priority: Major
>
> Introduce HoodieWriteOutput as the unified output format of Hudi



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HUDI-912) Introduce HoodieWriteKey for hudi write client

2020-05-18 Thread wangxianghu (Jira)


 [ 
https://issues.apache.org/jira/browse/HUDI-912?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

wangxianghu updated HUDI-912:
-
Summary: Introduce HoodieWriteKey for hudi write client  (was: Introduce 
HoodieWriteKey)

> Introduce HoodieWriteKey for hudi write client
> --
>
> Key: HUDI-912
> URL: https://issues.apache.org/jira/browse/HUDI-912
> Project: Apache Hudi (incubating)
>  Issue Type: Sub-task
>Reporter: wangxianghu
>Assignee: wangxianghu
>Priority: Major
>
> Introduce HoodieWriteKey as Hudi key's unified format



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HUDI-910) Introduce HoodieWriteInput for hudi write client

2020-05-18 Thread wangxianghu (Jira)


 [ 
https://issues.apache.org/jira/browse/HUDI-910?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

wangxianghu updated HUDI-910:
-
Summary: Introduce HoodieWriteInput for hudi write client  (was: Introduce 
HoodieWriteInput)

> Introduce HoodieWriteInput for hudi write client
> 
>
> Key: HUDI-910
> URL: https://issues.apache.org/jira/browse/HUDI-910
> Project: Apache Hudi (incubating)
>  Issue Type: Sub-task
>Reporter: wangxianghu
>Assignee: wangxianghu
>Priority: Major
>
> Introduce HoodieWriteInput as the unified input format of Hudi



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Assigned] (HUDI-913) Introduce HoodieEngineContext

2020-05-18 Thread wangxianghu (Jira)


 [ 
https://issues.apache.org/jira/browse/HUDI-913?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

wangxianghu reassigned HUDI-913:


Assignee: wangxianghu

> Introduce HoodieEngineContext
> -
>
> Key: HUDI-913
> URL: https://issues.apache.org/jira/browse/HUDI-913
> Project: Apache Hudi (incubating)
>  Issue Type: Sub-task
>Reporter: wangxianghu
>Assignee: wangxianghu
>Priority: Major
>
> Introduce HoodieEngineContext  as multi-engine execution environment



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Assigned] (HUDI-910) Introduce HoodieWriteInput

2020-05-18 Thread wangxianghu (Jira)


 [ 
https://issues.apache.org/jira/browse/HUDI-910?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

wangxianghu reassigned HUDI-910:


Assignee: wangxianghu

> Introduce HoodieWriteInput
> --
>
> Key: HUDI-910
> URL: https://issues.apache.org/jira/browse/HUDI-910
> Project: Apache Hudi (incubating)
>  Issue Type: Sub-task
>Reporter: wangxianghu
>Assignee: wangxianghu
>Priority: Major
>
> Introduce HoodieWriteInput as the unified input format of Hudi



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HUDI-912) Introduce HoodieWriteKey

2020-05-18 Thread wangxianghu (Jira)


 [ 
https://issues.apache.org/jira/browse/HUDI-912?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

wangxianghu updated HUDI-912:
-
Summary: Introduce HoodieWriteKey  (was: Introduce HoodieWriteInput)

> Introduce HoodieWriteKey
> 
>
> Key: HUDI-912
> URL: https://issues.apache.org/jira/browse/HUDI-912
> Project: Apache Hudi (incubating)
>  Issue Type: Sub-task
>Reporter: wangxianghu
>Priority: Major
>
> Introduce HoodieWriteKey as Hudi key's unified format



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Assigned] (HUDI-912) Introduce HoodieWriteKey

2020-05-18 Thread wangxianghu (Jira)


 [ 
https://issues.apache.org/jira/browse/HUDI-912?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

wangxianghu reassigned HUDI-912:


Assignee: wangxianghu

> Introduce HoodieWriteKey
> 
>
> Key: HUDI-912
> URL: https://issues.apache.org/jira/browse/HUDI-912
> Project: Apache Hudi (incubating)
>  Issue Type: Sub-task
>Reporter: wangxianghu
>Assignee: wangxianghu
>Priority: Major
>
> Introduce HoodieWriteKey as Hudi key's unified format



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HUDI-910) Introduce HoodieWriteInput

2020-05-18 Thread wangxianghu (Jira)


 [ 
https://issues.apache.org/jira/browse/HUDI-910?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

wangxianghu updated HUDI-910:
-
Summary: Introduce HoodieWriteInput  (was: Introduce unified input format 
of Hudi)

> Introduce HoodieWriteInput
> --
>
> Key: HUDI-910
> URL: https://issues.apache.org/jira/browse/HUDI-910
> Project: Apache Hudi (incubating)
>  Issue Type: Sub-task
>Reporter: wangxianghu
>Priority: Major
>
> Introduce HoodieWriteInput as the unified input format of Hudi



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HUDI-912) Introduce HoodieWriteInput

2020-05-18 Thread wangxianghu (Jira)


 [ 
https://issues.apache.org/jira/browse/HUDI-912?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

wangxianghu updated HUDI-912:
-
Summary: Introduce HoodieWriteInput  (was: Introduce unified Hudi key 
format)

> Introduce HoodieWriteInput
> --
>
> Key: HUDI-912
> URL: https://issues.apache.org/jira/browse/HUDI-912
> Project: Apache Hudi (incubating)
>  Issue Type: Sub-task
>Reporter: wangxianghu
>Priority: Major
>
> Introduce HoodieWriteKey as Hudi key's unified format



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HUDI-913) Introduce HoodieEngineContext

2020-05-18 Thread wangxianghu (Jira)


 [ 
https://issues.apache.org/jira/browse/HUDI-913?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

wangxianghu updated HUDI-913:
-
Summary: Introduce HoodieEngineContext  (was: Introduce 
HoodieEngineContext.java)

> Introduce HoodieEngineContext
> -
>
> Key: HUDI-913
> URL: https://issues.apache.org/jira/browse/HUDI-913
> Project: Apache Hudi (incubating)
>  Issue Type: Sub-task
>Reporter: wangxianghu
>Priority: Major
>
> Introduce HoodieEngineContext  as multi-engine execution environment



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HUDI-911) Introduce HoodieWriteOutput

2020-05-18 Thread wangxianghu (Jira)


 [ 
https://issues.apache.org/jira/browse/HUDI-911?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

wangxianghu updated HUDI-911:
-
Summary: Introduce HoodieWriteOutput  (was: Introduce unified output format 
of Hudi)

> Introduce HoodieWriteOutput
> ---
>
> Key: HUDI-911
> URL: https://issues.apache.org/jira/browse/HUDI-911
> Project: Apache Hudi (incubating)
>  Issue Type: Sub-task
>Reporter: wangxianghu
>Assignee: wangxianghu
>Priority: Major
>
> Introduce HoodieWriteOutput as the unified output format of Hudi



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Assigned] (HUDI-911) Introduce unified output format of Hudi

2020-05-18 Thread wangxianghu (Jira)


 [ 
https://issues.apache.org/jira/browse/HUDI-911?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

wangxianghu reassigned HUDI-911:


Assignee: wangxianghu

> Introduce unified output format of Hudi
> ---
>
> Key: HUDI-911
> URL: https://issues.apache.org/jira/browse/HUDI-911
> Project: Apache Hudi (incubating)
>  Issue Type: Sub-task
>Reporter: wangxianghu
>Assignee: wangxianghu
>Priority: Major
>
> Introduce HoodieWriteOutput as the unified output format of Hudi



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Created] (HUDI-913) Introduce HoodieEngineContext.java

2020-05-18 Thread wangxianghu (Jira)
wangxianghu created HUDI-913:


 Summary: Introduce HoodieEngineContext.java
 Key: HUDI-913
 URL: https://issues.apache.org/jira/browse/HUDI-913
 Project: Apache Hudi (incubating)
  Issue Type: Sub-task
Reporter: wangxianghu


Introduce HoodieEngineContext  as multi-engine execution environment



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Created] (HUDI-912) Introduce unified Hudi key format

2020-05-18 Thread wangxianghu (Jira)
wangxianghu created HUDI-912:


 Summary: Introduce unified Hudi key format
 Key: HUDI-912
 URL: https://issues.apache.org/jira/browse/HUDI-912
 Project: Apache Hudi (incubating)
  Issue Type: Sub-task
Reporter: wangxianghu


Introduce HoodieWriteKey as Hudi key's unified format



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Created] (HUDI-911) Introduce unified output format of Hudi

2020-05-18 Thread wangxianghu (Jira)
wangxianghu created HUDI-911:


 Summary: Introduce unified output format of Hudi
 Key: HUDI-911
 URL: https://issues.apache.org/jira/browse/HUDI-911
 Project: Apache Hudi (incubating)
  Issue Type: Sub-task
Reporter: wangxianghu


Introduce HoodieWriteOutput as the unified output format of Hudi



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Created] (HUDI-910) Introduce unified input format of Hudi

2020-05-18 Thread wangxianghu (Jira)
wangxianghu created HUDI-910:


 Summary: Introduce unified input format of Hudi
 Key: HUDI-910
 URL: https://issues.apache.org/jira/browse/HUDI-910
 Project: Apache Hudi (incubating)
  Issue Type: Sub-task
Reporter: wangxianghu


Introduce HoodieWriteInput as the unified input format of Hudi



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Assigned] (HUDI-4) Support for writing to EMRFS

2020-05-18 Thread vinoyang (Jira)


 [ 
https://issues.apache.org/jira/browse/HUDI-4?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

vinoyang reassigned HUDI-4:
---

Assignee: liujinhui  (was: vinoyang)

> Support for writing to EMRFS
> 
>
> Key: HUDI-4
> URL: https://issues.apache.org/jira/browse/HUDI-4
> Project: Apache Hudi (incubating)
>  Issue Type: New Feature
>  Components: newbie, Usability, Writer Core
>Reporter: Vinoth Chandar
>Assignee: liujinhui
>Priority: Major
>  Labels: bug-bash-0.6.0
>
> https://github.com/uber/hudi/issues/588



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Created] (HUDI-909) Introduce high level abstract of hudi write client

2020-05-18 Thread wangxianghu (Jira)
wangxianghu created HUDI-909:


 Summary: Introduce high level abstract of hudi write client
 Key: HUDI-909
 URL: https://issues.apache.org/jira/browse/HUDI-909
 Project: Apache Hudi (incubating)
  Issue Type: Wish
Reporter: wangxianghu


To make Hudi support more engines, we should redesign the high-level 
abstractions of Hudi.

For example: HoodieTable, HoodieClient (including HoodieWriteClient, 
HoodieReadClient, etc.), HoodieIndex, ActionExecutor, etc.
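
A rough sketch of how the abstractions from HUDI-909 through HUDI-913 could fit together; the generic type parameters and method names here are assumptions for illustration, not the actual committed API:

```java
// Hypothetical sketch tying HUDI-909..913 together: a write client abstracted
// over the engine-specific input (I), key (K) and output (O) types that
// HUDI-910/911/912 propose. Method signatures are illustrative only.
public abstract class AbstractHoodieWriteClient<I, K, O> {

  // I: engine-specific record collection (e.g. an RDD for Spark, a List for Java)
  // K: engine-specific representation of Hudi record keys
  // O: engine-specific write result / status collection

  public abstract O upsert(I records, String instantTime);

  public abstract O insert(I records, String instantTime);

  public abstract O bulkInsert(I records, String instantTime);

  // Derive the unified key representation from the incoming records.
  public abstract K generateKeys(I records);
}
```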



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[GitHub] [incubator-hudi] xushiyan commented on a change in pull request #1572: [HUDI-836] Implement datadog metrics reporter

2020-05-18 Thread GitBox


xushiyan commented on a change in pull request #1572:
URL: https://github.com/apache/incubator-hudi/pull/1572#discussion_r426634201



##
File path: 
hudi-client/src/main/java/org/apache/hudi/metrics/datadog/DatadogReporter.java
##
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.metrics.datadog;
+
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.ValidationUtils;
+
+import com.codahale.metrics.Clock;
+import com.codahale.metrics.Counter;
+import com.codahale.metrics.Gauge;
+import com.codahale.metrics.Histogram;
+import com.codahale.metrics.Meter;
+import com.codahale.metrics.MetricFilter;
+import com.codahale.metrics.MetricRegistry;
+import com.codahale.metrics.ScheduledReporter;
+import com.codahale.metrics.Timer;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.fasterxml.jackson.databind.node.TextNode;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Objects;
+import java.util.SortedMap;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+/**
+ * A reporter which publishes metric values to Datadog API.
+ * 
+ * Responsible for collecting and composing metrics payload.
+ * 
+ * Internally use {@link DatadogHttpClient} to interact with Datadog APIs.
+ */
+public class DatadogReporter extends ScheduledReporter {
+
+  private static final Logger LOG = 
LogManager.getLogger(DatadogReporter.class);
+
+  private final DatadogHttpClient client;
+  private final String prefix;
+  private final Option<String> host;
+  private final Option<List<String>> tags;
+  private final Clock clock;
+
+  protected DatadogReporter(
+  MetricRegistry registry,
+  DatadogHttpClient client,
+  String prefix,
+  Option<String> host,
+  Option<List<String>> tags,
+  MetricFilter filter,
+  TimeUnit rateUnit,
+  TimeUnit durationUnit) {
+super(registry, "hudi-datadog-reporter", filter, rateUnit, durationUnit);
+this.client = client;
+this.prefix = prefix;
+this.host = host;
+this.tags = tags;
+this.clock = Clock.defaultClock();
+  }
+
+  @Override
+  public void report(
+  SortedMap<String, Gauge> gauges,
+  SortedMap<String, Counter> counters,
+  SortedMap<String, Histogram> histograms,
+  SortedMap<String, Meter> meters,
+  SortedMap<String, Timer> timers) {
+final long now = clock.getTime() / 1000;
+final PayloadBuilder builder = new PayloadBuilder();
+
+builder.withType("gauge");
+gauges.forEach((metricName, metric) -> {
+  builder.addGauge(prefix(metricName), now, (long) metric.getValue());
+});
+
+host.ifPresent(builder::withHost);
+tags.ifPresent(builder::withTags);
+
+client.send(builder.build());
+  }
+
+  protected String prefix(String... components) {
+return MetricRegistry.name(prefix, components);
+  }
+
+  @Override
+  public void stop() {
+try {
+  super.stop();
+} finally {
+  try {
+client.close();
+  } catch (IOException e) {
+LOG.warn("Error disconnecting from Datadog.", e);
+  }
+}
+  }
+
+  /**
+   * Build payload that contains metrics data.
+   * 
+   * Refer to Datadog API reference 
https://docs.datadoghq.com/api/?lang=bash#post-timeseries-points
+   */
+  static class PayloadBuilder {
+
+private static final ObjectMapper MAPPER = new ObjectMapper();
+
+private final ObjectNode payload;
+private final ArrayNode series;
+private String type;
+
+PayloadBuilder() {
+  payload = MAPPER.createObjectNode();
+  series = payload.putArray("series");
+}
+
+PayloadBuilder withType(String type) {
+  this.type = type;
+  return this;
+}
+
+PayloadBuilder addGauge(String metric, long timestamp, long gaugeValue) {
+  ValidationUtils.checkState(Objects.equals(type, "gauge"));

Review comment:
   @yanghua fixed.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

[GitHub] [incubator-hudi] hddong commented on pull request #1558: [HUDI-796]: added deduping logic for upserts case

2020-05-18 Thread GitBox


hddong commented on pull request #1558:
URL: https://github.com/apache/incubator-hudi/pull/1558#issuecomment-630131586


   @prashantwason : Had sent you a message in Slack, can I have a look at your 
log?



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] nsivabalan commented on pull request #1602: [HUDI-494] fix incorrect record size estimation

2020-05-18 Thread GitBox


nsivabalan commented on pull request #1602:
URL: https://github.com/apache/incubator-hudi/pull/1602#issuecomment-630126880


   I have started going through the code base, but a naive question in the 
meantime: if we could log the bytes occupied by the actual records separately and 
the bytes including the bloom filter separately, it might solve the issue, and it 
would be simpler, wouldn't it?
   
   Also, can one of you point me to the code where this size is estimated 
(specifically, where the bloom filter is getting added to the size)? I am going 
through HoodieWriteStat and HoodieAppendHandle for how 
estimatedNumberOfBytesWritten is calculated, and I don't see the bloom filter 
getting added. Am I looking at the wrong place?
   




This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] vinothchandar merged pull request #1596: [HUDI-863] get decimal properties from derived spark DataType

2020-05-18 Thread GitBox


vinothchandar merged pull request #1596:
URL: https://github.com/apache/incubator-hudi/pull/1596


   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[incubator-hudi] branch master updated: [HUDI-863] get decimal properties from derived spark DataType (#1596)

2020-05-18 Thread vinoth
This is an automated email from the ASF dual-hosted git repository.

vinoth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git


The following commit(s) were added to refs/heads/master by this push:
 new 459356e  [HUDI-863] get decimal properties from derived spark DataType 
(#1596)
459356e is described below

commit 459356e292ea869ffe5f39235646dc474da76ea5
Author: rolandjohann 
AuthorDate: Mon May 18 13:28:27 2020 +0200

[HUDI-863] get decimal properties from derived spark DataType (#1596)
---
 .../org/apache/hudi/AvroConversionHelper.scala | 22 ++
 .../org/apache/hudi/AvroConversionUtils.scala  |  4 +---
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git 
a/hudi-spark/src/main/scala/org/apache/hudi/AvroConversionHelper.scala 
b/hudi-spark/src/main/scala/org/apache/hudi/AvroConversionHelper.scala
index 43225bc..69e6376 100644
--- a/hudi-spark/src/main/scala/org/apache/hudi/AvroConversionHelper.scala
+++ b/hudi-spark/src/main/scala/org/apache/hudi/AvroConversionHelper.scala
@@ -268,8 +268,7 @@ object AvroConversionHelper {
 createConverter(sourceAvroSchema, targetSqlType, List.empty[String])
   }
 
-  def createConverterToAvro(avroSchema: Schema,
-dataType: DataType,
+  def createConverterToAvro(dataType: DataType,
 structName: String,
 recordNamespace: String): Any => Any = {
 dataType match {
@@ -284,13 +283,15 @@ object AvroConversionHelper {
 if (item == null) null else item.asInstanceOf[Byte].intValue
   case ShortType => (item: Any) =>
 if (item == null) null else item.asInstanceOf[Short].intValue
-  case dec: DecimalType => (item: Any) =>
-Option(item).map { _ =>
-  val bigDecimalValue = item.asInstanceOf[java.math.BigDecimal]
-  val decimalConversions = new DecimalConversion()
-  decimalConversions.toFixed(bigDecimalValue, 
avroSchema.getField(structName).schema().getTypes.get(0),
-LogicalTypes.decimal(dec.precision, dec.scale))
-}.orNull
+  case dec: DecimalType =>
+val schema = SchemaConverters.toAvroType(dec, nullable = false, 
structName, recordNamespace)
+(item: Any) => {
+  Option(item).map { _ =>
+val bigDecimalValue = item.asInstanceOf[java.math.BigDecimal]
+val decimalConversions = new DecimalConversion()
+decimalConversions.toFixed(bigDecimalValue, schema, 
LogicalTypes.decimal(dec.precision, dec.scale))
+  }.orNull
+}
   case TimestampType => (item: Any) =>
 // Convert time to microseconds since spark-avro by default converts 
TimestampType to
 // Avro Logical TimestampMicros
@@ -299,7 +300,6 @@ object AvroConversionHelper {
 
Option(item).map(_.asInstanceOf[Date].toLocalDate.toEpochDay.toInt).orNull
   case ArrayType(elementType, _) =>
 val elementConverter = createConverterToAvro(
-  avroSchema,
   elementType,
   structName,
   recordNamespace)
@@ -320,7 +320,6 @@ object AvroConversionHelper {
 }
   case MapType(StringType, valueType, _) =>
 val valueConverter = createConverterToAvro(
-  avroSchema,
   valueType,
   structName,
   recordNamespace)
@@ -340,7 +339,6 @@ object AvroConversionHelper {
 val childNameSpace = if (recordNamespace != "") 
s"$recordNamespace.$structName" else structName
 val fieldConverters = structType.fields.map(field =>
   createConverterToAvro(
-avroSchema,
 field.dataType,
 field.name,
 childNameSpace))
diff --git 
a/hudi-spark/src/main/scala/org/apache/hudi/AvroConversionUtils.scala 
b/hudi-spark/src/main/scala/org/apache/hudi/AvroConversionUtils.scala
index 04de1c7..bdb8955 100644
--- a/hudi-spark/src/main/scala/org/apache/hudi/AvroConversionUtils.scala
+++ b/hudi-spark/src/main/scala/org/apache/hudi/AvroConversionUtils.scala
@@ -38,14 +38,12 @@ object AvroConversionUtils {
   : RDD[GenericRecord] = {
 // Use the Avro schema to derive the StructType which has the correct 
nullability information
 val dataType = 
SchemaConverters.toSqlType(avroSchema).dataType.asInstanceOf[StructType]
-val avroSchemaAsJsonString = avroSchema.toString
 val encoder = RowEncoder.apply(dataType).resolveAndBind()
 df.queryExecution.toRdd.map(encoder.fromRow)
   .mapPartitions { records =>
 if (records.isEmpty) Iterator.empty
 else {
-  val avroSchema = new Schema.Parser().parse(avroSchemaAsJsonString)
-  val convertor = 
AvroConversionHelper.createConverterToAvro(avroSchema, dataType, structName, 
recordNamespace)
+  val convertor = AvroConversionHelper.createConverterToAvro(dataType, 
structName, recordNamespace)
   records.map { x => 

[GitHub] [incubator-hudi] vinothchandar commented on pull request #1596: [HUDI-863] get decimal properties from derived spark DataType

2020-05-18 Thread GitBox


vinothchandar commented on pull request #1596:
URL: https://github.com/apache/incubator-hudi/pull/1596#issuecomment-630118280


   Spent some time on this. It's not straightforward for a few reasons: the 
datasource tests convert Avro to JSON and back to a DataFrame. I managed to add a 
decimal type at the DataFrame level using `withColumn()`, but getting it nested is 
a separate story. I filed HUDI-908 to revisit this in the 0.6.0 timeline.
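
For context, a minimal sketch (assuming the Java Spark API and a hypothetical standalone test class, not the actual datasource test) of adding a top-level decimal column via `withColumn()` as mentioned above:

```java
// Sketch only: adds a top-level decimal column to a DataFrame with
// withColumn(); the nested-decimal case discussed above is not covered here.
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;

import static org.apache.spark.sql.functions.lit;

public class DecimalColumnSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .master("local[1]").appName("decimal-column-sketch").getOrCreate();

    Dataset<Row> df = spark.range(3).toDF("id");
    // Cast the literal to an explicit DecimalType(10, 2) so downstream Avro
    // conversion sees a decimal logical type rather than a double.
    Dataset<Row> withDecimal =
        df.withColumn("price", lit("123.45").cast(DataTypes.createDecimalType(10, 2)));

    withDecimal.printSchema();
    withDecimal.show();
    spark.stop();
  }
}
```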



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[jira] [Commented] (HUDI-908) Consistent test data generation with data type coverage

2020-05-18 Thread Vinoth Chandar (Jira)


[ 
https://issues.apache.org/jira/browse/HUDI-908?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17110150#comment-17110150
 ] 

Vinoth Chandar commented on HUDI-908:
-

Should include support for decimals and all the Spark types at the datasource 
level.
Should include support for all the Avro types at the DeltaStreamer level.

> Consistent test data generation with data type coverage
> ---
>
> Key: HUDI-908
> URL: https://issues.apache.org/jira/browse/HUDI-908
> Project: Apache Hudi (incubating)
>  Issue Type: Improvement
>  Components: Code Cleanup, Testing
>Reporter: Vinoth Chandar
>Priority: Major
>  Labels: bug-bash-0.6.0
> Fix For: 0.6.0
>
>
> Let's clean up HoodieTestDataGenerator in the process. 



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Created] (HUDI-908) Consistent test data generation with data type coverage

2020-05-18 Thread Vinoth Chandar (Jira)
Vinoth Chandar created HUDI-908:
---

 Summary: Consistent test data generation with data type coverage
 Key: HUDI-908
 URL: https://issues.apache.org/jira/browse/HUDI-908
 Project: Apache Hudi (incubating)
  Issue Type: Improvement
  Components: Code Cleanup, Testing
Reporter: Vinoth Chandar
 Fix For: 0.6.0


Let's clean up HoodieTestDataGenerator in the process. 



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[GitHub] [incubator-hudi] vinothchandar commented on a change in pull request #1634: [WIP] [HUDI-846][HUDI-848] Enable Incremental cleaning and embedded timeline-server by default

2020-05-18 Thread GitBox


vinothchandar commented on a change in pull request #1634:
URL: https://github.com/apache/incubator-hudi/pull/1634#discussion_r426503762



##
File path: hudi-utilities/pom.xml
##
@@ -82,6 +82,8 @@
   
   <groupId>org.eclipse.jetty.aggregate</groupId>
   <artifactId>jetty-all</artifactId>
+  <version>9.4.15.v20190215</version>

Review comment:
   pull the version to a property? 





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] vinothchandar commented on pull request #1639: [MINOR] Fix apache-rat violations

2020-05-18 Thread GitBox


vinothchandar commented on pull request #1639:
URL: https://github.com/apache/incubator-hudi/pull/1639#issuecomment-630072442


   @bvaradar  `hudi-integ-test` also does not list the rat plugin. Might be 
good to add these to the bundles as well, just in case (our release script will 
catch all of this; still, it's good to do upfront).
   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] vinothchandar commented on a change in pull request #1633: [HUDI-858] Allow multiple operations to be executed within a single commit

2020-05-18 Thread GitBox


vinothchandar commented on a change in pull request #1633:
URL: https://github.com/apache/incubator-hudi/pull/1633#discussion_r426492576



##
File path: 
hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
##
@@ -99,6 +99,20 @@
   private static final String MAX_CONSISTENCY_CHECKS_PROP = 
"hoodie.consistency.check.max_checks";
   private static int DEFAULT_MAX_CONSISTENCY_CHECKS = 7;
 
+  /**
+   * HUDI-858 : There are users who had been directly using RDD APIs and have 
relied on a behavior in 0.4.x to allow
+   * multiple write operations (upsert/bulk-insert/...) to be executed within a 
single commit.
+   *
+   * Given the Hudi commit protocol, these are generally unsafe operations and 
users need to handle failure scenarios. It
+   * only works with COW tables. Hudi 0.5.x had stopped this behavior.
+   *
+   * Given the importance of supporting such cases for the user's migration to 
0.5.x, we are proposing a safety flag
+   * (disabled by default) which will allow this old behavior.
+   */
+  private static final String ALLOW_UNSAFE_MULTI_OPERATIONS_PER_COMMIT =
+  "hoodie.allow.unsafe.multi_operations_per_commit";

Review comment:
   @n3nash was also thinking about adding one of these. Let's give these a 
consistent name prefix: `_.hoodie.allow.multi.write.on.same.instant`. Beginning 
with an underscore can mean (Spark also has similar undocumented params) that it's 
undocumented/hidden and to be used with full understanding.

##
File path: 
hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
##
@@ -99,6 +99,20 @@
   private static final String MAX_CONSISTENCY_CHECKS_PROP = 
"hoodie.consistency.check.max_checks";
   private static int DEFAULT_MAX_CONSISTENCY_CHECKS = 7;
 
+  /**
+   * HUDI-858 : There are users who had been directly using RDD APIs and have 
relied on a behavior in 0.4.x to allow
+   * multiple write operations (upsert/bulk-insert/...) to be executed within a 
single commit.
+   *
+   * Given the Hudi commit protocol, these are generally unsafe operations and 
users need to handle failure scenarios. It
+   * only works with COW tables. Hudi 0.5.x had stopped this behavior.
+   *
+   * Given the importance of supporting such cases for the user's migration to 
0.5.x, we are proposing a safety flag
+   * (disabled by default) which will allow this old behavior.
+   */
+  private static final String ALLOW_UNSAFE_MULTI_OPERATIONS_PER_COMMIT =
+  "hoodie.allow.unsafe.multi_operations_per_commit";

Review comment:
   let's not mix underscore and dot notation? :) 

##
File path: 
hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
##
@@ -723,6 +741,11 @@ public Builder withEmbeddedTimelineServerEnabled(boolean 
enabled) {
   return this;
 }
 
+public Builder withAllowUnsafeMultiOperationsPerCommit(boolean allow) {
+  props.setProperty(ALLOW_UNSAFE_MULTI_OPERATIONS_PER_COMMIT, 
String.valueOf(allow));

Review comment:
   rename based on property name suggested above? 
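
A sketch of the reviewer's naming suggestion applied; the class and builder shown here are a stripped-down stand-in for HoodieWriteConfig, not the merged change:

```java
// Stripped-down stand-in for HoodieWriteConfig, only to spell out the naming
// suggestion from the review: one hidden property (leading underscore, dot
// notation throughout) plus a matching builder method. Not the merged code.
import java.util.Properties;

public class HoodieWriteConfigSketch {

  public static final String ALLOW_MULTI_WRITE_ON_SAME_INSTANT =
      "_.hoodie.allow.multi.write.on.same.instant";
  public static final String DEFAULT_ALLOW_MULTI_WRITE_ON_SAME_INSTANT = "false";

  public static class Builder {
    private final Properties props = new Properties();

    // Disabled by default; intended only for users who understand the commit
    // protocol implications of multiple writes on the same instant.
    public Builder withAllowMultiWriteOnSameInstant(boolean allow) {
      props.setProperty(ALLOW_MULTI_WRITE_ON_SAME_INSTANT, String.valueOf(allow));
      return this;
    }
  }
}
```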





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] yanghua commented on a change in pull request #1572: [HUDI-836] Implement datadog metrics reporter

2020-05-18 Thread GitBox


yanghua commented on a change in pull request #1572:
URL: https://github.com/apache/incubator-hudi/pull/1572#discussion_r426435865



##
File path: 
hudi-client/src/main/java/org/apache/hudi/metrics/datadog/DatadogReporter.java
##
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.metrics.datadog;
+
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.ValidationUtils;
+
+import com.codahale.metrics.Clock;
+import com.codahale.metrics.Counter;
+import com.codahale.metrics.Gauge;
+import com.codahale.metrics.Histogram;
+import com.codahale.metrics.Meter;
+import com.codahale.metrics.MetricFilter;
+import com.codahale.metrics.MetricRegistry;
+import com.codahale.metrics.ScheduledReporter;
+import com.codahale.metrics.Timer;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.fasterxml.jackson.databind.node.TextNode;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Objects;
+import java.util.SortedMap;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+/**
+ * A reporter which publishes metric values to Datadog API.
+ * 
+ * Responsible for collecting and composing metrics payload.
+ * 
+ * Internally use {@link DatadogHttpClient} to interact with Datadog APIs.
+ */
+public class DatadogReporter extends ScheduledReporter {
+
+  private static final Logger LOG = 
LogManager.getLogger(DatadogReporter.class);
+
+  private final DatadogHttpClient client;
+  private final String prefix;
+  private final Option<String> host;
+  private final Option<List<String>> tags;
+  private final Clock clock;
+
+  protected DatadogReporter(
+  MetricRegistry registry,
+  DatadogHttpClient client,
+  String prefix,
+  Option<String> host,
+  Option<List<String>> tags,
+  MetricFilter filter,
+  TimeUnit rateUnit,
+  TimeUnit durationUnit) {
+super(registry, "hudi-datadog-reporter", filter, rateUnit, durationUnit);
+this.client = client;
+this.prefix = prefix;
+this.host = host;
+this.tags = tags;
+this.clock = Clock.defaultClock();
+  }
+
+  @Override
+  public void report(
+  SortedMap<String, Gauge> gauges,
+  SortedMap<String, Counter> counters,
+  SortedMap<String, Histogram> histograms,
+  SortedMap<String, Meter> meters,
+  SortedMap<String, Timer> timers) {
+final long now = clock.getTime() / 1000;
+final PayloadBuilder builder = new PayloadBuilder();
+
+builder.withType("gauge");
+gauges.forEach((metricName, metric) -> {
+  builder.addGauge(prefix(metricName), now, (long) metric.getValue());
+});
+
+host.ifPresent(builder::withHost);
+tags.ifPresent(builder::withTags);
+
+client.send(builder.build());
+  }
+
+  protected String prefix(String... components) {
+return MetricRegistry.name(prefix, components);
+  }
+
+  @Override
+  public void stop() {
+try {
+  super.stop();
+} finally {
+  try {
+client.close();
+  } catch (IOException e) {
+LOG.warn("Error disconnecting from Datadog.", e);
+  }
+}
+  }
+
+  /**
+   * Build payload that contains metrics data.
+   * 
+   * Refer to Datadog API reference 
https://docs.datadoghq.com/api/?lang=bash#post-timeseries-points
+   */
+  static class PayloadBuilder {
+
+private static final ObjectMapper MAPPER = new ObjectMapper();
+
+private final ObjectNode payload;
+private final ArrayNode series;
+private String type;
+
+PayloadBuilder() {
+  payload = MAPPER.createObjectNode();
+  series = payload.putArray("series");
+}
+
+PayloadBuilder withType(String type) {
+  this.type = type;
+  return this;
+}
+
+PayloadBuilder addGauge(String metric, long timestamp, long gaugeValue) {
+  ValidationUtils.checkState(Objects.equals(type, "gauge"));

Review comment:
   It would be better to extract these hard codes to be constants
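
A minimal sketch of that suggestion; the constant names are hypothetical, chosen only to show the literals shared between `withType()` and `addGauge()` moving into one place:

```java
// Hypothetical illustration of the review suggestion: pull the hard-coded
// literals into constants so withType() and the checkState() in addGauge()
// share a single definition. Constant names are made up for this sketch.
public final class DatadogMetricConstants {

  public static final String GAUGE_TYPE = "gauge";
  public static final String REPORTER_NAME = "hudi-datadog-reporter";

  private DatadogMetricConstants() {
    // constants holder, no instances
  }

  // In DatadogReporter.report():  builder.withType(GAUGE_TYPE);
  // In PayloadBuilder.addGauge(): ValidationUtils.checkState(Objects.equals(type, GAUGE_TYPE));
}
```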





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub 

[GitHub] [incubator-hudi] zhedoubushishi edited a comment on pull request #1638: HUDI-515 Resolve API conflict for Hive 2 & Hive 3

2020-05-18 Thread GitBox


zhedoubushishi edited a comment on pull request #1638:
URL: https://github.com/apache/incubator-hudi/pull/1638#issuecomment-629990534


   > @zhedoubushishi what does it take to support Hive 3.x for Hudi with a mvn 
flag ? If we cannot support hive 3.x, what is the intention of this PR ? I'm 
not very inclined to use reflection with try-catch here since it's not a clear 
indication of Hive 3.x support.
   
   Hi @n3nash, we internally have a Hudi branch to support Hive 3.x & Hadoop 
3.x; basically, what we do is first fix the API-level conflicts and then fix the 
dependency-level conflicts. This PR is like a prerequisite for the support of 
Hive 3.x. 
   My idea is that instead of creating a big pull request to include all the 
changes to support Hadoop 3 & Hive 3 in one PR, I am trying to split my code 
changes into several small commits; for example, this PR only resolves the 
[HUDI-515] issue.
   What do you think?
   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] zhedoubushishi commented on pull request #1638: HUDI-515 Resolve API conflict for Hive 2 & Hive 3

2020-05-18 Thread GitBox


zhedoubushishi commented on pull request #1638:
URL: https://github.com/apache/incubator-hudi/pull/1638#issuecomment-629990534


   > @zhedoubushishi what does it take to support Hive 3.x for Hudi with a mvn 
flag ? If we cannot support hive 3.x, what is the intention of this PR ? I'm 
not very inclined to use reflection with try-catch here since it's not a clear 
indication of Hive 3.x support.
   
   
   
   Hi @n3nash, we internally have a Hudi branch to support Hive 3.x & Hadoop 
3.x; basically, what we do is first fix the API-level conflicts and then fix the 
dependency-level conflicts. This PR is like a prerequisite for the support of 
Hive 3.x. 
   My idea is that instead of creating a big pull request to include all the 
changes to support Hadoop 3 & Hive 3 in one PR, I am trying to split my code 
changes into several small commits; for example, this PR only resolves the 
[HUDI-515] issue.
   What do you think?
   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] reste85 closed issue #1598: [SUPPORT] Slow upsert time reading from Kafka

2020-05-18 Thread GitBox


reste85 closed issue #1598:
URL: https://github.com/apache/incubator-hudi/issues/1598


   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] reste85 edited a comment on issue #1598: [SUPPORT] Slow upsert time reading from Kafka

2020-05-18 Thread GitBox


reste85 edited a comment on issue #1598:
URL: https://github.com/apache/incubator-hudi/issues/1598#issuecomment-629988384


   Hi guys,
   As stated in chat, this is not related to Hudi in general. At first sight we 
thought that the problem was due to this: 
https://issues.apache.org/jira/browse/KAFKA-4753. Indeed, the problem can be 
explained by the fact that we're using transactional producers: offsets in 
Kafka with transactional producers have a different meaning, and some of them are 
used to handle transactions. This offset usage is probably what makes our 
consumers hang on the "last run", since the ending offset is never reached by an 
actual record. So this problem probably relates to that. Have a look at:
   
https://stackoverflow.com/questions/59763422/in-my-kafka-topic-end-of-the-offset-is-higher-than-last-messagess-offset-numbe
   https://issues.apache.org/jira/browse/KAFKA-8358
   
https://stackoverflow.com/questions/56182606/in-kafka-when-producing-message-with-transactional-consumer-offset-doubled-up
   
   I'm closing the issue, thank you for your support!
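
   For reference, a minimal sketch (plain Kafka consumer API; the broker address 
and topic name are placeholders) of how the end offset reported by the broker can 
sit past the last readable record when a transactional producer is involved:

```java
// Sketch only (broker address and topic name are placeholders): with a
// transactional producer, the broker-reported end offset includes transaction
// control markers, so it lands past the last record poll() will ever return.
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

public class EndOffsetProbe {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.put("bootstrap.servers", "localhost:9092");   // placeholder
    props.put("key.deserializer",
        "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer",
        "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("isolation.level", "read_committed");

    TopicPartition tp = new TopicPartition("some-transactional-topic", 0);  // placeholder
    try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
      consumer.assign(Collections.singletonList(tp));
      consumer.seekToBeginning(Collections.singletonList(tp));

      long lastRecordOffset = -1L;
      ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(5));
      for (ConsumerRecord<String, String> record : records.records(tp)) {
        lastRecordOffset = record.offset();
      }

      long endOffset = consumer.endOffsets(Collections.singletonList(tp)).get(tp);
      // After a committed transaction the end offset is typically
      // lastRecordOffset + 2 (last data record + commit marker), so a loop that
      // waits to consume a record at endOffset - 1 can appear to hang.
      System.out.printf("last record offset = %d, end offset = %d%n",
          lastRecordOffset, endOffset);
    }
  }
}
```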
   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] reste85 commented on issue #1598: [SUPPORT] Slow upsert time reading from Kafka

2020-05-18 Thread GitBox


reste85 commented on issue #1598:
URL: https://github.com/apache/incubator-hudi/issues/1598#issuecomment-629988384


   Hi guys,
   As stated in chat, this is not related to Hudi in general. At first sight we 
thought that the problem was due to this: 
https://issues.apache.org/jira/browse/KAFKA-4753. Indeed, the problem can be 
explained by the fact that we're using transactional producers: offsets in 
Kafka with transactional producers have a different meaning, and some of them are 
used to handle transactions. This offset usage is probably what makes our 
consumers hang on the "last run", since the ending offset is never reached by an 
actual record. So this problem probably relates to that. Have a look at:
   
https://stackoverflow.com/questions/59763422/in-my-kafka-topic-end-of-the-offset-is-higher-than-last-messagess-offset-numbe
   https://issues.apache.org/jira/browse/KAFKA-8358
   
https://stackoverflow.com/questions/56182606/in-kafka-when-producing-message-with-transactional-consumer-offset-doubled-up
   
   You can close the issue, thank you for your support!
   



This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [incubator-hudi] codecov-io edited a comment on pull request #1633: [HUDI-858] Allow multiple operations to be executed within a single commit

2020-05-18 Thread GitBox


codecov-io edited a comment on pull request #1633:
URL: https://github.com/apache/incubator-hudi/pull/1633#issuecomment-629848790


   # 
[Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1633?src=pr=h1) 
Report
   > Merging 
[#1633](https://codecov.io/gh/apache/incubator-hudi/pull/1633?src=pr=desc) 
into 
[master](https://codecov.io/gh/apache/incubator-hudi/commit/25e0b75b3d03b6d460dc18d1a5fce7b881b0e019=desc)
 will **decrease** coverage by `54.59%`.
   > The diff coverage is `38.88%`.
   
   [![Impacted file tree 
graph](https://codecov.io/gh/apache/incubator-hudi/pull/1633/graphs/tree.svg?width=650=150=pr=VTTXabwbs2)](https://codecov.io/gh/apache/incubator-hudi/pull/1633?src=pr=tree)
   
   ```diff
   @@  Coverage Diff  @@
   ## master#1633   +/-   ##
   =
   - Coverage 71.81%   17.22%   -54.60% 
   + Complexity 1092  827  -265 
   =
 Files   386  344   -42 
 Lines 1660815481 -1127 
 Branches   1667 1582   -85 
   =
   - Hits  11927 2666 -9261 
   - Misses 395512465 +8510 
   + Partials726  350  -376 
   ```
   
   
   | [Impacted 
Files](https://codecov.io/gh/apache/incubator-hudi/pull/1633?src=pr=tree) | 
Coverage Δ | Complexity Δ | |
   |---|---|---|---|
   | 
[...che/hudi/table/action/commit/BulkInsertHelper.java](https://codecov.io/gh/apache/incubator-hudi/pull/1633/diff?src=pr=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9CdWxrSW5zZXJ0SGVscGVyLmphdmE=)
 | `0.00% <0.00%> (-85.00%)` | `0.00 <0.00> (ø)` | |
   | 
[...java/org/apache/hudi/config/HoodieWriteConfig.java](https://codecov.io/gh/apache/incubator-hudi/pull/1633/diff?src=pr=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvY29uZmlnL0hvb2RpZVdyaXRlQ29uZmlnLmphdmE=)
 | `49.15% <20.00%> (-35.70%)` | `53.00 <1.00> (+6.00)` | :arrow_down: |
   | 
[...di/common/table/timeline/HoodieActiveTimeline.java](https://codecov.io/gh/apache/incubator-hudi/pull/1633/diff?src=pr=tree#diff-aHVkaS1jb21tb24vc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvY29tbW9uL3RhYmxlL3RpbWVsaW5lL0hvb2RpZUFjdGl2ZVRpbWVsaW5lLmphdmE=)
 | `28.49% <44.44%> (-54.40%)` | `17.00 <1.00> (+1.00)` | :arrow_down: |
   | 
[.../table/action/commit/BaseCommitActionExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1633/diff?src=pr=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9CYXNlQ29tbWl0QWN0aW9uRXhlY3V0b3IuamF2YQ==)
 | `46.01% <100.00%> (-38.81%)` | `14.00 <0.00> (ø)` | |
   | 
[...n/java/org/apache/hudi/io/AppendHandleFactory.java](https://codecov.io/gh/apache/incubator-hudi/pull/1633/diff?src=pr=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvaW8vQXBwZW5kSGFuZGxlRmFjdG9yeS5qYXZh)
 | `0.00% <0.00%> (-100.00%)` | `0.00% <0.00%> (ø%)` | |
   | 
[.../java/org/apache/hudi/client/HoodieReadClient.java](https://codecov.io/gh/apache/incubator-hudi/pull/1633/diff?src=pr=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvY2xpZW50L0hvb2RpZVJlYWRDbGllbnQuamF2YQ==)
 | `0.00% <0.00%> (-100.00%)` | `0.00% <0.00%> (ø%)` | |
   | 
[.../java/org/apache/hudi/metrics/MetricsReporter.java](https://codecov.io/gh/apache/incubator-hudi/pull/1633/diff?src=pr=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvbWV0cmljcy9NZXRyaWNzUmVwb3J0ZXIuamF2YQ==)
 | `0.00% <0.00%> (-100.00%)` | `0.00% <0.00%> (ø%)` | |
   | 
[.../java/org/apache/hudi/common/model/ActionType.java](https://codecov.io/gh/apache/incubator-hudi/pull/1633/diff?src=pr=tree#diff-aHVkaS1jb21tb24vc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvY29tbW9uL21vZGVsL0FjdGlvblR5cGUuamF2YQ==)
 | `0.00% <0.00%> (-100.00%)` | `0.00% <0.00%> (ø%)` | |
   | 
[...java/org/apache/hudi/io/HoodieRangeInfoHandle.java](https://codecov.io/gh/apache/incubator-hudi/pull/1633/diff?src=pr=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvaW8vSG9vZGllUmFuZ2VJbmZvSGFuZGxlLmphdmE=)
 | `0.00% <0.00%> (-100.00%)` | `0.00% <0.00%> (ø%)` | |
   | 
[.../java/org/apache/hudi/hadoop/InputPathHandler.java](https://codecov.io/gh/apache/incubator-hudi/pull/1633/diff?src=pr=tree#diff-aHVkaS1oYWRvb3AtbXIvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvaGFkb29wL0lucHV0UGF0aEhhbmRsZXIuamF2YQ==)
 | `0.00% <0.00%> (-100.00%)` | `0.00% <0.00%> (ø%)` | |
   | ... and [314 
more](https://codecov.io/gh/apache/incubator-hudi/pull/1633/diff?src=pr=tree-more)
 | |
   
   --
   
   [Continue to review full report at 
Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1633?src=pr=continue).
   > **Legend** - [Click here to learn 
more](https://docs.codecov.io/docs/codecov-delta)
   > `Δ = absolute  (impact)`, `ø = not affected`, `? = missing data`
   > Powered by 

[jira] [Updated] (HUDI-907) Test Presto mor query support changes in HDFS Env

2020-05-18 Thread Bhavani Sudha (Jira)


 [ 
https://issues.apache.org/jira/browse/HUDI-907?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Bhavani Sudha updated HUDI-907:
---
Status: Open  (was: New)

> Test Presto mor query support changes in HDFS Env
> -
>
> Key: HUDI-907
> URL: https://issues.apache.org/jira/browse/HUDI-907
> Project: Apache Hudi (incubating)
>  Issue Type: Sub-task
>  Components: Presto Integration
>Reporter: Bhavani Sudha
>Assignee: Bhavani Sudha
>Priority: Major
> Fix For: 0.5.3
>
>
> Test presto integration for HDFS environment as well in addition to S3.
>  
> Blockers faced so far
> [~bdscheller] I tried to apply your presto patch to test mor queries on 
> Presto. The way I set it up was to create a docker image from your presto 
> patch and use that image in the hudi local docker environment. I observed a 
> couple of issues there:
>  * I got NoClassDefFoundError for these classes:
>  ** org/apache/parquet/avro/AvroSchemaConverter
>  ** org/apache/parquet/hadoop/ParquetFileReader
>  ** org/apache/parquet/io/InputFile
>  ** org/apache/parquet/format/TypeDefinedOrder
> I was able to get around the first three errors by shading org.apache.parquet 
> inside hudi-presto-bundle and changing presto-hive to depend on the 
> hudi-presto-bundle. However, for the last one shading didn't help because it's 
> already a Thrift-generated class. I am wondering if you also ran into similar 
> issues while testing S3.
> Could you please elaborate on your test setup so we can do a similar thing for 
> HDFS as well. If we need to add more changes to hudi-presto-bundle, we would 
> need to prioritize that for the 0.5.3 release asap.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Created] (HUDI-907) Test Presto mor query support changes in HDFS Env

2020-05-18 Thread Bhavani Sudha (Jira)
Bhavani Sudha created HUDI-907:
--

 Summary: Test Presto mor query support changes in HDFS Env
 Key: HUDI-907
 URL: https://issues.apache.org/jira/browse/HUDI-907
 Project: Apache Hudi (incubating)
  Issue Type: Sub-task
  Components: Presto Integration
Reporter: Bhavani Sudha
Assignee: Bhavani Sudha
 Fix For: 0.5.3


Test presto integration for HDFS environment as well in addition to S3.

 

Blockers faced so far

[~bdscheller] I tried to apply your presto patch to test mor queries on Presto. 
The way I set it up was to create a docker image from your presto patch and use 
that image in the hudi local docker environment. I observed a couple of issues there:
 * I got NoClassDefFoundError for these classes:
 ** org/apache/parquet/avro/AvroSchemaConverter
 ** org/apache/parquet/hadoop/ParquetFileReader
 ** org/apache/parquet/io/InputFile
 ** org/apache/parquet/format/TypeDefinedOrder

I was able to get around the first three errors by shading org.apache.parquet 
inside hudi-presto-bundle and changing presto-hive to depend on the 
hudi-presto-bundle. However, for the last one shading didn't help because it's 
already a Thrift-generated class. I am wondering if you also ran into similar 
issues while testing S3.

Could you please elaborate on your test setup so we can do a similar thing for HDFS 
as well. If we need to add more changes to hudi-presto-bundle, we would need to 
prioritize that for the 0.5.3 release asap.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HUDI-305) Presto MOR "_rt" queries only reads base parquet file

2020-05-18 Thread Bhavani Sudha (Jira)


 [ 
https://issues.apache.org/jira/browse/HUDI-305?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Bhavani Sudha updated HUDI-305:
---
Fix Version/s: 0.5.3
   0.6.0

> Presto MOR "_rt" queries only reads base parquet file 
> --
>
> Key: HUDI-305
> URL: https://issues.apache.org/jira/browse/HUDI-305
> Project: Apache Hudi (incubating)
>  Issue Type: Bug
>  Components: Presto Integration
> Environment: On AWS EMR
>Reporter: Brandon Scheller
>Assignee: Bhavani Sudha
>Priority: Major
>  Labels: pull-request-available
> Fix For: 0.6.0, 0.5.3
>
>
> Code example to reproduce.
> {code:java}
> import org.apache.hudi.DataSourceWriteOptions
> import org.apache.hudi.config.HoodieWriteConfig
> import org.apache.spark.sql.SaveMode
> val df = Seq(
>   ("100", "event_name_900", "2015-01-01T13:51:39.340396Z", "type1"),
>   ("101", "event_name_546", "2015-01-01T12:14:58.597216Z", "type2"),
>   ("104", "event_name_123", "2015-01-01T12:15:00.512679Z", "type1"),
>   ("105", "event_name_678", "2015-01-01T13:51:42.248818Z", "type2")
>   ).toDF("event_id", "event_name", "event_ts", "event_type")
> var tableName = "hudi_events_mor_1"
> var tablePath = "s3://emr-users/wenningd/hudi/tables/events/" + tableName
> // write hudi dataset
> df.write.format("org.apache.hudi")
>   .option(HoodieWriteConfig.TABLE_NAME, tableName)
>   .option(DataSourceWriteOptions.OPERATION_OPT_KEY, 
> DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
>   .option(DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY, 
> DataSourceWriteOptions.MOR_STORAGE_TYPE_OPT_VAL)
>   .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "event_id")
>   .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "event_type") 
>   .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "event_ts")
>   .option(DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY, "true")
>   .option(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY, tableName)
>   .option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY, "event_type")
>   .option(DataSourceWriteOptions.HIVE_ASSUME_DATE_PARTITION_OPT_KEY, "false")
>   .option(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY, 
> "org.apache.hudi.hive.MultiPartKeysValueExtractor")
>   .mode(SaveMode.Overwrite)
>   .save(tablePath)
> // update a record with event_name "event_name_123" => "event_name_changed"
> val df1 = spark.read.format("org.apache.hudi").load(tablePath + "/*/*")
> val df2 = df1.filter($"event_id" === "104")
> val df3 = df2.withColumn("event_name", lit("event_name_changed"))
> // update hudi dataset
> df3.write.format("org.apache.hudi")
>.option(HoodieWriteConfig.TABLE_NAME, tableName)
>.option(DataSourceWriteOptions.OPERATION_OPT_KEY, 
> DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL)
>.option(DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY, 
> DataSourceWriteOptions.MOR_STORAGE_TYPE_OPT_VAL)
>.option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "event_id")
>.option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "event_type") 
>.option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "event_ts")
>.option("hoodie.compact.inline", "false")
>.option(DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY, "true")
>.option(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY, tableName)
>.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY, "event_type")
>.option(DataSourceWriteOptions.HIVE_ASSUME_DATE_PARTITION_OPT_KEY, "false")
>.option(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY, 
> "org.apache.hudi.hive.MultiPartKeysValueExtractor")
>.mode(SaveMode.Append)
>.save(tablePath)
> {code}
> Now when querying the real-time table from Hive, we have no issue seeing the 
> updated value:
> {code:java}
> hive> select event_name from hudi_events_mor_1_rt;
> OK
> event_name_900
> event_name_changed
> event_name_546
> event_name_678
> Time taken: 0.103 seconds, Fetched: 4 row(s)
> {code}
> But when querying the real-time table from Presto, we only read the base 
> parquet file and do not see the update that should be merged in from the log 
> file.
> {code:java}
> presto:default> select event_name from hudi_events_mor_1_rt;
>event_name
> 
>  event_name_900
>  event_name_123
>  event_name_546
>  event_name_678
> (4 rows)
> {code}
> Our current understanding of this issue is that while the 
> HoodieParquetRealtimeInputFormat correctly generates the splits, the 
> RealtimeCompactedRecordReader record reader is not used, so it is not reading 
> the log file and is only reading the base parquet file.
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HUDI-705) Add unit test for RollbacksCommand

2020-05-18 Thread vinoyang (Jira)


 [ 
https://issues.apache.org/jira/browse/HUDI-705?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

vinoyang updated HUDI-705:
--
Status: Open  (was: New)

> Add unit test for RollbacksCommand
> --
>
> Key: HUDI-705
> URL: https://issues.apache.org/jira/browse/HUDI-705
> Project: Apache Hudi (incubating)
>  Issue Type: Sub-task
>  Components: CLI, Testing
>Reporter: hong dongdong
>Assignee: hong dongdong
>Priority: Major
>  Labels: pull-request-available
>




--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[incubator-hudi] branch master updated: [HUDI-705] Add unit test for RollbacksCommand (#1611)

2020-05-18 Thread vinoyang
This is an automated email from the ASF dual-hosted git repository.

vinoyang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git


The following commit(s) were added to refs/heads/master by this push:
 new 57132f7  [HUDI-705] Add unit test for RollbacksCommand (#1611)
57132f7 is described below

commit 57132f79bb2dad6cfb215480b435a778714a442d
Author: hongdd 
AuthorDate: Mon May 18 14:04:06 2020 +0800

[HUDI-705] Add unit test for RollbacksCommand (#1611)
---
 .../apache/hudi/cli/HoodieTableHeaderFields.java   |  10 ++
 .../apache/hudi/cli/commands/RollbacksCommand.java |  19 ++-
 .../hudi/cli/commands/TestRollbacksCommand.java| 182 +
 3 files changed, 204 insertions(+), 7 deletions(-)

diff --git 
a/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieTableHeaderFields.java 
b/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieTableHeaderFields.java
index 5e31e5c..4fc41a1 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieTableHeaderFields.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieTableHeaderFields.java
@@ -23,6 +23,7 @@ package org.apache.hudi.cli;
  */
 public class HoodieTableHeaderFields {
   public static final String HEADER_PARTITION = "Partition";
+  public static final String HEADER_INSTANT = "Instant";
   public static final String HEADER_PARTITION_PATH = HEADER_PARTITION + " 
Path";
   public static final String HEADER_FILE_ID = "FileId";
   public static final String HEADER_BASE_INSTANT = "Base-Instant";
@@ -81,4 +82,13 @@ public class HoodieTableHeaderFields {
   public static final String HEADER_HOODIE_PROPERTY = "Property";
   public static final String HEADER_OLD_VALUE = "Old Value";
   public static final String HEADER_NEW_VALUE = "New Value";
+
+  /**
+   * Fields of Rollback.
+   */
+  public static final String HEADER_ROLLBACK_INSTANT = "Rolledback " + 
HEADER_INSTANT;
+  public static final String HEADER_TIME_TOKEN_MILLIS = "Time taken in millis";
+  public static final String HEADER_TOTAL_PARTITIONS = "Total Partitions";
+  public static final String HEADER_DELETED_FILE = "Deleted File";
+  public static final String HEADER_SUCCEEDED = "Succeeded";
 }
diff --git 
a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RollbacksCommand.java 
b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RollbacksCommand.java
index 70b34bc..4feb4c1 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RollbacksCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RollbacksCommand.java
@@ -21,6 +21,7 @@ package org.apache.hudi.cli.commands;
 import org.apache.hudi.avro.model.HoodieRollbackMetadata;
 import org.apache.hudi.cli.HoodieCLI;
 import org.apache.hudi.cli.HoodiePrintHelper;
+import org.apache.hudi.cli.HoodieTableHeaderFields;
 import org.apache.hudi.cli.TableHeader;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
@@ -56,8 +57,7 @@ public class RollbacksCommand implements CommandMarker {
   @CliOption(key = {"sortBy"}, help = "Sorting Field", 
unspecifiedDefaultValue = "") final String sortByField,
   @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = 
"false") final boolean descending,
   @CliOption(key = {"headeronly"}, help = "Print Header Only",
-  unspecifiedDefaultValue = "false") final boolean headerOnly)
-  throws IOException {
+  unspecifiedDefaultValue = "false") final boolean headerOnly) {
 HoodieActiveTimeline activeTimeline = new 
RollbackTimeline(HoodieCLI.getTableMetaClient());
 HoodieTimeline rollback = 
activeTimeline.getRollbackTimeline().filterCompletedInstants();
 
@@ -79,9 +79,11 @@ public class RollbacksCommand implements CommandMarker {
 e.printStackTrace();
   }
 });
-TableHeader header = new 
TableHeader().addTableHeaderField("Instant").addTableHeaderField("Rolledback 
Instant")
-.addTableHeaderField("Total Files Deleted").addTableHeaderField("Time 
taken in millis")
-.addTableHeaderField("Total Partitions");
+TableHeader header = new 
TableHeader().addTableHeaderField(HoodieTableHeaderFields.HEADER_INSTANT)
+.addTableHeaderField(HoodieTableHeaderFields.HEADER_ROLLBACK_INSTANT)
+
.addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_FILES_DELETED)
+.addTableHeaderField(HoodieTableHeaderFields.HEADER_TIME_TOKEN_MILLIS)
+.addTableHeaderField(HoodieTableHeaderFields.HEADER_TOTAL_PARTITIONS);
 return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, 
descending, limit, headerOnly, rows);
   }
 
@@ -112,8 +114,11 @@ public class RollbacksCommand implements CommandMarker {
   rows.add(row);
 }));
 
-TableHeader header = new 
TableHeader().addTableHeaderField("Instant").addTableHeaderField("Rolledback 
Instants")
-.addTableHeaderField("Partition").addTableHeaderField("Deleted