jsbali commented on a change in pull request #2542:
URL: https://github.com/apache/hudi/pull/2542#discussion_r655672894
##########
File path:
hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java
##########
@@ -120,48 +120,45 @@ public static void setUp() throws IOException,
InterruptedException {
clear();
}
- public static void clear() throws IOException {
+ public static void clear() throws Exception {
fileSystem.delete(new Path(hiveSyncConfig.basePath), true);
HoodieTableMetaClient.withPropertyBuilder()
- .setTableType(HoodieTableType.COPY_ON_WRITE)
- .setTableName(hiveSyncConfig.tableName)
- .setPayloadClass(HoodieAvroPayload.class)
- .initTable(configuration, hiveSyncConfig.basePath);
+ .setTableType(HoodieTableType.COPY_ON_WRITE)
Review comment:
This is the only place where I have left it as-is, because it already
conforms to the checkstyle spec.
##########
File path:
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java
##########
@@ -253,4 +253,4 @@ private BootstrapBaseFileSplit
makeExternalFileSplit(PathWithBootstrapFileStatus
throw new HoodieIOException(e.getMessage(), e);
}
}
-}
\ No newline at end of file
+}
Review comment:
Done
##########
File path:
hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java
##########
@@ -98,8 +98,8 @@
private ConsistencyGuardConfig consistencyGuardConfig =
ConsistencyGuardConfig.newBuilder().build();
private HoodieTableMetaClient(Configuration conf, String basePath, boolean
loadActiveTimelineOnLoad,
- ConsistencyGuardConfig consistencyGuardConfig,
Option<TimelineLayoutVersion> layoutVersion,
- String payloadClassName) {
+ ConsistencyGuardConfig
consistencyGuardConfig, Option<TimelineLayoutVersion> layoutVersion,
Review comment:
Done
##########
File path:
hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestGloballyConsistentTimeStampFilteringInputFormat.java
##########
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.hadoop;
+
+import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.exception.HoodieIOException;
+import org.apache.hudi.hadoop.testutils.InputFormatTestUtil;
+import org.apache.hudi.hadoop.utils.HoodieHiveUtils;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class TestGloballyConsistentTimeStampFilteringInputFormat
+ extends TestHoodieParquetInputFormat {
+
+ @BeforeEach
+ public void setUp() {
+ super.setUp();
+ //jobConf.set(HoodieHiveUtils.GLOBALLY_CONSISTENT_READ_TIMESTAMP,
String.valueOf(Long.MAX_VALUE));
Review comment:
Done
##########
File path:
hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestGloballyConsistentTimeStampFilteringInputFormat.java
##########
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.hadoop;
+
+import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.exception.HoodieIOException;
+import org.apache.hudi.hadoop.testutils.InputFormatTestUtil;
+import org.apache.hudi.hadoop.utils.HoodieHiveUtils;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class TestGloballyConsistentTimeStampFilteringInputFormat
+ extends TestHoodieParquetInputFormat {
+
+ @BeforeEach
+ public void setUp() {
+ super.setUp();
+ //jobConf.set(HoodieHiveUtils.GLOBALLY_CONSISTENT_READ_TIMESTAMP,
String.valueOf(Long.MAX_VALUE));
+ //jobConf.set(HoodieHiveUtils.DISABLE_HOODIE_GLOBALLY_CONSISTENT_READS,
"false");
+ }
+
+ @Test
+ public void testInputFormatLoad() throws IOException {
+ super.testInputFormatLoad();
+
+ // set filtering timestamp to 0 now the timeline wont have any commits.
+ InputFormatTestUtil.setupSnapshotMaxCommitTimeQueryMode(jobConf, "0");
+
+ Assertions.assertThrows(HoodieIOException.class, () ->
inputFormat.getSplits(jobConf, 10));
+ Assertions.assertThrows(HoodieIOException.class, () ->
inputFormat.listStatus(jobConf));
+ }
+
+ @Test
+ public void testInputFormatUpdates() throws IOException {
+ super.testInputFormatUpdates();
+
+ // set the globally replicated timestamp to 199 so only 100 is read and
update is ignored.
+ InputFormatTestUtil.setupSnapshotMaxCommitTimeQueryMode(jobConf, "100");
+
+ FileStatus[] files = inputFormat.listStatus(jobConf);
+ assertEquals(10, files.length);
+
+ ensureFilesInCommit("5 files have been updated to commit 200. but should
get filtered out ",
+ files,"200", 0);
+ ensureFilesInCommit("We should see 10 files from commit 100 ", files,
"100", 10);
+ }
+
+ @Override
+ public void testIncrementalSimple() throws IOException {
+ // setting filtering timestamp to zero should not in any way alter the
result of the test which
+ // pulls in zero files due to incremental ts being the actual commit time
+ jobConf.set(HoodieHiveUtils.GLOBALLY_CONSISTENT_READ_TIMESTAMP, "0");
+ super.testIncrementalSimple();
+ }
+
+ @Override
+ public void testIncrementalWithMultipleCommits() throws IOException {
+ super.testIncrementalWithMultipleCommits();
+
+ // set globally replicated timestamp to 400 so commits from 500, 600 does
not show up
+ InputFormatTestUtil.setupSnapshotMaxCommitTimeQueryMode(jobConf, "400");
+ //jobConf.set(HoodieHiveUtils.GLOBALLY_CONSISTENT_READ_TIMESTAMP, "400");
Review comment:
Done
##########
File path:
hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestGloballyConsistentTimeStampFilteringInputFormat.java
##########
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.hadoop;
+
+import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.exception.HoodieIOException;
+import org.apache.hudi.hadoop.testutils.InputFormatTestUtil;
+import org.apache.hudi.hadoop.utils.HoodieHiveUtils;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class TestGloballyConsistentTimeStampFilteringInputFormat
+ extends TestHoodieParquetInputFormat {
+
+ @BeforeEach
+ public void setUp() {
+ super.setUp();
+ //jobConf.set(HoodieHiveUtils.GLOBALLY_CONSISTENT_READ_TIMESTAMP,
String.valueOf(Long.MAX_VALUE));
+ //jobConf.set(HoodieHiveUtils.DISABLE_HOODIE_GLOBALLY_CONSISTENT_READS,
"false");
+ }
+
+ @Test
+ public void testInputFormatLoad() throws IOException {
+ super.testInputFormatLoad();
+
+ // set filtering timestamp to 0 now the timeline wont have any commits.
+ InputFormatTestUtil.setupSnapshotMaxCommitTimeQueryMode(jobConf, "0");
+
+ Assertions.assertThrows(HoodieIOException.class, () ->
inputFormat.getSplits(jobConf, 10));
+ Assertions.assertThrows(HoodieIOException.class, () ->
inputFormat.listStatus(jobConf));
+ }
+
+ @Test
+ public void testInputFormatUpdates() throws IOException {
+ super.testInputFormatUpdates();
+
+ // set the globally replicated timestamp to 199 so only 100 is read and
update is ignored.
+ InputFormatTestUtil.setupSnapshotMaxCommitTimeQueryMode(jobConf, "100");
+
+ FileStatus[] files = inputFormat.listStatus(jobConf);
+ assertEquals(10, files.length);
+
+ ensureFilesInCommit("5 files have been updated to commit 200. but should
get filtered out ",
+ files,"200", 0);
+ ensureFilesInCommit("We should see 10 files from commit 100 ", files,
"100", 10);
+ }
+
+ @Override
+ public void testIncrementalSimple() throws IOException {
+ // setting filtering timestamp to zero should not in any way alter the
result of the test which
+ // pulls in zero files due to incremental ts being the actual commit time
+ jobConf.set(HoodieHiveUtils.GLOBALLY_CONSISTENT_READ_TIMESTAMP, "0");
+ super.testIncrementalSimple();
+ }
+
+ @Override
+ public void testIncrementalWithMultipleCommits() throws IOException {
+ super.testIncrementalWithMultipleCommits();
+
+ // set globally replicated timestamp to 400 so commits from 500, 600 does
not show up
+ InputFormatTestUtil.setupSnapshotMaxCommitTimeQueryMode(jobConf, "400");
+ //jobConf.set(HoodieHiveUtils.GLOBALLY_CONSISTENT_READ_TIMESTAMP, "400");
+ InputFormatTestUtil.setupIncremental(jobConf, "100",
HoodieHiveUtils.MAX_COMMIT_ALL);
+
+ FileStatus[] files = inputFormat.listStatus(jobConf);
+
+ assertEquals(
+ 5, files.length,"Pulling ALL commits from 100, should get us the 3
files from 400 commit, 1 file from 300 "
+ + "commit and 1 file from 200 commit");
+ ensureFilesInCommit("Pulling 3 commits from 100, should get us the 3 files
from 400 commit",
+ files, "400", 3);
+ ensureFilesInCommit("Pulling 3 commits from 100, should get us the 1 files
from 300 commit",
+ files, "300", 1);
+ ensureFilesInCommit("Pulling 3 commits from 100, should get us the 1 files
from 200 commit",
+ files, "200", 1);
+
+ List<String> commits = Arrays.asList("100", "200", "300", "400", "500",
"600");
+ for (int idx = 0; idx < commits.size(); ++idx) {
+ for (int jdx = 0; jdx < commits.size(); ++jdx) {
+ InputFormatTestUtil.setupIncremental(jobConf, commits.get(idx),
HoodieHiveUtils.MAX_COMMIT_ALL);
+ InputFormatTestUtil.setupSnapshotMaxCommitTimeQueryMode(jobConf,
commits.get(jdx));
+
+ //jobConf.set(HoodieHiveUtils.GLOBALLY_CONSISTENT_READ_TIMESTAMP,
Review comment:
Done
##########
File path:
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java
##########
@@ -104,6 +104,7 @@
@Parameter(names = {"--decode-partition"}, description = "Decode the
partition value if the partition has encoded during writing")
public Boolean decodePartition = false;
+ // enahnce the similar function in child class
Review comment:
Fixed
##########
File path:
hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java
##########
@@ -82,35 +80,37 @@
@SuppressWarnings("SameParameterValue")
public class HiveTestUtil {
+ public static HiveSyncConfig hiveSyncConfig;
+ public static FileSystem fileSystem;
private static ZooKeeperServer zkServer;
private static HiveServer2 hiveServer;
public static HiveTestService hiveTestService;
private static ZookeeperTestService zkService;
private static Configuration configuration;
- public static HiveSyncConfig hiveSyncConfig;
private static DateTimeFormatter dtfOut;
- public static FileSystem fileSystem;
private static Set<String> createdTablesSet = new HashSet<>();
- public static void setUp() throws IOException, InterruptedException {
- configuration = new Configuration();
+ private static TestCluster cluster;
+
+ public static void setUp() throws Exception {
+ if (cluster == null) {
+ cluster = new TestCluster();
+ cluster.setup();
+ configuration = cluster.getConf();
+ fileSystem = cluster.dfsCluster.getFileSystem();
+ }
+ //configuration = new Configuration();
if (zkServer == null) {
zkService = new ZookeeperTestService(configuration);
zkServer = zkService.start();
}
- if (hiveServer == null) {
- hiveTestService = new HiveTestService(configuration);
- hiveServer = hiveTestService.start();
- }
- fileSystem = FileSystem.get(configuration);
-
hiveSyncConfig = new HiveSyncConfig();
- hiveSyncConfig.jdbcUrl = hiveTestService.getJdbcHive2Url();
- hiveSyncConfig.hiveUser = "";
+ hiveSyncConfig.hiveUser = System.getProperty("user.name");
+ hiveSyncConfig.jdbcUrl = cluster.getHiveJdBcUrl();
hiveSyncConfig.hivePass = "";
hiveSyncConfig.databaseName = "testdb";
hiveSyncConfig.tableName = "test1";
- hiveSyncConfig.basePath = Files.createTempDirectory("hivesynctest" +
Instant.now().toEpochMilli()).toUri().toString();
+ hiveSyncConfig.basePath = "/tmp/hdfs/TestHiveSyncTool/";
Review comment:
Done
##########
File path:
hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java
##########
@@ -82,35 +80,37 @@
@SuppressWarnings("SameParameterValue")
public class HiveTestUtil {
+ public static HiveSyncConfig hiveSyncConfig;
+ public static FileSystem fileSystem;
private static ZooKeeperServer zkServer;
private static HiveServer2 hiveServer;
public static HiveTestService hiveTestService;
private static ZookeeperTestService zkService;
private static Configuration configuration;
- public static HiveSyncConfig hiveSyncConfig;
private static DateTimeFormatter dtfOut;
- public static FileSystem fileSystem;
private static Set<String> createdTablesSet = new HashSet<>();
- public static void setUp() throws IOException, InterruptedException {
- configuration = new Configuration();
+ private static TestCluster cluster;
+
+ public static void setUp() throws Exception {
+ if (cluster == null) {
+ cluster = new TestCluster();
+ cluster.setup();
+ configuration = cluster.getConf();
+ fileSystem = cluster.dfsCluster.getFileSystem();
+ }
+ //configuration = new Configuration();
Review comment:
Done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]