vinothchandar commented on a change in pull request #2417:
URL: https://github.com/apache/hudi/pull/2417#discussion_r554529441
##########
File path:
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/metadata/TestHoodieBackedMetadata.java
##########
@@ -444,7 +445,8 @@ public void testSync(HoodieTableType tableType) throws
Exception {
assertTrue(metadata(client).isInSync());
}
- // Various table operations without metadata table enabled
+ // Various table operations without metadata table enabled. When metadata
is disabled, file system
Review comment:
But these calls seem like they enable metadata — see the first arg of
`getWriteConfig(..)`.
##########
File path:
hudi-common/src/main/java/org/apache/hudi/common/engine/HoodieLocalEngineContext.java
##########
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.engine;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hudi.common.config.SerializableConfiguration;
+import org.apache.hudi.common.function.SerializableConsumer;
+import org.apache.hudi.common.function.SerializableFunction;
+import org.apache.hudi.common.function.SerializablePairFunction;
+import org.apache.hudi.common.util.Option;
+
+import org.apache.hudi.common.util.collection.Pair;
+
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static java.util.stream.Collectors.toList;
+import static
org.apache.hudi.common.function.FunctionWrapper.throwingFlatMapWrapper;
+import static
org.apache.hudi.common.function.FunctionWrapper.throwingForeachWrapper;
+import static
org.apache.hudi.common.function.FunctionWrapper.throwingMapToPairWrapper;
+import static
org.apache.hudi.common.function.FunctionWrapper.throwingMapWrapper;
+
+/**
+ * A java based engine context that can be used from map-reduce tasks
executing in query engines like
Review comment:
Remove the reference to map-reduce tasks in the doc. This class is just generic enough
to be used from a single JVM — that's what it could say. Talking about MR is kind
of misleading.
##########
File path:
hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java
##########
@@ -66,10 +67,11 @@
protected final String spillableMapDirectory;
private transient HoodieMetadataMergedInstantRecordScanner
timelineRecordScanner;
- protected BaseTableMetadata(Configuration hadoopConf, String
datasetBasePath, String spillableMapDirectory,
+ protected BaseTableMetadata(HoodieEngineContext engineContext, String
datasetBasePath, String spillableMapDirectory,
Review comment:
Can we avoid this reformatting?
##########
File path:
hudi-common/src/test/java/org/apache/hudi/metadata/TestFileSystemBackedTableMetadata.java
##########
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.metadata;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hudi.common.config.SerializableConfiguration;
+import org.apache.hudi.common.engine.HoodieLocalEngineContext;
+import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.common.model.HoodiePartitionMetadata;
+import org.apache.hudi.common.testutils.FileCreateUtils;
+import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.List;
+import java.util.UUID;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+public class TestFileSystemBackedTableMetadata extends HoodieCommonTestHarness
{
+
+ private static final int NUM_FILE_IDS_PER_PARTITION = 10;
+
+ private static String TEST_WRITE_TOKEN = "1-0-1";
+
+ private final List<String> DATE_PARTITIONS = Arrays.asList("2019/01/01",
"2020/01/02", "2021/03/01");
+ private final List<String> ONE_LEVEL_PARTITIONS = Arrays.asList("2019",
"2020", "2021");
+ private final List<String> MULTI_LEVEL_PARTITIONS = Arrays.asList("2019/01",
"2020/01", "2021/01");
+ private final List<String> fileIdsPerPartition =
+ IntStream.range(0, NUM_FILE_IDS_PER_PARTITION).mapToObj(x ->
UUID.randomUUID().toString()).collect(Collectors.toList());
+
+ @BeforeEach
+ public void setUp() throws IOException {
+ initMetaClient();
+ }
+
+ @AfterEach
+ public void tearDown() throws IOException {
+ metaClient.getFs().delete(new Path(metaClient.getBasePath()), true);
+ }
+
+ /**
+ * Test non partition hoodie table
+ * @throws IOException
+ */
+ @Test
+ public void testNonPartitionedTable() throws IOException {
+ // Generate 10 files under basepath
+ String instant = "100";
+ createDataFiles(basePath, instant);
+ FileCreateUtils.createCommit(basePath, instant);
+ HoodieLocalEngineContext localEngineContext = new
HoodieLocalEngineContext(metaClient.getHadoopConf());
+ FileSystemBackedTableMetadata fileSystemBackedTableMetadata =
+ new FileSystemBackedTableMetadata(localEngineContext, new
SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllPartitionPaths().size()
== 0);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllFilesInPartition(new
Path(basePath)).length == 10);
+ }
+
+ /**
+ * Test listing of partitions result for date based partitions
+ * @throws IOException
+ */
+ @Test
+ public void testDatePartitionedTable() throws IOException {
+ String instant = "100";
+ // Generate 10 files under each partition
+ DATE_PARTITIONS.stream().map(p -> createPartition(p, false))
+ .forEach(p -> createDataFiles(basePath + "/" + p, instant));
+ FileCreateUtils.createCommit(basePath, instant);
+ HoodieLocalEngineContext localEngineContext = new
HoodieLocalEngineContext(metaClient.getHadoopConf());
+ FileSystemBackedTableMetadata fileSystemBackedTableMetadata =
+ new FileSystemBackedTableMetadata(localEngineContext, new
SerializableConfiguration(metaClient.getHadoopConf()), basePath, true);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllPartitionPaths().size()
== 3);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllFilesInPartition(new
Path(basePath + "/" + DATE_PARTITIONS.get(0))).length == 10);
+ }
+
+ /**
+ * Test listing of partitions result for date based partitions with
assumeDataPartitioning = false
+ * @throws IOException
+ */
+ @Test
+ public void testDatePartitionedTableWithAssumeDateIsFalse() throws
IOException {
+ String instant = "100";
+ // Generate 10 files under each partition
+ DATE_PARTITIONS.stream().map(p -> createPartition(p, false))
+ .forEach(p -> createDataFiles(basePath + "/" + p, instant));
+ FileCreateUtils.createCommit(basePath, instant);
+ HoodieLocalEngineContext localEngineContext = new
HoodieLocalEngineContext(metaClient.getHadoopConf());
+ FileSystemBackedTableMetadata fileSystemBackedTableMetadata =
+ new FileSystemBackedTableMetadata(localEngineContext, new
SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllPartitionPaths().size()
== 0);
+ }
+
+ @Test
+ public void testOneLevelPartitionedTableWithoutHoodiePartitionMetaFile()
throws IOException {
+ String instant = "100";
+ // Generate 10 files under each partition
+ ONE_LEVEL_PARTITIONS.stream().map(p -> createPartition(p, true))
+ .forEach(p -> createDataFiles(basePath + "/" + p, instant));
+ FileCreateUtils.createCommit(basePath, instant);
+ HoodieLocalEngineContext localEngineContext = new
HoodieLocalEngineContext(metaClient.getHadoopConf());
+ FileSystemBackedTableMetadata fileSystemBackedTableMetadata =
+ new FileSystemBackedTableMetadata(localEngineContext, new
SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllPartitionPaths().size()
== 3);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllFilesInPartition(new
Path(basePath + "/" + ONE_LEVEL_PARTITIONS.get(0))).length == 10);
+ }
+
+ @Test
+ public void testMultiLevelPartitionedTable() throws IOException {
+ String instant = "100";
+ // Generate 10 files under each partition
+ MULTI_LEVEL_PARTITIONS.stream().map(p -> createPartition(p, true))
+ .forEach(p -> createDataFiles(basePath + "/" + p, instant));
+ FileCreateUtils.createCommit(basePath, instant);
+ HoodieLocalEngineContext localEngineContext = new
HoodieLocalEngineContext(metaClient.getHadoopConf());
+ FileSystemBackedTableMetadata fileSystemBackedTableMetadata =
+ new FileSystemBackedTableMetadata(localEngineContext, new
SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllPartitionPaths().size()
== 3);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllFilesInPartition(new
Path(basePath + "/" + MULTI_LEVEL_PARTITIONS.get(0))).length == 10);
+ }
+
+ @Test
+ public void testMultiLevelEmptyPartitionTable() throws IOException {
+ String instant = "100";
+ // Generate 10 files under each partition
+ MULTI_LEVEL_PARTITIONS.stream().forEach(p -> createPartition(p, true));
+ FileCreateUtils.createCommit(basePath, instant);
+ HoodieLocalEngineContext localEngineContext = new
HoodieLocalEngineContext(metaClient.getHadoopConf());
+ FileSystemBackedTableMetadata fileSystemBackedTableMetadata =
+ new FileSystemBackedTableMetadata(localEngineContext, new
SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllPartitionPaths().size()
== 3);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllFilesInPartition(new
Path(basePath + "/" + MULTI_LEVEL_PARTITIONS.get(0))).length == 0);
+ }
+
+ private void createDataFiles(String fullPath, String instant) {
+ fileIdsPerPartition.stream().forEach(fId -> {
+ try {
+ new File(fullPath + "/" + FSUtils.makeDataFileName(instant,
TEST_WRITE_TOKEN, fId)).createNewFile();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
+ private String createPartition(String p, Boolean hoodiePartition) {
Review comment:
same comment
##########
File path:
hudi-common/src/test/java/org/apache/hudi/metadata/TestFileSystemBackedTableMetadata.java
##########
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.metadata;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hudi.common.config.SerializableConfiguration;
+import org.apache.hudi.common.engine.HoodieLocalEngineContext;
+import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.common.model.HoodiePartitionMetadata;
+import org.apache.hudi.common.testutils.FileCreateUtils;
+import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.List;
+import java.util.UUID;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+public class TestFileSystemBackedTableMetadata extends HoodieCommonTestHarness
{
+
+ private static final int NUM_FILE_IDS_PER_PARTITION = 10;
+
+ private static String TEST_WRITE_TOKEN = "1-0-1";
+
+ private final List<String> DATE_PARTITIONS = Arrays.asList("2019/01/01",
"2020/01/02", "2021/03/01");
+ private final List<String> ONE_LEVEL_PARTITIONS = Arrays.asList("2019",
"2020", "2021");
+ private final List<String> MULTI_LEVEL_PARTITIONS = Arrays.asList("2019/01",
"2020/01", "2021/01");
+ private final List<String> fileIdsPerPartition =
+ IntStream.range(0, NUM_FILE_IDS_PER_PARTITION).mapToObj(x ->
UUID.randomUUID().toString()).collect(Collectors.toList());
+
+ @BeforeEach
+ public void setUp() throws IOException {
+ initMetaClient();
+ }
+
+ @AfterEach
+ public void tearDown() throws IOException {
+ metaClient.getFs().delete(new Path(metaClient.getBasePath()), true);
+ }
+
+ /**
+ * Test non partition hoodie table
+ * @throws IOException
+ */
+ @Test
+ public void testNonPartitionedTable() throws IOException {
+ // Generate 10 files under basepath
+ String instant = "100";
+ createDataFiles(basePath, instant);
+ FileCreateUtils.createCommit(basePath, instant);
+ HoodieLocalEngineContext localEngineContext = new
HoodieLocalEngineContext(metaClient.getHadoopConf());
+ FileSystemBackedTableMetadata fileSystemBackedTableMetadata =
+ new FileSystemBackedTableMetadata(localEngineContext, new
SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllPartitionPaths().size()
== 0);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllFilesInPartition(new
Path(basePath)).length == 10);
+ }
+
+ /**
+ * Test listing of partitions result for date based partitions
+ * @throws IOException
+ */
+ @Test
+ public void testDatePartitionedTable() throws IOException {
+ String instant = "100";
+ // Generate 10 files under each partition
+ DATE_PARTITIONS.stream().map(p -> createPartition(p, false))
+ .forEach(p -> createDataFiles(basePath + "/" + p, instant));
+ FileCreateUtils.createCommit(basePath, instant);
+ HoodieLocalEngineContext localEngineContext = new
HoodieLocalEngineContext(metaClient.getHadoopConf());
+ FileSystemBackedTableMetadata fileSystemBackedTableMetadata =
+ new FileSystemBackedTableMetadata(localEngineContext, new
SerializableConfiguration(metaClient.getHadoopConf()), basePath, true);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllPartitionPaths().size()
== 3);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllFilesInPartition(new
Path(basePath + "/" + DATE_PARTITIONS.get(0))).length == 10);
+ }
+
+ /**
+ * Test listing of partitions result for date based partitions with
assumeDataPartitioning = false
+ * @throws IOException
+ */
+ @Test
+ public void testDatePartitionedTableWithAssumeDateIsFalse() throws
IOException {
+ String instant = "100";
+ // Generate 10 files under each partition
+ DATE_PARTITIONS.stream().map(p -> createPartition(p, false))
+ .forEach(p -> createDataFiles(basePath + "/" + p, instant));
+ FileCreateUtils.createCommit(basePath, instant);
+ HoodieLocalEngineContext localEngineContext = new
HoodieLocalEngineContext(metaClient.getHadoopConf());
+ FileSystemBackedTableMetadata fileSystemBackedTableMetadata =
+ new FileSystemBackedTableMetadata(localEngineContext, new
SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllPartitionPaths().size()
== 0);
+ }
+
+ @Test
+ public void testOneLevelPartitionedTableWithoutHoodiePartitionMetaFile()
throws IOException {
+ String instant = "100";
+ // Generate 10 files under each partition
+ ONE_LEVEL_PARTITIONS.stream().map(p -> createPartition(p, true))
+ .forEach(p -> createDataFiles(basePath + "/" + p, instant));
+ FileCreateUtils.createCommit(basePath, instant);
+ HoodieLocalEngineContext localEngineContext = new
HoodieLocalEngineContext(metaClient.getHadoopConf());
+ FileSystemBackedTableMetadata fileSystemBackedTableMetadata =
+ new FileSystemBackedTableMetadata(localEngineContext, new
SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllPartitionPaths().size()
== 3);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllFilesInPartition(new
Path(basePath + "/" + ONE_LEVEL_PARTITIONS.get(0))).length == 10);
+ }
+
+ @Test
+ public void testMultiLevelPartitionedTable() throws IOException {
+ String instant = "100";
+ // Generate 10 files under each partition
+ MULTI_LEVEL_PARTITIONS.stream().map(p -> createPartition(p, true))
+ .forEach(p -> createDataFiles(basePath + "/" + p, instant));
+ FileCreateUtils.createCommit(basePath, instant);
+ HoodieLocalEngineContext localEngineContext = new
HoodieLocalEngineContext(metaClient.getHadoopConf());
+ FileSystemBackedTableMetadata fileSystemBackedTableMetadata =
+ new FileSystemBackedTableMetadata(localEngineContext, new
SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllPartitionPaths().size()
== 3);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllFilesInPartition(new
Path(basePath + "/" + MULTI_LEVEL_PARTITIONS.get(0))).length == 10);
+ }
+
+ @Test
+ public void testMultiLevelEmptyPartitionTable() throws IOException {
+ String instant = "100";
+ // Generate 10 files under each partition
+ MULTI_LEVEL_PARTITIONS.stream().forEach(p -> createPartition(p, true));
+ FileCreateUtils.createCommit(basePath, instant);
+ HoodieLocalEngineContext localEngineContext = new
HoodieLocalEngineContext(metaClient.getHadoopConf());
+ FileSystemBackedTableMetadata fileSystemBackedTableMetadata =
+ new FileSystemBackedTableMetadata(localEngineContext, new
SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllPartitionPaths().size()
== 3);
+
Assertions.assertTrue(fileSystemBackedTableMetadata.getAllFilesInPartition(new
Path(basePath + "/" + MULTI_LEVEL_PARTITIONS.get(0))).length == 0);
+ }
+
+ private void createDataFiles(String fullPath, String instant) {
Review comment:
Can we please use the `HoodieTestUtils` methods? We spend a lot of time
chasing after these one-off helpers.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]