This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new badcca2ebe8 [HUDI-7742] Move Hadoop-dependent reader util classes to
hudi-hadoop-common module (#11190)
badcca2ebe8 is described below
commit badcca2ebe8c30efa3fc13cad4c3f0114101874a
Author: Y Ethan Guo <[email protected]>
AuthorDate: Fri May 10 14:20:00 2024 -0700
[HUDI-7742] Move Hadoop-dependent reader util classes to hudi-hadoop-common
module (#11190)
---
.../action/bootstrap/OrcBootstrapMetadataHandler.java | 2 +-
.../common/table/log/block/HoodieHFileDataBlock.java | 5 +++--
.../hudi/common/testutils/HoodieTestDataGenerator.java | 4 ----
.../java/org/apache/hudi/common/util/AvroOrcUtils.java | 0
.../main/java/org/apache/hudi/common/util/OrcUtils.java | 1 +
.../org/apache/hudi/io/hadoop/HoodieAvroOrcReader.java | 1 -
.../org/apache/hudi/io/hadoop}/OrcReaderIterator.java | 17 ++++++++++-------
.../apache/hudi/io/storage/HoodieHBaseKVComparator.java | 0
.../parquet/avro/HoodieAvroParquetReaderBuilder.java | 0
.../org/apache/parquet/avro/HoodieAvroReadSupport.java | 0
.../org/apache/hudi/common/util/TestAvroOrcUtils.java | 4 ++++
.../apache/hudi/io/hadoop}/TestOrcReaderIterator.java | 17 ++++++++++-------
.../org/apache/hudi/functional/TestOrcBootstrap.java | 2 +-
.../deltastreamer/HoodieDeltaStreamerTestBase.java | 3 ++-
.../hudi/utilities/testutils/UtilitiesTestBase.java | 3 ++-
15 files changed, 34 insertions(+), 25 deletions(-)
diff --git
a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/OrcBootstrapMetadataHandler.java
b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/OrcBootstrapMetadataHandler.java
index 2d4457d575b..86944ae3f5b 100644
---
a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/OrcBootstrapMetadataHandler.java
+++
b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/OrcBootstrapMetadataHandler.java
@@ -25,11 +25,11 @@ import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType;
import org.apache.hudi.common.util.AvroOrcUtils;
-import org.apache.hudi.common.util.OrcReaderIterator;
import org.apache.hudi.common.util.queue.HoodieExecutor;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.io.HoodieBootstrapHandle;
+import org.apache.hudi.io.hadoop.OrcReaderIterator;
import org.apache.hudi.keygen.KeyGeneratorInterface;
import org.apache.hudi.storage.StoragePath;
import org.apache.hudi.table.HoodieTable;
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
index a379e305d0e..0893637b956 100644
---
a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
+++
b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
@@ -26,6 +26,7 @@ import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType;
import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.ReflectionUtils;
import org.apache.hudi.common.util.collection.ClosableIterator;
import org.apache.hudi.common.util.collection.CloseableMappingIterator;
import org.apache.hudi.exception.HoodieIOException;
@@ -33,7 +34,6 @@ import org.apache.hudi.io.SeekableDataInputStream;
import org.apache.hudi.io.storage.HoodieAvroHFileReaderImplBase;
import org.apache.hudi.io.storage.HoodieFileReader;
import org.apache.hudi.io.storage.HoodieFileReaderFactory;
-import org.apache.hudi.io.storage.HoodieHBaseKVComparator;
import org.apache.hudi.storage.HoodieStorage;
import org.apache.hudi.storage.HoodieStorageUtils;
import org.apache.hudi.storage.StorageConfiguration;
@@ -76,6 +76,7 @@ import static
org.apache.hudi.common.util.ValidationUtils.checkState;
public class HoodieHFileDataBlock extends HoodieDataBlock {
private static final Logger LOG =
LoggerFactory.getLogger(HoodieHFileDataBlock.class);
private static final int DEFAULT_BLOCK_SIZE = 1024 * 1024;
+ private static final String KV_COMPARATOR_CLASS_NAME = "org.apache.hudi.io.storage.HoodieHBaseKVComparator";
private final Option<Compression.Algorithm> compressionAlgorithm;
// This path is used for constructing HFile reader context, which should not be
@@ -121,7 +122,7 @@ public class HoodieHFileDataBlock extends HoodieDataBlock {
HFileContext context = new HFileContextBuilder()
.withBlockSize(DEFAULT_BLOCK_SIZE)
.withCompression(compressionAlgorithm.get())
- .withCellComparator(new HoodieHBaseKVComparator())
+ .withCellComparator(ReflectionUtils.loadClass(KV_COMPARATOR_CLASS_NAME))
.build();
Configuration conf = new Configuration();
diff --git
a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java
b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java
index 4139f1fa396..31f6b1c562d 100644
---
a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java
+++
b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java
@@ -32,7 +32,6 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
-import org.apache.hudi.common.util.AvroOrcUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
@@ -49,7 +48,6 @@ import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericFixed;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.Path;
-import org.apache.orc.TypeDescription;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -162,12 +160,10 @@ public class HoodieTestDataGenerator implements
AutoCloseable {
public static final Schema AVRO_SCHEMA = new
Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA);
public static final Schema NESTED_AVRO_SCHEMA = new
Schema.Parser().parse(TRIP_NESTED_EXAMPLE_SCHEMA);
- public static final TypeDescription ORC_SCHEMA = AvroOrcUtils.createOrcSchema(new Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA));
public static final Schema AVRO_SCHEMA_WITH_METADATA_FIELDS =
HoodieAvroUtils.addMetadataFields(AVRO_SCHEMA);
public static final Schema AVRO_SHORT_TRIP_SCHEMA = new
Schema.Parser().parse(SHORT_TRIP_SCHEMA);
public static final Schema AVRO_TRIP_SCHEMA = new
Schema.Parser().parse(TRIP_SCHEMA);
- public static final TypeDescription ORC_TRIP_SCHEMA = AvroOrcUtils.createOrcSchema(new Schema.Parser().parse(TRIP_SCHEMA));
public static final Schema FLATTENED_AVRO_SCHEMA = new
Schema.Parser().parse(TRIP_FLATTENED_SCHEMA);
private final Random rand;
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java
b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java
similarity index 100%
rename from
hudi-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java
rename to
hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java
diff --git
a/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java
b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java
index e34f8c4f195..6bbae77d4b9 100644
--- a/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java
+++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java
@@ -29,6 +29,7 @@ import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.MetadataNotFoundException;
import org.apache.hudi.hadoop.fs.HadoopFSUtils;
+import org.apache.hudi.io.hadoop.OrcReaderIterator;
import org.apache.hudi.keygen.BaseKeyGenerator;
import org.apache.hudi.storage.HoodieStorage;
import org.apache.hudi.storage.HoodieStorageUtils;
diff --git
a/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieAvroOrcReader.java
b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieAvroOrcReader.java
index c1f5b79c227..917b8a1a627 100644
---
a/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieAvroOrcReader.java
+++
b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieAvroOrcReader.java
@@ -23,7 +23,6 @@ import org.apache.hudi.common.bloom.BloomFilter;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.util.AvroOrcUtils;
import org.apache.hudi.common.util.BaseFileUtils;
-import org.apache.hudi.common.util.OrcReaderIterator;
import org.apache.hudi.common.util.collection.ClosableIterator;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieIOException;
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcReaderIterator.java
b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/OrcReaderIterator.java
similarity index 87%
rename from
hudi-common/src/main/java/org/apache/hudi/common/util/OrcReaderIterator.java
rename to
hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/OrcReaderIterator.java
index 6b6e46e7a8d..3ef5c911760 100644
---
a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcReaderIterator.java
+++
b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/OrcReaderIterator.java
@@ -7,17 +7,20 @@
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package org.apache.hudi.common.util;
+package org.apache.hudi.io.hadoop;
+import org.apache.hudi.common.util.AvroOrcUtils;
+import org.apache.hudi.common.util.FileIOUtils;
import org.apache.hudi.common.util.collection.ClosableIterator;
import org.apache.hudi.exception.HoodieIOException;
diff --git
a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java
b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java
similarity index 100%
rename from
hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java
rename to
hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java
diff --git
a/hudi-common/src/main/java/org/apache/parquet/avro/HoodieAvroParquetReaderBuilder.java
b/hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroParquetReaderBuilder.java
similarity index 100%
rename from
hudi-common/src/main/java/org/apache/parquet/avro/HoodieAvroParquetReaderBuilder.java
rename to
hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroParquetReaderBuilder.java
diff --git
a/hudi-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java
b/hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java
similarity index 100%
rename from
hudi-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java
rename to
hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java
diff --git
a/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestAvroOrcUtils.java
b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestAvroOrcUtils.java
index 6c157349974..de7968b3ce0 100644
---
a/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestAvroOrcUtils.java
+++
b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/util/TestAvroOrcUtils.java
@@ -30,12 +30,16 @@ import java.util.Arrays;
import java.util.List;
import static
org.apache.hudi.common.testutils.HoodieTestDataGenerator.AVRO_SCHEMA;
+import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA;
+import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_SCHEMA;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
* Tests {@link AvroOrcUtils}.
*/
public class TestAvroOrcUtils extends HoodieCommonTestHarness {
+ public static final TypeDescription ORC_SCHEMA = AvroOrcUtils.createOrcSchema(new Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA));
+ public static final TypeDescription ORC_TRIP_SCHEMA = AvroOrcUtils.createOrcSchema(new Schema.Parser().parse(TRIP_SCHEMA));
public static List<Arguments> testCreateOrcSchemaArgs() {
// the ORC schema is constructed in the order as AVRO_SCHEMA:
diff --git
a/hudi-common/src/test/java/org/apache/hudi/common/util/TestOrcReaderIterator.java
b/hudi-hadoop-common/src/test/java/org/apache/hudi/io/hadoop/TestOrcReaderIterator.java
similarity index 88%
rename from
hudi-common/src/test/java/org/apache/hudi/common/util/TestOrcReaderIterator.java
rename to
hudi-hadoop-common/src/test/java/org/apache/hudi/io/hadoop/TestOrcReaderIterator.java
index b439d816724..4cf6f7c27c7 100644
---
a/hudi-common/src/test/java/org/apache/hudi/common/util/TestOrcReaderIterator.java
+++
b/hudi-hadoop-common/src/test/java/org/apache/hudi/io/hadoop/TestOrcReaderIterator.java
@@ -7,16 +7,19 @@
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
*/
-package org.apache.hudi.common.util;
+package org.apache.hudi.io.hadoop;
+
+import org.apache.hudi.common.util.AvroOrcUtils;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java
index 9b0199a869c..2db842c13a8 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java
+++
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java
@@ -43,7 +43,6 @@ import org.apache.hudi.common.testutils.HoodieTestUtils;
import org.apache.hudi.common.testutils.RawTripTestPayload;
import org.apache.hudi.common.util.AvroOrcUtils;
import org.apache.hudi.common.util.Option;
-import org.apache.hudi.common.util.OrcReaderIterator;
import org.apache.hudi.common.util.PartitionPathEncodeUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieBootstrapConfig;
@@ -52,6 +51,7 @@ import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.hadoop.HoodieParquetInputFormat;
import org.apache.hudi.index.HoodieIndex.IndexType;
+import org.apache.hudi.io.hadoop.OrcReaderIterator;
import org.apache.hudi.keygen.NonpartitionedKeyGenerator;
import org.apache.hudi.keygen.SimpleKeyGenerator;
import org.apache.hudi.table.action.bootstrap.BootstrapUtils;
diff --git
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java
index e0093f3c92a..6aebde9a443 100644
---
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java
+++
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java
@@ -30,6 +30,7 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
import org.apache.hudi.common.util.StringUtils;
+import org.apache.hudi.common.util.TestAvroOrcUtils;
import org.apache.hudi.config.HoodieCleanConfig;
import org.apache.hudi.config.HoodieClusteringConfig;
import org.apache.hudi.hive.HiveSyncConfigHolder;
@@ -439,7 +440,7 @@ public class HoodieDeltaStreamerTestBase extends
UtilitiesTestBase {
if (useCustomSchema) {
Helpers.saveORCToDFS(Helpers.toGenericRecords(
dataGenerator.generateInsertsAsPerSchema("000", numRecords,
schemaStr),
- schema), new Path(path), HoodieTestDataGenerator.ORC_TRIP_SCHEMA);
+ schema), new Path(path), TestAvroOrcUtils.ORC_TRIP_SCHEMA);
} else {
Helpers.saveORCToDFS(Helpers.toGenericRecords(
dataGenerator.generateInserts("000", numRecords)), new Path(path));
diff --git
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java
index 90104ab7ab2..afb9a9ad97c 100644
---
a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java
+++
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java
@@ -31,6 +31,7 @@ import
org.apache.hudi.common.testutils.minicluster.HdfsTestService;
import org.apache.hudi.common.testutils.minicluster.ZookeeperTestService;
import org.apache.hudi.common.util.AvroOrcUtils;
import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.TestAvroOrcUtils;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.hive.HiveSyncConfig;
import org.apache.hudi.hive.ddl.JDBCExecutor;
@@ -429,7 +430,7 @@ public class UtilitiesTestBase {
}
public static void saveORCToDFS(List<GenericRecord> records, Path
targetFile) throws IOException {
- saveORCToDFS(records, targetFile, HoodieTestDataGenerator.ORC_SCHEMA);
+ saveORCToDFS(records, targetFile, TestAvroOrcUtils.ORC_SCHEMA);
}
public static void saveORCToDFS(List<GenericRecord> records, Path
targetFile, TypeDescription schema) throws IOException {