This is an automated email from the ASF dual-hosted git repository. vinoyang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git
The following commit(s) were added to refs/heads/master by this push: new f9139c0 [HUDI-366] Refactor some module codes based on new ImportOrder code style rule (#1055) f9139c0 is described below commit f9139c0f616775f4b3d0df95772f2621f0e7c9f1 Author: 谢磊 <lamber...@163.com> AuthorDate: Wed Nov 27 21:32:43 2019 +0800 [HUDI-366] Refactor some module codes based on new ImportOrder code style rule (#1055) [HUDI-366] Refactor hudi-hadoop-mr / hudi-timeline-service / hudi-spark / hudi-integ-test / hudi- utilities based on new ImportOrder code style rule --- .../hudi/hadoop/HoodieParquetInputFormat.java | 34 +++++++------ .../hudi/hadoop/HoodieROTablePathFilter.java | 20 ++++---- .../hudi/hadoop/RecordReaderValueIterator.java | 10 ++-- .../hadoop/SafeParquetRecordReaderWrapper.java | 3 +- .../hadoop/hive/HoodieCombineHiveInputFormat.java | 38 +++++++------- .../realtime/AbstractRealtimeRecordReader.java | 34 +++++++------ .../realtime/HoodieParquetRealtimeInputFormat.java | 46 +++++++++-------- .../hadoop/realtime/HoodieRealtimeFileSplit.java | 3 +- .../realtime/HoodieRealtimeRecordReader.java | 6 ++- .../realtime/RealtimeCompactedRecordReader.java | 18 ++++--- .../realtime/RealtimeUnmergedRecordReader.java | 20 ++++---- .../apache/hudi/hadoop/InputFormatTestUtil.java | 28 +++++----- .../org/apache/hudi/hadoop/TestAnnotation.java | 5 +- .../apache/hudi/hadoop/TestHoodieInputFormat.java | 10 ++-- .../hudi/hadoop/TestHoodieROTablePathFilter.java | 16 +++--- .../hudi/hadoop/TestRecordReaderValueIterator.java | 12 +++-- .../realtime/TestHoodieRealtimeRecordReader.java | 59 ++++++++++++---------- .../java/org/apache/hudi/integ/ITTestBase.java | 18 ++++--- .../org/apache/hudi/integ/ITTestHoodieDemo.java | 6 ++- .../org/apache/hudi/integ/ITTestHoodieSanity.java | 1 + .../main/java/org/apache/hudi/BaseAvroPayload.java | 8 +-- .../java/org/apache/hudi/ComplexKeyGenerator.java | 8 +-- .../main/java/org/apache/hudi/DataSourceUtils.java | 18 ++++--- .../org/apache/hudi/HoodieDataSourceHelpers.java | 10 ++-- .../main/java/org/apache/hudi/KeyGenerator.java | 6 ++- .../apache/hudi/NonpartitionedKeyGenerator.java | 3 +- .../hudi/OverwriteWithLatestAvroPayload.java | 10 ++-- .../main/java/org/apache/hudi/QuickstartUtils.java | 18 ++++--- .../java/org/apache/hudi/SimpleKeyGenerator.java | 3 +- hudi-spark/src/test/java/DataSourceTestUtils.java | 7 +-- hudi-spark/src/test/java/HoodieJavaApp.java | 12 +++-- .../src/test/java/HoodieJavaStreamingApp.java | 18 ++++--- .../timeline/service/FileSystemViewHandler.java | 24 +++++---- .../hudi/timeline/service/TimelineService.java | 16 +++--- .../timeline/service/handlers/DataFileHandler.java | 8 +-- .../service/handlers/FileSliceHandler.java | 12 +++-- .../hudi/timeline/service/handlers/Handler.java | 6 ++- .../timeline/service/handlers/TimelineHandler.java | 10 ++-- .../view/TestRemoteHoodieTableFileSystemView.java | 1 + .../apache/hudi/utilities/HDFSParquetImporter.java | 43 ++++++++-------- .../hudi/utilities/HiveIncrementalPuller.java | 32 ++++++------ .../org/apache/hudi/utilities/HoodieCleaner.java | 18 ++++--- .../hudi/utilities/HoodieCompactionAdminTool.java | 18 ++++--- .../org/apache/hudi/utilities/HoodieCompactor.java | 16 +++--- .../hudi/utilities/HoodieSnapshotCopier.java | 25 +++++---- .../hudi/utilities/HoodieWithTimelineServer.java | 11 ++-- .../org/apache/hudi/utilities/UtilHelpers.java | 26 +++++----- .../adhoc/UpgradePayloadFromUberToApache.java | 20 ++++---- .../AbstractDeltaStreamerService.java | 8 +-- .../hudi/utilities/deltastreamer/Compactor.java | 6 ++- .../hudi/utilities/deltastreamer/DeltaSync.java | 42 ++++++++------- .../deltastreamer/HoodieDeltaStreamer.java | 50 +++++++++--------- .../deltastreamer/HoodieDeltaStreamerMetrics.java | 3 +- .../deltastreamer/SchedulerConfGenerator.java | 12 +++-- .../deltastreamer/SourceFormatAdapter.java | 11 ++-- .../exception/HoodieIncrementalPullException.java | 3 +- .../keygen/TimestampBasedKeyGenerator.java | 18 ++++--- .../hudi/utilities/perf/TimelineServerPerf.java | 36 ++++++------- .../utilities/schema/FilebasedSchemaProvider.java | 12 +++-- .../schema/NullTargetSchemaRegistryProvider.java | 3 +- .../utilities/schema/RowBasedSchemaProvider.java | 3 +- .../hudi/utilities/schema/SchemaProvider.java | 6 ++- .../utilities/schema/SchemaRegistryProvider.java | 12 +++-- .../hudi/utilities/sources/AvroDFSSource.java | 9 ++-- .../hudi/utilities/sources/AvroKafkaSource.java | 7 +-- .../apache/hudi/utilities/sources/AvroSource.java | 3 +- .../hudi/utilities/sources/HiveIncrPullSource.java | 28 +++++----- .../hudi/utilities/sources/HoodieIncrSource.java | 4 +- .../hudi/utilities/sources/JsonDFSSource.java | 1 + .../hudi/utilities/sources/JsonKafkaSource.java | 3 +- .../apache/hudi/utilities/sources/JsonSource.java | 1 + .../hudi/utilities/sources/ParquetDFSSource.java | 3 +- .../hudi/utilities/sources/ParquetSource.java | 3 +- .../apache/hudi/utilities/sources/RowSource.java | 1 + .../org/apache/hudi/utilities/sources/Source.java | 4 +- .../utilities/sources/helpers/AvroConvertor.java | 8 +-- .../utilities/sources/helpers/DFSPathSelector.java | 26 +++++----- .../sources/helpers/IncrSourceHelper.java | 3 +- .../utilities/sources/helpers/KafkaOffsetGen.java | 17 ++++--- .../utilities/transform/FlatteningTransformer.java | 4 +- .../utilities/transform/IdentityTransformer.java | 1 + .../transform/SqlQueryBasedTransformer.java | 4 +- .../hudi/utilities/transform/Transformer.java | 1 + .../hudi/utilities/TestFlatteningTransformer.java | 5 +- .../hudi/utilities/TestHDFSParquetImporter.java | 40 ++++++++------- .../hudi/utilities/TestHoodieDeltaStreamer.java | 36 ++++++------- .../hudi/utilities/TestHoodieSnapshotCopier.java | 20 ++++---- .../hudi/utilities/TestSchedulerConfGenerator.java | 10 ++-- .../apache/hudi/utilities/UtilitiesTestBase.java | 32 ++++++------ .../utilities/sources/AbstractBaseTestSource.java | 16 +++--- .../sources/DistributedTestDataSource.java | 10 ++-- .../hudi/utilities/sources/TestDFSSource.java | 22 ++++---- .../hudi/utilities/sources/TestDataSource.java | 8 +-- .../hudi/utilities/sources/TestKafkaSource.java | 14 ++--- 94 files changed, 765 insertions(+), 598 deletions(-) diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java index 5dde86a..bd9672c 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java @@ -18,12 +18,17 @@ package org.apache.hudi.hadoop; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; +import org.apache.hudi.common.model.HoodieDataFile; +import org.apache.hudi.common.model.HoodiePartitionMetadata; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.HoodieTimeline; +import org.apache.hudi.common.table.TableFileSystemView.ReadOptimizedView; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.view.HoodieTableFileSystemView; +import org.apache.hudi.exception.DatasetNotFoundException; +import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.exception.InvalidDatasetException; + import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -37,19 +42,16 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapreduce.Job; -import org.apache.hudi.common.model.HoodieDataFile; -import org.apache.hudi.common.model.HoodiePartitionMetadata; -import org.apache.hudi.common.table.HoodieTableMetaClient; -import org.apache.hudi.common.table.HoodieTimeline; -import org.apache.hudi.common.table.TableFileSystemView.ReadOptimizedView; -import org.apache.hudi.common.table.timeline.HoodieInstant; -import org.apache.hudi.common.table.view.HoodieTableFileSystemView; -import org.apache.hudi.exception.DatasetNotFoundException; -import org.apache.hudi.exception.HoodieIOException; -import org.apache.hudi.exception.InvalidDatasetException; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + /** * HoodieInputFormat which understands the Hoodie File Structure and filters files based on the Hoodie Mode. If paths * that does not correspond to a hoodie dataset then they are passed in as is (as what FileInputFormat.listStatus() diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java index bbd53ec..51a9805 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieROTablePathFilter.java @@ -18,24 +18,26 @@ package org.apache.hudi.hadoop; -import java.io.Serializable; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.stream.Collectors; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; import org.apache.hudi.common.model.HoodieDataFile; import org.apache.hudi.common.model.HoodiePartitionMetadata; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.view.HoodieTableFileSystemView; import org.apache.hudi.exception.DatasetNotFoundException; import org.apache.hudi.exception.HoodieException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; +import java.io.Serializable; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.stream.Collectors; + /** * Given a path is a part of - Hoodie dataset = accepts ONLY the latest version of each path - Non-Hoodie dataset = then * always accept diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/RecordReaderValueIterator.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/RecordReaderValueIterator.java index d68afb4..4201470 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/RecordReaderValueIterator.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/RecordReaderValueIterator.java @@ -18,14 +18,16 @@ package org.apache.hudi.hadoop; -import java.io.IOException; -import java.util.Iterator; -import java.util.NoSuchElementException; -import org.apache.hadoop.mapred.RecordReader; import org.apache.hudi.exception.HoodieException; + +import org.apache.hadoop.mapred.RecordReader; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; +import java.io.IOException; +import java.util.Iterator; +import java.util.NoSuchElementException; + /** * Provides Iterator Interface to iterate value entries read from record reader * diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/SafeParquetRecordReaderWrapper.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/SafeParquetRecordReaderWrapper.java index 8a42b13..15b9a2b 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/SafeParquetRecordReaderWrapper.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/SafeParquetRecordReaderWrapper.java @@ -18,12 +18,13 @@ package org.apache.hudi.hadoop; -import java.io.IOException; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.RecordReader; +import java.io.IOException; + /** * Record Reader for parquet. Records read from this reader is safe to be buffered for concurrent processing. * diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java index 5a39590..6512d94 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java @@ -18,23 +18,10 @@ package org.apache.hudi.hadoop.hive; +import org.apache.hudi.hadoop.HoodieParquetInputFormat; +import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat; + import com.google.common.annotations.VisibleForTesting; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -70,11 +57,26 @@ import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.lib.CombineFileInputFormat; import org.apache.hadoop.mapred.lib.CombineFileSplit; import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hudi.hadoop.HoodieParquetInputFormat; -import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + /** * This is just a copy of the org.apache.hadoop.hive.ql.io.CombineHiveInputFormat from Hive 2.x Search for **MOD** to * see minor modifications to support custom inputformat in CombineHiveInputFormat. See diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java index 215193f..68bf517 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java @@ -18,15 +18,14 @@ package org.apache.hudi.hadoop.realtime; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import java.util.stream.Collectors; +import org.apache.hudi.common.model.HoodieAvroPayload; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.util.HoodieAvroUtils; +import org.apache.hudi.common.util.LogReaderUtils; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieIOException; + import org.apache.avro.Schema; import org.apache.avro.Schema.Field; import org.apache.avro.generic.GenericArray; @@ -46,19 +45,22 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; -import org.apache.hudi.common.model.HoodieAvroPayload; -import org.apache.hudi.common.table.HoodieTableMetaClient; -import org.apache.hudi.common.util.HoodieAvroUtils; -import org.apache.hudi.common.util.LogReaderUtils; -import org.apache.hudi.common.util.collection.Pair; -import org.apache.hudi.exception.HoodieException; -import org.apache.hudi.exception.HoodieIOException; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.parquet.avro.AvroSchemaConverter; import org.apache.parquet.hadoop.ParquetFileReader; import org.apache.parquet.schema.MessageType; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.stream.Collectors; + /** * Record Reader implementation to merge fresh avro data with base parquet data, to support real time queries. */ diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java index 7a3492e..6d46728 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java @@ -18,17 +18,22 @@ package org.apache.hudi.hadoop.realtime; +import org.apache.hudi.common.model.FileSlice; +import org.apache.hudi.common.model.HoodieLogFile; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.HoodieTimeline; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.view.HoodieTableFileSystemView; +import org.apache.hudi.common.util.FSUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.HoodieParquetInputFormat; +import org.apache.hudi.hadoop.UseFileSplitsFromInputFormat; + import com.google.common.base.Preconditions; import com.google.common.collect.Sets; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.function.Function; -import java.util.stream.Collectors; -import java.util.stream.Stream; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -41,22 +46,19 @@ import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; -import org.apache.hudi.common.model.FileSlice; -import org.apache.hudi.common.model.HoodieLogFile; -import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.table.HoodieTableMetaClient; -import org.apache.hudi.common.table.HoodieTimeline; -import org.apache.hudi.common.table.timeline.HoodieInstant; -import org.apache.hudi.common.table.view.HoodieTableFileSystemView; -import org.apache.hudi.common.util.FSUtils; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.exception.HoodieException; -import org.apache.hudi.exception.HoodieIOException; -import org.apache.hudi.hadoop.HoodieParquetInputFormat; -import org.apache.hudi.hadoop.UseFileSplitsFromInputFormat; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; + /** * Input Format, that provides a real-time view of data in a Hoodie dataset */ diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java index 5ae344e..0a050be 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java @@ -18,13 +18,14 @@ package org.apache.hudi.hadoop.realtime; +import org.apache.hadoop.mapred.FileSplit; + import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; -import org.apache.hadoop.mapred.FileSplit; /** * Filesplit that wraps the base split and a list of log files to merge deltas from. diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeRecordReader.java index fb11d39..e156316 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeRecordReader.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeRecordReader.java @@ -18,15 +18,17 @@ package org.apache.hudi.hadoop.realtime; -import java.io.IOException; +import org.apache.hudi.exception.HoodieException; + import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; -import org.apache.hudi.exception.HoodieException; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; +import java.io.IOException; + /** * Realtime Record Reader which can do compacted (merge-on-read) record reading or unmerged reading (parquet and log * files read in parallel) based on job configuration. diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java index 7019907..deeaaf4 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java @@ -18,23 +18,25 @@ package org.apache.hudi.hadoop.realtime; -import java.io.IOException; -import java.util.Map; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.io.ArrayWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.RecordReader; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner; import org.apache.hudi.common.util.FSUtils; import org.apache.hudi.common.util.HoodieAvroUtils; import org.apache.hudi.common.util.Option; + +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; +import java.io.IOException; +import java.util.Map; + class RealtimeCompactedRecordReader extends AbstractRealtimeRecordReader implements RecordReader<NullWritable, ArrayWritable> { diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java index f7a51e8..cd6f41d 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java @@ -18,15 +18,6 @@ package org.apache.hudi.hadoop.realtime; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.io.ArrayWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.RecordReader; import org.apache.hudi.common.table.log.HoodieUnMergedLogRecordScanner; import org.apache.hudi.common.util.DefaultSizeEstimator; import org.apache.hudi.common.util.FSUtils; @@ -38,6 +29,17 @@ import org.apache.hudi.common.util.queue.IteratorBasedQueueProducer; import org.apache.hudi.hadoop.RecordReaderValueIterator; import org.apache.hudi.hadoop.SafeParquetRecordReaderWrapper; +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + class RealtimeUnmergedRecordReader extends AbstractRealtimeRecordReader implements RecordReader<NullWritable, ArrayWritable> { diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/InputFormatTestUtil.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/InputFormatTestUtil.java index 7a5b7d4..8dd2f07 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/InputFormatTestUtil.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/InputFormatTestUtil.java @@ -18,28 +18,30 @@ package org.apache.hudi.hadoop; -import java.io.File; -import java.io.FilenameFilter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.UUID; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapred.JobConf; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTestUtils; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.util.FSUtils; import org.apache.hudi.common.util.HoodieAvroUtils; import org.apache.hudi.common.util.SchemaTestUtil; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.JobConf; import org.apache.parquet.avro.AvroParquetWriter; import org.junit.rules.TemporaryFolder; +import java.io.File; +import java.io.FilenameFilter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.UUID; + public class InputFormatTestUtil { private static String TEST_WRITE_TOKEN = "1-0-1"; diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java index 7e8f46d..3bdaa64 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java @@ -18,10 +18,11 @@ package org.apache.hudi.hadoop; -import static org.junit.Assert.assertTrue; +import org.junit.Test; import java.lang.annotation.Annotation; -import org.junit.Test; + +import static org.junit.Assert.assertTrue; public class TestAnnotation { diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieInputFormat.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieInputFormat.java index 3bcf97d..0c3a002 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieInputFormat.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieInputFormat.java @@ -18,10 +18,8 @@ package org.apache.hudi.hadoop; -import static org.junit.Assert.assertEquals; +import org.apache.hudi.common.util.FSUtils; -import java.io.File; -import java.io.IOException; import org.apache.avro.Schema; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.io.ArrayWritable; @@ -30,12 +28,16 @@ import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; -import org.apache.hudi.common.util.FSUtils; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; +import java.io.File; +import java.io.IOException; + +import static org.junit.Assert.assertEquals; + public class TestHoodieInputFormat { private HoodieParquetInputFormat inputFormat; diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java index 3d17140..8d46b10 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieROTablePathFilter.java @@ -18,20 +18,22 @@ package org.apache.hudi.hadoop; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import org.apache.hadoop.fs.Path; import org.apache.hudi.common.HoodieCommonTestHarness; import org.apache.hudi.common.model.HoodieTestUtils; import org.apache.hudi.common.table.HoodieTableMetaClient; + +import org.apache.hadoop.fs.Path; import org.junit.Before; import org.junit.Test; import org.junit.rules.TemporaryFolder; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + /** * */ diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestRecordReaderValueIterator.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestRecordReaderValueIterator.java index 9c050cd..2dfd036 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestRecordReaderValueIterator.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestRecordReaderValueIterator.java @@ -18,17 +18,19 @@ package org.apache.hudi.hadoop; -import java.io.IOException; -import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.IntStream; +import org.apache.hudi.common.util.collection.Pair; + import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.RecordReader; -import org.apache.hudi.common.util.collection.Pair; import org.junit.Assert; import org.junit.Test; +import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + public class TestRecordReaderValueIterator { @Test diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java index 021bf78..69c3385 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java @@ -18,18 +18,26 @@ package org.apache.hudi.hadoop.realtime; -import static org.junit.Assert.assertTrue; +import org.apache.hudi.common.model.FileSlice; +import org.apache.hudi.common.model.HoodieLogFile; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.model.HoodieTestUtils; +import org.apache.hudi.common.table.HoodieTimeline; +import org.apache.hudi.common.table.log.HoodieLogFormat; +import org.apache.hudi.common.table.log.HoodieLogFormat.Writer; +import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock; +import org.apache.hudi.common.table.log.block.HoodieCommandBlock; +import org.apache.hudi.common.table.log.block.HoodieCommandBlock.HoodieCommandBlockTypeEnum; +import org.apache.hudi.common.table.log.block.HoodieLogBlock; +import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType; +import org.apache.hudi.common.util.FSUtils; +import org.apache.hudi.common.util.HoodieAvroUtils; +import org.apache.hudi.common.util.SchemaTestUtil; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.InputFormatTestUtil; import com.google.common.collect.Maps; -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; import org.apache.avro.generic.IndexedRecord; @@ -51,30 +59,25 @@ import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; -import org.apache.hudi.common.model.FileSlice; -import org.apache.hudi.common.model.HoodieLogFile; -import org.apache.hudi.common.model.HoodieTableType; -import org.apache.hudi.common.model.HoodieTestUtils; -import org.apache.hudi.common.table.HoodieTimeline; -import org.apache.hudi.common.table.log.HoodieLogFormat; -import org.apache.hudi.common.table.log.HoodieLogFormat.Writer; -import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock; -import org.apache.hudi.common.table.log.block.HoodieCommandBlock; -import org.apache.hudi.common.table.log.block.HoodieCommandBlock.HoodieCommandBlockTypeEnum; -import org.apache.hudi.common.table.log.block.HoodieLogBlock; -import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType; -import org.apache.hudi.common.util.FSUtils; -import org.apache.hudi.common.util.HoodieAvroUtils; -import org.apache.hudi.common.util.SchemaTestUtil; -import org.apache.hudi.common.util.collection.Pair; -import org.apache.hudi.exception.HoodieException; -import org.apache.hudi.hadoop.InputFormatTestUtil; import org.junit.Assert; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + + +import static org.junit.Assert.assertTrue; + public class TestHoodieRealtimeRecordReader { private static final String PARTITION_COLUMN = "datestr"; diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java index 647a390..62e9711 100644 --- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java +++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java @@ -18,8 +18,8 @@ package org.apache.hudi.integ; -import static java.util.concurrent.TimeUnit.SECONDS; -import static org.awaitility.Awaitility.await; +import org.apache.hudi.common.util.FileIOUtils; +import org.apache.hudi.common.util.collection.Pair; import com.github.dockerjava.api.DockerClient; import com.github.dockerjava.api.command.DockerCmdExecFactory; @@ -31,6 +31,11 @@ import com.github.dockerjava.core.DockerClientBuilder; import com.github.dockerjava.core.DockerClientConfig; import com.github.dockerjava.core.command.ExecStartResultCallback; import com.github.dockerjava.jaxrs.JerseyDockerCmdExecFactory; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.junit.Assert; +import org.junit.Before; + import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.ArrayList; @@ -38,12 +43,9 @@ import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.stream.Collectors; -import org.apache.hudi.common.util.FileIOUtils; -import org.apache.hudi.common.util.collection.Pair; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; -import org.junit.Assert; -import org.junit.Before; + +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.awaitility.Awaitility.await; public abstract class ITTestBase { diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java index e92ef7b..99082ec 100644 --- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java +++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java @@ -18,11 +18,13 @@ package org.apache.hudi.integ; -import com.google.common.collect.ImmutableList; -import java.util.List; import org.apache.hudi.common.util.collection.Pair; + +import com.google.common.collect.ImmutableList; import org.junit.Test; +import java.util.List; + /** * Goes through steps described in https://hudi.incubator.apache.org/docker_demo.html * diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java index 7e73460..ac54df9 100644 --- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java +++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java @@ -20,6 +20,7 @@ package org.apache.hudi.integ; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.common.model.HoodieTableType; + import org.junit.Assert; import org.junit.Test; diff --git a/hudi-spark/src/main/java/org/apache/hudi/BaseAvroPayload.java b/hudi-spark/src/main/java/org/apache/hudi/BaseAvroPayload.java index 30c3fdd..1268362 100644 --- a/hudi-spark/src/main/java/org/apache/hudi/BaseAvroPayload.java +++ b/hudi-spark/src/main/java/org/apache/hudi/BaseAvroPayload.java @@ -18,13 +18,15 @@ package org.apache.hudi; -import java.io.IOException; -import java.io.Serializable; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.common.util.HoodieAvroUtils; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.avro.generic.GenericRecord; + +import java.io.IOException; +import java.io.Serializable; + /** * Base class for all AVRO record based payloads, that can be ordered based on a field */ diff --git a/hudi-spark/src/main/java/org/apache/hudi/ComplexKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/ComplexKeyGenerator.java index 15f47d3..9441947 100644 --- a/hudi-spark/src/main/java/org/apache/hudi/ComplexKeyGenerator.java +++ b/hudi-spark/src/main/java/org/apache/hudi/ComplexKeyGenerator.java @@ -18,13 +18,15 @@ package org.apache.hudi; -import java.util.Arrays; -import java.util.List; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.exception.HoodieKeyException; +import org.apache.avro.generic.GenericRecord; + +import java.util.Arrays; +import java.util.List; + /** * Complex key generator, which takes names of fields to be used for recordKey and partitionPath as configs. */ diff --git a/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java b/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java index 6938e4b..e7cfcc8 100644 --- a/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java +++ b/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java @@ -18,14 +18,6 @@ package org.apache.hudi; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; -import org.apache.avro.Schema.Field; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.client.embedded.EmbeddedTimelineService; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; @@ -43,9 +35,19 @@ import org.apache.hudi.hive.HiveSyncConfig; import org.apache.hudi.hive.PartitionValueExtractor; import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor; import org.apache.hudi.index.HoodieIndex; + +import org.apache.avro.Schema.Field; +import org.apache.avro.generic.GenericRecord; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + /** * Utilities used throughout the data source */ diff --git a/hudi-spark/src/main/java/org/apache/hudi/HoodieDataSourceHelpers.java b/hudi-spark/src/main/java/org/apache/hudi/HoodieDataSourceHelpers.java index d310368..f9df30a 100644 --- a/hudi-spark/src/main/java/org/apache/hudi/HoodieDataSourceHelpers.java +++ b/hudi-spark/src/main/java/org/apache/hudi/HoodieDataSourceHelpers.java @@ -18,16 +18,18 @@ package org.apache.hudi; -import com.google.common.collect.Sets; -import java.util.List; -import java.util.stream.Collectors; -import org.apache.hadoop.fs.FileSystem; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.HoodieTimeline; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; +import com.google.common.collect.Sets; +import org.apache.hadoop.fs.FileSystem; + +import java.util.List; +import java.util.stream.Collectors; + /** * List of helpers to aid, construction of instanttime for read and write operations using datasource */ diff --git a/hudi-spark/src/main/java/org/apache/hudi/KeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/KeyGenerator.java index c35663e..4b8084b 100644 --- a/hudi-spark/src/main/java/org/apache/hudi/KeyGenerator.java +++ b/hudi-spark/src/main/java/org/apache/hudi/KeyGenerator.java @@ -18,11 +18,13 @@ package org.apache.hudi; -import java.io.Serializable; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.util.TypedProperties; +import org.apache.avro.generic.GenericRecord; + +import java.io.Serializable; + /** * Abstract class to extend for plugging in extraction of {@link HoodieKey} from an Avro record */ diff --git a/hudi-spark/src/main/java/org/apache/hudi/NonpartitionedKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/NonpartitionedKeyGenerator.java index 4cfbd55..8c0a664 100644 --- a/hudi-spark/src/main/java/org/apache/hudi/NonpartitionedKeyGenerator.java +++ b/hudi-spark/src/main/java/org/apache/hudi/NonpartitionedKeyGenerator.java @@ -18,11 +18,12 @@ package org.apache.hudi; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.exception.HoodieKeyException; +import org.apache.avro.generic.GenericRecord; + /** * Simple Key generator for unpartitioned Hive Tables */ diff --git a/hudi-spark/src/main/java/org/apache/hudi/OverwriteWithLatestAvroPayload.java b/hudi-spark/src/main/java/org/apache/hudi/OverwriteWithLatestAvroPayload.java index f2646cc..e860837 100644 --- a/hudi-spark/src/main/java/org/apache/hudi/OverwriteWithLatestAvroPayload.java +++ b/hudi-spark/src/main/java/org/apache/hudi/OverwriteWithLatestAvroPayload.java @@ -18,14 +18,16 @@ package org.apache.hudi; -import java.io.IOException; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.util.HoodieAvroUtils; import org.apache.hudi.common.util.Option; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; + +import java.io.IOException; + /** * Default payload used for delta streamer. * <p> diff --git a/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java b/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java index 504ad18..d09716d 100644 --- a/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java +++ b/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java @@ -18,6 +18,16 @@ package org.apache.hudi; +import org.apache.hudi.common.model.HoodieKey; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.util.HoodieAvroUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.exception.HoodieIOException; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericRecord; + import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -29,14 +39,6 @@ import java.util.UUID; import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.Stream; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericRecord; -import org.apache.hudi.common.model.HoodieKey; -import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.util.HoodieAvroUtils; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.exception.HoodieIOException; /** * Class to be used in quickstart guide for generating inserts and updates against a corpus. Test data uses a toy Uber diff --git a/hudi-spark/src/main/java/org/apache/hudi/SimpleKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/SimpleKeyGenerator.java index 8f59484..f458906 100644 --- a/hudi-spark/src/main/java/org/apache/hudi/SimpleKeyGenerator.java +++ b/hudi-spark/src/main/java/org/apache/hudi/SimpleKeyGenerator.java @@ -18,11 +18,12 @@ package org.apache.hudi; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.exception.HoodieKeyException; +import org.apache.avro.generic.GenericRecord; + /** * Simple key generator, which takes names of fields to be used for recordKey and partitionPath as configs. */ diff --git a/hudi-spark/src/test/java/DataSourceTestUtils.java b/hudi-spark/src/test/java/DataSourceTestUtils.java index 15fea33..ab76444 100644 --- a/hudi-spark/src/test/java/DataSourceTestUtils.java +++ b/hudi-spark/src/test/java/DataSourceTestUtils.java @@ -16,14 +16,15 @@ * limitations under the License. */ -import java.io.IOException; -import java.util.List; -import java.util.stream.Collectors; import org.apache.hudi.common.TestRawTripPayload; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.util.Option; +import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; + /** * Test utils for data source tests. */ diff --git a/hudi-spark/src/test/java/HoodieJavaApp.java b/hudi-spark/src/test/java/HoodieJavaApp.java index 50ac65c..389c015 100644 --- a/hudi-spark/src/test/java/HoodieJavaApp.java +++ b/hudi-spark/src/test/java/HoodieJavaApp.java @@ -16,11 +16,6 @@ * limitations under the License. */ -import com.beust.jcommander.JCommander; -import com.beust.jcommander.Parameter; -import java.util.ArrayList; -import java.util.List; -import org.apache.hadoop.fs.FileSystem; import org.apache.hudi.DataSourceReadOptions; import org.apache.hudi.DataSourceWriteOptions; import org.apache.hudi.HoodieDataSourceHelpers; @@ -33,6 +28,10 @@ import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.hive.MultiPartKeysValueExtractor; import org.apache.hudi.hive.NonPartitionedExtractor; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import org.apache.hadoop.fs.FileSystem; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaSparkContext; @@ -42,6 +41,9 @@ import org.apache.spark.sql.Row; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; +import java.util.ArrayList; +import java.util.List; + /** * Sample program that writes & reads hoodie datasets via the Spark datasource */ diff --git a/hudi-spark/src/test/java/HoodieJavaStreamingApp.java b/hudi-spark/src/test/java/HoodieJavaStreamingApp.java index 492cc98..c448abb 100644 --- a/hudi-spark/src/test/java/HoodieJavaStreamingApp.java +++ b/hudi-spark/src/test/java/HoodieJavaStreamingApp.java @@ -16,14 +16,6 @@ * limitations under the License. */ -import com.beust.jcommander.JCommander; -import com.beust.jcommander.Parameter; -import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hudi.DataSourceReadOptions; import org.apache.hudi.DataSourceWriteOptions; import org.apache.hudi.HoodieDataSourceHelpers; @@ -31,6 +23,11 @@ import org.apache.hudi.common.HoodieTestDataGenerator; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.hive.MultiPartKeysValueExtractor; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaSparkContext; @@ -42,6 +39,11 @@ import org.apache.spark.sql.streaming.DataStreamWriter; import org.apache.spark.sql.streaming.OutputMode; import org.apache.spark.sql.streaming.ProcessingTime; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + /** * Sample program that writes & reads hoodie datasets via the Spark datasource streaming */ diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/FileSystemViewHandler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/FileSystemViewHandler.java index cb3d8a7..da2f305 100644 --- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/FileSystemViewHandler.java +++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/FileSystemViewHandler.java @@ -18,17 +18,6 @@ package org.apache.hudi.timeline.service; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Preconditions; -import io.javalin.Context; -import io.javalin.Handler; -import io.javalin.Javalin; -import java.io.IOException; -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; -import org.apache.hadoop.conf.Configuration; import org.apache.hudi.common.table.HoodieTimeline; import org.apache.hudi.common.table.SyncableFileSystemView; import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO; @@ -42,10 +31,23 @@ import org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView; import org.apache.hudi.timeline.service.handlers.DataFileHandler; import org.apache.hudi.timeline.service.handlers.FileSliceHandler; import org.apache.hudi.timeline.service.handlers.TimelineHandler; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Preconditions; +import io.javalin.Context; +import io.javalin.Handler; +import io.javalin.Javalin; +import org.apache.hadoop.conf.Configuration; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.jetbrains.annotations.NotNull; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + /** * Main REST Handler class that handles local view staleness and delegates calls to slice/data-file/timeline handlers */ diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java index 2272757..16ff2f7 100644 --- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java +++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java @@ -18,21 +18,23 @@ package org.apache.hudi.timeline.service; -import com.beust.jcommander.JCommander; -import com.beust.jcommander.Parameter; -import io.javalin.Javalin; -import java.io.IOException; -import java.io.Serializable; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hudi.common.SerializableConfiguration; import org.apache.hudi.common.table.view.FileSystemViewManager; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; import org.apache.hudi.common.table.view.FileSystemViewStorageType; import org.apache.hudi.common.util.FSUtils; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import io.javalin.Javalin; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; +import java.io.IOException; +import java.io.Serializable; + /** * A stand alone timeline service exposing File-System View interfaces to clients */ diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/DataFileHandler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/DataFileHandler.java index f336449..2b92bbf 100644 --- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/DataFileHandler.java +++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/DataFileHandler.java @@ -18,14 +18,16 @@ package org.apache.hudi.timeline.service.handlers; +import org.apache.hudi.common.table.timeline.dto.DataFileDTO; +import org.apache.hudi.common.table.view.FileSystemViewManager; + +import org.apache.hadoop.conf.Configuration; + import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; -import org.apache.hadoop.conf.Configuration; -import org.apache.hudi.common.table.timeline.dto.DataFileDTO; -import org.apache.hudi.common.table.view.FileSystemViewManager; /** * REST Handler servicing data-file requests diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java index eb283fb..e58f835 100644 --- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java +++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java @@ -18,16 +18,18 @@ package org.apache.hudi.timeline.service.handlers; +import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO; +import org.apache.hudi.common.table.timeline.dto.FileGroupDTO; +import org.apache.hudi.common.table.timeline.dto.FileSliceDTO; +import org.apache.hudi.common.table.view.FileSystemViewManager; + +import org.apache.hadoop.conf.Configuration; + import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; -import org.apache.hadoop.conf.Configuration; -import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO; -import org.apache.hudi.common.table.timeline.dto.FileGroupDTO; -import org.apache.hudi.common.table.timeline.dto.FileSliceDTO; -import org.apache.hudi.common.table.view.FileSystemViewManager; /** * REST Handler servicing file-slice requests diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/Handler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/Handler.java index 03be706..884bc42 100644 --- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/Handler.java +++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/Handler.java @@ -18,10 +18,12 @@ package org.apache.hudi.timeline.service.handlers; -import java.io.IOException; +import org.apache.hudi.common.table.view.FileSystemViewManager; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; -import org.apache.hudi.common.table.view.FileSystemViewManager; + +import java.io.IOException; public abstract class Handler { diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/TimelineHandler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/TimelineHandler.java index 49fc2ce..faa81a4 100644 --- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/TimelineHandler.java +++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/TimelineHandler.java @@ -18,14 +18,16 @@ package org.apache.hudi.timeline.service.handlers; +import org.apache.hudi.common.table.timeline.dto.InstantDTO; +import org.apache.hudi.common.table.timeline.dto.TimelineDTO; +import org.apache.hudi.common.table.view.FileSystemViewManager; + +import org.apache.hadoop.conf.Configuration; + import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import org.apache.hadoop.conf.Configuration; -import org.apache.hudi.common.table.timeline.dto.InstantDTO; -import org.apache.hudi.common.table.timeline.dto.TimelineDTO; -import org.apache.hudi.common.table.view.FileSystemViewManager; /** * REST Handler servicing timeline requests diff --git a/hudi-timeline-service/src/test/java/org/apache/hudi/timeline/table/view/TestRemoteHoodieTableFileSystemView.java b/hudi-timeline-service/src/test/java/org/apache/hudi/timeline/table/view/TestRemoteHoodieTableFileSystemView.java index 7c55949..d69a828 100644 --- a/hudi-timeline-service/src/test/java/org/apache/hudi/timeline/table/view/TestRemoteHoodieTableFileSystemView.java +++ b/hudi-timeline-service/src/test/java/org/apache/hudi/timeline/table/view/TestRemoteHoodieTableFileSystemView.java @@ -27,6 +27,7 @@ import org.apache.hudi.common.table.view.FileSystemViewStorageType; import org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView; import org.apache.hudi.common.table.view.TestHoodieTableFileSystemView; import org.apache.hudi.timeline.service.TimelineService; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java index 34552bb..62d2aa7 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java @@ -18,26 +18,6 @@ package org.apache.hudi.utilities; -import com.beust.jcommander.IValueValidator; -import com.beust.jcommander.JCommander; -import com.beust.jcommander.Parameter; -import com.beust.jcommander.ParameterException; -import com.google.common.annotations.VisibleForTesting; -import java.io.IOException; -import java.io.Serializable; -import java.time.Instant; -import java.time.ZoneId; -import java.time.format.DateTimeFormatter; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Properties; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hudi.HoodieWriteClient; import org.apache.hudi.WriteStatus; import org.apache.hudi.common.HoodieJsonPayload; @@ -50,12 +30,35 @@ import org.apache.hudi.common.util.FSUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.exception.HoodieIOException; + +import com.beust.jcommander.IValueValidator; +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import com.beust.jcommander.ParameterException; +import com.google.common.annotations.VisibleForTesting; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.parquet.avro.AvroReadSupport; import org.apache.parquet.hadoop.ParquetInputFormat; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; + +import java.io.IOException; +import java.io.Serializable; +import java.time.Instant; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + import scala.Tuple2; /** diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java index 2cba5fe..62ae769 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java @@ -18,8 +18,25 @@ package org.apache.hudi.utilities; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.util.FileIOUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.utilities.exception.HoodieIncrementalPullException; +import org.apache.hudi.utilities.exception.HoodieIncrementalPullSQLException; + import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.stringtemplate.v4.ST; + import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; @@ -32,21 +49,6 @@ import java.sql.Statement; import java.util.List; import java.util.Scanner; import java.util.stream.Collectors; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hudi.common.table.HoodieTableMetaClient; -import org.apache.hudi.common.table.timeline.HoodieInstant; -import org.apache.hudi.common.util.FileIOUtils; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.exception.HoodieException; -import org.apache.hudi.utilities.exception.HoodieIncrementalPullException; -import org.apache.hudi.utilities.exception.HoodieIncrementalPullSQLException; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; -import org.stringtemplate.v4.ST; /** * Utility to pull data after a given commit, based on the supplied HiveQL and save the delta as another hive temporary diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCleaner.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCleaner.java index 8ca8cd1..27c3220 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCleaner.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCleaner.java @@ -18,22 +18,24 @@ package org.apache.hudi.utilities; -import com.beust.jcommander.JCommander; -import com.beust.jcommander.Parameter; -import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hudi.HoodieWriteClient; import org.apache.hudi.common.util.FSUtils; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.config.HoodieWriteConfig; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaSparkContext; +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + public class HoodieCleaner { private static volatile Logger log = LogManager.getLogger(HoodieCleaner.class); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java index 63bf441..d42a45d 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java @@ -18,22 +18,24 @@ package org.apache.hudi.utilities; -import com.beust.jcommander.JCommander; -import com.beust.jcommander.Parameter; -import java.io.ObjectOutputStream; -import java.io.Serializable; -import java.util.List; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hudi.CompactionAdminClient; import org.apache.hudi.CompactionAdminClient.RenameOpResult; import org.apache.hudi.CompactionAdminClient.ValidationOpResult; import org.apache.hudi.common.model.HoodieFileGroupId; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.util.FSUtils; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.spark.api.java.JavaSparkContext; +import java.io.ObjectOutputStream; +import java.io.Serializable; +import java.util.List; + public class HoodieCompactionAdminTool { private final Config cfg; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java index 4f72b24..48fbbaf 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java @@ -18,23 +18,25 @@ package org.apache.hudi.utilities; -import com.beust.jcommander.JCommander; -import com.beust.jcommander.Parameter; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hudi.HoodieWriteClient; import org.apache.hudi.WriteStatus; import org.apache.hudi.common.util.FSUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.TypedProperties; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + public class HoodieCompactor { private static volatile Logger logger = LogManager.getLogger(HoodieCompactor.class); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java index a37f7da..4f76f43 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java @@ -18,17 +18,6 @@ package org.apache.hudi.utilities; -import com.beust.jcommander.JCommander; -import com.beust.jcommander.Parameter; -import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Stream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.fs.Path; import org.apache.hudi.common.SerializableConfiguration; import org.apache.hudi.common.model.HoodieDataFile; import org.apache.hudi.common.model.HoodiePartitionMetadata; @@ -40,10 +29,24 @@ import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.view.HoodieTableFileSystemView; import org.apache.hudi.common.util.FSUtils; import org.apache.hudi.common.util.Option; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; + +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + import scala.Tuple2; /** diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java index dc49ebd..16ccb14 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java @@ -22,6 +22,12 @@ import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; import com.google.common.base.Preconditions; import io.javalin.Javalin; +import org.apache.http.HttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.spark.api.java.JavaSparkContext; + import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; @@ -31,11 +37,6 @@ import java.net.UnknownHostException; import java.util.ArrayList; import java.util.List; import java.util.stream.IntStream; -import org.apache.http.HttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClientBuilder; -import org.apache.spark.api.java.JavaSparkContext; public class HoodieWithTimelineServer implements Serializable { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java index bbb18af..1f33ca6 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java @@ -18,18 +18,6 @@ package org.apache.hudi.utilities; -import com.google.common.base.Preconditions; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.StringReader; -import java.nio.ByteBuffer; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hudi.HoodieWriteClient; import org.apache.hudi.WriteStatus; import org.apache.hudi.common.util.DFSPropertiesConfiguration; @@ -44,6 +32,11 @@ import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.Source; import org.apache.hudi.utilities.transform.Transformer; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.Accumulator; @@ -52,6 +45,15 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.StringReader; +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + /** * Bunch of helper methods */ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/adhoc/UpgradePayloadFromUberToApache.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/adhoc/UpgradePayloadFromUberToApache.java index 8c04f67..6793d94 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/adhoc/UpgradePayloadFromUberToApache.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/adhoc/UpgradePayloadFromUberToApache.java @@ -18,8 +18,19 @@ package org.apache.hudi.utilities.adhoc; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.table.HoodieTableConfig; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.util.FSUtils; +import org.apache.hudi.config.HoodieCompactionConfig; + import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; @@ -27,15 +38,6 @@ import java.io.Serializable; import java.util.HashMap; import java.util.Map; import java.util.Properties; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hudi.common.model.HoodieTableType; -import org.apache.hudi.common.table.HoodieTableConfig; -import org.apache.hudi.common.table.HoodieTableMetaClient; -import org.apache.hudi.common.util.FSUtils; -import org.apache.hudi.config.HoodieCompactionConfig; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; /** * This is an one-time use class meant for migrating the configuration for "hoodie.compaction.payload.class" in diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/AbstractDeltaStreamerService.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/AbstractDeltaStreamerService.java index ceb745a..bd4d8a2 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/AbstractDeltaStreamerService.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/AbstractDeltaStreamerService.java @@ -18,6 +18,11 @@ package org.apache.hudi.utilities.deltastreamer; +import org.apache.hudi.common.util.collection.Pair; + +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + import java.io.Serializable; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; @@ -25,9 +30,6 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.function.Function; -import org.apache.hudi.common.util.collection.Pair; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; /** * Base Class for running delta-sync/compaction in separate thread and controlling their life-cyle diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/Compactor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/Compactor.java index a72b68a..3285ba7 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/Compactor.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/Compactor.java @@ -18,18 +18,20 @@ package org.apache.hudi.utilities.deltastreamer; -import java.io.IOException; -import java.io.Serializable; import org.apache.hudi.HoodieWriteClient; import org.apache.hudi.WriteStatus; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieException; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import java.io.IOException; +import java.io.Serializable; + /** * Run one round of compaction */ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java index 73ef8a3..532ec69 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java @@ -18,25 +18,6 @@ package org.apache.hudi.utilities.deltastreamer; -import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE; -import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME; - -import com.codahale.metrics.Timer; -import com.google.common.base.Preconditions; -import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Objects; -import java.util.function.Function; -import java.util.stream.Collectors; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hudi.AvroConversionUtils; import org.apache.hudi.DataSourceUtils; import org.apache.hudi.HoodieWriteClient; @@ -66,6 +47,15 @@ import org.apache.hudi.utilities.schema.RowBasedSchemaProvider; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.InputBatch; import org.apache.hudi.utilities.transform.Transformer; + +import com.codahale.metrics.Timer; +import com.google.common.base.Preconditions; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; @@ -73,8 +63,22 @@ import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; + +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Objects; +import java.util.function.Function; +import java.util.stream.Collectors; + import scala.collection.JavaConversions; +import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE; +import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME; + + /** * Sync's one batch of data to hoodie dataset */ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java index db8b40a..9893f0d 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java @@ -18,30 +18,6 @@ package org.apache.hudi.utilities.deltastreamer; -import com.beust.jcommander.IStringConverter; -import com.beust.jcommander.JCommander; -import com.beust.jcommander.Parameter; -import com.beust.jcommander.ParameterException; -import com.google.common.base.Preconditions; -import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.locks.Condition; -import java.util.concurrent.locks.ReentrantLock; -import java.util.stream.Collectors; -import java.util.stream.IntStream; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hudi.HoodieWriteClient; import org.apache.hudi.OverwriteWithLatestAvroPayload; import org.apache.hudi.common.model.HoodieTableType; @@ -60,11 +36,37 @@ import org.apache.hudi.utilities.HiveIncrementalPuller; import org.apache.hudi.utilities.UtilHelpers; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.JsonDFSSource; + +import com.beust.jcommander.IStringConverter; +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import com.beust.jcommander.ParameterException; +import com.google.common.base.Preconditions; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.ReentrantLock; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + /** * An Utility which can incrementally take the output from {@link HiveIncrementalPuller} and apply it to the target * dataset. Does not maintain any state, queries at runtime to see how far behind the target dataset is from the source diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerMetrics.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerMetrics.java index 4132f5b..19f8e10 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerMetrics.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerMetrics.java @@ -18,10 +18,11 @@ package org.apache.hudi.utilities.deltastreamer; -import com.codahale.metrics.Timer; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.metrics.Metrics; +import com.codahale.metrics.Timer; + public class HoodieDeltaStreamerMetrics { private HoodieWriteConfig config = null; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java index cb4477a..66d2c47 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java @@ -18,6 +18,13 @@ package org.apache.hudi.utilities.deltastreamer; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.util.Option; + +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.spark.SparkConf; + import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; @@ -25,11 +32,6 @@ import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.UUID; -import org.apache.hudi.common.model.HoodieTableType; -import org.apache.hudi.common.util.Option; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; -import org.apache.spark.SparkConf; /** * Utility Class to generate Spark Scheduling allocation file. This kicks in only when user sets diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SourceFormatAdapter.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SourceFormatAdapter.java index ab3b070..b41efcd 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SourceFormatAdapter.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SourceFormatAdapter.java @@ -18,11 +18,6 @@ package org.apache.hudi.utilities.deltastreamer; -import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE; -import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME; - -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.AvroConversionUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.utilities.sources.AvroSource; @@ -32,11 +27,17 @@ import org.apache.hudi.utilities.sources.ParquetSource; import org.apache.hudi.utilities.sources.RowSource; import org.apache.hudi.utilities.sources.Source; import org.apache.hudi.utilities.sources.helpers.AvroConvertor; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.types.StructType; +import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE; +import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME; + /** * Adapts data-format provided by the source to the data-format required by the client (DeltaStreamer) */ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/exception/HoodieIncrementalPullException.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/exception/HoodieIncrementalPullException.java index 58cad2c..bab12b0 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/exception/HoodieIncrementalPullException.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/exception/HoodieIncrementalPullException.java @@ -18,9 +18,10 @@ package org.apache.hudi.utilities.exception; -import java.sql.SQLException; import org.apache.hudi.exception.HoodieException; +import java.sql.SQLException; + public class HoodieIncrementalPullException extends HoodieException { public HoodieIncrementalPullException(String msg, SQLException e) { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java index 4f91f95..04c6ece 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java @@ -18,14 +18,6 @@ package org.apache.hudi.utilities.keygen; -import java.io.Serializable; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.Collections; -import java.util.Date; -import java.util.TimeZone; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.DataSourceUtils; import org.apache.hudi.SimpleKeyGenerator; import org.apache.hudi.common.model.HoodieKey; @@ -34,6 +26,16 @@ import org.apache.hudi.exception.HoodieKeyException; import org.apache.hudi.exception.HoodieNotSupportedException; import org.apache.hudi.utilities.exception.HoodieDeltaStreamerException; +import org.apache.avro.generic.GenericRecord; + +import java.io.Serializable; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.Collections; +import java.util.Date; +import java.util.TimeZone; + /** * Key generator, that relies on timestamps for partitioning field. Still picks record key by name. */ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java index ef1e986..a840ff4 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java @@ -18,11 +18,30 @@ package org.apache.hudi.utilities.perf; +import org.apache.hudi.common.model.FileSlice; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.SyncableFileSystemView; +import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; +import org.apache.hudi.common.table.view.FileSystemViewStorageType; +import org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView; +import org.apache.hudi.common.util.FSUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.timeline.service.TimelineService; +import org.apache.hudi.utilities.UtilHelpers; + import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; import com.codahale.metrics.Histogram; import com.codahale.metrics.Snapshot; import com.codahale.metrics.UniformReservoir; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; + import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; @@ -36,23 +55,6 @@ import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import java.util.stream.IntStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hudi.common.model.FileSlice; -import org.apache.hudi.common.table.HoodieTableMetaClient; -import org.apache.hudi.common.table.SyncableFileSystemView; -import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; -import org.apache.hudi.common.table.view.FileSystemViewStorageType; -import org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView; -import org.apache.hudi.common.util.FSUtils; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.timeline.service.TimelineService; -import org.apache.hudi.utilities.UtilHelpers; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; public class TimelineServerPerf implements Serializable { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java index 3eb3a44..5776984 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java @@ -18,17 +18,19 @@ package org.apache.hudi.utilities.schema; -import java.io.IOException; -import java.util.Collections; -import org.apache.avro.Schema; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hudi.DataSourceUtils; import org.apache.hudi.common.util.FSUtils; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.exception.HoodieIOException; + +import org.apache.avro.Schema; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.spark.api.java.JavaSparkContext; +import java.io.IOException; +import java.util.Collections; + /** * A simple schema provider, that reads off files on DFS */ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/NullTargetSchemaRegistryProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/NullTargetSchemaRegistryProvider.java index d7415a0..7b3172c 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/NullTargetSchemaRegistryProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/NullTargetSchemaRegistryProvider.java @@ -18,8 +18,9 @@ package org.apache.hudi.utilities.schema; -import org.apache.avro.Schema; import org.apache.hudi.common.util.TypedProperties; + +import org.apache.avro.Schema; import org.apache.spark.api.java.JavaSparkContext; /** diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/RowBasedSchemaProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/RowBasedSchemaProvider.java index 3cec79c..4b708fa 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/RowBasedSchemaProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/RowBasedSchemaProvider.java @@ -18,8 +18,9 @@ package org.apache.hudi.utilities.schema; -import org.apache.avro.Schema; import org.apache.hudi.AvroConversionUtils; + +import org.apache.avro.Schema; import org.apache.spark.sql.types.StructType; public class RowBasedSchemaProvider extends SchemaProvider { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java index c1f7c34..8378383 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java @@ -18,11 +18,13 @@ package org.apache.hudi.utilities.schema; -import java.io.Serializable; -import org.apache.avro.Schema; import org.apache.hudi.common.util.TypedProperties; + +import org.apache.avro.Schema; import org.apache.spark.api.java.JavaSparkContext; +import java.io.Serializable; + /** * Class to provide schema for reading data and also writing into a Hoodie table */ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java index 7e2ac16..d03c6da 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java @@ -18,16 +18,18 @@ package org.apache.hudi.utilities.schema; +import org.apache.hudi.DataSourceUtils; +import org.apache.hudi.common.util.TypedProperties; +import org.apache.hudi.exception.HoodieIOException; + import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.avro.Schema; +import org.apache.spark.api.java.JavaSparkContext; + import java.io.IOException; import java.net.URL; import java.util.Collections; -import org.apache.avro.Schema; -import org.apache.hudi.DataSourceUtils; -import org.apache.hudi.common.util.TypedProperties; -import org.apache.hudi.exception.HoodieIOException; -import org.apache.spark.api.java.JavaSparkContext; /** * Obtains latest schema from the Confluent/Kafka schema-registry diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroDFSSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroDFSSource.java index b7f6f8c..4cd8f07 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroDFSSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroDFSSource.java @@ -18,15 +18,16 @@ package org.apache.hudi.utilities.sources; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.mapred.AvroKey; -import org.apache.avro.mapreduce.AvroKeyInputFormat; -import org.apache.hadoop.io.NullWritable; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.DFSPathSelector; + +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.mapred.AvroKey; +import org.apache.avro.mapreduce.AvroKeyInputFormat; +import org.apache.hadoop.io.NullWritable; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java index 2ce8b43..9588a81 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java @@ -18,14 +18,15 @@ package org.apache.hudi.utilities.sources; -import io.confluent.kafka.serializers.KafkaAvroDecoder; -import kafka.serializer.StringDecoder; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils; + +import io.confluent.kafka.serializers.KafkaAvroDecoder; +import kafka.serializer.StringDecoder; +import org.apache.avro.generic.GenericRecord; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroSource.java index 3137cd6..24d7878 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroSource.java @@ -18,9 +18,10 @@ package org.apache.hudi.utilities.sources; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.utilities.schema.SchemaProvider; + +import org.apache.avro.generic.GenericRecord; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java index b37a219..4e4d603 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java @@ -18,19 +18,6 @@ package org.apache.hudi.utilities.sources; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.stream.Collectors; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.mapred.AvroKey; -import org.apache.avro.mapreduce.AvroKeyInputFormat; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.NullWritable; import org.apache.hudi.DataSourceUtils; import org.apache.hudi.common.util.FSUtils; import org.apache.hudi.common.util.Option; @@ -38,6 +25,14 @@ import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.utilities.HiveIncrementalPuller; import org.apache.hudi.utilities.schema.SchemaProvider; + +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.mapred.AvroKey; +import org.apache.avro.mapreduce.AvroKeyInputFormat; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.NullWritable; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaPairRDD; @@ -45,6 +40,13 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + /** * Source to read deltas produced by {@link HiveIncrementalPuller}, commit by commit and apply to the target table * <p> diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java index f5ed4e3..3edb296 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java @@ -18,7 +18,6 @@ package org.apache.hudi.utilities.sources; -import java.util.Arrays; import org.apache.hudi.DataSourceReadOptions; import org.apache.hudi.DataSourceUtils; import org.apache.hudi.common.model.HoodieRecord; @@ -28,12 +27,15 @@ import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.IncrSourceHelper; + import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.DataFrameReader; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; +import java.util.Arrays; + public class HoodieIncrSource extends RowSource { protected static class Config { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonDFSSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonDFSSource.java index 6be3a54..ed9b82a 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonDFSSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonDFSSource.java @@ -23,6 +23,7 @@ import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.DFSPathSelector; + import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java index 8e95a8c..ba68ac9 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java @@ -18,12 +18,13 @@ package org.apache.hudi.utilities.sources; -import kafka.serializer.StringDecoder; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils; + +import kafka.serializer.StringDecoder; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonSource.java index 5c9db18..fb569e9 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonSource.java @@ -20,6 +20,7 @@ package org.apache.hudi.utilities.sources; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.utilities.schema.SchemaProvider; + import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetDFSSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetDFSSource.java index 22ac3f9..3695fce 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetDFSSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetDFSSource.java @@ -18,12 +18,13 @@ package org.apache.hudi.utilities.sources; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.DFSPathSelector; + +import org.apache.avro.generic.GenericRecord; import org.apache.parquet.avro.AvroParquetInputFormat; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetSource.java index edcc688..58fe5ad 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetSource.java @@ -18,9 +18,10 @@ package org.apache.hudi.utilities.sources; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.utilities.schema.SchemaProvider; + +import org.apache.avro.generic.GenericRecord; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/RowSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/RowSource.java index 467c667..9e289f1 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/RowSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/RowSource.java @@ -23,6 +23,7 @@ import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.utilities.schema.RowBasedSchemaProvider; import org.apache.hudi.utilities.schema.SchemaProvider; + import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java index 0ed1e6c..48e3bd7 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/Source.java @@ -18,15 +18,17 @@ package org.apache.hudi.utilities.sources; -import java.io.Serializable; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.utilities.schema.SchemaProvider; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; +import java.io.Serializable; + /** * Represents a source from which we can tail data. Assumes a constructor that takes properties. */ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/AvroConvertor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/AvroConvertor.java index d5c4621..97bb937 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/AvroConvertor.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/AvroConvertor.java @@ -18,13 +18,15 @@ package org.apache.hudi.utilities.sources.helpers; +import org.apache.hudi.avro.MercifulJsonConverter; + import com.twitter.bijection.Injection; import com.twitter.bijection.avro.GenericAvroCodecs; -import java.io.IOException; -import java.io.Serializable; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; -import org.apache.hudi.avro.MercifulJsonConverter; + +import java.io.IOException; +import java.io.Serializable; /** * Convert a variety of datum into Avro GenericRecords. Has a bunch of lazy fields to circumvent issues around diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DFSPathSelector.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DFSPathSelector.java index ce979d6..2bc0466 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DFSPathSelector.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DFSPathSelector.java @@ -18,18 +18,6 @@ package org.apache.hudi.utilities.sources.helpers; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.List; -import java.util.stream.Collectors; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RemoteIterator; import org.apache.hudi.DataSourceUtils; import org.apache.hudi.common.util.FSUtils; import org.apache.hudi.common.util.Option; @@ -38,6 +26,20 @@ import org.apache.hudi.common.util.collection.ImmutablePair; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; +import java.util.stream.Collectors; + public class DFSPathSelector { /** diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java index 62d35cd..c6430ea 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java @@ -18,12 +18,13 @@ package org.apache.hudi.utilities.sources.helpers; -import com.google.common.base.Preconditions; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.HoodieTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; + +import com.google.common.base.Preconditions; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Row; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/KafkaOffsetGen.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/KafkaOffsetGen.java index 873e793..6bde10f 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/KafkaOffsetGen.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/KafkaOffsetGen.java @@ -18,23 +18,26 @@ package org.apache.hudi.utilities.sources.helpers; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.stream.Collectors; -import kafka.common.TopicAndPartition; import org.apache.hudi.DataSourceUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.exception.HoodieNotSupportedException; import org.apache.hudi.utilities.exception.HoodieDeltaStreamerException; + +import kafka.common.TopicAndPartition; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.streaming.kafka.KafkaCluster; import org.apache.spark.streaming.kafka.KafkaCluster.LeaderOffset; import org.apache.spark.streaming.kafka.OffsetRange; + +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.stream.Collectors; + import scala.Predef; import scala.collection.JavaConverters; import scala.collection.immutable.Map; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/FlatteningTransformer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/FlatteningTransformer.java index 858fbd8..7c41e8d 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/FlatteningTransformer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/FlatteningTransformer.java @@ -18,8 +18,8 @@ package org.apache.hudi.utilities.transform; -import java.util.UUID; import org.apache.hudi.common.util.TypedProperties; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaSparkContext; @@ -29,6 +29,8 @@ import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; +import java.util.UUID; + /** * Transformer that can flatten nested objects. It currently doesn't unnest arrays. */ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/IdentityTransformer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/IdentityTransformer.java index ffc0fad..f74291f 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/IdentityTransformer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/IdentityTransformer.java @@ -19,6 +19,7 @@ package org.apache.hudi.utilities.transform; import org.apache.hudi.common.util.TypedProperties; + import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlQueryBasedTransformer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlQueryBasedTransformer.java index b5e2e37..ff563a1 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlQueryBasedTransformer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlQueryBasedTransformer.java @@ -18,8 +18,8 @@ package org.apache.hudi.utilities.transform; -import java.util.UUID; import org.apache.hudi.common.util.TypedProperties; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaSparkContext; @@ -27,6 +27,8 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; +import java.util.UUID; + /** * A transformer that allows a sql-query template be used to transform the source before writing to Hudi data-set. * diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/Transformer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/Transformer.java index 7433f6f..f97b302 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/Transformer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/Transformer.java @@ -19,6 +19,7 @@ package org.apache.hudi.utilities.transform; import org.apache.hudi.common.util.TypedProperties; + import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestFlatteningTransformer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestFlatteningTransformer.java index 3e7d217..d119102 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestFlatteningTransformer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestFlatteningTransformer.java @@ -18,15 +18,16 @@ package org.apache.hudi.utilities; -import static org.junit.Assert.assertEquals; - import org.apache.hudi.utilities.transform.FlatteningTransformer; + import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; import org.junit.Test; +import static org.junit.Assert.assertEquals; + public class TestFlatteningTransformer { @Test diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHDFSParquetImporter.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHDFSParquetImporter.java index 045042f..6ed937c 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHDFSParquetImporter.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHDFSParquetImporter.java @@ -18,19 +18,14 @@ package org.apache.hudi.utilities; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import org.apache.hudi.HoodieReadClient; +import org.apache.hudi.HoodieWriteClient; +import org.apache.hudi.common.HoodieTestDataGenerator; +import org.apache.hudi.common.minicluster.HdfsTestService; +import org.apache.hudi.common.model.HoodieTestUtils; +import org.apache.hudi.common.table.HoodieTimeline; +import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; -import java.io.IOException; -import java.io.Serializable; -import java.text.ParseException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; import org.apache.avro.generic.GenericRecord; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.LocatedFileStatus; @@ -38,13 +33,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hudi.HoodieReadClient; -import org.apache.hudi.HoodieWriteClient; -import org.apache.hudi.common.HoodieTestDataGenerator; -import org.apache.hudi.common.minicluster.HdfsTestService; -import org.apache.hudi.common.model.HoodieTestUtils; -import org.apache.hudi.common.table.HoodieTimeline; -import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.parquet.avro.AvroParquetWriter; import org.apache.parquet.hadoop.ParquetWriter; import org.apache.spark.SparkConf; @@ -54,6 +42,20 @@ import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; +import java.io.IOException; +import java.io.Serializable; +import java.text.ParseException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + public class TestHDFSParquetImporter implements Serializable { private static String dfsBasePath; diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieDeltaStreamer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieDeltaStreamer.java index 5cdb532..d2be913 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieDeltaStreamer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieDeltaStreamer.java @@ -18,23 +18,6 @@ package org.apache.hudi.utilities; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Random; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; -import java.util.function.Function; -import java.util.stream.Collectors; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hudi.DataSourceWriteOptions; import org.apache.hudi.SimpleKeyGenerator; import org.apache.hudi.common.model.HoodieCommitMetadata; @@ -62,6 +45,11 @@ import org.apache.hudi.utilities.sources.TestDataSource; import org.apache.hudi.utilities.sources.config.TestSourceConfig; import org.apache.hudi.utilities.transform.SqlQueryBasedTransformer; import org.apache.hudi.utilities.transform.Transformer; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; @@ -80,6 +68,20 @@ import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import java.util.stream.Collectors; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + /** * Basic tests against {@link HoodieDeltaStreamer}, by issuing bulk_inserts, upserts, inserts. Check counts at the end. */ diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieSnapshotCopier.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieSnapshotCopier.java index f64c38b..349b5aa 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieSnapshotCopier.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieSnapshotCopier.java @@ -18,25 +18,27 @@ package org.apache.hudi.utilities; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import java.io.File; -import java.io.IOException; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hudi.common.HoodieCommonTestHarness; import org.apache.hudi.common.HoodieTestDataGenerator; import org.apache.hudi.common.model.HoodieTestUtils; import org.apache.hudi.common.util.FSUtils; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.junit.After; import org.junit.Before; import org.junit.Test; +import java.io.File; +import java.io.IOException; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + public class TestHoodieSnapshotCopier extends HoodieCommonTestHarness { private static String TEST_WRITE_TOKEN = "1-0-1"; diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestSchedulerConfGenerator.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestSchedulerConfGenerator.java index 3f83752..af1f566 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestSchedulerConfGenerator.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestSchedulerConfGenerator.java @@ -18,15 +18,17 @@ package org.apache.hudi.utilities; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; - -import java.util.Map; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer; import org.apache.hudi.utilities.deltastreamer.SchedulerConfGenerator; + import org.junit.Test; +import java.util.Map; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + public class TestSchedulerConfGenerator { @Test diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/UtilitiesTestBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/UtilitiesTestBase.java index 46b0dab..cfe6798 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/UtilitiesTestBase.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/UtilitiesTestBase.java @@ -18,21 +18,6 @@ package org.apache.hudi.utilities; -import com.google.common.collect.ImmutableList; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.List; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.DistributedFileSystem; -import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hive.service.server.HiveServer2; import org.apache.hudi.common.HoodieTestDataGenerator; import org.apache.hudi.common.TestRawTripPayload; import org.apache.hudi.common.minicluster.HdfsTestService; @@ -46,6 +31,16 @@ import org.apache.hudi.hive.HiveSyncConfig; import org.apache.hudi.hive.HoodieHiveClient; import org.apache.hudi.hive.util.HiveTestService; import org.apache.hudi.utilities.sources.TestDataSource; + +import com.google.common.collect.ImmutableList; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hive.service.server.HiveServer2; import org.apache.parquet.avro.AvroParquetWriter; import org.apache.parquet.hadoop.ParquetWriter; import org.apache.spark.api.java.JavaSparkContext; @@ -56,6 +51,13 @@ import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; + /** * Abstract test that provides a dfs & spark contexts. * diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractBaseTestSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractBaseTestSource.java index 58a7d58..3dc6b33 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractBaseTestSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/AbstractBaseTestSource.java @@ -18,13 +18,6 @@ package org.apache.hudi.utilities.sources; -import java.io.File; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.stream.Stream; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; import org.apache.hudi.common.HoodieTestDataGenerator; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.util.Option; @@ -33,9 +26,18 @@ import org.apache.hudi.common.util.collection.RocksDBBasedMap; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.config.TestSourceConfig; + +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.stream.Stream; + public abstract class AbstractBaseTestSource extends AvroSource { static final int DEFAULT_PARTITION_NUM = 0; diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/DistributedTestDataSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/DistributedTestDataSource.java index 8e87767..9e8b3c4 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/DistributedTestDataSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/DistributedTestDataSource.java @@ -18,18 +18,20 @@ package org.apache.hudi.utilities.sources; -import java.util.Iterator; -import java.util.stream.Collectors; -import java.util.stream.IntStream; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.config.TestSourceConfig; + +import org.apache.avro.generic.GenericRecord; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; +import java.util.Iterator; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + /** * A Test DataSource which scales test-data generation by using spark parallelism. */ diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDFSSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDFSSource.java index 4d4fafb..f8b4869 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDFSSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDFSSource.java @@ -18,16 +18,6 @@ package org.apache.hudi.utilities.sources; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.util.List; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RemoteIterator; import org.apache.hudi.AvroConversionUtils; import org.apache.hudi.common.HoodieTestDataGenerator; import org.apache.hudi.common.util.Option; @@ -35,6 +25,12 @@ import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.utilities.UtilitiesTestBase; import org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter; import org.apache.hudi.utilities.schema.FilebasedSchemaProvider; + +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -45,6 +41,12 @@ import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import java.io.IOException; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + /** * Basic tests against all subclasses of {@link JsonDFSSource} and {@link ParquetDFSSource} */ diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDataSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDataSource.java index 1ba75ea..c6130a6 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDataSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestDataSource.java @@ -18,18 +18,20 @@ package org.apache.hudi.utilities.sources; -import java.util.List; -import java.util.stream.Collectors; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.utilities.schema.SchemaProvider; + +import org.apache.avro.generic.GenericRecord; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; +import java.util.List; +import java.util.stream.Collectors; + /** * An implementation of {@link Source}, that emits test upserts. */ diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestKafkaSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestKafkaSource.java index c1ca1f0..f2aa794 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestKafkaSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestKafkaSource.java @@ -18,12 +18,6 @@ package org.apache.hudi.utilities.sources; -import static org.junit.Assert.assertEquals; - -import java.io.IOException; -import java.util.HashMap; -import kafka.common.TopicAndPartition; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.AvroConversionUtils; import org.apache.hudi.common.HoodieTestDataGenerator; import org.apache.hudi.common.util.Option; @@ -33,6 +27,9 @@ import org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter; import org.apache.hudi.utilities.schema.FilebasedSchemaProvider; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.Config; + +import kafka.common.TopicAndPartition; +import org.apache.avro.generic.GenericRecord; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -45,6 +42,11 @@ import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import java.io.IOException; +import java.util.HashMap; + +import static org.junit.Assert.assertEquals; + /** * Tests against {@link AvroKafkaSource} */