This is an automated email from the ASF dual-hosted git repository.
aokolnychyi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new da38680a4 Spark 3.2: Clean static vars in SparkTableUtil (#4765)
da38680a4 is described below
commit da38680a451fa2ce43fc1ab190b223b85763689c
Author: Anton Okolnychyi <[email protected]>
AuthorDate: Fri May 13 17:00:09 2022 -0700
Spark 3.2: Clean static vars in SparkTableUtil (#4765)
---
.../java/org/apache/iceberg/spark/SparkTableUtil.java | 16 +++-------------
1 file changed, 3 insertions(+), 13 deletions(-)
diff --git a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java
index b2dbac381..401787d1a 100644
--- a/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java
+++ b/spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/SparkTableUtil.java
@@ -29,7 +29,6 @@ import java.util.Map;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
import org.apache.iceberg.AppendFiles;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.FileFormat;
@@ -86,8 +85,6 @@ import org.apache.spark.sql.catalyst.parser.ParseException;
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan;
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation;
import org.apache.spark.sql.util.CaseInsensitiveStringMap;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import scala.Function2;
import scala.Option;
import scala.Some;
@@ -106,14 +103,7 @@ import static org.apache.spark.sql.functions.col;
*/
public class SparkTableUtil {
-  private static final Logger LOG = LoggerFactory.getLogger(SparkTableUtil.class);
-
-  private static final Joiner.MapJoiner MAP_JOINER = Joiner.on(",").withKeyValueSeparator("=");
-
-  private static final PathFilter HIDDEN_PATH_FILTER =
-      p -> !p.getName().startsWith("_") && !p.getName().startsWith(".");
-
-  private static final String duplicateFileMessage = "Cannot complete import because data files " +
+  private static final String DUPLICATE_FILE_MESSAGE = "Cannot complete import because data files " +
      "to be imported already exist within the target table: %s. " +
      "This is disabled by default as Iceberg is not designed for mulitple references to the same file" +
      " within the same table. If you are sure, you may set 'check_duplicate_files' to false to force the import.";
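
For context, a minimal usage sketch of how an import that hits this duplicate-file check might be invoked through SparkTableUtil. The five-argument importSparkTable overload taking a boolean checkDuplicateFiles flag, the HadoopTables-backed target table, and the table names and paths below are assumptions for illustration only, not part of this commit:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.iceberg.Table;
    import org.apache.iceberg.hadoop.HadoopTables;
    import org.apache.iceberg.spark.SparkTableUtil;
    import org.apache.spark.sql.SparkSession;
    import org.apache.spark.sql.catalyst.TableIdentifier;

    public class ImportWithDuplicateCheck {
      public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
            .appName("import-with-duplicate-check")
            .getOrCreate();

        // Placeholder source table registered in the Spark session catalog.
        TableIdentifier sourceTable = new TableIdentifier("source_table", scala.Option.apply("db"));

        // Placeholder Iceberg target table stored at a Hadoop path.
        Configuration conf = spark.sparkContext().hadoopConfiguration();
        Table targetTable = new HadoopTables(conf).load("hdfs://nn:8020/warehouse/db/target_table");

        // With the check enabled, the import fails with the message above if any
        // imported data file is already referenced by the target table; passing
        // false instead would skip the check. The boolean parameter is assumed here.
        SparkTableUtil.importSparkTable(
            spark, sourceTable, targetTable, "hdfs://nn:8020/tmp/staging", true /* checkDuplicateFiles */);
      }
    }
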
@@ -479,7 +469,7 @@ public class SparkTableUtil {
      Dataset<String> duplicates = importedFiles.join(existingFiles, joinCond)
          .select("file_path").as(Encoders.STRING());
      Preconditions.checkState(duplicates.isEmpty(),
-          String.format(duplicateFileMessage, Joiner.on(",").join((String[]) duplicates.take(10))));
+          String.format(DUPLICATE_FILE_MESSAGE, Joiner.on(",").join((String[]) duplicates.take(10))));
}
AppendFiles append = targetTable.newAppend();
@@ -535,7 +525,7 @@ public class SparkTableUtil {
      Dataset<String> duplicates = importedFiles.join(existingFiles, joinCond)
          .select("file_path").as(Encoders.STRING());
      Preconditions.checkState(duplicates.isEmpty(),
-          String.format(duplicateFileMessage, Joiner.on(",").join((String[]) duplicates.take(10))));
+          String.format(DUPLICATE_FILE_MESSAGE, Joiner.on(",").join((String[]) duplicates.take(10))));
}
List<ManifestFile> manifests = filesToImport