This is an automated email from the ASF dual-hosted git repository.

dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 374d011a7caae3ad91e209f39b4537f9152a8ca2
Author: Andrew Sherman <[email protected]>
AuthorDate: Fri Aug 5 14:20:21 2022 -0700

    IMPALA-11479: Add Java unit tests for IcebergUtil.

    This does not test all of IcebergUtil, but it is a start.
    Tidy up some IcebergUtil code and fix a few spelling mistakes.

    Change-Id: Ib15993a9ed3d5802dda4edb2011a90ead6d06ed4
    Reviewed-on: http://gerrit.cloudera.org:8080/19543
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../apache/impala/common/TransactionKeepalive.java |   1 -
 .../java/org/apache/impala/util/IcebergUtil.java   | 113 +++---
 .../impala/catalog/local/LocalCatalogTest.java     |   2 +-
 .../org/apache/impala/util/IcebergUtilTest.java    | 400 +++++++++++++++++++++
 4 files changed, 460 insertions(+), 56 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/common/TransactionKeepalive.java b/fe/src/main/java/org/apache/impala/common/TransactionKeepalive.java
index 52dff28fd..126937826 100644
--- a/fe/src/main/java/org/apache/impala/common/TransactionKeepalive.java
+++ b/fe/src/main/java/org/apache/impala/common/TransactionKeepalive.java
@@ -37,7 +37,6 @@ import org.apache.impala.thrift.TQueryCtx;
 import org.apache.log4j.Logger;
 
 import com.google.common.base.Preconditions;
-import com.sun.tools.javac.code.Attribute.Array;
 
 /**
  * Object of this class creates a daemon thread that periodically heartbeats the
diff --git a/fe/src/main/java/org/apache/impala/util/IcebergUtil.java b/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
index 5dbbf61f2..c6595a968 100644
--- a/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
+++ b/fe/src/main/java/org/apache/impala/util/IcebergUtil.java
@@ -99,10 +99,13 @@ import org.apache.impala.thrift.TIcebergPartitionField;
 import org.apache.impala.thrift.TIcebergPartitionSpec;
 import org.apache.impala.thrift.TIcebergPartitionTransformType;
 
+@SuppressWarnings("UnstableApiUsage")
 public class IcebergUtil {
   private static final int ICEBERG_EPOCH_YEAR = 1970;
   private static final int ICEBERG_EPOCH_MONTH = 1;
+  @SuppressWarnings("unused")
   private static final int ICEBERG_EPOCH_DAY = 1;
+  @SuppressWarnings("unused")
   private static final int ICEBERG_EPOCH_HOUR = 0;
 
   /**
@@ -124,8 +127,7 @@ public class IcebergUtil {
       case HIVE_CATALOG: return IcebergHiveCatalog.getInstance();
       case HADOOP_CATALOG: return new IcebergHadoopCatalog(location);
       case CATALOGS: return IcebergCatalogs.getInstance();
-      default: throw new ImpalaRuntimeException (
-          "Unexpected catalog type: " + catalog.toString());
+      default: throw new ImpalaRuntimeException("Unexpected catalog type: " + catalog);
     }
   }
 
@@ -186,8 +188,7 @@ public class IcebergUtil {
    * Get Iceberg Transaction for 'feTable', usually use Transaction to update Iceberg
    * table schema.
    */
-  public static Transaction getIcebergTransaction(FeIcebergTable feTable)
-      throws TableLoadingException, ImpalaRuntimeException {
+  public static Transaction getIcebergTransaction(FeIcebergTable feTable) {
     return feTable.getIcebergApiTable().newTransaction();
   }
 
@@ -317,7 +318,7 @@
    */
   public static TIcebergFileFormat getIcebergFileFormat(
       org.apache.hadoop.hive.metastore.api.Table msTable) {
-    TIcebergFileFormat fileFormat = null;
+    TIcebergFileFormat fileFormat;
     Map<String, String> params = msTable.getParameters();
     if (params.containsKey(IcebergTable.ICEBERG_FILE_FORMAT)) {
       fileFormat = IcebergUtil.getIcebergFileFormat(
@@ -389,7 +390,7 @@
       PartitionField field, HashMap<String, Integer> transformParams)
       throws TableLoadingException {
     String type = field.transform().toString();
-    String transformMappingKey = getPartitonTransformMappingKey(field.sourceId(),
+    String transformMappingKey = getPartitionTransformMappingKey(field.sourceId(),
         getPartitionTransformType(type));
     return getPartitionTransform(type, transformParams.get(transformMappingKey));
   }
@@ -405,15 +406,15 @@
     return getPartitionTransform(transformType, null);
   }
 
-  public static TIcebergPartitionTransformType getPartitionTransformType(
+  private static TIcebergPartitionTransformType getPartitionTransformType(
       String transformType) throws TableLoadingException {
     Preconditions.checkNotNull(transformType);
     transformType = transformType.toUpperCase();
     if ("IDENTITY".equals(transformType)) {
       return TIcebergPartitionTransformType.IDENTITY;
-    } else if (transformType != null && transformType.startsWith("BUCKET")) {
+    } else if (transformType.startsWith("BUCKET")) {
       return TIcebergPartitionTransformType.BUCKET;
-    } else if (transformType != null && transformType.startsWith("TRUNCATE")) {
+    } else if (transformType.startsWith("TRUNCATE")) {
       return TIcebergPartitionTransformType.TRUNCATE;
     }
     switch (transformType) {
@@ -428,8 +429,8 @@
     }
   }
 
-  private static String getPartitonTransformMappingKey(int sourceId,
-      TIcebergPartitionTransformType transformType) {
+  private static String getPartitionTransformMappingKey(
+      int sourceId, TIcebergPartitionTransformType transformType) {
     return sourceId + "_" + transformType.toString();
   }
 
@@ -438,10 +439,10 @@
    * PartitionSpec and its transform's parameter. Only Bucket and Truncate transforms
    * have a parameter, for other transforms this mapping will have a null.
    * source ID and the transform type are needed together to uniquely identify a specific
-   * field in the PartitionSpec. (Unfortunaltely, fieldId is not available in the Visitor
+   * field in the PartitionSpec. (Unfortunately, fieldId is not available in the Visitor
    * class below.)
    * The reason for implementing the PartitionSpecVisitor below was that Iceberg doesn't
-   * expose the interface of the transform types outside of their package and the only
+   * expose the interface of the transform types outside their package and the only
    * way to get the transform's parameter is implementing this visitor class.
    */
   public static HashMap<String, Integer> getPartitionTransformParams(PartitionSpec spec) {
@@ -449,61 +450,61 @@
         spec, new PartitionSpecVisitor<Pair<String, Integer>>() {
           @Override
           public Pair<String, Integer> identity(String sourceName, int sourceId) {
-            String mappingKey = getPartitonTransformMappingKey(sourceId,
+            String mappingKey = getPartitionTransformMappingKey(sourceId,
                 TIcebergPartitionTransformType.IDENTITY);
-            return new Pair<String, Integer>(mappingKey, null);
+            return new Pair<>(mappingKey, null);
           }
 
           @Override
           public Pair<String, Integer> bucket(String sourceName, int sourceId,
               int numBuckets) {
-            String mappingKey = getPartitonTransformMappingKey(sourceId,
+            String mappingKey = getPartitionTransformMappingKey(sourceId,
                 TIcebergPartitionTransformType.BUCKET);
-            return new Pair<String, Integer>(mappingKey, numBuckets);
+            return new Pair<>(mappingKey, numBuckets);
           }
 
           @Override
           public Pair<String, Integer> truncate(String sourceName, int sourceId,
               int width) {
-            String mappingKey = getPartitonTransformMappingKey(sourceId,
+            String mappingKey = getPartitionTransformMappingKey(sourceId,
                 TIcebergPartitionTransformType.TRUNCATE);
-            return new Pair<String, Integer>(mappingKey, width);
+            return new Pair<>(mappingKey, width);
           }
 
           @Override
           public Pair<String, Integer> year(String sourceName, int sourceId) {
-            String mappingKey = getPartitonTransformMappingKey(sourceId,
+            String mappingKey = getPartitionTransformMappingKey(sourceId,
                 TIcebergPartitionTransformType.YEAR);
-            return new Pair<String, Integer>(mappingKey, null);
+            return new Pair<>(mappingKey, null);
           }
 
           @Override
           public Pair<String, Integer> month(String sourceName, int sourceId) {
-            String mappingKey = getPartitonTransformMappingKey(sourceId,
+            String mappingKey = getPartitionTransformMappingKey(sourceId,
                 TIcebergPartitionTransformType.MONTH);
-            return new Pair<String, Integer>(mappingKey, null);
+            return new Pair<>(mappingKey, null);
          }
 
           @Override
           public Pair<String, Integer> day(String sourceName, int sourceId) {
-            String mappingKey = getPartitonTransformMappingKey(sourceId,
+            String mappingKey = getPartitionTransformMappingKey(sourceId,
                 TIcebergPartitionTransformType.DAY);
-            return new Pair<String, Integer>(mappingKey, null);
+            return new Pair<>(mappingKey, null);
           }
 
           @Override
           public Pair<String, Integer> hour(String sourceName, int sourceId) {
-            String mappingKey = getPartitonTransformMappingKey(sourceId,
+            String mappingKey = getPartitionTransformMappingKey(sourceId,
                 TIcebergPartitionTransformType.HOUR);
-            return new Pair<String, Integer>(mappingKey, null);
+            return new Pair<>(mappingKey, null);
          }
 
           @Override
           public Pair<String, Integer> alwaysNull(int fieldId, String sourceName,
               int sourceId) {
-            String mappingKey = getPartitonTransformMappingKey(sourceId,
+            String mappingKey = getPartitionTransformMappingKey(sourceId,
                 TIcebergPartitionTransformType.VOID);
-            return new Pair<String, Integer>(mappingKey, null);
+            return new Pair<>(mappingKey, null);
           }
         });
     // Move the content of the List into a HashMap for faster querying in the future.
@@ -539,8 +540,14 @@
   /**
    * Transform TIcebergFileFormat to HdfsFileFormat
    */
+  @SuppressWarnings("unused")
   public static HdfsFileFormat toHdfsFileFormat(String format) {
-    return HdfsFileFormat.fromThrift(toTHdfsFileFormat(getIcebergFileFormat(format)));
+    TIcebergFileFormat icebergFileFormat = getIcebergFileFormat(format);
+    if (icebergFileFormat == null) {
+      // Can't pass null to toTHdfsFileFormat(), so throw.
+      throw new IllegalArgumentException("unknown table format " + format);
+    }
+    return HdfsFileFormat.fromThrift(toTHdfsFileFormat(icebergFileFormat));
   }
 
   /**
@@ -645,7 +652,6 @@
     }
 
     @Override
-    @SuppressWarnings("unchecked")
     public <T> T get(int pos, Class<T> javaClass) {
       return javaClass.cast(values[pos]);
     }
@@ -741,7 +747,7 @@
    * return value should be 14.
    */
   private static Integer parseYearToTransformYear(String yearStr) {
-    Integer year = Integer.valueOf(yearStr);
+    int year = Integer.parseInt(yearStr);
     return year - ICEBERG_EPOCH_YEAR;
   }
 
@@ -749,12 +755,11 @@
    * In the partition path months are represented as <year>-<month>, e.g. 2021-01. We
    * need to convert it to a single integer which represents the months from '1970-01'.
    */
-  private static Integer parseMonthToTransformMonth(String monthStr)
-      throws ImpalaRuntimeException {
+  private static Integer parseMonthToTransformMonth(String monthStr) {
     String[] parts = monthStr.split("-", -1);
     Preconditions.checkState(parts.length == 2);
-    Integer year = Integer.valueOf(parts[0]);
-    Integer month = Integer.valueOf(parts[1]);
+    int year = Integer.parseInt(parts[0]);
+    int month = Integer.parseInt(parts[1]);
     int years = year - ICEBERG_EPOCH_YEAR;
     int months = month - ICEBERG_EPOCH_MONTH;
     return years * 12 + months;
@@ -769,10 +774,10 @@
     final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC);
     String[] parts = hourStr.split("-", -1);
     Preconditions.checkState(parts.length == 4);
-    Integer year = Integer.valueOf(parts[0]);
-    Integer month = Integer.valueOf(parts[1]);
-    Integer day = Integer.valueOf(parts[2]);
-    Integer hour = Integer.valueOf(parts[3]);
+    int year = Integer.parseInt(parts[0]);
+    int month = Integer.parseInt(parts[1]);
+    int day = Integer.parseInt(parts[2]);
+    int hour = Integer.parseInt(parts[3]);
     OffsetDateTime datetime = OffsetDateTime.of(
         LocalDateTime.of(year, month, day, hour, /*minute=*/0),
         ZoneOffset.UTC);
@@ -812,9 +817,9 @@
         clevel > IcebergTable.MAX_PARQUET_COMPRESSION_LEVEL) {
       errMsg.append("Parquet compression level for Iceberg table should fall in " +
           "the range of [")
-          .append(String.valueOf(IcebergTable.MIN_PARQUET_COMPRESSION_LEVEL))
+          .append(IcebergTable.MIN_PARQUET_COMPRESSION_LEVEL)
           .append("..")
-          .append(String.valueOf(IcebergTable.MAX_PARQUET_COMPRESSION_LEVEL))
+          .append(IcebergTable.MAX_PARQUET_COMPRESSION_LEVEL)
           .append("]");
       return null;
     }
@@ -836,9 +841,9 @@
         rowGroupSize > IcebergTable.MAX_PARQUET_ROW_GROUP_SIZE) {
       errMsg.append("Parquet row group size for Iceberg table should ")
           .append("fall in the range of [")
-          .append(String.valueOf(IcebergTable.MIN_PARQUET_ROW_GROUP_SIZE))
+          .append(IcebergTable.MIN_PARQUET_ROW_GROUP_SIZE)
           .append("..")
-          .append(String.valueOf(IcebergTable.MAX_PARQUET_ROW_GROUP_SIZE))
+          .append(IcebergTable.MAX_PARQUET_ROW_GROUP_SIZE)
           .append("]");
       return null;
     }
@@ -873,9 +878,9 @@
       errMsg.append("Parquet ")
           .append(descr)
           .append(" for Iceberg table should fall in the range of [")
-          .append(String.valueOf(IcebergTable.MIN_PARQUET_PAGE_SIZE))
+          .append(IcebergTable.MIN_PARQUET_PAGE_SIZE)
           .append("..")
-          .append(String.valueOf(IcebergTable.MAX_PARQUET_PAGE_SIZE))
+          .append(IcebergTable.MAX_PARQUET_PAGE_SIZE)
           .append("]");
       return null;
     }
@@ -979,45 +984,45 @@
         schema, field, new PartitionSpecVisitor<Pair<Byte, Integer>>() {
           @Override
           public Pair<Byte, Integer> identity(String sourceName, int sourceId) {
-            return new Pair<Byte, Integer>(FbIcebergTransformType.IDENTITY, null);
+            return new Pair<>(FbIcebergTransformType.IDENTITY, null);
           }
 
           @Override
           public Pair<Byte, Integer> bucket(String sourceName, int sourceId,
               int numBuckets) {
-            return new Pair<Byte, Integer>(FbIcebergTransformType.BUCKET, numBuckets);
+            return new Pair<>(FbIcebergTransformType.BUCKET, numBuckets);
           }
 
           @Override
           public Pair<Byte, Integer> truncate(String sourceName, int sourceId,
               int width) {
-            return new Pair<Byte, Integer>(FbIcebergTransformType.TRUNCATE, width);
+            return new Pair<>(FbIcebergTransformType.TRUNCATE, width);
           }
 
           @Override
           public Pair<Byte, Integer> year(String sourceName, int sourceId) {
-            return new Pair<Byte, Integer>(FbIcebergTransformType.YEAR, null);
+            return new Pair<>(FbIcebergTransformType.YEAR, null);
           }
 
           @Override
           public Pair<Byte, Integer> month(String sourceName, int sourceId) {
-            return new Pair<Byte, Integer>(FbIcebergTransformType.MONTH, null);
+            return new Pair<>(FbIcebergTransformType.MONTH, null);
          }
 
           @Override
           public Pair<Byte, Integer> day(String sourceName, int sourceId) {
-            return new Pair<Byte, Integer>(FbIcebergTransformType.DAY, null);
+            return new Pair<>(FbIcebergTransformType.DAY, null);
          }
 
           @Override
           public Pair<Byte, Integer> hour(String sourceName, int sourceId) {
-            return new Pair<Byte, Integer>(FbIcebergTransformType.HOUR, null);
+            return new Pair<>(FbIcebergTransformType.HOUR, null);
           }
 
           @Override
           public Pair<Byte, Integer> alwaysNull(int fieldId, String sourceName,
               int sourceId) {
-            return new Pair<Byte, Integer>(FbIcebergTransformType.VOID, null);
+            return new Pair<>(FbIcebergTransformType.VOID, null);
           }
         });
   }
diff --git a/fe/src/test/java/org/apache/impala/catalog/local/LocalCatalogTest.java b/fe/src/test/java/org/apache/impala/catalog/local/LocalCatalogTest.java
index 5f6e78fd2..a2a72ce3c 100644
--- a/fe/src/test/java/org/apache/impala/catalog/local/LocalCatalogTest.java
+++ b/fe/src/test/java/org/apache/impala/catalog/local/LocalCatalogTest.java
@@ -272,7 +272,7 @@
   }
 
   /**
-   * This test verifies that the network adresses used by the LocalIcebergTable are
+   * This test verifies that the network addresses used by the LocalIcebergTable are
    * the same used by CatalogD.
    */
   @Test
diff --git a/fe/src/test/java/org/apache/impala/util/IcebergUtilTest.java b/fe/src/test/java/org/apache/impala/util/IcebergUtilTest.java
new file mode 100644
index 000000000..3a3d0031c
--- /dev/null
+++ b/fe/src/test/java/org/apache/impala/util/IcebergUtilTest.java
@@ -0,0 +1,400 @@
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+package org.apache.impala.util;
+
+import static org.apache.impala.thrift.TIcebergCatalog.CATALOGS;
+import static org.apache.impala.thrift.TIcebergCatalog.HADOOP_CATALOG;
+import static org.apache.impala.thrift.TIcebergCatalog.HADOOP_TABLES;
+import static org.apache.impala.thrift.TIcebergCatalog.HIVE_CATALOG;
+import static org.apache.impala.util.IcebergUtil.getFilePathHash;
+import static org.apache.impala.util.IcebergUtil.getIcebergFileFormat;
+import static org.apache.impala.util.IcebergUtil.getPartitionTransform;
+import static org.apache.impala.util.IcebergUtil.getPartitionTransformParams;
+import static org.apache.impala.util.IcebergUtil.isPartitionColumn;
+import static org.apache.impala.util.IcebergUtil.toHdfsFileFormat;
+import static org.apache.impala.util.IcebergUtil.toTHdfsFileFormat;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import com.google.common.collect.ImmutableList;
+
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.DataFiles;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.mr.Catalogs;
+import org.apache.iceberg.types.Types;
+import org.apache.impala.analysis.IcebergPartitionField;
+import org.apache.impala.analysis.IcebergPartitionSpec;
+import org.apache.impala.analysis.IcebergPartitionTransform;
+import org.apache.impala.catalog.HdfsFileFormat;
+import org.apache.impala.catalog.IcebergColumn;
+import org.apache.impala.catalog.IcebergTable;
+import org.apache.impala.catalog.TableLoadingException;
+import org.apache.impala.catalog.Type;
+import org.apache.impala.catalog.iceberg.IcebergCatalog;
+import org.apache.impala.catalog.iceberg.IcebergCatalogs;
+import org.apache.impala.catalog.iceberg.IcebergHadoopCatalog;
+import org.apache.impala.catalog.iceberg.IcebergHadoopTables;
+import org.apache.impala.catalog.iceberg.IcebergHiveCatalog;
+import org.apache.impala.common.AnalysisException;
+import org.apache.impala.common.ImpalaRuntimeException;
+import org.apache.impala.thrift.THdfsFileFormat;
+import org.apache.impala.thrift.TIcebergCatalog;
+import org.apache.impala.thrift.TIcebergFileFormat;
+import org.apache.impala.thrift.TIcebergPartitionTransformType;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * Unit tests for Iceberg Utilities.
+ */
+public class IcebergUtilTest {
+  /**
+   * Unit test for IcebergUtil.getTIcebergCatalog() and IcebergUtil.getIcebergCatalog().
+   */
+  @Test
+  public void testGetCatalog() throws ImpalaRuntimeException {
+    CatalogMapping[] mappings = new CatalogMapping[] {
+        new CatalogMapping("hadoop.tables", HADOOP_TABLES, IcebergHadoopTables.class),
+        new CatalogMapping("hadoop.catalog", HADOOP_CATALOG, IcebergHadoopCatalog.class),
+        new CatalogMapping("hive.catalog", HIVE_CATALOG, IcebergHiveCatalog.class),
+        new CatalogMapping(null, HIVE_CATALOG, IcebergHiveCatalog.class),
+        new CatalogMapping("other string", CATALOGS, IcebergCatalogs.class),
+    };
+    for (CatalogMapping testValue : mappings) {
+      TIcebergCatalog catalog = IcebergUtil.getTIcebergCatalog(testValue.propertyName);
+      assertEquals("err for " + testValue.propertyName, testValue.catalog, catalog);
+      IcebergCatalog impl = IcebergUtil.getIcebergCatalog(catalog, "location");
+      assertEquals("err for " + testValue.propertyName, testValue.clazz, impl.getClass());
+    }
+  }
+
+  /**
+   * Unit test for IcebergUtil.getIcebergTableIdentifier().
+   */
+  @Test
+  public void testGetIcebergTableIdentifier() {
+    // Test a table with no table properties.
+    Table table = new Table();
+    table.setParameters(new HashMap<>());
+    String tableName = "table_name";
+    table.setTableName(tableName);
+    String dbname = "database_name";
+    table.setDbName(dbname);
+    TableIdentifier icebergTableIdentifier = IcebergUtil.getIcebergTableIdentifier(table);
+    assertEquals(
+        TableIdentifier.parse("database_name.table_name"), icebergTableIdentifier);
+
+    // If iceberg.table_identifier is not set, then the value of the "name" property
+    // is used.
+    String nameId = "db.table";
+    table.putToParameters(Catalogs.NAME, nameId);
+    icebergTableIdentifier = IcebergUtil.getIcebergTableIdentifier(table);
+    assertEquals(TableIdentifier.parse(nameId), icebergTableIdentifier);
+
+    // If iceberg.table_identifier is set, then that is used.
+    String tableId = "foo.bar";
+    table.putToParameters(IcebergTable.ICEBERG_TABLE_IDENTIFIER, tableId);
+    icebergTableIdentifier = IcebergUtil.getIcebergTableIdentifier(table);
+    assertEquals(TableIdentifier.parse(tableId), icebergTableIdentifier);
+
+    // If iceberg.table_identifier is set to a simple name, then the default database
+    // is used.
+    table.putToParameters(IcebergTable.ICEBERG_TABLE_IDENTIFIER, "noDatabase");
+    icebergTableIdentifier = IcebergUtil.getIcebergTableIdentifier(table);
+    assertEquals(TableIdentifier.parse("default.noDatabase"), icebergTableIdentifier);
+  }
+
+  /**
+   * Unit test for isHiveCatalog().
+   */
+  @Test
+  public void testIsHiveCatalog() {
+    CatalogType[] catalogTypes = new CatalogType[] {
+        // For hadoop.tables and hadoop.catalog we are not using the Hive Catalog.
+        new CatalogType("hadoop.tables", false),
+        new CatalogType("hadoop.catalog", false),
+        // For all other values of ICEBERG_CATALOG the Hive Catalog is used.
+        new CatalogType("hive.catalog", true),
+        new CatalogType(null, true),
+        new CatalogType("other string", true),
+    };
+    for (CatalogType testValue : catalogTypes) {
+      Table table = new Table();
+      table.putToParameters(IcebergTable.ICEBERG_CATALOG, testValue.propertyName);
+      assertEquals("err in " + testValue.propertyName, testValue.isHiveCatalog,
+          IcebergUtil.isHiveCatalog(table));
+    }
+  }
+
+  /**
+   * Unit test for getIcebergFileFormat(), toHdfsFileFormat() and toTHdfsFileFormat().
+   */
+  @Test
+  public void testToHdfsFileFormat() {
+    assertEquals(THdfsFileFormat.ORC, toTHdfsFileFormat(TIcebergFileFormat.ORC));
+    assertEquals(THdfsFileFormat.PARQUET, toTHdfsFileFormat(TIcebergFileFormat.PARQUET));
+    assertEquals(HdfsFileFormat.ORC, toHdfsFileFormat(TIcebergFileFormat.ORC));
+    assertEquals(HdfsFileFormat.PARQUET, toHdfsFileFormat(TIcebergFileFormat.PARQUET));
+    assertEquals(HdfsFileFormat.ORC, toHdfsFileFormat("ORC"));
+    assertEquals(HdfsFileFormat.PARQUET, toHdfsFileFormat("PARQUET"));
+    assertEquals(HdfsFileFormat.PARQUET, toHdfsFileFormat((String) null));
+    try {
+      toHdfsFileFormat("unknown");
+      fail("did not get expected exception");
+    } catch (IllegalArgumentException e) {
+      // fall through
+    }
+    assertEquals(TIcebergFileFormat.ORC, getIcebergFileFormat("ORC"));
+    assertEquals(TIcebergFileFormat.PARQUET, getIcebergFileFormat("PARQUET"));
+    assertNull(getIcebergFileFormat("unknown"));
+  }
+
+  /**
+   * Unit test for getPartitionTransform().
+   */
+  @Test
+  public void testGetPartitionTransform() {
+    // Case 1
+    // Transforms that work OK.
+    PartitionTransform[] goodTransforms = new PartitionTransform[] {
+        new PartitionTransform("BUCKET", 5),
+        new PartitionTransform("TRUNCATE", 4),
+        new PartitionTransform("HOUR", null),
+        new PartitionTransform("HOURS", null),
+        new PartitionTransform("DAY", null),
+        new PartitionTransform("DAYS", null),
+        new PartitionTransform("MONTH", null),
+        new PartitionTransform("MONTHS", null),
+        new PartitionTransform("YEAR", null),
+        new PartitionTransform("YEARS", null),
+        new PartitionTransform("VOID", null),
+        new PartitionTransform("IDENTITY", null),
+    };
+    for (PartitionTransform partitionTransform : goodTransforms) {
+      IcebergPartitionTransform transform = null;
+      try {
+        transform = getPartitionTransform(
+            partitionTransform.transformName, partitionTransform.parameter);
+      } catch (TableLoadingException t) {
+        fail("Transform " + partitionTransform + " caught unexpected " + t);
+      }
+      assertNotNull(transform);
+      try {
+        transform.analyze(null);
+      } catch (AnalysisException t) {
+        fail("Transform " + partitionTransform + " caught unexpected " + t);
+      }
+    }
+
+    // Case 2
+    // Transforms that get TableLoadingException.
+    PartitionTransform[] tableExceptions = new PartitionTransform[] {
+        new PartitionTransform("JUNK", -5),
+    };
+    for (PartitionTransform partitionTransform : tableExceptions) {
+      try {
+        /* IcebergPartitionTransform transform = */ getPartitionTransform(
+            partitionTransform.transformName, partitionTransform.parameter);
+        fail("Transform " + partitionTransform + " should have got exception");
+      } catch (TableLoadingException t) {
+        // OK, fall through
+      }
+    }
+
+    // Case 3
+    // Transforms that fail analysis.
+    PartitionTransform[] failAnalysis = new PartitionTransform[] {
+        new PartitionTransform("BUCKET", -5),
+        new PartitionTransform("TRUNCATE", -4),
+    };
+    for (PartitionTransform partitionTransform : failAnalysis) {
+      IcebergPartitionTransform transform = null;
+      try {
+        transform = getPartitionTransform(
+            partitionTransform.transformName, partitionTransform.parameter);
+      } catch (TableLoadingException t) {
+        fail("Transform " + partitionTransform + " caught unexpected " + t);
+      }
+      assertNotNull(transform);
+      try {
+        transform.analyze(null);
+        fail("Transform " + partitionTransform + " should have got exception");
+      } catch (AnalysisException t) {
+        // OK, fall through
+      }
+    }
+  }
+
+  /**
+   * Unit test for getFilePathHash().
+   */
+  @Test
+  public void testGetDataFilePathHash() {
+    String hash = getFilePathHash(FILE_A);
+    assertNotNull(hash);
+    String hash2 = getFilePathHash(FILE_A);
+    assertEquals(hash, hash2);
+  }
+
+  /**
+   * Unit test for getPartitionTransformParams().
+   */
+  @Test
+  public void testGetPartitionTransformParams() {
+    int numBuckets = 128;
+    PartitionSpec partitionSpec =
+        PartitionSpec.builderFor(SCHEMA).bucket("i", numBuckets).build();
+    HashMap<String, Integer> partitionTransformParams =
+        getPartitionTransformParams(partitionSpec);
+    assertNotNull(partitionTransformParams);
+    String expectedKey = "1_BUCKET";
+    assertTrue(partitionTransformParams.containsKey(expectedKey));
+    assertEquals(numBuckets, (long) partitionTransformParams.get(expectedKey));
+  }
+
+  /**
+   * Unit test for isPartitionColumn().
+   */
+  @Test
+  public void testIsPartitionColumn() {
+    {
+      // Case 1
+      // No partition fields: isPartitionColumn() should return false.
+      int fieldId = 3;
+      IcebergColumn column =
+          new IcebergColumn("name", Type.BOOLEAN, "comment", 0, fieldId, 5, 0, true);
+      List<IcebergPartitionField> fieldList = new ArrayList<>();
+      IcebergPartitionSpec icebergPartitionSpec = new IcebergPartitionSpec(4, fieldList);
+      assertFalse(isPartitionColumn(column, icebergPartitionSpec));
+    }
+    {
+      // Case 2
+      // A partition field source id matches a column field id: isPartitionColumn()
+      // should return true.
+      int id = 3;
+      IcebergColumn column =
+          new IcebergColumn("name", Type.BOOLEAN, "comment", 0, id, 105, 0, true);
+      IcebergPartitionTransform icebergPartitionTransform =
+          new IcebergPartitionTransform(TIcebergPartitionTransformType.IDENTITY);
+      IcebergPartitionField field =
+          new IcebergPartitionField(id, 106, "name", "name", icebergPartitionTransform);
+      ImmutableList<IcebergPartitionField> fieldList = ImmutableList.of(field);
+      IcebergPartitionSpec icebergPartitionSpec = new IcebergPartitionSpec(4, fieldList);
+      assertTrue(isPartitionColumn(column, icebergPartitionSpec));
+    }
+    {
+      // Case 3
+      // Partition field source id does not match a column field id: isPartitionColumn()
+      // should return false.
+      IcebergColumn column =
+          new IcebergColumn("name", Type.BOOLEAN, "comment", 0, 108, 105, 0, true);
+      IcebergPartitionTransform icebergPartitionTransform =
+          new IcebergPartitionTransform(TIcebergPartitionTransformType.IDENTITY);
+      IcebergPartitionField field =
+          new IcebergPartitionField(107, 106, "name", "name", icebergPartitionTransform);
+      ImmutableList<IcebergPartitionField> fieldList = ImmutableList.of(field);
+      IcebergPartitionSpec icebergPartitionSpec = new IcebergPartitionSpec(4, fieldList);
+      assertFalse(isPartitionColumn(column, icebergPartitionSpec));
+    }
+  }
+
+  /**
+   * Holder class for testing Partition transforms.
+   */
+  static class PartitionTransform {
+    String transformName;
+    Integer parameter;
+
+    PartitionTransform(String transformName, Integer parameter) {
+      this.transformName = transformName;
+      this.parameter = parameter;
+    }
+
+    @Override
+    public String toString() {
+      return "PartitionTransform{"
+          + "transformName='" + transformName + '\'' + ", parameter=" + parameter + '}';
+    }
+  }
+
+  /**
+   * A simple Schema object.
+   */
+  public static final Schema SCHEMA =
+      new Schema(Types.NestedField.required(1, "i", Types.IntegerType.get()),
+          Types.NestedField.required(2, "l", Types.LongType.get()),
+          Types.NestedField.required(3, "id", Types.IntegerType.get()),
+          Types.NestedField.required(4, "data", Types.StringType.get()));
+
+  /**
+   * Partition spec used to create tables.
+   */
+  protected static final PartitionSpec SPEC =
+      PartitionSpec.builderFor(SCHEMA).bucket("data", 10).build();
+
+  /**
+   * A test DataFile.
+   */
+  static final DataFile FILE_A =
+      DataFiles.builder(SPEC)
+          .withPath("/path/to/data-a.parquet")
+          .withFileSizeInBytes(10)
+          .withPartitionPath("data_bucket=0") // Easy way to set partition data for now.
+          .withRecordCount(1)
+          .build();
+
+  /**
+   * Holder class for testing isHiveCatalog().
+   */
+  static class CatalogType {
+    String propertyName;
+    boolean isHiveCatalog;
+
+    CatalogType(String propertyName, boolean isHiveCatalog) {
+      this.propertyName = propertyName;
+      this.isHiveCatalog = isHiveCatalog;
+    }
+  }
+
+  /**
+   * Holder class for test of catalog functions.
+   */
+  static class CatalogMapping {
+    String propertyName;
+    TIcebergCatalog catalog;
+    Class<?> clazz;
+
+    CatalogMapping(String propertyName, TIcebergCatalog catalog, Class<?> clazz) {
+      this.propertyName = propertyName;
+      this.catalog = catalog;
+      this.clazz = clazz;
+    }
+  }
+}
\ No newline at end of file