This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit cc26f345a40d10cd5d0dc69f1dc3623fdddf16fd Author: LPL <[email protected]> AuthorDate: Tue Aug 23 15:00:50 2022 +0800 IMPALA-11507: Use absolute_path when Iceberg data files are outside of the table location For Iceberg tables, when any of the following properties is set, the table may have data files outside the table location directory: - 'write.object-storage.enabled' is true - 'write.data.path' is not empty - 'write.location-provider.impl' is configured - 'write.object-storage.path' (deprecated) is not empty - 'write.folder-storage.path' (deprecated) is not empty We should tolerate the case where the relative path of a data file cannot be derived from the table location path, and use the file's absolute path in that case. E.g. an ETL program may write a table whose Iceberg metadata is placed in 'hdfs://nameservice_meta/warehouse/hadoop_catalog/ice_tbl/metadata', whose recent data files are in 'hdfs://nameservice_data/warehouse/hadoop_catalog/ice_tbl/data', and whose data files from half a year ago are in 's3a://nameservice_data/warehouse/hadoop_catalog/ice_tbl/data'; such a table should still be queryable by Impala as usual. 
Testing: - added e2e tests Change-Id: I666bed21d20d5895f4332e92eb30a94fa24250be Reviewed-on: http://gerrit.cloudera.org:8080/18894 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/exec/hdfs-scan-node-base.cc | 10 +- be/src/scheduling/scheduler.cc | 7 + common/fbs/CatalogObjects.fbs | 4 + common/protobuf/planner.proto | 4 + common/thrift/PlanNodes.thrift | 4 + .../java/org/apache/impala/catalog/FeFsTable.java | 13 +- .../org/apache/impala/catalog/FeIcebergTable.java | 74 +++++--- .../apache/impala/catalog/FileMetadataLoader.java | 6 +- .../org/apache/impala/catalog/HdfsPartition.java | 80 ++++++--- .../org/apache/impala/planner/HdfsScanNode.java | 17 +- .../apache/impala/planner/IcebergScanPlanner.java | 2 +- .../java/org/apache/impala/util/AcidUtils.java | 38 ++-- .../org/apache/impala/planner/ExplainTest.java | 1 + .../apache/impala/testutil/BlockIdGenerator.java | 2 +- testdata/data/README | 30 ++++ .../42056022-e2d2-4548-9376-8993109c2ace-m0.avro | Bin 0 -> 6489 bytes .../b5880d95-f4f1-49cb-ba55-143c221017fe-m0.avro | Bin 0 -> 6481 bytes .../ce7ad1c8-1ad5-4391-a640-b203d7c476a4-m0.avro | Bin 0 -> 6488 bytes ...305-1-b5880d95-f4f1-49cb-ba55-143c221017fe.avro | Bin 0 -> 3797 bytes ...668-1-ce7ad1c8-1ad5-4391-a640-b203d7c476a4.avro | Bin 0 -> 3877 bytes ...981-1-42056022-e2d2-4548-9376-8993109c2ace.avro | Bin 0 -> 3924 bytes .../metadata/v1.metadata.json | 117 ++++++++++++ .../metadata/v2.metadata.json | 141 +++++++++++++++ .../metadata/v3.metadata.json | 144 +++++++++++++++ .../metadata/v4.metadata.json | 169 ++++++++++++++++++ .../metadata/v5.metadata.json | 172 ++++++++++++++++++ .../metadata/v6.metadata.json | 197 +++++++++++++++++++++ .../metadata/version-hint.text | 1 + ...4b6af-6ee7-4910-9bf5-165a9a4e71df-00001.parquet | Bin 0 -> 2199 bytes ...4b6af-6ee7-4910-9bf5-165a9a4e71df-00002.parquet | Bin 0 -> 1982 bytes ...79643-e19f-4294-914e-7b122aff576c-00001.parquet | Bin 0 -> 2199 bytes 
...79643-e19f-4294-914e-7b122aff576c-00002.parquet | Bin 0 -> 1982 bytes ...c91ef-b403-4b65-a6b0-566396b8d097-00002.parquet | Bin 0 -> 1982 bytes ...c91ef-b403-4b65-a6b0-566396b8d097-00001.parquet | Bin 0 -> 2199 bytes .../functional/functional_schema_template.sql | 17 ++ .../datasets/functional/schema_constraints.csv | 1 + .../iceberg-multiple-storage-locations-table.test | 75 ++++++++ tests/query_test/test_iceberg.py | 6 +- 38 files changed, 1239 insertions(+), 93 deletions(-) diff --git a/be/src/exec/hdfs-scan-node-base.cc b/be/src/exec/hdfs-scan-node-base.cc index 8d3f95a47..4c12799bf 100644 --- a/be/src/exec/hdfs-scan-node-base.cc +++ b/be/src/exec/hdfs-scan-node-base.cc @@ -278,8 +278,14 @@ Status HdfsScanPlanNode::ProcessScanRangesAndInitSharedState(FragmentState* stat " Try rerunning the query."); } - filesystem::path file_path(partition_desc->location()); - file_path.append(split.relative_path(), filesystem::path::codecvt()); + filesystem::path file_path; + if (hdfs_table_->IsIcebergTable() && split.relative_path().empty()) { + file_path.append(split.absolute_path(), filesystem::path::codecvt()); + } else { + file_path.append(partition_desc->location(), filesystem::path::codecvt()) + .append(split.relative_path(), filesystem::path::codecvt()); + } + const string& native_file_path = file_path.native(); auto file_desc_map_key = make_pair(partition_desc->id(), native_file_path); diff --git a/be/src/scheduling/scheduler.cc b/be/src/scheduling/scheduler.cc index 19e830e32..5afaf73c6 100644 --- a/be/src/scheduling/scheduler.cc +++ b/be/src/scheduling/scheduler.cc @@ -134,6 +134,9 @@ Status Scheduler::GenerateScanRanges(const vector<TFileSplitGeneratorSpec>& spec hdfs_scan_range.__set_offset(scan_range_offset); hdfs_scan_range.__set_partition_id(spec.partition_id); hdfs_scan_range.__set_partition_path_hash(spec.partition_path_hash); + if (fb_desc->absolute_path() != nullptr) { + hdfs_scan_range.__set_absolute_path(fb_desc->absolute_path()->str()); + } TScanRange 
scan_range; scan_range.__set_hdfs_file_split(hdfs_scan_range); if (spec.file_desc.__isset.file_metadata) { @@ -1120,6 +1123,10 @@ void TScanRangeToScanRangePB(const TScanRange& tscan_range, ScanRangePB* scan_ra hdfs_file_split->set_mtime(tscan_range.hdfs_file_split.mtime); hdfs_file_split->set_partition_path_hash( tscan_range.hdfs_file_split.partition_path_hash); + if (tscan_range.hdfs_file_split.__isset.absolute_path) { + hdfs_file_split->set_absolute_path( + tscan_range.hdfs_file_split.absolute_path); + } } if (tscan_range.__isset.hbase_key_range) { HBaseKeyRangePB* hbase_key_range = scan_range_pb->mutable_hbase_key_range(); diff --git a/common/fbs/CatalogObjects.fbs b/common/fbs/CatalogObjects.fbs index ce0f1a693..973007d2c 100644 --- a/common/fbs/CatalogObjects.fbs +++ b/common/fbs/CatalogObjects.fbs @@ -79,6 +79,10 @@ table FbFileDesc { // Whether this file is erasure-coded is_ec: bool = false (id: 5); + + // The absolute path of the file, it`s used only when data files are outside of + // the Iceberg table location (IMPALA-11507). + absolute_path: string (id: 6); } // Additional file-related metadata diff --git a/common/protobuf/planner.proto b/common/protobuf/planner.proto index adccdeed2..208ebecd4 100644 --- a/common/protobuf/planner.proto +++ b/common/protobuf/planner.proto @@ -54,6 +54,10 @@ message HdfsFileSplitPB { // Java's String.hashCode(), which is consistent. For testing purposes, this can use // any consistent hash. optional int32 partition_path_hash = 9; + + // The absolute path of the file, it`s used only when data files are outside of + // the Iceberg table location (IMPALA-11507). + optional string absolute_path = 10; } // Key range for single THBaseScanNode. 
Corresponds to THBaseKeyRange and should be kept diff --git a/common/thrift/PlanNodes.thrift b/common/thrift/PlanNodes.thrift index 19074de6e..70e208637 100644 --- a/common/thrift/PlanNodes.thrift +++ b/common/thrift/PlanNodes.thrift @@ -223,6 +223,10 @@ struct THdfsFileSplit { // Java's String.hashCode(), which is consistent. For testing purposes, this can use // any consistent hash. 9: required i32 partition_path_hash + + // The absolute path of the file, it`s used only when data files are outside of + // the Iceberg table location (IMPALA-11507). + 10: optional string absolute_path } // Key range for single THBaseScanNode. Corresponds to HBaseKeyRangePB and should be kept diff --git a/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java b/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java index 77b2c7514..f112ce576 100644 --- a/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java @@ -16,6 +16,10 @@ // under the License. 
package org.apache.impala.catalog; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; + import java.io.IOException; import java.util.ArrayList; import java.util.Collection; @@ -37,8 +41,6 @@ import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; import org.apache.impala.analysis.Expr; import org.apache.impala.analysis.LiteralExpr; import org.apache.impala.analysis.PartitionKeyValue; -import org.apache.impala.analysis.TimeTravelSpec; -import org.apache.impala.analysis.TimeTravelSpec.Kind; import org.apache.impala.catalog.HdfsPartition.FileDescriptor; import org.apache.impala.common.AnalysisException; import org.apache.impala.common.FileSystemUtil; @@ -57,9 +59,6 @@ import org.apache.impala.util.TAccessLevelUtil; import org.apache.impala.util.TResultRowBuilder; import org.apache.thrift.TException; -import com.google.common.base.Preconditions; -import com.google.common.base.Joiner; -import com.google.common.collect.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -458,7 +457,7 @@ public interface FeFsTable extends FeTable { Collections.sort(orderedFds); for (FileDescriptor fd: orderedFds) { TResultRowBuilder rowBuilder = new TResultRowBuilder(); - rowBuilder.add(p.getLocation() + "/" + fd.getRelativePath()); + rowBuilder.add(fd.getAbsolutePath(p.getLocation())); rowBuilder.add(PrintUtils.printBytes(fd.getFileLength())); rowBuilder.add(p.getPartitionName()); result.addToRows(rowBuilder.get()); @@ -477,7 +476,7 @@ public interface FeFsTable extends FeTable { Collections.sort(orderedFds); for (FileDescriptor fd : orderedFds) { TResultRowBuilder rowBuilder = new TResultRowBuilder(); - rowBuilder.add(table.getLocation() + "/" + fd.getRelativePath()); + rowBuilder.add(fd.getAbsolutePath(table.getLocation())); rowBuilder.add(PrintUtils.printBytes(fd.getFileLength())); rowBuilder.add(""); result.addToRows(rowBuilder.get()); diff --git 
a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java index bea82e370..5c25c0960 100644 --- a/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/FeIcebergTable.java @@ -17,8 +17,14 @@ package org.apache.impala.catalog; +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import com.google.common.collect.Iterables; import com.google.common.collect.Lists; +import com.google.common.primitives.Ints; + import java.io.IOException; +import java.net.URI; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -29,6 +35,7 @@ import java.util.Set; import java.util.TreeMap; import java.util.stream.Collectors; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -44,11 +51,12 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.Snapshot; import org.apache.iceberg.SnapshotSummary; import org.apache.iceberg.Table; +import org.apache.iceberg.TableProperties; +import org.apache.iceberg.util.PropertyUtil; import org.apache.iceberg.util.SnapshotUtil; import org.apache.impala.analysis.IcebergPartitionField; import org.apache.impala.analysis.IcebergPartitionSpec; import org.apache.impala.analysis.LiteralExpr; -import org.apache.impala.analysis.PartitionKeyValue; import org.apache.impala.analysis.TimeTravelSpec; import org.apache.impala.analysis.TimeTravelSpec.Kind; import org.apache.impala.catalog.HdfsPartition.FileDescriptor; @@ -60,8 +68,8 @@ import org.apache.impala.thrift.TColumn; import org.apache.impala.thrift.TCompressionCodec; import org.apache.impala.thrift.THdfsCompression; import org.apache.impala.thrift.THdfsFileDesc; -import org.apache.impala.thrift.THdfsTable; import org.apache.impala.thrift.THdfsPartition; +import 
org.apache.impala.thrift.THdfsTable; import org.apache.impala.thrift.TIcebergCatalog; import org.apache.impala.thrift.TIcebergFileFormat; import org.apache.impala.thrift.TIcebergPartitionStats; @@ -72,13 +80,8 @@ import org.apache.impala.thrift.TResultSetMetadata; import org.apache.impala.util.IcebergUtil; import org.apache.impala.util.ListMap; import org.apache.impala.util.TResultRowBuilder; -import org.json.simple.JSONValue; - -import com.google.common.base.Preconditions; -import com.google.common.base.Strings; -import com.google.common.collect.Iterables; -import com.google.common.primitives.Ints; +import org.json.simple.JSONValue; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -106,7 +109,7 @@ public interface FeIcebergTable extends FeFsTable { /** * Returns the cached Iceberg Table object that stores the metadata loaded by Iceberg. */ - org.apache.iceberg.Table getIcebergApiTable(); + Table getIcebergApiTable(); /** * Return Iceberg catalog location, we use this location to load metadata from Iceberg @@ -510,20 +513,34 @@ public interface FeIcebergTable extends FeFsTable { * Get FileDescriptor by data file location */ public static HdfsPartition.FileDescriptor getFileDescriptor(Path fileLoc, - Path tableLoc, ListMap<TNetworkAddress> hostIndex) throws IOException { + Path tableLoc, FeIcebergTable table) throws IOException { FileSystem fs = FileSystemUtil.getFileSystemForPath(tableLoc); FileStatus fileStatus = fs.getFileStatus(fileLoc); - return getFileDescriptor(fs, tableLoc, fileStatus, hostIndex); + return getFileDescriptor(fs, tableLoc, fileStatus, table); } private static HdfsPartition.FileDescriptor getFileDescriptor(FileSystem fs, - Path tableLoc, FileStatus fileStatus, ListMap<TNetworkAddress> hostIndex) + Path tableLoc, FileStatus fileStatus, FeIcebergTable table) throws IOException { - Reference<Long> numUnknownDiskIds = new Reference<Long>(Long.valueOf(0)); - String relPath = FileSystemUtil.relativizePath(fileStatus.getPath(), tableLoc); + 
Reference<Long> numUnknownDiskIds = new Reference<>(0L); + + String relPath = null; + String absPath = null; + URI relUri = tableLoc.toUri().relativize(fileStatus.getPath().toUri()); + if (relUri.isAbsolute() || relUri.getPath().startsWith(Path.SEPARATOR)) { + if (Utils.requiresDataFilesInTableLocation(table)) { + throw new RuntimeException(fileStatus.getPath() + + " is outside of the Iceberg table location " + tableLoc); + } + absPath = fileStatus.getPath().toString(); + } else { + relPath = relUri.getPath(); + } + if (!FileSystemUtil.supportsStorageIds(fs)) { - return HdfsPartition.FileDescriptor.createWithNoBlocks(fileStatus, relPath); + return HdfsPartition.FileDescriptor.createWithNoBlocks(fileStatus, + StringUtils.isNotEmpty(relPath) ? relPath : absPath); } BlockLocation[] locations; @@ -534,7 +551,8 @@ public interface FeIcebergTable extends FeFsTable { } return HdfsPartition.FileDescriptor.create(fileStatus, relPath, locations, - hostIndex, HdfsShim.isErasureCoded(fileStatus), numUnknownDiskIds); + table.getHostIndex(), HdfsShim.isErasureCoded(fileStatus), numUnknownDiskIds, + absPath); } /** @@ -550,8 +568,7 @@ public interface FeIcebergTable extends FeFsTable { ((HdfsTable)table.getFeFsTable()).partitionMap_.values(); for (HdfsPartition partition : partitions) { for (FileDescriptor fileDesc : partition.getFileDescriptors()) { - Path path = new Path(table.getHdfsBaseDir() + Path.SEPARATOR + - fileDesc.getRelativePath()); + Path path = new Path(fileDesc.getAbsolutePath(table.getHdfsBaseDir())); hdfsFileDescMap.put(path.toUri().getPath(), fileDesc); } } @@ -569,11 +586,13 @@ public interface FeIcebergTable extends FeFsTable { IcebergUtil.createIcebergMetadata(table, contentFile)); fileDescMap.put(pathHash, iceFd); } else { - LOG.warn("Iceberg file '{}' cannot be found in the HDFS recursive file " - + "listing results.", path.toString()); + if (Utils.requiresDataFilesInTableLocation(table)) { + LOG.warn("Iceberg file '{}' cannot be found in the HDFS 
recursive" + + "file listing results.", path.toString()); + } HdfsPartition.FileDescriptor fileDesc = getFileDescriptor( new Path(contentFile.path().toString()), - new Path(table.getIcebergTableLocation()), table.getHostIndex()); + new Path(table.getIcebergTableLocation()), table); HdfsPartition.FileDescriptor iceFd = fileDesc.cloneWithFileMetadata( IcebergUtil.createIcebergMetadata(table, contentFile)); fileDescMap.put(IcebergUtil.getFilePathHash(contentFile), iceFd); @@ -743,5 +762,18 @@ public interface FeIcebergTable extends FeFsTable { } return snapshot; } + + public static boolean requiresDataFilesInTableLocation(FeIcebergTable icebergTable) { + Map<String, String> properties = icebergTable.getIcebergApiTable().properties(); + return !(PropertyUtil.propertyAsBoolean(properties, + TableProperties.OBJECT_STORE_ENABLED, + TableProperties.OBJECT_STORE_ENABLED_DEFAULT) + || StringUtils.isNotEmpty(properties.get(TableProperties.WRITE_DATA_LOCATION)) + || StringUtils + .isNotEmpty(properties.get(TableProperties.WRITE_LOCATION_PROVIDER_IMPL)) + || StringUtils.isNotEmpty(properties.get(TableProperties.OBJECT_STORE_PATH)) + || StringUtils + .isNotEmpty(properties.get(TableProperties.WRITE_FOLDER_STORAGE_LOCATION))); + } } } diff --git a/fe/src/main/java/org/apache/impala/catalog/FileMetadataLoader.java b/fe/src/main/java/org/apache/impala/catalog/FileMetadataLoader.java index b96ec25b2..272a24f0a 100644 --- a/fe/src/main/java/org/apache/impala/catalog/FileMetadataLoader.java +++ b/fe/src/main/java/org/apache/impala/catalog/FileMetadataLoader.java @@ -97,7 +97,7 @@ public class FileMetadataLoader { partDir_ = Preconditions.checkNotNull(partDir); recursive_ = recursive; hostIndex_ = Preconditions.checkNotNull(hostIndex); - oldFdsByRelPath_ = Maps.uniqueIndex(oldFds, FileDescriptor::getRelativePath); + oldFdsByRelPath_ = Maps.uniqueIndex(oldFds, FileDescriptor::getPath); writeIds_ = writeIds; validTxnList_ = validTxnList; fileFormat_ = fileFormat; @@ -278,9 +278,9 @@ 
public class FileMetadataLoader { public boolean hasFilesChangedCompareTo(List<FileDescriptor> oldFds) { if (oldFds.size() != loadedFds_.size()) return true; ImmutableMap<String, FileDescriptor> oldFdsByRelPath = - Maps.uniqueIndex(oldFds, FileDescriptor::getRelativePath); + Maps.uniqueIndex(oldFds, FileDescriptor::getPath); for (FileDescriptor fd : loadedFds_) { - FileDescriptor oldFd = oldFdsByRelPath.get(fd.getRelativePath()); + FileDescriptor oldFd = oldFdsByRelPath.get(fd.getPath()); if (fd.isChanged(oldFd)) return true; } return false; diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java b/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java index b29428999..6acd9b9d8 100644 --- a/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java +++ b/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java @@ -17,6 +17,21 @@ package org.apache.impala.catalog; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Function; +import com.google.common.base.Joiner; +import com.google.common.base.MoreObjects; +import com.google.common.base.MoreObjects.ToStringHelper; +import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; +import com.google.common.base.Strings; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import com.google.flatbuffers.FlatBufferBuilder; + import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -29,10 +44,10 @@ import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; - import javax.annotation.Nonnull; import javax.annotation.Nullable; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; 
@@ -69,20 +84,6 @@ import org.apache.impala.util.ListMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Function; -import com.google.common.base.Joiner; -import com.google.common.base.MoreObjects; -import com.google.common.base.Preconditions; -import com.google.common.base.Predicate; -import com.google.common.base.Strings; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; -import com.google.flatbuffers.FlatBufferBuilder; - /** * Query-relevant information for one table partition. Partitions are comparable * based on their partition-key values. The comparison orders partitions in ascending @@ -191,7 +192,7 @@ public class HdfsPartition extends CatalogObjectImpl */ public static FileDescriptor create(FileStatus fileStatus, String relPath, BlockLocation[] blockLocations, ListMap<TNetworkAddress> hostIndex, boolean isEc, - Reference<Long> numUnknownDiskIds) throws IOException { + Reference<Long> numUnknownDiskIds, String absPath) throws IOException { FlatBufferBuilder fbb = new FlatBufferBuilder(1); int[] fbFileBlockOffsets = new int[blockLocations.length]; int blockIdx = 0; @@ -206,7 +207,14 @@ public class HdfsPartition extends CatalogObjectImpl } } return new FileDescriptor(createFbFileDesc(fbb, fileStatus, relPath, - fbFileBlockOffsets, isEc)); + fbFileBlockOffsets, isEc, absPath)); + } + + public static FileDescriptor create(FileStatus fileStatus, String relPath, + BlockLocation[] blockLocations, ListMap<TNetworkAddress> hostIndex, boolean isEc, + Reference<Long> numUnknownDiskIds) throws IOException { + return create(fileStatus, relPath, blockLocations, hostIndex, isEc, + numUnknownDiskIds, null); } /** @@ -216,7 +224,8 @@ public class HdfsPartition extends CatalogObjectImpl public static 
FileDescriptor createWithNoBlocks(FileStatus fileStatus, String relPath) { FlatBufferBuilder fbb = new FlatBufferBuilder(1); - return new FileDescriptor(createFbFileDesc(fbb, fileStatus, relPath, null, false)); + return new FileDescriptor(createFbFileDesc(fbb, fileStatus, relPath, null, false, + null)); } /** @@ -226,13 +235,16 @@ public class HdfsPartition extends CatalogObjectImpl * in the underlying buffer. Can be null if there are no blocks. */ private static FbFileDesc createFbFileDesc(FlatBufferBuilder fbb, - FileStatus fileStatus, String relPath, int[] fbFileBlockOffets, boolean isEc) { - int relPathOffset = fbb.createString(relPath); + FileStatus fileStatus, String relPath, int[] fbFileBlockOffets, boolean isEc, + String absPath) { + int relPathOffset = fbb.createString(relPath == null ? StringUtils.EMPTY : relPath); // A negative block vector offset is used when no block offsets are specified. int blockVectorOffset = -1; if (fbFileBlockOffets != null) { blockVectorOffset = FbFileDesc.createFileBlocksVector(fbb, fbFileBlockOffets); } + int absPathOffset = -1; + if (StringUtils.isNotEmpty(absPath)) absPathOffset = fbb.createString(absPath); FbFileDesc.startFbFileDesc(fbb); // TODO(todd) rename to RelativePathin the FBS FbFileDesc.addRelativePath(fbb, relPathOffset); @@ -242,6 +254,7 @@ public class HdfsPartition extends CatalogObjectImpl HdfsCompression comp = HdfsCompression.fromFileName(fileStatus.getPath().getName()); FbFileDesc.addCompression(fbb, comp.toFb()); if (blockVectorOffset >= 0) FbFileDesc.addFileBlocks(fbb, blockVectorOffset); + if (absPathOffset >= 0) FbFileDesc.addAbsolutePath(fbb, absPathOffset); fbb.finish(FbFileDesc.endFbFileDesc(fbb)); // To eliminate memory fragmentation, copy the contents of the FlatBuffer to the // smallest possible ByteBuffer. 
@@ -252,6 +265,20 @@ public class HdfsPartition extends CatalogObjectImpl } public String getRelativePath() { return fbFileDescriptor_.relativePath(); } + + public String getAbsolutePath() { return fbFileDescriptor_.absolutePath(); } + + public String getAbsolutePath(String rootPath) { + return StringUtils.isNotEmpty(fbFileDescriptor_.relativePath()) + ? rootPath + Path.SEPARATOR + fbFileDescriptor_.relativePath() + : fbFileDescriptor_.absolutePath(); + } + + public String getPath() { + return StringUtils.isNotEmpty(fbFileDescriptor_.relativePath()) + ? fbFileDescriptor_.relativePath() : fbFileDescriptor_.absolutePath(); + } + public long getFileLength() { return fbFileDescriptor_.length(); } /** Compute the total length of files in fileDescs */ @@ -296,17 +323,21 @@ public class HdfsPartition extends CatalogObjectImpl for (int i = 0; i < numFileBlocks; ++i) { blocks.add(FileBlock.debugString(getFbFileBlock(i))); } - return MoreObjects.toStringHelper(this) + ToStringHelper stringHelper = MoreObjects.toStringHelper(this) .add("RelativePath", getRelativePath()) .add("Length", getFileLength()) .add("Compression", getFileCompression()) .add("ModificationTime", getModificationTime()) - .add("Blocks", Joiner.on(", ").join(blocks)).toString(); + .add("Blocks", Joiner.on(", ").join(blocks)); + if (StringUtils.isNotEmpty(getAbsolutePath())) { + stringHelper.add("AbsolutePath", getAbsolutePath()); + } + return stringHelper.toString(); } @Override public int compareTo(FileDescriptor otherFd) { - return getRelativePath().compareTo(otherFd.getRelativePath()); + return getPath().compareTo(otherFd.getPath()); } /** @@ -980,9 +1011,8 @@ public class HdfsPartition extends CatalogObjectImpl List<FileDescriptor> fdList = getFileDescriptors(); Set<String> fileNames = new HashSet<>(fdList.size()); // Fully qualified file names. 
- String location = getLocation(); for (FileDescriptor fd : fdList) { - fileNames.add(location + Path.SEPARATOR + fd.getRelativePath()); + fileNames.add(fd.getAbsolutePath(getLocation())); } return fileNames; } diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java index 227911426..708b9996b 100644 --- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java +++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java @@ -1230,9 +1230,9 @@ public class HdfsScanNode extends ScanNode { for (FileDescriptor fileDesc: fileDescs) { if (!analyzer.getQueryOptions().isAllow_erasure_coded_files() && fileDesc.getIsEc()) { - throw new ImpalaRuntimeException(String.format( - "Scanning of HDFS erasure-coded file (%s/%s) is not supported", - partition.getLocation(), fileDesc.getRelativePath())); + throw new ImpalaRuntimeException(String + .format("Scanning of HDFS erasure-coded file (%s) is not supported", + fileDesc.getAbsolutePath(partition.getLocation()))); } // Accumulate on the number of EC files and the total size of such files. 
@@ -1338,7 +1338,7 @@ public class HdfsScanNode extends ScanNode { Preconditions.checkArgument(maxBlockSize > 0); if (fileDesc.getFileLength() <= 0) return; boolean splittable = partition.getFileFormat().isSplittable( - HdfsCompression.fromFileName(fileDesc.getRelativePath())); + HdfsCompression.fromFileName(fileDesc.getPath())); TFileSplitGeneratorSpec splitSpec = new TFileSplitGeneratorSpec( fileDesc.toThrift(), maxBlockSize, splittable, partition.getId(), partition.getLocation().hashCode()); @@ -1404,10 +1404,12 @@ public class HdfsScanNode extends ScanNode { currentLength = scanRangeBytesLimit; } TScanRange scanRange = new TScanRange(); - scanRange.setHdfs_file_split(new THdfsFileSplit(fileDesc.getRelativePath(), + THdfsFileSplit hdfsFileSplit = new THdfsFileSplit(fileDesc.getRelativePath(), currentOffset, currentLength, partition.getId(), fileDesc.getFileLength(), fileDesc.getFileCompression().toThrift(), fileDesc.getModificationTime(), - partition.getLocation().hashCode())); + partition.getLocation().hashCode()); + hdfsFileSplit.setAbsolute_path(fileDesc.getAbsolutePath()); + scanRange.setHdfs_file_split(hdfsFileSplit); if (fileDesc.getFbFileMetadata() != null) { scanRange.setFile_metadata(fileDesc.getFbFileMetadata().getByteBuffer()); } @@ -1642,8 +1644,7 @@ public class HdfsScanNode extends ScanNode { long estimatedPartitionSize = 0; if (format == HdfsFileFormat.TEXT || format == HdfsFileFormat.JSON) { for (FileDescriptor desc : p.getFileDescriptors()) { - HdfsCompression compression - = HdfsCompression.fromFileName(desc.getRelativePath().toString()); + HdfsCompression compression = HdfsCompression.fromFileName(desc.getPath()); if (HdfsCompression.SUFFIX_MAP.containsValue(compression)) { estimatedPartitionSize += Math.round(desc.getFileLength() * ESTIMATED_COMPRESSION_FACTOR_LEGACY); diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java b/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java index a974d9322..e5989a451 
100644 --- a/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java +++ b/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java @@ -351,7 +351,7 @@ public class IcebergScanPlanner { fileDesc = FeIcebergTable.Utils.getFileDescriptor( new Path(cf.path().toString()), new Path(getIceTable().getIcebergTableLocation()), - getIceTable().getHostIndex()); + getIceTable()); } catch (IOException ex) { throw new ImpalaRuntimeException( "Cannot load file descriptor for " + cf.path(), ex); diff --git a/fe/src/main/java/org/apache/impala/util/AcidUtils.java b/fe/src/main/java/org/apache/impala/util/AcidUtils.java index f613ebf9a..c8efe788b 100644 --- a/fe/src/main/java/org/apache/impala/util/AcidUtils.java +++ b/fe/src/main/java/org/apache/impala/util/AcidUtils.java @@ -22,13 +22,24 @@ import com.google.common.base.Stopwatch; import com.google.errorprone.annotations.Immutable; import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import javax.annotation.Nullable; + import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.common.ValidWriteIdList.RangeResponse; -import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.impala.catalog.CatalogException; import org.apache.impala.catalog.CatalogServiceCatalog; import org.apache.impala.catalog.Column; @@ -36,38 +47,17 @@ import org.apache.impala.catalog.FileMetadataLoader.LoadStats; import org.apache.impala.catalog.HdfsPartition; import org.apache.impala.catalog.HdfsPartition.FileDescriptor; import org.apache.impala.catalog.HdfsTable; -import 
org.apache.impala.catalog.MetaStoreClientPool; import org.apache.impala.catalog.ScalarType; import org.apache.impala.catalog.StructField; import org.apache.impala.catalog.StructType; import org.apache.impala.common.FileSystemUtil; import org.apache.impala.common.Pair; import org.apache.impala.common.PrintUtils; -import org.apache.impala.common.Reference; import org.apache.impala.compat.MetastoreShim; -import org.apache.impala.thrift.THdfsFileDesc; -import org.apache.impala.thrift.TPartialPartitionInfo; import org.apache.impala.thrift.TTransactionalType; -import org.apache.thrift.TException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import java.util.function.Predicate; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.stream.Collectors; - -import javax.annotation.Nullable; - /** * Contains utility functions for working with Acid tables. * <p> @@ -419,7 +409,7 @@ public class AcidUtils { * Returns true if 'fd' refers to a delete delta file. 
*/ public static boolean isDeleteDeltaFd(FileDescriptor fd) { - return fd.getRelativePath().startsWith("delete_delta_"); + return fd.getPath().startsWith("delete_delta_"); } /** @@ -447,7 +437,7 @@ public class AcidUtils { Iterator<FileDescriptor> it = fds.iterator(); int numRemoved = 0; while (it.hasNext()) { - if (!writeListBasedPredicate.check(it.next().getRelativePath())) { + if (!writeListBasedPredicate.check(it.next().getPath())) { it.remove(); numRemoved++; } diff --git a/fe/src/test/java/org/apache/impala/planner/ExplainTest.java b/fe/src/test/java/org/apache/impala/planner/ExplainTest.java index 4d32b93f1..ec84ea947 100644 --- a/fe/src/test/java/org/apache/impala/planner/ExplainTest.java +++ b/fe/src/test/java/org/apache/impala/planner/ExplainTest.java @@ -158,6 +158,7 @@ public class ExplainTest extends FrontendTestBase { HdfsPartition.FileDescriptor mockFileDesc = mock(HdfsPartition.FileDescriptor.class); when(mockFileDesc.getFileLength()).thenReturn(1L); when(mockFileDesc.getRelativePath()).thenReturn(""); + when(mockFileDesc.getPath()).thenReturn(""); mockFilesDescs.add(mockFileDesc); when(mockHdfsPartition.getLocationPath()) diff --git a/fe/src/test/java/org/apache/impala/testutil/BlockIdGenerator.java b/fe/src/test/java/org/apache/impala/testutil/BlockIdGenerator.java index 6d59e680e..50dfe99e5 100644 --- a/fe/src/test/java/org/apache/impala/testutil/BlockIdGenerator.java +++ b/fe/src/test/java/org/apache/impala/testutil/BlockIdGenerator.java @@ -78,7 +78,7 @@ public class BlockIdGenerator { for (FeFsPartition partition : parts) { List<FileDescriptor> fileDescriptors = partition.getFileDescriptors(); for (FileDescriptor fd : fileDescriptors) { - Path p = new Path(partition.getLocation(), fd.getRelativePath()); + Path p = new Path(fd.getAbsolutePath(partition.getLocation())); // Use a deprecated API to get block ids DistributedFileSystem dfs = diff --git a/testdata/data/README b/testdata/data/README index 21a5cd9b6..3f02fc9be 100644 --- 
a/testdata/data/README +++ b/testdata/data/README @@ -866,3 +866,33 @@ location '/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_positional_ tblproperties ('format-version'='2'); insert into iceberg_v2_positional_update_all_rows values (1,'a'), (2,'b'), (3,'c') update iceberg_v2_positional_update_all_rows set s = upper(s); + +iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations*: +- 'iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations' +- 'iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data' +- 'iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data01' +- 'iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data02' +Generated by Iceberg Java API version 0.13.2, the address of the document is https://iceberg.apache.org/docs/latest/api/ +Step 1, create the Iceberg table 'iceberg_multiple_storage_locations' that location is 'iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations': +'col_name','data_type' +col_int,int +col_bigint,bigint +col_float,float +col_double,double +col_string,string +col_timestamp,timestamp +col_date,date +'col_name','transform_type' +col_int,IDENTITY +Step 2, set the table property 'write.data.path' to '/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data' and insert 3 records: +0,12345678900,3.1400001049,2.7182,'a',1970-01-01 00:00:00,1974-02-09 +0,12345678901,3.1400001049,2.71821,'b',1970-01-01 00:00:00,1974-02-09 +1,12345678902,3.1400001049,2.71822,'c',1970-01-01 00:00:00,1974-02-09 +Step 3, update the table property 'write.data.path' to '/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data01' and insert 3 records: +1,12345678900,3.1400001049,2.7182,'a',1970-01-01 00:00:00,1974-02-09 +1,12345678901,3.1400001049,2.71821,'b',1970-01-01 00:00:00,1974-02-09 +2,12345678902,3.1400001049,2.71822,'c',1970-01-01 00:00:00,1974-02-09 +Step 4, update the table property 
'write.data.path' to '/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data02' and insert 3 records: +2,12345678900,3.1400001049,2.7182,'a',1970-01-01 00:00:00,1974-02-09 +2,12345678901,3.1400001049,2.71821,'b',1970-01-01 00:00:00,1974-02-09 +0,12345678902,3.1400001049,2.71822,'c',1970-01-01 00:00:00,1974-02-09 diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/42056022-e2d2-4548-9376-8993109c2ace-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/42056022-e2d2-4548-9376-8993109c2ace-m0.avro new file mode 100644 index 000000000..de91bbc27 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/42056022-e2d2-4548-9376-8993109c2ace-m0.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/b5880d95-f4f1-49cb-ba55-143c221017fe-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/b5880d95-f4f1-49cb-ba55-143c221017fe-m0.avro new file mode 100644 index 000000000..597ba1d39 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/b5880d95-f4f1-49cb-ba55-143c221017fe-m0.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/ce7ad1c8-1ad5-4391-a640-b203d7c476a4-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/ce7ad1c8-1ad5-4391-a640-b203d7c476a4-m0.avro new file mode 100644 index 000000000..adbfe2598 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/ce7ad1c8-1ad5-4391-a640-b203d7c476a4-m0.avro differ diff --git 
a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/snap-4264681048229339305-1-b5880d95-f4f1-49cb-ba55-143c221017fe.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/snap-4264681048229339305-1-b5880d95-f4f1-49cb-ba55-143c221017fe.avro new file mode 100644 index 000000000..c288d924f Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/snap-4264681048229339305-1-b5880d95-f4f1-49cb-ba55-143c221017fe.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/snap-4265463682522664668-1-ce7ad1c8-1ad5-4391-a640-b203d7c476a4.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/snap-4265463682522664668-1-ce7ad1c8-1ad5-4391-a640-b203d7c476a4.avro new file mode 100644 index 000000000..5d426fed2 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/snap-4265463682522664668-1-ce7ad1c8-1ad5-4391-a640-b203d7c476a4.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/snap-7684033746298894981-1-42056022-e2d2-4548-9376-8993109c2ace.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/snap-7684033746298894981-1-42056022-e2d2-4548-9376-8993109c2ace.avro new file mode 100644 index 000000000..cc475d469 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/snap-7684033746298894981-1-42056022-e2d2-4548-9376-8993109c2ace.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v1.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v1.metadata.json new file mode 100644 index 000000000..7a1018534 --- 
/dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v1.metadata.json @@ -0,0 +1,117 @@ +{ + "format-version" : 1, + "table-uuid" : "4a1fdbf7-c105-4dce-a924-85a59993bb32", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations", + "last-updated-ms" : 1661415755605, + "last-column-id" : 7, + "schema" : { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "col_int", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "col_bigint", + "required" : true, + "type" : "long" + }, { + "id" : 3, + "name" : "col_float", + "required" : true, + "type" : "float" + }, { + "id" : 4, + "name" : "col_double", + "required" : true, + "type" : "double" + }, { + "id" : 5, + "name" : "col_string", + "required" : true, + "type" : "string" + }, { + "id" : 6, + "name" : "col_timestamp", + "required" : true, + "type" : "timestamp" + }, { + "id" : 7, + "name" : "col_date", + "required" : true, + "type" : "date" + } ] + }, + "current-schema-id" : 0, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "col_int", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "col_bigint", + "required" : true, + "type" : "long" + }, { + "id" : 3, + "name" : "col_float", + "required" : true, + "type" : "float" + }, { + "id" : 4, + "name" : "col_double", + "required" : true, + "type" : "double" + }, { + "id" : 5, + "name" : "col_string", + "required" : true, + "type" : "string" + }, { + "id" : 6, + "name" : "col_timestamp", + "required" : true, + "type" : "timestamp" + }, { + "id" : 7, + "name" : "col_date", + "required" : true, + "type" : "date" + } ] + } ], + "partition-spec" : [ { + "name" : "col_int", + "transform" : "identity", + "source-id" : 1, + "field-id" : 1000 + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ { + "name" : "col_int", + "transform" : 
"identity", + "source-id" : 1, + "field-id" : 1000 + } ] + } ], + "last-partition-id" : 1000, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "write.data.path" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data" + }, + "current-snapshot-id" : -1, + "snapshots" : [ ], + "snapshot-log" : [ ], + "metadata-log" : [ ] +} \ No newline at end of file diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v2.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v2.metadata.json new file mode 100644 index 000000000..0696c02c1 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v2.metadata.json @@ -0,0 +1,141 @@ +{ + "format-version" : 1, + "table-uuid" : "4a1fdbf7-c105-4dce-a924-85a59993bb32", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations", + "last-updated-ms" : 1661415756879, + "last-column-id" : 7, + "schema" : { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "col_int", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "col_bigint", + "required" : true, + "type" : "long" + }, { + "id" : 3, + "name" : "col_float", + "required" : true, + "type" : "float" + }, { + "id" : 4, + "name" : "col_double", + "required" : true, + "type" : "double" + }, { + "id" : 5, + "name" : "col_string", + "required" : true, + "type" : "string" + }, { + "id" : 6, + "name" : "col_timestamp", + "required" : true, + "type" : "timestamp" + }, { + "id" : 7, + "name" : "col_date", + "required" : true, + "type" : "date" + } ] + }, + "current-schema-id" : 0, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "col_int", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : 
"col_bigint", + "required" : true, + "type" : "long" + }, { + "id" : 3, + "name" : "col_float", + "required" : true, + "type" : "float" + }, { + "id" : 4, + "name" : "col_double", + "required" : true, + "type" : "double" + }, { + "id" : 5, + "name" : "col_string", + "required" : true, + "type" : "string" + }, { + "id" : 6, + "name" : "col_timestamp", + "required" : true, + "type" : "timestamp" + }, { + "id" : 7, + "name" : "col_date", + "required" : true, + "type" : "date" + } ] + } ], + "partition-spec" : [ { + "name" : "col_int", + "transform" : "identity", + "source-id" : 1, + "field-id" : 1000 + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ { + "name" : "col_int", + "transform" : "identity", + "source-id" : 1, + "field-id" : 1000 + } ] + } ], + "last-partition-id" : 1000, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "write.data.path" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data" + }, + "current-snapshot-id" : 4264681048229339305, + "snapshots" : [ { + "snapshot-id" : 4264681048229339305, + "timestamp-ms" : 1661415756879, + "summary" : { + "operation" : "append", + "added-data-files" : "2", + "added-records" : "3", + "added-files-size" : "4181", + "changed-partition-count" : "2", + "total-records" : "3", + "total-files-size" : "4181", + "total-data-files" : "2", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/snap-4264681048229339305-1-b5880d95-f4f1-49cb-ba55-143c221017fe.avro", + "schema-id" : 0 + } ], + "snapshot-log" : [ { + "timestamp-ms" : 1661415756879, + "snapshot-id" : 4264681048229339305 + } ], + "metadata-log" : [ { + "timestamp-ms" : 1661415755605, + "metadata-file" : 
"/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v1.metadata.json" + } ] +} \ No newline at end of file diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v3.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v3.metadata.json new file mode 100644 index 000000000..3eb9790b0 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v3.metadata.json @@ -0,0 +1,144 @@ +{ + "format-version" : 1, + "table-uuid" : "4a1fdbf7-c105-4dce-a924-85a59993bb32", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations", + "last-updated-ms" : 1661415756955, + "last-column-id" : 7, + "schema" : { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "col_int", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "col_bigint", + "required" : true, + "type" : "long" + }, { + "id" : 3, + "name" : "col_float", + "required" : true, + "type" : "float" + }, { + "id" : 4, + "name" : "col_double", + "required" : true, + "type" : "double" + }, { + "id" : 5, + "name" : "col_string", + "required" : true, + "type" : "string" + }, { + "id" : 6, + "name" : "col_timestamp", + "required" : true, + "type" : "timestamp" + }, { + "id" : 7, + "name" : "col_date", + "required" : true, + "type" : "date" + } ] + }, + "current-schema-id" : 0, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "col_int", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "col_bigint", + "required" : true, + "type" : "long" + }, { + "id" : 3, + "name" : "col_float", + "required" : true, + "type" : "float" + }, { + "id" : 4, + "name" : "col_double", + "required" : true, + "type" : "double" + }, { + "id" : 5, + "name" : "col_string", + "required" : true, + "type" : "string" + }, { + 
"id" : 6, + "name" : "col_timestamp", + "required" : true, + "type" : "timestamp" + }, { + "id" : 7, + "name" : "col_date", + "required" : true, + "type" : "date" + } ] + } ], + "partition-spec" : [ { + "name" : "col_int", + "transform" : "identity", + "source-id" : 1, + "field-id" : 1000 + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ { + "name" : "col_int", + "transform" : "identity", + "source-id" : 1, + "field-id" : 1000 + } ] + } ], + "last-partition-id" : 1000, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "write.data.path" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data01" + }, + "current-snapshot-id" : 4264681048229339305, + "snapshots" : [ { + "snapshot-id" : 4264681048229339305, + "timestamp-ms" : 1661415756879, + "summary" : { + "operation" : "append", + "added-data-files" : "2", + "added-records" : "3", + "added-files-size" : "4181", + "changed-partition-count" : "2", + "total-records" : "3", + "total-files-size" : "4181", + "total-data-files" : "2", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/snap-4264681048229339305-1-b5880d95-f4f1-49cb-ba55-143c221017fe.avro", + "schema-id" : 0 + } ], + "snapshot-log" : [ { + "timestamp-ms" : 1661415756879, + "snapshot-id" : 4264681048229339305 + } ], + "metadata-log" : [ { + "timestamp-ms" : 1661415755605, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v1.metadata.json" + }, { + "timestamp-ms" : 1661415756879, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v2.metadata.json" + } ] +} \ No newline at end of file diff --git 
a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v4.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v4.metadata.json new file mode 100644 index 000000000..fe4ed425a --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v4.metadata.json @@ -0,0 +1,169 @@ +{ + "format-version" : 1, + "table-uuid" : "4a1fdbf7-c105-4dce-a924-85a59993bb32", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations", + "last-updated-ms" : 1661415757142, + "last-column-id" : 7, + "schema" : { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "col_int", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "col_bigint", + "required" : true, + "type" : "long" + }, { + "id" : 3, + "name" : "col_float", + "required" : true, + "type" : "float" + }, { + "id" : 4, + "name" : "col_double", + "required" : true, + "type" : "double" + }, { + "id" : 5, + "name" : "col_string", + "required" : true, + "type" : "string" + }, { + "id" : 6, + "name" : "col_timestamp", + "required" : true, + "type" : "timestamp" + }, { + "id" : 7, + "name" : "col_date", + "required" : true, + "type" : "date" + } ] + }, + "current-schema-id" : 0, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "col_int", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "col_bigint", + "required" : true, + "type" : "long" + }, { + "id" : 3, + "name" : "col_float", + "required" : true, + "type" : "float" + }, { + "id" : 4, + "name" : "col_double", + "required" : true, + "type" : "double" + }, { + "id" : 5, + "name" : "col_string", + "required" : true, + "type" : "string" + }, { + "id" : 6, + "name" : "col_timestamp", + "required" : true, + "type" : "timestamp" + }, { + "id" : 7, + "name" : "col_date", + "required" : true, + "type" : 
"date" + } ] + } ], + "partition-spec" : [ { + "name" : "col_int", + "transform" : "identity", + "source-id" : 1, + "field-id" : 1000 + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ { + "name" : "col_int", + "transform" : "identity", + "source-id" : 1, + "field-id" : 1000 + } ] + } ], + "last-partition-id" : 1000, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "write.data.path" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data01" + }, + "current-snapshot-id" : 4265463682522664668, + "snapshots" : [ { + "snapshot-id" : 4264681048229339305, + "timestamp-ms" : 1661415756879, + "summary" : { + "operation" : "append", + "added-data-files" : "2", + "added-records" : "3", + "added-files-size" : "4181", + "changed-partition-count" : "2", + "total-records" : "3", + "total-files-size" : "4181", + "total-data-files" : "2", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/snap-4264681048229339305-1-b5880d95-f4f1-49cb-ba55-143c221017fe.avro", + "schema-id" : 0 + }, { + "snapshot-id" : 4265463682522664668, + "parent-snapshot-id" : 4264681048229339305, + "timestamp-ms" : 1661415757142, + "summary" : { + "operation" : "append", + "added-data-files" : "2", + "added-records" : "3", + "added-files-size" : "4181", + "changed-partition-count" : "2", + "total-records" : "6", + "total-files-size" : "8362", + "total-data-files" : "4", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/snap-4265463682522664668-1-ce7ad1c8-1ad5-4391-a640-b203d7c476a4.avro", + "schema-id" : 0 + } ], + "snapshot-log" : [ { + 
"timestamp-ms" : 1661415756879, + "snapshot-id" : 4264681048229339305 + }, { + "timestamp-ms" : 1661415757142, + "snapshot-id" : 4265463682522664668 + } ], + "metadata-log" : [ { + "timestamp-ms" : 1661415755605, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v1.metadata.json" + }, { + "timestamp-ms" : 1661415756879, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v2.metadata.json" + }, { + "timestamp-ms" : 1661415756955, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v3.metadata.json" + } ] +} \ No newline at end of file diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v5.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v5.metadata.json new file mode 100644 index 000000000..8448fe3a8 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v5.metadata.json @@ -0,0 +1,172 @@ +{ + "format-version" : 1, + "table-uuid" : "4a1fdbf7-c105-4dce-a924-85a59993bb32", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations", + "last-updated-ms" : 1661415757174, + "last-column-id" : 7, + "schema" : { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "col_int", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "col_bigint", + "required" : true, + "type" : "long" + }, { + "id" : 3, + "name" : "col_float", + "required" : true, + "type" : "float" + }, { + "id" : 4, + "name" : "col_double", + "required" : true, + "type" : "double" + }, { + "id" : 5, + "name" : "col_string", + "required" : true, + "type" : "string" + }, { + "id" : 6, + "name" : "col_timestamp", + "required" : true, + "type" : "timestamp" + }, { + "id" : 7, + "name" : 
"col_date", + "required" : true, + "type" : "date" + } ] + }, + "current-schema-id" : 0, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "col_int", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "col_bigint", + "required" : true, + "type" : "long" + }, { + "id" : 3, + "name" : "col_float", + "required" : true, + "type" : "float" + }, { + "id" : 4, + "name" : "col_double", + "required" : true, + "type" : "double" + }, { + "id" : 5, + "name" : "col_string", + "required" : true, + "type" : "string" + }, { + "id" : 6, + "name" : "col_timestamp", + "required" : true, + "type" : "timestamp" + }, { + "id" : 7, + "name" : "col_date", + "required" : true, + "type" : "date" + } ] + } ], + "partition-spec" : [ { + "name" : "col_int", + "transform" : "identity", + "source-id" : 1, + "field-id" : 1000 + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ { + "name" : "col_int", + "transform" : "identity", + "source-id" : 1, + "field-id" : 1000 + } ] + } ], + "last-partition-id" : 1000, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "write.data.path" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data02" + }, + "current-snapshot-id" : 4265463682522664668, + "snapshots" : [ { + "snapshot-id" : 4264681048229339305, + "timestamp-ms" : 1661415756879, + "summary" : { + "operation" : "append", + "added-data-files" : "2", + "added-records" : "3", + "added-files-size" : "4181", + "changed-partition-count" : "2", + "total-records" : "3", + "total-files-size" : "4181", + "total-data-files" : "2", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/snap-4264681048229339305-1-b5880d95-f4f1-49cb-ba55-143c221017fe.avro", 
+ "schema-id" : 0 + }, { + "snapshot-id" : 4265463682522664668, + "parent-snapshot-id" : 4264681048229339305, + "timestamp-ms" : 1661415757142, + "summary" : { + "operation" : "append", + "added-data-files" : "2", + "added-records" : "3", + "added-files-size" : "4181", + "changed-partition-count" : "2", + "total-records" : "6", + "total-files-size" : "8362", + "total-data-files" : "4", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/snap-4265463682522664668-1-ce7ad1c8-1ad5-4391-a640-b203d7c476a4.avro", + "schema-id" : 0 + } ], + "snapshot-log" : [ { + "timestamp-ms" : 1661415756879, + "snapshot-id" : 4264681048229339305 + }, { + "timestamp-ms" : 1661415757142, + "snapshot-id" : 4265463682522664668 + } ], + "metadata-log" : [ { + "timestamp-ms" : 1661415755605, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v1.metadata.json" + }, { + "timestamp-ms" : 1661415756879, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v2.metadata.json" + }, { + "timestamp-ms" : 1661415756955, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v3.metadata.json" + }, { + "timestamp-ms" : 1661415757142, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v4.metadata.json" + } ] +} \ No newline at end of file diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v6.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v6.metadata.json new file mode 100644 index 000000000..9af6df221 --- /dev/null +++ 
b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v6.metadata.json @@ -0,0 +1,197 @@ +{ + "format-version" : 1, + "table-uuid" : "4a1fdbf7-c105-4dce-a924-85a59993bb32", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations", + "last-updated-ms" : 1661415757283, + "last-column-id" : 7, + "schema" : { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "col_int", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "col_bigint", + "required" : true, + "type" : "long" + }, { + "id" : 3, + "name" : "col_float", + "required" : true, + "type" : "float" + }, { + "id" : 4, + "name" : "col_double", + "required" : true, + "type" : "double" + }, { + "id" : 5, + "name" : "col_string", + "required" : true, + "type" : "string" + }, { + "id" : 6, + "name" : "col_timestamp", + "required" : true, + "type" : "timestamp" + }, { + "id" : 7, + "name" : "col_date", + "required" : true, + "type" : "date" + } ] + }, + "current-schema-id" : 0, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "col_int", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "col_bigint", + "required" : true, + "type" : "long" + }, { + "id" : 3, + "name" : "col_float", + "required" : true, + "type" : "float" + }, { + "id" : 4, + "name" : "col_double", + "required" : true, + "type" : "double" + }, { + "id" : 5, + "name" : "col_string", + "required" : true, + "type" : "string" + }, { + "id" : 6, + "name" : "col_timestamp", + "required" : true, + "type" : "timestamp" + }, { + "id" : 7, + "name" : "col_date", + "required" : true, + "type" : "date" + } ] + } ], + "partition-spec" : [ { + "name" : "col_int", + "transform" : "identity", + "source-id" : 1, + "field-id" : 1000 + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ { + "name" : "col_int", + "transform" : "identity", + 
"source-id" : 1, + "field-id" : 1000 + } ] + } ], + "last-partition-id" : 1000, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "write.data.path" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data02" + }, + "current-snapshot-id" : 7684033746298894981, + "snapshots" : [ { + "snapshot-id" : 4264681048229339305, + "timestamp-ms" : 1661415756879, + "summary" : { + "operation" : "append", + "added-data-files" : "2", + "added-records" : "3", + "added-files-size" : "4181", + "changed-partition-count" : "2", + "total-records" : "3", + "total-files-size" : "4181", + "total-data-files" : "2", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/snap-4264681048229339305-1-b5880d95-f4f1-49cb-ba55-143c221017fe.avro", + "schema-id" : 0 + }, { + "snapshot-id" : 4265463682522664668, + "parent-snapshot-id" : 4264681048229339305, + "timestamp-ms" : 1661415757142, + "summary" : { + "operation" : "append", + "added-data-files" : "2", + "added-records" : "3", + "added-files-size" : "4181", + "changed-partition-count" : "2", + "total-records" : "6", + "total-files-size" : "8362", + "total-data-files" : "4", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/snap-4265463682522664668-1-ce7ad1c8-1ad5-4391-a640-b203d7c476a4.avro", + "schema-id" : 0 + }, { + "snapshot-id" : 7684033746298894981, + "parent-snapshot-id" : 4265463682522664668, + "timestamp-ms" : 1661415757283, + "summary" : { + "operation" : "append", + "added-data-files" : "2", + "added-records" : "3", + "added-files-size" : "4181", + "changed-partition-count" : "2", + "total-records" : "9", + 
"total-files-size" : "12543", + "total-data-files" : "6", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/snap-7684033746298894981-1-42056022-e2d2-4548-9376-8993109c2ace.avro", + "schema-id" : 0 + } ], + "snapshot-log" : [ { + "timestamp-ms" : 1661415756879, + "snapshot-id" : 4264681048229339305 + }, { + "timestamp-ms" : 1661415757142, + "snapshot-id" : 4265463682522664668 + }, { + "timestamp-ms" : 1661415757283, + "snapshot-id" : 7684033746298894981 + } ], + "metadata-log" : [ { + "timestamp-ms" : 1661415755605, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v1.metadata.json" + }, { + "timestamp-ms" : 1661415756879, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v2.metadata.json" + }, { + "timestamp-ms" : 1661415756955, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v3.metadata.json" + }, { + "timestamp-ms" : 1661415757142, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v4.metadata.json" + }, { + "timestamp-ms" : 1661415757174, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/v5.metadata.json" + } ] +} \ No newline at end of file diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/version-hint.text b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/version-hint.text new file mode 100644 index 000000000..62f945751 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations/metadata/version-hint.text @@ -0,0 +1 @@ +6 \ No newline at end of file diff 
--git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data/col_int=0/00001-1-5a94b6af-6ee7-4910-9bf5-165a9a4e71df-00001.parquet b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data/col_int=0/00001-1-5a94b6af-6ee7-4910-9bf5-165a9a4e71df-00001.parquet new file mode 100644 index 000000000..20333703b Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data/col_int=0/00001-1-5a94b6af-6ee7-4910-9bf5-165a9a4e71df-00001.parquet differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data/col_int=1/00001-1-5a94b6af-6ee7-4910-9bf5-165a9a4e71df-00002.parquet b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data/col_int=1/00001-1-5a94b6af-6ee7-4910-9bf5-165a9a4e71df-00002.parquet new file mode 100644 index 000000000..c3c9a8241 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data/col_int=1/00001-1-5a94b6af-6ee7-4910-9bf5-165a9a4e71df-00002.parquet differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data01/col_int=1/00001-1-7ac79643-e19f-4294-914e-7b122aff576c-00001.parquet b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data01/col_int=1/00001-1-7ac79643-e19f-4294-914e-7b122aff576c-00001.parquet new file mode 100644 index 000000000..0755453c4 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data01/col_int=1/00001-1-7ac79643-e19f-4294-914e-7b122aff576c-00001.parquet differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data01/col_int=2/00001-1-7ac79643-e19f-4294-914e-7b122aff576c-00002.parquet 
b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data01/col_int=2/00001-1-7ac79643-e19f-4294-914e-7b122aff576c-00002.parquet new file mode 100644 index 000000000..84847f3aa Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data01/col_int=2/00001-1-7ac79643-e19f-4294-914e-7b122aff576c-00002.parquet differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data02/col_int=0/00001-1-26bc91ef-b403-4b65-a6b0-566396b8d097-00002.parquet b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data02/col_int=0/00001-1-26bc91ef-b403-4b65-a6b0-566396b8d097-00002.parquet new file mode 100644 index 000000000..c5357b62f Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data02/col_int=0/00001-1-26bc91ef-b403-4b65-a6b0-566396b8d097-00002.parquet differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data02/col_int=2/00001-1-26bc91ef-b403-4b65-a6b0-566396b8d097-00001.parquet b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data02/col_int=2/00001-1-26bc91ef-b403-4b65-a6b0-566396b8d097-00001.parquet new file mode 100644 index 000000000..84878a9b9 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data02/col_int=2/00001-1-26bc91ef-b403-4b65-a6b0-566396b8d097-00001.parquet differ diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql index c8d88f8df..fdb5b6f48 100644 --- a/testdata/datasets/functional/functional_schema_template.sql +++ b/testdata/datasets/functional/functional_schema_template.sql @@ -3445,6 +3445,23 @@ hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/i ---- DATASET functional ---- BASE_TABLE_NAME 
+iceberg_multiple_storage_locations +---- CREATE +CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} +STORED AS ICEBERG +TBLPROPERTIES('write.format.default'='parquet', 'iceberg.catalog'='hadoop.catalog', + 'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog', + 'iceberg.table_identifier'='ice.iceberg_multiple_storage_locations'); +---- DEPENDENT_LOAD +`hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog/ice && \ +hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations /test-warehouse/iceberg_test/hadoop_catalog/ice && \ +hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data /test-warehouse/iceberg_test/hadoop_catalog/ice && \ +hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data01 /test-warehouse/iceberg_test/hadoop_catalog/ice && \ +hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data02 /test-warehouse/iceberg_test/hadoop_catalog/ice +==== +---- DATASET +functional +---- BASE_TABLE_NAME iceberg_v2_no_deletes ---- CREATE CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} diff --git a/testdata/datasets/functional/schema_constraints.csv b/testdata/datasets/functional/schema_constraints.csv index 2836a6f82..c42602db0 100644 --- a/testdata/datasets/functional/schema_constraints.csv +++ b/testdata/datasets/functional/schema_constraints.csv @@ -91,6 +91,7 @@ table_name:iceberg_v2_positional_not_all_data_files_have_delete_files, constrain table_name:iceberg_v2_positional_not_all_data_files_have_delete_files_orc, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_v2_partitioned_position_deletes, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_v2_partitioned_position_deletes_orc, constraint:restrict_to, 
table_format:parquet/none/none +table_name:iceberg_multiple_storage_locations, constraint:restrict_to, table_format:parquet/none/none # TODO: Support Avro. Data loading currently fails for Avro because complex types # cannot be converted to the corresponding Avro types yet. diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-multiple-storage-locations-table.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-multiple-storage-locations-table.test new file mode 100644 index 000000000..82bd854a4 --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-multiple-storage-locations-table.test @@ -0,0 +1,75 @@ +==== +---- QUERY +select + * +from + functional_parquet.iceberg_multiple_storage_locations +order by + col_int, + col_bigint; +---- RESULTS +0,12345678900,3.1400001049,2.7182,'a',1970-01-01 00:00:00,1974-02-09 +0,12345678901,3.1400001049,2.71821,'b',1970-01-01 00:00:00,1974-02-09 +0,12345678902,3.1400001049,2.71822,'c',1970-01-01 00:00:00,1974-02-09 +1,12345678900,3.1400001049,2.7182,'a',1970-01-01 00:00:00,1974-02-09 +1,12345678901,3.1400001049,2.71821,'b',1970-01-01 00:00:00,1974-02-09 +1,12345678902,3.1400001049,2.71822,'c',1970-01-01 00:00:00,1974-02-09 +2,12345678900,3.1400001049,2.7182,'a',1970-01-01 00:00:00,1974-02-09 +2,12345678901,3.1400001049,2.71821,'b',1970-01-01 00:00:00,1974-02-09 +2,12345678902,3.1400001049,2.71822,'c',1970-01-01 00:00:00,1974-02-09 +---- TYPES +INT,BIGINT,FLOAT,DOUBLE,STRING,TIMESTAMP,DATE +---- RUNTIME_PROFILE +aggregation(SUM, NumRowGroups): 6 +aggregation(SUM, RowsRead): 9 +==== +---- QUERY +show files in functional_parquet.iceberg_multiple_storage_locations; +---- RESULTS +row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data/col_int=0/.*1.parquet','.*','' +row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data/col_int=1/.*2.parquet','.*','' 
+row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data01/col_int=1/.*1.parquet','.*','' +row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data01/col_int=2/.*2.parquet','.*','' +row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data02/col_int=0/.*2.parquet','.*','' +row_regex:'$NAMENODE/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_multiple_storage_locations_data02/col_int=2/.*1.parquet','.*','' +---- TYPES +STRING, STRING, STRING +==== +---- QUERY +select + * +from + functional_parquet.iceberg_multiple_storage_locations +where + col_int = 2 +order by + col_bigint; +---- RESULTS +2,12345678900,3.1400001049,2.7182,'a',1970-01-01 00:00:00,1974-02-09 +2,12345678901,3.1400001049,2.71821,'b',1970-01-01 00:00:00,1974-02-09 +2,12345678902,3.1400001049,2.71822,'c',1970-01-01 00:00:00,1974-02-09 +---- TYPES +INT,BIGINT,FLOAT,DOUBLE,STRING,TIMESTAMP,DATE +---- RUNTIME_PROFILE +aggregation(SUM, NumRowGroups): 2 +aggregation(SUM, RowsRead): 3 +==== +---- QUERY +select + * +from + functional_parquet.iceberg_multiple_storage_locations +where + col_string = "a" +order by + col_int; +---- RESULTS +0,12345678900,3.1400001049,2.7182,'a',1970-01-01 00:00:00,1974-02-09 +1,12345678900,3.1400001049,2.7182,'a',1970-01-01 00:00:00,1974-02-09 +2,12345678900,3.1400001049,2.7182,'a',1970-01-01 00:00:00,1974-02-09 +---- TYPES +INT,BIGINT,FLOAT,DOUBLE,STRING,TIMESTAMP,DATE +---- RUNTIME_PROFILE +aggregation(SUM, NumRowGroups): 3 +aggregation(SUM, RowsRead): 6 +==== diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py index 1cc9214b6..4dc20f94e 100644 --- a/tests/query_test/test_iceberg.py +++ b/tests/query_test/test_iceberg.py @@ -780,6 +780,10 @@ class TestIcebergTable(IcebergTestSuite): args = ['-q', 'DROP TABLE {0}.{1}'.format(db_name, tbl_name)] results = run_impala_shell_cmd(vector, args) + def 
test_multiple_storage_locations(self, vector, unique_database): + self.run_test_case('QueryTest/iceberg-multiple-storage-locations-table', + vector, unique_database) + class TestIcebergV2Table(IcebergTestSuite): """Tests related to Iceberg V2 tables.""" @@ -805,4 +809,4 @@ class TestIcebergV2Table(IcebergTestSuite): @SkipIfDockerizedCluster.internal_hostname @SkipIf.not_hdfs def test_read_position_deletes_orc(self, vector): - self.run_test_case('QueryTest/iceberg-v2-read-position-deletes-orc', vector) \ No newline at end of file + self.run_test_case('QueryTest/iceberg-v2-read-position-deletes-orc', vector)
