This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 4b3583211aa27922555a78993b79f04afa4d8bdb Author: Michael Blow <[email protected]> AuthorDate: Tue Mar 2 16:28:47 2021 -0500 [ASTERIXDB-2841][*DB][STO] Encode multiple-dataverse parts as subdirs on disk - Multipart dataverse names are expressed on disk as a directory tree - The first part is expressed normally, subsequent parts have a caret (^) prepended Change-Id: Idcfc45eb7f39153349a13d2baecb784244bdf177 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/10324 Reviewed-by: Michael Blow <[email protected]> Tested-by: Michael Blow <[email protected]> --- .../common/storage/DatasetCopyIdentifier.java | 4 +- .../asterix/common/storage/ResourceReference.java | 61 +++++++++++++++++----- .../asterix/common/utils/StoragePathUtil.java | 18 +++++-- .../apache/asterix/external/util/FeedUtils.java | 4 +- .../metadata/utils/SplitsAndConstraintsUtil.java | 9 ++-- 5 files changed, 72 insertions(+), 24 deletions(-) diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/DatasetCopyIdentifier.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/DatasetCopyIdentifier.java index bf72c19..e520271 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/DatasetCopyIdentifier.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/DatasetCopyIdentifier.java @@ -71,8 +71,8 @@ public class DatasetCopyIdentifier implements Serializable { } public boolean isMatch(ResourceReference resourceReference) { - return resourceReference.getDataverse().equals(dataverse.getCanonicalForm()) - && resourceReference.getDataset().equals(dataset) && resourceReference.getRebalance().equals(rebalance); + return resourceReference.getDataverse().equals(dataverse) && resourceReference.getDataset().equals(dataset) + && resourceReference.getRebalance().equals(rebalance); } @Override diff --git 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/ResourceReference.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/ResourceReference.java index 7791926..0e78152 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/ResourceReference.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/ResourceReference.java @@ -21,24 +21,34 @@ package org.apache.asterix.common.storage; import java.io.File; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.apache.asterix.common.metadata.DataverseName; import org.apache.asterix.common.utils.StorageConstants; +import org.apache.asterix.common.utils.StoragePathUtil; +import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hyracks.storage.am.lsm.common.impls.IndexComponentFileReference; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; public class ResourceReference { + private static final Logger LOGGER = LogManager.getLogger(); protected final String root; protected final String partition; - protected final String dataverse; // == DataverseName.getCanonicalForm() + protected final DataverseName dataverse; protected final String dataset; protected final String rebalance; protected final String index; protected final String name; - private volatile Path relativePath; + private final Path relativePath; protected ResourceReference(String path) { // format: root/partition/dataverse/dataset/rebalanceCount/index/fileName + // format: root/partition/dataverse_p1[/^dataverse_p2[/^dataverse_p3...]]/dataset/rebalanceCount/index/fileName final String[] tokens = StringUtils.split(path, File.separatorChar); if (tokens.length < 6) { throw new IllegalStateException("Unrecognized path structure: " + path); @@ -48,9 +58,40 @@ public class 
ResourceReference { index = tokens[--offset]; rebalance = tokens[--offset]; dataset = tokens[--offset]; - dataverse = tokens[--offset]; //TODO(MULTI_PART_DATAVERSE_NAME):REVISIT - partition = tokens[--offset]; - root = tokens[--offset]; + List<String> dvParts = new ArrayList<>(); + String dvPart = tokens[--offset]; + while (dvPart.charAt(0) == StoragePathUtil.DATAVERSE_CONTINUATION_MARKER) { + dvParts.add(dvPart.substring(1)); + dvPart = tokens[--offset]; + } + String probablyPartition = tokens[--offset]; + if (dvParts.isEmpty()) { + // root/partition/dataverse/dataset/rebalanceCount/index/fileName + dataverse = DataverseName.createSinglePartName(dvPart); + partition = probablyPartition; + root = tokens[--offset]; + } else if (probablyPartition.startsWith(StorageConstants.PARTITION_DIR_PREFIX)) { + // root/partition/dataverse_p1/^dataverse_p2/.../^dataverse_pn/dataset/rebalanceCount/index/fileName + dvParts.add(dvPart); + Collections.reverse(dvParts); + dataverse = DataverseName.create(dvParts); + partition = probablyPartition; + root = tokens[--offset]; + } else if (dvPart.startsWith(StorageConstants.PARTITION_DIR_PREFIX)) { + // root/partition/dataverse/dataset/rebalanceCount/index/fileName (where dataverse starts with ^) + if (dvParts.size() != 1) { + throw new IllegalArgumentException("unable to parse path: '" + path + "'!"); + } + dataverse = + DataverseName.createSinglePartName(StoragePathUtil.DATAVERSE_CONTINUATION_MARKER + dvParts.get(0)); + LOGGER.info("legacy dataverse starting with ^ found: '{}'; this is not supported for new dataverses", + dataverse); + partition = dvPart; + root = probablyPartition; + } else { + throw new IllegalArgumentException("unable to parse path: '" + path + "'!"); + } + relativePath = Paths.get(root, ArrayUtils.subarray(tokens, offset + 1, tokens.length - 1)); } public static ResourceReference ofIndex(String indexPath) { @@ -65,7 +106,7 @@ public class ResourceReference { return partition; } - public String getDataverse() { 
//TODO(MULTI_PART_DATAVERSE_NAME):REVISIT + public DataverseName getDataverse() { return dataverse; } @@ -86,19 +127,15 @@ public class ResourceReference { } public Path getRelativePath() { - if (relativePath == null) { - relativePath = Paths.get(root, partition, dataverse, dataset, rebalance, index); - } return relativePath; } public ResourceReference getDatasetReference() { - return ResourceReference - .ofIndex(Paths.get(root, partition, dataverse, dataset, rebalance, dataset).toFile().getPath()); + return ResourceReference.ofIndex(relativePath.getParent().resolve(dataset).toFile().getPath()); } public Path getFileRelativePath() { - return Paths.get(root, partition, dataverse, dataset, rebalance, index, name); + return relativePath.resolve(name); } public int getPartitionNum() { diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/StoragePathUtil.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/StoragePathUtil.java index 587b8b3..32a226e 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/StoragePathUtil.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/StoragePathUtil.java @@ -21,6 +21,7 @@ package org.apache.asterix.common.utils; import java.io.File; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.Iterator; import org.apache.asterix.common.cluster.ClusterPartition; import org.apache.asterix.common.metadata.DataverseName; @@ -40,6 +41,7 @@ import org.apache.logging.log4j.Logger; public class StoragePathUtil { private static final Logger LOGGER = LogManager.getLogger(); + public static final char DATAVERSE_CONTINUATION_MARKER = '^'; private StoragePathUtil() { } @@ -66,11 +68,21 @@ public class StoragePathUtil { public static String prepareDataverseIndexName(DataverseName dataverseName, String datasetName, String idxName, long rebalanceCount) { - return prepareDataverseIndexName(dataverseName, 
prepareFullIndexName(datasetName, idxName, rebalanceCount)); + return prepareDataverseComponentName(dataverseName, prepareFullIndexName(datasetName, idxName, rebalanceCount)); } - public static String prepareDataverseIndexName(DataverseName dataverseName, String fullIndexName) { - return dataverseName.getCanonicalForm() + File.separator + fullIndexName; //TODO(MULTI_PART_DATAVERSE_NAME):REVISIT + public static String prepareDataverseName(DataverseName dataverseName) { + Iterator<String> dvParts = dataverseName.getParts().iterator(); + StringBuilder builder = new StringBuilder(); + builder.append(dvParts.next()); + while (dvParts.hasNext()) { + builder.append(File.separatorChar).append(DATAVERSE_CONTINUATION_MARKER).append(dvParts.next()); + } + return builder.toString(); + } + + public static String prepareDataverseComponentName(DataverseName dataverseName, String component) { + return prepareDataverseName(dataverseName) + File.separatorChar + component; } private static String prepareFullIndexName(String datasetName, String idxName, long rebalanceCount) { diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/FeedUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/FeedUtils.java index 2110dee..7f3d911 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/FeedUtils.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/FeedUtils.java @@ -86,10 +86,10 @@ public class FeedUtils { public static FileSplit splitsForAdapter(DataverseName dataverseName, String feedName, String nodeName, ClusterPartition partition) { - String relPathFile = dataverseName.getCanonicalForm() + File.separator + feedName; //TODO(MULTI_PART_DATAVERSE_NAME):REVISIT + String relPathFile = StoragePathUtil.prepareDataverseComponentName(dataverseName, feedName); String storagePartitionPath = 
StoragePathUtil.prepareStoragePartitionPath(partition.getPartitionId()); // Note: feed adapter instances in a single node share the feed logger - // format: 'storage dir name'/partition_#/dataverse/feed/node + // format: 'storage dir name'/partition_#/dataverse_part1[/^dataverse_part2[...]]/feed/node File f = new File(storagePartitionPath + File.separator + relPathFile + File.separator + nodeName); return StoragePathUtil.getFileSplitForClusterPartition(partition, f.getPath()); } diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java index b93674c..c85f661 100644 --- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java +++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java @@ -48,11 +48,10 @@ public class SplitsAndConstraintsUtil { DataverseName dataverseName) { List<FileSplit> splits = new ArrayList<>(); // get all partitions - ClusterPartition[] clusterPartition = clusterStateManager.getClusterPartitons(); - for (int j = 0; j < clusterPartition.length; j++) { - File f = new File(StoragePathUtil.prepareStoragePartitionPath(clusterPartition[j].getPartitionId()), - dataverseName.getCanonicalForm()); //TODO(MULTI_PART_DATAVERSE_NAME):REVISIT - splits.add(StoragePathUtil.getFileSplitForClusterPartition(clusterPartition[j], f.getPath())); + for (ClusterPartition clusterPartition : clusterStateManager.getClusterPartitons()) { + File f = new File(StoragePathUtil.prepareStoragePartitionPath(clusterPartition.getPartitionId()), + StoragePathUtil.prepareDataverseName(dataverseName)); + splits.add(StoragePathUtil.getFileSplitForClusterPartition(clusterPartition, f.getPath())); } return splits.toArray(new FileSplit[] {}); }
