This is an automated email from the ASF dual-hosted git repository. mhubail pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
     new 5ba7d8152b [ASTERIXDB-3153][OTH] Make the default storage format configurable
5ba7d8152b is described below

commit 5ba7d8152b6f157676bd03820eb917009cb155a0
Author: Wail Alkowaileet <wael....@gmail.com>
AuthorDate: Sun Mar 26 10:30:07 2023 -0700

    [ASTERIXDB-3153][OTH] Make the default storage format configurable

    - user model changes: yes
    - storage format changes: no
    - interface changes: no

    Details:
    Currently, columnar datasets must be declared explicitly
    using the WITH clause. We should extend AsterixDB's capability
    to configure the default storage format (either row or column).

    Change-Id: I173dd026528aa4d35dbdddcf1de4a55249c19caf
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17447
    Integration-Tests: Jenkins <jenk...@fulliautomatix.ics.uci.edu>
    Tested-by: Jenkins <jenk...@fulliautomatix.ics.uci.edu>
    Reviewed-by: Wail Alkowaileet <wael....@gmail.com>
    Reviewed-by: Murtadha Al Hubail <mhub...@apache.org>
---
 .../asterix/app/translator/QueryTranslator.java | 4 ++--
 .../asterix/common/config/StorageProperties.java | 9 ++++++++-
 .../asterix/lang/common/statement/DatasetDecl.java | 21 ++++++++++++++-------
 .../asterix/metadata/dataset/DatasetFormatInfo.java | 6 +++++-
 .../apache/asterix/metadata/entities/Dataset.java | 2 +-
 .../DatasetTupleTranslator.java | 4 ++--
 .../DatasetTupleTranslatorTest.java | 2 +-
 .../IndexTupleTranslatorTest.java | 2 +-
 .../apache/asterix/object/base/AdmObjectNode.java | 14 ++++++++++++++
 9 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
index 15a8238a6b..3fa74230bc 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
@@ -748,8 +748,8 @@ public class QueryTranslator extends AbstractLangTranslator implements IStatemen boolean itemTypeAdded = false, metaItemTypeAdded = false; StorageProperties storageProperties = metadataProvider.getStorageProperties(); - DatasetFormatInfo datasetFormatInfo = dd.getDatasetFormatInfo(storageProperties.getColumnMaxTupleCount(), - storageProperties.getColumnFreeSpaceTolerance()); + DatasetFormatInfo datasetFormatInfo = dd.getDatasetFormatInfo(storageProperties.getStorageFormat(), + storageProperties.getColumnMaxTupleCount(), storageProperties.getColumnFreeSpaceTolerance()); try { // Check if the dataverse exists Dataverse dv = MetadataManager.INSTANCE.getDataverse(mdTxnCtx, dataverseName); diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java index 5b99fa0e61..073da971f7 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java @@ -63,7 +63,8 @@ public class StorageProperties extends AbstractProperties { STORAGE_GLOBAL_CLEANUP(BOOLEAN, true), STORAGE_GLOBAL_CLEANUP_TIMEOUT(POSITIVE_INTEGER, (int) TimeUnit.MINUTES.toSeconds(10)), STORAGE_COLUMN_MAX_TUPLE_COUNT(NONNEGATIVE_INTEGER, 15000), - STORAGE_COLUMN_FREE_SPACE_TOLERANCE(DOUBLE, 0.15); + STORAGE_COLUMN_FREE_SPACE_TOLERANCE(DOUBLE, 0.15), + STORAGE_FORMAT(STRING, "row"); private final IOptionType interpreter; private final Object defaultValue; @@ -136,6 +137,8 @@ public class StorageProperties extends AbstractProperties { case STORAGE_COLUMN_FREE_SPACE_TOLERANCE: return "The percentage of the maximum tolerable empty space for a physical mega leaf page (e.g.," + " 0.15 means a physical page with 15% or less empty space is tolerable)"; + case STORAGE_FORMAT: + return "The default storage format (either 
row or column)"; default: throw new IllegalStateException("NYI: " + this); } @@ -280,4 +283,8 @@ public class StorageProperties extends AbstractProperties { public float getColumnFreeSpaceTolerance() { return (float) accessor.getDouble(Option.STORAGE_COLUMN_FREE_SPACE_TOLERANCE); } + + public String getStorageFormat() { + return accessor.getString(Option.STORAGE_FORMAT); + } } diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java index 8f48db0ff4..b8d1bfdc1d 100644 --- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java +++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java @@ -134,14 +134,21 @@ public class DatasetDecl extends AbstractStatement { .getOptionalString(DatasetDeclParametersUtil.STORAGE_BLOCK_COMPRESSION_SCHEME_PARAMETER_NAME); } - public DatasetFormatInfo getDatasetFormatInfo(int defaultMaxTupleCount, float defaultFreeSpaceTolerance) { - final AdmObjectNode datasetFormatNode = - (AdmObjectNode) withObjectNode.get(DatasetDeclParametersUtil.DATASET_FORMAT_PARAMETER_NAME); - if (datasetType != DatasetType.INTERNAL || datasetFormatNode == null) { - return DatasetFormatInfo.DEFAULT; + public DatasetFormatInfo getDatasetFormatInfo(String defaultFormat, int defaultMaxTupleCount, + float defaultFreeSpaceTolerance) { + if (datasetType != DatasetType.INTERNAL) { + return DatasetFormatInfo.SYSTEM_DEFAULT; } - DatasetConfig.DatasetFormat datasetFormat = DatasetConfig.DatasetFormat.getFormat( - datasetFormatNode.getOptionalString(DatasetDeclParametersUtil.DATASET_FORMAT_FORMAT_PARAMETER_NAME)); + + AdmObjectNode datasetFormatNode = (AdmObjectNode) withObjectNode + .getOrDefault(DatasetDeclParametersUtil.DATASET_FORMAT_PARAMETER_NAME, AdmObjectNode.EMPTY); + DatasetConfig.DatasetFormat datasetFormat 
= DatasetConfig.DatasetFormat.getFormat(datasetFormatNode + .getOptionalString(DatasetDeclParametersUtil.DATASET_FORMAT_FORMAT_PARAMETER_NAME, defaultFormat)); + + if (datasetFormat == DatasetConfig.DatasetFormat.ROW) { + return DatasetFormatInfo.SYSTEM_DEFAULT; + } + int maxTupleCount = datasetFormatNode.getOptionalInt( DatasetDeclParametersUtil.DATASET_FORMAT_MAX_TUPLE_COUNT_PARAMETER_NAME, defaultMaxTupleCount); float freeSpaceTolerance = datasetFormatNode.getOptionalFloat( diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java index 38951a4e2b..86d923337e 100644 --- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java +++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java @@ -20,11 +20,15 @@ package org.apache.asterix.metadata.dataset; import java.io.Serializable; +import org.apache.asterix.common.config.DatasetConfig; import org.apache.asterix.common.config.DatasetConfig.DatasetFormat; public class DatasetFormatInfo implements Serializable { private static final long serialVersionUID = 7656132322813253435L; - public static final DatasetFormatInfo DEFAULT = new DatasetFormatInfo(); + /** + * System's default format for non-{@link DatasetConfig.DatasetType#INTERNAL} datasets + */ + public static final DatasetFormatInfo SYSTEM_DEFAULT = new DatasetFormatInfo(); private final DatasetFormat format; private final int maxTupleCount; private final float freeSpaceTolerance; diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java index a35be40f9c..c0f2dddff4 100644 --- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java +++ 
b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java @@ -164,7 +164,7 @@ public class Dataset implements IMetadataEntity<Dataset>, IDataset { DatasetType datasetType, int datasetId, int pendingOp) { this(dataverseName, datasetName, recordTypeDataverseName, recordTypeName, /*metaTypeDataverseName*/null, /*metaTypeName*/null, nodeGroupName, compactionPolicy, compactionPolicyProperties, datasetDetails, - hints, datasetType, datasetId, pendingOp, CompressionManager.NONE, DatasetFormatInfo.DEFAULT); + hints, datasetType, datasetId, pendingOp, CompressionManager.NONE, DatasetFormatInfo.SYSTEM_DEFAULT); } public Dataset(DataverseName dataverseName, String datasetName, DataverseName itemTypeDataverseName, diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java index eafa331dcb..790faa5f0c 100644 --- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java +++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java @@ -433,7 +433,7 @@ public class DatasetTupleTranslator extends AbstractTupleTranslator<Dataset> { int datasetFormatIndex = datasetType.getFieldIndex(MetadataRecordTypes.DATASET_ARECORD_DATASET_FORMAT_FIELD_NAME); if (datasetFormatIndex < 0) { - return DatasetFormatInfo.DEFAULT; + return DatasetFormatInfo.SYSTEM_DEFAULT; } ARecordType datasetFormatType = (ARecordType) datasetType.getFieldTypes()[datasetFormatIndex]; @@ -676,7 +676,7 @@ public class DatasetTupleTranslator extends AbstractTupleTranslator<Dataset> { private void writeDatasetFormatInfo(Dataset dataset) throws HyracksDataException { DatasetFormatInfo info = dataset.getDatasetFormatInfo(); - if (DatasetFormatInfo.DEFAULT == info) { + if 
(DatasetFormatInfo.SYSTEM_DEFAULT == info) { return; } diff --git a/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslatorTest.java b/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslatorTest.java index 292ff16d4d..b6f9df766d 100644 --- a/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslatorTest.java +++ b/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslatorTest.java @@ -57,7 +57,7 @@ public class DatasetTupleTranslatorTest { DataverseName.createSinglePartName("foo"), "LogType", DataverseName.createSinglePartName("CB"), "MetaType", "DEFAULT_NG_ALL_NODES", "prefix", compactionPolicyProperties, details, Collections.emptyMap(), DatasetType.INTERNAL, 115, 0, CompressionManager.NONE, - DatasetFormatInfo.DEFAULT); + DatasetFormatInfo.SYSTEM_DEFAULT); DatasetTupleTranslator dtTranslator = new DatasetTupleTranslator(true); ITupleReference tuple = dtTranslator.getTupleFromMetadataEntity(dataset); Dataset deserializedDataset = dtTranslator.getMetadataEntityFromTuple(tuple); diff --git a/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslatorTest.java b/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslatorTest.java index 77c64d736c..9f5447876f 100644 --- a/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslatorTest.java +++ b/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslatorTest.java @@ -70,7 +70,7 @@ public class IndexTupleTranslatorTest { DataverseName dvCB = DataverseName.createSinglePartName("CB"); Dataset dataset = new Dataset(dvTest, "d1", dvFoo, "LogType", dvCB, "MetaType", "DEFAULT_NG_ALL_NODES", 
"prefix", compactionPolicyProperties, details, Collections.emptyMap(), DatasetType.INTERNAL, 115, 0, - CompressionManager.NONE, DatasetFormatInfo.DEFAULT); + CompressionManager.NONE, DatasetFormatInfo.SYSTEM_DEFAULT); Index index = new Index(dvTest, "d1", "i1", IndexType.BTREE, Collections.singletonList(Collections.singletonList("row_id")), diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/object/base/AdmObjectNode.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/object/base/AdmObjectNode.java index 966b9bae88..bcabb18dd3 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/object/base/AdmObjectNode.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/object/base/AdmObjectNode.java @@ -68,6 +68,15 @@ public class AdmObjectNode implements IAdmNode { return children.get(fieldName); } + public IAdmNode getOrDefault(String fieldName, IAdmNode defaultValue) { + IAdmNode node = get(fieldName); + return node != null ? node : defaultValue; + } + + public IAdmNode getOrEmpty(String fieldName) { + return children.get(fieldName); + } + public Set<String> getFieldNames() { return children.keySet(); } @@ -162,6 +171,11 @@ public class AdmObjectNode implements IAdmNode { return ((AdmStringNode) node).get(); } + public String getOptionalString(String field, String defaultValue) { + String value = getOptionalString(field); + return value != null ? value : defaultValue; + } + public int getOptionalInt(String field, int defaultValue) { final IAdmNode node = get(field); if (node == null) {