This is an automated email from the ASF dual-hosted git repository. htowaileb pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push: new 86c153a43c [ASTERIXDB-3235][EXT]: Refactor external prefix + add filter evaluate exprssion (P1) 86c153a43c is described below commit 86c153a43cb557978ed567ec41b7db30b213fcb8 Author: Hussain Towaileb <hussain.towai...@couchbase.com> AuthorDate: Fri Aug 4 20:44:03 2023 +0300 [ASTERIXDB-3235][EXT]: Refactor external prefix + add filter evaluate exprssion (P1) Change-Id: I2b91d29a2241218baea439042a35a1e6d19ad5e2 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17697 Integration-Tests: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Tested-by: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Reviewed-by: Hussain Towaileb <hussai...@gmail.com> Reviewed-by: Wail Alkowaileet <wael....@gmail.com> --- .../external_dataset/PrefixComputedFieldsTest.java | 58 +++--- .../common/external/IExternalFilterEvaluator.java | 6 +- .../external/IExternalFilterEvaluatorFactory.java | 4 +- .../external/NoOpExternalFilterEvaluator.java | 6 +- .../NoOpExternalFilterEvaluatorFactory.java | 4 +- .../record/reader/aws/AwsS3InputStreamFactory.java | 42 +--- .../asterix/external/util/ExternalDataPrefix.java | 227 ++++++++++----------- .../asterix/external/util/ExternalDataUtils.java | 9 + 8 files changed, 161 insertions(+), 195 deletions(-) diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java index b2c405a9e0..2736969836 100644 --- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java +++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java @@ -34,14 +34,14 @@ public class PrefixComputedFieldsTest extends TestCase { @Test public void test() throws Exception { - ExternalDataPrefix prefix = new ExternalDataPrefix(null); + ExternalDataPrefix prefix = new ExternalDataPrefix(Collections.emptyMap()); assertEquals("", prefix.getOriginal()); assertEquals("", prefix.getRoot()); assertFalse(prefix.isEndsWithSlash()); assertEquals(Collections.emptyList(), prefix.getSegments()); - assertEquals(Collections.emptyList(), prefix.getComputedFieldDetails().getComputedFieldNames()); - assertEquals(Collections.emptyList(), prefix.getComputedFieldDetails().getComputedFieldTypes()); - assertEquals(Collections.emptyList(), prefix.getComputedFieldDetails().getComputedFieldIndexes()); + assertEquals(Collections.emptyList(), prefix.getComputedFieldNames()); + assertEquals(Collections.emptyList(), prefix.getComputedFieldTypes()); + assertEquals(Collections.emptyList(), prefix.getComputedFieldSegmentIndexes()); String prefix1 = ""; prefix = new ExternalDataPrefix(prefix1); @@ -49,9 +49,9 @@ public class PrefixComputedFieldsTest extends TestCase { assertEquals("", prefix.getRoot()); assertFalse(prefix.isEndsWithSlash()); assertEquals(Collections.emptyList(), prefix.getSegments()); - assertEquals(Collections.emptyList(), prefix.getComputedFieldDetails().getComputedFieldNames()); - assertEquals(Collections.emptyList(), prefix.getComputedFieldDetails().getComputedFieldTypes()); - assertEquals(Collections.emptyList(), prefix.getComputedFieldDetails().getComputedFieldIndexes()); + assertEquals(Collections.emptyList(), prefix.getComputedFieldNames()); + assertEquals(Collections.emptyList(), prefix.getComputedFieldTypes()); + assertEquals(Collections.emptyList(), prefix.getComputedFieldSegmentIndexes()); String prefix2 = "hotel"; prefix = new ExternalDataPrefix(prefix2); @@ -59,9 +59,9 @@ public class PrefixComputedFieldsTest extends TestCase { assertEquals("hotel", prefix.getRoot()); assertFalse(prefix.isEndsWithSlash()); assertEquals(List.of("hotel"), prefix.getSegments()); - assertEquals(Collections.emptyList(), prefix.getComputedFieldDetails().getComputedFieldNames()); - assertEquals(Collections.emptyList(), prefix.getComputedFieldDetails().getComputedFieldTypes()); - assertEquals(Collections.emptyList(), prefix.getComputedFieldDetails().getComputedFieldIndexes()); + assertEquals(Collections.emptyList(), prefix.getComputedFieldNames()); + assertEquals(Collections.emptyList(), prefix.getComputedFieldTypes()); + assertEquals(Collections.emptyList(), prefix.getComputedFieldSegmentIndexes()); String prefix3 = "hotel/{hotel-id:inT}/"; prefix = new ExternalDataPrefix(prefix3); @@ -69,9 +69,9 @@ public class PrefixComputedFieldsTest extends TestCase { assertEquals("hotel/", prefix.getRoot()); assertTrue(prefix.isEndsWithSlash()); assertEquals(List.of("hotel", "{hotel-id:inT}"), prefix.getSegments()); - assertEquals(List.of(List.of("hotel-id")), prefix.getComputedFieldDetails().getComputedFieldNames()); - assertEquals(List.of(AINT32), prefix.getComputedFieldDetails().getComputedFieldTypes()); - assertEquals(List.of(1), prefix.getComputedFieldDetails().getComputedFieldIndexes()); + assertEquals(List.of("hotel-id"), prefix.getComputedFieldNames()); + assertEquals(List.of(AINT32), prefix.getComputedFieldTypes()); + assertEquals(List.of(1), prefix.getComputedFieldSegmentIndexes()); String prefix4 = "hotel/{hotel-id:int}-{hotel-name:sTRing}"; prefix = new ExternalDataPrefix(prefix4); @@ -79,10 +79,9 @@ public class PrefixComputedFieldsTest extends TestCase { assertEquals("hotel", prefix.getRoot()); assertFalse(prefix.isEndsWithSlash()); assertEquals(List.of("hotel", "{hotel-id:int}-{hotel-name:sTRing}"), prefix.getSegments()); - assertEquals(List.of(List.of("hotel-id"), List.of("hotel-name")), - prefix.getComputedFieldDetails().getComputedFieldNames()); - assertEquals(List.of(AINT32, ASTRING), prefix.getComputedFieldDetails().getComputedFieldTypes()); - assertEquals(List.of(1, 1), prefix.getComputedFieldDetails().getComputedFieldIndexes()); + assertEquals(List.of("hotel-id", "hotel-name"), prefix.getComputedFieldNames()); + assertEquals(List.of(AINT32, ASTRING), prefix.getComputedFieldTypes()); + assertEquals(List.of(1, 1), prefix.getComputedFieldSegmentIndexes()); String prefix5 = "hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}-{month:int}-{day:int}/"; prefix = new ExternalDataPrefix(prefix5); @@ -92,12 +91,9 @@ public class PrefixComputedFieldsTest extends TestCase { assertTrue(prefix.isEndsWithSlash()); assertEquals(List.of("hotel", "something", "{hotel-id:int}-{hotel-name:sTRing}", "review", "{year:int}-{month:int}-{day:int}"), prefix.getSegments()); - assertEquals( - List.of(List.of("hotel-id"), List.of("hotel-name"), List.of("year"), List.of("month"), List.of("day")), - prefix.getComputedFieldDetails().getComputedFieldNames()); - assertEquals(List.of(AINT32, ASTRING, AINT32, AINT32, AINT32), - prefix.getComputedFieldDetails().getComputedFieldTypes()); - assertEquals(List.of(2, 2, 4, 4, 4), prefix.getComputedFieldDetails().getComputedFieldIndexes()); + assertEquals(List.of("hotel-id", "hotel-name", "year", "month", "day"), prefix.getComputedFieldNames()); + assertEquals(List.of(AINT32, ASTRING, AINT32, AINT32, AINT32), prefix.getComputedFieldTypes()); + assertEquals(List.of(2, 2, 4, 4, 4), prefix.getComputedFieldSegmentIndexes()); String prefix6 = "hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}/{month:int}/{day:int}"; prefix = new ExternalDataPrefix(prefix6); @@ -107,21 +103,17 @@ public class PrefixComputedFieldsTest extends TestCase { assertFalse(prefix.isEndsWithSlash()); assertEquals(List.of("hotel", "something", "{hotel-id:int}-{hotel-name:sTRing}", "review", "{year:int}", "{month:int}", "{day:int}"), prefix.getSegments()); - assertEquals( - List.of(List.of("hotel-id"), List.of("hotel-name"), List.of("year"), List.of("month"), List.of("day")), - prefix.getComputedFieldDetails().getComputedFieldNames()); - assertEquals(List.of(AINT32, ASTRING, AINT32, AINT32, AINT32), - prefix.getComputedFieldDetails().getComputedFieldTypes()); - assertEquals(List.of(2, 2, 4, 5, 6), prefix.getComputedFieldDetails().getComputedFieldIndexes()); + assertEquals(List.of("hotel-id", "hotel-name", "year", "month", "day"), prefix.getComputedFieldNames()); + assertEquals(List.of(AINT32, ASTRING, AINT32, AINT32, AINT32), prefix.getComputedFieldTypes()); + assertEquals(List.of(2, 2, 4, 5, 6), prefix.getComputedFieldSegmentIndexes()); String prefix7 = "hotel/{hotel.details.id:int}-{hotel-name:sTRing}"; prefix = new ExternalDataPrefix(prefix7); assertEquals("hotel/{hotel.details.id:int}-{hotel-name:sTRing}", prefix.getOriginal()); assertEquals("hotel", prefix.getRoot()); assertFalse(prefix.isEndsWithSlash()); - assertEquals(List.of(List.of("hotel", "details", "id"), List.of("hotel-name")), - prefix.getComputedFieldDetails().getComputedFieldNames()); - assertEquals(List.of(AINT32, ASTRING), prefix.getComputedFieldDetails().getComputedFieldTypes()); - assertEquals(List.of(1, 1), prefix.getComputedFieldDetails().getComputedFieldIndexes()); + assertEquals(List.of("hotel.details.id", "hotel-name"), prefix.getComputedFieldNames()); + assertEquals(List.of(AINT32, ASTRING), prefix.getComputedFieldTypes()); + assertEquals(List.of(1, 1), prefix.getComputedFieldSegmentIndexes()); } } diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IExternalFilterEvaluator.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IExternalFilterEvaluator.java index 22cd20a8bb..a169ecb2be 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IExternalFilterEvaluator.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IExternalFilterEvaluator.java @@ -18,14 +18,14 @@ */ package org.apache.asterix.common.external; -import org.apache.hyracks.api.exceptions.HyracksDataException; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; public interface IExternalFilterEvaluator { boolean isEmpty(); boolean isComputedFieldUsed(int index); - void setValue(int index, String stringValue) throws HyracksDataException; + void setValue(int index, String stringValue) throws AlgebricksException; - boolean evaluate() throws HyracksDataException; + boolean evaluate() throws AlgebricksException; } diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IExternalFilterEvaluatorFactory.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IExternalFilterEvaluatorFactory.java index 38a38a69ae..c29e554f1f 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IExternalFilterEvaluatorFactory.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IExternalFilterEvaluatorFactory.java @@ -20,11 +20,11 @@ package org.apache.asterix.common.external; import java.io.Serializable; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.api.application.IServiceContext; -import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.api.exceptions.IWarningCollector; public interface IExternalFilterEvaluatorFactory extends Serializable { IExternalFilterEvaluator create(IServiceContext serviceContext, IWarningCollector warningCollector) - throws HyracksDataException; + throws AlgebricksException; } diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/NoOpExternalFilterEvaluator.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/NoOpExternalFilterEvaluator.java index 78ebeb4d90..e48ad80f67 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/NoOpExternalFilterEvaluator.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/NoOpExternalFilterEvaluator.java @@ -18,8 +18,6 @@ */ package org.apache.asterix.common.external; -import org.apache.hyracks.api.exceptions.HyracksDataException; - class NoOpExternalFilterEvaluator implements IExternalFilterEvaluator { static final IExternalFilterEvaluator INSTANCE = new NoOpExternalFilterEvaluator(); @@ -37,12 +35,12 @@ class NoOpExternalFilterEvaluator implements IExternalFilterEvaluator { } @Override - public void setValue(int index, String stringValue) throws HyracksDataException { + public void setValue(int index, String stringValue) { throw new IndexOutOfBoundsException("Number of paths is 0"); } @Override - public boolean evaluate() throws HyracksDataException { + public boolean evaluate() { return true; } } diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/NoOpExternalFilterEvaluatorFactory.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/NoOpExternalFilterEvaluatorFactory.java index 4b5bebb282..7b8792eed9 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/NoOpExternalFilterEvaluatorFactory.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/NoOpExternalFilterEvaluatorFactory.java @@ -19,7 +19,6 @@ package org.apache.asterix.common.external; import org.apache.hyracks.api.application.IServiceContext; -import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.api.exceptions.IWarningCollector; public class NoOpExternalFilterEvaluatorFactory implements IExternalFilterEvaluatorFactory { @@ -30,8 +29,7 @@ public class NoOpExternalFilterEvaluatorFactory implements IExternalFilterEvalua } @Override - public IExternalFilterEvaluator create(IServiceContext serviceContext, IWarningCollector warningCollector) - throws HyracksDataException { + public IExternalFilterEvaluator create(IServiceContext serviceContext, IWarningCollector warningCollector) { return NoOpExternalFilterEvaluator.INSTANCE; } } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java index 4fc63c6e9c..cfa1e46874 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java @@ -23,17 +23,14 @@ import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.PriorityQueue; -import java.util.function.Supplier; +import org.apache.asterix.common.external.IExternalFilterEvaluator; import org.apache.asterix.common.external.IExternalFilterEvaluatorFactory; import org.apache.asterix.external.api.AsterixInputStream; import org.apache.asterix.external.input.record.reader.abstracts.AbstractExternalInputStreamFactory; -import org.apache.asterix.external.util.ExternalDataConstants; import org.apache.asterix.external.util.ExternalDataPrefix; import org.apache.asterix.external.util.ExternalDataUtils; import org.apache.asterix.external.util.aws.s3.S3Utils; -import org.apache.asterix.om.types.ARecordType; -import org.apache.asterix.om.types.IAType; import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; import org.apache.hyracks.api.application.IServiceContext; import org.apache.hyracks.api.context.IHyracksTaskContext; @@ -61,51 +58,30 @@ public class AwsS3InputStreamFactory extends AbstractExternalInputStreamFactory IncludeExcludeMatcher includeExcludeMatcher = ExternalDataUtils.getIncludeExcludeMatchers(configuration); //Get a list of S3 objects - String prefix = configuration.get(ExternalDataConstants.DEFINITION_FIELD_NAME); - ExternalDataPrefix externalDataPrefix = new ExternalDataPrefix(prefix); + ExternalDataPrefix externalDataPrefix = new ExternalDataPrefix(configuration); configuration.put(ExternalDataPrefix.PREFIX_ROOT_FIELD_NAME, externalDataPrefix.getRoot()); // TODO(htowaileb): Since we're using the root to load the files then start filtering, it might end up being // very expensive since at the root of the prefix we might load millions of files, we should consider (when // possible) to get the value and add it List<S3Object> filesOnly = S3Utils.listS3Objects(configuration, includeExcludeMatcher, warningCollector); - - filesOnly = filterPrefixes(externalDataPrefix, filesOnly, () -> true); + filterPrefixes(externalDataPrefix, filesOnly, filterEvaluatorFactory.create(ctx, warningCollector)); // Distribute work load amongst the partitions distributeWorkLoad(filesOnly, getPartitionsCount()); } private List<S3Object> filterPrefixes(ExternalDataPrefix prefix, List<S3Object> filesOnly, - Supplier<Boolean> evaluator) { + IExternalFilterEvaluator evaluator) throws AlgebricksException { - // if no computed fields, return the original list - if (prefix.getComputedFieldDetails().isEmpty()) { + // if no computed fields or empty files list, return the original list + if (filesOnly.isEmpty() || !prefix.hasComputedFields()) { return filesOnly; } List<S3Object> filteredList = new ArrayList<>(); for (S3Object file : filesOnly) { - List<String> segments = ExternalDataPrefix.getPrefixSegments(file.key()); - boolean match = false; - - // if the object key has fewer segments than the expected prefix, then filter it out - // TODO(htowaileb): potentially also exclude if the size matches, key should be longer than prefix - if (segments.size() < prefix.getComputedFieldDetails().getComputedFieldNames().size()) { - continue; - } - - for (int i = 0; i < prefix.getComputedFieldDetails().getComputedFieldNames().size(); i++) { - int index = prefix.getComputedFieldDetails().getComputedFieldIndexes().get(i); - - // TODO(htowaileb): evaluator will container an expression that evaluates whether to include an object or not - match = evaluator.get(); - if (!match) { - break; - } - } - - if (match) { + if (prefix.evaluate(file.key(), evaluator)) { filteredList.add(file); } } @@ -113,10 +89,6 @@ public class AwsS3InputStreamFactory extends AbstractExternalInputStreamFactory return filteredList; } - private ARecordType createRecord(String[] fieldNames, IAType[] fieldTypes) { - return new ARecordType("root", fieldNames, fieldTypes, false); - } - /** * To efficiently utilize the parallelism, work load will be distributed amongst the partitions based on the file * size. diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java index c3419c152d..97bf7767fa 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java @@ -20,8 +20,8 @@ package org.apache.asterix.external.util; import static org.apache.asterix.external.util.ExternalDataConstants.COMPUTED_FIELD_PATTERN; +import static org.apache.asterix.external.util.ExternalDataConstants.DEFINITION_FIELD_NAME; import static org.apache.asterix.external.util.ExternalDataConstants.PREFIX_DEFAULT_DELIMITER; -import static org.apache.asterix.om.utils.ProjectionFiltrationTypeUtil.getRecordTypeWithFieldTypes; import java.util.ArrayList; import java.util.Arrays; @@ -35,22 +35,28 @@ import java.util.regex.Matcher; import org.apache.asterix.common.exceptions.CompilationException; import org.apache.asterix.common.exceptions.ErrorCode; +import org.apache.asterix.common.external.IExternalFilterEvaluator; import org.apache.asterix.om.types.ARecordType; import org.apache.asterix.om.types.ATypeTag; import org.apache.asterix.om.types.BuiltinType; import org.apache.asterix.om.types.BuiltinTypeMap; import org.apache.asterix.om.types.IAType; +import org.apache.asterix.om.utils.ProjectionFiltrationTypeUtil; import org.apache.commons.lang3.tuple.Pair; import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; public class ExternalDataPrefix { private final String original; - private final String root; + private String root; private final boolean endsWithSlash; - private final List<String> segments; - private final ComputedFieldDetails computedFieldDetails; + + private final List<String> computedFieldNames = new ArrayList<>(); + private final List<IAType> computedFieldTypes = new ArrayList<>(); + private final List<Integer> computedFieldSegmentIndexes = new ArrayList<>(); + private final List<ARecordType> paths = new ArrayList<>(); + private final Map<Integer, Pair<List<String>, List<IAType>>> computedFields = new HashMap<>(); public static final String PREFIX_ROOT_FIELD_NAME = "prefix-root"; public static final Set<ATypeTag> supportedTypes = new HashSet<>(); @@ -60,62 +66,83 @@ public class ExternalDataPrefix { supportedTypes.add(BuiltinType.AINT32.getTypeTag()); } + public ExternalDataPrefix(Map<String, String> configuration) throws AlgebricksException { + this(configuration.get(DEFINITION_FIELD_NAME)); + } + public ExternalDataPrefix(String prefix) throws AlgebricksException { this.original = prefix != null ? prefix : ""; this.endsWithSlash = this.original.endsWith("/"); - this.segments = getPrefixSegments(this.original); + segments = extractPrefixSegments(original); + extractComputedFields(); + extractRoot(); - computedFieldDetails = getComputedFields(segments); - this.root = getPrefixRoot(segments, computedFieldDetails.getComputedFieldIndexes()); + for (int i = 0; i < computedFieldSegmentIndexes.size(); i++) { + int segmentIndex = computedFieldSegmentIndexes.get(i); + + if (computedFields.containsKey(segmentIndex)) { + Pair<List<String>, List<IAType>> pair = computedFields.get(segmentIndex); + pair.getLeft().add(computedFieldNames.get(i)); + pair.getRight().add(computedFieldTypes.get(i)); + } else { + List<String> names = new ArrayList<>(); + List<IAType> types = new ArrayList<>(); + + names.add(computedFieldNames.get(i)); + types.add(computedFieldTypes.get(i)); + computedFields.put(segmentIndex, Pair.of(names, types)); + } + } } public String getOriginal() { return original; } + public boolean isEndsWithSlash() { + return endsWithSlash; + } + public String getRoot() { return root; } - public boolean isEndsWithSlash() { - return endsWithSlash; + public boolean hasComputedFields() { + return !computedFieldNames.isEmpty(); } public List<String> getSegments() { return segments; } - public ComputedFieldDetails getComputedFieldDetails() { - return computedFieldDetails; + public List<String> getComputedFieldNames() { + return computedFieldNames; + } + + public List<IAType> getComputedFieldTypes() { + return computedFieldTypes; + } + + public List<Integer> getComputedFieldSegmentIndexes() { + return computedFieldSegmentIndexes; + } + + public List<ARecordType> getPaths() { + return paths; } /** - * returns the segments of a prefix, separated by the delimiter - * - * @param prefix prefix - * @return an array of prefix segments + * extracts the segments of a prefix, separated by the delimiter */ - public static List<String> getPrefixSegments(String prefix) { + private List<String> extractPrefixSegments(String prefix) { return prefix.isEmpty() ? Collections.emptyList() : Arrays.asList(prefix.split(PREFIX_DEFAULT_DELIMITER)); } /** - * Extracts and returns the computed fields and their indexes from the provided prefix - * @param prefix prefix - * - * @return Pair of computed field names and their segment index in the prefix + * extracts and returns the computed fields and their indexes from the provided prefix */ - public static ComputedFieldDetails getComputedFields(String prefix) throws AlgebricksException { - List<String> segments = getPrefixSegments(prefix); - return getComputedFields(segments); - } - - public static ComputedFieldDetails getComputedFields(List<String> segments) throws AlgebricksException { - List<List<String>> computedFieldsNames = new ArrayList<>(); - List<IAType> computedFieldTypes = new ArrayList<>(); - List<Integer> computedFieldIndexes = new ArrayList<>(); - + private void extractComputedFields() throws AlgebricksException { // check if there are any segments before doing any testing if (!segments.isEmpty()) { // search for computed fields in each segment @@ -132,15 +159,38 @@ public class ExternalDataPrefix { IAType type = BuiltinTypeMap.getBuiltinType(typePart); validateSupported(type.getTypeTag()); - List<String> nameParts = List.of(namePart.split("\\.")); - computedFieldsNames.add(nameParts); + computedFieldNames.add(namePart); computedFieldTypes.add(type); - computedFieldIndexes.add(i); + computedFieldSegmentIndexes.add(i); + + List<String> nameParts = List.of(namePart.split("\\.")); + paths.add(ProjectionFiltrationTypeUtil.getPathRecordType(nameParts)); } } } + } + + /** + * Returns the longest static path (root) before encountering the first computed field + */ + private void extractRoot() { + StringBuilder builder = new StringBuilder(); + + // check if there are any computed fields before doing any testing + if (computedFieldNames.isEmpty()) { + root = original; + return; + } - return new ComputedFieldDetails(computedFieldsNames, computedFieldTypes, computedFieldIndexes); + // construct all static parts before encountering the first computed field + for (int i = 0; i < computedFieldSegmentIndexes.get(0); i++) { + builder.append(segments.get(i)).append("/"); + } + + // remove last "/" and append it only if needed + root = builder.toString(); + root = root.substring(0, root.length() - 1); + root = ExternalDataUtils.appendSlash(root, endsWithSlash); } /** @@ -149,103 +199,50 @@ public class ExternalDataPrefix { * @param type type to check * @throws CompilationException exception if type is not supported */ - private static void validateSupported(ATypeTag type) throws CompilationException { + private void validateSupported(ATypeTag type) throws CompilationException { if (!supportedTypes.contains(type)) { throw new CompilationException(ErrorCode.UNSUPPORTED_COMPUTED_FIELD_TYPE, type); } } /** - * Returns the longest static path (root) before encountering the first computed field + * Evaluates whether the provided key satisfies the conditions of the evaluator or not + * + * @param key ke + * @param evaluator evaluator * - * @param prefix prefix - * @return prefix root + * @return true if key satisfies the evaluator conditions, false otherwise */ - public String getPrefixRoot(String prefix) throws AlgebricksException { - List<String> prefixSegments = getPrefixSegments(prefix); - List<Integer> computedFieldIndexes = getComputedFields(prefix).getComputedFieldIndexes(); - return getPrefixRoot(prefixSegments, computedFieldIndexes); - } - - public String getPrefixRoot(List<String> prefixSegments, List<Integer> computedFieldIndexes) { - StringBuilder root = new StringBuilder(); - - // check if there are any computed fields before doing any testing - if (computedFieldIndexes.size() == 0) { - return this.original; - } - - // construct all static parts before encountering the first computed field - for (int i = 0; i < computedFieldIndexes.get(0); i++) { - root.append(prefixSegments.get(i)).append("/"); - } - - // remove last "/" and append it only if needed - String finalRoot = root.toString(); - finalRoot = finalRoot.substring(0, finalRoot.length() - 1); - return ExternalDataUtils.appendSlash(finalRoot, this.endsWithSlash); - } - - public static class ComputedFieldDetails { - private final List<List<String>> computedFieldNames; - private final List<IAType> computedFieldTypes; - private final List<Integer> computedFieldIndexes; - private final Map<Integer, Pair<List<List<String>>, List<IAType>>> computedFields = new HashMap<>(); - private final ARecordType recordType; - - public ComputedFieldDetails(List<List<String>> computedFieldNames, List<IAType> computedFieldTypes, - List<Integer> computedFieldIndexes) throws AlgebricksException { - this.computedFieldNames = computedFieldNames; - this.computedFieldTypes = computedFieldTypes; - this.computedFieldIndexes = computedFieldIndexes; - - this.recordType = getRecordTypeWithFieldTypes(computedFieldNames, computedFieldTypes); - - for (int i = 0; i < computedFieldIndexes.size(); i++) { - int index = computedFieldIndexes.get(i); - - if (computedFields.containsKey(index)) { - Pair<List<List<String>>, List<IAType>> pair = computedFields.get(index); - pair.getLeft().add(computedFieldNames.get(i)); - pair.getRight().add(computedFieldTypes.get(i)); - } else { - List<List<String>> names = new ArrayList<>(); - List<IAType> types = new ArrayList<>(); - - names.add(computedFieldNames.get(i)); - types.add(computedFieldTypes.get(i)); - computedFields.put(index, Pair.of(names, types)); - } - } - } - - public boolean isEmpty() { - return computedFieldNames.isEmpty(); - } + public boolean evaluate(String key, IExternalFilterEvaluator evaluator) throws AlgebricksException { + List<String> keySegments = extractPrefixSegments(key); - public List<List<String>> getComputedFieldNames() { - return computedFieldNames; + // segments of object key have to be larger than segments of the prefix + if (keySegments.size() <= segments.size()) { + return false; } - public List<IAType> getComputedFieldTypes() { - return computedFieldTypes; + // extract values for all compute fields and set them in the evaluator + List<String> values = extractValues(keySegments); + for (int i = 0; i < computedFieldNames.size(); i++) { + evaluator.setValue(i, values.get(i)); } - public List<Integer> getComputedFieldIndexes() { - return computedFieldIndexes; - } + return evaluator.evaluate(); + } - public ARecordType getRecordType() { - return recordType; - } + /** + * extracts the computed fields values from the object's key + * + * @param keySegments object's key segments + * @return list of computed field values + */ + private List<String> extractValues(List<String> keySegments) { + List<String> values = new ArrayList<>(); - public Map<Integer, Pair<List<List<String>>, List<IAType>>> getComputedFields() { - return computedFields; + for (Integer computedFieldSegmentIndex : computedFieldSegmentIndexes) { + values.add(keySegments.get(computedFieldSegmentIndex)); } - @Override - public String toString() { - return computedFields.toString(); - } + return values; } } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java index 02653f35d9..9d36b4aa4e 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java @@ -752,11 +752,20 @@ public class ExternalDataUtils { } public static String getPrefix(Map<String, String> configuration, boolean appendSlash) { + String root = configuration.get(ExternalDataPrefix.PREFIX_ROOT_FIELD_NAME); String definition = configuration.get(ExternalDataConstants.DEFINITION_FIELD_NAME); String subPath = configuration.get(ExternalDataConstants.SUBPATH); + boolean hasRoot = root != null && !root.isEmpty(); boolean hasDefinition = definition != null && !definition.isEmpty(); boolean hasSubPath = subPath != null && !subPath.isEmpty(); + + // if computed fields are used, subpath will not take effect. we can tell if we're using a computed field or + // not by checking if the root matches the definition or not, they never match if computed fields are used + if (hasRoot && hasDefinition && !root.equals(definition)) { + return appendSlash(root, appendSlash); + } + if (hasDefinition && !hasSubPath) { return appendSlash(definition, appendSlash); }