This is an automated email from the ASF dual-hosted git repository. alsuliman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 1edc94dd592216f37eeabbf820a0bdd2cf02b15b Author: Vijay Sarathy <[email protected]> AuthorDate: Wed Mar 1 16:08:04 2023 -0800 [ASTERIXDB-3120][ASTERIXDB-3121] CBO Analyze: improve error message. - user model changes: no - storage format changes: no - interface changes: no Details: Improve error message if seed value is invalid. Improve error message if sample value is invalid. Change-Id: I599005f4f4814f719e422da4ff7d58c2253e5ed4 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17404 Integration-Tests: Jenkins <[email protected]> Reviewed-by: Vijay Sarathy <[email protected]> Reviewed-by: Ali Alsuliman <[email protected]> Tested-by: Jenkins <[email protected]> --- .../analyze-dataset-1.20.ddl.sqlpp | 5 +-- .../analyze-dataset-1.21.query.sqlpp | 9 +++-- .../analyze-dataset-1.22.ddl.sqlpp | 6 +++- .../analyze-dataset-1.23.query.sqlpp | 2 +- .../analyze-dataset-1.24.ddl.sqlpp | 6 +--- .../analyze-dataset-1.25.query.sqlpp | 2 +- ...24.ddl.sqlpp => analyze-dataset-1.26.ddl.sqlpp} | 0 ...uery.sqlpp => analyze-dataset-1.27.query.sqlpp} | 0 .../ddl/analyze-dataset-1/analyze-dataset-1.21.adm | 2 +- .../ddl/analyze-dataset-1/analyze-dataset-1.23.adm | 2 +- .../ddl/analyze-dataset-1/analyze-dataset-1.25.adm | 2 +- ...e-dataset-1.21.adm => analyze-dataset-1.27.adm} | 0 .../asterix/common/exceptions/ErrorCode.java | 3 ++ .../src/main/resources/asx_errormsg/en.properties | 4 ++- .../lang/common/statement/AnalyzeStatement.java | 40 ++++++++++++++++------ .../asterix/lang/common/util/ExpressionUtils.java | 22 ++++++++++++ 16 files changed, 77 insertions(+), 28 deletions(-) diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.20.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.20.ddl.sqlpp index 3190b2dacb..3dfad77bc8 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.20.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.20.ddl.sqlpp @@ -18,9 +18,10 @@ */ /* - * Description: test that the sample index is dropped using "analyze dataset drop statistics" statement + * Description: analyze dataset with sample=high + * Note, there are more tuples in the dataset that the target sample size */ use test; -analyze dataset ds1 drop statistics; +analyze dataset ds1 with { "sample": "high", "sample-seed": -10 }; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp index 759fc3f417..9e7988cd11 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp @@ -18,10 +18,13 @@ */ /* - * Description: check that the sample was dropped + * Description: check that the sample was re-created */ +set `import-private-functions` `true`; + use test; -select count(*) cnt -from listMetadata(true, false) v; +select * from + listMetadata(false, false) metadata, + showSampleStats("ds1", "sample_idx_2_ds1", false) stats; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.22.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.22.ddl.sqlpp index 7d6bf92041..3190b2dacb 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.22.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.22.ddl.sqlpp @@ -17,6 +17,10 @@ * under the License. */ +/* + * Description: test that the sample index is dropped using "analyze dataset drop statistics" statement + */ + use test; -analyze dataset ds1; +analyze dataset ds1 drop statistics; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.23.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.23.query.sqlpp index a593df39ee..759fc3f417 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.23.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.23.query.sqlpp @@ -18,7 +18,7 @@ */ /* - * Description: check that the sample was re-created again + * Description: check that the sample was dropped */ use test; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.24.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.24.ddl.sqlpp index 151309ad0b..7d6bf92041 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.24.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.24.ddl.sqlpp @@ -17,10 +17,6 @@ * under the License. */ -/* - * Description: test that the sample index is dropped when its source dataset is dropped - */ - use test; -drop dataset ds1; \ No newline at end of file +analyze dataset ds1; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.25.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.25.query.sqlpp index 759fc3f417..a593df39ee 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.25.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.25.query.sqlpp @@ -18,7 +18,7 @@ */ /* - * Description: check that the sample was dropped + * Description: check that the sample was re-created again */ use test; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.24.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.26.ddl.sqlpp similarity index 100% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.24.ddl.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.26.ddl.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.27.query.sqlpp similarity index 100% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.27.query.sqlpp diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.21.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.21.adm index bacb60c0e2..49f9285cec 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.21.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.21.adm @@ -1 +1 @@ -{ "cnt": 0 } \ No newline at end of file +{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1", "SampleCardinalityTarget": 17008, "SourceCardinality": 17100, "SourceAvgItemSize": true, "SampleSeed": true }, "stats": { "cnt": 16972, "min_pk": true, "max_pk": true, "min_x": true, "max_x": true } } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.23.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.23.adm index f86e66b707..bacb60c0e2 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.23.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.23.adm @@ -1 +1 @@ -{ "cnt": 1 } \ No newline at end of file +{ "cnt": 0 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.25.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.25.adm index bacb60c0e2..f86e66b707 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.25.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.25.adm @@ -1 +1 @@ -{ "cnt": 0 } \ No newline at end of file +{ "cnt": 1 } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.21.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.27.adm similarity index 100% copy from asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.21.adm copy to asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.27.adm diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java index 44471a299d..b0826e81f5 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java @@ -267,6 +267,9 @@ public enum ErrorCode implements IError { INVALID_TIMEZONE(1172), INVALID_PARAM_VALUE_ALLOWED_VALUE(1173), SAMPLE_HAS_ZERO_ROWS(1174), + INVALID_SAMPLE_SIZE(1175), + OUT_OF_RANGE_SAMPLE_SIZE(1176), + INVALID_SAMPLE_SEED(1177), // Feed errors DATAFLOW_ILLEGAL_STATE(3001), diff --git a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties index c9ab080646..0bf523a82d 100644 --- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties +++ b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties @@ -269,7 +269,9 @@ 1172 = Provided timezone is invalid: '%1$s' 1173 = Invalid value for parameter '%1$s', allowed value(s): %2$s 1174 = Sample has zero rows - +1175 = Sample size options are "low", "medium", "high", or a number +1176 = Sample size has to be between %1$s and %2$s +1177 = Sample seed has to be a number or a string convertible to a number # Feed Errors 3001 = Illegal state. 3002 = Tuple is too large for a frame diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java index cbf2c071fe..a719c341cf 100644 --- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java +++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java @@ -19,14 +19,18 @@ package org.apache.asterix.lang.common.statement; +import java.util.List; import java.util.Locale; import org.apache.asterix.common.exceptions.CompilationException; import org.apache.asterix.common.exceptions.ErrorCode; import org.apache.asterix.common.metadata.DataverseName; import org.apache.asterix.lang.common.base.AbstractStatement; +import org.apache.asterix.lang.common.base.Expression; +import org.apache.asterix.lang.common.expression.FieldBinding; import org.apache.asterix.lang.common.expression.RecordConstructor; import org.apache.asterix.lang.common.util.ExpressionUtils; +import org.apache.asterix.lang.common.util.LangRecordParseUtil; import org.apache.asterix.lang.common.visitor.base.ILangVisitor; import org.apache.asterix.object.base.AdmBigIntNode; import org.apache.asterix.object.base.AdmDoubleNode; @@ -57,20 +61,32 @@ public class AnalyzeStatement extends AbstractStatement { throws CompilationException { this.dataverseName = dataverseName; this.datasetName = datasetName; - this.options = options == null ? null : validateOptions(ExpressionUtils.toNode(options)); + this.options = options == null ? null : validateOptions(options); } - private static AdmObjectNode validateOptions(AdmObjectNode options) throws CompilationException { - for (String fieldName : options.getFieldNames()) { - switch (fieldName) { + private static AdmObjectNode validateOptions(RecordConstructor options) throws CompilationException { + final List<FieldBinding> fbList = options.getFbList(); + for (int i = 0; i < fbList.size(); i++) { + FieldBinding binding = fbList.get(i); + String key = LangRecordParseUtil.exprToStringLiteral(binding.getLeftExpr()).getStringValue(); + Expression value = binding.getRightExpr(); + switch (key) { case SAMPLE_FIELD_NAME: + if (value.getKind() != Expression.Kind.LITERAL_EXPRESSION) { + throw new CompilationException(ErrorCode.INVALID_SAMPLE_SIZE); + } + break; case SAMPLE_SEED_FIELD_NAME: + if (value.getKind() != Expression.Kind.LITERAL_EXPRESSION + && value.getKind() != Expression.Kind.UNARY_EXPRESSION) { + throw new CompilationException(ErrorCode.INVALID_SAMPLE_SEED); + } break; default: - throw new CompilationException(ErrorCode.INVALID_PARAM, fieldName); + throw new CompilationException(ErrorCode.INVALID_PARAM, key); } } - return options; + return (ExpressionUtils.toNode(options)); } @Override @@ -102,18 +118,20 @@ public class AnalyzeStatement extends AbstractStatement { case SAMPLE_HIGH: return SAMPLE_HIGH_SIZE; default: - throw new CompilationException(ErrorCode.INVALID_PROPERTY_FORMAT, SAMPLE_FIELD_NAME); + throw new CompilationException(ErrorCode.INVALID_SAMPLE_SIZE); } case BIGINT: int v = (int) ((AdmBigIntNode) n).get(); if (!isValidSampleSize(v)) { - throw new CompilationException(ErrorCode.INVALID_PROPERTY_FORMAT, SAMPLE_FIELD_NAME); + throw new CompilationException(ErrorCode.OUT_OF_RANGE_SAMPLE_SIZE, SAMPLE_LOW_SIZE, + SAMPLE_HIGH_SIZE); } return v; case DOUBLE: v = (int) ((AdmDoubleNode) n).get(); if (!isValidSampleSize(v)) { - throw new CompilationException(ErrorCode.INVALID_PROPERTY_FORMAT, SAMPLE_FIELD_NAME); + throw new CompilationException(ErrorCode.OUT_OF_RANGE_SAMPLE_SIZE, SAMPLE_LOW_SIZE, + SAMPLE_HIGH_SIZE); } return v; default: @@ -138,7 +156,7 @@ public class AnalyzeStatement extends AbstractStatement { try { return Long.parseLong(s); } catch (NumberFormatException e) { - throw new CompilationException(ErrorCode.INVALID_PROPERTY_FORMAT, SAMPLE_SEED_FIELD_NAME); + throw new CompilationException(ErrorCode.INVALID_SAMPLE_SEED); } default: throw new CompilationException(ErrorCode.WITH_FIELD_MUST_BE_OF_TYPE, SAMPLE_SEED_FIELD_NAME, @@ -167,4 +185,4 @@ public class AnalyzeStatement extends AbstractStatement { public byte getCategory() { return Category.DDL; } -} \ No newline at end of file +} diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/ExpressionUtils.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/ExpressionUtils.java index cab1aca459..5c91aef7d0 100644 --- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/ExpressionUtils.java +++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/ExpressionUtils.java @@ -42,6 +42,7 @@ import org.apache.asterix.lang.common.expression.FieldBinding; import org.apache.asterix.lang.common.expression.ListConstructor; import org.apache.asterix.lang.common.expression.LiteralExpr; import org.apache.asterix.lang.common.expression.RecordConstructor; +import org.apache.asterix.lang.common.expression.UnaryExpr; import org.apache.asterix.lang.common.literal.DoubleLiteral; import org.apache.asterix.lang.common.literal.FloatLiteral; import org.apache.asterix.lang.common.literal.IntegerLiteral; @@ -50,6 +51,7 @@ import org.apache.asterix.lang.common.literal.StringLiteral; import org.apache.asterix.lang.common.statement.FunctionDecl; import org.apache.asterix.lang.common.statement.Query; import org.apache.asterix.lang.common.statement.ViewDecl; +import org.apache.asterix.lang.common.struct.UnaryExprType; import org.apache.asterix.lang.common.visitor.GatherFunctionCallsVisitor; import org.apache.asterix.object.base.AdmArrayNode; import org.apache.asterix.object.base.AdmBigIntNode; @@ -80,6 +82,26 @@ public class ExpressionUtils { return toNode((LiteralExpr) expr); case RECORD_CONSTRUCTOR_EXPRESSION: return toNode((RecordConstructor) expr); + case UNARY_EXPRESSION: + UnaryExpr unaryExpr = (UnaryExpr) expr; + UnaryExprType unaryExprType = unaryExpr.getExprType(); + if (unaryExprType == UnaryExprType.POSITIVE || unaryExprType == UnaryExprType.NEGATIVE) { + Expression uexpr = unaryExpr.getExpr(); + if (uexpr.getKind() == Expression.Kind.LITERAL_EXPRESSION) { + if (unaryExprType == UnaryExprType.POSITIVE) { + return toNode(uexpr); + } else { + Literal lit = ((LiteralExpr) uexpr).getValue(); + return toNode(new LiteralExpr(reverseSign(lit))); + } + } else { + throw new CompilationException(ErrorCode.LITERAL_TYPE_NOT_SUPPORTED_IN_CONSTANT_RECORD, + uexpr.getKind()); + } + } else { + throw new CompilationException(ErrorCode.EXPRESSION_NOT_SUPPORTED_IN_CONSTANT_RECORD, + unaryExprType); + } default: throw new CompilationException(ErrorCode.EXPRESSION_NOT_SUPPORTED_IN_CONSTANT_RECORD, expr.getKind()); }
