This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit fba0449ab02c7df056e00efb1bfdf04bf319ef76 Author: Dmitry Lychagin <[email protected]> AuthorDate: Fri Jun 17 18:18:52 2022 -0700 [NO ISSUE][COMP] Add sample-seed parameter to ANALYZE DATASET - user model changes: no - storage format changes: no - interface changes: no Details: - Add sample-seed parameter to ANALYZE DATASET statement - Update testcases Change-Id: I78429541bf7d720cc73dc674dd532f7a1f066a24 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/16584 Contrib: Jenkins <[email protected]> Reviewed-by: Ali Alsuliman <[email protected]> Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17327 Reviewed-by: Michael Blow <[email protected]> Tested-by: Michael Blow <[email protected]> --- .../asterix/app/translator/QueryTranslator.java | 8 ++-- .../analyze-dataset-1.1.ddl.sqlpp | 5 ++- .../analyze-dataset-1.10.ddl.sqlpp | 2 +- .../analyze-dataset-1.11.query.sqlpp | 2 +- .../analyze-dataset-1.14.query.sqlpp | 2 +- .../analyze-dataset-1.15.ddl.sqlpp | 2 +- .../analyze-dataset-1.16.query.sqlpp | 2 +- .../analyze-dataset-1.19.query.sqlpp | 2 +- .../analyze-dataset-1.2.query.sqlpp | 2 +- .../analyze-dataset-1.21.query.sqlpp | 2 +- .../analyze-dataset-1.4.ddl.sqlpp | 2 +- .../analyze-dataset-1.5.query.sqlpp | 2 +- .../analyze-dataset-1.7.query.sqlpp | 2 +- .../analyze-dataset-1.9.query.sqlpp | 2 +- .../ddl/analyze-dataset-1/analyze-dataset-1.11.adm | 2 +- .../ddl/analyze-dataset-1/analyze-dataset-1.14.adm | 2 +- .../ddl/analyze-dataset-1/analyze-dataset-1.16.adm | 2 +- .../ddl/analyze-dataset-1/analyze-dataset-1.19.adm | 2 +- .../ddl/analyze-dataset-1/analyze-dataset-1.2.adm | 2 +- .../ddl/analyze-dataset-1/analyze-dataset-1.5.adm | 2 +- .../ddl/analyze-dataset-1/analyze-dataset-1.7.adm | 2 +- .../ddl/analyze-dataset-1/analyze-dataset-1.9.adm | 2 +- .../lang/common/statement/AnalyzeStatement.java | 46 +++++++++++++++++++++- .../apache/asterix/metadata/entities/Index.java | 11 +++++- .../IndexTupleTranslator.java | 16 +++++++- .../metadata/utils/SampleOperationsHelper.java | 3 +- .../SampleSlotRunningAggregateFunctionFactory.java | 9 +++-- 27 files changed, 105 insertions(+), 33 deletions(-) diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java index 71f59cf753..5b60cc050c 100644 --- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java +++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java @@ -4271,9 +4271,11 @@ public class QueryTranslator extends AbstractLangTranslator implements IStatemen InternalDatasetDetails dsDetails = (InternalDatasetDetails) ds.getDatasetDetails(); int sampleCardinalityTarget = stmtAnalyze.getSampleSize(); + long sampleSeed = stmtAnalyze.getOrCreateSampleSeed(); - Index.SampleIndexDetails newIndexDetailsPendingAdd = new Index.SampleIndexDetails(dsDetails.getPrimaryKey(), - dsDetails.getKeySourceIndicator(), dsDetails.getPrimaryKeyType(), sampleCardinalityTarget, 0, 0); + Index.SampleIndexDetails newIndexDetailsPendingAdd = + new Index.SampleIndexDetails(dsDetails.getPrimaryKey(), dsDetails.getKeySourceIndicator(), + dsDetails.getPrimaryKeyType(), sampleCardinalityTarget, 0, 0, sampleSeed); newIndexPendingAdd = new Index(dataverseName, datasetName, newIndexName, sampleIndexType, newIndexDetailsPendingAdd, false, false, MetadataUtil.PENDING_ADD_OP); @@ -4315,7 +4317,7 @@ public class QueryTranslator extends AbstractLangTranslator implements IStatemen Index.SampleIndexDetails newIndexDetailsFinal = new Index.SampleIndexDetails(dsDetails.getPrimaryKey(), dsDetails.getKeySourceIndicator(), dsDetails.getPrimaryKeyType(), sampleCardinalityTarget, - stats.getCardinality(), stats.getAvgTupleSize()); + stats.getCardinality(), stats.getAvgTupleSize(), sampleSeed); Index newIndexFinal = new Index(dataverseName, datasetName, newIndexName, sampleIndexType, newIndexDetailsFinal, false, false, MetadataUtil.PENDING_NO_OP); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.1.ddl.sqlpp index 50daffd58e..e1d6b1020e 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.1.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.1.ddl.sqlpp @@ -27,9 +27,10 @@ drop dataverse test if exists; create dataverse test; use test; -create function listMetadata(showSourceAvgItemSize) { +create function listMetadata(showSourceAvgItemSize, showSeed) { select i.DatasetName, i.IndexName, i.SampleCardinalityTarget, i.SourceCardinality, - case when showSourceAvgItemSize then i.SourceAvgItemSize else i.SourceAvgItemSize > 0 end as SourceAvgItemSize + case when showSourceAvgItemSize then i.SourceAvgItemSize else i.SourceAvgItemSize > 0 end as SourceAvgItemSize, + case when showSeed then i.SampleSeed else i.SampleSeed is known end as SampleSeed from Metadata.`Index` i where i.DataverseName = "test" and i.IndexName like "sample_idx%" order by i.IndexName diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.10.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.10.ddl.sqlpp index 1de0947dfa..da5fe13a8d 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.10.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.10.ddl.sqlpp @@ -24,4 +24,4 @@ use test; -analyze dataset test.ds1 with { "sample": "medium" }; +analyze dataset test.ds1 with { "sample": "medium", "sample-seed": 234.0 }; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.11.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.11.query.sqlpp index 549d273331..38ded0a330 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.11.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.11.query.sqlpp @@ -26,5 +26,5 @@ set `import-private-functions` `true`; use test; select * from - listMetadata(false) metadata, + listMetadata(false, true) metadata, showSampleStats("ds1", "sample_idx_2_ds1", true) stats; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.14.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.14.query.sqlpp index b206f5920b..4cae2027a4 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.14.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.14.query.sqlpp @@ -26,5 +26,5 @@ set `import-private-functions` `true`; use test; select * from - listMetadata(false) metadata, + listMetadata(false, false) metadata, showSampleStats("ds1", "sample_idx_1_ds1", false) stats; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.15.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.15.ddl.sqlpp index 0cdaf19fcc..6ceb81416d 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.15.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.15.ddl.sqlpp @@ -24,4 +24,4 @@ use test; -analyze dataset ds1 with { "sample": "high" }; +analyze dataset ds1 with { "sample": "high", "sample-seed": "345" }; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.16.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.16.query.sqlpp index 549d273331..38ded0a330 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.16.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.16.query.sqlpp @@ -26,5 +26,5 @@ set `import-private-functions` `true`; use test; select * from - listMetadata(false) metadata, + listMetadata(false, true) metadata, showSampleStats("ds1", "sample_idx_2_ds1", true) stats; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.19.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.19.query.sqlpp index b206f5920b..4cae2027a4 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.19.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.19.query.sqlpp @@ -26,5 +26,5 @@ set `import-private-functions` `true`; use test; select * from - listMetadata(false) metadata, + listMetadata(false, false) metadata, showSampleStats("ds1", "sample_idx_1_ds1", false) stats; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.2.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.2.query.sqlpp index 0f1edbd43e..e786e0ef0a 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.2.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.2.query.sqlpp @@ -24,4 +24,4 @@ use test; -listMetadata(true); +listMetadata(true, false); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp index 587629e858..759fc3f417 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp @@ -24,4 +24,4 @@ use test; select count(*) cnt -from listMetadata(true) v; +from listMetadata(true, false) v; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.4.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.4.ddl.sqlpp index 3993a1c565..ed97897046 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.4.ddl.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.4.ddl.sqlpp @@ -21,4 +21,4 @@ * Description: Test sample size parameter */ -analyze dataset test.ds1 with { "sample": "low" }; +analyze dataset test.ds1 with { "sample": "low", "sample-seed": 123 }; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.5.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.5.query.sqlpp index 243dab84ab..e0cd6cc7c5 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.5.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.5.query.sqlpp @@ -27,5 +27,5 @@ set `import-private-functions` `true`; use test; select * from - listMetadata(false) metadata, + listMetadata(false, true) metadata, showSampleStats("ds1", "sample_idx_2_ds1", true) stats diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.7.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.7.query.sqlpp index d984ef5f6e..c4930b0920 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.7.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.7.query.sqlpp @@ -27,5 +27,5 @@ set `import-private-functions` `true`; use test; select * from - listMetadata(false) metadata, + listMetadata(false, true) metadata, showSampleStats("ds1", "sample_idx_2_ds1", true) stats; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.9.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.9.query.sqlpp index b206f5920b..4cae2027a4 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.9.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.9.query.sqlpp @@ -26,5 +26,5 @@ set `import-private-functions` `true`; use test; select * from - listMetadata(false) metadata, + listMetadata(false, false) metadata, showSampleStats("ds1", "sample_idx_1_ds1", false) stats; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.11.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.11.adm index 534cc7a9c0..58f454b3ed 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.11.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.11.adm @@ -1 +1 @@ -{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1", "SampleCardinalityTarget": 4252, "SourceCardinality": 1100, "SourceAvgItemSize": true }, "stats": { "cnt": 1100, "min_pk": 1, "max_pk": 1100, "min_x": -1100, "max_x": -1 } } \ No newline at end of file +{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1", "SampleCardinalityTarget": 4252, "SourceCardinality": 1100, "SourceAvgItemSize": true, "SampleSeed": 234 }, "stats": { "cnt": 1100, "min_pk": 1, "max_pk": 1100, "min_x": -1100, "max_x": -1 } } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.14.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.14.adm index ee57f4cae6..6ef756af2f 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.14.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.14.adm @@ -1 +1 @@ -{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1", "SampleCardinalityTarget": 4252, "SourceCardinality": 4400, "SourceAvgItemSize": true }, "stats": { "cnt": 4246, "min_pk": true, "max_pk": true, "min_x": true, "max_x": true } } \ No newline at end of file +{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1", "SampleCardinalityTarget": 4252, "SourceCardinality": 4400, "SourceAvgItemSize": true, "SampleSeed": true }, "stats": { "cnt": 4246, "min_pk": true, "max_pk": true, "min_x": true, "max_x": true } } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.16.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.16.adm index b46ed0b4ce..01eb5b74a8 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.16.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.16.adm @@ -1 +1 @@ -{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1", "SampleCardinalityTarget": 17008, "SourceCardinality": 4400, "SourceAvgItemSize": true }, "stats": { "cnt": 4400, "min_pk": 1, "max_pk": 4400, "min_x": -4400, "max_x": -1 } } \ No newline at end of file +{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1", "SampleCardinalityTarget": 17008, "SourceCardinality": 4400, "SourceAvgItemSize": true, "SampleSeed": 345 }, "stats": { "cnt": 4400, "min_pk": 1, "max_pk": 4400, "min_x": -4400, "max_x": -1 } } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.19.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.19.adm index 74d092707f..60b969f490 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.19.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.19.adm @@ -1 +1 @@ -{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1", "SampleCardinalityTarget": 17008, "SourceCardinality": 17100, "SourceAvgItemSize": true }, "stats": { "cnt": 16972, "min_pk": true, "max_pk": true, "min_x": true, "max_x": true } } \ No newline at end of file +{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1", "SampleCardinalityTarget": 17008, "SourceCardinality": 17100, "SourceAvgItemSize": true, "SampleSeed": true }, "stats": { "cnt": 16972, "min_pk": true, "max_pk": true, "min_x": true, "max_x": true } } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.2.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.2.adm index ab853ec2d0..e3cefeeb10 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.2.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.2.adm @@ -1 +1 @@ -{ "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1", "SampleCardinalityTarget": 1063, "SourceCardinality": 0, "SourceAvgItemSize": 0 } \ No newline at end of file +{ "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1", "SampleCardinalityTarget": 1063, "SourceCardinality": 0, "SourceAvgItemSize": 0, "SampleSeed": true } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.5.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.5.adm index a8a77bd0fc..605bb1270e 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.5.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.5.adm @@ -1 +1 @@ -{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1", "SampleCardinalityTarget": 1063, "SourceCardinality": 8, "SourceAvgItemSize": true }, "stats": { "cnt": 8, "min_pk": 1, "max_pk": 8, "min_x": -8, "max_x": -1 } } \ No newline at end of file +{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1", "SampleCardinalityTarget": 1063, "SourceCardinality": 8, "SourceAvgItemSize": true, "SampleSeed": 123 }, "stats": { "cnt": 8, "min_pk": 1, "max_pk": 8, "min_x": -8, "max_x": -1 } } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.7.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.7.adm index a8a77bd0fc..605bb1270e 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.7.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.7.adm @@ -1 +1 @@ -{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1", "SampleCardinalityTarget": 1063, "SourceCardinality": 8, "SourceAvgItemSize": true }, "stats": { "cnt": 8, "min_pk": 1, "max_pk": 8, "min_x": -8, "max_x": -1 } } \ No newline at end of file +{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1", "SampleCardinalityTarget": 1063, "SourceCardinality": 8, "SourceAvgItemSize": true, "SampleSeed": 123 }, "stats": { "cnt": 8, "min_pk": 1, "max_pk": 8, "min_x": -8, "max_x": -1 } } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.9.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.9.adm index ee7f2c013f..0084d2bc86 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.9.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.9.adm @@ -1 +1 @@ -{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1", "SampleCardinalityTarget": 1063, "SourceCardinality": 1100, "SourceAvgItemSize": true }, "stats": { "cnt": 1033, "min_pk": true, "max_pk": true, "min_x": true, "max_x": true } } \ No newline at end of file +{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1", "SampleCardinalityTarget": 1063, "SourceCardinality": 1100, "SourceAvgItemSize": true, "SampleSeed": true }, "stats": { "cnt": 1033, "min_pk": true, "max_pk": true, "min_x": true, "max_x": true } } \ No newline at end of file diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java index 7e6e99dbcc..cbf2c071fe 100644 --- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java +++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java @@ -34,6 +34,7 @@ import org.apache.asterix.object.base.AdmObjectNode; import org.apache.asterix.object.base.AdmStringNode; import org.apache.asterix.object.base.IAdmNode; import org.apache.asterix.om.types.BuiltinType; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; public class AnalyzeStatement extends AbstractStatement { @@ -46,6 +47,8 @@ public class AnalyzeStatement extends AbstractStatement { private static final int SAMPLE_HIGH_SIZE = SAMPLE_MEDIUM_SIZE * 4; private static final int SAMPLE_DEFAULT_SIZE = SAMPLE_LOW_SIZE; + private static final String SAMPLE_SEED_FIELD_NAME = "sample-seed"; + private final DataverseName dataverseName; private final String datasetName; private final AdmObjectNode options; @@ -54,7 +57,20 @@ public class AnalyzeStatement extends AbstractStatement { throws CompilationException { this.dataverseName = dataverseName; this.datasetName = datasetName; - this.options = options == null ? null : ExpressionUtils.toNode(options); + this.options = options == null ? null : validateOptions(ExpressionUtils.toNode(options)); + } + + private static AdmObjectNode validateOptions(AdmObjectNode options) throws CompilationException { + for (String fieldName : options.getFieldNames()) { + switch (fieldName) { + case SAMPLE_FIELD_NAME: + case SAMPLE_SEED_FIELD_NAME: + break; + default: + throw new CompilationException(ErrorCode.INVALID_PARAM, fieldName); + } + } + return options; } @Override @@ -106,6 +122,34 @@ public class AnalyzeStatement extends AbstractStatement { } } + public long getOrCreateSampleSeed() throws AlgebricksException { + IAdmNode n = getOption(SAMPLE_SEED_FIELD_NAME); + return n != null ? getSampleSeed(n) : createSampleSeed(); + } + + private long getSampleSeed(IAdmNode n) throws CompilationException { + switch (n.getType()) { + case BIGINT: + return ((AdmBigIntNode) n).get(); + case DOUBLE: + return (long) ((AdmDoubleNode) n).get(); + case STRING: + String s = ((AdmStringNode) n).get(); + try { + return Long.parseLong(s); + } catch (NumberFormatException e) { + throw new CompilationException(ErrorCode.INVALID_PROPERTY_FORMAT, SAMPLE_SEED_FIELD_NAME); + } + default: + throw new CompilationException(ErrorCode.WITH_FIELD_MUST_BE_OF_TYPE, SAMPLE_SEED_FIELD_NAME, + BuiltinType.AINT64.getTypeName(), n.getType().toString()); + } + } + + private long createSampleSeed() { + return System.nanoTime() + System.identityHashCode(this); + } + private boolean isValidSampleSize(int v) { return v >= SAMPLE_LOW_SIZE && v <= SAMPLE_HIGH_SIZE; } diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Index.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Index.java index eae81d5b04..21d2aaac6b 100644 --- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Index.java +++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Index.java @@ -553,15 +553,18 @@ public class Index implements IMetadataEntity<Index>, Comparable<Index> { private final int sourceAvgItemSize; + private final long sampleSeed; + public SampleIndexDetails(List<List<String>> keyFieldNames, List<Integer> keyFieldSourceIndicators, - List<IAType> keyFieldTypes, int sampleCardinalityTarget, long sourceCardinality, - int sourceAvgItemSize) { + List<IAType> keyFieldTypes, int sampleCardinalityTarget, long sourceCardinality, int sourceAvgItemSize, + long sampleSeed) { this.keyFieldNames = keyFieldNames; this.keyFieldSourceIndicators = keyFieldSourceIndicators; this.keyFieldTypes = keyFieldTypes; this.sampleCardinalityTarget = sampleCardinalityTarget; this.sourceCardinality = sourceCardinality; this.sourceAvgItemSize = sourceAvgItemSize; + this.sampleSeed = sampleSeed; } @Override @@ -597,6 +600,10 @@ public class Index implements IMetadataEntity<Index>, Comparable<Index> { public int getSourceAvgItemSize() { return sourceAvgItemSize; } + + public long getSampleSeed() { + return sampleSeed; + } } @Deprecated diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslator.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslator.java index 967c2ba128..9c742ed9c2 100644 --- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslator.java +++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslator.java @@ -91,6 +91,7 @@ public class IndexTupleTranslator extends AbstractTupleTranslator<Index> { public static final String INDEX_SEARCHKEY_ELEMENTS_FIELD_NAME = "SearchKeyElements"; public static final String COMPLEXSEARCHKEY_UNNEST_FIELD_NAME = "UnnestList"; public static final String COMPLEXSEARCHKEY_PROJECT_FIELD_NAME = "ProjectList"; + public static final String SAMPLE_SEED = "SampleSeed"; public static final String SAMPLE_CARDINALITY_TARGET = "SampleCardinalityTarget"; public static final String SOURCE_CARDINALITY = "SourceCardinality"; public static final String SOURCE_AVG_ITEM_SIZE = "SourceAvgItemSize"; @@ -464,6 +465,12 @@ public class IndexTupleTranslator extends AbstractTupleTranslator<Index> { searchElements.stream().map(Pair::getSecond).map(l -> l.get(0)).collect(Collectors.toList()); keyFieldTypes = searchKeyType.stream().map(l -> l.get(0)).collect(Collectors.toList()); + int sampleSeedPos = indexRecord.getType().getFieldIndex(SAMPLE_SEED); + if (sampleSeedPos < 0) { + throw new AsterixException(ErrorCode.METADATA_ERROR, SAMPLE_SEED); + } + long sampleSeed = ((AInt64) indexRecord.getValueByPos(sampleSeedPos)).getLongValue(); + int sampleCardinalityTargetPos = indexRecord.getType().getFieldIndex(SAMPLE_CARDINALITY_TARGET); if (sampleCardinalityTargetPos < 0) { throw new AsterixException(ErrorCode.METADATA_ERROR, SAMPLE_CARDINALITY_TARGET); @@ -484,7 +491,7 @@ public class IndexTupleTranslator extends AbstractTupleTranslator<Index> { int sourceAvgItemSize = ((AInt32) indexRecord.getValueByPos(sourceAvgItemSizePos)).getIntegerValue(); indexDetails = new Index.SampleIndexDetails(keyFieldNames, keyFieldSourceIndicator, keyFieldTypes, - sampleCardinalityTarget, sourceCardinality, sourceAvgItemSize); + sampleCardinalityTarget, sourceCardinality, sourceAvgItemSize, sampleSeed); break; default: throw new AsterixException(ErrorCode.METADATA_ERROR, indexType.toString()); @@ -901,6 +908,13 @@ public class IndexTupleTranslator extends AbstractTupleTranslator<Index> { if (index.getIndexType() == IndexType.SAMPLE) { Index.SampleIndexDetails indexDetails = (Index.SampleIndexDetails) index.getIndexDetails(); + nameValue.reset(); + fieldValue.reset(); + aString.setValue(SAMPLE_SEED); + stringSerde.serialize(aString, nameValue.getDataOutput()); + int64Serde.serialize(new AInt64(indexDetails.getSampleSeed()), fieldValue.getDataOutput()); + recordBuilder.addField(nameValue, fieldValue); + nameValue.reset(); fieldValue.reset(); aString.setValue(SAMPLE_CARDINALITY_TARGET); diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SampleOperationsHelper.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SampleOperationsHelper.java index 28e1ac2e1e..0d3e015c0f 100644 --- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SampleOperationsHelper.java +++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SampleOperationsHelper.java @@ -157,6 +157,7 @@ public class SampleOperationsHelper implements ISecondaryIndexOperationsHelper { public JobSpecification buildLoadingJobSpec() throws AlgebricksException { Index.SampleIndexDetails indexDetails = (Index.SampleIndexDetails) index.getIndexDetails(); int sampleCardinalityTarget = indexDetails.getSampleCardinalityTarget(); + long sampleSeed = indexDetails.getSampleSeed(); IDataFormat format = metadataProvider.getDataFormat(); int nFields = recordDesc.getFieldCount(); int[] columns = new int[nFields]; @@ -211,7 +212,7 @@ public class SampleOperationsHelper implements ISecondaryIndexOperationsHelper { RecordDescriptor raggRecordDesc = new RecordDescriptor(raggSerdes, raggTraits); IRunningAggregateEvaluatorFactory raggSlotEvalFactory = - new SampleSlotRunningAggregateFunctionFactory(sampleCardinalityTarget); + new SampleSlotRunningAggregateFunctionFactory(sampleCardinalityTarget, sampleSeed); IRunningAggregateEvaluatorFactory raggCounterEvalFactory = TidRunningAggregateDescriptor.FACTORY .createFunctionDescriptor().createRunningAggregateEvaluatorFactory(new IScalarEvaluatorFactory[0]); RunningAggregateRuntimeFactory raggRuntimeFactory = diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/runningaggregates/std/SampleSlotRunningAggregateFunctionFactory.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/runningaggregates/std/SampleSlotRunningAggregateFunctionFactory.java index c53da46c90..a4bda44085 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/runningaggregates/std/SampleSlotRunningAggregateFunctionFactory.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/runningaggregates/std/SampleSlotRunningAggregateFunctionFactory.java @@ -41,12 +41,15 @@ import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference; */ public class SampleSlotRunningAggregateFunctionFactory implements IRunningAggregateEvaluatorFactory { - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 2L; private final int sampleCardinalityTarget; - public SampleSlotRunningAggregateFunctionFactory(int sampleCardinalityTarget) { + private final long sampleSeed; + + public SampleSlotRunningAggregateFunctionFactory(int sampleCardinalityTarget, long sampleSeed) { this.sampleCardinalityTarget = sampleCardinalityTarget; + this.sampleSeed = sampleSeed; } @Override @@ -65,7 +68,7 @@ public class SampleSlotRunningAggregateFunctionFactory implements IRunningAggreg SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AINT32); private final AMutableInt32 aInt32 = new AMutableInt32(0); - private final Random rnd = new Random(); + private final Random rnd = new Random(sampleSeed); private long counter; @Override
