This is an automated email from the ASF dual-hosted git repository.
dlych pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
new 2aed7aa66f [NO ISSUE][COMP] Add sample-seed parameter to ANALYZE DATASET
2aed7aa66f is described below
commit 2aed7aa66f73ebbb5b77288051c54cbe21e7a03e
Author: Dmitry Lychagin <[email protected]>
AuthorDate: Fri Jun 17 18:18:52 2022 -0700
[NO ISSUE][COMP] Add sample-seed parameter to ANALYZE DATASET
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
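- Add sample-seed parameter to ANALYZE DATASET statement, e.g.
  ANALYZE DATASET ds1 WITH { "sample": "low", "sample-seed": 123 };
  the seed is used to initialize the sampling random number generator,
  is recorded as SampleSeed in the sample index metadata, and is
  generated automatically when the parameter is omitted
- Update test cases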
Change-Id: I78429541bf7d720cc73dc674dd532f7a1f066a24
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/16584
Contrib: Jenkins <[email protected]>
Reviewed-by: Ali Alsuliman <[email protected]>
Integration-Tests: Jenkins <[email protected]>
Tested-by: Jenkins <[email protected]>
---
.../asterix/app/translator/QueryTranslator.java | 8 ++--
.../analyze-dataset-1.1.ddl.sqlpp | 5 ++-
.../analyze-dataset-1.10.ddl.sqlpp | 2 +-
.../analyze-dataset-1.11.query.sqlpp | 2 +-
.../analyze-dataset-1.14.query.sqlpp | 2 +-
.../analyze-dataset-1.15.ddl.sqlpp | 2 +-
.../analyze-dataset-1.16.query.sqlpp | 2 +-
.../analyze-dataset-1.19.query.sqlpp | 2 +-
.../analyze-dataset-1.2.query.sqlpp | 2 +-
.../analyze-dataset-1.21.query.sqlpp | 2 +-
.../analyze-dataset-1.4.ddl.sqlpp | 2 +-
.../analyze-dataset-1.5.query.sqlpp | 2 +-
.../analyze-dataset-1.7.query.sqlpp | 2 +-
.../analyze-dataset-1.9.query.sqlpp | 2 +-
.../ddl/analyze-dataset-1/analyze-dataset-1.11.adm | 2 +-
.../ddl/analyze-dataset-1/analyze-dataset-1.14.adm | 2 +-
.../ddl/analyze-dataset-1/analyze-dataset-1.16.adm | 2 +-
.../ddl/analyze-dataset-1/analyze-dataset-1.19.adm | 2 +-
.../ddl/analyze-dataset-1/analyze-dataset-1.2.adm | 2 +-
.../ddl/analyze-dataset-1/analyze-dataset-1.5.adm | 2 +-
.../ddl/analyze-dataset-1/analyze-dataset-1.7.adm | 2 +-
.../ddl/analyze-dataset-1/analyze-dataset-1.9.adm | 2 +-
.../lang/common/statement/AnalyzeStatement.java | 46 +++++++++++++++++++++-
.../apache/asterix/metadata/entities/Index.java | 11 +++++-
.../IndexTupleTranslator.java | 16 +++++++-
.../metadata/utils/SampleOperationsHelper.java | 3 +-
.../SampleSlotRunningAggregateFunctionFactory.java | 9 +++--
27 files changed, 105 insertions(+), 33 deletions(-)
diff --git
a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
index 8c18b18add..211e1c1621 100644
---
a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
+++
b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
@@ -4265,9 +4265,11 @@ public class QueryTranslator extends
AbstractLangTranslator implements IStatemen
InternalDatasetDetails dsDetails = (InternalDatasetDetails)
ds.getDatasetDetails();
int sampleCardinalityTarget = stmtAnalyze.getSampleSize();
+ long sampleSeed = stmtAnalyze.getOrCreateSampleSeed();
- Index.SampleIndexDetails newIndexDetailsPendingAdd = new
Index.SampleIndexDetails(dsDetails.getPrimaryKey(),
- dsDetails.getKeySourceIndicator(),
dsDetails.getPrimaryKeyType(), sampleCardinalityTarget, 0, 0);
+ Index.SampleIndexDetails newIndexDetailsPendingAdd =
+ new Index.SampleIndexDetails(dsDetails.getPrimaryKey(),
dsDetails.getKeySourceIndicator(),
+ dsDetails.getPrimaryKeyType(),
sampleCardinalityTarget, 0, 0, sampleSeed);
newIndexPendingAdd = new Index(dataverseName, datasetName,
newIndexName, sampleIndexType,
newIndexDetailsPendingAdd, false, false,
MetadataUtil.PENDING_ADD_OP);
@@ -4309,7 +4311,7 @@ public class QueryTranslator extends
AbstractLangTranslator implements IStatemen
Index.SampleIndexDetails newIndexDetailsFinal = new
Index.SampleIndexDetails(dsDetails.getPrimaryKey(),
dsDetails.getKeySourceIndicator(),
dsDetails.getPrimaryKeyType(), sampleCardinalityTarget,
- stats.getCardinality(), stats.getAvgTupleSize());
+ stats.getCardinality(), stats.getAvgTupleSize(),
sampleSeed);
Index newIndexFinal = new Index(dataverseName, datasetName,
newIndexName, sampleIndexType,
newIndexDetailsFinal, false, false,
MetadataUtil.PENDING_NO_OP);
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.1.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.1.ddl.sqlpp
index 50daffd58e..e1d6b1020e 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.1.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.1.ddl.sqlpp
@@ -27,9 +27,10 @@ drop dataverse test if exists;
create dataverse test;
use test;
-create function listMetadata(showSourceAvgItemSize) {
+create function listMetadata(showSourceAvgItemSize, showSeed) {
select i.DatasetName, i.IndexName, i.SampleCardinalityTarget,
i.SourceCardinality,
- case when showSourceAvgItemSize then i.SourceAvgItemSize else
i.SourceAvgItemSize > 0 end as SourceAvgItemSize
+ case when showSourceAvgItemSize then i.SourceAvgItemSize else
i.SourceAvgItemSize > 0 end as SourceAvgItemSize,
+ case when showSeed then i.SampleSeed else i.SampleSeed is known end as
SampleSeed
from Metadata.`Index` i
where i.DataverseName = "test" and i.IndexName like "sample_idx%"
order by i.IndexName
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.10.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.10.ddl.sqlpp
index 1de0947dfa..da5fe13a8d 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.10.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.10.ddl.sqlpp
@@ -24,4 +24,4 @@
use test;
-analyze dataset test.ds1 with { "sample": "medium" };
+analyze dataset test.ds1 with { "sample": "medium", "sample-seed": 234.0 };
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.11.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.11.query.sqlpp
index 549d273331..38ded0a330 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.11.query.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.11.query.sqlpp
@@ -26,5 +26,5 @@ set `import-private-functions` `true`;
use test;
select * from
- listMetadata(false) metadata,
+ listMetadata(false, true) metadata,
showSampleStats("ds1", "sample_idx_2_ds1", true) stats;
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.14.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.14.query.sqlpp
index b206f5920b..4cae2027a4 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.14.query.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.14.query.sqlpp
@@ -26,5 +26,5 @@ set `import-private-functions` `true`;
use test;
select * from
- listMetadata(false) metadata,
+ listMetadata(false, false) metadata,
showSampleStats("ds1", "sample_idx_1_ds1", false) stats;
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.15.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.15.ddl.sqlpp
index 0cdaf19fcc..6ceb81416d 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.15.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.15.ddl.sqlpp
@@ -24,4 +24,4 @@
use test;
-analyze dataset ds1 with { "sample": "high" };
+analyze dataset ds1 with { "sample": "high", "sample-seed": "345" };
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.16.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.16.query.sqlpp
index 549d273331..38ded0a330 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.16.query.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.16.query.sqlpp
@@ -26,5 +26,5 @@ set `import-private-functions` `true`;
use test;
select * from
- listMetadata(false) metadata,
+ listMetadata(false, true) metadata,
showSampleStats("ds1", "sample_idx_2_ds1", true) stats;
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.19.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.19.query.sqlpp
index b206f5920b..4cae2027a4 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.19.query.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.19.query.sqlpp
@@ -26,5 +26,5 @@ set `import-private-functions` `true`;
use test;
select * from
- listMetadata(false) metadata,
+ listMetadata(false, false) metadata,
showSampleStats("ds1", "sample_idx_1_ds1", false) stats;
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.2.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.2.query.sqlpp
index 0f1edbd43e..e786e0ef0a 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.2.query.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.2.query.sqlpp
@@ -24,4 +24,4 @@
use test;
-listMetadata(true);
+listMetadata(true, false);
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp
index 587629e858..759fc3f417 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp
@@ -24,4 +24,4 @@
use test;
select count(*) cnt
-from listMetadata(true) v;
+from listMetadata(true, false) v;
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.4.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.4.ddl.sqlpp
index 3993a1c565..ed97897046 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.4.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.4.ddl.sqlpp
@@ -21,4 +21,4 @@
* Description: Test sample size parameter
*/
-analyze dataset test.ds1 with { "sample": "low" };
+analyze dataset test.ds1 with { "sample": "low", "sample-seed": 123 };
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.5.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.5.query.sqlpp
index 243dab84ab..e0cd6cc7c5 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.5.query.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.5.query.sqlpp
@@ -27,5 +27,5 @@ set `import-private-functions` `true`;
use test;
select * from
- listMetadata(false) metadata,
+ listMetadata(false, true) metadata,
showSampleStats("ds1", "sample_idx_2_ds1", true) stats
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.7.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.7.query.sqlpp
index d984ef5f6e..c4930b0920 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.7.query.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.7.query.sqlpp
@@ -27,5 +27,5 @@ set `import-private-functions` `true`;
use test;
select * from
- listMetadata(false) metadata,
+ listMetadata(false, true) metadata,
showSampleStats("ds1", "sample_idx_2_ds1", true) stats;
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.9.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.9.query.sqlpp
index b206f5920b..4cae2027a4 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.9.query.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.9.query.sqlpp
@@ -26,5 +26,5 @@ set `import-private-functions` `true`;
use test;
select * from
- listMetadata(false) metadata,
+ listMetadata(false, false) metadata,
showSampleStats("ds1", "sample_idx_1_ds1", false) stats;
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.11.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.11.adm
index 534cc7a9c0..58f454b3ed 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.11.adm
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.11.adm
@@ -1 +1 @@
-{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1",
"SampleCardinalityTarget": 4252, "SourceCardinality": 1100,
"SourceAvgItemSize": true }, "stats": { "cnt": 1100, "min_pk": 1, "max_pk":
1100, "min_x": -1100, "max_x": -1 } }
\ No newline at end of file
+{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1",
"SampleCardinalityTarget": 4252, "SourceCardinality": 1100,
"SourceAvgItemSize": true, "SampleSeed": 234 }, "stats": { "cnt": 1100,
"min_pk": 1, "max_pk": 1100, "min_x": -1100, "max_x": -1 } }
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.14.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.14.adm
index ee57f4cae6..6ef756af2f 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.14.adm
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.14.adm
@@ -1 +1 @@
-{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1",
"SampleCardinalityTarget": 4252, "SourceCardinality": 4400,
"SourceAvgItemSize": true }, "stats": { "cnt": 4246, "min_pk": true, "max_pk":
true, "min_x": true, "max_x": true } }
\ No newline at end of file
+{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1",
"SampleCardinalityTarget": 4252, "SourceCardinality": 4400,
"SourceAvgItemSize": true, "SampleSeed": true }, "stats": { "cnt": 4246,
"min_pk": true, "max_pk": true, "min_x": true, "max_x": true } }
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.16.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.16.adm
index b46ed0b4ce..01eb5b74a8 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.16.adm
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.16.adm
@@ -1 +1 @@
-{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1",
"SampleCardinalityTarget": 17008, "SourceCardinality": 4400,
"SourceAvgItemSize": true }, "stats": { "cnt": 4400, "min_pk": 1, "max_pk":
4400, "min_x": -4400, "max_x": -1 } }
\ No newline at end of file
+{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1",
"SampleCardinalityTarget": 17008, "SourceCardinality": 4400,
"SourceAvgItemSize": true, "SampleSeed": 345 }, "stats": { "cnt": 4400,
"min_pk": 1, "max_pk": 4400, "min_x": -4400, "max_x": -1 } }
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.19.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.19.adm
index 74d092707f..60b969f490 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.19.adm
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.19.adm
@@ -1 +1 @@
-{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1",
"SampleCardinalityTarget": 17008, "SourceCardinality": 17100,
"SourceAvgItemSize": true }, "stats": { "cnt": 16972, "min_pk": true, "max_pk":
true, "min_x": true, "max_x": true } }
\ No newline at end of file
+{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1",
"SampleCardinalityTarget": 17008, "SourceCardinality": 17100,
"SourceAvgItemSize": true, "SampleSeed": true }, "stats": { "cnt": 16972,
"min_pk": true, "max_pk": true, "min_x": true, "max_x": true } }
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.2.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.2.adm
index ab853ec2d0..e3cefeeb10 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.2.adm
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.2.adm
@@ -1 +1 @@
-{ "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1",
"SampleCardinalityTarget": 1063, "SourceCardinality": 0, "SourceAvgItemSize": 0
}
\ No newline at end of file
+{ "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1",
"SampleCardinalityTarget": 1063, "SourceCardinality": 0, "SourceAvgItemSize":
0, "SampleSeed": true }
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.5.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.5.adm
index a8a77bd0fc..605bb1270e 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.5.adm
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.5.adm
@@ -1 +1 @@
-{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1",
"SampleCardinalityTarget": 1063, "SourceCardinality": 8, "SourceAvgItemSize":
true }, "stats": { "cnt": 8, "min_pk": 1, "max_pk": 8, "min_x": -8, "max_x": -1
} }
\ No newline at end of file
+{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1",
"SampleCardinalityTarget": 1063, "SourceCardinality": 8, "SourceAvgItemSize":
true, "SampleSeed": 123 }, "stats": { "cnt": 8, "min_pk": 1, "max_pk": 8,
"min_x": -8, "max_x": -1 } }
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.7.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.7.adm
index a8a77bd0fc..605bb1270e 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.7.adm
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.7.adm
@@ -1 +1 @@
-{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1",
"SampleCardinalityTarget": 1063, "SourceCardinality": 8, "SourceAvgItemSize":
true }, "stats": { "cnt": 8, "min_pk": 1, "max_pk": 8, "min_x": -8, "max_x": -1
} }
\ No newline at end of file
+{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1",
"SampleCardinalityTarget": 1063, "SourceCardinality": 8, "SourceAvgItemSize":
true, "SampleSeed": 123 }, "stats": { "cnt": 8, "min_pk": 1, "max_pk": 8,
"min_x": -8, "max_x": -1 } }
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.9.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.9.adm
index ee7f2c013f..0084d2bc86 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.9.adm
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.9.adm
@@ -1 +1 @@
-{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1",
"SampleCardinalityTarget": 1063, "SourceCardinality": 1100,
"SourceAvgItemSize": true }, "stats": { "cnt": 1033, "min_pk": true, "max_pk":
true, "min_x": true, "max_x": true } }
\ No newline at end of file
+{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_1_ds1",
"SampleCardinalityTarget": 1063, "SourceCardinality": 1100,
"SourceAvgItemSize": true, "SampleSeed": true }, "stats": { "cnt": 1033,
"min_pk": true, "max_pk": true, "min_x": true, "max_x": true } }
\ No newline at end of file
diff --git
a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java
b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java
index 7e6e99dbcc..cbf2c071fe 100644
---
a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java
+++
b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java
@@ -34,6 +34,7 @@ import org.apache.asterix.object.base.AdmObjectNode;
import org.apache.asterix.object.base.AdmStringNode;
import org.apache.asterix.object.base.IAdmNode;
import org.apache.asterix.om.types.BuiltinType;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
public class AnalyzeStatement extends AbstractStatement {
@@ -46,6 +47,8 @@ public class AnalyzeStatement extends AbstractStatement {
private static final int SAMPLE_HIGH_SIZE = SAMPLE_MEDIUM_SIZE * 4;
private static final int SAMPLE_DEFAULT_SIZE = SAMPLE_LOW_SIZE;
+ private static final String SAMPLE_SEED_FIELD_NAME = "sample-seed";
+
private final DataverseName dataverseName;
private final String datasetName;
private final AdmObjectNode options;
@@ -54,7 +57,20 @@ public class AnalyzeStatement extends AbstractStatement {
throws CompilationException {
this.dataverseName = dataverseName;
this.datasetName = datasetName;
- this.options = options == null ? null :
ExpressionUtils.toNode(options);
+ this.options = options == null ? null :
validateOptions(ExpressionUtils.toNode(options));
+ }
+
+ private static AdmObjectNode validateOptions(AdmObjectNode options) throws
CompilationException {
+ for (String fieldName : options.getFieldNames()) {
+ switch (fieldName) {
+ case SAMPLE_FIELD_NAME:
+ case SAMPLE_SEED_FIELD_NAME:
+ break;
+ default:
+ throw new CompilationException(ErrorCode.INVALID_PARAM,
fieldName);
+ }
+ }
+ return options;
}
@Override
@@ -106,6 +122,34 @@ public class AnalyzeStatement extends AbstractStatement {
}
}
+ public long getOrCreateSampleSeed() throws AlgebricksException {
+ IAdmNode n = getOption(SAMPLE_SEED_FIELD_NAME);
+ return n != null ? getSampleSeed(n) : createSampleSeed();
+ }
+
+ private long getSampleSeed(IAdmNode n) throws CompilationException {
+ switch (n.getType()) {
+ case BIGINT:
+ return ((AdmBigIntNode) n).get();
+ case DOUBLE:
+ return (long) ((AdmDoubleNode) n).get();
+ case STRING:
+ String s = ((AdmStringNode) n).get();
+ try {
+ return Long.parseLong(s);
+ } catch (NumberFormatException e) {
+ throw new
CompilationException(ErrorCode.INVALID_PROPERTY_FORMAT, SAMPLE_SEED_FIELD_NAME);
+ }
+ default:
+ throw new
CompilationException(ErrorCode.WITH_FIELD_MUST_BE_OF_TYPE,
SAMPLE_SEED_FIELD_NAME,
+ BuiltinType.AINT64.getTypeName(),
n.getType().toString());
+ }
+ }
+
+ private long createSampleSeed() {
+ return System.nanoTime() + System.identityHashCode(this);
+ }
+
private boolean isValidSampleSize(int v) {
return v >= SAMPLE_LOW_SIZE && v <= SAMPLE_HIGH_SIZE;
}
diff --git
a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Index.java
b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Index.java
index eae81d5b04..21d2aaac6b 100644
---
a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Index.java
+++
b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Index.java
@@ -553,15 +553,18 @@ public class Index implements IMetadataEntity<Index>,
Comparable<Index> {
private final int sourceAvgItemSize;
+ private final long sampleSeed;
+
public SampleIndexDetails(List<List<String>> keyFieldNames,
List<Integer> keyFieldSourceIndicators,
- List<IAType> keyFieldTypes, int sampleCardinalityTarget, long
sourceCardinality,
- int sourceAvgItemSize) {
+ List<IAType> keyFieldTypes, int sampleCardinalityTarget, long
sourceCardinality, int sourceAvgItemSize,
+ long sampleSeed) {
this.keyFieldNames = keyFieldNames;
this.keyFieldSourceIndicators = keyFieldSourceIndicators;
this.keyFieldTypes = keyFieldTypes;
this.sampleCardinalityTarget = sampleCardinalityTarget;
this.sourceCardinality = sourceCardinality;
this.sourceAvgItemSize = sourceAvgItemSize;
+ this.sampleSeed = sampleSeed;
}
@Override
@@ -597,6 +600,10 @@ public class Index implements IMetadataEntity<Index>,
Comparable<Index> {
public int getSourceAvgItemSize() {
return sourceAvgItemSize;
}
+
+ public long getSampleSeed() {
+ return sampleSeed;
+ }
}
@Deprecated
diff --git
a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslator.java
b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslator.java
index 967c2ba128..9c742ed9c2 100644
---
a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslator.java
+++
b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslator.java
@@ -91,6 +91,7 @@ public class IndexTupleTranslator extends
AbstractTupleTranslator<Index> {
public static final String INDEX_SEARCHKEY_ELEMENTS_FIELD_NAME =
"SearchKeyElements";
public static final String COMPLEXSEARCHKEY_UNNEST_FIELD_NAME =
"UnnestList";
public static final String COMPLEXSEARCHKEY_PROJECT_FIELD_NAME =
"ProjectList";
+ public static final String SAMPLE_SEED = "SampleSeed";
public static final String SAMPLE_CARDINALITY_TARGET =
"SampleCardinalityTarget";
public static final String SOURCE_CARDINALITY = "SourceCardinality";
public static final String SOURCE_AVG_ITEM_SIZE = "SourceAvgItemSize";
@@ -464,6 +465,12 @@ public class IndexTupleTranslator extends
AbstractTupleTranslator<Index> {
searchElements.stream().map(Pair::getSecond).map(l ->
l.get(0)).collect(Collectors.toList());
keyFieldTypes = searchKeyType.stream().map(l ->
l.get(0)).collect(Collectors.toList());
+ int sampleSeedPos =
indexRecord.getType().getFieldIndex(SAMPLE_SEED);
+ if (sampleSeedPos < 0) {
+ throw new AsterixException(ErrorCode.METADATA_ERROR,
SAMPLE_SEED);
+ }
+ long sampleSeed = ((AInt64)
indexRecord.getValueByPos(sampleSeedPos)).getLongValue();
+
int sampleCardinalityTargetPos =
indexRecord.getType().getFieldIndex(SAMPLE_CARDINALITY_TARGET);
if (sampleCardinalityTargetPos < 0) {
throw new AsterixException(ErrorCode.METADATA_ERROR,
SAMPLE_CARDINALITY_TARGET);
@@ -484,7 +491,7 @@ public class IndexTupleTranslator extends
AbstractTupleTranslator<Index> {
int sourceAvgItemSize = ((AInt32)
indexRecord.getValueByPos(sourceAvgItemSizePos)).getIntegerValue();
indexDetails = new Index.SampleIndexDetails(keyFieldNames,
keyFieldSourceIndicator, keyFieldTypes,
- sampleCardinalityTarget, sourceCardinality,
sourceAvgItemSize);
+ sampleCardinalityTarget, sourceCardinality,
sourceAvgItemSize, sampleSeed);
break;
default:
throw new AsterixException(ErrorCode.METADATA_ERROR,
indexType.toString());
@@ -901,6 +908,13 @@ public class IndexTupleTranslator extends
AbstractTupleTranslator<Index> {
if (index.getIndexType() == IndexType.SAMPLE) {
Index.SampleIndexDetails indexDetails = (Index.SampleIndexDetails)
index.getIndexDetails();
+ nameValue.reset();
+ fieldValue.reset();
+ aString.setValue(SAMPLE_SEED);
+ stringSerde.serialize(aString, nameValue.getDataOutput());
+ int64Serde.serialize(new AInt64(indexDetails.getSampleSeed()),
fieldValue.getDataOutput());
+ recordBuilder.addField(nameValue, fieldValue);
+
nameValue.reset();
fieldValue.reset();
aString.setValue(SAMPLE_CARDINALITY_TARGET);
diff --git
a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SampleOperationsHelper.java
b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SampleOperationsHelper.java
index 28e1ac2e1e..0d3e015c0f 100644
---
a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SampleOperationsHelper.java
+++
b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SampleOperationsHelper.java
@@ -157,6 +157,7 @@ public class SampleOperationsHelper implements
ISecondaryIndexOperationsHelper {
public JobSpecification buildLoadingJobSpec() throws AlgebricksException {
Index.SampleIndexDetails indexDetails = (Index.SampleIndexDetails)
index.getIndexDetails();
int sampleCardinalityTarget =
indexDetails.getSampleCardinalityTarget();
+ long sampleSeed = indexDetails.getSampleSeed();
IDataFormat format = metadataProvider.getDataFormat();
int nFields = recordDesc.getFieldCount();
int[] columns = new int[nFields];
@@ -211,7 +212,7 @@ public class SampleOperationsHelper implements
ISecondaryIndexOperationsHelper {
RecordDescriptor raggRecordDesc = new RecordDescriptor(raggSerdes,
raggTraits);
IRunningAggregateEvaluatorFactory raggSlotEvalFactory =
- new
SampleSlotRunningAggregateFunctionFactory(sampleCardinalityTarget);
+ new
SampleSlotRunningAggregateFunctionFactory(sampleCardinalityTarget, sampleSeed);
IRunningAggregateEvaluatorFactory raggCounterEvalFactory =
TidRunningAggregateDescriptor.FACTORY
.createFunctionDescriptor().createRunningAggregateEvaluatorFactory(new
IScalarEvaluatorFactory[0]);
RunningAggregateRuntimeFactory raggRuntimeFactory =
diff --git
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/runningaggregates/std/SampleSlotRunningAggregateFunctionFactory.java
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/runningaggregates/std/SampleSlotRunningAggregateFunctionFactory.java
index c53da46c90..a4bda44085 100644
---
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/runningaggregates/std/SampleSlotRunningAggregateFunctionFactory.java
+++
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/runningaggregates/std/SampleSlotRunningAggregateFunctionFactory.java
@@ -41,12 +41,15 @@ import
org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
*/
public class SampleSlotRunningAggregateFunctionFactory implements
IRunningAggregateEvaluatorFactory {
- private static final long serialVersionUID = 1L;
+ private static final long serialVersionUID = 2L;
private final int sampleCardinalityTarget;
- public SampleSlotRunningAggregateFunctionFactory(int
sampleCardinalityTarget) {
+ private final long sampleSeed;
+
+ public SampleSlotRunningAggregateFunctionFactory(int
sampleCardinalityTarget, long sampleSeed) {
this.sampleCardinalityTarget = sampleCardinalityTarget;
+ this.sampleSeed = sampleSeed;
}
@Override
@@ -65,7 +68,7 @@ public class SampleSlotRunningAggregateFunctionFactory
implements IRunningAggreg
SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AINT32);
private final AMutableInt32 aInt32 = new AMutableInt32(0);
- private final Random rnd = new Random();
+ private final Random rnd = new Random(sampleSeed);
private long counter;
@Override