This is an automated email from the ASF dual-hosted git repository.
vogievetsky pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 9679f6a9b5 Web console: add arrayOfDoublesSketch and other small fixes
(#13486)
9679f6a9b5 is described below
commit 9679f6a9b5c84a7618529f1739a6256e04c9da56
Author: Vadim Ogievetsky <[email protected]>
AuthorDate: Tue Dec 6 21:21:49 2022 -0800
Web console: add arrayOfDoublesSketch and other small fixes (#13486)
* add padding and keywords
* add arrayOfDoubles
* Update docs/development/extensions-core/datasketches-tuple.md
Co-authored-by: Charles Smith <[email protected]>
* Update docs/development/extensions-core/datasketches-tuple.md
Co-authored-by: Charles Smith <[email protected]>
* Update docs/development/extensions-core/datasketches-tuple.md
Co-authored-by: Charles Smith <[email protected]>
* Update docs/development/extensions-core/datasketches-tuple.md
Co-authored-by: Charles Smith <[email protected]>
* Update docs/development/extensions-core/datasketches-tuple.md
Co-authored-by: Charles Smith <[email protected]>
* partiton int
* fix docs
Co-authored-by: Charles Smith <[email protected]>
---
.../extensions-core/datasketches-tuple.md | 43 +++++++++++++++++++---
web-console/lib/keywords.js | 3 ++
web-console/script/create-sql-docs.js | 4 +-
web-console/src/bootstrap/react-table-defaults.tsx | 4 +-
.../segment-timeline/segment-timeline.tsx | 2 +-
.../ingest-query-pattern/ingest-query-pattern.ts | 3 +-
.../src/druid-models/metric-spec/metric-spec.tsx | 43 ++++++++++++++++++++++
.../workbench-query/workbench-query.ts | 8 +++-
.../src/helpers/execution/sql-task-execution.ts | 9 ++++-
web-console/src/react-table/react-table-extra.scss | 4 ++
.../sql-data-loader-view/sql-data-loader-view.tsx | 2 +
.../connect-external-data-dialog.tsx | 18 ++++++---
.../input-source-step/example-inputs.ts | 3 ++
.../input-source-step/input-source-step.tsx | 12 +++++-
.../src/views/workbench-view/workbench-view.tsx | 9 ++++-
15 files changed, 143 insertions(+), 24 deletions(-)
diff --git a/docs/development/extensions-core/datasketches-tuple.md
b/docs/development/extensions-core/datasketches-tuple.md
index fc4f74d5c8..c9a05b5ab1 100644
--- a/docs/development/extensions-core/datasketches-tuple.md
+++ b/docs/development/extensions-core/datasketches-tuple.md
@@ -39,19 +39,52 @@ druid.extensions.loadList=["druid-datasketches"]
"name" : <output_name>,
"fieldName" : <metric_name>,
"nominalEntries": <number>,
- "numberOfValues" : <number>,
- "metricColumns" : <array of strings>
+ "metricColumns" : <array of strings>,
+ "numberOfValues" : <number>
}
```
|property|description|required?|
|--------|-----------|---------|
|type|This String should always be "arrayOfDoublesSketch"|yes|
-|name|A String for the output (result) name of the calculation.|yes|
+|name|String representing the output column to store sketch values.|yes|
|fieldName|A String for the name of the input field.|yes|
|nominalEntries|Parameter that determines the accuracy and size of the sketch.
Higher k means higher accuracy but more space to store sketches. Must be a
power of 2. See the [Theta sketch
accuracy](https://datasketches.apache.org/docs/Theta/ThetaErrorTable) for
details. |no, defaults to 16384|
-|numberOfValues|Number of values associated with each distinct key. |no,
defaults to 1|
-|metricColumns|If building sketches from raw data, an array of names of the
input columns containing numeric values to be associated with each distinct
key.|no, defaults to empty array|
+|metricColumns|When building sketches from raw data, an array of input columns
that contain numeric values to associate with each distinct key. If not
provided, assumes `fieldName` is an `arrayOfDoublesSketch`.|no, if not provided
`fieldName` is assumed to be an `arrayOfDoublesSketch`|
+|numberOfValues|Number of values associated with each distinct key. |no,
defaults to the length of `metricColumns` if provided and 1 otherwise|
+
+You can use the `arrayOfDoublesSketch` aggregator to:
+
+- Build a sketch from raw data. In this case, set `metricColumns` to an array.
+- Build a sketch from an existing `ArrayOfDoubles` sketch. In this case,
leave `metricColumns` unset and set the `fieldName` to an `ArrayOfDoubles`
sketch with `numberOfValues` doubles. At ingestion time, you must base64 encode
`ArrayOfDoubles` sketches.
+
+#### Example on top of raw data
+
+Compute a theta sketch of unique users. For each user, store the `added` and `deleted`
scores. The new sketch column will be called `users_theta`.
+
+```json
+{
+ "type": "arrayOfDoublesSketch",
+ "name": "users_theta",
+ "fieldName": "user",
+ "nominalEntries": 16384,
+ "metricColumns": ["added", "deleted"]
+}
+```
+
+#### Example ingesting a precomputed sketch column
+
+Ingest a sketch column called `user_sketches` that has a base64 encoded value
of two doubles in its array and store it in a column called `users_theta`.
+
+```json
+{
+ "type": "arrayOfDoublesSketch",
+ "name": "users_theta",
+ "fieldName": "user_sketches",
+ "nominalEntries": 16384,
+ "numberOfValues": 2
+}
+```
### Post Aggregators
diff --git a/web-console/lib/keywords.js b/web-console/lib/keywords.js
index e34b2daf45..bc81153dd7 100644
--- a/web-console/lib/keywords.js
+++ b/web-console/lib/keywords.js
@@ -61,6 +61,9 @@ exports.SQL_KEYWORDS = [
'REPLACE INTO',
'OVERWRITE',
'RETURNING',
+ 'OVER',
+ 'PARTITION BY',
+ 'WINDOW',
];
exports.SQL_EXPRESSION_PARTS = [
diff --git a/web-console/script/create-sql-docs.js
b/web-console/script/create-sql-docs.js
index 6af65006f8..13ed438915 100755
--- a/web-console/script/create-sql-docs.js
+++ b/web-console/script/create-sql-docs.js
@@ -52,9 +52,7 @@ function convertMarkdownToHtml(markdown) {
// Concert to markdown
markdown = snarkdown(markdown);
- return markdown
- .replace(/<br \/>/g, '<br /><br />') // Double up the <br>s
- .replace(/<a[^>]*>(.*?)<\/a>/g, '$1'); // Remove links
+ return markdown.replace(/<a[^>]*>(.*?)<\/a>/g, '$1'); // Remove links
}
const readDoc = async () => {
diff --git a/web-console/src/bootstrap/react-table-defaults.tsx
b/web-console/src/bootstrap/react-table-defaults.tsx
index 4c31928064..139a13bcd5 100644
--- a/web-console/src/bootstrap/react-table-defaults.tsx
+++ b/web-console/src/bootstrap/react-table-defaults.tsx
@@ -53,12 +53,12 @@ export function bootstrapReactTable() {
.map((row: any) => row[column.id]);
const previewCount = countBy(previewValues);
return (
- <span>
+ <div className="default-aggregated">
{Object.keys(previewCount)
.sort()
.map(v => `${v} (${previewCount[v]})`)
.join(', ')}
- </span>
+ </div>
);
},
defaultPageSize: 20,
diff --git a/web-console/src/components/segment-timeline/segment-timeline.tsx
b/web-console/src/components/segment-timeline/segment-timeline.tsx
index c138e82dff..f8cef06189 100644
--- a/web-console/src/components/segment-timeline/segment-timeline.tsx
+++ b/web-console/src/components/segment-timeline/segment-timeline.tsx
@@ -278,7 +278,7 @@ ORDER BY "start" DESC`;
intervals = await queryDruidSql({
query: SegmentTimeline.getSqlQuery(startDate, endDate),
});
- datasources = uniq(intervals.map(r => r.datasource));
+ datasources = uniq(intervals.map(r => r.datasource).sort());
} else if (capabilities.hasCoordinatorAccess()) {
const startIso = startDate.toISOString();
diff --git
a/web-console/src/druid-models/ingest-query-pattern/ingest-query-pattern.ts
b/web-console/src/druid-models/ingest-query-pattern/ingest-query-pattern.ts
index f4dee926b6..7bdcaae50a 100644
--- a/web-console/src/druid-models/ingest-query-pattern/ingest-query-pattern.ts
+++ b/web-console/src/druid-models/ingest-query-pattern/ingest-query-pattern.ts
@@ -63,6 +63,7 @@ export function externalConfigToIngestQueryPattern(
config: ExternalConfig,
isArrays: boolean[],
timeExpression: SqlExpression | undefined,
+ partitionedByHint: string | undefined,
): IngestQueryPattern {
return {
destinationTableName:
guessDataSourceNameFromInputSource(config.inputSource) || 'data',
@@ -71,7 +72,7 @@ export function externalConfigToIngestQueryPattern(
mainExternalConfig: config,
filters: [],
dimensions: externalConfigToInitDimensions(config, isArrays,
timeExpression),
- partitionedBy: timeExpression ? 'day' : 'all',
+ partitionedBy: partitionedByHint || (timeExpression ? 'day' : 'all'),
clusteredBy: [],
};
}
diff --git a/web-console/src/druid-models/metric-spec/metric-spec.tsx
b/web-console/src/druid-models/metric-spec/metric-spec.tsx
index 6b3290272e..4295310486 100644
--- a/web-console/src/druid-models/metric-spec/metric-spec.tsx
+++ b/web-console/src/druid-models/metric-spec/metric-spec.tsx
@@ -78,6 +78,7 @@ export const METRIC_SPEC_FIELDS: Field<MetricSpec>[] = [
// Should the first / last aggregators become usable at ingestion time,
reverse the changes made in:
// https://github.com/apache/druid/pull/10794
'thetaSketch',
+ 'arrayOfDoublesSketch',
{
group: 'HLLSketch',
suggestions: ['HLLSketchBuild', 'HLLSketchMerge'],
@@ -104,6 +105,7 @@ export const METRIC_SPEC_FIELDS: Field<MetricSpec>[] = [
'doubleMax',
'floatMax',
'thetaSketch',
+ 'arrayOfDoublesSketch',
'HLLSketchBuild',
'HLLSketchMerge',
'quantilesDoublesSketch',
@@ -178,6 +180,47 @@ export const METRIC_SPEC_FIELDS: Field<MetricSpec>[] = [
</>
),
},
+ // arrayOfDoublesSketch
+ {
+ name: 'nominalEntries',
+ type: 'number',
+ defined: typeIs('arrayOfDoublesSketch'),
+ defaultValue: 16384,
+ info: (
+ <>
+ <p>
+ Parameter that determines the accuracy and size of the sketch.
Higher k means higher
+ accuracy but more space to store sketches.
+ </p>
+ <p>Must be a power of 2.</p>
+ <p>
+ See the{' '}
+ <ExternalLink
href="https://datasketches.apache.org/docs/Theta/ThetaErrorTable">
+ Theta sketch accuracy
+ </ExternalLink>{' '}
+ for details.
+ </p>
+ </>
+ ),
+ },
+ {
+ name: 'metricColumns',
+ type: 'string-array',
+ defined: typeIs('arrayOfDoublesSketch'),
+ info: (
+ <>
+ If building sketches from raw data, an array of names of the input
columns containing
+ numeric values to be associated with each distinct key.
+ </>
+ ),
+ },
+ {
+ name: 'numberOfValues',
+ type: 'number',
+ defined: typeIs('arrayOfDoublesSketch'),
+ placeholder: 'metricColumns length or 1',
+ info: <>Number of values associated with each distinct key.</>,
+ },
// HLLSketchBuild & HLLSketchMerge
{
name: 'lgK',
diff --git a/web-console/src/druid-models/workbench-query/workbench-query.ts
b/web-console/src/druid-models/workbench-query/workbench-query.ts
index 36c71cb07a..b6a1f74aa1 100644
--- a/web-console/src/druid-models/workbench-query/workbench-query.ts
+++ b/web-console/src/druid-models/workbench-query/workbench-query.ts
@@ -82,13 +82,19 @@ export class WorkbenchQuery {
externalConfig: ExternalConfig,
isArrays: boolean[],
timeExpression: SqlExpression | undefined,
+ partitionedByHint: string | undefined,
): WorkbenchQuery {
return new WorkbenchQuery({
queryContext: {},
queryParts: [
WorkbenchQueryPart.fromQueryString(
ingestQueryPatternToQuery(
- externalConfigToIngestQueryPattern(externalConfig, isArrays,
timeExpression),
+ externalConfigToIngestQueryPattern(
+ externalConfig,
+ isArrays,
+ timeExpression,
+ partitionedByHint,
+ ),
).toString(),
),
],
diff --git a/web-console/src/helpers/execution/sql-task-execution.ts
b/web-console/src/helpers/execution/sql-task-execution.ts
index e7f7250c53..358eee25c5 100644
--- a/web-console/src/helpers/execution/sql-task-execution.ts
+++ b/web-console/src/helpers/execution/sql-task-execution.ts
@@ -124,9 +124,14 @@ export async function reattachTaskExecution(
option: ReattachTaskQueryOptions,
): Promise<Execution | IntermediateQueryState<Execution>> {
const { id, cancelToken, preserveOnTermination } = option;
- let execution = await getTaskExecution(id, undefined, cancelToken);
+ let execution: Execution;
- execution = await updateExecutionWithDatasourceExistsIfNeeded(execution,
cancelToken);
+ try {
+ execution = await getTaskExecution(id, undefined, cancelToken);
+ execution = await updateExecutionWithDatasourceExistsIfNeeded(execution,
cancelToken);
+ } catch (e) {
+ throw new Error(`Reattaching to query failed due to: ${e.message}`);
+ }
if (execution.isFullyComplete()) return execution;
diff --git a/web-console/src/react-table/react-table-extra.scss
b/web-console/src/react-table/react-table-extra.scss
index d87c25c84d..bdeecf5e96 100644
--- a/web-console/src/react-table/react-table-extra.scss
+++ b/web-console/src/react-table/react-table-extra.scss
@@ -45,4 +45,8 @@
}
}
}
+
+ .default-aggregated {
+ padding: 10px 5px;
+ }
}
diff --git
a/web-console/src/views/sql-data-loader-view/sql-data-loader-view.tsx
b/web-console/src/views/sql-data-loader-view/sql-data-loader-view.tsx
index 23c81d02d2..2b3126372a 100644
--- a/web-console/src/views/sql-data-loader-view/sql-data-loader-view.tsx
+++ b/web-console/src/views/sql-data-loader-view/sql-data-loader-view.tsx
@@ -151,6 +151,7 @@ export const SqlDataLoaderView = React.memo(function
SqlDataLoaderView(
{ inputSource, inputFormat, signature },
isArrays,
timeExpression,
+ undefined,
),
).toString(),
queryContext: {
@@ -167,6 +168,7 @@ export const SqlDataLoaderView = React.memo(function
SqlDataLoaderView(
{ inputSource, inputFormat, signature },
isArrays,
timeExpression,
+ undefined,
),
).toString(),
});
diff --git
a/web-console/src/views/workbench-view/connect-external-data-dialog/connect-external-data-dialog.tsx
b/web-console/src/views/workbench-view/connect-external-data-dialog/connect-external-data-dialog.tsx
index d857d37dbf..e043cf1340 100644
---
a/web-console/src/views/workbench-view/connect-external-data-dialog/connect-external-data-dialog.tsx
+++
b/web-console/src/views/workbench-view/connect-external-data-dialog/connect-external-data-dialog.tsx
@@ -20,7 +20,7 @@ import { Classes, Dialog } from '@blueprintjs/core';
import { SqlExpression } from 'druid-query-toolkit';
import React, { useState } from 'react';
-import { ExternalConfig } from '../../../druid-models';
+import { ExternalConfig, InputFormat, InputSource } from
'../../../druid-models';
import { InputFormatStep } from '../input-format-step/input-format-step';
import { InputSourceStep } from '../input-source-step/input-source-step';
@@ -32,20 +32,27 @@ export interface ConnectExternalDataDialogProps {
config: ExternalConfig,
isArrays: boolean[],
timeExpression: SqlExpression | undefined,
+ partitionedByHint: string | undefined,
): void;
onClose(): void;
}
+interface ExternalConfigStep {
+ inputSource?: InputSource;
+ inputFormat?: InputFormat;
+ partitionedByHint?: string;
+}
+
export const ConnectExternalDataDialog = React.memo(function
ConnectExternalDataDialog(
props: ConnectExternalDataDialogProps,
) {
const { initExternalConfig, onClose, onSetExternalConfig } = props;
- const [externalConfigStep, setExternalConfigStep] =
useState<Partial<ExternalConfig>>(
+ const [externalConfigStep, setExternalConfigStep] =
useState<ExternalConfigStep>(
initExternalConfig || {},
);
- const { inputSource, inputFormat } = externalConfigStep;
+ const { inputSource, inputFormat, partitionedByHint } = externalConfigStep;
return (
<Dialog
@@ -65,6 +72,7 @@ export const ConnectExternalDataDialog = React.memo(function
ConnectExternalData
{ inputSource, inputFormat, signature },
isArrays,
timeExpression,
+ partitionedByHint,
);
onClose();
}}
@@ -76,8 +84,8 @@ export const ConnectExternalDataDialog = React.memo(function
ConnectExternalData
<InputSourceStep
initInputSource={inputSource}
mode="sampler"
- onSet={(inputSource, inputFormat) => {
- setExternalConfigStep({ inputSource, inputFormat });
+ onSet={(inputSource, inputFormat, partitionedByHint) => {
+ setExternalConfigStep({ inputSource, inputFormat,
partitionedByHint });
}}
/>
)}
diff --git
a/web-console/src/views/workbench-view/input-source-step/example-inputs.ts
b/web-console/src/views/workbench-view/input-source-step/example-inputs.ts
index a74f1754b1..a6ad104c7f 100644
--- a/web-console/src/views/workbench-view/input-source-step/example-inputs.ts
+++ b/web-console/src/views/workbench-view/input-source-step/example-inputs.ts
@@ -23,6 +23,7 @@ export interface ExampleInput {
description: string;
inputSource: InputSource;
inputFormat?: InputFormat;
+ partitionedByHint?: string;
}
const TRIPS_INPUT_FORMAT: InputFormat = {
@@ -122,6 +123,7 @@ export const EXAMPLE_INPUTS: ExampleInput[] = [
],
},
inputFormat: TRIPS_INPUT_FORMAT,
+ partitionedByHint: 'month',
},
{
name: 'NYC Taxi cabs (all files)',
@@ -206,6 +208,7 @@ export const EXAMPLE_INPUTS: ExampleInput[] = [
],
},
inputFormat: TRIPS_INPUT_FORMAT,
+ partitionedByHint: 'month',
},
{
name: 'FlightCarrierOnTime (1 month)',
diff --git
a/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx
b/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx
index f144e8f975..9ea55fd0d1 100644
---
a/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx
+++
b/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx
@@ -71,7 +71,11 @@ const ROWS_TO_SAMPLE = 50;
export interface InputSourceStepProps {
initInputSource: Partial<InputSource> | undefined;
mode: 'sampler' | 'msq';
- onSet(inputSource: InputSource, inputFormat: InputFormat): void;
+ onSet(
+ inputSource: InputSource,
+ inputFormat: InputFormat,
+ partitionedByHint: string | undefined,
+ ): void;
}
export const InputSourceStep = React.memo(function InputSourceStep(props:
InputSourceStepProps) {
@@ -169,7 +173,11 @@ export const InputSourceStep = React.memo(function
InputSourceStep(props: InputS
useEffect(() => {
const guessedInputFormat = guessedInputFormatState.data;
if (!guessedInputFormat) return;
- onSet(exampleInput?.inputSource || (inputSource as any),
guessedInputFormat);
+ onSet(
+ exampleInput?.inputSource || (inputSource as any),
+ guessedInputFormat,
+ exampleInput?.partitionedByHint,
+ );
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [guessedInputFormatState]);
diff --git a/web-console/src/views/workbench-view/workbench-view.tsx
b/web-console/src/views/workbench-view/workbench-view.tsx
index 5d601d2fb6..56af602e43 100644
--- a/web-console/src/views/workbench-view/workbench-view.tsx
+++ b/web-console/src/views/workbench-view/workbench-view.tsx
@@ -324,9 +324,14 @@ export class WorkbenchView extends
React.PureComponent<WorkbenchViewProps, Workb
return (
<ConnectExternalDataDialog
- onSetExternalConfig={(externalConfig, isArrays, timeExpression) => {
+ onSetExternalConfig={(externalConfig, isArrays, timeExpression,
partitionedByHint) => {
this.handleNewTab(
- WorkbenchQuery.fromInitExternalConfig(externalConfig, isArrays,
timeExpression),
+ WorkbenchQuery.fromInitExternalConfig(
+ externalConfig,
+ isArrays,
+ timeExpression,
+ partitionedByHint,
+ ),
'Ext ' +
guessDataSourceNameFromInputSource(externalConfig.inputSource),
);
}}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]