This is an automated email from the ASF dual-hosted git repository.

vogievetsky pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 9679f6a9b5 Web console: add arrayOfDoublesSketch and other small fixes 
(#13486)
9679f6a9b5 is described below

commit 9679f6a9b5c84a7618529f1739a6256e04c9da56
Author: Vadim Ogievetsky <[email protected]>
AuthorDate: Tue Dec 6 21:21:49 2022 -0800

    Web console: add arrayOfDoublesSketch and other small fixes (#13486)
    
    * add padding and keywords
    
    * add arrayOfDoubles
    
    * Update docs/development/extensions-core/datasketches-tuple.md
    
    Co-authored-by: Charles Smith <[email protected]>
    
    * Update docs/development/extensions-core/datasketches-tuple.md
    
    Co-authored-by: Charles Smith <[email protected]>
    
    * Update docs/development/extensions-core/datasketches-tuple.md
    
    Co-authored-by: Charles Smith <[email protected]>
    
    * Update docs/development/extensions-core/datasketches-tuple.md
    
    Co-authored-by: Charles Smith <[email protected]>
    
    * Update docs/development/extensions-core/datasketches-tuple.md
    
    Co-authored-by: Charles Smith <[email protected]>
    
    * partition int
    
    * fix docs
    
    Co-authored-by: Charles Smith <[email protected]>
---
 .../extensions-core/datasketches-tuple.md          | 43 +++++++++++++++++++---
 web-console/lib/keywords.js                        |  3 ++
 web-console/script/create-sql-docs.js              |  4 +-
 web-console/src/bootstrap/react-table-defaults.tsx |  4 +-
 .../segment-timeline/segment-timeline.tsx          |  2 +-
 .../ingest-query-pattern/ingest-query-pattern.ts   |  3 +-
 .../src/druid-models/metric-spec/metric-spec.tsx   | 43 ++++++++++++++++++++++
 .../workbench-query/workbench-query.ts             |  8 +++-
 .../src/helpers/execution/sql-task-execution.ts    |  9 ++++-
 web-console/src/react-table/react-table-extra.scss |  4 ++
 .../sql-data-loader-view/sql-data-loader-view.tsx  |  2 +
 .../connect-external-data-dialog.tsx               | 18 ++++++---
 .../input-source-step/example-inputs.ts            |  3 ++
 .../input-source-step/input-source-step.tsx        | 12 +++++-
 .../src/views/workbench-view/workbench-view.tsx    |  9 ++++-
 15 files changed, 143 insertions(+), 24 deletions(-)

diff --git a/docs/development/extensions-core/datasketches-tuple.md 
b/docs/development/extensions-core/datasketches-tuple.md
index fc4f74d5c8..c9a05b5ab1 100644
--- a/docs/development/extensions-core/datasketches-tuple.md
+++ b/docs/development/extensions-core/datasketches-tuple.md
@@ -39,19 +39,52 @@ druid.extensions.loadList=["druid-datasketches"]
   "name" : <output_name>,
   "fieldName" : <metric_name>,
   "nominalEntries": <number>,
-  "numberOfValues" : <number>,
-  "metricColumns" : <array of strings>
+  "metricColumns" : <array of strings>,
+  "numberOfValues" : <number>
  }
 ```
 
 |property|description|required?|
 |--------|-----------|---------|
 |type|This String should always be "arrayOfDoublesSketch"|yes|
-|name|A String for the output (result) name of the calculation.|yes|
+|name|String representing the output column to store sketch values.|yes|
 |fieldName|A String for the name of the input field.|yes|
 |nominalEntries|Parameter that determines the accuracy and size of the sketch. 
Higher k means higher accuracy but more space to store sketches. Must be a 
power of 2. See the [Theta sketch 
accuracy](https://datasketches.apache.org/docs/Theta/ThetaErrorTable) for 
details. |no, defaults to 16384|
-|numberOfValues|Number of values associated with each distinct key. |no, 
defaults to 1|
-|metricColumns|If building sketches from raw data, an array of names of the 
input columns containing numeric values to be associated with each distinct 
key.|no, defaults to empty array|
+|metricColumns|When building sketches from raw data, an array of names of the 
input columns that contain numeric values to associate with each distinct key. 
If not provided, assumes `fieldName` is an `arrayOfDoublesSketch`.|no, if not 
provided `fieldName` is assumed to be an `arrayOfDoublesSketch`|
+|numberOfValues|Number of values associated with each distinct key. |no, 
defaults to the length of `metricColumns` if provided and 1 otherwise|
+
+You can use the `arrayOfDoublesSketch` aggregator to:
+
+- Build a sketch from raw data. In this case, set `metricColumns` to an array.
+- Build a sketch from an existing `ArrayOfDoubles` sketch. In this case, 
leave `metricColumns` unset and set the `fieldName` to an `ArrayOfDoubles` 
sketch with `numberOfValues` doubles. At ingestion time, you must base64 encode 
`ArrayOfDoubles` sketches.
+
+#### Example on top of raw data
+
+Compute a theta of unique users. For each user store the `added` and `deleted` 
scores. The new sketch column will be called `users_theta`.
+
+```json
+{
+  "type": "arrayOfDoublesSketch",
+  "name": "users_theta",
+  "fieldName": "user",
+  "nominalEntries": 16384,
+  "metricColumns": ["added", "deleted"]
+}
+```
+
+#### Example ingesting a precomputed sketch column
+
+Ingest a sketch column called `user_sketches` that has a base64 encoded value 
of two doubles in its array and store it in a column called `users_theta`.
+
+```json
+{
+  "type": "arrayOfDoublesSketch",
+  "name": "users_theta",
+  "fieldName": "user_sketches",
+  "nominalEntries": 16384,
+  "numberOfValues": 2
+}
+```
 
 ### Post Aggregators
 
diff --git a/web-console/lib/keywords.js b/web-console/lib/keywords.js
index e34b2daf45..bc81153dd7 100644
--- a/web-console/lib/keywords.js
+++ b/web-console/lib/keywords.js
@@ -61,6 +61,9 @@ exports.SQL_KEYWORDS = [
   'REPLACE INTO',
   'OVERWRITE',
   'RETURNING',
+  'OVER',
+  'PARTITION BY',
+  'WINDOW',
 ];
 
 exports.SQL_EXPRESSION_PARTS = [
diff --git a/web-console/script/create-sql-docs.js 
b/web-console/script/create-sql-docs.js
index 6af65006f8..13ed438915 100755
--- a/web-console/script/create-sql-docs.js
+++ b/web-console/script/create-sql-docs.js
@@ -52,9 +52,7 @@ function convertMarkdownToHtml(markdown) {
   // Convert markdown to HTML
   markdown = snarkdown(markdown);
 
-  return markdown
-    .replace(/<br \/>/g, '<br /><br />') // Double up the <br>s
-    .replace(/<a[^>]*>(.*?)<\/a>/g, '$1'); // Remove links
+  return markdown.replace(/<a[^>]*>(.*?)<\/a>/g, '$1'); // Remove links
 }
 
 const readDoc = async () => {
diff --git a/web-console/src/bootstrap/react-table-defaults.tsx 
b/web-console/src/bootstrap/react-table-defaults.tsx
index 4c31928064..139a13bcd5 100644
--- a/web-console/src/bootstrap/react-table-defaults.tsx
+++ b/web-console/src/bootstrap/react-table-defaults.tsx
@@ -53,12 +53,12 @@ export function bootstrapReactTable() {
         .map((row: any) => row[column.id]);
       const previewCount = countBy(previewValues);
       return (
-        <span>
+        <div className="default-aggregated">
           {Object.keys(previewCount)
             .sort()
             .map(v => `${v} (${previewCount[v]})`)
             .join(', ')}
-        </span>
+        </div>
       );
     },
     defaultPageSize: 20,
diff --git a/web-console/src/components/segment-timeline/segment-timeline.tsx 
b/web-console/src/components/segment-timeline/segment-timeline.tsx
index c138e82dff..f8cef06189 100644
--- a/web-console/src/components/segment-timeline/segment-timeline.tsx
+++ b/web-console/src/components/segment-timeline/segment-timeline.tsx
@@ -278,7 +278,7 @@ ORDER BY "start" DESC`;
             intervals = await queryDruidSql({
               query: SegmentTimeline.getSqlQuery(startDate, endDate),
             });
-            datasources = uniq(intervals.map(r => r.datasource));
+            datasources = uniq(intervals.map(r => r.datasource).sort());
           } else if (capabilities.hasCoordinatorAccess()) {
             const startIso = startDate.toISOString();
 
diff --git 
a/web-console/src/druid-models/ingest-query-pattern/ingest-query-pattern.ts 
b/web-console/src/druid-models/ingest-query-pattern/ingest-query-pattern.ts
index f4dee926b6..7bdcaae50a 100644
--- a/web-console/src/druid-models/ingest-query-pattern/ingest-query-pattern.ts
+++ b/web-console/src/druid-models/ingest-query-pattern/ingest-query-pattern.ts
@@ -63,6 +63,7 @@ export function externalConfigToIngestQueryPattern(
   config: ExternalConfig,
   isArrays: boolean[],
   timeExpression: SqlExpression | undefined,
+  partitionedByHint: string | undefined,
 ): IngestQueryPattern {
   return {
     destinationTableName: 
guessDataSourceNameFromInputSource(config.inputSource) || 'data',
@@ -71,7 +72,7 @@ export function externalConfigToIngestQueryPattern(
     mainExternalConfig: config,
     filters: [],
     dimensions: externalConfigToInitDimensions(config, isArrays, 
timeExpression),
-    partitionedBy: timeExpression ? 'day' : 'all',
+    partitionedBy: partitionedByHint || (timeExpression ? 'day' : 'all'),
     clusteredBy: [],
   };
 }
diff --git a/web-console/src/druid-models/metric-spec/metric-spec.tsx 
b/web-console/src/druid-models/metric-spec/metric-spec.tsx
index 6b3290272e..4295310486 100644
--- a/web-console/src/druid-models/metric-spec/metric-spec.tsx
+++ b/web-console/src/druid-models/metric-spec/metric-spec.tsx
@@ -78,6 +78,7 @@ export const METRIC_SPEC_FIELDS: Field<MetricSpec>[] = [
       // Should the first / last aggregators become usable at ingestion time, 
reverse the changes made in:
       // https://github.com/apache/druid/pull/10794
       'thetaSketch',
+      'arrayOfDoublesSketch',
       {
         group: 'HLLSketch',
         suggestions: ['HLLSketchBuild', 'HLLSketchMerge'],
@@ -104,6 +105,7 @@ export const METRIC_SPEC_FIELDS: Field<MetricSpec>[] = [
       'doubleMax',
       'floatMax',
       'thetaSketch',
+      'arrayOfDoublesSketch',
       'HLLSketchBuild',
       'HLLSketchMerge',
       'quantilesDoublesSketch',
@@ -178,6 +180,47 @@ export const METRIC_SPEC_FIELDS: Field<MetricSpec>[] = [
       </>
     ),
   },
+  // arrayOfDoublesSketch
+  {
+    name: 'nominalEntries',
+    type: 'number',
+    defined: typeIs('arrayOfDoublesSketch'),
+    defaultValue: 16384,
+    info: (
+      <>
+        <p>
+          Parameter that determines the accuracy and size of the sketch. 
Higher k means higher
+          accuracy but more space to store sketches.
+        </p>
+        <p>Must be a power of 2.</p>
+        <p>
+          See the{' '}
+          <ExternalLink 
href="https://datasketches.apache.org/docs/Theta/ThetaErrorTable">
+            Theta sketch accuracy
+          </ExternalLink>{' '}
+          for details.
+        </p>
+      </>
+    ),
+  },
+  {
+    name: 'metricColumns',
+    type: 'string-array',
+    defined: typeIs('arrayOfDoublesSketch'),
+    info: (
+      <>
+        If building sketches from raw data, an array of names of the input 
columns containing
+        numeric values to be associated with each distinct key.
+      </>
+    ),
+  },
+  {
+    name: 'numberOfValues',
+    type: 'number',
+    defined: typeIs('arrayOfDoublesSketch'),
+    placeholder: 'metricColumns length or 1',
+    info: <>Number of values associated with each distinct key.</>,
+  },
   // HLLSketchBuild & HLLSketchMerge
   {
     name: 'lgK',
diff --git a/web-console/src/druid-models/workbench-query/workbench-query.ts 
b/web-console/src/druid-models/workbench-query/workbench-query.ts
index 36c71cb07a..b6a1f74aa1 100644
--- a/web-console/src/druid-models/workbench-query/workbench-query.ts
+++ b/web-console/src/druid-models/workbench-query/workbench-query.ts
@@ -82,13 +82,19 @@ export class WorkbenchQuery {
     externalConfig: ExternalConfig,
     isArrays: boolean[],
     timeExpression: SqlExpression | undefined,
+    partitionedByHint: string | undefined,
   ): WorkbenchQuery {
     return new WorkbenchQuery({
       queryContext: {},
       queryParts: [
         WorkbenchQueryPart.fromQueryString(
           ingestQueryPatternToQuery(
-            externalConfigToIngestQueryPattern(externalConfig, isArrays, 
timeExpression),
+            externalConfigToIngestQueryPattern(
+              externalConfig,
+              isArrays,
+              timeExpression,
+              partitionedByHint,
+            ),
           ).toString(),
         ),
       ],
diff --git a/web-console/src/helpers/execution/sql-task-execution.ts 
b/web-console/src/helpers/execution/sql-task-execution.ts
index e7f7250c53..358eee25c5 100644
--- a/web-console/src/helpers/execution/sql-task-execution.ts
+++ b/web-console/src/helpers/execution/sql-task-execution.ts
@@ -124,9 +124,14 @@ export async function reattachTaskExecution(
   option: ReattachTaskQueryOptions,
 ): Promise<Execution | IntermediateQueryState<Execution>> {
   const { id, cancelToken, preserveOnTermination } = option;
-  let execution = await getTaskExecution(id, undefined, cancelToken);
+  let execution: Execution;
 
-  execution = await updateExecutionWithDatasourceExistsIfNeeded(execution, 
cancelToken);
+  try {
+    execution = await getTaskExecution(id, undefined, cancelToken);
+    execution = await updateExecutionWithDatasourceExistsIfNeeded(execution, 
cancelToken);
+  } catch (e) {
+    throw new Error(`Reattaching to query failed due to: ${e.message}`);
+  }
 
   if (execution.isFullyComplete()) return execution;
 
diff --git a/web-console/src/react-table/react-table-extra.scss 
b/web-console/src/react-table/react-table-extra.scss
index d87c25c84d..bdeecf5e96 100644
--- a/web-console/src/react-table/react-table-extra.scss
+++ b/web-console/src/react-table/react-table-extra.scss
@@ -45,4 +45,8 @@
       }
     }
   }
+
+  .default-aggregated {
+    padding: 10px 5px;
+  }
 }
diff --git 
a/web-console/src/views/sql-data-loader-view/sql-data-loader-view.tsx 
b/web-console/src/views/sql-data-loader-view/sql-data-loader-view.tsx
index 23c81d02d2..2b3126372a 100644
--- a/web-console/src/views/sql-data-loader-view/sql-data-loader-view.tsx
+++ b/web-console/src/views/sql-data-loader-view/sql-data-loader-view.tsx
@@ -151,6 +151,7 @@ export const SqlDataLoaderView = React.memo(function 
SqlDataLoaderView(
                     { inputSource, inputFormat, signature },
                     isArrays,
                     timeExpression,
+                    undefined,
                   ),
                 ).toString(),
                 queryContext: {
@@ -167,6 +168,7 @@ export const SqlDataLoaderView = React.memo(function 
SqlDataLoaderView(
                     { inputSource, inputFormat, signature },
                     isArrays,
                     timeExpression,
+                    undefined,
                   ),
                 ).toString(),
               });
diff --git 
a/web-console/src/views/workbench-view/connect-external-data-dialog/connect-external-data-dialog.tsx
 
b/web-console/src/views/workbench-view/connect-external-data-dialog/connect-external-data-dialog.tsx
index d857d37dbf..e043cf1340 100644
--- 
a/web-console/src/views/workbench-view/connect-external-data-dialog/connect-external-data-dialog.tsx
+++ 
b/web-console/src/views/workbench-view/connect-external-data-dialog/connect-external-data-dialog.tsx
@@ -20,7 +20,7 @@ import { Classes, Dialog } from '@blueprintjs/core';
 import { SqlExpression } from 'druid-query-toolkit';
 import React, { useState } from 'react';
 
-import { ExternalConfig } from '../../../druid-models';
+import { ExternalConfig, InputFormat, InputSource } from 
'../../../druid-models';
 import { InputFormatStep } from '../input-format-step/input-format-step';
 import { InputSourceStep } from '../input-source-step/input-source-step';
 
@@ -32,20 +32,27 @@ export interface ConnectExternalDataDialogProps {
     config: ExternalConfig,
     isArrays: boolean[],
     timeExpression: SqlExpression | undefined,
+    partitionedByHint: string | undefined,
   ): void;
   onClose(): void;
 }
 
+interface ExternalConfigStep {
+  inputSource?: InputSource;
+  inputFormat?: InputFormat;
+  partitionedByHint?: string;
+}
+
 export const ConnectExternalDataDialog = React.memo(function 
ConnectExternalDataDialog(
   props: ConnectExternalDataDialogProps,
 ) {
   const { initExternalConfig, onClose, onSetExternalConfig } = props;
 
-  const [externalConfigStep, setExternalConfigStep] = 
useState<Partial<ExternalConfig>>(
+  const [externalConfigStep, setExternalConfigStep] = 
useState<ExternalConfigStep>(
     initExternalConfig || {},
   );
 
-  const { inputSource, inputFormat } = externalConfigStep;
+  const { inputSource, inputFormat, partitionedByHint } = externalConfigStep;
 
   return (
     <Dialog
@@ -65,6 +72,7 @@ export const ConnectExternalDataDialog = React.memo(function 
ConnectExternalData
                 { inputSource, inputFormat, signature },
                 isArrays,
                 timeExpression,
+                partitionedByHint,
               );
               onClose();
             }}
@@ -76,8 +84,8 @@ export const ConnectExternalDataDialog = React.memo(function 
ConnectExternalData
           <InputSourceStep
             initInputSource={inputSource}
             mode="sampler"
-            onSet={(inputSource, inputFormat) => {
-              setExternalConfigStep({ inputSource, inputFormat });
+            onSet={(inputSource, inputFormat, partitionedByHint) => {
+              setExternalConfigStep({ inputSource, inputFormat, 
partitionedByHint });
             }}
           />
         )}
diff --git 
a/web-console/src/views/workbench-view/input-source-step/example-inputs.ts 
b/web-console/src/views/workbench-view/input-source-step/example-inputs.ts
index a74f1754b1..a6ad104c7f 100644
--- a/web-console/src/views/workbench-view/input-source-step/example-inputs.ts
+++ b/web-console/src/views/workbench-view/input-source-step/example-inputs.ts
@@ -23,6 +23,7 @@ export interface ExampleInput {
   description: string;
   inputSource: InputSource;
   inputFormat?: InputFormat;
+  partitionedByHint?: string;
 }
 
 const TRIPS_INPUT_FORMAT: InputFormat = {
@@ -122,6 +123,7 @@ export const EXAMPLE_INPUTS: ExampleInput[] = [
       ],
     },
     inputFormat: TRIPS_INPUT_FORMAT,
+    partitionedByHint: 'month',
   },
   {
     name: 'NYC Taxi cabs (all files)',
@@ -206,6 +208,7 @@ export const EXAMPLE_INPUTS: ExampleInput[] = [
       ],
     },
     inputFormat: TRIPS_INPUT_FORMAT,
+    partitionedByHint: 'month',
   },
   {
     name: 'FlightCarrierOnTime (1 month)',
diff --git 
a/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx 
b/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx
index f144e8f975..9ea55fd0d1 100644
--- 
a/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx
+++ 
b/web-console/src/views/workbench-view/input-source-step/input-source-step.tsx
@@ -71,7 +71,11 @@ const ROWS_TO_SAMPLE = 50;
 export interface InputSourceStepProps {
   initInputSource: Partial<InputSource> | undefined;
   mode: 'sampler' | 'msq';
-  onSet(inputSource: InputSource, inputFormat: InputFormat): void;
+  onSet(
+    inputSource: InputSource,
+    inputFormat: InputFormat,
+    partitionedByHint: string | undefined,
+  ): void;
 }
 
 export const InputSourceStep = React.memo(function InputSourceStep(props: 
InputSourceStepProps) {
@@ -169,7 +173,11 @@ export const InputSourceStep = React.memo(function 
InputSourceStep(props: InputS
   useEffect(() => {
     const guessedInputFormat = guessedInputFormatState.data;
     if (!guessedInputFormat) return;
-    onSet(exampleInput?.inputSource || (inputSource as any), 
guessedInputFormat);
+    onSet(
+      exampleInput?.inputSource || (inputSource as any),
+      guessedInputFormat,
+      exampleInput?.partitionedByHint,
+    );
     // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [guessedInputFormatState]);
 
diff --git a/web-console/src/views/workbench-view/workbench-view.tsx 
b/web-console/src/views/workbench-view/workbench-view.tsx
index 5d601d2fb6..56af602e43 100644
--- a/web-console/src/views/workbench-view/workbench-view.tsx
+++ b/web-console/src/views/workbench-view/workbench-view.tsx
@@ -324,9 +324,14 @@ export class WorkbenchView extends 
React.PureComponent<WorkbenchViewProps, Workb
 
     return (
       <ConnectExternalDataDialog
-        onSetExternalConfig={(externalConfig, isArrays, timeExpression) => {
+        onSetExternalConfig={(externalConfig, isArrays, timeExpression, 
partitionedByHint) => {
           this.handleNewTab(
-            WorkbenchQuery.fromInitExternalConfig(externalConfig, isArrays, 
timeExpression),
+            WorkbenchQuery.fromInitExternalConfig(
+              externalConfig,
+              isArrays,
+              timeExpression,
+              partitionedByHint,
+            ),
             'Ext ' + 
guessDataSourceNameFromInputSource(externalConfig.inputSource),
           );
         }}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to