vogievetsky commented on code in PR #18126: URL: https://github.com/apache/druid/pull/18126#discussion_r2159574384
########## web-console/src/views/load-data-view/ingestion-spec-completions.ts: ########## @@ -0,0 +1,831 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import type { JsonCompletionRule } from '../../utils'; + +/** + * Determines if a type is a supervisor type based on the documentation: + * "The supervisor type. For streaming ingestion, this can be either kafka, kinesis, or rabbit. + * For automatic compaction, set the type to autocompact." 
+ */ +function isSupervisorType(type: string): boolean { + return type === 'kafka' || type === 'kinesis' || type === 'rabbit' || type === 'autocompact'; +} + +export const INGESTION_SPEC_COMPLETIONS: JsonCompletionRule[] = [ + // Root level properties (task and supervisor specs) + { + path: '$', + isObject: true, + completions: [ + { value: 'type', documentation: 'Type of ingestion task or supervisor' }, + { value: 'spec', documentation: 'Specification for the ingestion task or supervisor' }, + ], + }, + // Supervisor-only root level properties + { + path: '$', + isObject: true, + condition: obj => isSupervisorType(obj.type), + completions: [ + { + value: 'suspended', + documentation: 'Whether the supervisor is suspended (supervisor only)', + }, + ], + }, + // Type values for tasks + { + path: '$.type', + completions: [ + { value: 'index_parallel', documentation: 'Native batch ingestion (parallel)' }, + { value: 'index', documentation: 'Native batch ingestion (single task)' }, + { value: 'index_hadoop', documentation: 'Hadoop-based batch ingestion' }, + { value: 'kafka', documentation: 'Kafka supervisor for streaming ingestion' }, + { value: 'kinesis', documentation: 'Kinesis supervisor for streaming ingestion' }, + { value: 'rabbit', documentation: 'RabbitMQ supervisor for streaming ingestion' }, + { value: 'autocompact', documentation: 'Auto-compaction supervisor' }, + ], + }, + // suspended values (supervisor only) + { + path: '$.suspended', + completions: [ + { value: 'false', documentation: 'Supervisor is running (default)' }, + { value: 'true', documentation: 'Supervisor is suspended' }, + ], + }, + // spec object properties + { + path: '$.spec', + isObject: true, + completions: [ + { value: 'dataSchema', documentation: 'Schema configuration for ingestion' }, + { value: 'ioConfig', documentation: 'Input/output configuration' }, + { value: 'tuningConfig', documentation: 'Performance and tuning configuration' }, + ], + }, + // dataSchema object properties + { + 
path: '$.spec.dataSchema', + isObject: true, + completions: [ + { value: 'dataSource', documentation: 'Name of the datasource to ingest into' }, + { value: 'timestampSpec', documentation: 'Primary timestamp configuration' }, + { value: 'dimensionsSpec', documentation: 'Dimensions configuration' }, + { value: 'metricsSpec', documentation: 'Metrics and aggregators configuration' }, + { value: 'granularitySpec', documentation: 'Granularity and rollup configuration' }, + { value: 'transformSpec', documentation: 'Transform and filter configuration' }, + ], + }, + // timestampSpec object properties + { + path: '$.spec.dataSchema.timestampSpec', + isObject: true, + completions: [ + { value: 'column', documentation: 'Input column containing the timestamp' }, + { value: 'format', documentation: 'Format of the timestamp' }, + { value: 'missingValue', documentation: 'Default timestamp for missing values' }, + ], + }, + // timestampSpec.format values + { + path: '$.spec.dataSchema.timestampSpec.format', + completions: [ + { value: 'auto', documentation: 'Automatically detect ISO or millis format (default)' }, + { value: 'iso', documentation: 'ISO8601 format with T separator' }, + { value: 'posix', documentation: 'Seconds since epoch' }, + { value: 'millis', documentation: 'Milliseconds since epoch' }, + { value: 'micro', documentation: 'Microseconds since epoch' }, + { value: 'nano', documentation: 'Nanoseconds since epoch' }, + { value: 'yyyy-MM-dd HH:mm:ss', documentation: 'Custom Joda format' }, + { value: 'yyyy-MM-dd', documentation: 'Date only format' }, + ], + }, + // timestampSpec.column common values + { + path: '$.spec.dataSchema.timestampSpec.column', + completions: [ + { value: 'timestamp', documentation: 'Default timestamp column name' }, + { value: '__time', documentation: 'Druid internal timestamp column' }, + { value: 'time', documentation: 'Common timestamp column name' }, + { value: 'event_time', documentation: 'Common event time column name' }, + { value: 
'created_at', documentation: 'Common creation time column name' }, + ], + }, + // dimensionsSpec object properties + { + path: '$.spec.dataSchema.dimensionsSpec', + isObject: true, + completions: [ + { value: 'dimensions', documentation: 'List of dimension specifications' }, + { value: 'dimensionExclusions', documentation: 'Dimensions to exclude from ingestion' }, + { value: 'spatialDimensions', documentation: 'Spatial dimension specifications' }, + { value: 'includeAllDimensions', documentation: 'Include all discovered dimensions' }, + { value: 'useSchemaDiscovery', documentation: 'Enable automatic schema discovery' }, + { + value: 'forceSegmentSortByTime', + documentation: 'Force segments to be sorted by time first', + }, + ], + }, + // useSchemaDiscovery values + { + path: '$.spec.dataSchema.dimensionsSpec.useSchemaDiscovery', + completions: [ + { value: 'true', documentation: 'Enable automatic type detection and schema discovery' }, + { value: 'false', documentation: 'Use manual dimension specification (default)' }, + ], + }, + // includeAllDimensions values + { + path: '$.spec.dataSchema.dimensionsSpec.includeAllDimensions', + completions: [ + { value: 'true', documentation: 'Include both explicit and discovered dimensions' }, + { value: 'false', documentation: 'Include only explicit dimensions (default)' }, + ], + }, + // forceSegmentSortByTime values + { + path: '$.spec.dataSchema.dimensionsSpec.forceSegmentSortByTime', + completions: [ + { value: 'true', documentation: 'Sort by __time first, then dimensions (default)' }, + { value: 'false', documentation: 'Sort by dimensions only (experimental)' }, + ], + }, + // dimension object properties + { + path: '$.spec.dataSchema.dimensionsSpec.dimensions.[]', + isObject: true, + completions: [ + { value: 'type', documentation: 'Type of the dimension' }, + { value: 'name', documentation: 'Name of the dimension' }, + { value: 'createBitmapIndex', documentation: 'Whether to create bitmap index (string only)' }, + { + 
value: 'multiValueHandling', + documentation: 'How to handle multi-value fields (string only)', + }, + ], + }, + // dimension type values + { + path: '$.spec.dataSchema.dimensionsSpec.dimensions.[].type', + completions: [ + { value: 'auto', documentation: 'Automatically detect type (schema discovery)' }, + { value: 'string', documentation: 'String dimension (default)' }, + { value: 'long', documentation: 'Long integer dimension' }, + { value: 'float', documentation: 'Float dimension' }, + { value: 'double', documentation: 'Double precision dimension' }, + { value: 'json', documentation: 'JSON/nested data dimension' }, + ], + }, + // createBitmapIndex values + { + path: '$.spec.dataSchema.dimensionsSpec.dimensions.[].createBitmapIndex', + completions: [ + { value: 'true', documentation: 'Create bitmap index (default, faster filtering)' }, + { value: 'false', documentation: 'No bitmap index (saves storage)' }, + ], + }, + // multiValueHandling values + { + path: '$.spec.dataSchema.dimensionsSpec.dimensions.[].multiValueHandling', + completions: [ + { value: 'sorted_array', documentation: 'Sort multi-values (default)' }, + { value: 'sorted_set', documentation: 'Sort and deduplicate multi-values' }, + { value: 'array', documentation: 'Keep multi-values as-is' }, + ], + }, + // metricsSpec array properties + { + path: '$.spec.dataSchema.metricsSpec.[]', + isObject: true, + completions: [ + { value: 'type', documentation: 'Type of aggregator' }, + { value: 'name', documentation: 'Name of the metric' }, + { value: 'fieldName', documentation: 'Input field to aggregate' }, + ], + }, + // metric aggregator types + { + path: '$.spec.dataSchema.metricsSpec.[].type', + completions: [ + { value: 'count', documentation: 'Count of rows' }, + { value: 'longSum', documentation: 'Sum of long values' }, + { value: 'doubleSum', documentation: 'Sum of double values' }, + { value: 'longMin', documentation: 'Minimum of long values' }, + { value: 'longMax', documentation: 'Maximum of long 
values' }, + { value: 'doubleMin', documentation: 'Minimum of double values' }, + { value: 'doubleMax', documentation: 'Maximum of double values' }, + { value: 'longFirst', documentation: 'First long value seen' }, + { value: 'longLast', documentation: 'Last long value seen' }, + { value: 'doubleFirst', documentation: 'First double value seen' }, + { value: 'doubleLast', documentation: 'Last double value seen' }, + { value: 'thetaSketch', documentation: 'Theta sketch for approximate counting' }, + { value: 'HLLSketchBuild', documentation: 'HyperLogLog sketch for cardinality' }, + { value: 'quantilesDoublesSketch', documentation: 'Quantiles sketch for percentiles' }, + ], + }, + // granularitySpec object properties + { + path: '$.spec.dataSchema.granularitySpec', + isObject: true, + completions: [ + { value: 'type', documentation: 'Type of granularity specification' }, + { value: 'segmentGranularity', documentation: 'Granularity for segment partitioning' }, + { value: 'queryGranularity', documentation: 'Granularity for timestamp truncation' }, + { value: 'rollup', documentation: 'Whether to enable rollup (aggregation)' }, + { value: 'intervals', documentation: 'Time intervals to process (batch only)' }, + ], + }, + // granularitySpec.type values + { + path: '$.spec.dataSchema.granularitySpec.type', + completions: [ + { value: 'uniform', documentation: 'Uniform granularity (default)' }, + { value: 'arbitrary', documentation: 'Arbitrary granularity (advanced)' }, + ], + }, + // segmentGranularity values + { + path: '$.spec.dataSchema.granularitySpec.segmentGranularity', + completions: [ + { value: 'SECOND', documentation: 'Second-level segments' }, + { value: 'MINUTE', documentation: 'Minute-level segments' }, + { value: 'HOUR', documentation: 'Hourly segments' }, + { value: 'DAY', documentation: 'Daily segments (common choice)' }, + { value: 'WEEK', documentation: 'Weekly segments (not recommended)' }, + { value: 'MONTH', documentation: 'Monthly segments' }, + { 
value: 'QUARTER', documentation: 'Quarterly segments' }, + { value: 'YEAR', documentation: 'Yearly segments' }, + { value: 'ALL', documentation: 'Single segment for all data' }, + ], + }, + // queryGranularity values + { + path: '$.spec.dataSchema.granularitySpec.queryGranularity', + completions: [ + { value: 'NONE', documentation: 'No truncation (millisecond precision)' }, + { value: 'SECOND', documentation: 'Second-level truncation' }, + { value: 'MINUTE', documentation: 'Minute-level truncation' }, + { value: 'HOUR', documentation: 'Hour-level truncation' }, + { value: 'DAY', documentation: 'Day-level truncation' }, + { value: 'WEEK', documentation: 'Week-level truncation' }, + { value: 'MONTH', documentation: 'Month-level truncation' }, + { value: 'QUARTER', documentation: 'Quarter-level truncation' }, + { value: 'YEAR', documentation: 'Year-level truncation' }, + ], + }, + // rollup values + { + path: '$.spec.dataSchema.granularitySpec.rollup', + completions: [ + { value: 'true', documentation: 'Enable rollup (aggregate identical rows)' }, + { value: 'false', documentation: 'Disable rollup (store raw data)' }, + ], + }, + // transformSpec object properties + { + path: '$.spec.dataSchema.transformSpec', + isObject: true, + completions: [ + { value: 'transforms', documentation: 'List of transform expressions' }, + { value: 'filter', documentation: 'Filter to apply during ingestion' }, + ], + }, + // transform object properties + { + path: '$.spec.dataSchema.transformSpec.transforms.[]', + isObject: true, + completions: [ + { value: 'type', documentation: 'Type of transform' }, + { value: 'name', documentation: 'Name of the output field' }, + { value: 'expression', documentation: 'Transform expression' }, + ], + }, + // transform type values + { + path: '$.spec.dataSchema.transformSpec.transforms.[].type', + completions: [{ value: 'expression', documentation: 'Expression-based transform' }], + }, + // filter object properties + { + path: 
'$.spec.dataSchema.transformSpec.filter', + isObject: true, + completions: [ + { value: 'type', documentation: 'Type of filter' }, + { value: 'dimension', documentation: 'Dimension to filter on' }, + { value: 'value', documentation: 'Value to filter for' }, + { value: 'values', documentation: 'List of values to filter for' }, + { value: 'fields', documentation: 'List of sub-filters (logical filters)' }, + ], + }, + // filter type values + { + path: '$.spec.dataSchema.transformSpec.filter.type', + completions: [ + { value: 'selector', documentation: 'Exact match filter' }, + { value: 'in', documentation: 'Match any of multiple values' }, + { value: 'like', documentation: 'Pattern matching filter' }, + { value: 'regex', documentation: 'Regular expression filter' }, + { value: 'range', documentation: 'Numeric range filter' }, + { value: 'and', documentation: 'Logical AND filter' }, + { value: 'or', documentation: 'Logical OR filter' }, + { value: 'not', documentation: 'Logical NOT filter' }, + ], + }, + // ioConfig object properties (general) + { + path: '$.spec.ioConfig', + isObject: true, + completions: [ + { value: 'type', documentation: 'Type of I/O configuration' }, + { value: 'inputSource', documentation: 'Data input source configuration' }, + { value: 'inputFormat', documentation: 'Input data format configuration' }, + ], + }, + // ioConfig type values + { + path: '$.spec.ioConfig.type', + completions: [ + { value: 'index_parallel', documentation: 'Parallel batch ingestion' }, + { value: 'index', documentation: 'Single task batch ingestion' }, + { value: 'hadoop', documentation: 'Hadoop-based ingestion' }, + { value: 'kafka', documentation: 'Kafka streaming ingestion' }, + { value: 'kinesis', documentation: 'Kinesis streaming ingestion' }, + { value: 'rabbit', documentation: 'RabbitMQ streaming ingestion' }, + ], + }, + // Batch ioConfig properties + { + path: '$.spec.ioConfig', + isObject: true, + condition: obj => obj.type === 'index_parallel' || obj.type === 
'index', + completions: [ + { value: 'firehose', documentation: 'Legacy data input (deprecated)' }, + { value: 'appendToExisting', documentation: 'Whether to append to existing segments' }, + { value: 'dropExisting', documentation: 'Whether to drop existing segments' }, + ], + }, + // Streaming ioConfig properties (Kafka/Kinesis/Rabbit) + { + path: '$.spec.ioConfig', + isObject: true, + condition: obj => isSupervisorType(obj.type) && obj.type !== 'autocompact', + completions: [ + { value: 'taskCount', documentation: 'Number of reading tasks per replica' }, + { value: 'replicas', documentation: 'Number of replica task sets' }, + { value: 'taskDuration', documentation: 'Duration before tasks stop reading' }, + { value: 'startDelay', documentation: 'Delay before supervisor starts managing tasks' }, + { value: 'period', documentation: 'How often supervisor executes management logic' }, + { value: 'completionTimeout', documentation: 'Timeout for task completion' }, + { value: 'autoScalerConfig', documentation: 'Auto-scaling configuration' }, + ], + }, + // Kafka-specific ioConfig properties + { + path: '$.spec.ioConfig', + isObject: true, + condition: obj => obj.type === 'kafka', + completions: [ + { value: 'topic', documentation: 'Kafka topic to consume from' }, + { value: 'consumerProperties', documentation: 'Kafka consumer properties' }, + { + value: 'useEarliestOffset', + documentation: 'Start from earliest offset when no stored offset', + }, + ], + }, + // Kinesis-specific ioConfig properties + { + path: '$.spec.ioConfig', + isObject: true, + condition: obj => obj.type === 'kinesis', + completions: [ + { value: 'stream', documentation: 'Kinesis stream to consume from' }, + { value: 'endpoint', documentation: 'Kinesis endpoint URL' }, + { + value: 'useEarliestSequenceNumber', + documentation: 'Start from earliest when no stored sequence number', + }, + ], + }, + // appendToExisting values + { + path: '$.spec.ioConfig.appendToExisting', + completions: [ + { value: 
'false', documentation: 'Overwrite existing data (default)' }, + { value: 'true', documentation: 'Append to existing data' }, + ], + }, + // dropExisting values + { + path: '$.spec.ioConfig.dropExisting', + completions: [ + { value: 'false', documentation: 'Keep existing segments (default)' }, + { value: 'true', documentation: 'Drop existing segments in intervals' }, + ], + }, + // useEarliestOffset values + { + path: '$.spec.ioConfig.useEarliestOffset', + completions: [ + { value: 'false', documentation: 'Use latest offset when no stored offset (default)' }, + { value: 'true', documentation: 'Use earliest offset when no stored offset' }, + ], + }, + // useEarliestSequenceNumber values + { + path: '$.spec.ioConfig.useEarliestSequenceNumber', + completions: [ + { value: 'false', documentation: 'Use latest sequence number (default)' }, + { value: 'true', documentation: 'Use earliest sequence number' }, + ], + }, + // inputSource object properties + { + path: '$.spec.ioConfig.inputSource', + isObject: true, + completions: [{ value: 'type', documentation: 'Type of input source' }], + }, + // inputSource type values + { + path: '$.spec.ioConfig.inputSource.type', + completions: [ + { value: 'local', documentation: 'Local file system' }, + { value: 'http', documentation: 'HTTP/HTTPS URLs' }, + { value: 's3', documentation: 'Amazon S3' }, + { value: 'gs', documentation: 'Google Cloud Storage' }, + { value: 'azure', documentation: 'Azure Blob Storage' }, + { value: 'hdfs', documentation: 'Hadoop Distributed File System' }, + { value: 'druid', documentation: 'Re-index from existing Druid datasource' }, + { value: 'inline', documentation: 'Inline data in the spec' }, + { value: 'combining', documentation: 'Combine multiple input sources' }, + ], + }, + // Local input source properties + { + path: '$.spec.ioConfig.inputSource', + isObject: true, + condition: obj => obj.type === 'local', + completions: [ + { value: 'baseDir', documentation: 'Base directory path' }, + { value: 
'filter', documentation: 'File filter pattern' }, + { value: 'files', documentation: 'List of specific file paths' }, + ], + }, + // HTTP input source properties + { + path: '$.spec.ioConfig.inputSource', + isObject: true, + condition: obj => obj.type === 'http', + completions: [ + { value: 'uris', documentation: 'List of HTTP/HTTPS URIs' }, + { value: 'httpAuthenticationUsername', documentation: 'HTTP authentication username' }, + { value: 'httpAuthenticationPassword', documentation: 'HTTP authentication password' }, Review Comment: I'll add `systemFields` now, and I'll add `requestHeaders` once it is documented -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
