vogievetsky commented on code in PR #18126: URL: https://github.com/apache/druid/pull/18126#discussion_r2159574384
########## web-console/src/views/load-data-view/ingestion-spec-completions.ts: ########## @@ -0,0 +1,831 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import type { JsonCompletionRule } from '../../utils'; + +/** + * Determines if a type is a supervisor type based on the documentation: + * "The supervisor type. For streaming ingestion, this can be either kafka, kinesis, or rabbit. + * For automatic compaction, set the type to autocompact." 
+ */ +function isSupervisorType(type: string): boolean { + return type === 'kafka' || type === 'kinesis' || type === 'rabbit' || type === 'autocompact'; +} + +export const INGESTION_SPEC_COMPLETIONS: JsonCompletionRule[] = [ + // Root level properties (task and supervisor specs) + { + path: '$', + isObject: true, + completions: [ + { value: 'type', documentation: 'Type of ingestion task or supervisor' }, + { value: 'spec', documentation: 'Specification for the ingestion task or supervisor' }, + ], + }, + // Supervisor-only root level properties + { + path: '$', + isObject: true, + condition: obj => isSupervisorType(obj.type), + completions: [ + { + value: 'suspended', + documentation: 'Whether the supervisor is suspended (supervisor only)', + }, + ], + }, + // Type values for tasks + { + path: '$.type', + completions: [ + { value: 'index_parallel', documentation: 'Native batch ingestion (parallel)' }, + { value: 'index', documentation: 'Native batch ingestion (single task)' }, + { value: 'index_hadoop', documentation: 'Hadoop-based batch ingestion' }, + { value: 'kafka', documentation: 'Kafka supervisor for streaming ingestion' }, + { value: 'kinesis', documentation: 'Kinesis supervisor for streaming ingestion' }, + { value: 'rabbit', documentation: 'RabbitMQ supervisor for streaming ingestion' }, + { value: 'autocompact', documentation: 'Auto-compaction supervisor' }, + ], + }, + // suspended values (supervisor only) + { + path: '$.suspended', + completions: [ + { value: 'false', documentation: 'Supervisor is running (default)' }, + { value: 'true', documentation: 'Supervisor is suspended' }, + ], + }, + // spec object properties + { + path: '$.spec', + isObject: true, + completions: [ + { value: 'dataSchema', documentation: 'Schema configuration for ingestion' }, + { value: 'ioConfig', documentation: 'Input/output configuration' }, + { value: 'tuningConfig', documentation: 'Performance and tuning configuration' }, + ], + }, + // dataSchema object properties + { + 
path: '$.spec.dataSchema', + isObject: true, + completions: [ + { value: 'dataSource', documentation: 'Name of the datasource to ingest into' }, + { value: 'timestampSpec', documentation: 'Primary timestamp configuration' }, + { value: 'dimensionsSpec', documentation: 'Dimensions configuration' }, + { value: 'metricsSpec', documentation: 'Metrics and aggregators configuration' }, + { value: 'granularitySpec', documentation: 'Granularity and rollup configuration' }, + { value: 'transformSpec', documentation: 'Transform and filter configuration' }, + ], + }, + // timestampSpec object properties + { + path: '$.spec.dataSchema.timestampSpec', + isObject: true, + completions: [ + { value: 'column', documentation: 'Input column containing the timestamp' }, + { value: 'format', documentation: 'Format of the timestamp' }, + { value: 'missingValue', documentation: 'Default timestamp for missing values' }, + ], + }, + // timestampSpec.format values + { + path: '$.spec.dataSchema.timestampSpec.format', + completions: [ + { value: 'auto', documentation: 'Automatically detect ISO or millis format (default)' }, + { value: 'iso', documentation: 'ISO8601 format with T separator' }, + { value: 'posix', documentation: 'Seconds since epoch' }, + { value: 'millis', documentation: 'Milliseconds since epoch' }, + { value: 'micro', documentation: 'Microseconds since epoch' }, + { value: 'nano', documentation: 'Nanoseconds since epoch' }, + { value: 'yyyy-MM-dd HH:mm:ss', documentation: 'Custom Joda format' }, + { value: 'yyyy-MM-dd', documentation: 'Date only format' }, + ], + }, + // timestampSpec.column common values + { + path: '$.spec.dataSchema.timestampSpec.column', + completions: [ + { value: 'timestamp', documentation: 'Default timestamp column name' }, + { value: '__time', documentation: 'Druid internal timestamp column' }, + { value: 'time', documentation: 'Common timestamp column name' }, + { value: 'event_time', documentation: 'Common event time column name' }, + { value: 
'created_at', documentation: 'Common creation time column name' }, + ], + }, + // dimensionsSpec object properties + { + path: '$.spec.dataSchema.dimensionsSpec', + isObject: true, + completions: [ + { value: 'dimensions', documentation: 'List of dimension specifications' }, + { value: 'dimensionExclusions', documentation: 'Dimensions to exclude from ingestion' }, + { value: 'spatialDimensions', documentation: 'Spatial dimension specifications' }, + { value: 'includeAllDimensions', documentation: 'Include all discovered dimensions' }, + { value: 'useSchemaDiscovery', documentation: 'Enable automatic schema discovery' }, + { + value: 'forceSegmentSortByTime', + documentation: 'Force segments to be sorted by time first', + }, + ], + }, + // useSchemaDiscovery values + { + path: '$.spec.dataSchema.dimensionsSpec.useSchemaDiscovery', + completions: [ + { value: 'true', documentation: 'Enable automatic type detection and schema discovery' }, + { value: 'false', documentation: 'Use manual dimension specification (default)' }, + ], + }, + // includeAllDimensions values + { + path: '$.spec.dataSchema.dimensionsSpec.includeAllDimensions', + completions: [ + { value: 'true', documentation: 'Include both explicit and discovered dimensions' }, + { value: 'false', documentation: 'Include only explicit dimensions (default)' }, + ], + }, + // forceSegmentSortByTime values + { + path: '$.spec.dataSchema.dimensionsSpec.forceSegmentSortByTime', + completions: [ + { value: 'true', documentation: 'Sort by __time first, then dimensions (default)' }, + { value: 'false', documentation: 'Sort by dimensions only (experimental)' }, + ], + }, + // dimension object properties + { + path: '$.spec.dataSchema.dimensionsSpec.dimensions.[]', + isObject: true, + completions: [ + { value: 'type', documentation: 'Type of the dimension' }, + { value: 'name', documentation: 'Name of the dimension' }, + { value: 'createBitmapIndex', documentation: 'Whether to create bitmap index (string only)' }, + { + 
value: 'multiValueHandling', + documentation: 'How to handle multi-value fields (string only)', + }, + ], + }, + // dimension type values + { + path: '$.spec.dataSchema.dimensionsSpec.dimensions.[].type', + completions: [ + { value: 'auto', documentation: 'Automatically detect type (schema discovery)' }, + { value: 'string', documentation: 'String dimension (default)' }, + { value: 'long', documentation: 'Long integer dimension' }, + { value: 'float', documentation: 'Float dimension' }, + { value: 'double', documentation: 'Double precision dimension' }, + { value: 'json', documentation: 'JSON/nested data dimension' }, + ], + }, + // createBitmapIndex values + { + path: '$.spec.dataSchema.dimensionsSpec.dimensions.[].createBitmapIndex', + completions: [ + { value: 'true', documentation: 'Create bitmap index (default, faster filtering)' }, + { value: 'false', documentation: 'No bitmap index (saves storage)' }, + ], + }, + // multiValueHandling values + { + path: '$.spec.dataSchema.dimensionsSpec.dimensions.[].multiValueHandling', + completions: [ + { value: 'sorted_array', documentation: 'Sort multi-values (default)' }, + { value: 'sorted_set', documentation: 'Sort and deduplicate multi-values' }, + { value: 'array', documentation: 'Keep multi-values as-is' }, + ], + }, + // metricsSpec array properties + { + path: '$.spec.dataSchema.metricsSpec.[]', + isObject: true, + completions: [ + { value: 'type', documentation: 'Type of aggregator' }, + { value: 'name', documentation: 'Name of the metric' }, + { value: 'fieldName', documentation: 'Input field to aggregate' }, + ], + }, + // metric aggregator types + { + path: '$.spec.dataSchema.metricsSpec.[].type', + completions: [ + { value: 'count', documentation: 'Count of rows' }, + { value: 'longSum', documentation: 'Sum of long values' }, + { value: 'doubleSum', documentation: 'Sum of double values' }, + { value: 'longMin', documentation: 'Minimum of long values' }, + { value: 'longMax', documentation: 'Maximum of long 
values' }, + { value: 'doubleMin', documentation: 'Minimum of double values' }, + { value: 'doubleMax', documentation: 'Maximum of double values' }, + { value: 'longFirst', documentation: 'First long value seen' }, + { value: 'longLast', documentation: 'Last long value seen' }, + { value: 'doubleFirst', documentation: 'First double value seen' }, + { value: 'doubleLast', documentation: 'Last double value seen' }, + { value: 'thetaSketch', documentation: 'Theta sketch for approximate counting' }, + { value: 'HLLSketchBuild', documentation: 'HyperLogLog sketch for cardinality' }, + { value: 'quantilesDoublesSketch', documentation: 'Quantiles sketch for percentiles' }, + ], + }, + // granularitySpec object properties + { + path: '$.spec.dataSchema.granularitySpec', + isObject: true, + completions: [ + { value: 'type', documentation: 'Type of granularity specification' }, + { value: 'segmentGranularity', documentation: 'Granularity for segment partitioning' }, + { value: 'queryGranularity', documentation: 'Granularity for timestamp truncation' }, + { value: 'rollup', documentation: 'Whether to enable rollup (aggregation)' }, + { value: 'intervals', documentation: 'Time intervals to process (batch only)' }, + ], + }, + // granularitySpec.type values + { + path: '$.spec.dataSchema.granularitySpec.type', + completions: [ + { value: 'uniform', documentation: 'Uniform granularity (default)' }, + { value: 'arbitrary', documentation: 'Arbitrary granularity (advanced)' }, + ], + }, + // segmentGranularity values + { + path: '$.spec.dataSchema.granularitySpec.segmentGranularity', + completions: [ + { value: 'SECOND', documentation: 'Second-level segments' }, + { value: 'MINUTE', documentation: 'Minute-level segments' }, + { value: 'HOUR', documentation: 'Hourly segments' }, + { value: 'DAY', documentation: 'Daily segments (common choice)' }, + { value: 'WEEK', documentation: 'Weekly segments (not recommended)' }, + { value: 'MONTH', documentation: 'Monthly segments' }, + { 
value: 'QUARTER', documentation: 'Quarterly segments' }, + { value: 'YEAR', documentation: 'Yearly segments' }, + { value: 'ALL', documentation: 'Single segment for all data' }, + ], + }, + // queryGranularity values + { + path: '$.spec.dataSchema.granularitySpec.queryGranularity', + completions: [ + { value: 'NONE', documentation: 'No truncation (millisecond precision)' }, + { value: 'SECOND', documentation: 'Second-level truncation' }, + { value: 'MINUTE', documentation: 'Minute-level truncation' }, + { value: 'HOUR', documentation: 'Hour-level truncation' }, + { value: 'DAY', documentation: 'Day-level truncation' }, + { value: 'WEEK', documentation: 'Week-level truncation' }, + { value: 'MONTH', documentation: 'Month-level truncation' }, + { value: 'QUARTER', documentation: 'Quarter-level truncation' }, + { value: 'YEAR', documentation: 'Year-level truncation' }, + ], + }, + // rollup values + { + path: '$.spec.dataSchema.granularitySpec.rollup', + completions: [ + { value: 'true', documentation: 'Enable rollup (aggregate identical rows)' }, + { value: 'false', documentation: 'Disable rollup (store raw data)' }, + ], + }, + // transformSpec object properties + { + path: '$.spec.dataSchema.transformSpec', + isObject: true, + completions: [ + { value: 'transforms', documentation: 'List of transform expressions' }, + { value: 'filter', documentation: 'Filter to apply during ingestion' }, + ], + }, + // transform object properties + { + path: '$.spec.dataSchema.transformSpec.transforms.[]', + isObject: true, + completions: [ + { value: 'type', documentation: 'Type of transform' }, + { value: 'name', documentation: 'Name of the output field' }, + { value: 'expression', documentation: 'Transform expression' }, + ], + }, + // transform type values + { + path: '$.spec.dataSchema.transformSpec.transforms.[].type', + completions: [{ value: 'expression', documentation: 'Expression-based transform' }], + }, + // filter object properties + { + path: 
'$.spec.dataSchema.transformSpec.filter', + isObject: true, + completions: [ + { value: 'type', documentation: 'Type of filter' }, + { value: 'dimension', documentation: 'Dimension to filter on' }, + { value: 'value', documentation: 'Value to filter for' }, + { value: 'values', documentation: 'List of values to filter for' }, + { value: 'fields', documentation: 'List of sub-filters (logical filters)' }, + ], + }, + // filter type values + { + path: '$.spec.dataSchema.transformSpec.filter.type', + completions: [ + { value: 'selector', documentation: 'Exact match filter' }, + { value: 'in', documentation: 'Match any of multiple values' }, + { value: 'like', documentation: 'Pattern matching filter' }, + { value: 'regex', documentation: 'Regular expression filter' }, + { value: 'range', documentation: 'Numeric range filter' }, + { value: 'and', documentation: 'Logical AND filter' }, + { value: 'or', documentation: 'Logical OR filter' }, + { value: 'not', documentation: 'Logical NOT filter' }, + ], + }, + // ioConfig object properties (general) + { + path: '$.spec.ioConfig', + isObject: true, + completions: [ + { value: 'type', documentation: 'Type of I/O configuration' }, + { value: 'inputSource', documentation: 'Data input source configuration' }, + { value: 'inputFormat', documentation: 'Input data format configuration' }, + ], + }, + // ioConfig type values + { + path: '$.spec.ioConfig.type', + completions: [ + { value: 'index_parallel', documentation: 'Parallel batch ingestion' }, + { value: 'index', documentation: 'Single task batch ingestion' }, + { value: 'hadoop', documentation: 'Hadoop-based ingestion' }, + { value: 'kafka', documentation: 'Kafka streaming ingestion' }, + { value: 'kinesis', documentation: 'Kinesis streaming ingestion' }, + { value: 'rabbit', documentation: 'RabbitMQ streaming ingestion' }, + ], + }, + // Batch ioConfig properties + { + path: '$.spec.ioConfig', + isObject: true, + condition: obj => obj.type === 'index_parallel' || obj.type === 
'index', + completions: [ + { value: 'firehose', documentation: 'Legacy data input (deprecated)' }, + { value: 'appendToExisting', documentation: 'Whether to append to existing segments' }, + { value: 'dropExisting', documentation: 'Whether to drop existing segments' }, + ], + }, + // Streaming ioConfig properties (Kafka/Kinesis/Rabbit) + { + path: '$.spec.ioConfig', + isObject: true, + condition: obj => isSupervisorType(obj.type) && obj.type !== 'autocompact', + completions: [ + { value: 'taskCount', documentation: 'Number of reading tasks per replica' }, + { value: 'replicas', documentation: 'Number of replica task sets' }, + { value: 'taskDuration', documentation: 'Duration before tasks stop reading' }, + { value: 'startDelay', documentation: 'Delay before supervisor starts managing tasks' }, + { value: 'period', documentation: 'How often supervisor executes management logic' }, + { value: 'completionTimeout', documentation: 'Timeout for task completion' }, + { value: 'autoScalerConfig', documentation: 'Auto-scaling configuration' }, + ], + }, + // Kafka-specific ioConfig properties + { + path: '$.spec.ioConfig', + isObject: true, + condition: obj => obj.type === 'kafka', + completions: [ + { value: 'topic', documentation: 'Kafka topic to consume from' }, + { value: 'consumerProperties', documentation: 'Kafka consumer properties' }, + { + value: 'useEarliestOffset', + documentation: 'Start from earliest offset when no stored offset', + }, + ], + }, + // Kinesis-specific ioConfig properties + { + path: '$.spec.ioConfig', + isObject: true, + condition: obj => obj.type === 'kinesis', + completions: [ + { value: 'stream', documentation: 'Kinesis stream to consume from' }, + { value: 'endpoint', documentation: 'Kinesis endpoint URL' }, + { + value: 'useEarliestSequenceNumber', + documentation: 'Start from earliest when no stored sequence number', + }, + ], + }, + // appendToExisting values + { + path: '$.spec.ioConfig.appendToExisting', + completions: [ + { value: 
'false', documentation: 'Overwrite existing data (default)' }, + { value: 'true', documentation: 'Append to existing data' }, + ], + }, + // dropExisting values + { + path: '$.spec.ioConfig.dropExisting', + completions: [ + { value: 'false', documentation: 'Keep existing segments (default)' }, + { value: 'true', documentation: 'Drop existing segments in intervals' }, + ], + }, + // useEarliestOffset values + { + path: '$.spec.ioConfig.useEarliestOffset', + completions: [ + { value: 'false', documentation: 'Use latest offset when no stored offset (default)' }, + { value: 'true', documentation: 'Use earliest offset when no stored offset' }, + ], + }, + // useEarliestSequenceNumber values + { + path: '$.spec.ioConfig.useEarliestSequenceNumber', + completions: [ + { value: 'false', documentation: 'Use latest sequence number (default)' }, + { value: 'true', documentation: 'Use earliest sequence number' }, + ], + }, + // inputSource object properties + { + path: '$.spec.ioConfig.inputSource', + isObject: true, + completions: [{ value: 'type', documentation: 'Type of input source' }], + }, + // inputSource type values + { + path: '$.spec.ioConfig.inputSource.type', + completions: [ + { value: 'local', documentation: 'Local file system' }, + { value: 'http', documentation: 'HTTP/HTTPS URLs' }, + { value: 's3', documentation: 'Amazon S3' }, + { value: 'gs', documentation: 'Google Cloud Storage' }, + { value: 'azure', documentation: 'Azure Blob Storage' }, + { value: 'hdfs', documentation: 'Hadoop Distributed File System' }, + { value: 'druid', documentation: 'Re-index from existing Druid datasource' }, + { value: 'inline', documentation: 'Inline data in the spec' }, + { value: 'combining', documentation: 'Combine multiple input sources' }, + ], + }, + // Local input source properties + { + path: '$.spec.ioConfig.inputSource', + isObject: true, + condition: obj => obj.type === 'local', + completions: [ + { value: 'baseDir', documentation: 'Base directory path' }, + { value: 
'filter', documentation: 'File filter pattern' }, + { value: 'files', documentation: 'List of specific file paths' }, + ], + }, + // HTTP input source properties + { + path: '$.spec.ioConfig.inputSource', + isObject: true, + condition: obj => obj.type === 'http', + completions: [ + { value: 'uris', documentation: 'List of HTTP/HTTPS URIs' }, + { value: 'httpAuthenticationUsername', documentation: 'HTTP authentication username' }, + { value: 'httpAuthenticationPassword', documentation: 'HTTP authentication password' }, Review Comment: I'll add `systemFields` now, and I'll add `requestHeaders` once it is documented -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
