This is an automated email from the ASF dual-hosted git repository.

karan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new c419ae5f737 use objectGlob (#16452)
c419ae5f737 is described below

commit c419ae5f73784a3ef90cea82aefd0a749d83978d
Author: Vadim Ogievetsky <[email protected]>
AuthorDate: Wed May 15 02:41:11 2024 -0700

    use objectGlob (#16452)
    
    Catching up to a change introduced in #13027
---
 .../druid-models/ingestion-spec/ingestion-spec.tsx | 46 +++++++++++-------
 .../src/druid-models/input-source/input-source.tsx | 54 ++++++++++++++--------
 2 files changed, 64 insertions(+), 36 deletions(-)

diff --git a/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx b/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx
index 393c3e7ee1b..37ec41d50e9 100644
--- a/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx
+++ b/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx
@@ -56,7 +56,11 @@ import { summarizeIndexSpec } from '../index-spec/index-spec';
 import type { InputFormat } from '../input-format/input-format';
 import { issueWithInputFormat } from '../input-format/input-format';
 import type { InputSource } from '../input-source/input-source';
-import { FILTER_SUGGESTIONS, issueWithInputSource } from '../input-source/input-source';
+import {
+  FILTER_SUGGESTIONS,
+  issueWithInputSource,
+  OBJECT_GLOB_SUGGESTIONS,
+} from '../input-source/input-source';
 import type { MetricSpec } from '../metric-spec/metric-spec';
 import {
   getMetricSpecOutputType,
@@ -584,21 +588,29 @@ export function getIoConfigFormFields(ingestionComboType: IngestionComboType): F
     ),
   };
 
-  const inputSourceFilter: Field<IoConfig> = {
-    name: 'inputSource.filter',
-    label: 'File filter',
+  const inputSourceObjectGlob: Field<IoConfig> = {
+    name: 'inputSource.objectGlob',
+    label: 'Object glob',
     type: 'string',
-    suggestions: FILTER_SUGGESTIONS,
-    placeholder: '*',
+    suggestions: OBJECT_GLOB_SUGGESTIONS,
+    placeholder: '(all files)',
     info: (
-      <p>
-        A wildcard filter for files. See{' '}
-        <ExternalLink href="https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/filefilter/WildcardFileFilter.html">
-          here
-        </ExternalLink>{' '}
-        for format information. Files matching the filter criteria are considered for ingestion.
-        Files not matching the filter criteria are ignored.
-      </p>
+      <>
+        <p>A glob for the object part of the URI.</p>
+        <p>
+          The glob must match the entire object part, not just the filename. For example, the glob
+          <Code>*.json</Code> does not match <Code>/bar/file.json</Code>, because the{' '}
+          <Code>*</Code> does not match the slash. To match all objects ending in <Code>.json</Code>
+          , use <Code>**.json</Code> instead.
+        </p>
+        <p>
+          For more information, refer to the documentation for{' '}
+          <ExternalLink href="https://docs.oracle.com/javase/8/docs/api/java/nio/file/FileSystem.html#getPathMatcher-java.lang.String-">
+            FileSystem#getPathMatcher
+          </ExternalLink>
+          .
+        </p>
+      </>
     ),
   };
 
@@ -781,7 +793,7 @@ export function getIoConfigFormFields(ingestionComboType: IngestionComboType): F
             </>
           ),
         },
-        inputSourceFilter,
+        inputSourceObjectGlob,
         {
           name: 'inputSource.properties.accessKeyId.type',
           label: 'Access key ID type',
@@ -944,7 +956,7 @@ export function getIoConfigFormFields(ingestionComboType: IngestionComboType): F
             </>
           ),
         },
-        inputSourceFilter,
+        inputSourceObjectGlob,
         {
           name: 'inputSource.properties.sharedAccessStorageToken',
           label: 'Shared Access Storage Token',
@@ -1018,7 +1030,7 @@ export function getIoConfigFormFields(ingestionComboType: IngestionComboType): F
             </>
           ),
         },
-        inputSourceFilter,
+        inputSourceObjectGlob,
       ];
 
     case 'index_parallel:delta':
diff --git a/web-console/src/druid-models/input-source/input-source.tsx b/web-console/src/druid-models/input-source/input-source.tsx
index 17b137412ef..174f8aba516 100644
--- a/web-console/src/druid-models/input-source/input-source.tsx
+++ b/web-console/src/druid-models/input-source/input-source.tsx
@@ -16,6 +16,7 @@
  * limitations under the License.
  */
 
+import { Code } from '@blueprintjs/core';
 import React from 'react';
 
 import type { Field } from '../../components';
@@ -36,6 +37,18 @@ export const FILTER_SUGGESTIONS: string[] = [
   '*.avro',
 ];
 
+export const OBJECT_GLOB_SUGGESTIONS: string[] = [
+  '**.jsonl',
+  '**.jsonl.gz',
+  '**.json',
+  '**.json.gz',
+  '**.csv',
+  '**.tsv',
+  '**.parquet',
+  '**.orc',
+  '**.avro',
+];
+
 export interface InputSource {
   type: string;
   baseDir?: string;
@@ -43,6 +56,7 @@ export interface InputSource {
   uris?: string[];
   prefixes?: string[];
   objects?: { bucket: string; path: string }[];
+  objectGlob?: string;
   fetchTimeout?: number;
   systemFields?: string[];
 
@@ -94,10 +108,11 @@ export type InputSourceDesc =
       httpAuthenticationPassword?: any;
     }
   | {
-      type: 's3';
+      type: 's3' | 'google' | 'azureStorage';
       uris?: string[];
       prefixes?: string[];
       objects?: { bucket: string; path: string }[];
+      objectGlob?: string;
       properties?: {
         accessKeyId?: any;
         secretAccessKey?: any;
@@ -105,12 +120,6 @@ export type InputSourceDesc =
         assumeRoleExternalId?: any;
       };
     }
-  | {
-      type: 'google' | 'azureStorage';
-      uris?: string[];
-      prefixes?: string[];
-      objects?: { bucket: string; path: string }[];
-    }
   | {
       type: 'hdfs';
       paths?: string | string[];
@@ -483,21 +492,28 @@ export const INPUT_SOURCE_FIELDS: Field<InputSource>[] = [
 
   // Cloud common
   {
-    name: 'filter',
-    label: 'File filter',
+    name: 'objectGlob',
     type: 'string',
-    suggestions: FILTER_SUGGESTIONS,
-    placeholder: '*',
+    suggestions: OBJECT_GLOB_SUGGESTIONS,
+    placeholder: '(all files)',
     defined: typeIsKnown(KNOWN_TYPES, 's3', 'azureStorage', 'google'),
     info: (
-      <p>
-        A wildcard filter for files. See{' '}
-        <ExternalLink href="https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/filefilter/WildcardFileFilter.html">
-          here
-        </ExternalLink>{' '}
-        for format information. Files matching the filter criteria are considered for ingestion.
-        Files not matching the filter criteria are ignored.
-      </p>
+      <>
+        <p>A glob for the object part of the URI.</p>
+        <p>
+          The glob must match the entire object part, not just the filename. For example, the glob
+          <Code>*.json</Code> does not match <Code>/bar/file.json</Code>, because the{' '}
+          <Code>*</Code> does not match the slash. To match all objects ending in <Code>.json</Code>
+          , use <Code>**.json</Code> instead.
+        </p>
+        <p>
+          For more information, refer to the documentation for{' '}
+          <ExternalLink href="https://docs.oracle.com/javase/8/docs/api/java/nio/file/FileSystem.html#getPathMatcher-java.lang.String-">
+            FileSystem#getPathMatcher
+          </ExternalLink>
+          .
+        </p>
+      </>
     ),
   },
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to