This is an automated email from the ASF dual-hosted git repository.

xiedeyantu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new 118edc9e94 [CALCITE-4460] Support custom delimiter when parsing CSV tables
118edc9e94 is described below

commit 118edc9e94bb24807b4682875482ffa8acc0fecf
Author: Diveyam Mishra <[email protected]>
AuthorDate: Sat May 16 14:37:05 2026 +0530

    [CALCITE-4460] Support custom delimiter when parsing CSV tables
---
 .../calcite/adapter/csv/CsvFilterableTable.java    |  2 +-
 .../calcite/adapter/csv/CsvScannableTable.java     |  2 +-
 .../adapter/csv/CsvStreamScannableTable.java       |  2 +-
 .../org/apache/calcite/adapter/csv/CsvTable.java   |  4 +-
 .../calcite/adapter/csv/CsvTranslatableTable.java  |  3 +-
 .../apache/calcite/adapter/file/CsvEnumerator.java | 20 ++---
 .../calcite/adapter/file/CsvStreamReader.java      |  6 +-
 .../org/apache/calcite/adapter/file/CsvTable.java  | 11 ++-
 .../calcite/adapter/file/CsvTableFactory.java      | 15 +++-
 .../calcite/adapter/file/CsvTranslatableTable.java | 10 ++-
 .../apache/calcite/adapter/file/FileSchema.java    |  5 +-
 .../calcite/adapter/file/FileAdapterTest.java      | 85 ++++++++++++++++++++++
 file/src/test/resources/custom-separator.json      | 36 +++++++++
 .../test/resources/sales-csv/PIPE_DELIMITED.csv    |  5 ++
 site/_docs/file_adapter.md                         | 20 +++++
 15 files changed, 198 insertions(+), 28 deletions(-)

diff --git 
a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvFilterableTable.java
 
b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvFilterableTable.java
index 4a12352951..ee621740f9 100644
--- 
a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvFilterableTable.java
+++ 
b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvFilterableTable.java
@@ -66,7 +66,7 @@ public CsvFilterableTable(Source source,
     return new AbstractEnumerable<@Nullable Object[]>() {
       @Override public Enumerator<@Nullable Object[]> enumerator() {
         return new CsvEnumerator<>(source, cancelFlag, false, filterValues,
-            CsvEnumerator.arrayConverter(fieldTypes, fields, false));
+            CsvEnumerator.arrayConverter(fieldTypes, fields, false), ',');
       }
     };
   }
diff --git 
a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvScannableTable.java
 
b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvScannableTable.java
index 25d0295054..836af81373 100644
--- 
a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvScannableTable.java
+++ 
b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvScannableTable.java
@@ -58,7 +58,7 @@ public class CsvScannableTable extends CsvTable
     return new AbstractEnumerable<@Nullable Object[]>() {
       @Override public Enumerator<@Nullable Object[]> enumerator() {
         return new CsvEnumerator<>(source, cancelFlag, false, null,
-            CsvEnumerator.arrayConverter(fieldTypes, fields, false));
+            CsvEnumerator.arrayConverter(fieldTypes, fields, false), ',');
       }
     };
   }
diff --git 
a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvStreamScannableTable.java
 
b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvStreamScannableTable.java
index a7947a2f98..7c0d574cc7 100644
--- 
a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvStreamScannableTable.java
+++ 
b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvStreamScannableTable.java
@@ -65,7 +65,7 @@ public class CsvStreamScannableTable extends CsvScannableTable
     return new AbstractEnumerable<@Nullable Object[]>() {
       @Override public Enumerator<@Nullable Object[]> enumerator() {
         return new CsvEnumerator<>(source, cancelFlag, true, null,
-            CsvEnumerator.arrayConverter(fieldTypes, fields, true));
+            CsvEnumerator.arrayConverter(fieldTypes, fields, true), ',');
       }
     };
   }
diff --git 
a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvTable.java 
b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvTable.java
index a882e5c824..aac56bb38c 100644
--- a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvTable.java
+++ b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvTable.java
@@ -51,7 +51,7 @@ public abstract class CsvTable extends AbstractTable {
     if (rowType == null) {
       rowType =
           CsvEnumerator.deduceRowType((JavaTypeFactory) typeFactory, source,
-              null, isStream());
+              null, isStream(), ',');
     }
     return rowType;
   }
@@ -61,7 +61,7 @@ public List<RelDataType> getFieldTypes(RelDataTypeFactory 
typeFactory) {
     if (fieldTypes == null) {
       fieldTypes = new ArrayList<>();
       CsvEnumerator.deduceRowType((JavaTypeFactory) typeFactory, source,
-          fieldTypes, isStream());
+          fieldTypes, isStream(), ',');
     }
     return fieldTypes;
   }
diff --git 
a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvTranslatableTable.java
 
b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvTranslatableTable.java
index 78632cfb43..1da1992b6b 100644
--- 
a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvTranslatableTable.java
+++ 
b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvTranslatableTable.java
@@ -66,7 +66,8 @@ public Enumerable<Object> project(final DataContext root,
             source,
             cancelFlag,
             getFieldTypes(typeFactory),
-            ImmutableIntList.of(fields));
+            ImmutableIntList.of(fields),
+            ',');
       }
     };
   }
diff --git 
a/file/src/main/java/org/apache/calcite/adapter/file/CsvEnumerator.java 
b/file/src/main/java/org/apache/calcite/adapter/file/CsvEnumerator.java
index 012be61450..f62433beab 100644
--- a/file/src/main/java/org/apache/calcite/adapter/file/CsvEnumerator.java
+++ b/file/src/main/java/org/apache/calcite/adapter/file/CsvEnumerator.java
@@ -113,14 +113,15 @@ private static void clearTimeFormats() {
       .compile("\"decimal\\(([0-9]+),([0-9]+)\\)");
 
   public CsvEnumerator(Source source, AtomicBoolean cancelFlag,
-      List<RelDataType> fieldTypes, List<Integer> fields) {
+      List<RelDataType> fieldTypes, List<Integer> fields, char separator) {
     //noinspection unchecked
     this(source, cancelFlag, false, null,
-        (RowConverter<E>) converter(fieldTypes, fields));
+        (RowConverter<E>) converter(fieldTypes, fields), separator);
   }
 
   public CsvEnumerator(Source source, AtomicBoolean cancelFlag, boolean stream,
-      @Nullable String @Nullable [] filterValues, RowConverter<E> 
rowConverter) {
+      @Nullable String @Nullable [] filterValues, RowConverter<E> rowConverter,
+      char separator) {
     this.cancelFlag = cancelFlag;
     this.rowConverter = rowConverter;
     this.filterValues =
@@ -128,9 +129,9 @@ public CsvEnumerator(Source source, AtomicBoolean 
cancelFlag, boolean stream,
             : ImmutableNullableList.copyOf(filterValues);
     try {
       if (stream) {
-        this.reader = new CsvStreamReader(source);
+        this.reader = new CsvStreamReader(source, separator);
       } else {
-        this.reader = openCsv(source);
+        this.reader = openCsv(source, separator);
       }
       this.reader.readNext(); // skip header row
     } catch (IOException e) {
@@ -156,14 +157,15 @@ private static RowConverter<?> 
converter(List<RelDataType> fieldTypes,
   /** Deduces the names and types of a table's columns by reading the first line
    * of a CSV file. */
   public static RelDataType deduceRowType(JavaTypeFactory typeFactory,
-      Source source, @Nullable List<RelDataType> fieldTypes, Boolean stream) {
+      Source source, @Nullable List<RelDataType> fieldTypes, Boolean stream,
+      char separator) {
     final List<RelDataType> types = new ArrayList<>();
     final List<String> names = new ArrayList<>();
     if (stream) {
       names.add(FileSchemaFactory.ROWTIME_COLUMN_NAME);
       types.add(typeFactory.createSqlType(SqlTypeName.TIMESTAMP));
     }
-    try (CSVReader reader = openCsv(source)) {
+    try (CSVReader reader = openCsv(source, separator)) {
       String[] strings = reader.readNext();
       if (strings == null) {
         strings = new String[]{"EmptyFileHasNoColumns:boolean"};
@@ -247,9 +249,9 @@ public static RelDataType deduceRowType(JavaTypeFactory 
typeFactory,
     return typeFactory.createStructType(Pair.zip(names, types));
   }
 
-  static CSVReader openCsv(Source source) throws IOException {
+  static CSVReader openCsv(Source source, char separator) throws IOException {
     requireNonNull(source, "source");
-    return new CSVReader(source.reader());
+    return new CSVReader(source.reader(), separator);
   }
 
   @Override public E current() {
diff --git 
a/file/src/main/java/org/apache/calcite/adapter/file/CsvStreamReader.java 
b/file/src/main/java/org/apache/calcite/adapter/file/CsvStreamReader.java
index 54dd3837e2..e9113c7d1a 100644
--- a/file/src/main/java/org/apache/calcite/adapter/file/CsvStreamReader.java
+++ b/file/src/main/java/org/apache/calcite/adapter/file/CsvStreamReader.java
@@ -53,9 +53,9 @@ class CsvStreamReader extends CSVReader implements Closeable {
    */
   public static final long DEFAULT_MONITOR_DELAY = 2000;
 
-  CsvStreamReader(Source source) {
+  CsvStreamReader(Source source, char separator) {
     this(source,
-        CSVParser.DEFAULT_SEPARATOR,
+        separator,
         CSVParser.DEFAULT_QUOTE_CHARACTER,
         CSVParser.DEFAULT_ESCAPE_CHARACTER,
         DEFAULT_SKIP_LINES,
@@ -106,7 +106,7 @@ private CsvStreamReader(Source source, char separator, char 
quoteChar,
   /**
    * Reads the next line from the buffer and converts to a string array.
    *
-   * @return a string array with each comma-separated element as a separate entry.
+   * @return a string array with each delimited element as a separate entry.
    *
    * @throws IOException if bad things happen during the read
    */
diff --git a/file/src/main/java/org/apache/calcite/adapter/file/CsvTable.java 
b/file/src/main/java/org/apache/calcite/adapter/file/CsvTable.java
index 5ebb2b1218..4b4c718d41 100644
--- a/file/src/main/java/org/apache/calcite/adapter/file/CsvTable.java
+++ b/file/src/main/java/org/apache/calcite/adapter/file/CsvTable.java
@@ -37,13 +37,16 @@
 public abstract class CsvTable extends AbstractTable {
   protected final Source source;
   protected final @Nullable RelProtoDataType protoRowType;
+  protected final char separator;
   private @Nullable RelDataType rowType;
   private @Nullable List<RelDataType> fieldTypes;
 
-  /** Creates a CsvTable. */
-  CsvTable(Source source, @Nullable RelProtoDataType protoRowType) {
+  /** Creates a CsvTable with a custom separator. */
+  CsvTable(Source source, @Nullable RelProtoDataType protoRowType,
+      char separator) {
     this.source = source;
     this.protoRowType = protoRowType;
+    this.separator = separator;
   }
 
   @Override public RelDataType getRowType(RelDataTypeFactory typeFactory) {
@@ -53,7 +56,7 @@ public abstract class CsvTable extends AbstractTable {
     if (rowType == null) {
       rowType =
           CsvEnumerator.deduceRowType((JavaTypeFactory) typeFactory, source,
-              null, isStream());
+              null, isStream(), separator);
     }
     return rowType;
   }
@@ -63,7 +66,7 @@ public List<RelDataType> getFieldTypes(RelDataTypeFactory 
typeFactory) {
     if (fieldTypes == null) {
       fieldTypes = new ArrayList<>();
       CsvEnumerator.deduceRowType((JavaTypeFactory) typeFactory, source,
-          fieldTypes, isStream());
+          fieldTypes, isStream(), separator);
     }
     return fieldTypes;
   }
diff --git 
a/file/src/main/java/org/apache/calcite/adapter/file/CsvTableFactory.java 
b/file/src/main/java/org/apache/calcite/adapter/file/CsvTableFactory.java
index 82e636cb61..fa37d15653 100644
--- a/file/src/main/java/org/apache/calcite/adapter/file/CsvTableFactory.java
+++ b/file/src/main/java/org/apache/calcite/adapter/file/CsvTableFactory.java
@@ -25,6 +25,8 @@
 import org.apache.calcite.util.Source;
 import org.apache.calcite.util.Sources;
 
+import au.com.bytecode.opencsv.CSVParser;
+
 import org.checkerframework.checker.nullness.qual.Nullable;
 
 import java.io.File;
@@ -50,6 +52,17 @@ public CsvTableFactory() {
     final Source source = Sources.file(base, fileName);
     final RelProtoDataType protoRowType =
         rowType != null ? RelDataTypeImpl.proto(rowType) : null;
-    return new CsvTranslatableTable(source, protoRowType);
+    final String separatorStr = (String) operand.get("separator");
+    final char separator;
+    if (separatorStr == null) {
+      separator = CSVParser.DEFAULT_SEPARATOR;
+    } else if (separatorStr.length() == 1) {
+      separator = separatorStr.charAt(0);
+    } else {
+      throw new IllegalArgumentException(
+          "Invalid separator '" + separatorStr
+              + "'. Separator must be a single character.");
+    }
+    return new CsvTranslatableTable(source, protoRowType, separator);
   }
 }
diff --git 
a/file/src/main/java/org/apache/calcite/adapter/file/CsvTranslatableTable.java 
b/file/src/main/java/org/apache/calcite/adapter/file/CsvTranslatableTable.java
index ebfc1f2e70..7f81defebe 100644
--- 
a/file/src/main/java/org/apache/calcite/adapter/file/CsvTranslatableTable.java
+++ 
b/file/src/main/java/org/apache/calcite/adapter/file/CsvTranslatableTable.java
@@ -47,9 +47,10 @@
  */
 public class CsvTranslatableTable extends CsvTable
     implements QueryableTable, TranslatableTable {
-  /** Creates a CsvTable. */
-  CsvTranslatableTable(Source source, @Nullable RelProtoDataType protoRowType) 
{
-    super(source, protoRowType);
+  /** Creates a CsvTranslatableTable with a custom separator. */
+  CsvTranslatableTable(Source source, @Nullable RelProtoDataType protoRowType,
+      char separator) {
+    super(source, protoRowType, separator);
   }
 
   @Override public String toString() {
@@ -65,7 +66,8 @@ public Enumerable<Object> project(final DataContext root,
       @Override public Enumerator<Object> enumerator() {
         JavaTypeFactory typeFactory = root.getTypeFactory();
         return new CsvEnumerator<>(source, cancelFlag,
-            getFieldTypes(typeFactory), ImmutableIntList.of(fields));
+            getFieldTypes(typeFactory), ImmutableIntList.of(fields),
+            separator);
       }
     };
   }
diff --git a/file/src/main/java/org/apache/calcite/adapter/file/FileSchema.java 
b/file/src/main/java/org/apache/calcite/adapter/file/FileSchema.java
index b842030ad0..f66effb4a3 100644
--- a/file/src/main/java/org/apache/calcite/adapter/file/FileSchema.java
+++ b/file/src/main/java/org/apache/calcite/adapter/file/FileSchema.java
@@ -23,6 +23,8 @@
 import org.apache.calcite.util.Sources;
 import org.apache.calcite.util.Util;
 
+import au.com.bytecode.opencsv.CSVParser;
+
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 
@@ -142,7 +144,8 @@ private static boolean 
addTable(ImmutableMap.Builder<String, Table> builder,
     }
     final Source sourceSansCsv = sourceSansGz.trimOrNull(".csv");
     if (sourceSansCsv != null) {
-      final Table table = new CsvTranslatableTable(source, null);
+      final Table table =
+          new CsvTranslatableTable(source, null, CSVParser.DEFAULT_SEPARATOR);
       builder.put(Util.first(tableName, sourceSansCsv.path()), table);
       return true;
     }
diff --git 
a/file/src/test/java/org/apache/calcite/adapter/file/FileAdapterTest.java 
b/file/src/test/java/org/apache/calcite/adapter/file/FileAdapterTest.java
index 0d0fdc031c..ad774f3964 100644
--- a/file/src/test/java/org/apache/calcite/adapter/file/FileAdapterTest.java
+++ b/file/src/test/java/org/apache/calcite/adapter/file/FileAdapterTest.java
@@ -332,6 +332,91 @@ private static void checkEmpty(ResultSet resultSet) {
     sql("model-with-custom-table", "select * from CUSTOM_TABLE.EMPS").ok();
   }
 
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-4460">[CALCITE-4460]
+   * Support custom delimiter when parsing CSV tables</a>.
+   *
+   * <p>Reads a pipe-delimited file via CsvTableFactory with a custom
+   * separator. */
+  @Test void testCsvCustomSeparatorPipe() {
+    final String sql = "select * from CUSTOM_SEPARATOR.PIPE_DEPTS";
+    sql("custom-separator", sql)
+        .returns("DEPTNO=10; NAME=Sales",
+            "DEPTNO=20; NAME=Marketing",
+            "DEPTNO=30; NAME=Accounts",
+            "DEPTNO=40; NAME=tic|tac|toe")
+        .ok();
+  }
+
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-4460">[CALCITE-4460]
+   * Support custom delimiter when parsing CSV tables</a>.
+   *
+   * <p>Verifies quoted content is parsed correctly when it contains the custom
+   * separator character. */
+  @Test void testCsvCustomSeparatorEscaping() {
+    final String sql = "select * from CUSTOM_SEPARATOR.PIPE_DEPTS "
+        + "where NAME = 'tic|tac|toe'";
+    sql("custom-separator", sql)
+        .returns("DEPTNO=40; NAME=tic|tac|toe")
+        .ok();
+  }
+
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-4460">[CALCITE-4460]
+   * Support custom delimiter when parsing CSV tables</a>.
+   *
+   * <p>Verifies that a multi-character separator is rejected. */
+  @Test void testCsvCustomSeparatorInvalidMultiChar() throws SQLException {
+    Properties info = new Properties();
+    info.put("model",
+        "inline:"
+            + "{\n"
+            + "  version: '1.0',\n"
+            + "  defaultSchema: 'TEST',\n"
+            + "  schemas: [\n"
+            + "    {\n"
+            + "      name: 'TEST',\n"
+            + "      tables: [\n"
+            + "        {\n"
+            + "          name: 'BAD',\n"
+            + "          type: 'custom',\n"
+            + "          factory: 'org.apache.calcite.adapter.file.CsvTableFactory',\n"
+            + "          operand: {\n"
+            + "            file: 'sales-csv/DEPTS.csv',\n"
+            + "            separator: '||'\n"
+            + "          }\n"
+            + "        }\n"
+            + "      ]\n"
+            + "    }\n"
+            + "  ]\n"
+            + "}");
+    try {
+      Connection connection =
+          DriverManager.getConnection("jdbc:calcite:", info);
+      connection.close();
+      throw new AssertionError("expected error");
+    } catch (RuntimeException e) {
+      Throwable cause = e;
+      while (cause.getCause() != null) {
+        cause = cause.getCause();
+      }
+      assertThat(cause.getMessage(),
+          is("Invalid separator '||'. "
+              + "Separator must be a single character."));
+    }
+  }
+
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-4460">[CALCITE-4460]
+   * Support custom delimiter when parsing CSV tables</a>.
+   *
+   * <p>Verifies that omitting the separator defaults to comma. */
+  @Test void testCsvDefaultSeparatorBackwardCompat() {
+    final String sql = "select * from CUSTOM_TABLE.EMPS";
+    sql("model-with-custom-table", sql).ok();
+  }
+
   @Test void testPushDownProject() {
     final String sql = "explain plan for select * from EMPS";
     final String expected = "PLAN=CsvTableScan(table=[[SALES, EMPS]], "
diff --git a/file/src/test/resources/custom-separator.json 
b/file/src/test/resources/custom-separator.json
new file mode 100644
index 0000000000..ebc5002394
--- /dev/null
+++ b/file/src/test/resources/custom-separator.json
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+{
+  "version": "1.0",
+  "defaultSchema": "CUSTOM_SEPARATOR",
+  "schemas": [
+    {
+      "name": "CUSTOM_SEPARATOR",
+      "tables": [
+        {
+          "name": "PIPE_DEPTS",
+          "type": "custom",
+          "factory": "org.apache.calcite.adapter.file.CsvTableFactory",
+          "operand": {
+            "file": "sales-csv/PIPE_DELIMITED.csv",
+            "separator": "|"
+          }
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/file/src/test/resources/sales-csv/PIPE_DELIMITED.csv 
b/file/src/test/resources/sales-csv/PIPE_DELIMITED.csv
new file mode 100644
index 0000000000..2a094fae30
--- /dev/null
+++ b/file/src/test/resources/sales-csv/PIPE_DELIMITED.csv
@@ -0,0 +1,5 @@
+DEPTNO:int|NAME:string
+10|"Sales"
+20|"Marketing"
+30|"Accounts"
+40|"tic|tac|toe"
diff --git a/site/_docs/file_adapter.md b/site/_docs/file_adapter.md
index a812558179..dc31fec413 100644
--- a/site/_docs/file_adapter.md
+++ b/site/_docs/file_adapter.md
@@ -273,6 +273,26 @@ ## CSV files and model-free browsing
 3 rows selected (0.985 seconds)
 {% endhighlight %}
 
+### CSV Custom Separator
+
+When using `CsvTableFactory` to define a table in a model, you can specify an
+optional `separator` operand to use a custom delimiter.
+
+{% highlight json %}
+{
+  "name": "PIPE_DEPTS",
+  "type": "custom",
+  "factory": "org.apache.calcite.adapter.file.CsvTableFactory",
+  "operand": {
+    "file": "sales-csv/PIPE_DELIMITED.csv",
+    "separator": "|"
+  }
+}
+{% endhighlight %}
+
+The separator must be a single character. If not specified, it defaults to a
+comma.
+
 ## JSON files and model-free browsing
 
 Some files describe their own schema, and for these files, we do not need a model. For example, `DEPTS.json` has an integer `DEPTNO` column and a string `NAME` column:

Reply via email to