This is an automated email from the ASF dual-hosted git repository.

jiangtian pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git


The following commit(s) were added to refs/heads/develop by this push:
     new caa77ac0 [Java/C++/C] Resolved case sensitivity issue when reading column names. (#518)
caa77ac0 is described below

commit caa77ac003f9b64687b3ebb193f19be455508fdc
Author: Hongzhi Gao <[email protected]>
AuthorDate: Thu Jun 19 10:40:21 2025 +0800

    [Java/C++/C] Resolved case sensitivity issue when reading column names. (#518)
    
    * Revert "Resolved case sensitivity issue when reading column names. (#517)"
    
    This reverts commit c501bf4e0ac13cdbe0b7d404cd4ea3ef99904203.
    
    * Resolved case sensitivity issue when reading column names.
    
    * [Java]Resolved case sensitivity issue when reading column names.
    
    * spotless apply
    
    * fix cpp ut mem leak
    
    * fix cpp ut mem leak
---
 cpp/src/reader/result_set.h                        | 33 ++++++++++++++++++--
 cpp/src/reader/table_query_executor.cc             | 17 +++++-----
 cpp/src/utils/storage_utils.h                      |  2 ++
 cpp/test/cwrapper/c_release_test.cc                |  4 +--
 .../reader/table_view/tsfile_reader_table_test.cc  | 36 ++++++++++++++++++++++
 .../writer/table_view/tsfile_writer_table_test.cc  |  7 +++--
 .../read/query/dataset/AbstractResultSet.java      |  4 +--
 .../org/apache/tsfile/tableview/TableViewTest.java | 27 ++++++++++++++++
 8 files changed, 114 insertions(+), 16 deletions(-)

diff --git a/cpp/src/reader/result_set.h b/cpp/src/reader/result_set.h
index 59e08455..e0dfacd3 100644
--- a/cpp/src/reader/result_set.h
+++ b/cpp/src/reader/result_set.h
@@ -21,6 +21,9 @@
 #define READER_QUERY_DATA_SET_H
 
 #include <unordered_map>
+#include <iostream>
+#include <string>
+#include <algorithm>
 
 #include "common/row_record.h"
 
@@ -166,8 +169,34 @@ class ResultSet {
      */
     virtual void close() = 0;
 
-   protected:
-    std::unordered_map<std::string, uint32_t> index_lookup_;
+protected:
+    // Hashes the lower-cased spelling of a column name, so names that differ
+    // only in letter case always produce the same hash value.
+    struct CaseInsensitiveHash {
+        std::size_t operator()(const std::string& str) const {
+            std::string folded(str.size(), '\0');
+            std::transform(str.cbegin(), str.cend(), folded.begin(),
+                           [](unsigned char ch) { return std::tolower(ch); });
+            return std::hash<std::string>()(folded);
+        }
+    };
+
+    // Case-insensitive equality for column-name keys; must stay consistent
+    // with CaseInsensitiveHash (names equal here hash to the same value).
+    struct CaseInsensitiveEqual {
+        bool operator()(const std::string& lhs, const std::string& rhs) const {
+            // std::tolower has undefined behavior for negative char values,
+            // so each byte is converted to unsigned char before folding.
+            return lhs.size() == rhs.size() &&
+                   std::equal(lhs.begin(), lhs.end(), rhs.begin(),
+                              [](unsigned char a, unsigned char b) {
+                                  return std::tolower(a) == std::tolower(b);
+                              });
+        }
+    };
+
+    std::unordered_map<std::string, uint32_t, CaseInsensitiveHash,
+                       CaseInsensitiveEqual> index_lookup_;
     common::PageArena pa_;
 };
 
diff --git a/cpp/src/reader/table_query_executor.cc 
b/cpp/src/reader/table_query_executor.cc
index 97913c21..77ecc929 100644
--- a/cpp/src/reader/table_query_executor.cc
+++ b/cpp/src/reader/table_query_executor.cc
@@ -41,19 +41,20 @@ int TableQueryExecutor::query(const std::string &table_name,
         ret_qds = nullptr;
         return ret;
     }
-    std::vector<std::string> std_column_names(columns);
-    for (auto &column : std_column_names) {
+    std::vector<std::string> lower_case_column_names(columns);
+    for (auto &column : lower_case_column_names) {
         to_lowercase_inplace(column);
     }
     std::shared_ptr<ColumnMapping> column_mapping = 
std::make_shared<ColumnMapping>();
-    for (size_t i = 0; i < std_column_names.size(); ++i) {
-        column_mapping->add(std_column_names[i], static_cast<int>(i), 
*table_schema);
+    for (size_t i = 0; i < lower_case_column_names.size(); ++i) {
+        column_mapping->add(lower_case_column_names[i], static_cast<int>(i), 
*table_schema);
     }
     std::vector<common::TSDataType> data_types;
-    data_types.reserve(columns.size());
-    for (size_t i = 0; i < columns.size(); ++i) {
-        auto ind = table_schema->find_column_index(columns[i]);
+    data_types.reserve(lower_case_column_names.size());
+    for (size_t i = 0; i < lower_case_column_names.size(); ++i) {
+        auto ind = table_schema->find_column_index(lower_case_column_names[i]);
         if (ind < 0) {
+            delete time_filter;
             return common::E_COLUMN_NOT_EXIST;
         }
         data_types.push_back(table_schema->get_data_types()[ind]);
@@ -61,7 +62,7 @@ int TableQueryExecutor::query(const std::string &table_name,
     // column_mapping.add(*measurement_filter);
 
     auto device_task_iterator = std::unique_ptr<DeviceTaskIterator>(
-        new DeviceTaskIterator(std_column_names, table_root, column_mapping,
+        new DeviceTaskIterator(columns, table_root, column_mapping,
                                meta_data_querier_, id_filter, table_schema));
 
     std::unique_ptr<TsBlockReader> tsblock_reader;
diff --git a/cpp/src/utils/storage_utils.h b/cpp/src/utils/storage_utils.h
index a152a57e..a0c6f3b2 100644
--- a/cpp/src/utils/storage_utils.h
+++ b/cpp/src/utils/storage_utils.h
@@ -19,6 +19,8 @@
 #ifndef UTILS_STORAGE_UTILS_H
 #define UTILS_STORAGE_UTILS_H
 
+#include <inttypes.h>
+#include <stdint.h>
 #include <algorithm>
 
 #include "common/datatype/value.h"
diff --git a/cpp/test/cwrapper/c_release_test.cc 
b/cpp/test/cwrapper/c_release_test.cc
index 0ccb0cdd..bb73fb9d 100644
--- a/cpp/test/cwrapper/c_release_test.cc
+++ b/cpp/test/cwrapper/c_release_test.cc
@@ -307,9 +307,9 @@ TEST_F(CReleaseTest, TsFileWriterMultiDataType) {
         ASSERT_EQ("device1", std::string(str_value));
         free(str_value);
         ASSERT_EQ(value, 
tsfile_result_set_get_value_by_name_int32_t(result_set,
-                                                                     "INT32"));
+                                                                     "int32"));
         ASSERT_EQ(value * 100, tsfile_result_set_get_value_by_name_int64_t(
-                                   result_set, "INT64"));
+                                   result_set, "int64"));
         ASSERT_EQ(value * 100.0, tsfile_result_set_get_value_by_name_float(
                                      result_set, "FLOAT"));
 
diff --git a/cpp/test/reader/table_view/tsfile_reader_table_test.cc 
b/cpp/test/reader/table_view/tsfile_reader_table_test.cc
index 2bdc74b2..2e5a929f 100644
--- a/cpp/test/reader/table_view/tsfile_reader_table_test.cc
+++ b/cpp/test/reader/table_view/tsfile_reader_table_test.cc
@@ -380,3 +380,39 @@ TEST_F(TsFileTableReaderTest, 
TableModelQueryWithMultiTabletsMultiFlush) {
     delete[] literal;
     delete tmp_table_schema;
 }
+
+// Writes a small two-column table ("device" as TAG, "value" as FIELD), then
+// checks that querying a column name absent from that schema returns an
+// error code and leaves the output ResultSet pointer null.
+TEST_F(TsFileTableReaderTest, ReadNonExistColumn) {
+    std::vector<MeasurementSchema*> measurement_schemas;
+    std::vector<ColumnCategory> column_categories;
+    measurement_schemas.resize(2);
+    // NOTE(review): these schemas are never deleted in this test; presumably
+    // TableSchema takes ownership of them -- confirm.
+    measurement_schemas[0] = new MeasurementSchema("device", STRING);
+    measurement_schemas[1] = new MeasurementSchema("value", DOUBLE);
+    column_categories.emplace_back(ColumnCategory::TAG);
+    column_categories.emplace_back(ColumnCategory::FIELD);
+    TableSchema* table_schema =
+        new TableSchema("test_table", measurement_schemas, column_categories);
+    auto tsfile_table_writer =
+        std::make_shared<TsFileTableWriter>(&write_file_, table_schema);
+    Tablet tablet = Tablet(table_schema->get_measurement_names(),
+                           table_schema->get_data_types());
+    tablet.set_table_name("test_table");
+    // Fill 100 rows: timestamp i, device "device<i>", value i * 1.1.
+    for (int i = 0; i < 100; i++) {
+        tablet.add_timestamp(i, static_cast<int64_t>(i));
+        tablet.add_value(i, "device",
+                         std::string("device" + std::to_string(i)).c_str());
+        tablet.add_value(i, "value", i * 1.1);
+    }
+    tsfile_table_writer->write_table(tablet);
+    tsfile_table_writer->flush();
+    tsfile_table_writer->close();
+
+    TsFileReader reader = TsFileReader();
+    reader.open(write_file_.get_file_path());
+    ResultSet* ret = nullptr;
+    // The written schema only has "device" and "value", so this name cannot
+    // be resolved.
+    std::vector<std::string> column_names = {"non-exist-column"};
+    // NOTE(review): 0 and 50 look like the start/end of the queried time
+    // range -- confirm against TsFileReader::query.
+    int ret_value = reader.query("test_table", column_names, 0, 50, ret);
+    // The query must fail and must not hand back a result set.
+    ASSERT_NE(common::E_OK, ret_value);
+    ASSERT_EQ(ret, nullptr);
+    reader.close();
+    delete table_schema;
+}
diff --git a/cpp/test/writer/table_view/tsfile_writer_table_test.cc 
b/cpp/test/writer/table_view/tsfile_writer_table_test.cc
index 44636078..e24b1d4b 100644
--- a/cpp/test/writer/table_view/tsfile_writer_table_test.cc
+++ b/cpp/test/writer/table_view/tsfile_writer_table_test.cc
@@ -434,11 +434,14 @@ TEST_F(TsFileWriterTableTest, WriteAndReadSimple) {
     TsFileReader reader = TsFileReader();
     reader.open(write_file_.get_file_path());
     ResultSet* ret = nullptr;
-    int ret_value = reader.query("test_table", {"device", "value"}, 0, 50, 
ret);
+    std::vector<std::string> column_names = {"device", "VALUE"};
+    int ret_value = reader.query("test_table", column_names, 0, 50, ret);
     ASSERT_EQ(common::E_OK, ret_value);
 
     ASSERT_EQ(ret_value, 0);
     auto* table_result_set = (TableResultSet*)ret;
+    auto metadata = ret->get_metadata();
+    ASSERT_EQ(metadata->get_column_name(column_names.size() + 1), "VALUE");
     bool has_next = false;
     int cur_line = 0;
     while (IS_SUCC(table_result_set->next(has_next)) && has_next) {
@@ -447,7 +450,7 @@ TEST_F(TsFileWriterTableTest, WriteAndReadSimple) {
         ASSERT_EQ(table_result_set->get_value<common::String*>("device")
                       ->to_std_string(),
                   "device" + std::to_string(timestamp));
-        ASSERT_EQ(table_result_set->get_value<double>("value"),
+        ASSERT_EQ(table_result_set->get_value<double>("VaLue"),
                   timestamp * 1.1);
     }
     ASSERT_EQ(cur_line, 51);
diff --git 
a/java/tsfile/src/main/java/org/apache/tsfile/read/query/dataset/AbstractResultSet.java
 
b/java/tsfile/src/main/java/org/apache/tsfile/read/query/dataset/AbstractResultSet.java
index 10352790..b4be1082 100644
--- 
a/java/tsfile/src/main/java/org/apache/tsfile/read/query/dataset/AbstractResultSet.java
+++ 
b/java/tsfile/src/main/java/org/apache/tsfile/read/query/dataset/AbstractResultSet.java
@@ -27,9 +27,9 @@ import org.apache.tsfile.read.common.RowRecord;
 
 import java.io.IOException;
 import java.time.LocalDate;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.TreeMap;
 
 public abstract class AbstractResultSet implements ResultSet {
 
@@ -41,7 +41,7 @@ public abstract class AbstractResultSet implements ResultSet {
     // Add Time at first column
     this.resultSetMetadata = new ResultSetMetadataImpl(columnNameList, 
tsDataTypeList);
     int columnNum = tsDataTypeList.size() + 1;
-    this.columnNameToColumnIndexMap = new HashMap<>(tsDataTypeList.size());
+    this.columnNameToColumnIndexMap = new 
TreeMap<>(String.CASE_INSENSITIVE_ORDER);
     for (int columnIndex = 1; columnIndex <= columnNum; columnIndex++) {
       this.columnNameToColumnIndexMap.put(
           resultSetMetadata.getColumnName(columnIndex), columnIndex);
diff --git 
a/java/tsfile/src/test/java/org/apache/tsfile/tableview/TableViewTest.java 
b/java/tsfile/src/test/java/org/apache/tsfile/tableview/TableViewTest.java
index 361cd3f5..17552af1 100644
--- a/java/tsfile/src/test/java/org/apache/tsfile/tableview/TableViewTest.java
+++ b/java/tsfile/src/test/java/org/apache/tsfile/tableview/TableViewTest.java
@@ -35,11 +35,15 @@ import 
org.apache.tsfile.read.controller.CachedChunkLoaderImpl;
 import org.apache.tsfile.read.controller.MetadataQuerierByFileImpl;
 import org.apache.tsfile.read.expression.QueryExpression;
 import org.apache.tsfile.read.query.dataset.QueryDataSet;
+import org.apache.tsfile.read.query.dataset.ResultSet;
+import org.apache.tsfile.read.query.dataset.ResultSetMetadata;
 import org.apache.tsfile.read.query.executor.QueryExecutor;
 import org.apache.tsfile.read.query.executor.TableQueryExecutor;
 import 
org.apache.tsfile.read.query.executor.TableQueryExecutor.TableQueryOrdering;
 import org.apache.tsfile.read.query.executor.TsFileExecutor;
 import org.apache.tsfile.read.reader.block.TsBlockReader;
+import org.apache.tsfile.read.v4.ITsFileReader;
+import org.apache.tsfile.read.v4.TsFileReaderBuilder;
 import org.apache.tsfile.utils.Binary;
 import org.apache.tsfile.utils.TsFileSketchTool;
 import org.apache.tsfile.write.TsFileWriter;
@@ -153,6 +157,29 @@ public class TableViewTest {
     assertEquals(1000, cnt);
   }
 
+  /**
+   * Queries with upper-case column names and reads values back with names in mixed case.
+   * Metadata must report each column name exactly as it was requested.
+   */
+  @Test
+  public void testReadCaseSensitivity() throws Exception {
+    final File testFile = new File(testDir, "testFile");
+    writeTsFile(testTableSchema, testFile);
+
+    ArrayList<String> columns = new ArrayList<>(Arrays.asList("ID1", "ID2", "S1", "S2"));
+    try (ITsFileReader reader = new TsFileReaderBuilder().file(testFile).build();
+        ResultSet resultSet = reader.query(testTableSchema.getTableName(), columns, 2, 8)) {
+      // Metadata index 1 is Time, so the requested columns occupy indices 2..5.
+      ResultSetMetadata metadata = resultSet.getMetadata();
+      for (int column = 2; column <= 5; column++) {
+        // JUnit argument order is (expected, actual).
+        assertEquals(columns.get(column - 2), metadata.getColumnName(column));
+      }
+      while (resultSet.next()) {
+        // Each lookup uses a different letter case than the one passed to query().
+        resultSet.getLong("Time");
+        assertFalse(resultSet.isNull("ID1"));
+        assertFalse(resultSet.isNull("id2"));
+        assertFalse(resultSet.isNull("s1"));
+        assertFalse(resultSet.isNull("S2"));
+      }
+    }
+  }
+
   @Test
   public void testDeviceIdWithNull() throws Exception {
     final File testFile = new File(testDir, "testFile");

Reply via email to