This is an automated email from the ASF dual-hosted git repository.
jiangtian pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/develop by this push:
new caa77ac0 [Java/C++/C] Resolved case sensitivity issue when reading column names. (#518)
caa77ac0 is described below
commit caa77ac003f9b64687b3ebb193f19be455508fdc
Author: Hongzhi Gao <[email protected]>
AuthorDate: Thu Jun 19 10:40:21 2025 +0800
[Java/C++/C] Resolved case sensitivity issue when reading column names. (#518)
* Revert "Resolved case sensitivity issue when reading column names. (#517)"
This reverts commit c501bf4e0ac13cdbe0b7d404cd4ea3ef99904203.
* Resolved case sensitivity issue when reading column names.
* [Java]Resolved case sensitivity issue when reading column names.
* spotless apply
* fix cpp ut mem leak
* fix cpp ut mem leak
---
cpp/src/reader/result_set.h | 33 ++++++++++++++++++--
cpp/src/reader/table_query_executor.cc | 17 +++++-----
cpp/src/utils/storage_utils.h | 2 ++
cpp/test/cwrapper/c_release_test.cc | 4 +--
.../reader/table_view/tsfile_reader_table_test.cc | 36 ++++++++++++++++++++++
.../writer/table_view/tsfile_writer_table_test.cc | 7 +++--
.../read/query/dataset/AbstractResultSet.java | 4 +--
.../org/apache/tsfile/tableview/TableViewTest.java | 27 ++++++++++++++++
8 files changed, 114 insertions(+), 16 deletions(-)
diff --git a/cpp/src/reader/result_set.h b/cpp/src/reader/result_set.h
index 59e08455..e0dfacd3 100644
--- a/cpp/src/reader/result_set.h
+++ b/cpp/src/reader/result_set.h
@@ -21,6 +21,9 @@
#define READER_QUERY_DATA_SET_H
#include <unordered_map>
+#include <iostream>
+#include <string>
+#include <algorithm>
#include "common/row_record.h"
@@ -166,8 +169,34 @@ class ResultSet {
*/
virtual void close() = 0;
- protected:
- std::unordered_map<std::string, uint32_t> index_lookup_;
+protected:
+ struct CaseInsensitiveHash {
+ std::size_t operator()(const std::string& str) const {
+ std::string lowerStr = str;
+ std::transform(lowerStr.begin(), lowerStr.end(), lowerStr.begin(),
+ [](unsigned char c) {
+ return std::tolower(c);
+ });
+ return std::hash<std::string>()(lowerStr);
+ }
+ };
+
+ struct CaseInsensitiveEqual {
+ bool operator()(const std::string& lhs, const std::string& rhs) const {
+ if (lhs.size() != rhs.size()) {
+ return false;
+ }
+ for (size_t i = 0; i < lhs.size(); ++i) {
+ if (std::tolower(lhs[i]) != std::tolower(rhs[i])) {
+ return false;
+ }
+ }
+ return true;
+ }
+ };
+
+ std::unordered_map<std::string, uint32_t, CaseInsensitiveHash,
+ CaseInsensitiveEqual> index_lookup_;
common::PageArena pa_;
};
diff --git a/cpp/src/reader/table_query_executor.cc b/cpp/src/reader/table_query_executor.cc
index 97913c21..77ecc929 100644
--- a/cpp/src/reader/table_query_executor.cc
+++ b/cpp/src/reader/table_query_executor.cc
@@ -41,19 +41,20 @@ int TableQueryExecutor::query(const std::string &table_name,
ret_qds = nullptr;
return ret;
}
- std::vector<std::string> std_column_names(columns);
- for (auto &column : std_column_names) {
+ std::vector<std::string> lower_case_column_names(columns);
+ for (auto &column : lower_case_column_names) {
to_lowercase_inplace(column);
}
std::shared_ptr<ColumnMapping> column_mapping =
std::make_shared<ColumnMapping>();
- for (size_t i = 0; i < std_column_names.size(); ++i) {
- column_mapping->add(std_column_names[i], static_cast<int>(i),
*table_schema);
+ for (size_t i = 0; i < lower_case_column_names.size(); ++i) {
+ column_mapping->add(lower_case_column_names[i], static_cast<int>(i),
*table_schema);
}
std::vector<common::TSDataType> data_types;
- data_types.reserve(columns.size());
- for (size_t i = 0; i < columns.size(); ++i) {
- auto ind = table_schema->find_column_index(columns[i]);
+ data_types.reserve(lower_case_column_names.size());
+ for (size_t i = 0; i < lower_case_column_names.size(); ++i) {
+ auto ind = table_schema->find_column_index(lower_case_column_names[i]);
if (ind < 0) {
+ delete time_filter;
return common::E_COLUMN_NOT_EXIST;
}
data_types.push_back(table_schema->get_data_types()[ind]);
@@ -61,7 +62,7 @@ int TableQueryExecutor::query(const std::string &table_name,
// column_mapping.add(*measurement_filter);
auto device_task_iterator = std::unique_ptr<DeviceTaskIterator>(
- new DeviceTaskIterator(std_column_names, table_root, column_mapping,
+ new DeviceTaskIterator(columns, table_root, column_mapping,
meta_data_querier_, id_filter, table_schema));
std::unique_ptr<TsBlockReader> tsblock_reader;
diff --git a/cpp/src/utils/storage_utils.h b/cpp/src/utils/storage_utils.h
index a152a57e..a0c6f3b2 100644
--- a/cpp/src/utils/storage_utils.h
+++ b/cpp/src/utils/storage_utils.h
@@ -19,6 +19,8 @@
#ifndef UTILS_STORAGE_UTILS_H
#define UTILS_STORAGE_UTILS_H
+#include <inttypes.h>
+#include <stdint.h>
#include <algorithm>
#include "common/datatype/value.h"
diff --git a/cpp/test/cwrapper/c_release_test.cc b/cpp/test/cwrapper/c_release_test.cc
index 0ccb0cdd..bb73fb9d 100644
--- a/cpp/test/cwrapper/c_release_test.cc
+++ b/cpp/test/cwrapper/c_release_test.cc
@@ -307,9 +307,9 @@ TEST_F(CReleaseTest, TsFileWriterMultiDataType) {
ASSERT_EQ("device1", std::string(str_value));
free(str_value);
ASSERT_EQ(value,
tsfile_result_set_get_value_by_name_int32_t(result_set,
- "INT32"));
+ "int32"));
ASSERT_EQ(value * 100, tsfile_result_set_get_value_by_name_int64_t(
- result_set, "INT64"));
+ result_set, "int64"));
ASSERT_EQ(value * 100.0, tsfile_result_set_get_value_by_name_float(
result_set, "FLOAT"));
diff --git a/cpp/test/reader/table_view/tsfile_reader_table_test.cc b/cpp/test/reader/table_view/tsfile_reader_table_test.cc
index 2bdc74b2..2e5a929f 100644
--- a/cpp/test/reader/table_view/tsfile_reader_table_test.cc
+++ b/cpp/test/reader/table_view/tsfile_reader_table_test.cc
@@ -380,3 +380,39 @@ TEST_F(TsFileTableReaderTest, TableModelQueryWithMultiTabletsMultiFlush) {
delete[] literal;
delete tmp_table_schema;
}
+
+TEST_F(TsFileTableReaderTest, ReadNonExistColumn) {
+ std::vector<MeasurementSchema*> measurement_schemas;
+ std::vector<ColumnCategory> column_categories;
+ measurement_schemas.resize(2);
+ measurement_schemas[0] = new MeasurementSchema("device", STRING);
+ measurement_schemas[1] = new MeasurementSchema("value", DOUBLE);
+ column_categories.emplace_back(ColumnCategory::TAG);
+ column_categories.emplace_back(ColumnCategory::FIELD);
+ TableSchema* table_schema =
+ new TableSchema("test_table", measurement_schemas, column_categories);
+ auto tsfile_table_writer =
+ std::make_shared<TsFileTableWriter>(&write_file_, table_schema);
+ Tablet tablet = Tablet(table_schema->get_measurement_names(),
+ table_schema->get_data_types());
+ tablet.set_table_name("test_table");
+ for (int i = 0; i < 100; i++) {
+ tablet.add_timestamp(i, static_cast<int64_t>(i));
+ tablet.add_value(i, "device",
+ std::string("device" + std::to_string(i)).c_str());
+ tablet.add_value(i, "value", i * 1.1);
+ }
+ tsfile_table_writer->write_table(tablet);
+ tsfile_table_writer->flush();
+ tsfile_table_writer->close();
+
+ TsFileReader reader = TsFileReader();
+ reader.open(write_file_.get_file_path());
+ ResultSet* ret = nullptr;
+ std::vector<std::string> column_names = {"non-exist-column"};
+ int ret_value = reader.query("test_table", column_names, 0, 50, ret);
+ ASSERT_NE(common::E_OK, ret_value);
+ ASSERT_EQ(ret, nullptr);
+ reader.close();
+ delete table_schema;
+}
diff --git a/cpp/test/writer/table_view/tsfile_writer_table_test.cc b/cpp/test/writer/table_view/tsfile_writer_table_test.cc
index 44636078..e24b1d4b 100644
--- a/cpp/test/writer/table_view/tsfile_writer_table_test.cc
+++ b/cpp/test/writer/table_view/tsfile_writer_table_test.cc
@@ -434,11 +434,14 @@ TEST_F(TsFileWriterTableTest, WriteAndReadSimple) {
TsFileReader reader = TsFileReader();
reader.open(write_file_.get_file_path());
ResultSet* ret = nullptr;
- int ret_value = reader.query("test_table", {"device", "value"}, 0, 50,
ret);
+ std::vector<std::string> column_names = {"device", "VALUE"};
+ int ret_value = reader.query("test_table", column_names, 0, 50, ret);
ASSERT_EQ(common::E_OK, ret_value);
ASSERT_EQ(ret_value, 0);
auto* table_result_set = (TableResultSet*)ret;
+ auto metadata = ret->get_metadata();
+ ASSERT_EQ(metadata->get_column_name(column_names.size() + 1), "VALUE");
bool has_next = false;
int cur_line = 0;
while (IS_SUCC(table_result_set->next(has_next)) && has_next) {
@@ -447,7 +450,7 @@ TEST_F(TsFileWriterTableTest, WriteAndReadSimple) {
ASSERT_EQ(table_result_set->get_value<common::String*>("device")
->to_std_string(),
"device" + std::to_string(timestamp));
- ASSERT_EQ(table_result_set->get_value<double>("value"),
+ ASSERT_EQ(table_result_set->get_value<double>("VaLue"),
timestamp * 1.1);
}
ASSERT_EQ(cur_line, 51);
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/query/dataset/AbstractResultSet.java b/java/tsfile/src/main/java/org/apache/tsfile/read/query/dataset/AbstractResultSet.java
index 10352790..b4be1082 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/query/dataset/AbstractResultSet.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/query/dataset/AbstractResultSet.java
@@ -27,9 +27,9 @@ import org.apache.tsfile.read.common.RowRecord;
import java.io.IOException;
import java.time.LocalDate;
-import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.TreeMap;
public abstract class AbstractResultSet implements ResultSet {
@@ -41,7 +41,7 @@ public abstract class AbstractResultSet implements ResultSet {
// Add Time at first column
this.resultSetMetadata = new ResultSetMetadataImpl(columnNameList,
tsDataTypeList);
int columnNum = tsDataTypeList.size() + 1;
- this.columnNameToColumnIndexMap = new HashMap<>(tsDataTypeList.size());
+ this.columnNameToColumnIndexMap = new
TreeMap<>(String.CASE_INSENSITIVE_ORDER);
for (int columnIndex = 1; columnIndex <= columnNum; columnIndex++) {
this.columnNameToColumnIndexMap.put(
resultSetMetadata.getColumnName(columnIndex), columnIndex);
diff --git a/java/tsfile/src/test/java/org/apache/tsfile/tableview/TableViewTest.java b/java/tsfile/src/test/java/org/apache/tsfile/tableview/TableViewTest.java
index 361cd3f5..17552af1 100644
--- a/java/tsfile/src/test/java/org/apache/tsfile/tableview/TableViewTest.java
+++ b/java/tsfile/src/test/java/org/apache/tsfile/tableview/TableViewTest.java
@@ -35,11 +35,15 @@ import org.apache.tsfile.read.controller.CachedChunkLoaderImpl;
import org.apache.tsfile.read.controller.MetadataQuerierByFileImpl;
import org.apache.tsfile.read.expression.QueryExpression;
import org.apache.tsfile.read.query.dataset.QueryDataSet;
+import org.apache.tsfile.read.query.dataset.ResultSet;
+import org.apache.tsfile.read.query.dataset.ResultSetMetadata;
import org.apache.tsfile.read.query.executor.QueryExecutor;
import org.apache.tsfile.read.query.executor.TableQueryExecutor;
import
org.apache.tsfile.read.query.executor.TableQueryExecutor.TableQueryOrdering;
import org.apache.tsfile.read.query.executor.TsFileExecutor;
import org.apache.tsfile.read.reader.block.TsBlockReader;
+import org.apache.tsfile.read.v4.ITsFileReader;
+import org.apache.tsfile.read.v4.TsFileReaderBuilder;
import org.apache.tsfile.utils.Binary;
import org.apache.tsfile.utils.TsFileSketchTool;
import org.apache.tsfile.write.TsFileWriter;
@@ -153,6 +157,29 @@ public class TableViewTest {
assertEquals(1000, cnt);
}
+ @Test
+ public void testReadCaseSensitivity() throws Exception {
+ final File testFile = new File(testDir, "testFile");
+ writeTsFile(testTableSchema, testFile);
+
+ ArrayList<String> columns = new ArrayList<>(Arrays.asList("ID1", "ID2",
"S1", "S2"));
+ try (ITsFileReader reader = new
TsFileReaderBuilder().file(testFile).build();
+ ResultSet resultSet = reader.query(testTableSchema.getTableName(),
columns, 2, 8)) {
+ // first column is Time
+ ResultSetMetadata metadata = resultSet.getMetadata();
+ for (int column = 2; column <= 5; column++) {
+ assertEquals(metadata.getColumnName(column), columns.get(column - 2));
+ }
+ while (resultSet.next()) {
+ Long timeField = resultSet.getLong("Time");
+ assertFalse(resultSet.isNull("ID1"));
+ assertFalse(resultSet.isNull("id2"));
+ assertFalse(resultSet.isNull("s1"));
+ assertFalse(resultSet.isNull("S2"));
+ }
+ }
+ }
+
@Test
public void testDeviceIdWithNull() throws Exception {
final File testFile = new File(testDir, "testFile");