This is an automated email from the ASF dual-hosted git repository.
liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 221f0f849 [CH]duplicate column name case support in broadcast join #6926 (#6927)
221f0f849 is described below
commit 221f0f849c1e463dcecae7c515e23281d263f623
Author: loudongfeng <[email protected]>
AuthorDate: Tue Aug 20 10:41:38 2024 +0800
[CH]duplicate column name case support in broadcast join #6926 (#6927)
What changes were proposed in this pull request?
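Fixes: #6926. When the build-side block of a broadcast join contains duplicate column names (e.g. `select id, id, book`), each repeated column now gets a sequence number inserted after the right-column prefix so that the renamed columns stay unique.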
How was this patch tested?
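By a unit test (UT): the new "duplicate column name issue" case in GlutenClickhouseFunctionSuite, which compares the query results against vanilla Spark.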
---
.../execution/GlutenClickhouseFunctionSuite.scala | 20 +++++++++++++++++++
cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp | 23 +++++++++++++++++-----
2 files changed, 38 insertions(+), 5 deletions(-)
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
index ac18f256e..4130ea348 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
@@ -198,4 +198,24 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite {
}
}
+ test("duplicate column name issue") {
+ withTable("left_table", "right_table") {
+ sql("create table left_table(id int, name string) using orc")
+ sql("create table right_table(id int, book string) using orc")
+ sql("insert into left_table values (1,'a'),(2,'b'),(3,'c'),(4,'d')")
+ sql("insert into right_table values (1,'a'),(1,'b'),(2,'c'),(2,'d')")
+ compareResultsAgainstVanillaSpark(
+ """
+ |select p1.id, p1.name, p2.book
+ | from left_table p1 left join
+ | (select id, id, book
+ | from right_table where id <= 2) p2
+ | on p1.id=p2.id
+ |""".stripMargin,
+ true,
+ { _ => }
+ )
+ }
+ }
+
}
diff --git a/cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp b/cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp
index f47f423df..da301dcb8 100644
--- a/cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp
+++ b/cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp
@@ -57,13 +57,26 @@ jlong callJavaGet(const std::string & id)
DB::Block resetBuildTableBlockName(Block & block, bool only_one = false)
{
DB::ColumnsWithTypeAndName new_cols;
+ std::set<std::string> names;
+ int32_t seq = 0;
for (const auto & col : block)
{
- // Add a prefix to avoid column name conflicts with left table.
- new_cols.emplace_back(col.column, col.type,
BlockUtil::RIHGT_COLUMN_PREFIX + col.name);
-
- if (only_one)
- break;
+ // Add a prefix to avoid column name conflicts with left table.
+ std::stringstream new_name;
+ // add a sequence to avoid duplicate name in some rare cases
+ if (names.find(col.name) == names.end())
+ {
+ new_name << BlockUtil::RIHGT_COLUMN_PREFIX << col.name;
+ names.insert(col.name);
+ }
+ else
+ {
+ new_name << BlockUtil::RIHGT_COLUMN_PREFIX << (seq++) << "_" <<
col.name;
+ }
+ new_cols.emplace_back(col.column, col.type, new_name.str());
+
+ if (only_one)
+ break;
}
return DB::Block(new_cols);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]