This is an automated email from the ASF dual-hosted git repository.

liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 221f0f849 [CH]duplicate column name case support in broadcast join #6926 (#6927)
221f0f849 is described below

commit 221f0f849c1e463dcecae7c515e23281d263f623
Author: loudongfeng <[email protected]>
AuthorDate: Tue Aug 20 10:41:38 2024 +0800

    [CH]duplicate column name case support in broadcast join #6926 (#6927)
    
    What changes were proposed in this pull request?
    Fixes: #6926
    
    How was this patch tested?
    by UT
---
 .../execution/GlutenClickhouseFunctionSuite.scala  | 20 +++++++++++++++++++
 cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp  | 23 +++++++++++++++++-----
 2 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
index ac18f256e..4130ea348 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
@@ -198,4 +198,24 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite {
     }
   }
 
+  test("duplicate column name issue") {
+    withTable("left_table", "right_table") {
+      sql("create table left_table(id int, name string) using orc")
+      sql("create table right_table(id int, book string) using orc")
+      sql("insert into left_table values (1,'a'),(2,'b'),(3,'c'),(4,'d')")
+      sql("insert into right_table values (1,'a'),(1,'b'),(2,'c'),(2,'d')")
+      compareResultsAgainstVanillaSpark(
+        """
+          |select p1.id, p1.name, p2.book
+          | from left_table p1 left join
+          | (select id, id, book
+          |    from right_table where id <= 2) p2
+          | on p1.id=p2.id
+          |""".stripMargin,
+        true,
+        { _ => }
+      )
+    }
+  }
+
 }
diff --git a/cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp b/cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp
index f47f423df..da301dcb8 100644
--- a/cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp
+++ b/cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp
@@ -57,13 +57,26 @@ jlong callJavaGet(const std::string & id)
 DB::Block resetBuildTableBlockName(Block & block, bool only_one = false)
 {
     DB::ColumnsWithTypeAndName new_cols;
+    std::set<std::string> names;
+    int32_t seq = 0;
     for (const auto & col : block)
     {
-        // Add a prefix to avoid column name conflicts with left table.
-        new_cols.emplace_back(col.column, col.type, BlockUtil::RIHGT_COLUMN_PREFIX + col.name);
-
-        if (only_one)
-            break;
+      // Add a prefix to avoid column name conflicts with left table.
+      std::stringstream new_name;
+      // add a sequence to avoid duplicate name in some rare cases
+      if (names.find(col.name) == names.end())
+      {
+         new_name << BlockUtil::RIHGT_COLUMN_PREFIX << col.name;
+         names.insert(col.name);
+      }
+      else
+      {
+        new_name << BlockUtil::RIHGT_COLUMN_PREFIX  << (seq++) << "_" << col.name;
+      }
+      new_cols.emplace_back(col.column, col.type, new_name.str());
+
+      if (only_one)
+        break;
     }
     return DB::Block(new_cols);
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to