This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 99e7e53a1 fix(csharp/src/Drivers/Apache): fix to workaround
concurrency issue (#2282)
99e7e53a1 is described below
commit 99e7e53a19d3f70f0e5a6c3140eeef4f4c213901
Author: Bruce Irschick <[email protected]>
AuthorDate: Tue Oct 29 10:18:04 2024 -0700
fix(csharp/src/Drivers/Apache): fix to workaround concurrency issue (#2282)
Provides an interim work-around for the concurrency issue identified in
#2280.
* Removes the SQL `DELETE` statements from the SQL table scripts.
* Uses the XUnit.Collection to serialize the execution of ClientTests
and DriverTests.
* Fixes the missing application of `HttpRequestTimeout` due to an
incomplete implementation of the `ValidateOptions` in
`SparkDatabricksConnection`.
* Improves the table creation syntax to `CREATE OR REPLACE TABLE` to
reduce the probability of inconsistent state.
Note: this is not the final solution. A more robust isolation of table
creation needs to be done to properly isolate concurrent test runs.
---
csharp/src/Drivers/Apache/Spark/SparkDatabricksConnection.cs | 12 ------------
csharp/test/Drivers/Apache/Spark/ClientTests.cs | 6 ++++--
csharp/test/Drivers/Apache/Spark/DriverTests.cs | 6 ++++--
.../Drivers/Apache/Spark/Resources/SparkData-Databricks.sql | 6 ++----
csharp/test/Drivers/Apache/Spark/Resources/SparkData.sql | 4 +---
5 files changed, 11 insertions(+), 23 deletions(-)
diff --git a/csharp/src/Drivers/Apache/Spark/SparkDatabricksConnection.cs
b/csharp/src/Drivers/Apache/Spark/SparkDatabricksConnection.cs
index 764027198..7d187fc71 100644
--- a/csharp/src/Drivers/Apache/Spark/SparkDatabricksConnection.cs
+++ b/csharp/src/Drivers/Apache/Spark/SparkDatabricksConnection.cs
@@ -16,9 +16,7 @@
*/
using System.Collections.Generic;
-using System.Threading;
using System.Threading.Tasks;
-using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
using Apache.Arrow.Ipc;
using Apache.Hive.Service.Rpc.Thrift;
@@ -45,16 +43,6 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Spark
return req;
}
- protected override void ValidateOptions()
- {
- Properties.TryGetValue(SparkParameters.DataTypeConv, out string?
dataTypeConv);
- // Note: In Databricks, scalar types are provided implicitly.
- DataTypeConversion = DataTypeConversionParser.Parse(dataTypeConv);
-
- Properties.TryGetValue(SparkParameters.TLSOptions, out string?
tlsOptions);
- TlsOptions = TlsOptionsParser.Parse(tlsOptions);
- }
-
protected override Task<TGetResultSetMetadataResp>
GetResultSetMetadataAsync(TGetSchemasResp response) =>
Task.FromResult(response.DirectResults.ResultSetMetadata);
protected override Task<TGetResultSetMetadataResp>
GetResultSetMetadataAsync(TGetCatalogsResp response) =>
diff --git a/csharp/test/Drivers/Apache/Spark/ClientTests.cs
b/csharp/test/Drivers/Apache/Spark/ClientTests.cs
index 28c80d8f0..f2288f420 100644
--- a/csharp/test/Drivers/Apache/Spark/ClientTests.cs
+++ b/csharp/test/Drivers/Apache/Spark/ClientTests.cs
@@ -30,8 +30,12 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
/// <remarks>
/// Tests are ordered to ensure data is created for the other
/// queries to run.
+ /// <para>Note: This test creates/replaces the table identified in the
configuration (metadata/table).
+ /// It uses the test collection "TableCreateTestCollection" to ensure it
does not run
+ /// at the same time as any other tests that may create/update the same
table.</para>
/// </remarks>
[TestCaseOrderer("Apache.Arrow.Adbc.Tests.Xunit.TestOrderer",
"Apache.Arrow.Adbc.Tests")]
+ [Collection("TableCreateTestCollection")]
public class ClientTests : TestBase<SparkTestConfiguration,
SparkTestEnvironment>
{
public ClientTests(ITestOutputHelper? outputHelper) :
base(outputHelper, new SparkTestEnvironment.Factory())
@@ -54,7 +58,6 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
List<int> expectedResults = TestEnvironment.ServerType !=
SparkServerType.Databricks
? [
- -1, // DROP TABLE
-1, // CREATE TABLE
affectedRows, // INSERT
affectedRows, // INSERT
@@ -63,7 +66,6 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
//1, // DELETE
]
: [
- -1, // DROP TABLE
-1, // CREATE TABLE
affectedRows, // INSERT
affectedRows, // INSERT
diff --git a/csharp/test/Drivers/Apache/Spark/DriverTests.cs
b/csharp/test/Drivers/Apache/Spark/DriverTests.cs
index 880f74888..9be3eb87f 100644
--- a/csharp/test/Drivers/Apache/Spark/DriverTests.cs
+++ b/csharp/test/Drivers/Apache/Spark/DriverTests.cs
@@ -36,8 +36,12 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
/// <remarks>
/// Tests are ordered to ensure data is created for the other
/// queries to run.
+ /// <para>Note: This test creates/replaces the table identified in the
configuration (metadata/table).
+ /// It uses the test collection "TableCreateTestCollection" to ensure it
does not run
+ /// at the same time as any other tests that may create/update the same
table.</para>
/// </remarks>
[TestCaseOrderer("Apache.Arrow.Adbc.Tests.Xunit.TestOrderer",
"Apache.Arrow.Adbc.Tests")]
+ [Collection("TableCreateTestCollection")]
public class DriverTests : TestBase<SparkTestConfiguration,
SparkTestEnvironment>
{
/// <summary>
@@ -92,7 +96,6 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
List<int> expectedResults = TestEnvironment.ServerType !=
SparkServerType.Databricks
?
[
- -1, // DROP TABLE
-1, // CREATE TABLE
1, // INSERT
1, // INSERT
@@ -102,7 +105,6 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Spark
]
:
[
- -1, // DROP TABLE
-1, // CREATE TABLE
1, // INSERT
1, // INSERT
diff --git
a/csharp/test/Drivers/Apache/Spark/Resources/SparkData-Databricks.sql
b/csharp/test/Drivers/Apache/Spark/Resources/SparkData-Databricks.sql
index 908ffbb93..f8f44fc54 100644
--- a/csharp/test/Drivers/Apache/Spark/Resources/SparkData-Databricks.sql
+++ b/csharp/test/Drivers/Apache/Spark/Resources/SparkData-Databricks.sql
@@ -14,9 +14,7 @@
-- See the License for the specific language governing permissions and
-- limitations under the License.
-DROP TABLE IF EXISTS {ADBC_CATALOG}.{ADBC_DATASET}.{ADBC_TABLE};
-
-CREATE TABLE IF NOT EXISTS {ADBC_CATALOG}.{ADBC_DATASET}.{ADBC_TABLE} (
+CREATE OR REPLACE TABLE {ADBC_CATALOG}.{ADBC_DATASET}.{ADBC_TABLE} (
id LONG,
byte BYTE,
short SHORT,
@@ -42,7 +40,7 @@ CREATE TABLE IF NOT EXISTS
{ADBC_CATALOG}.{ADBC_DATASET}.{ADBC_TABLE} (
>,
varchar VARCHAR(255),
char CHAR(10)
-);
+) USING DELTA;
INSERT INTO {ADBC_CATALOG}.{ADBC_DATASET}.{ADBC_TABLE} (
id,
diff --git a/csharp/test/Drivers/Apache/Spark/Resources/SparkData.sql
b/csharp/test/Drivers/Apache/Spark/Resources/SparkData.sql
index 433bf5a5e..8ee0f7e90 100644
--- a/csharp/test/Drivers/Apache/Spark/Resources/SparkData.sql
+++ b/csharp/test/Drivers/Apache/Spark/Resources/SparkData.sql
@@ -14,9 +14,7 @@
-- See the License for the specific language governing permissions and
-- limitations under the License.
-DROP TABLE IF EXISTS {ADBC_CATALOG}.{ADBC_DATASET}.{ADBC_TABLE};
-
-CREATE TABLE IF NOT EXISTS {ADBC_CATALOG}.{ADBC_DATASET}.{ADBC_TABLE} (
+CREATE OR REPLACE TABLE {ADBC_CATALOG}.{ADBC_DATASET}.{ADBC_TABLE} (
id LONG,
byte BYTE,
short SHORT,