This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 80f4404d5 feat(csharp/src/Drivers/Databricks): Default catalog +
schema support (#2806)
80f4404d5 is described below
commit 80f4404d54c17196619f7dbf60da3624fde51247
Author: Todd Meng <[email protected]>
AuthorDate: Tue May 20 07:17:11 2025 -0700
feat(csharp/src/Drivers/Databricks): Default catalog + schema support
(#2806)
Allows users to specify an optional default catalog and default schema
that will take effect throughout the session.
Includes tests that verify basic behavior. To test, include in the
DATABRICKS_TEST_CONFIG_FILE environment variable:
```
"catalog": "system",
"db_schema": "access",
```
NOTE: using Namespace to set catalogs is only in DBR 8.4+. We will need to
introduce fallbacks (todo in a different PR)
---
.../src/Drivers/Databricks/DatabricksConnection.cs | 33 +++++++++
csharp/src/Drivers/Databricks/readme.md | 2 +
.../test/Drivers/Apache/ApacheTestConfiguration.cs | 6 ++
.../Drivers/Databricks/DatabricksConnectionTest.cs | 85 +++++++++++++++++++++-
.../Databricks/DatabricksTestEnvironment.cs | 8 ++
5 files changed, 130 insertions(+), 4 deletions(-)
diff --git a/csharp/src/Drivers/Databricks/DatabricksConnection.cs
b/csharp/src/Drivers/Databricks/DatabricksConnection.cs
index 99f477c6e..b7a26e2da 100644
--- a/csharp/src/Drivers/Databricks/DatabricksConnection.cs
+++ b/csharp/src/Drivers/Databricks/DatabricksConnection.cs
@@ -53,6 +53,9 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
private const bool DefaultRetryOnUnavailable= true;
private const int DefaultTemporarilyUnavailableRetryTimeout = 500;
+ // Default namespace
+ private TNamespace? _defaultNamespace;
+
public DatabricksConnection(IReadOnlyDictionary<string, string>
properties) : base(properties)
{
ValidateProperties();
@@ -125,6 +128,25 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
}
_maxBytesPerFile = maxBytesPerFileValue;
}
+
+ // Parse default namespace
+ string? defaultCatalog = null;
+ string? defaultSchema = null;
+ Properties.TryGetValue(AdbcOptions.Connection.CurrentCatalog, out
defaultCatalog);
+ Properties.TryGetValue(AdbcOptions.Connection.CurrentDbSchema, out
defaultSchema);
+
+ if (!string.IsNullOrWhiteSpace(defaultCatalog))
+ {
+ _defaultNamespace = new TNamespace
+ {
+ CatalogName = defaultCatalog,
+ SchemaName = defaultSchema
+ };
+ }
+ else if (!string.IsNullOrEmpty(defaultSchema))
+ {
+ throw new ArgumentException($"Parameter
'{AdbcOptions.Connection.CurrentCatalog}' is not set but
'{AdbcOptions.Connection.CurrentDbSchema}' is set. Please provide a value for
'{AdbcOptions.Connection.CurrentCatalog}'.");
+ }
}
/// <summary>
@@ -152,6 +174,11 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
/// </summary>
internal long MaxBytesPerFile => _maxBytesPerFile;
+ /// <summary>
+ /// Gets the default namespace to use for SQL queries.
+ /// </summary>
+ internal TNamespace? DefaultNamespace => _defaultNamespace;
+
/// <summary>
/// Gets a value indicating whether to retry requests that receive a
503 response with a Retry-After header.
/// </summary>
@@ -285,6 +312,12 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
CanUseMultipleCatalogs = true,
};
+ // Set default namespace if available
+ if (_defaultNamespace != null)
+ {
+ req.InitialNamespace = _defaultNamespace;
+ }
+
// If not using queries to set server-side properties, include
them in Configuration
if (!_applySSPWithQueries)
{
diff --git a/csharp/src/Drivers/Databricks/readme.md
b/csharp/src/Drivers/Databricks/readme.md
index 6a0af0c4c..d0d1c2f49 100644
--- a/csharp/src/Drivers/Databricks/readme.md
+++ b/csharp/src/Drivers/Databricks/readme.md
@@ -38,6 +38,8 @@ The Databricks ADBC driver supports the following
authentication methods:
Basic (username and password) authentication is not supported at this time.
+Optional default catalog and default schema can be set for the session with
`adbc.connection.catalog` and `adbc.connection.db_schema` (catalog must be set
if default schema is provided).
+
## Data Types
The following table depicts how the Databricks ADBC driver converts a
Databricks type to an Arrow type and a .NET type:
diff --git a/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs
b/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs
index 91366384f..47571194e 100644
--- a/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs
+++ b/csharp/test/Drivers/Apache/ApacheTestConfiguration.cs
@@ -62,6 +62,12 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache
[JsonPropertyName("http_options"), JsonIgnore(Condition =
JsonIgnoreCondition.WhenWritingDefault)]
public HttpTestConfiguration? HttpOptions { get; set; }
+
+ [JsonPropertyName("catalog"), JsonIgnore(Condition =
JsonIgnoreCondition.WhenWritingDefault)]
+ public string Catalog { get; set; } = string.Empty;
+
+ [JsonPropertyName("db_schema"), JsonIgnore(Condition =
JsonIgnoreCondition.WhenWritingDefault)]
+ public string DbSchema { get; set; } = string.Empty;
}
public class HttpTestConfiguration
diff --git a/csharp/test/Drivers/Databricks/DatabricksConnectionTest.cs
b/csharp/test/Drivers/Databricks/DatabricksConnectionTest.cs
index 5c334957f..22a07f04a 100644
--- a/csharp/test/Drivers/Databricks/DatabricksConnectionTest.cs
+++ b/csharp/test/Drivers/Databricks/DatabricksConnectionTest.cs
@@ -15,14 +15,18 @@
* limitations under the License.
*/
-using System;
-using System.Collections.Generic;
-using System.Globalization;
-using System.Net;
+using Apache.Arrow.Adbc;
using Apache.Arrow.Adbc.Drivers.Apache;
using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
using Apache.Arrow.Adbc.Drivers.Apache.Spark;
using Apache.Arrow.Adbc.Drivers.Databricks;
+using Apache.Hive.Service.Rpc.Thrift;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Net;
+using System.Reflection;
+using System.Threading.Tasks;
using Thrift.Transport;
using Xunit;
using Xunit.Abstractions;
@@ -315,5 +319,78 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks
Add(new(new() { [SparkParameters.Type] =
SparkServerTypeConstants.Http, [SparkParameters.HostName] = "valid.server.com",
[AdbcOptions.Username] = "user", [AdbcOptions.Password] = "myPassword",
[DatabricksParameters.TemporarilyUnavailableRetryTimeout] = "-1" },
typeof(ArgumentOutOfRangeException)));
}
}
+
+ /// <summary>
+ /// Tests that default namespace is correctly stored in the connection
namespace.
+ /// </summary>
+ [SkippableFact]
+ internal void DefaultNamespaceStoredInConnection()
+ {
+ // Skip if default catalog or schema is not configured
+ Skip.If(string.IsNullOrEmpty(TestConfiguration.Catalog), "Default
catalog not configured");
+ Skip.If(string.IsNullOrEmpty(TestConfiguration.DbSchema), "Default
schema not configured");
+
+ // Act
+ using var connection = NewConnection();
+
+ // Assert
+ Assert.NotNull(connection);
+ Assert.IsType<DatabricksConnection>(connection);
+
+ var defaultNamespace =
((DatabricksConnection)connection).DefaultNamespace;
+ Assert.NotNull(defaultNamespace);
+ Assert.Equal(TestConfiguration.Catalog,
defaultNamespace.CatalogName);
+ Assert.Equal(TestConfiguration.DbSchema,
defaultNamespace.SchemaName);
+ }
+
+ [SkippableFact]
+ public async Task SetDefaultCatalogAndSchemaOptionsTest()
+ {
+ string? defaultCatalog = null;
+ string? defaultSchema = null;
+ if (!string.IsNullOrEmpty(TestConfiguration.Catalog))
+ {
+ defaultCatalog = TestConfiguration.Catalog;
+ }
+ else
+ {
+ Skip.If(true, "No catalog specified in environment variable
DATABRICKS_CATALOG or test configuration");
+ }
+
+ if (!string.IsNullOrEmpty(TestConfiguration.DbSchema))
+ {
+ defaultSchema = TestConfiguration.DbSchema;
+ }
+
+ AdbcConnection connection = NewConnection();
+ AdbcStatement statement = connection.CreateStatement();
+
+ // Verify the settings were applied by querying the current
catalog and schema
+ statement.SqlQuery = "SELECT current_catalog()" + (defaultSchema
!= null ? ", current_schema()" : "");
+ var result = await statement.ExecuteQueryAsync();
+ Assert.NotNull(result.Stream);
+
+ var batch = await result.Stream.ReadNextRecordBatchAsync();
+ Assert.NotNull(batch);
+ Assert.Equal(1, batch.Length);
+ Assert.Equal(defaultSchema != null ? 2 : 1, batch.ColumnCount);
+
+ // Get the values from the result
+ var catalogArray = (StringArray)batch.Column(0);
+
+ string actualCatalog = catalogArray.GetString(0) ?? string.Empty;
+
+ // Verify the values match what we set
+ Assert.Equal(defaultCatalog, actualCatalog);
+
+ if (defaultSchema != null)
+ {
+ var schemaArray = (StringArray)batch.Column(1);
+ string actualSchema = schemaArray.GetString(0) ?? string.Empty;
+ Assert.Equal(defaultSchema, actualSchema);
+ }
+
+ OutputHelper?.WriteLine($"Successfully set and verified default
catalog: {defaultCatalog} and schema: {defaultSchema}");
+ }
}
}
diff --git a/csharp/test/Drivers/Databricks/DatabricksTestEnvironment.cs
b/csharp/test/Drivers/Databricks/DatabricksTestEnvironment.cs
index 89f87a9f8..ff46a21e1 100644
--- a/csharp/test/Drivers/Databricks/DatabricksTestEnvironment.cs
+++ b/csharp/test/Drivers/Databricks/DatabricksTestEnvironment.cs
@@ -109,6 +109,14 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks
{
parameters.Add(DatabricksParameters.OAuthScope,
testConfiguration.OAuthScope!);
}
+ if (!string.IsNullOrEmpty(testConfiguration.Catalog))
+ {
+ parameters.Add(AdbcOptions.Connection.CurrentCatalog,
testConfiguration.Catalog!);
+ }
+ if (!string.IsNullOrEmpty(testConfiguration.DbSchema))
+ {
+ parameters.Add(AdbcOptions.Connection.CurrentDbSchema,
testConfiguration.DbSchema!);
+ }
if (!string.IsNullOrEmpty(testConfiguration.Type))
{
parameters.Add(SparkParameters.Type, testConfiguration.Type!);