CurtHagenlocher commented on code in PR #2953:
URL: https://github.com/apache/arrow-adbc/pull/2953#discussion_r2157139852
##########
csharp/src/Drivers/Databricks/DatabricksConnection.cs:
##########
@@ -222,6 +235,12 @@ private void ValidateProperties()
/// </summary>
internal bool EnableMultipleCatalogSupport =>
_enableMultipleCatalogSupport;
+ /// <summary>
+ /// Check if current connection can use `DESC TABLE EXTENDED` query
+ /// </summary>
+ internal bool CanUseDescTableExtended => _useDescTableExtended &&
ServerProtocolVersion != null && ServerProtocolVersion >=
TProtocolVersion.SPARK_CLI_SERVICE_PROTOCOL_V7;
+
+
Review Comment:
nit: extra blank line
##########
csharp/src/Drivers/Apache/Hive2/HiveServer2Statement.cs:
##########
@@ -769,9 +769,11 @@ private QueryResult
CreateEmptyExtendedColumnsResult(Schema baseSchema)
// Add FK fields
foreach (var field in ForeignKeyFields)
{
- allFields.Add(new Field(ForeignKeyPrefix + field,
StringType.Default, true));
+ IArrowType fieldType = field != "KEQ_SEQ" ? StringType.Default
: Int16Type.Default;
+ allFields.Add(new Field(ForeignKeyPrefix + field, fieldType,
true));
}
+
Review Comment:
nit: extra blank line
##########
csharp/src/Drivers/Databricks/DatabricksStatement.cs:
##########
@@ -524,5 +509,313 @@ private QueryResult EmptyCrossReferenceResult()
return new QueryResult(0, new
HiveServer2Connection.HiveInfoArrowStream(schema, arrays));
}
+
+ protected override async Task<QueryResult>
GetColumnsExtendedAsync(CancellationToken cancellationToken = default)
+ {
+ string? fullTableName = BuildTableName();
+ var canUseDescTableExtended =
((DatabricksConnection)Connection).CanUseDescTableExtended;
+
+ if (!canUseDescTableExtended ||
string.IsNullOrEmpty(fullTableName))
+ {
+ // When fullTableName is empty, we cannot use metadata SQL
query to get the info,
+ // so fallback to base class implementation
+ return await base.GetColumnsExtendedAsync(cancellationToken);
+ }
+
+ string query = $"DESC TABLE EXTENDED {fullTableName} AS JSON";
+ using var descStmt = Connection.CreateStatement();
+ descStmt.SqlQuery = query;
+ QueryResult descResult;
+
+ try
+ {
+ descResult = await descStmt.ExecuteQueryAsync();
+ }
+ catch (HiveServer2Exception ex) when (ex.Message.Contains("Error
running query"))
+ {
+ // Fallback to base implementation
+ Debug.WriteLine($"[ERROR] Failed to run {query}. Fallback to
base::GetColumnsExtendedAsync.Error message:{ex.Message}");
+ return await base.GetColumnsExtendedAsync(cancellationToken);
+ }
+
+ var columnMetadataSchema = CreateColumnMetadataSchema();
+
+ if (descResult.Stream == null)
+ {
+ return CreateEmptyExtendedColumnsResult(columnMetadataSchema);
+ }
+
+ // Read the json result
+ var resultJson = "";
+ using (var stream = descResult.Stream)
+ {
+ var batch = await
stream.ReadNextRecordBatchAsync(cancellationToken);
+ if (batch == null || batch.Length == 0)
+ {
+ return
CreateEmptyExtendedColumnsResult(columnMetadataSchema);
+ }
+
+ resultJson = ((StringArray)batch.Column(0)).GetString(0);
+ }
+
+ // Parse the JSON result
+ var result =
JsonSerializer.Deserialize<DescTableExtendedResult>(resultJson);
+ if (result == null)
+ {
+ throw new FormatException($"Invalid json result of
{query}.Result={resultJson}");
+ }
+ return CreateExtendedColumnsResult(columnMetadataSchema,result);
+ }
+
+
Review Comment:
nit: extra blank line
##########
csharp/src/Drivers/Databricks/DatabricksStatement.cs:
##########
@@ -190,6 +194,39 @@ private void HandleSparkCatalog()
CatalogName = DatabricksConnection.HandleSparkCatalog(CatalogName);
}
+ /// <summary>
+ /// Helper method that returns the fully qualified table name enclosed
by backtick.
+ /// The returned value can be used as table name in the SQL statement
+ ///
+ /// If only SchemaName is defined, it will return
`SchemaName`.`TableName`
+ /// If both CatalogName and SchemaName are defined, it will return
`CatalogName`.`SchenaName`.`TableName`
+ /// </summary>
+ protected string? BuildTableName()
+ {
+ if (string.IsNullOrEmpty(TableName))
+ {
+ return TableName;
+ }
+
+ var parts = new List<string>();
+
+ if (!string.IsNullOrEmpty(SchemaName))
+ {
+ // Only include CatalogName when SchemaName is defined
+ if (!string.IsNullOrEmpty(CatalogName) &&
!CatalogName!.Equals("SPARK", StringComparison.OrdinalIgnoreCase))
+ {
+ parts.Add($"`{CatalogName.Replace("`", "``")}`");
+ }
+ parts.Add($"`{SchemaName!.Replace("`", "``")}`");
+ }
+
+ // Escape if TableName contains backtick
+ parts.Add($"`{TableName!.Replace("`", "``")}`");
+
+ return string.Join(".", parts);
+ }
+
+
Review Comment:
nit: extra blank line
##########
csharp/src/Drivers/Databricks/Result/DescTableExtendedResult.cs:
##########
@@ -0,0 +1,566 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+using System;
+using System.Collections.Generic;
+using System.Net.Http.Headers;
+using System.Text.Json.Serialization;
+using System.Text.RegularExpressions;
+using static Apache.Arrow.Adbc.Drivers.Apache.Hive2.HiveServer2Connection;
+
+
+namespace Apache.Arrow.Adbc.Drivers.Databricks.Result
+{
+ /// <summary>
+ /// The response of SQL `DESC EXTENDED TABLE <table_name> AS JSON`
+ ///
+ /// See
https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-aux-describe-table#json-formatted-output
+ /// </summary>
+ internal class DescTableExtendedResult
+ {
+
+ [JsonPropertyName("table_name")]
+ public string TableName { get; set; } = String.Empty;
+
+ [JsonPropertyName("catalog_name")]
+ public string CatalogName { get; set; } = String.Empty;
+
+ [JsonPropertyName("schema_name")]
+ public string SchemaName { get; set; } = String.Empty;
+
+ [JsonPropertyName("type")]
+ public string Type { get; set; } = String.Empty;
+
+ [JsonPropertyName("columns")]
+ public List<ColumnInfo> Columns { get; set; } = new List<ColumnInfo>();
+
+ [JsonPropertyName("table_properties")]
+ public Dictionary<string, string> TableProperties { get; set; } = new
Dictionary<string, string>();
+
+ /// <summary>
+ /// Table constraints in a string format, e.g.
+ ///
+ /// "[ (pk_constraint, PRIMARY KEY (`col1`, `col2`)),
+ /// (fk_constraint, FOREIGN KEY (`col3`) REFERENCES
`catalog`.`schema`.`table` (`refcol1`, `refcol2`))
+ /// ]"
+ /// </summary>
+ [JsonPropertyName("table_constraints")]
+ public string? TableConstraints { get; set; }
+
+ internal class ColumnInfo
+ {
+ [JsonPropertyName("name")]
+ public string Name { get; set; } = String.Empty;
+
+ [JsonPropertyName("type")]
+ public ColumnType Type { get; set; } = new ColumnType();
+
+ [JsonPropertyName("comment")]
+ public string? Comment { get; set; }
+
+ [JsonPropertyName("nullable")]
+ public bool Nullable { get; set; } = true;
+
+ /// <summary>
+ /// Get the data type based on the type `Type.Name`
+ ///
+ /// See the list of type names from
https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-datatypes
+ /// </summary>
+ [JsonIgnore]
+ public ColumnTypeId DataType
+ {
+ get
+ {
+ string normalizedTypeName = Type.Name.Trim().ToUpper();
+
+ return normalizedTypeName switch
+ {
+ "BOOLEAN" => ColumnTypeId.BOOLEAN,
+ "TINYINT" or "BYTE" => ColumnTypeId.TINYINT,
+ "SMALLINT" or "SHORT" => ColumnTypeId.SMALLINT,
+ "INT" or "INTEGER" => ColumnTypeId.INTEGER,
+ "BIGINT" or "LONG" => ColumnTypeId.BIGINT,
+ "FLOAT" or "REAL" => ColumnTypeId.FLOAT,
+ "DOUBLE" => ColumnTypeId.DOUBLE,
+ "DECIMAL" or "NUMERIC" => ColumnTypeId.DECIMAL,
+
+ "CHAR" => ColumnTypeId.CHAR,
+ "STRING" or "VARCHAR" => ColumnTypeId.VARCHAR,
+ "BINARY" => ColumnTypeId.BINARY,
+
+ "TIMESTAMP" => ColumnTypeId.TIMESTAMP,
+ "TIMESTAMP_LTZ" => ColumnTypeId.TIMESTAMP,
+ "TIMESTAMP_NTZ" => ColumnTypeId.TIMESTAMP,
+ "DATE" => ColumnTypeId.DATE,
+
+ "ARRAY" => ColumnTypeId.ARRAY,
+ "MAP" => ColumnTypeId.JAVA_OBJECT,
+ "STRUCT" => ColumnTypeId.STRUCT,
+ "INTERVAL" => ColumnTypeId.OTHER, // Intervals don't
have a direct JDBC mapping
+ "VOID" => ColumnTypeId.NULL,
+ "VARIANT" => ColumnTypeId.OTHER,
+ _ => ColumnTypeId.OTHER // Default fallback for
unknown types
+ };
+ }
+ }
+
+ [JsonIgnore]
+ public bool IsNumber
+ {
+ get
+ {
+ return DataType switch
+ {
+ ColumnTypeId.TINYINT or ColumnTypeId.SMALLINT or
ColumnTypeId.INTEGER or
+ ColumnTypeId.BIGINT or ColumnTypeId.FLOAT or
ColumnTypeId.DOUBLE or
+ ColumnTypeId.DECIMAL or ColumnTypeId.NUMERIC => true,
+ _ => false
+ };
+ }
+ }
+
+ [JsonIgnore]
+ public int DecimalDigits
+ {
+ get
+ {
+ return DataType switch
+ {
+ ColumnTypeId.DECIMAL or ColumnTypeId.NUMERIC =>
Type.Scale ?? 0,
+ ColumnTypeId.DOUBLE => 15,
+ ColumnTypeId.FLOAT or ColumnTypeId.REAL => 7,
+ ColumnTypeId.TIMESTAMP => 6,
+ _ => 0
+ };
+ }
+ }
+
+ /// <summary>
+ /// Get column size
+ ///
+ /// Currently the query `DESC TABLE EXTNEDED AS JSON` does not
return the column size,
+ /// we can calculate it based on the data type and some type
specific properties
+ /// </summary>
+ [JsonIgnore]
+ public int? ColumnSize
+ {
+ get
+ {
+ return DataType switch
+ {
+ ColumnTypeId.TINYINT or ColumnTypeId.BOOLEAN => 1,
+ ColumnTypeId.SMALLINT => 2,
+ ColumnTypeId.INTEGER or ColumnTypeId.FLOAT or
ColumnTypeId.DATE => 4,
+ ColumnTypeId.BIGINT or ColumnTypeId.DOUBLE or
ColumnTypeId.TIMESTAMP or ColumnTypeId.TIMESTAMP_WITH_TIMEZONE => 8,
+ ColumnTypeId.CHAR => Type.Length,
+ ColumnTypeId.VARCHAR => Type.Name.Trim().ToUpper() ==
"STRING" ? int.MaxValue: Type.Length,
+ ColumnTypeId.DECIMAL => Type.Precision ?? 0,
+ ColumnTypeId.NULL => 1,
+ _ => Type.Name.Trim().ToUpper() == "INTERVAL" ?
getIntervalSize() : 0
+ };
+ }
+ }
+
+ private int getIntervalSize()
Review Comment:
```suggestion
private int GetIntervalSize()
```
##########
csharp/src/Drivers/Databricks/Result/DescTableExtendedResult.cs:
##########
@@ -0,0 +1,566 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+using System;
+using System.Collections.Generic;
+using System.Net.Http.Headers;
+using System.Text.Json.Serialization;
+using System.Text.RegularExpressions;
+using static Apache.Arrow.Adbc.Drivers.Apache.Hive2.HiveServer2Connection;
+
+
+namespace Apache.Arrow.Adbc.Drivers.Databricks.Result
+{
+ /// <summary>
+ /// The response of SQL `DESC EXTENDED TABLE <table_name> AS JSON`
+ ///
+ /// See
https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-aux-describe-table#json-formatted-output
+ /// </summary>
+ internal class DescTableExtendedResult
+ {
+
+ [JsonPropertyName("table_name")]
+ public string TableName { get; set; } = String.Empty;
+
+ [JsonPropertyName("catalog_name")]
+ public string CatalogName { get; set; } = String.Empty;
+
+ [JsonPropertyName("schema_name")]
+ public string SchemaName { get; set; } = String.Empty;
+
+ [JsonPropertyName("type")]
+ public string Type { get; set; } = String.Empty;
+
+ [JsonPropertyName("columns")]
+ public List<ColumnInfo> Columns { get; set; } = new List<ColumnInfo>();
+
+ [JsonPropertyName("table_properties")]
+ public Dictionary<string, string> TableProperties { get; set; } = new
Dictionary<string, string>();
+
+ /// <summary>
+ /// Table constraints in a string format, e.g.
+ ///
+ /// "[ (pk_constraint, PRIMARY KEY (`col1`, `col2`)),
+ /// (fk_constraint, FOREIGN KEY (`col3`) REFERENCES
`catalog`.`schema`.`table` (`refcol1`, `refcol2`))
+ /// ]"
+ /// </summary>
+ [JsonPropertyName("table_constraints")]
+ public string? TableConstraints { get; set; }
+
+ internal class ColumnInfo
+ {
+ [JsonPropertyName("name")]
+ public string Name { get; set; } = String.Empty;
+
+ [JsonPropertyName("type")]
+ public ColumnType Type { get; set; } = new ColumnType();
+
+ [JsonPropertyName("comment")]
+ public string? Comment { get; set; }
+
+ [JsonPropertyName("nullable")]
+ public bool Nullable { get; set; } = true;
+
+ /// <summary>
+ /// Get the data type based on the type `Type.Name`
+ ///
+ /// See the list of type names from
https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-datatypes
+ /// </summary>
+ [JsonIgnore]
+ public ColumnTypeId DataType
+ {
+ get
+ {
+ string normalizedTypeName = Type.Name.Trim().ToUpper();
+
+ return normalizedTypeName switch
+ {
+ "BOOLEAN" => ColumnTypeId.BOOLEAN,
+ "TINYINT" or "BYTE" => ColumnTypeId.TINYINT,
+ "SMALLINT" or "SHORT" => ColumnTypeId.SMALLINT,
+ "INT" or "INTEGER" => ColumnTypeId.INTEGER,
+ "BIGINT" or "LONG" => ColumnTypeId.BIGINT,
+ "FLOAT" or "REAL" => ColumnTypeId.FLOAT,
+ "DOUBLE" => ColumnTypeId.DOUBLE,
+ "DECIMAL" or "NUMERIC" => ColumnTypeId.DECIMAL,
+
+ "CHAR" => ColumnTypeId.CHAR,
+ "STRING" or "VARCHAR" => ColumnTypeId.VARCHAR,
+ "BINARY" => ColumnTypeId.BINARY,
+
+ "TIMESTAMP" => ColumnTypeId.TIMESTAMP,
+ "TIMESTAMP_LTZ" => ColumnTypeId.TIMESTAMP,
+ "TIMESTAMP_NTZ" => ColumnTypeId.TIMESTAMP,
+ "DATE" => ColumnTypeId.DATE,
+
+ "ARRAY" => ColumnTypeId.ARRAY,
+ "MAP" => ColumnTypeId.JAVA_OBJECT,
+ "STRUCT" => ColumnTypeId.STRUCT,
+ "INTERVAL" => ColumnTypeId.OTHER, // Intervals don't
have a direct JDBC mapping
+ "VOID" => ColumnTypeId.NULL,
+ "VARIANT" => ColumnTypeId.OTHER,
+ _ => ColumnTypeId.OTHER // Default fallback for
unknown types
+ };
+ }
+ }
+
+ [JsonIgnore]
+ public bool IsNumber
+ {
+ get
+ {
+ return DataType switch
+ {
+ ColumnTypeId.TINYINT or ColumnTypeId.SMALLINT or
ColumnTypeId.INTEGER or
+ ColumnTypeId.BIGINT or ColumnTypeId.FLOAT or
ColumnTypeId.DOUBLE or
+ ColumnTypeId.DECIMAL or ColumnTypeId.NUMERIC => true,
+ _ => false
+ };
+ }
+ }
+
+ [JsonIgnore]
+ public int DecimalDigits
+ {
+ get
+ {
+ return DataType switch
+ {
+ ColumnTypeId.DECIMAL or ColumnTypeId.NUMERIC =>
Type.Scale ?? 0,
+ ColumnTypeId.DOUBLE => 15,
+ ColumnTypeId.FLOAT or ColumnTypeId.REAL => 7,
+ ColumnTypeId.TIMESTAMP => 6,
+ _ => 0
+ };
+ }
+ }
+
+ /// <summary>
+ /// Get column size
+ ///
+ /// Currently the query `DESC TABLE EXTNEDED AS JSON` does not
return the column size,
+ /// we can calculate it based on the data type and some type
specific properties
+ /// </summary>
+ [JsonIgnore]
+ public int? ColumnSize
+ {
+ get
+ {
+ return DataType switch
+ {
+ ColumnTypeId.TINYINT or ColumnTypeId.BOOLEAN => 1,
+ ColumnTypeId.SMALLINT => 2,
+ ColumnTypeId.INTEGER or ColumnTypeId.FLOAT or
ColumnTypeId.DATE => 4,
+ ColumnTypeId.BIGINT or ColumnTypeId.DOUBLE or
ColumnTypeId.TIMESTAMP or ColumnTypeId.TIMESTAMP_WITH_TIMEZONE => 8,
+ ColumnTypeId.CHAR => Type.Length,
+ ColumnTypeId.VARCHAR => Type.Name.Trim().ToUpper() ==
"STRING" ? int.MaxValue: Type.Length,
+ ColumnTypeId.DECIMAL => Type.Precision ?? 0,
+ ColumnTypeId.NULL => 1,
+ _ => Type.Name.Trim().ToUpper() == "INTERVAL" ?
getIntervalSize() : 0
+ };
+ }
+ }
+
+ private int getIntervalSize()
+ {
+ if (String.IsNullOrEmpty(Type.StartUnit))
+ {
+ return 0;
+ }
+
+ // Check whether interval is yearMonthIntervalQualifier or
dayTimeIntervalQualifier
+ // yearMonthIntervalQualifier size is 4,
dayTimeIntervalQualifier size is 8
+ // see
https://docs.databricks.com/aws/en/sql/language-manual/data-types/interval-type
+ return Type.StartUnit!.ToUpper() switch
+ {
+ "YEAR" or "MONTH" => 4,
+ "DAY" or "HOUR" or "MINUTE" or "SECOND" => 8,
+ _ => 4
+ };
+ }
+ }
+
+ public class ForeignKeyInfo
+ {
+ public string KeyName { get; set; } = string.Empty;
+ public List<string> LocalColumns { get; set; } = new
List<string>();
+ public List<string> RefColumns { get; set; } = new List<string>();
+ public string RefCatalog { get; set; } = string.Empty;
+ public string RefSchema { get; set; } = string.Empty;
+ public string RefTable { get; set; } = string.Empty;
+ }
+
+ internal class ColumnType
+ {
+ // Here the name is the base type e.g. it is DECIMAL if column
type is defined as decimal(10,2)
+ [JsonPropertyName("name")]
+ public string Name { get; set; } = String.Empty;
+
+ /// <summary>
+ /// Precision for DECIMAL type, only for DECIMAL and NUMERIC types
+ /// </summary>
+ [JsonPropertyName("precision")]
+ public int? Precision { get; set; }
+
+ /// <summary>
+ /// Scale for DECIMAL type, only for DECIMAL and NUMERIC types
+ /// </summary>
+ [JsonPropertyName("scale")]
+ public int? Scale { get; set; }
+
+ /// <summary>
+ /// Element type for ARRAY type, only for ARRAY type
+ /// </summary>
+ [JsonPropertyName("element_type")]
+ public ColumnType? ElementType { get; set; }
+
+ /// <summary>
+ /// Key and value types for MAP type, only for MAP type
+ /// </summary>
+ [JsonPropertyName("key_type")]
+ public ColumnType? KeyType { get; set; }
+
+ /// <summary>
+ /// Value type for MAP type, only for MAP type
+ /// </summary>
+ [JsonPropertyName("value_type")]
+ public ColumnType? ValueType { get; set; }
+
+ /// <summary>
+ /// Fields for STRUCT type, only for STRUCT type
+ /// </summary>
+ [JsonPropertyName("fields")]
+ public List<ColumnInfo>? Fields { get; set; }
+
+ /// <summary>
+ /// Interval start and end units, only for INTERVAL type
+ /// </summary>
+ [JsonPropertyName("start_unit")]
+ public string? StartUnit { get; set; }
+
+ /// <summary>
+ /// Interval start and end units, only for INTERVAL type
+ /// </summary>
+ [JsonPropertyName("end_unit")]
+ public string? EndUnit { get; set; }
+
+ /// <summary>
+ /// Length of characters, only for character types (CHAR, VARCHAR)
+ /// </summary>
+ [JsonPropertyName("length")]
+ public int? Length {get; set; }
+
+ /// <summary>
+ /// Get the full type name e.g. DECIMAL(10,2), map<string,int>
+ /// </summary>
+ [JsonIgnore]
+ public string FullTypeName
+ {
+ get
+ {
+ string normalizedTypeName = Name.Trim().ToUpper();
+
+ return normalizedTypeName switch
+ {
+ "CHAR" or "VARCHAR" => $"{normalizedTypeName}({Length
?? 1})",
+ "DECIMAL" or "NUMERIC" => Precision != null ?
$"{normalizedTypeName}({Precision},{Scale ?? 0})" : normalizedTypeName,
+ "ARRAY" => ElementType != null ?
$"ARRAY<{ElementType.FullTypeName}>" : "ARRAY<>",
+ "MAP" => (KeyType != null && ValueType != null) ?
$"MAP<{KeyType!.FullTypeName}, {ValueType!.FullTypeName}>":"Map<>",
+ "STRUCT" => BuildStructTypeName(),
+ "INTERVAL" => (StartUnit != null && EndUnit != null) ?
$"INTERVAL {StartUnit.ToUpper()} TO {EndUnit.ToUpper()}": "INTERVAL",
+ "TIMESTAMP_LTZ" => "TIMESTAMP",
+ _ => normalizedTypeName
+ };
+ }
+ }
+
+ private string BuildStructTypeName()
+ {
+ if (Fields == null || Fields.Count == 0)
+ {
+ return "STRUCT<>";
+ }
+
+ var fieldTypes = new List<string>();
+ foreach (var field in Fields)
+ {
+ fieldTypes.Add($"{field.Name}: {field.Type.FullTypeName}");
+ }
+
+ return $"STRUCT<{string.Join(", ", fieldTypes)}>";
+ }
+ }
+
+
+ private bool _hasConstraintsParsed = false;
+
+ private List<string> _primaryKeys = new();
+ private List<ForeignKeyInfo> _foreignKeys = new();
+
+ [JsonIgnore]
+ public List<string> PrimaryKeys
+ {
+ get
+ {
+ parseConstraints();
+ return _primaryKeys;
+ }
+ }
+
+ [JsonIgnore]
+ public List<ForeignKeyInfo> ForeignKeys
+ {
+ get
+ {
+ parseConstraints();
+ return _foreignKeys;
+ }
+ }
+
+ private void parseConstraints()
Review Comment:
```suggestion
private void ParseConstraints()
```
##########
csharp/src/Drivers/Databricks/Result/DescTableExtendedResult.cs:
##########
@@ -0,0 +1,566 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+using System;
+using System.Collections.Generic;
+using System.Net.Http.Headers;
+using System.Text.Json.Serialization;
+using System.Text.RegularExpressions;
+using static Apache.Arrow.Adbc.Drivers.Apache.Hive2.HiveServer2Connection;
+
+
Review Comment:
nit: extra blank lines here and at line 35
##########
csharp/src/Drivers/Databricks/DatabricksStatement.cs:
##########
@@ -524,5 +509,313 @@ private QueryResult EmptyCrossReferenceResult()
return new QueryResult(0, new
HiveServer2Connection.HiveInfoArrowStream(schema, arrays));
}
+
+ protected override async Task<QueryResult>
GetColumnsExtendedAsync(CancellationToken cancellationToken = default)
+ {
+ string? fullTableName = BuildTableName();
+ var canUseDescTableExtended =
((DatabricksConnection)Connection).CanUseDescTableExtended;
+
+ if (!canUseDescTableExtended ||
string.IsNullOrEmpty(fullTableName))
+ {
+ // When fullTableName is empty, we cannot use metadata SQL
query to get the info,
+ // so fallback to base class implementation
+ return await base.GetColumnsExtendedAsync(cancellationToken);
+ }
+
+ string query = $"DESC TABLE EXTENDED {fullTableName} AS JSON";
+ using var descStmt = Connection.CreateStatement();
+ descStmt.SqlQuery = query;
+ QueryResult descResult;
+
+ try
+ {
+ descResult = await descStmt.ExecuteQueryAsync();
+ }
+ catch (HiveServer2Exception ex) when (ex.Message.Contains("Error
running query"))
+ {
+ // Fallback to base implementation
+ Debug.WriteLine($"[ERROR] Failed to run {query}. Fallback to
base::GetColumnsExtendedAsync.Error message:{ex.Message}");
+ return await base.GetColumnsExtendedAsync(cancellationToken);
+ }
+
+ var columnMetadataSchema = CreateColumnMetadataSchema();
+
+ if (descResult.Stream == null)
+ {
+ return CreateEmptyExtendedColumnsResult(columnMetadataSchema);
+ }
+
+ // Read the json result
+ var resultJson = "";
+ using (var stream = descResult.Stream)
+ {
+ var batch = await
stream.ReadNextRecordBatchAsync(cancellationToken);
+ if (batch == null || batch.Length == 0)
+ {
+ return
CreateEmptyExtendedColumnsResult(columnMetadataSchema);
+ }
+
+ resultJson = ((StringArray)batch.Column(0)).GetString(0);
+ }
+
+ // Parse the JSON result
+ var result =
JsonSerializer.Deserialize<DescTableExtendedResult>(resultJson);
+ if (result == null)
+ {
+ throw new FormatException($"Invalid json result of
{query}.Result={resultJson}");
+ }
+ return CreateExtendedColumnsResult(columnMetadataSchema,result);
+ }
+
+
+ /// <summary>
+ /// Creates the schema for the column metadata result set.
+ /// This schema is used for the GetColumns metadata query.
+ /// </summary>
+ private static Schema CreateColumnMetadataSchema()
+ {
+ var fields = new[]
+ {
+ new Field("TABLE_CAT", StringType.Default, true),
+ new Field("TABLE_SCHEM", StringType.Default, true),
+ new Field("TABLE_NAME", StringType.Default, true),
+ new Field("COLUMN_NAME", StringType.Default, true),
+ new Field("DATA_TYPE", Int32Type.Default, true),
+ new Field("TYPE_NAME", StringType.Default, true),
+ new Field("COLUMN_SIZE", Int32Type.Default, true),
+ new Field("BUFFER_LENGTH", Int8Type.Default, true),
+ new Field("DECIMAL_DIGITS", Int32Type.Default, true),
+ new Field("NUM_PREC_RADIX", Int32Type.Default, true),
+ new Field("NULLABLE", Int32Type.Default, true),
+ new Field("REMARKS", StringType.Default, true),
+ new Field("COLUMN_DEF", StringType.Default, true),
+ new Field("SQL_DATA_TYPE", Int32Type.Default, true),
+ new Field("SQL_DATETIME_SUB", Int32Type.Default, true),
+ new Field("CHAR_OCTET_LENGTH", Int32Type.Default, true),
+ new Field("ORDINAL_POSITION", Int32Type.Default, true),
+ new Field("IS_NULLABLE", StringType.Default, true),
+ new Field("SCOPE_CATALOG", StringType.Default, true),
+ new Field("SCOPE_SCHEMA", StringType.Default, true),
+ new Field("SCOPE_TABLE", StringType.Default, true),
+ new Field("SOURCE_DATA_TYPE", Int16Type.Default, true),
+ new Field("IS_AUTO_INCREMENT", StringType.Default, true),
+ new Field("BASE_TYPE_NAME", StringType.Default, true)
+ };
+ return new Schema(fields, null);
+ }
+
+ /// <summary>
+ /// Creates an empty array for each column in the column metadata
schema.
+ /// </summary>
+ private static IArrowArray[] CreateColumnMetadataEmptyArray()
+ {
+ return
+ [
+ new StringArray.Builder().Build(), // TABLE_CAT
+ new StringArray.Builder().Build(), // TABLE_SCHEM
+ new StringArray.Builder().Build(), // TABLE_NAME
+ new StringArray.Builder().Build(), // COLUMN_NAME
+ new Int32Array.Builder().Build(), // DATA_TYPE
+ new StringArray.Builder().Build(), // TYPE_NAME
+ new Int32Array.Builder().Build(), // COLUMN_SIZE
+ new Int8Array.Builder().Build(), // BUFFER_LENGTH
+ new Int32Array.Builder().Build(), // DECIMAL_DIGITS
+ new Int32Array.Builder().Build(), // NUM_PREC_RADIX
+ new Int32Array.Builder().Build(), // NULLABLE
+ new StringArray.Builder().Build(), // REMARKS
+ new StringArray.Builder().Build(), // COLUMN_DEF
+ new Int32Array.Builder().Build(), // SQL_DATA_TYPE
+ new Int32Array.Builder().Build(), // SQL_DATETIME_SUB
+ new Int32Array.Builder().Build(), // CHAR_OCTET_LENGTH
+ new Int32Array.Builder().Build(), // ORDINAL_POSITION
+ new StringArray.Builder().Build(), // IS_NULLABLE
+ new StringArray.Builder().Build(), // SCOPE_CATALOG
+ new StringArray.Builder().Build(), // SCOPE_SCHEMA
+ new StringArray.Builder().Build(), // SCOPE_TABLE
+ new Int16Array.Builder().Build(), // SOURCE_DATA_TYPE
+ new StringArray.Builder().Build(), // IS_AUTO_INCREMENT
+ new StringArray.Builder().Build() // BASE_TYPE_NAME
+ ];
+ }
+
+
Review Comment:
nit: extra blank line
##########
csharp/src/Drivers/Databricks/Result/DescTableExtendedResult.cs:
##########
@@ -0,0 +1,566 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+using System;
+using System.Collections.Generic;
+using System.Net.Http.Headers;
+using System.Text.Json.Serialization;
+using System.Text.RegularExpressions;
+using static Apache.Arrow.Adbc.Drivers.Apache.Hive2.HiveServer2Connection;
+
+
+namespace Apache.Arrow.Adbc.Drivers.Databricks.Result
+{
+ /// <summary>
+ /// The response of SQL `DESC EXTENDED TABLE <table_name> AS JSON`
+ ///
+ /// See
https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-aux-describe-table#json-formatted-output
+ /// </summary>
+ internal class DescTableExtendedResult
+ {
+
+ [JsonPropertyName("table_name")]
+ public string TableName { get; set; } = String.Empty;
+
+ [JsonPropertyName("catalog_name")]
+ public string CatalogName { get; set; } = String.Empty;
+
+ [JsonPropertyName("schema_name")]
+ public string SchemaName { get; set; } = String.Empty;
+
+ [JsonPropertyName("type")]
+ public string Type { get; set; } = String.Empty;
+
+ [JsonPropertyName("columns")]
+ public List<ColumnInfo> Columns { get; set; } = new List<ColumnInfo>();
+
+ [JsonPropertyName("table_properties")]
+ public Dictionary<string, string> TableProperties { get; set; } = new
Dictionary<string, string>();
+
+ /// <summary>
+ /// Table constraints in a string format, e.g.
+ ///
+ /// "[ (pk_constraint, PRIMARY KEY (`col1`, `col2`)),
+ /// (fk_constraint, FOREIGN KEY (`col3`) REFERENCES
`catalog`.`schema`.`table` (`refcol1`, `refcol2`))
+ /// ]"
+ /// </summary>
+ [JsonPropertyName("table_constraints")]
+ public string? TableConstraints { get; set; }
+
+ internal class ColumnInfo
+ {
+ [JsonPropertyName("name")]
+ public string Name { get; set; } = String.Empty;
+
+ [JsonPropertyName("type")]
+ public ColumnType Type { get; set; } = new ColumnType();
+
+ [JsonPropertyName("comment")]
+ public string? Comment { get; set; }
+
+ [JsonPropertyName("nullable")]
+ public bool Nullable { get; set; } = true;
+
+ /// <summary>
+ /// Get the data type based on the type `Type.Name`
+ ///
+ /// See the list of type names from
https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-datatypes
+ /// </summary>
+ [JsonIgnore]
+ public ColumnTypeId DataType
+ {
+ get
+ {
+ string normalizedTypeName = Type.Name.Trim().ToUpper();
+
+ return normalizedTypeName switch
+ {
+ "BOOLEAN" => ColumnTypeId.BOOLEAN,
+ "TINYINT" or "BYTE" => ColumnTypeId.TINYINT,
+ "SMALLINT" or "SHORT" => ColumnTypeId.SMALLINT,
+ "INT" or "INTEGER" => ColumnTypeId.INTEGER,
+ "BIGINT" or "LONG" => ColumnTypeId.BIGINT,
+ "FLOAT" or "REAL" => ColumnTypeId.FLOAT,
+ "DOUBLE" => ColumnTypeId.DOUBLE,
+ "DECIMAL" or "NUMERIC" => ColumnTypeId.DECIMAL,
+
+ "CHAR" => ColumnTypeId.CHAR,
+ "STRING" or "VARCHAR" => ColumnTypeId.VARCHAR,
+ "BINARY" => ColumnTypeId.BINARY,
+
+ "TIMESTAMP" => ColumnTypeId.TIMESTAMP,
+ "TIMESTAMP_LTZ" => ColumnTypeId.TIMESTAMP,
+ "TIMESTAMP_NTZ" => ColumnTypeId.TIMESTAMP,
+ "DATE" => ColumnTypeId.DATE,
+
+ "ARRAY" => ColumnTypeId.ARRAY,
+ "MAP" => ColumnTypeId.JAVA_OBJECT,
+ "STRUCT" => ColumnTypeId.STRUCT,
+ "INTERVAL" => ColumnTypeId.OTHER, // Intervals don't
have a direct JDBC mapping
+ "VOID" => ColumnTypeId.NULL,
+ "VARIANT" => ColumnTypeId.OTHER,
+ _ => ColumnTypeId.OTHER // Default fallback for
unknown types
+ };
+ }
+ }
+
+ [JsonIgnore]
+ public bool IsNumber
+ {
+ get
+ {
+ return DataType switch
+ {
+ ColumnTypeId.TINYINT or ColumnTypeId.SMALLINT or
ColumnTypeId.INTEGER or
+ ColumnTypeId.BIGINT or ColumnTypeId.FLOAT or
ColumnTypeId.DOUBLE or
+ ColumnTypeId.DECIMAL or ColumnTypeId.NUMERIC => true,
+ _ => false
+ };
+ }
+ }
+
+ [JsonIgnore]
+ public int DecimalDigits
+ {
+ get
+ {
+ return DataType switch
+ {
+ ColumnTypeId.DECIMAL or ColumnTypeId.NUMERIC =>
Type.Scale ?? 0,
+ ColumnTypeId.DOUBLE => 15,
+ ColumnTypeId.FLOAT or ColumnTypeId.REAL => 7,
+ ColumnTypeId.TIMESTAMP => 6,
+ _ => 0
+ };
+ }
+ }
+
+ /// <summary>
+ /// Get column size
+ ///
+ /// Currently the query `DESC TABLE EXTNEDED AS JSON` does not
return the column size,
+ /// we can calculate it based on the data type and some type
specific properties
+ /// </summary>
+ [JsonIgnore]
+ public int? ColumnSize
+ {
+ get
+ {
+ return DataType switch
+ {
+ ColumnTypeId.TINYINT or ColumnTypeId.BOOLEAN => 1,
+ ColumnTypeId.SMALLINT => 2,
+ ColumnTypeId.INTEGER or ColumnTypeId.FLOAT or
ColumnTypeId.DATE => 4,
+ ColumnTypeId.BIGINT or ColumnTypeId.DOUBLE or
ColumnTypeId.TIMESTAMP or ColumnTypeId.TIMESTAMP_WITH_TIMEZONE => 8,
+ ColumnTypeId.CHAR => Type.Length,
+ ColumnTypeId.VARCHAR => Type.Name.Trim().ToUpper() ==
"STRING" ? int.MaxValue: Type.Length,
+ ColumnTypeId.DECIMAL => Type.Precision ?? 0,
+ ColumnTypeId.NULL => 1,
+ _ => Type.Name.Trim().ToUpper() == "INTERVAL" ?
getIntervalSize() : 0
+ };
+ }
+ }
+
+ private int getIntervalSize()
+ {
+ if (String.IsNullOrEmpty(Type.StartUnit))
+ {
+ return 0;
+ }
+
+ // Check whether interval is yearMonthIntervalQualifier or
dayTimeIntervalQualifier
+ // yearMonthIntervalQualifier size is 4,
dayTimeIntervalQualifier size is 8
+ // see
https://docs.databricks.com/aws/en/sql/language-manual/data-types/interval-type
+ return Type.StartUnit!.ToUpper() switch
+ {
+ "YEAR" or "MONTH" => 4,
+ "DAY" or "HOUR" or "MINUTE" or "SECOND" => 8,
+ _ => 4
+ };
+ }
+ }
+
+ public class ForeignKeyInfo
+ {
+ public string KeyName { get; set; } = string.Empty;
+ public List<string> LocalColumns { get; set; } = new
List<string>();
+ public List<string> RefColumns { get; set; } = new List<string>();
+ public string RefCatalog { get; set; } = string.Empty;
+ public string RefSchema { get; set; } = string.Empty;
+ public string RefTable { get; set; } = string.Empty;
+ }
+
+ internal class ColumnType
+ {
+ // Here the name is the base type e.g. it is DECIMAL if column
type is defined as decimal(10,2)
+ [JsonPropertyName("name")]
+ public string Name { get; set; } = String.Empty;
+
+ /// <summary>
+ /// Precision for DECIMAL type, only for DECIMAL and NUMERIC types
+ /// </summary>
+ [JsonPropertyName("precision")]
+ public int? Precision { get; set; }
+
+ /// <summary>
+ /// Scale for DECIMAL type, only for DECIMAL and NUMERIC types
+ /// </summary>
+ [JsonPropertyName("scale")]
+ public int? Scale { get; set; }
+
+ /// <summary>
+ /// Element type for ARRAY type, only for ARRAY type
+ /// </summary>
+ [JsonPropertyName("element_type")]
+ public ColumnType? ElementType { get; set; }
+
+ /// <summary>
+ /// Key and value types for MAP type, only for MAP type
+ /// </summary>
+ [JsonPropertyName("key_type")]
+ public ColumnType? KeyType { get; set; }
+
+ /// <summary>
+ /// Value type for MAP type, only for MAP type
+ /// </summary>
+ [JsonPropertyName("value_type")]
+ public ColumnType? ValueType { get; set; }
+
+ /// <summary>
+ /// Fields for STRUCT type, only for STRUCT type
+ /// </summary>
+ [JsonPropertyName("fields")]
+ public List<ColumnInfo>? Fields { get; set; }
+
+ /// <summary>
+ /// Interval start and end units, only for INTERVAL type
+ /// </summary>
+ [JsonPropertyName("start_unit")]
+ public string? StartUnit { get; set; }
+
+ /// <summary>
+ /// Interval start and end units, only for INTERVAL type
+ /// </summary>
+ [JsonPropertyName("end_unit")]
+ public string? EndUnit { get; set; }
+
+ /// <summary>
+ /// Length of characters, only for character types (CHAR, VARCHAR)
+ /// </summary>
+ [JsonPropertyName("length")]
+ public int? Length {get; set; }
+
+ /// <summary>
+ /// Get the full type name e.g. DECIMAL(10,2), map<string,int>
+ /// </summary>
+ [JsonIgnore]
+ public string FullTypeName
+ {
+ get
+ {
+ string normalizedTypeName = Name.Trim().ToUpper();
+
+ return normalizedTypeName switch
+ {
+ "CHAR" or "VARCHAR" => $"{normalizedTypeName}({Length
?? 1})",
+ "DECIMAL" or "NUMERIC" => Precision != null ?
$"{normalizedTypeName}({Precision},{Scale ?? 0})" : normalizedTypeName,
+ "ARRAY" => ElementType != null ?
$"ARRAY<{ElementType.FullTypeName}>" : "ARRAY<>",
+ "MAP" => (KeyType != null && ValueType != null) ?
$"MAP<{KeyType!.FullTypeName}, {ValueType!.FullTypeName}>":"Map<>",
+ "STRUCT" => BuildStructTypeName(),
+ "INTERVAL" => (StartUnit != null && EndUnit != null) ?
$"INTERVAL {StartUnit.ToUpper()} TO {EndUnit.ToUpper()}": "INTERVAL",
+ "TIMESTAMP_LTZ" => "TIMESTAMP",
+ _ => normalizedTypeName
+ };
+ }
+ }
+
+ private string BuildStructTypeName()
+ {
+ if (Fields == null || Fields.Count == 0)
+ {
+ return "STRUCT<>";
+ }
+
+ var fieldTypes = new List<string>();
+ foreach (var field in Fields)
+ {
+ fieldTypes.Add($"{field.Name}: {field.Type.FullTypeName}");
+ }
+
+ return $"STRUCT<{string.Join(", ", fieldTypes)}>";
+ }
+ }
+
+
+ private bool _hasConstraintsParsed = false;
+
+ private List<string> _primaryKeys = new();
+ private List<ForeignKeyInfo> _foreignKeys = new();
+
+ [JsonIgnore]
+ public List<string> PrimaryKeys
+ {
+ get
+ {
+ parseConstraints();
+ return _primaryKeys;
+ }
+ }
+
+ [JsonIgnore]
+ public List<ForeignKeyInfo> ForeignKeys
+ {
+ get
+ {
+ parseConstraints();
+ return _foreignKeys;
+ }
+ }
+
+ private void parseConstraints()
+ {
+ if (_hasConstraintsParsed)
+ return;
+
+ _hasConstraintsParsed = true;
+
+ if (TableConstraints == null || TableConstraints.Trim().Length ==
0)
+ return;
+
+ // Constraints string format example:
+ // "[ (pk_constraint, PRIMARY KEY (`col1`, `col2`)),
(fk_constraint, FOREIGN KEY (`col3`) REFERENCES `catalog`.`schema`.`table`
(`refcol1`, `refcol2`)) ]"
+
+ var constraintString = TableConstraints.Trim();
Review Comment:
when not null, we're trimming twice. Consider instead doing
```
var constraintString = TableConstraints?.Trim();
if (constraintString == null || constraintString.Length == 0)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]