CurtHagenlocher commented on code in PR #2152:
URL: https://github.com/apache/arrow-adbc/pull/2152#discussion_r1773465444
##########
csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs:
##########
@@ -78,6 +105,55 @@ static IArrowArray GetArray(TColumn column)
(IArrowArray?)column.StringVal?.Values ??
(IArrowArray?)column.BinaryVal?.Values ??
throw new InvalidOperationException("unsupported data type");
+ if (expectedArrowType != null && arrowArray is StringArray
stringArray && s_arrowStringConverters.ContainsKey(expectedArrowType.TypeId))
+ {
+ // Perform a conversion from string to native/scalar type.
+ Func<StringArray, IArrowType, IArrowArray> converter =
s_arrowStringConverters[expectedArrowType.TypeId];
+ return converter(stringArray, expectedArrowType);
+ }
+ return arrowArray;
+ }
+
+ private static Date32Array ConvertToDate32(StringArray array,
IArrowType _)
+ {
+ var resultArray = new Date32Array.Builder();
+ foreach (string item in (IReadOnlyCollection<string>)array)
+ {
+ resultArray.Append(DateTime.Parse(item));
Review Comment:
Does the date come back in a predictable format which could be parsed more
efficiently than by DateTime.Parse? (e.g. if we know it always comes back as
yyyy-mm-dd?). DateTime.Parse handles lots of cases that we probably don't need
to support.
Otherwise, this should probably have CultureInfo.InvariantCulture passed to
it.
##########
csharp/src/Drivers/Apache/Hive2/HiveServer2Parameters.cs:
##########
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
+{
+ public static class HiveServer2DataTypeConversionConstants
Review Comment:
Are we really going to inflict this symbol on the public API? :(
##########
csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs:
##########
@@ -78,6 +105,55 @@ static IArrowArray GetArray(TColumn column)
(IArrowArray?)column.StringVal?.Values ??
(IArrowArray?)column.BinaryVal?.Values ??
throw new InvalidOperationException("unsupported data type");
+ if (expectedArrowType != null && arrowArray is StringArray
stringArray && s_arrowStringConverters.ContainsKey(expectedArrowType.TypeId))
+ {
+ // Perform a conversion from string to native/scalar type.
+ Func<StringArray, IArrowType, IArrowArray> converter =
s_arrowStringConverters[expectedArrowType.TypeId];
+ return converter(stringArray, expectedArrowType);
+ }
+ return arrowArray;
+ }
+
+ private static Date32Array ConvertToDate32(StringArray array,
IArrowType _)
+ {
+ var resultArray = new Date32Array.Builder();
+ foreach (string item in (IReadOnlyCollection<string>)array)
+ {
+ resultArray.Append(DateTime.Parse(item));
+ }
+
+ return resultArray.Build();
+ }
+
+ private static Decimal128Array ConvertToDecimal128(StringArray array,
IArrowType schemaType)
+ {
+ // Using the schema type to get the precision and scale.
+ var resultArray = new
Decimal128Array.Builder((Decimal128Type)schemaType);
+ foreach (string item in (IReadOnlyList<string>)array)
+ {
+ // Trying to parse the value into a decimal to handle the
exponent syntax. But this might overflow.
+ if (decimal.TryParse(item, NumberStyles.Float,
CultureInfo.InvariantCulture, out decimal decimalValue))
+ {
+ resultArray.Append(new SqlDecimal(decimalValue));
+ }
+ else
+ {
+ resultArray.Append(item);
+ }
+ }
+ return resultArray.Build();
+ }
+
+ private static TimestampArray ConvertToTimestamp(StringArray array,
IArrowType _)
+ {
+ // Match the precision of the server
+ var resultArrayBuiilder = new
TimestampArray.Builder(TimeUnit.Microsecond);
+ foreach (string item in (IReadOnlyList<string>)array)
+ {
+ DateTimeOffset value = DateTimeOffset.Parse(item,
DateTimeFormatInfo.InvariantInfo, DateTimeStyles.AssumeUniversal);
Review Comment:
Same comment about the format; if we know that e.g. it's ISO-8601 then we
can parse much more cheaply.
##########
csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs:
##########
@@ -78,6 +107,55 @@ static IArrowArray GetArray(TColumn column)
(IArrowArray?)column.StringVal?.Values ??
(IArrowArray?)column.BinaryVal?.Values ??
throw new InvalidOperationException("unsupported data type");
+ if (expectedArrowType != null && arrowArray is StringArray
stringArray && s_arrowStringConverters.ContainsKey(expectedArrowType.TypeId))
+ {
+ // Perform a conversion from string to native/scalar type.
+ Func<StringArray, IArrowType, IArrowArray> converter =
s_arrowStringConverters[expectedArrowType.TypeId];
+ return converter(stringArray, expectedArrowType);
+ }
+ return arrowArray;
+ }
+
+ private static Date32Array ConvertToDate32(StringArray array,
IArrowType _)
+ {
+ var resultArray = new Date32Array.Builder();
+ foreach (string item in (IReadOnlyCollection<string>)array)
+ {
+ resultArray.Append(DateTime.Parse(item));
+ }
+
+ return resultArray.Build();
+ }
+
+ private static Decimal128Array ConvertToDecimal128(StringArray array,
IArrowType schemaType)
+ {
+ // Using the schema type to get the precision and scale.
+ var resultArray = new
Decimal128Array.Builder((Decimal128Type)schemaType);
+ foreach (string item in (IReadOnlyList<string>)array)
+ {
+ // Trying to parse the value into a decimal to handle the
exponent syntax. But this might overflow.
+ if (decimal.TryParse(item, NumberStyles.Float,
CultureInfo.InvariantCulture, out decimal decimalValue))
+ {
+ resultArray.Append(new SqlDecimal(decimalValue));
+ }
+ else
+ {
+ resultArray.Append(item);
Review Comment:
Consider adding as a comment, though if there's a test looking for 'E' or
'e' I think that might be enough of a signal to explain why.
##########
csharp/src/Drivers/Apache/Impala/ImpalaParameters.cs:
##########
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache.Arrow.Adbc.Drivers.Apache.Impala
+{
+ /// <summary>
+ /// Parameters used for connecting to Impala data sources.
+ /// </summary>
+ public static class ImpalaParameters
+ {
+ public const string HostName = "adbc.impala.host";
+ public const string Port = "adbc.impala.port";
+ public const string Path = "adbc.impala.path";
+ public const string AuthType = "adbc.impala.auth_type";
+ public const string DataTypeConv = "adbc.impala.data_type_conv";
+ }
+
+ public static class ImpalaAuthTypeConstants
+ {
+ public const string None = "none";
+ public const string UsernameOnly = "username_only";
+ public const string Basic = "basic";
+
+ public static bool TryParse(string? authType, out ImpalaAuthType
authTypeValue)
+ {
+ switch (authType?.Trim().ToLowerInvariant())
+ {
+ case null:
+ case "":
+ authTypeValue = ImpalaAuthType.Empty;
+ return true;
+ case None:
+ authTypeValue = ImpalaAuthType.None;
+ return true;
+ case UsernameOnly:
+ authTypeValue = ImpalaAuthType.UsernameOnly;
+ return true;
+ case Basic:
+ authTypeValue = ImpalaAuthType.Basic;
+ return true;
+ default:
+ authTypeValue = ImpalaAuthType.Invalid;
+ return false;
+ }
+ }
+ }
+
+ public enum ImpalaAuthType
Review Comment:
The same comment applies to some of the preexisting code in SparkParameters.
##########
csharp/src/Drivers/Apache/Impala/ImpalaParameters.cs:
##########
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Apache.Arrow.Adbc.Drivers.Apache.Impala
+{
+ /// <summary>
+ /// Parameters used for connecting to Impala data sources.
+ /// </summary>
+ public static class ImpalaParameters
+ {
+ public const string HostName = "adbc.impala.host";
+ public const string Port = "adbc.impala.port";
+ public const string Path = "adbc.impala.path";
+ public const string AuthType = "adbc.impala.auth_type";
+ public const string DataTypeConv = "adbc.impala.data_type_conv";
+ }
+
+ public static class ImpalaAuthTypeConstants
+ {
+ public const string None = "none";
+ public const string UsernameOnly = "username_only";
+ public const string Basic = "basic";
+
+ public static bool TryParse(string? authType, out ImpalaAuthType
authTypeValue)
+ {
+ switch (authType?.Trim().ToLowerInvariant())
+ {
+ case null:
+ case "":
+ authTypeValue = ImpalaAuthType.Empty;
+ return true;
+ case None:
+ authTypeValue = ImpalaAuthType.None;
+ return true;
+ case UsernameOnly:
+ authTypeValue = ImpalaAuthType.UsernameOnly;
+ return true;
+ case Basic:
+ authTypeValue = ImpalaAuthType.Basic;
+ return true;
+ default:
+ authTypeValue = ImpalaAuthType.Invalid;
+ return false;
+ }
+ }
+ }
+
+ public enum ImpalaAuthType
Review Comment:
Do this enum and `ImpalaAuthTypeConstants.TryParse` need to be public?
##########
csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs:
##########
@@ -78,6 +105,55 @@ static IArrowArray GetArray(TColumn column)
(IArrowArray?)column.StringVal?.Values ??
(IArrowArray?)column.BinaryVal?.Values ??
throw new InvalidOperationException("unsupported data type");
+ if (expectedArrowType != null && arrowArray is StringArray
stringArray && s_arrowStringConverters.ContainsKey(expectedArrowType.TypeId))
+ {
+ // Perform a conversion from string to native/scalar type.
+ Func<StringArray, IArrowType, IArrowArray> converter =
s_arrowStringConverters[expectedArrowType.TypeId];
+ return converter(stringArray, expectedArrowType);
+ }
+ return arrowArray;
+ }
+
+ private static Date32Array ConvertToDate32(StringArray array,
IArrowType _)
+ {
+ var resultArray = new Date32Array.Builder();
+ foreach (string item in (IReadOnlyCollection<string>)array)
+ {
+ resultArray.Append(DateTime.Parse(item));
+ }
+
+ return resultArray.Build();
+ }
+
+ private static Decimal128Array ConvertToDecimal128(StringArray array,
IArrowType schemaType)
+ {
+ // Using the schema type to get the precision and scale.
+ var resultArray = new
Decimal128Array.Builder((Decimal128Type)schemaType);
+ foreach (string item in (IReadOnlyList<string>)array)
+ {
+ // Trying to parse the value into a decimal to handle the
exponent syntax. But this might overflow.
+ if (decimal.TryParse(item, NumberStyles.Float,
CultureInfo.InvariantCulture, out decimal decimalValue))
+ {
+ resultArray.Append(new SqlDecimal(decimalValue));
+ }
+ else
+ {
+ resultArray.Append(item);
+ }
+ }
+ return resultArray.Build();
+ }
+
+ private static TimestampArray ConvertToTimestamp(StringArray array,
IArrowType _)
+ {
+ // Match the precision of the server
+ var resultArrayBuiilder = new
TimestampArray.Builder(TimeUnit.Microsecond);
Review Comment:
typo in `resultArrayBuiilder`
##########
csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs:
##########
@@ -78,6 +107,55 @@ static IArrowArray GetArray(TColumn column)
(IArrowArray?)column.StringVal?.Values ??
(IArrowArray?)column.BinaryVal?.Values ??
throw new InvalidOperationException("unsupported data type");
+ if (expectedArrowType != null && arrowArray is StringArray
stringArray && s_arrowStringConverters.ContainsKey(expectedArrowType.TypeId))
+ {
+ // Perform a conversion from string to native/scalar type.
+ Func<StringArray, IArrowType, IArrowArray> converter =
s_arrowStringConverters[expectedArrowType.TypeId];
+ return converter(stringArray, expectedArrowType);
+ }
+ return arrowArray;
+ }
+
+ private static Date32Array ConvertToDate32(StringArray array,
IArrowType _)
+ {
+ var resultArray = new Date32Array.Builder();
+ foreach (string item in (IReadOnlyCollection<string>)array)
+ {
+ resultArray.Append(DateTime.Parse(item));
+ }
+
+ return resultArray.Build();
+ }
+
+ private static Decimal128Array ConvertToDecimal128(StringArray array,
IArrowType schemaType)
+ {
+ // Using the schema type to get the precision and scale.
+ var resultArray = new
Decimal128Array.Builder((Decimal128Type)schemaType);
+ foreach (string item in (IReadOnlyList<string>)array)
+ {
+ // Trying to parse the value into a decimal to handle the
exponent syntax. But this might overflow.
+ if (decimal.TryParse(item, NumberStyles.Float,
CultureInfo.InvariantCulture, out decimal decimalValue))
Review Comment:
At least as a first cut, what about searching the string for 'E' or 'e' and
using that to decide which parser to use?
##########
csharp/test/Drivers/Apache/Hive2/HiveServer2ParametersTest.cs:
##########
@@ -0,0 +1,62 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+using System;
+using System.Collections.Generic;
+using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
+using Xunit;
+
+namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Hive2
+{
+ public class HiveServer2ParametersTest
+ {
+ [SkippableTheory]
+ [MemberData(nameof(GetParametersTestData))]
+ public void TestParametersParse(string? dataTypeConversion,
HiveServer2DataTypeConversion expected, Type? excptionType = default)
Review Comment:
typo in `excptionType`
##########
csharp/src/Drivers/Apache/Hive2/HiveServer2Parameters.cs:
##########
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
+{
+ public static class HiveServer2DataTypeConversionConstants
Review Comment:
How much of what's in this file actually needs to be public? Isn't it
sufficient to expose the None and Scalar constants? Perhaps these could be
moved to a more neutral name?
SupportedList is a concept that the ADBC API desperately needs, but doesn't
generically support. Can we wait for a more generic "supported options" API
before doing something like this?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]