This is an automated email from the ASF dual-hosted git repository.

curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/main by this push:
     new 4ec2e7d0e refactor(csharp/src/Drivers/Apache/Spark): use UTF8 string 
for data conversion, instead of .NET String (#2192)
4ec2e7d0e is described below

commit 4ec2e7d0ef057a31d98979437f0fb0d8de080c86
Author: Bruce Irschick <[email protected]>
AuthorDate: Tue Oct 1 14:13:37 2024 -0700

    refactor(csharp/src/Drivers/Apache/Spark): use UTF8 string for data 
conversion, instead of .NET String (#2192)
    
    To avoid an unnecessary conversion to String, refactor the data type
    conversions to use Utf8Parser methods and ReadOnlySpan<byte> in place
    of String and ReadOnlySpan<char>.
---
 csharp/src/Drivers/Apache/Hive2/DecimalUtility.cs  | 106 +++++----
 .../src/Drivers/Apache/Hive2/HiveServer2Reader.cs  | 231 +++++++++++-------
 .../Drivers/Apache/Hive2/DecimalUtilityTests.cs    |  15 +-
 .../Drivers/Apache/Hive2/HiveServer2ReaderTest.cs  | 265 +++++++++++++++++++++
 4 files changed, 475 insertions(+), 142 deletions(-)

diff --git a/csharp/src/Drivers/Apache/Hive2/DecimalUtility.cs 
b/csharp/src/Drivers/Apache/Hive2/DecimalUtility.cs
index e9c7cb603..8c7d076bd 100644
--- a/csharp/src/Drivers/Apache/Hive2/DecimalUtility.cs
+++ b/csharp/src/Drivers/Apache/Hive2/DecimalUtility.cs
@@ -16,19 +16,22 @@
 */
 
 using System;
+using System.Buffers.Text;
 using System.Numerics;
+using System.Text;
 
 namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
 {
     internal static class DecimalUtility
     {
-        private const char AsciiZero = '0';
+        private const byte AsciiZero = (byte)'0';
         private const int AsciiDigitMaxIndex = '9' - AsciiZero;
-        private const char AsciiMinus = '-';
-        private const char AsciiPlus = '+';
-        private const char AsciiUpperE = 'E';
-        private const char AsciiLowerE = 'e';
-        private const char AsciiPeriod = '.';
+        private const byte AsciiMinus = (byte)'-';
+        private const byte AsciiPlus = (byte)'+';
+        private const byte AsciiUpperE = (byte)'E';
+        private const byte AsciiLowerE = (byte)'e';
+        private const byte AsciiPeriod = (byte)'.';
+        private const byte AsciiSpace = (byte)' ';
 
         /// <summary>
         /// Gets the BigInteger bytes for the given string value.
@@ -39,7 +42,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
         /// <param name="byteWidth">The width in bytes for the target buffer. 
Should match the length of the bytes parameter.</param>
         /// <param name="bytes">The buffer to place the BigInteger bytes 
into.</param>
         /// <exception cref="ArgumentOutOfRangeException"></exception>
-        internal static void GetBytes(string value, int precision, int scale, 
int byteWidth, Span<byte> bytes)
+        internal static void GetBytes(ReadOnlySpan<byte> value, int precision, 
int scale, int byteWidth, Span<byte> bytes)
         {
             if (precision < 1)
             {
@@ -70,7 +73,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
 #else
             byte[] tempBytes = integerValue.ToByteArray();
             bytesWritten = tempBytes.Length;
-            if (bytesWritten > bytes.Length)
+            if (bytesWritten > byteWidth)
             {
                 throw new OverflowException($"Decimal size greater than 
{byteWidth} bytes: {bytesWritten}");
             }
@@ -83,20 +86,20 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
             }
         }
 
-        private static BigInteger ToBigInteger(string value, int precision, 
int scale)
+        private static BigInteger ToBigInteger(ReadOnlySpan<byte> value, int 
precision, int scale)
         {
-            BigInteger integerValue;
+            ReadOnlySpan<byte> significantValue = GetSignificantValue(value, 
precision, scale);
 #if NETCOREAPP
-            ReadOnlySpan<char> significantValue = GetSignificantValue(value, 
precision, scale);
-            integerValue = BigInteger.Parse(significantValue);
+            // We can rely on the fact that all the characters in the span 
have already been confirmed to be ASCII (i.e., < 128)
+            Span<char> chars = stackalloc char[significantValue.Length];
+            Encoding.UTF8.GetChars(significantValue, chars);
+            return BigInteger.Parse(chars);
 #else
-            ReadOnlySpan<char> significantValue = 
GetSignificantValue(value.AsSpan(), precision, scale);
-            integerValue = BigInteger.Parse(significantValue.ToString());
+            return BigInteger.Parse(Encoding.UTF8.GetString(significantValue));
 #endif
-            return integerValue;
         }
 
-        private static ReadOnlySpan<char> 
GetSignificantValue(ReadOnlySpan<char> value, int precision, int scale)
+        private static ReadOnlySpan<byte> 
GetSignificantValue(ReadOnlySpan<byte> value, int precision, int scale)
         {
             ParseDecimal(value, out ParserState state);
 
@@ -104,12 +107,12 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
                 precision,
                 scale,
                 state,
-                out char sign,
-                out ReadOnlySpan<char> integerSpan,
-                out ReadOnlySpan<char> fractionalSpan,
+                out byte sign,
+                out ReadOnlySpan<byte> integerSpan,
+                out ReadOnlySpan<byte> fractionalSpan,
                 out int neededScale);
 
-            Span<char> significant = new char[precision + 1];
+            Span<byte> significant = new byte[precision + 1];
             BuildSignificantValue(
                 sign,
                 scale,
@@ -121,7 +124,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
             return significant;
         }
 
-        private static void ProcessDecimal(ReadOnlySpan<char> value, int 
precision, int scale, ParserState state, out char sign, out ReadOnlySpan<char> 
integerSpan, out ReadOnlySpan<char> fractionalSpan, out int neededScale)
+        private static void ProcessDecimal(ReadOnlySpan<byte> value, int 
precision, int scale, ParserState state, out byte sign, out ReadOnlySpan<byte> 
integerSpan, out ReadOnlySpan<byte> fractionalSpan, out int neededScale)
         {
             int int_length = 0;
             int frac_length = 0;
@@ -133,19 +136,18 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
             {
                 int expStart = state.ExpSignIndex != -1 ? state.ExpSignIndex : 
state.ExponentStart;
                 int expLength = state.ExponentEnd - expStart + 1;
-                ReadOnlySpan<char> exponentSpan = value.Slice(expStart, 
expLength);
-#if NETCOREAPP
-                exponent = int.Parse(exponentSpan);
-#else
-                exponent = int.Parse(exponentSpan.ToString());
-#endif
+                ReadOnlySpan<byte> exponentSpan = value.Slice(expStart, 
expLength);
+                if (!Utf8Parser.TryParse(exponentSpan, out exponent, out int 
_))
+                {
+                    throw new FormatException($"unable to parse exponent value 
'{Encoding.UTF8.GetString(exponentSpan)}'");
+                }
             }
             integerSpan = int_length > 0 ? value.Slice(state.IntegerStart, 
state.IntegerEnd - state.IntegerStart + 1) : [];
             fractionalSpan = frac_length > 0 ? 
value.Slice(state.FractionalStart, state.FractionalEnd - state.FractionalStart 
+ 1) : [];
-            Span<char> tempSignificant;
+            Span<byte> tempSignificant;
             if (exponent != 0)
             {
-                tempSignificant = new char[int_length + frac_length];
+                tempSignificant = new byte[int_length + frac_length];
                 if (int_length > 0) value.Slice(state.IntegerStart, 
state.IntegerEnd - state.IntegerStart + 1).CopyTo(tempSignificant.Slice(0));
                 if (frac_length > 0) value.Slice(state.FractionalStart, 
state.FractionalEnd - state.FractionalStart + 
1).CopyTo(tempSignificant.Slice(int_length));
                 // Trim trailing zeros from combined string
@@ -179,22 +181,22 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
             neededScale = frac_length;
             if (neededPrecision > precision)
             {
-                throw new OverflowException($"Decimal precision cannot be 
greater than that in the Arrow vector: {value.ToString()} has precision > 
{precision}");
+                throw new OverflowException($"Decimal precision cannot be 
greater than that in the Arrow vector: {Encoding.UTF8.GetString(value)} has 
precision > {precision}");
             }
             if (neededScale > scale)
             {
-                throw new OverflowException($"Decimal scale cannot be greater 
than that in the Arrow vector: {value.ToString()} has scale > {scale}");
+                throw new OverflowException($"Decimal scale cannot be greater 
than that in the Arrow vector: {Encoding.UTF8.GetString(value)} has scale > 
{scale}");
             }
             sign = state.SignIndex != -1 ? value[state.SignIndex] : AsciiPlus;
         }
 
         private static void BuildSignificantValue(
-            char sign,
+            byte sign,
             int scale,
-            ReadOnlySpan<char> integerSpan,
-            ReadOnlySpan<char> fractionalSpan,
+            ReadOnlySpan<byte> integerSpan,
+            ReadOnlySpan<byte> fractionalSpan,
             int neededScale,
-            Span<char> significant)
+            Span<byte> significant)
         {
             significant[0] = sign;
             int end = 0;
@@ -242,18 +244,18 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
             public ParserState() { }
         }
 
-        private static void ParseDecimal(ReadOnlySpan<char> value, out 
ParserState parserState)
+        private static void ParseDecimal(ReadOnlySpan<byte> value, out 
ParserState parserState)
         {
-            ParserState state = new ParserState();
+            ParserState state = new();
             int index = 0;
             int length = value.Length;
             while (index < length)
             {
-                char c = value[index];
+                byte c = value[index];
                 switch (state.CurrentState)
                 {
                     case ParseState.StartWhiteSpace:
-                        if (!char.IsWhiteSpace(c))
+                        if (c != AsciiSpace)
                         {
                             state.CurrentState = 
ParseState.SignOrDigitOrDecimal;
                         }
@@ -284,7 +286,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
                             index++;
                             state.CurrentState = ParseState.FractionOrExponent;
                         }
-                        else if (char.IsWhiteSpace(c))
+                        else if (c == AsciiSpace)
                         {
                             index++;
                             state.CurrentState = ParseState.EndWhiteSpace;
@@ -315,7 +317,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
                             index++;
                             state.CurrentState = ParseState.ExpSignOrExpValue;
                         }
-                        else if (char.IsWhiteSpace(c))
+                        else if (c == AsciiSpace)
                         {
                             index++;
                             state.CurrentState = ParseState.EndWhiteSpace;
@@ -340,7 +342,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
                             index++;
                             state.CurrentState = ParseState.ExpSignOrExpValue;
                         }
-                        else if (char.IsWhiteSpace(c))
+                        else if (c == AsciiSpace)
                         {
                             index++;
                             state.CurrentState = ParseState.EndWhiteSpace;
@@ -365,7 +367,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
                             index++;
                             state.CurrentState = ParseState.ExpValue;
                         }
-                        else if (char.IsWhiteSpace(c))
+                        else if (c == AsciiSpace)
                         {
                             index++;
                             state.CurrentState = ParseState.EndWhiteSpace;
@@ -383,7 +385,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
                             state.ExponentEnd = index;
                             index++;
                         }
-                        else if (char.IsWhiteSpace(c))
+                        else if (c == AsciiSpace)
                         {
                             index++;
                             state.CurrentState = ParseState.EndWhiteSpace;
@@ -394,7 +396,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
                         }
                         break;
                     case ParseState.EndWhiteSpace:
-                        if (char.IsWhiteSpace(c))
+                        if (c == AsciiSpace)
                         {
                             index++;
                             state.CurrentState = ParseState.EndWhiteSpace;
@@ -405,7 +407,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
                         }
                         break;
                     case ParseState.Invalid:
-                        throw new ArgumentOutOfRangeException(nameof(value), 
value.ToString(), $"Invalid numeric value at index {index}.");
+                        throw new ArgumentOutOfRangeException(nameof(value), 
Encoding.UTF8.GetString(value), $"Invalid numeric value at index {index}.");
                 }
             }
             // Trim leading zeros from integer portion
@@ -444,7 +446,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
             if (state.IntegerStart == -1 && state.FractionalStart == -1)
             {
                 if (!state.HasZero)
-                    throw new ArgumentOutOfRangeException(nameof(value), 
value.ToString(), "input does not contain a valid numeric value.");
+                    throw new ArgumentOutOfRangeException(nameof(value), 
Encoding.UTF8.GetString(value), "input does not contain a valid numeric 
value.");
                 else
                 {
                     state.IntegerStart = value.IndexOf(AsciiZero);
@@ -455,4 +457,14 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
             parserState = state;
         }
     }
+
+#if !NETCOREAPP
+    internal static class EncodingExtensions
+    {
+        public static string GetString(this Encoding encoding, 
ReadOnlySpan<byte> source)
+        {
+            return encoding.GetString(source.ToArray());
+        }
+    }
+#endif
 }
diff --git a/csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs 
b/csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs
index 828a5fe58..37e0f5955 100644
--- a/csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs
+++ b/csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs
@@ -16,6 +16,7 @@
 */
 
 using System;
+using System.Buffers.Text;
 using System.Collections.Generic;
 using System.Data.SqlTypes;
 using System.Globalization;
@@ -29,13 +30,30 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
 {
     internal class HiveServer2Reader : IArrowArrayStream
     {
-        private const char AsciiZero = '0';
+        private const byte AsciiZero = (byte)'0';
         private const int AsciiDigitMaxIndex = '9' - AsciiZero;
-        private const char AsciiDash = '-';
-        private const char AsciiSpace = ' ';
-        private const char AsciiColon = ':';
-        private const char AsciiPeriod = '.';
-
+        private const byte AsciiDash = (byte)'-';
+        private const byte AsciiSpace = (byte)' ';
+        private const byte AsciiColon = (byte)':';
+        private const byte AsciiPeriod = (byte)'.';
+        private const char StandardFormatRoundTrippable = 'O';
+        private const char StandardFormatExponent = 'E';
+        private const int YearMonthSepIndex = 4;
+        private const int MonthDaySepIndex = 7;
+        private const int KnownFormatDateLength = 10;
+        private const int KnownFormatDateTimeLength = 19;
+        private const int DayHourSepIndex = 10;
+        private const int HourMinuteSepIndex = 13;
+        private const int MinuteSecondSepIndex = 16;
+        private const int YearIndex = 0;
+        private const int MonthIndex = 5;
+        private const int DayIndex = 8;
+        private const int HourIndex = 11;
+        private const int MinuteIndex = 14;
+        private const int SecondIndex = 17;
+        private const int SecondSubsecondSepIndex = 19;
+        private const int SubsecondIndex = 20;
+        private const int MillisecondDecimalPlaces = 3;
         private HiveServer2Statement? _statement;
         private readonly DataTypeConversion _dataTypeConversion;
         private static readonly IReadOnlyDictionary<ArrowTypeId, 
Func<StringArray, IArrowType, IArrowArray>> s_arrowStringConverters =
@@ -118,49 +136,56 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
             return arrowArray;
         }
 
-        private static Date32Array ConvertToDate32(StringArray array, 
IArrowType _)
+        internal static Date32Array ConvertToDate32(StringArray array, 
IArrowType _)
         {
+            const DateTimeStyles DateTimeStyles = 
DateTimeStyles.AllowWhiteSpaces;
             var resultArray = new Date32Array.Builder();
-            foreach (string item in (IReadOnlyCollection<string>)array)
+            int length = array.Length;
+            for (int i = 0; i < length; i++)
             {
-                if (item == null)
+                // Work with UTF8 string.
+                ReadOnlySpan<byte> date = array.GetBytes(i, out bool isNull);
+                if (isNull)
                 {
                     resultArray.AppendNull();
-                    continue;
                 }
-
-                ReadOnlySpan<char> date = item.AsSpan();
-                bool isKnownFormat = date.Length >= 8 && date[4] == AsciiDash 
&& date[7] == AsciiDash;
-                if (isKnownFormat)
+                else if (TryParse(date, out DateTime dateTime)
+                    || Utf8Parser.TryParse(date, out dateTime, out int _, 
standardFormat: StandardFormatRoundTrippable)
+                    || DateTime.TryParse(array.GetString(i), 
CultureInfo.InvariantCulture, DateTimeStyles, out dateTime))
                 {
-                    DateTime value = ConvertToDateTime(date);
-                    resultArray.Append(value);
+                    resultArray.Append(dateTime);
                 }
                 else
                 {
-                    resultArray.Append(DateTime.Parse(item, 
CultureInfo.InvariantCulture));
+                    throw new FormatException($"unable to convert value 
'{array.GetString(i)}' to DateTime");
                 }
             }
 
             return resultArray.Build();
         }
 
-        private static DateTime ConvertToDateTime(ReadOnlySpan<char> date)
+        internal static bool TryParse(ReadOnlySpan<byte> date, out DateTime 
dateTime)
         {
-            int year;
-            int month;
-            int day;
-#if NETCOREAPP
-            year = int.Parse(date.Slice(0, 4));
-            month = int.Parse(date.Slice(5, 2));
-            day = int.Parse(date.Slice(8, 2));
-#else
-            year = int.Parse(date.Slice(0, 4).ToString());
-            month = int.Parse(date.Slice(5, 2).ToString());
-            day = int.Parse(date.Slice(8, 2).ToString());
-#endif
-            DateTime value = new(year, month, day);
-            return value;
+            if (date.Length == KnownFormatDateLength
+                && date[YearMonthSepIndex] == AsciiDash && 
date[MonthDaySepIndex] == AsciiDash
+                && Utf8Parser.TryParse(date.Slice(YearIndex, 4), out int year, 
out int bytesConsumed) && bytesConsumed == 4
+                && Utf8Parser.TryParse(date.Slice(MonthIndex, 2), out int 
month, out bytesConsumed) && bytesConsumed == 2
+                && Utf8Parser.TryParse(date.Slice(DayIndex, 2), out int day, 
out bytesConsumed) && bytesConsumed == 2)
+            {
+                try
+                {
+                    dateTime = new(year, month, day);
+                    return true;
+                }
+                catch (ArgumentOutOfRangeException)
+                {
+                    dateTime = default;
+                    return false;
+                }
+            }
+
+            dateTime = default;
+            return false;
         }
 
         private static Decimal128Array ConvertToDecimal128(StringArray array, 
IArrowType schemaType)
@@ -169,16 +194,18 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
             Decimal128Type decimalType = (Decimal128Type)schemaType;
             var resultArray = new Decimal128Array.Builder(decimalType);
             Span<byte> buffer = stackalloc byte[decimalType.ByteWidth];
-            foreach (string item in (IReadOnlyList<string>)array)
+
+            int length = array.Length;
+            for (int i = 0; i < length; i++)
             {
-                if (item == null)
+                // Work with UTF8 string.
+                ReadOnlySpan<byte> item = array.GetBytes(i, out bool isNull);
+                if (isNull)
                 {
                     resultArray.AppendNull();
-                    continue;
                 }
-
                 // Try to parse the value into a decimal because it is the 
most performant and handles the exponent syntax. But this might overflow.
-                if (decimal.TryParse(item, NumberStyles.Float, 
CultureInfo.InvariantCulture, out decimal decimalValue))
+                else if (Utf8Parser.TryParse(item, out decimal decimalValue, 
out int _, standardFormat: StandardFormatExponent))
                 {
                     resultArray.Append(new SqlDecimal(decimalValue));
                 }
@@ -191,84 +218,108 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
             return resultArray.Build();
         }
 
-        private static TimestampArray ConvertToTimestamp(StringArray array, 
IArrowType _)
+        internal static TimestampArray ConvertToTimestamp(StringArray array, 
IArrowType _)
         {
+            const DateTimeStyles DateTimeStyles = 
DateTimeStyles.AssumeUniversal | DateTimeStyles.AllowWhiteSpaces;
             // Match the precision of the server
             var resultArrayBuilder = new 
TimestampArray.Builder(TimeUnit.Microsecond);
-            foreach (string item in (IReadOnlyList<string>)array)
+            int length = array.Length;
+            for (int i = 0; i < length; i++)
             {
-                if (item == null)
+                // Work with UTF8 string.
+                ReadOnlySpan<byte> date = array.GetBytes(i, out bool isNull);
+                if (isNull)
                 {
                     resultArrayBuilder.AppendNull();
-                    continue;
                 }
-
-                ReadOnlySpan<char> date = item.AsSpan();
-                bool isKnownFormat = date.Length >= 17 && date[4] == AsciiDash 
&& date[7] == AsciiDash && date[10] == AsciiSpace && date[13] == AsciiColon && 
date[16] == AsciiColon;
-                if (isKnownFormat)
+                else if (TryParse(date, out DateTimeOffset dateValue)
+                    || Utf8Parser.TryParse(date, out dateValue, out int _, 
standardFormat: StandardFormatRoundTrippable)
+                    || DateTimeOffset.TryParse(array.GetString(i), 
CultureInfo.InvariantCulture, DateTimeStyles, out dateValue))
                 {
-                    DateTimeOffset value = ConvertToDateTimeOffset(date);
-                    resultArrayBuilder.Append(value);
+                    resultArrayBuilder.Append(dateValue);
                 }
                 else
                 {
-                    DateTimeOffset value = DateTimeOffset.Parse(item, 
DateTimeFormatInfo.InvariantInfo, DateTimeStyles.AssumeUniversal);
-                    resultArrayBuilder.Append(value);
+                    throw new FormatException($"unable to convert value 
'{array.GetString(i)}' to DateTimeOffset");
                 }
             }
+
             return resultArrayBuilder.Build();
         }
 
-        private static DateTimeOffset 
ConvertToDateTimeOffset(ReadOnlySpan<char> date)
+        internal static bool TryParse(ReadOnlySpan<byte> date, out 
DateTimeOffset dateValue)
         {
-            int year;
-            int month;
-            int day;
-            int hour;
-            int minute;
-            int second;
-#if NETCOREAPP
-            year = int.Parse(date.Slice(0, 4));
-            month = int.Parse(date.Slice(5, 2));
-            day = int.Parse(date.Slice(8, 2));
-            hour = int.Parse(date.Slice(11, 2));
-            minute = int.Parse(date.Slice(14, 2));
-            second = int.Parse(date.Slice(17, 2));
-#else
-            year = int.Parse(date.Slice(0, 4).ToString());
-            month = int.Parse(date.Slice(5, 2).ToString());
-            day = int.Parse(date.Slice(8, 2).ToString());
-            hour = int.Parse(date.Slice(11, 2).ToString());
-            minute = int.Parse(date.Slice(14, 2).ToString());
-            second = int.Parse(date.Slice(17, 2).ToString());
-#endif
-            DateTimeOffset dateValue = new(year, month, day, hour, minute, 
second, TimeSpan.Zero);
+            bool isKnownFormat = date.Length >= KnownFormatDateTimeLength
+                && date[YearMonthSepIndex] == AsciiDash
+                && date[MonthDaySepIndex] == AsciiDash
+                && date[DayHourSepIndex] == AsciiSpace
+                && date[HourMinuteSepIndex] == AsciiColon
+                && date[MinuteSecondSepIndex] == AsciiColon;
+
+            if (!isKnownFormat
+                || !Utf8Parser.TryParse(date.Slice(YearIndex, 4), out int 
year, out int bytesConsumed, standardFormat: 'D') || bytesConsumed != 4
+                || !Utf8Parser.TryParse(date.Slice(MonthIndex, 2), out int 
month, out bytesConsumed, standardFormat: 'D') || bytesConsumed != 2
+                || !Utf8Parser.TryParse(date.Slice(DayIndex, 2), out int day, 
out bytesConsumed, standardFormat: 'D') || bytesConsumed != 2
+                || !Utf8Parser.TryParse(date.Slice(HourIndex, 2), out int 
hour, out bytesConsumed, standardFormat: 'D') || bytesConsumed != 2
+                || !Utf8Parser.TryParse(date.Slice(MinuteIndex, 2), out int 
minute, out bytesConsumed, standardFormat: 'D') || bytesConsumed != 2
+                || !Utf8Parser.TryParse(date.Slice(SecondIndex, 2), out int 
second, out bytesConsumed, standardFormat: 'D') || bytesConsumed != 2)
+            {
+                dateValue = default;
+                return false;
+            }
+
+            try
+            {
+                dateValue = new(year, month, day, hour, minute, second, 
TimeSpan.Zero);
+            }
+            catch (ArgumentOutOfRangeException)
+            {
+                dateValue = default;
+                return false;
+            }
+
+            // Retrieve subseconds, if available
             int length = date.Length;
-            if (length >= 20 && date[19] == AsciiPeriod)
+            if (length > SecondSubsecondSepIndex)
             {
-                int start = -1;
-                int end = 20;
-                while (end < length && (uint)(date[end] - AsciiZero) <= 
AsciiDigitMaxIndex)
+                if (date[SecondSubsecondSepIndex] == AsciiPeriod)
                 {
-                    if (start == -1) start = end;
-                    end++;
+                    int start = -1;
+                    int end = SubsecondIndex;
+                    while (end < length && (uint)(date[end] - AsciiZero) <= 
AsciiDigitMaxIndex)
+                    {
+                        if (start == -1) start = end;
+                        end++;
+                    }
+                    if (end < length)
+                    {
+                        // Indicates unrecognized trailing character(s)
+                        dateValue = default;
+                        return false;
+                    }
+
+                    int subSecondsLength = start != -1 ? end - start : 0;
+                    if (subSecondsLength > 0)
+                    {
+                        if (!Utf8Parser.TryParse(date.Slice(start, 
subSecondsLength), out int subSeconds, out _))
+                        {
+                            dateValue = default;
+                            return false;
+                        }
+
+                        double factorOfMilliseconds = Math.Pow(10, 
subSecondsLength - MillisecondDecimalPlaces);
+                        long ticks = (long)(subSeconds * 
(TimeSpan.TicksPerMillisecond / factorOfMilliseconds));
+                        dateValue = dateValue.AddTicks(ticks);
+                    }
                 }
-                int subSeconds = 0;
-                int subSecondsLength = start != -1 ? end - start : 0;
-                if (subSecondsLength > 0)
+                else
                 {
-#if NETCOREAPP
-                    subSeconds = int.Parse(date.Slice(start, 
subSecondsLength));
-#else
-                    subSeconds = int.Parse(date.Slice(start, 
subSecondsLength).ToString());
-#endif
+                    dateValue = default;
+                    return false;
                 }
-                double factorOfMilliseconds = Math.Pow(10, subSecondsLength - 
3);
-                long ticks = (long)(subSeconds * (TimeSpan.TicksPerMillisecond 
/ factorOfMilliseconds));
-                dateValue = dateValue.AddTicks(ticks);
             }
 
-            return dateValue;
+            return true;
         }
     }
 }
diff --git a/csharp/test/Drivers/Apache/Hive2/DecimalUtilityTests.cs 
b/csharp/test/Drivers/Apache/Hive2/DecimalUtilityTests.cs
index 1f5f2c534..467317c40 100644
--- a/csharp/test/Drivers/Apache/Hive2/DecimalUtilityTests.cs
+++ b/csharp/test/Drivers/Apache/Hive2/DecimalUtilityTests.cs
@@ -16,10 +16,12 @@
 */
 
 using System;
+using System.Buffers.Text;
 using System.Collections.Generic;
 using System.Data.SqlTypes;
 using System.Diagnostics;
 using System.Globalization;
+using System.Text;
 using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
 using Xunit;
 using Xunit.Abstractions;
@@ -35,8 +37,9 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Hive2
 
         [SkippableTheory]
         [MemberData(nameof(Decimal128Data))]
-        public void TestCanConvertDecimal(string value, int precision, int 
scale, int byteWidth, byte[] expected, SqlDecimal? expectedDecimal = default)
+        public void TestCanConvertDecimal(string stringValue, int precision, 
int scale, int byteWidth, byte[] expected, SqlDecimal? expectedDecimal = 
default)
         {
+            ReadOnlySpan<byte> value = Encoding.UTF8.GetBytes(stringValue);
             byte[] actual = new byte[byteWidth];
             DecimalUtility.GetBytes(value, precision, scale, byteWidth, 
actual);
             Assert.Equal(expected, actual);
@@ -56,25 +59,27 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Hive2
             Stopwatch stopwatch = new();
 
             int testCount = 1000000;
-            string testValue = "99999999999999999999999999999999999999";
+            ReadOnlySpan<byte> testValue = 
"99999999999999999999999999999999999999"u8;
+            string testValueString = "99999999999999999999999999999999999999";
             int byteWidth = 16;
             byte[] buffer = new byte[byteWidth];
-            Decimal128Array.Builder builder = new Decimal128Array.Builder(new 
Types.Decimal128Type(38, 0));
+            Decimal128Array.Builder builder = new(new Types.Decimal128Type(38, 
0));
             stopwatch.Restart();
             for (int i = 0; i < testCount; i++)
             {
-                if (decimal.TryParse(testValue, NumberStyles.Float, 
NumberFormatInfo.InvariantInfo, out var actualDecimal))
+                if (Utf8Parser.TryParse(testValue, out decimal actualDecimal, 
out _, standardFormat: 'E'))
                 {
                     builder.Append(new SqlDecimal(actualDecimal));
                 }
                 else
                 {
-                    builder.Append(testValue);
+                    builder.Append(testValueString);
                 }
             }
             stopwatch.Stop();
             _outputHelper.WriteLine($"Decimal128Builder.Append: {testCount} 
iterations took {stopwatch.ElapsedMilliseconds} elapsed milliseconds");
 
+            builder = new(new Types.Decimal128Type(38, 0));
             stopwatch.Restart();
             for (int i = 0; i < testCount; i++)
             {
diff --git a/csharp/test/Drivers/Apache/Hive2/HiveServer2ReaderTest.cs 
b/csharp/test/Drivers/Apache/Hive2/HiveServer2ReaderTest.cs
new file mode 100644
index 000000000..d784e2351
--- /dev/null
+++ b/csharp/test/Drivers/Apache/Hive2/HiveServer2ReaderTest.cs
@@ -0,0 +1,265 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*    http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+using System;
+using System.Globalization;
+using System.Text;
+using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
+using Xunit;
+
+namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Hive2
+{
+    public class HiveServer2ReaderTest
+    {
+        private const bool IsValid = true; // Theory-row flag: the input is expected to convert successfully.
+        private const bool IsNotValid = false; // Theory-row flag: the input is expected to be rejected.
+
+        /// <summary>
+        /// Checks that <c>HiveServer2Reader.TryParse</c> accepts or rejects a UTF-8 encoded
+        /// date string as indicated by <paramref name="isValid"/>.
+        /// </summary>
+        /// <param name="date">The date text to encode as UTF-8 and parse.</param>
+        /// <param name="expected">The value a successful parse must produce.</param>
+        /// <param name="isValid">Whether the parse is expected to succeed.</param>
+        [Theory]
+        [MemberData(nameof(GetDateTestData), /* isKnownFormat */ true)]
+        internal void TestCanConvertKnownFormatDate(string date, DateTime expected, bool isValid)
+        {
+            ReadOnlySpan<byte> utf8Date = Encoding.UTF8.GetBytes(date);
+            bool parsed = HiveServer2Reader.TryParse(utf8Date, out DateTime actual);
+
+            if (!isValid)
+            {
+                Assert.False(parsed);
+                return;
+            }
+
+            Assert.True(parsed);
+            Assert.Equal(expected, actual);
+        }
+
+        /// <summary>
+        /// Checks <c>HiveServer2Reader.ConvertToDate32</c> on a one-element string array:
+        /// valid input must yield the expected date, invalid input must throw
+        /// <see cref="FormatException"/>.
+        /// </summary>
+        /// <param name="date">The date text placed in the string array.</param>
+        /// <param name="expected">The value a successful conversion must produce.</param>
+        /// <param name="isValid">Whether the conversion is expected to succeed.</param>
+        [Theory]
+        [MemberData(nameof(GetDateTestData), /* isKnownFormat */ false)]
+        internal void TestCanConvertUnknownFormatDate(string date, DateTime expected, bool isValid)
+        {
+            var sourceBuilder = new StringArray.Builder();
+            sourceBuilder.Append(date);
+            var source = sourceBuilder.Build();
+
+            if (!isValid)
+            {
+                Assert.Throws<FormatException>(() => HiveServer2Reader.ConvertToDate32(source, source.Data.DataType));
+                return;
+            }
+
+            var converted = HiveServer2Reader.ConvertToDate32(source, source.Data.DataType);
+            Assert.Equal(1, converted.Length);
+            Assert.Equal(expected, converted.GetDateTime(0));
+        }
+
+        /// <summary>
+        /// Checks that <c>HiveServer2Reader.TryParse</c> accepts or rejects a UTF-8 encoded
+        /// timestamp string as indicated by <paramref name="isValid"/>.
+        /// </summary>
+        /// <param name="date">The timestamp text to encode as UTF-8 and parse.</param>
+        /// <param name="expected">The value a successful parse must produce.</param>
+        /// <param name="isValid">Whether the parse is expected to succeed.</param>
+        [Theory]
+        [MemberData(nameof(GetTimestampTestData), /* isKnownFormat */ true)]
+        internal void TestCanConvertKnownFormatTimestamp(string date, DateTimeOffset expected, bool isValid)
+        {
+            ReadOnlySpan<byte> utf8Timestamp = Encoding.UTF8.GetBytes(date);
+            bool parsed = HiveServer2Reader.TryParse(utf8Timestamp, out DateTimeOffset actual);
+
+            if (!isValid)
+            {
+                Assert.False(parsed);
+                return;
+            }
+
+            Assert.True(parsed);
+            Assert.Equal(expected, actual);
+        }
+
+        /// <summary>
+        /// Checks <c>HiveServer2Reader.ConvertToTimestamp</c> on a one-element string array:
+        /// valid input must yield the expected timestamp, invalid input must throw
+        /// <see cref="FormatException"/>.
+        /// </summary>
+        /// <param name="date">The timestamp text placed in the string array.</param>
+        /// <param name="expected">The value a successful conversion must produce.</param>
+        /// <param name="isValid">Whether the conversion is expected to succeed.</param>
+        [Theory]
+        [MemberData(nameof(GetTimestampTestData), /* isKnownFormat */ false)]
+        internal void TestCanConvertUnknownFormatTimestamp(string date, DateTimeOffset expected, bool isValid)
+        {
+            var sourceBuilder = new StringArray.Builder();
+            sourceBuilder.Append(date);
+            var source = sourceBuilder.Build();
+
+            if (!isValid)
+            {
+                Assert.Throws<FormatException>(() => HiveServer2Reader.ConvertToTimestamp(source, source.Data.DataType));
+                return;
+            }
+
+            TimestampArray converted = HiveServer2Reader.ConvertToTimestamp(source, source.Data.DataType);
+            Assert.Equal(1, converted.Length);
+            Assert.Equal(expected, converted.GetTimestamp(0));
+        }
+
+        /// <summary>
+        /// Builds the theory rows (input text, expected value, expected validity) used by the
+        /// date conversion tests.
+        /// </summary>
+        /// <param name="isKnownFormat">
+        /// When <c>true</c>, inputs that use '/' or ' ' as the date separator (optionally padded
+        /// with a leading and/or trailing space) are marked invalid; when <c>false</c> they are
+        /// marked valid.
+        /// </param>
+        /// <returns>The rows to feed to the theory through <c>MemberData</c>.</returns>
+        public static TheoryData<string, DateTime, bool> GetDateTestData(bool isKnownFormat)
+        {
+            string[] dates =
+                [
+                    "0001-01-01",
+                    "0001-12-31",
+                    "1970-01-01",
+                    "2024-12-31",
+                    "9999-12-31",
+                ];
+            string[] paddings = ["", " "];
+
+            var data = new TheoryData<string, DateTime, bool>();
+
+            // Always parse with the invariant culture so the expected values do not depend on
+            // the calendar or date format of the machine running the tests.
+            static DateTime ParseInvariant(string text) => DateTime.Parse(text, CultureInfo.InvariantCulture);
+
+            // Adds one row per leading padding / trailing padding / separator / date combination,
+            // replacing the '-' in each canonical date with the given separator.
+            void AddSeparatorVariants(string[] separators, Func<string, DateTime> expectedFor, bool isValid)
+            {
+                foreach (string leadingSpace in paddings)
+                {
+                    foreach (string trailingSpace in paddings)
+                    {
+                        foreach (string separator in separators)
+                        {
+                            foreach (string date in dates)
+                            {
+                                data.Add(leadingSpace + date.Replace("-", separator) + trailingSpace, expectedFor(date), isValid);
+                            }
+                        }
+                    }
+                }
+            }
+
+            foreach (string date in dates)
+            {
+                data.Add(date, ParseInvariant(date), IsValid);
+            }
+
+            // Conditionally invalid component separators
+            AddSeparatorVariants(["/", " "], ParseInvariant, !isKnownFormat);
+
+            // Always invalid for a date separator
+            AddSeparatorVariants([":"], _ => default, IsNotValid);
+
+            string[] invalidDates =
+                [
+                    "0001-01-00",
+                    "0001-01-32",
+                    "0001-02-30",
+                    "0001-13-01",
+                    "00a1-01-01",
+                    "0001-a1-01",
+                    "0001-01-a1",
+                    "001a-01-01",
+                    "0001-1a-01",
+                    "0001-01-1a",
+                ];
+            foreach (string date in invalidDates)
+            {
+                data.Add(date, default, IsNotValid);
+            }
+
+            return data;
+        }
+
+        /// <summary>
+        /// Builds the theory rows (input text, expected value, expected validity) used by the
+        /// timestamp conversion tests.
+        /// </summary>
+        /// <param name="isKnownFormat">
+        /// When <c>true</c>, inputs that use '/' or ' ' as the date separator (optionally padded
+        /// with a leading and/or trailing space) are marked invalid; when <c>false</c> they are
+        /// marked valid.
+        /// </param>
+        /// <returns>The rows to feed to the theory through <c>MemberData</c>.</returns>
+        public static TheoryData<string, DateTimeOffset, bool> GetTimestampTestData(bool isKnownFormat)
+        {
+            // NOTE(review): "9999-12-31 00:00:00.999999" was listed twice; the duplicate row is
+            // removed because it produced an identical theory case. A seven-digit fraction may
+            // have been intended there - confirm against the parser before adding one.
+            string[] dates =
+                [
+                    "0001-01-01 00:00:00",
+                    "9999-12-31 23:59:59",
+                    "0001-01-01 00:00:00.1000000",
+                    "0001-12-31 00:00:00.0100000",
+                    "1970-01-01 00:00:00.0010000",
+                    "2024-12-31 00:00:00.0001000",
+                    "9999-12-31 00:00:00.0000100",
+                    "9999-12-31 00:00:00.",
+                    "9999-12-31 00:00:00.9",
+                    "9999-12-31 00:00:00.99",
+                    "9999-12-31 00:00:00.999",
+                    "9999-12-31 00:00:00.9999",
+                    "9999-12-31 00:00:00.99999",
+                    "9999-12-31 00:00:00.999999",
+                    "9999-12-31 00:00:00.9999990",
+                    "9999-12-31 00:00:00.99999900",
+                ];
+            string[] paddings = ["", " "];
+
+            var data = new TheoryData<string, DateTimeOffset, bool>();
+
+            // Expected values are parsed culture-independently and treated as UTC.
+            static DateTimeOffset ParseUniversal(string text) =>
+                DateTimeOffset.Parse(text, CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal);
+
+            // Adds one row per leading padding / trailing padding / separator / timestamp
+            // combination, replacing every '-' in the canonical text with the given separator.
+            void AddSeparatorVariants(string[] dateSeparators, Func<string, DateTimeOffset> expectedFor, bool isValid)
+            {
+                foreach (string leadingSpace in paddings)
+                {
+                    foreach (string trailingSpace in paddings)
+                    {
+                        foreach (string separator in dateSeparators)
+                        {
+                            foreach (string date in dates)
+                            {
+                                data.Add(leadingSpace + date.Replace("-", separator) + trailingSpace, expectedFor(date), isValid);
+                            }
+                        }
+                    }
+                }
+            }
+
+            foreach (string date in dates)
+            {
+                data.Add(date, ParseUniversal(date), IsValid);
+            }
+
+            // Conditionally invalid component separators
+            AddSeparatorVariants(["/", " "], ParseUniversal, !isKnownFormat);
+
+            // Always an invalid separator for date.
+            AddSeparatorVariants([":"], _ => default, IsNotValid);
+
+            string[] invalidDates =
+                [
+                    "0001-01-00 00:00:00",
+                    "0001-01-32 00:00:00",
+                    "0001-02-30 00:00:00",
+                    "0001-13-01 00:00:00",
+                    "abcd-13-01 00:00:00",
+                    "0001-12-01 00:00:00.abc",
+                    "00a1-01-01 00:00:00",
+                    "0001-a1-01 00:00:00",
+                    "0001-01-a1 00:00:00",
+                    "0001-01-01 a0:00:00",
+                    "0001-01-01 00:a0:00",
+                    "0001-01-01 00:00:a0",
+                    "001a-01-01 00:00:00",
+                    "0010-1a-01 00:00:00",
+                    "0010-10-1a 00:00:00",
+                ];
+            foreach (string date in invalidDates)
+            {
+                data.Add(date, default, IsNotValid);
+            }
+
+            return data;
+        }
+    }
+}


Reply via email to