This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 4ec2e7d0e refactor(csharp/src/Drivers/Apache/Spark): use UTF8 string
for data conversion, instead of .NET String (#2192)
4ec2e7d0e is described below
commit 4ec2e7d0ef057a31d98979437f0fb0d8de080c86
Author: Bruce Irschick <[email protected]>
AuthorDate: Tue Oct 1 14:13:37 2024 -0700
refactor(csharp/src/Drivers/Apache/Spark): use UTF8 string for data
conversion, instead of .NET String (#2192)
To avoid an unnecessary conversion to String, refactor the data type
conversions to use Utf8Parser methods and ReadOnlySpan<byte> in place
of String and ReadOnlySpan<char>.
---
csharp/src/Drivers/Apache/Hive2/DecimalUtility.cs | 106 +++++----
.../src/Drivers/Apache/Hive2/HiveServer2Reader.cs | 231 +++++++++++-------
.../Drivers/Apache/Hive2/DecimalUtilityTests.cs | 15 +-
.../Drivers/Apache/Hive2/HiveServer2ReaderTest.cs | 265 +++++++++++++++++++++
4 files changed, 475 insertions(+), 142 deletions(-)
diff --git a/csharp/src/Drivers/Apache/Hive2/DecimalUtility.cs
b/csharp/src/Drivers/Apache/Hive2/DecimalUtility.cs
index e9c7cb603..8c7d076bd 100644
--- a/csharp/src/Drivers/Apache/Hive2/DecimalUtility.cs
+++ b/csharp/src/Drivers/Apache/Hive2/DecimalUtility.cs
@@ -16,19 +16,22 @@
*/
using System;
+using System.Buffers.Text;
using System.Numerics;
+using System.Text;
namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
{
internal static class DecimalUtility
{
- private const char AsciiZero = '0';
+ private const byte AsciiZero = (byte)'0';
private const int AsciiDigitMaxIndex = '9' - AsciiZero;
- private const char AsciiMinus = '-';
- private const char AsciiPlus = '+';
- private const char AsciiUpperE = 'E';
- private const char AsciiLowerE = 'e';
- private const char AsciiPeriod = '.';
+ private const byte AsciiMinus = (byte)'-';
+ private const byte AsciiPlus = (byte)'+';
+ private const byte AsciiUpperE = (byte)'E';
+ private const byte AsciiLowerE = (byte)'e';
+ private const byte AsciiPeriod = (byte)'.';
+ private const byte AsciiSpace = (byte)' ';
/// <summary>
/// Gets the BigInteger bytes for the given string value.
@@ -39,7 +42,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
/// <param name="byteWidth">The width in bytes for the target buffer.
Should match the length of the bytes parameter.</param>
/// <param name="bytes">The buffer to place the BigInteger bytes
into.</param>
/// <exception cref="ArgumentOutOfRangeException"></exception>
- internal static void GetBytes(string value, int precision, int scale,
int byteWidth, Span<byte> bytes)
+ internal static void GetBytes(ReadOnlySpan<byte> value, int precision,
int scale, int byteWidth, Span<byte> bytes)
{
if (precision < 1)
{
@@ -70,7 +73,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
#else
byte[] tempBytes = integerValue.ToByteArray();
bytesWritten = tempBytes.Length;
- if (bytesWritten > bytes.Length)
+ if (bytesWritten > byteWidth)
{
throw new OverflowException($"Decimal size greater than
{byteWidth} bytes: {bytesWritten}");
}
@@ -83,20 +86,20 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
}
}
- private static BigInteger ToBigInteger(string value, int precision,
int scale)
+ private static BigInteger ToBigInteger(ReadOnlySpan<byte> value, int
precision, int scale)
{
- BigInteger integerValue;
+ ReadOnlySpan<byte> significantValue = GetSignificantValue(value,
precision, scale);
#if NETCOREAPP
- ReadOnlySpan<char> significantValue = GetSignificantValue(value,
precision, scale);
- integerValue = BigInteger.Parse(significantValue);
+ // We can rely on the fact that all the characters in the span
have already been confirmed to be ASCII (i.e., < 128)
+ Span<char> chars = stackalloc char[significantValue.Length];
+ Encoding.UTF8.GetChars(significantValue, chars);
+ return BigInteger.Parse(chars);
#else
- ReadOnlySpan<char> significantValue =
GetSignificantValue(value.AsSpan(), precision, scale);
- integerValue = BigInteger.Parse(significantValue.ToString());
+ return BigInteger.Parse(Encoding.UTF8.GetString(significantValue));
#endif
- return integerValue;
}
- private static ReadOnlySpan<char>
GetSignificantValue(ReadOnlySpan<char> value, int precision, int scale)
+ private static ReadOnlySpan<byte>
GetSignificantValue(ReadOnlySpan<byte> value, int precision, int scale)
{
ParseDecimal(value, out ParserState state);
@@ -104,12 +107,12 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
precision,
scale,
state,
- out char sign,
- out ReadOnlySpan<char> integerSpan,
- out ReadOnlySpan<char> fractionalSpan,
+ out byte sign,
+ out ReadOnlySpan<byte> integerSpan,
+ out ReadOnlySpan<byte> fractionalSpan,
out int neededScale);
- Span<char> significant = new char[precision + 1];
+ Span<byte> significant = new byte[precision + 1];
BuildSignificantValue(
sign,
scale,
@@ -121,7 +124,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
return significant;
}
- private static void ProcessDecimal(ReadOnlySpan<char> value, int
precision, int scale, ParserState state, out char sign, out ReadOnlySpan<char>
integerSpan, out ReadOnlySpan<char> fractionalSpan, out int neededScale)
+ private static void ProcessDecimal(ReadOnlySpan<byte> value, int
precision, int scale, ParserState state, out byte sign, out ReadOnlySpan<byte>
integerSpan, out ReadOnlySpan<byte> fractionalSpan, out int neededScale)
{
int int_length = 0;
int frac_length = 0;
@@ -133,19 +136,18 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
{
int expStart = state.ExpSignIndex != -1 ? state.ExpSignIndex :
state.ExponentStart;
int expLength = state.ExponentEnd - expStart + 1;
- ReadOnlySpan<char> exponentSpan = value.Slice(expStart,
expLength);
-#if NETCOREAPP
- exponent = int.Parse(exponentSpan);
-#else
- exponent = int.Parse(exponentSpan.ToString());
-#endif
+ ReadOnlySpan<byte> exponentSpan = value.Slice(expStart,
expLength);
+ if (!Utf8Parser.TryParse(exponentSpan, out exponent, out int
_))
+ {
+ throw new FormatException($"unable to parse exponent value
'{Encoding.UTF8.GetString(exponentSpan)}'");
+ }
}
integerSpan = int_length > 0 ? value.Slice(state.IntegerStart,
state.IntegerEnd - state.IntegerStart + 1) : [];
fractionalSpan = frac_length > 0 ?
value.Slice(state.FractionalStart, state.FractionalEnd - state.FractionalStart
+ 1) : [];
- Span<char> tempSignificant;
+ Span<byte> tempSignificant;
if (exponent != 0)
{
- tempSignificant = new char[int_length + frac_length];
+ tempSignificant = new byte[int_length + frac_length];
if (int_length > 0) value.Slice(state.IntegerStart,
state.IntegerEnd - state.IntegerStart + 1).CopyTo(tempSignificant.Slice(0));
if (frac_length > 0) value.Slice(state.FractionalStart,
state.FractionalEnd - state.FractionalStart +
1).CopyTo(tempSignificant.Slice(int_length));
// Trim trailing zeros from combined string
@@ -179,22 +181,22 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
neededScale = frac_length;
if (neededPrecision > precision)
{
- throw new OverflowException($"Decimal precision cannot be
greater than that in the Arrow vector: {value.ToString()} has precision >
{precision}");
+ throw new OverflowException($"Decimal precision cannot be
greater than that in the Arrow vector: {Encoding.UTF8.GetString(value)} has
precision > {precision}");
}
if (neededScale > scale)
{
- throw new OverflowException($"Decimal scale cannot be greater
than that in the Arrow vector: {value.ToString()} has scale > {scale}");
+ throw new OverflowException($"Decimal scale cannot be greater
than that in the Arrow vector: {Encoding.UTF8.GetString(value)} has scale >
{scale}");
}
sign = state.SignIndex != -1 ? value[state.SignIndex] : AsciiPlus;
}
private static void BuildSignificantValue(
- char sign,
+ byte sign,
int scale,
- ReadOnlySpan<char> integerSpan,
- ReadOnlySpan<char> fractionalSpan,
+ ReadOnlySpan<byte> integerSpan,
+ ReadOnlySpan<byte> fractionalSpan,
int neededScale,
- Span<char> significant)
+ Span<byte> significant)
{
significant[0] = sign;
int end = 0;
@@ -242,18 +244,18 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
public ParserState() { }
}
- private static void ParseDecimal(ReadOnlySpan<char> value, out
ParserState parserState)
+ private static void ParseDecimal(ReadOnlySpan<byte> value, out
ParserState parserState)
{
- ParserState state = new ParserState();
+ ParserState state = new();
int index = 0;
int length = value.Length;
while (index < length)
{
- char c = value[index];
+ byte c = value[index];
switch (state.CurrentState)
{
case ParseState.StartWhiteSpace:
- if (!char.IsWhiteSpace(c))
+ if (c != AsciiSpace)
{
state.CurrentState =
ParseState.SignOrDigitOrDecimal;
}
@@ -284,7 +286,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
index++;
state.CurrentState = ParseState.FractionOrExponent;
}
- else if (char.IsWhiteSpace(c))
+ else if (c == AsciiSpace)
{
index++;
state.CurrentState = ParseState.EndWhiteSpace;
@@ -315,7 +317,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
index++;
state.CurrentState = ParseState.ExpSignOrExpValue;
}
- else if (char.IsWhiteSpace(c))
+ else if (c == AsciiSpace)
{
index++;
state.CurrentState = ParseState.EndWhiteSpace;
@@ -340,7 +342,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
index++;
state.CurrentState = ParseState.ExpSignOrExpValue;
}
- else if (char.IsWhiteSpace(c))
+ else if (c == AsciiSpace)
{
index++;
state.CurrentState = ParseState.EndWhiteSpace;
@@ -365,7 +367,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
index++;
state.CurrentState = ParseState.ExpValue;
}
- else if (char.IsWhiteSpace(c))
+ else if (c == AsciiSpace)
{
index++;
state.CurrentState = ParseState.EndWhiteSpace;
@@ -383,7 +385,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
state.ExponentEnd = index;
index++;
}
- else if (char.IsWhiteSpace(c))
+ else if (c == AsciiSpace)
{
index++;
state.CurrentState = ParseState.EndWhiteSpace;
@@ -394,7 +396,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
}
break;
case ParseState.EndWhiteSpace:
- if (char.IsWhiteSpace(c))
+ if (c == AsciiSpace)
{
index++;
state.CurrentState = ParseState.EndWhiteSpace;
@@ -405,7 +407,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
}
break;
case ParseState.Invalid:
- throw new ArgumentOutOfRangeException(nameof(value),
value.ToString(), $"Invalid numeric value at index {index}.");
+ throw new ArgumentOutOfRangeException(nameof(value),
Encoding.UTF8.GetString(value), $"Invalid numeric value at index {index}.");
}
}
// Trim leading zeros from integer portion
@@ -444,7 +446,7 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
if (state.IntegerStart == -1 && state.FractionalStart == -1)
{
if (!state.HasZero)
- throw new ArgumentOutOfRangeException(nameof(value),
value.ToString(), "input does not contain a valid numeric value.");
+ throw new ArgumentOutOfRangeException(nameof(value),
Encoding.UTF8.GetString(value), "input does not contain a valid numeric
value.");
else
{
state.IntegerStart = value.IndexOf(AsciiZero);
@@ -455,4 +457,14 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
parserState = state;
}
}
+
+#if !NETCOREAPP
+ internal static class EncodingExtensions
+ {
+ public static string GetString(this Encoding encoding,
ReadOnlySpan<byte> source)
+ {
+ return encoding.GetString(source.ToArray());
+ }
+ }
+#endif
}
diff --git a/csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs
b/csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs
index 828a5fe58..37e0f5955 100644
--- a/csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs
+++ b/csharp/src/Drivers/Apache/Hive2/HiveServer2Reader.cs
@@ -16,6 +16,7 @@
*/
using System;
+using System.Buffers.Text;
using System.Collections.Generic;
using System.Data.SqlTypes;
using System.Globalization;
@@ -29,13 +30,30 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
{
internal class HiveServer2Reader : IArrowArrayStream
{
- private const char AsciiZero = '0';
+ private const byte AsciiZero = (byte)'0';
private const int AsciiDigitMaxIndex = '9' - AsciiZero;
- private const char AsciiDash = '-';
- private const char AsciiSpace = ' ';
- private const char AsciiColon = ':';
- private const char AsciiPeriod = '.';
-
+ private const byte AsciiDash = (byte)'-';
+ private const byte AsciiSpace = (byte)' ';
+ private const byte AsciiColon = (byte)':';
+ private const byte AsciiPeriod = (byte)'.';
+ private const char StandardFormatRoundTrippable = 'O';
+ private const char StandardFormatExponent = 'E';
+ private const int YearMonthSepIndex = 4;
+ private const int MonthDaySepIndex = 7;
+ private const int KnownFormatDateLength = 10;
+ private const int KnownFormatDateTimeLength = 19;
+ private const int DayHourSepIndex = 10;
+ private const int HourMinuteSepIndex = 13;
+ private const int MinuteSecondSepIndex = 16;
+ private const int YearIndex = 0;
+ private const int MonthIndex = 5;
+ private const int DayIndex = 8;
+ private const int HourIndex = 11;
+ private const int MinuteIndex = 14;
+ private const int SecondIndex = 17;
+ private const int SecondSubsecondSepIndex = 19;
+ private const int SubsecondIndex = 20;
+ private const int MillisecondDecimalPlaces = 3;
private HiveServer2Statement? _statement;
private readonly DataTypeConversion _dataTypeConversion;
private static readonly IReadOnlyDictionary<ArrowTypeId,
Func<StringArray, IArrowType, IArrowArray>> s_arrowStringConverters =
@@ -118,49 +136,56 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
return arrowArray;
}
- private static Date32Array ConvertToDate32(StringArray array,
IArrowType _)
+ internal static Date32Array ConvertToDate32(StringArray array,
IArrowType _)
{
+ const DateTimeStyles DateTimeStyles =
DateTimeStyles.AllowWhiteSpaces;
var resultArray = new Date32Array.Builder();
- foreach (string item in (IReadOnlyCollection<string>)array)
+ int length = array.Length;
+ for (int i = 0; i < length; i++)
{
- if (item == null)
+ // Work with UTF8 string.
+ ReadOnlySpan<byte> date = array.GetBytes(i, out bool isNull);
+ if (isNull)
{
resultArray.AppendNull();
- continue;
}
-
- ReadOnlySpan<char> date = item.AsSpan();
- bool isKnownFormat = date.Length >= 8 && date[4] == AsciiDash
&& date[7] == AsciiDash;
- if (isKnownFormat)
+ else if (TryParse(date, out DateTime dateTime)
+ || Utf8Parser.TryParse(date, out dateTime, out int _,
standardFormat: StandardFormatRoundTrippable)
+ || DateTime.TryParse(array.GetString(i),
CultureInfo.InvariantCulture, DateTimeStyles, out dateTime))
{
- DateTime value = ConvertToDateTime(date);
- resultArray.Append(value);
+ resultArray.Append(dateTime);
}
else
{
- resultArray.Append(DateTime.Parse(item,
CultureInfo.InvariantCulture));
+ throw new FormatException($"unable to convert value
'{array.GetString(i)}' to DateTime");
}
}
return resultArray.Build();
}
- private static DateTime ConvertToDateTime(ReadOnlySpan<char> date)
+ internal static bool TryParse(ReadOnlySpan<byte> date, out DateTime
dateTime)
{
- int year;
- int month;
- int day;
-#if NETCOREAPP
- year = int.Parse(date.Slice(0, 4));
- month = int.Parse(date.Slice(5, 2));
- day = int.Parse(date.Slice(8, 2));
-#else
- year = int.Parse(date.Slice(0, 4).ToString());
- month = int.Parse(date.Slice(5, 2).ToString());
- day = int.Parse(date.Slice(8, 2).ToString());
-#endif
- DateTime value = new(year, month, day);
- return value;
+ if (date.Length == KnownFormatDateLength
+ && date[YearMonthSepIndex] == AsciiDash &&
date[MonthDaySepIndex] == AsciiDash
+ && Utf8Parser.TryParse(date.Slice(YearIndex, 4), out int year,
out int bytesConsumed) && bytesConsumed == 4
+ && Utf8Parser.TryParse(date.Slice(MonthIndex, 2), out int
month, out bytesConsumed) && bytesConsumed == 2
+ && Utf8Parser.TryParse(date.Slice(DayIndex, 2), out int day,
out bytesConsumed) && bytesConsumed == 2)
+ {
+ try
+ {
+ dateTime = new(year, month, day);
+ return true;
+ }
+ catch (ArgumentOutOfRangeException)
+ {
+ dateTime = default;
+ return false;
+ }
+ }
+
+ dateTime = default;
+ return false;
}
private static Decimal128Array ConvertToDecimal128(StringArray array,
IArrowType schemaType)
@@ -169,16 +194,18 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
Decimal128Type decimalType = (Decimal128Type)schemaType;
var resultArray = new Decimal128Array.Builder(decimalType);
Span<byte> buffer = stackalloc byte[decimalType.ByteWidth];
- foreach (string item in (IReadOnlyList<string>)array)
+
+ int length = array.Length;
+ for (int i = 0; i < length; i++)
{
- if (item == null)
+ // Work with UTF8 string.
+ ReadOnlySpan<byte> item = array.GetBytes(i, out bool isNull);
+ if (isNull)
{
resultArray.AppendNull();
- continue;
}
-
// Try to parse the value into a decimal because it is the
most performant and handles the exponent syntax. But this might overflow.
- if (decimal.TryParse(item, NumberStyles.Float,
CultureInfo.InvariantCulture, out decimal decimalValue))
+ else if (Utf8Parser.TryParse(item, out decimal decimalValue,
out int _, standardFormat: StandardFormatExponent))
{
resultArray.Append(new SqlDecimal(decimalValue));
}
@@ -191,84 +218,108 @@ namespace Apache.Arrow.Adbc.Drivers.Apache.Hive2
return resultArray.Build();
}
- private static TimestampArray ConvertToTimestamp(StringArray array,
IArrowType _)
+ internal static TimestampArray ConvertToTimestamp(StringArray array,
IArrowType _)
{
+ const DateTimeStyles DateTimeStyles =
DateTimeStyles.AssumeUniversal | DateTimeStyles.AllowWhiteSpaces;
// Match the precision of the server
var resultArrayBuilder = new
TimestampArray.Builder(TimeUnit.Microsecond);
- foreach (string item in (IReadOnlyList<string>)array)
+ int length = array.Length;
+ for (int i = 0; i < length; i++)
{
- if (item == null)
+ // Work with UTF8 string.
+ ReadOnlySpan<byte> date = array.GetBytes(i, out bool isNull);
+ if (isNull)
{
resultArrayBuilder.AppendNull();
- continue;
}
-
- ReadOnlySpan<char> date = item.AsSpan();
- bool isKnownFormat = date.Length >= 17 && date[4] == AsciiDash
&& date[7] == AsciiDash && date[10] == AsciiSpace && date[13] == AsciiColon &&
date[16] == AsciiColon;
- if (isKnownFormat)
+ else if (TryParse(date, out DateTimeOffset dateValue)
+ || Utf8Parser.TryParse(date, out dateValue, out int _,
standardFormat: StandardFormatRoundTrippable)
+ || DateTimeOffset.TryParse(array.GetString(i),
CultureInfo.InvariantCulture, DateTimeStyles, out dateValue))
{
- DateTimeOffset value = ConvertToDateTimeOffset(date);
- resultArrayBuilder.Append(value);
+ resultArrayBuilder.Append(dateValue);
}
else
{
- DateTimeOffset value = DateTimeOffset.Parse(item,
DateTimeFormatInfo.InvariantInfo, DateTimeStyles.AssumeUniversal);
- resultArrayBuilder.Append(value);
+ throw new FormatException($"unable to convert value
'{array.GetString(i)}' to DateTimeOffset");
}
}
+
return resultArrayBuilder.Build();
}
- private static DateTimeOffset
ConvertToDateTimeOffset(ReadOnlySpan<char> date)
+ internal static bool TryParse(ReadOnlySpan<byte> date, out
DateTimeOffset dateValue)
{
- int year;
- int month;
- int day;
- int hour;
- int minute;
- int second;
-#if NETCOREAPP
- year = int.Parse(date.Slice(0, 4));
- month = int.Parse(date.Slice(5, 2));
- day = int.Parse(date.Slice(8, 2));
- hour = int.Parse(date.Slice(11, 2));
- minute = int.Parse(date.Slice(14, 2));
- second = int.Parse(date.Slice(17, 2));
-#else
- year = int.Parse(date.Slice(0, 4).ToString());
- month = int.Parse(date.Slice(5, 2).ToString());
- day = int.Parse(date.Slice(8, 2).ToString());
- hour = int.Parse(date.Slice(11, 2).ToString());
- minute = int.Parse(date.Slice(14, 2).ToString());
- second = int.Parse(date.Slice(17, 2).ToString());
-#endif
- DateTimeOffset dateValue = new(year, month, day, hour, minute,
second, TimeSpan.Zero);
+ bool isKnownFormat = date.Length >= KnownFormatDateTimeLength
+ && date[YearMonthSepIndex] == AsciiDash
+ && date[MonthDaySepIndex] == AsciiDash
+ && date[DayHourSepIndex] == AsciiSpace
+ && date[HourMinuteSepIndex] == AsciiColon
+ && date[MinuteSecondSepIndex] == AsciiColon;
+
+ if (!isKnownFormat
+ || !Utf8Parser.TryParse(date.Slice(YearIndex, 4), out int
year, out int bytesConsumed, standardFormat: 'D') || bytesConsumed != 4
+ || !Utf8Parser.TryParse(date.Slice(MonthIndex, 2), out int
month, out bytesConsumed, standardFormat: 'D') || bytesConsumed != 2
+ || !Utf8Parser.TryParse(date.Slice(DayIndex, 2), out int day,
out bytesConsumed, standardFormat: 'D') || bytesConsumed != 2
+ || !Utf8Parser.TryParse(date.Slice(HourIndex, 2), out int
hour, out bytesConsumed, standardFormat: 'D') || bytesConsumed != 2
+ || !Utf8Parser.TryParse(date.Slice(MinuteIndex, 2), out int
minute, out bytesConsumed, standardFormat: 'D') || bytesConsumed != 2
+ || !Utf8Parser.TryParse(date.Slice(SecondIndex, 2), out int
second, out bytesConsumed, standardFormat: 'D') || bytesConsumed != 2)
+ {
+ dateValue = default;
+ return false;
+ }
+
+ try
+ {
+ dateValue = new(year, month, day, hour, minute, second,
TimeSpan.Zero);
+ }
+ catch (ArgumentOutOfRangeException)
+ {
+ dateValue = default;
+ return false;
+ }
+
+ // Retrieve subseconds, if available
int length = date.Length;
- if (length >= 20 && date[19] == AsciiPeriod)
+ if (length > SecondSubsecondSepIndex)
{
- int start = -1;
- int end = 20;
- while (end < length && (uint)(date[end] - AsciiZero) <=
AsciiDigitMaxIndex)
+ if (date[SecondSubsecondSepIndex] == AsciiPeriod)
{
- if (start == -1) start = end;
- end++;
+ int start = -1;
+ int end = SubsecondIndex;
+ while (end < length && (uint)(date[end] - AsciiZero) <=
AsciiDigitMaxIndex)
+ {
+ if (start == -1) start = end;
+ end++;
+ }
+ if (end < length)
+ {
+ // Indicates unrecognized trailing character(s)
+ dateValue = default;
+ return false;
+ }
+
+ int subSecondsLength = start != -1 ? end - start : 0;
+ if (subSecondsLength > 0)
+ {
+ if (!Utf8Parser.TryParse(date.Slice(start,
subSecondsLength), out int subSeconds, out _))
+ {
+ dateValue = default;
+ return false;
+ }
+
+ double factorOfMilliseconds = Math.Pow(10,
subSecondsLength - MillisecondDecimalPlaces);
+ long ticks = (long)(subSeconds *
(TimeSpan.TicksPerMillisecond / factorOfMilliseconds));
+ dateValue = dateValue.AddTicks(ticks);
+ }
}
- int subSeconds = 0;
- int subSecondsLength = start != -1 ? end - start : 0;
- if (subSecondsLength > 0)
+ else
{
-#if NETCOREAPP
- subSeconds = int.Parse(date.Slice(start,
subSecondsLength));
-#else
- subSeconds = int.Parse(date.Slice(start,
subSecondsLength).ToString());
-#endif
+ dateValue = default;
+ return false;
}
- double factorOfMilliseconds = Math.Pow(10, subSecondsLength -
3);
- long ticks = (long)(subSeconds * (TimeSpan.TicksPerMillisecond
/ factorOfMilliseconds));
- dateValue = dateValue.AddTicks(ticks);
}
- return dateValue;
+ return true;
}
}
}
diff --git a/csharp/test/Drivers/Apache/Hive2/DecimalUtilityTests.cs
b/csharp/test/Drivers/Apache/Hive2/DecimalUtilityTests.cs
index 1f5f2c534..467317c40 100644
--- a/csharp/test/Drivers/Apache/Hive2/DecimalUtilityTests.cs
+++ b/csharp/test/Drivers/Apache/Hive2/DecimalUtilityTests.cs
@@ -16,10 +16,12 @@
*/
using System;
+using System.Buffers.Text;
using System.Collections.Generic;
using System.Data.SqlTypes;
using System.Diagnostics;
using System.Globalization;
+using System.Text;
using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
using Xunit;
using Xunit.Abstractions;
@@ -35,8 +37,9 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Hive2
[SkippableTheory]
[MemberData(nameof(Decimal128Data))]
- public void TestCanConvertDecimal(string value, int precision, int
scale, int byteWidth, byte[] expected, SqlDecimal? expectedDecimal = default)
+ public void TestCanConvertDecimal(string stringValue, int precision,
int scale, int byteWidth, byte[] expected, SqlDecimal? expectedDecimal =
default)
{
+ ReadOnlySpan<byte> value = Encoding.UTF8.GetBytes(stringValue);
byte[] actual = new byte[byteWidth];
DecimalUtility.GetBytes(value, precision, scale, byteWidth,
actual);
Assert.Equal(expected, actual);
@@ -56,25 +59,27 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Hive2
Stopwatch stopwatch = new();
int testCount = 1000000;
- string testValue = "99999999999999999999999999999999999999";
+ ReadOnlySpan<byte> testValue =
"99999999999999999999999999999999999999"u8;
+ string testValueString = "99999999999999999999999999999999999999";
int byteWidth = 16;
byte[] buffer = new byte[byteWidth];
- Decimal128Array.Builder builder = new Decimal128Array.Builder(new
Types.Decimal128Type(38, 0));
+ Decimal128Array.Builder builder = new(new Types.Decimal128Type(38,
0));
stopwatch.Restart();
for (int i = 0; i < testCount; i++)
{
- if (decimal.TryParse(testValue, NumberStyles.Float,
NumberFormatInfo.InvariantInfo, out var actualDecimal))
+ if (Utf8Parser.TryParse(testValue, out decimal actualDecimal,
out _, standardFormat: 'E'))
{
builder.Append(new SqlDecimal(actualDecimal));
}
else
{
- builder.Append(testValue);
+ builder.Append(testValueString);
}
}
stopwatch.Stop();
_outputHelper.WriteLine($"Decimal128Builder.Append: {testCount}
iterations took {stopwatch.ElapsedMilliseconds} elapsed milliseconds");
+ builder = new(new Types.Decimal128Type(38, 0));
stopwatch.Restart();
for (int i = 0; i < testCount; i++)
{
diff --git a/csharp/test/Drivers/Apache/Hive2/HiveServer2ReaderTest.cs
b/csharp/test/Drivers/Apache/Hive2/HiveServer2ReaderTest.cs
new file mode 100644
index 000000000..d784e2351
--- /dev/null
+++ b/csharp/test/Drivers/Apache/Hive2/HiveServer2ReaderTest.cs
@@ -0,0 +1,265 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+using System;
+using System.Globalization;
+using System.Text;
+using Apache.Arrow.Adbc.Drivers.Apache.Hive2;
+using Xunit;
+
+namespace Apache.Arrow.Adbc.Tests.Drivers.Apache.Hive2
+{
+ public class HiveServer2ReaderTest
+ {
+ private const bool IsValid = true;
+ private const bool IsNotValid = false;
+
+ [Theory]
+ [MemberData(nameof(GetDateTestData), /* isKnownFormat */ true)]
+ internal void TestCanConvertKnownFormatDate(string date, DateTime
expected, bool isValid)
+ {
+ ReadOnlySpan<byte> dateSpan =
Encoding.UTF8.GetBytes(date).AsSpan();
+ if (isValid)
+ {
+ Assert.True(HiveServer2Reader.TryParse(dateSpan, out DateTime
dateTime));
+ Assert.Equal(expected, dateTime);
+ }
+ else
+ {
+ Assert.False(HiveServer2Reader.TryParse(dateSpan, out DateTime
_));
+ }
+ }
+
+ [Theory]
+ [MemberData(nameof(GetDateTestData), /* isKnownFormat */ false)]
+ internal void TestCanConvertUnknownFormatDate(string date, DateTime
expected, bool isValid)
+ {
+ var builder = new StringArray.Builder();
+ builder.Append(date);
+ var stringArray = builder.Build();
+ if (isValid)
+ {
+ var dateArray = HiveServer2Reader.ConvertToDate32(stringArray,
stringArray.Data.DataType);
+ Assert.Equal(1, dateArray.Length);
+ Assert.Equal(expected, dateArray.GetDateTime(0));
+ }
+ else
+ {
+ Assert.Throws<FormatException>(() =>
HiveServer2Reader.ConvertToDate32(stringArray, stringArray.Data.DataType));
+ }
+ }
+
+ [Theory]
+ [MemberData(nameof(GetTimestampTestData), /* isKnownFormat */ true)]
+ internal void TestCanConvertKnownFormatTimestamp(string date,
DateTimeOffset expected, bool isValid)
+ {
+ ReadOnlySpan<byte> dateSpan =
Encoding.UTF8.GetBytes(date).AsSpan();
+ if (isValid)
+ {
+ Assert.True(HiveServer2Reader.TryParse(dateSpan, out
DateTimeOffset dateTime));
+ Assert.Equal(expected, dateTime);
+ }
+ else
+ {
+ Assert.False(HiveServer2Reader.TryParse(dateSpan, out
DateTimeOffset _));
+ }
+ }
+
+ [Theory]
+ [MemberData(nameof(GetTimestampTestData), /* isKnownFormat */ false)]
+ internal void TestCanConvertUnknownFormatTimestamp(string date,
DateTimeOffset expected, bool isValid)
+ {
+ var builder = new StringArray.Builder();
+ builder.Append(date);
+ var stringArray = builder.Build();
+ if (isValid)
+ {
+ TimestampArray timestampArray =
HiveServer2Reader.ConvertToTimestamp(stringArray, stringArray.Data.DataType);
+ Assert.Equal(1, timestampArray.Length);
+ Assert.Equal(expected, timestampArray.GetTimestamp(0));
+ }
+ else
+ {
+ Assert.Throws<FormatException>(() =>
HiveServer2Reader.ConvertToTimestamp(stringArray, stringArray.Data.DataType));
+ }
+ }
+
+ public static TheoryData<string, DateTime, bool> GetDateTestData(bool
isKnownFormat)
+ {
+ string[] dates =
+ [
+ "0001-01-01",
+ "0001-12-31",
+ "1970-01-01",
+ "2024-12-31",
+ "9999-12-31",
+ ];
+
+ var data = new TheoryData<string, DateTime, bool>();
+ foreach (string date in dates)
+ {
+ data.Add(date, DateTime.Parse(date,
CultureInfo.InvariantCulture), IsValid);
+ }
+
+ // Conditionally invalid component separators
+ string[] leadingSpaces = ["", " "];
+ string[] TrailingSpaces = ["", " "];
+ string[] separators = ["/", " "];
+ foreach (string leadingSpace in leadingSpaces)
+ {
+ foreach (string trailingSpace in TrailingSpaces)
+ {
+ foreach (string separator in separators)
+ {
+ foreach (string date in dates)
+ {
+ data.Add(leadingSpace + date.Replace("-",
separator) + trailingSpace, DateTime.Parse(date), !isKnownFormat);
+ }
+ }
+ }
+ }
+
+ // Always invalid for a date separator
+ separators = [":"];
+ foreach (string leadingSpace in leadingSpaces)
+ {
+ foreach (string trailingSpace in TrailingSpaces)
+ {
+ foreach (string separator in separators)
+ {
+ foreach (string date in dates)
+ {
+ data.Add(leadingSpace + date.Replace("-",
separator) + trailingSpace, default, IsNotValid);
+ }
+ }
+ }
+ }
+
+ string[] invalidDates =
+ [
+ "0001-01-00",
+ "0001-01-32",
+ "0001-02-30",
+ "0001-13-01",
+ "00a1-01-01",
+ "0001-a1-01",
+ "0001-01-a1",
+ "001a-01-01",
+ "0001-1a-01",
+ "0001-01-1a",
+ ];
+ foreach (string date in invalidDates)
+ {
+ data.Add(date, default, IsNotValid);
+ }
+
+ return data;
+ }
+
+ public static TheoryData<string, DateTimeOffset, bool>
GetTimestampTestData(bool isKnownFormat)
+ {
+ string[] dates =
+ [
+ "0001-01-01 00:00:00",
+ "9999-12-31 23:59:59",
+ "0001-01-01 00:00:00.1000000",
+ "0001-12-31 00:00:00.0100000",
+ "1970-01-01 00:00:00.0010000",
+ "2024-12-31 00:00:00.0001000",
+ "9999-12-31 00:00:00.0000100",
+ "9999-12-31 00:00:00.",
+ "9999-12-31 00:00:00.9",
+ "9999-12-31 00:00:00.99",
+ "9999-12-31 00:00:00.999",
+ "9999-12-31 00:00:00.9999",
+ "9999-12-31 00:00:00.99999",
+ "9999-12-31 00:00:00.999999",
+ "9999-12-31 00:00:00.999999",
+ "9999-12-31 00:00:00.9999990",
+ "9999-12-31 00:00:00.99999900",
+ ];
+
+ var data = new TheoryData<string, DateTimeOffset, bool>();
+ foreach (string date in dates)
+ {
+ data.Add(date, DateTimeOffset.Parse(date,
CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal), IsValid);
+ }
+
+ // Conditionally invalid component separators
+ string[] leadingSpaces = ["", " "];
+ string[] TrailingSpaces = ["", " "];
+ string[] dateSeparators = ["/", " "];
+ foreach (string leadingSpace in leadingSpaces)
+ {
+ foreach (string trailingSpace in TrailingSpaces)
+ {
+ foreach (string separator in dateSeparators)
+ {
+ foreach (string date in dates)
+ {
+ data.Add(
+ leadingSpace + date.Replace("-", separator) +
trailingSpace,
+ DateTimeOffset.Parse(date,
CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal),
+ !isKnownFormat);
+ }
+ }
+ }
+ }
+
+ // Always an invalid separator for date.
+ dateSeparators = [":"];
+ foreach (string leadingSpace in leadingSpaces)
+ {
+ foreach (string trailingSpace in TrailingSpaces)
+ {
+ foreach (string separator in dateSeparators)
+ {
+ foreach (string date in dates)
+ {
+ data.Add(leadingSpace + date.Replace("-",
separator) + trailingSpace, default, IsNotValid);
+ }
+ }
+ }
+ }
+
+ string[] invalidDates =
+ [
+ "0001-01-00 00:00:00",
+ "0001-01-32 00:00:00",
+ "0001-02-30 00:00:00",
+ "0001-13-01 00:00:00",
+ "abcd-13-01 00:00:00",
+ "0001-12-01 00:00:00.abc",
+ "00a1-01-01 00:00:00",
+ "0001-a1-01 00:00:00",
+ "0001-01-a1 00:00:00",
+ "0001-01-01 a0:00:00",
+ "0001-01-01 00:a0:00",
+ "0001-01-01 00:00:a0",
+ "001a-01-01 00:00:00",
+ "0010-1a-01 00:00:00",
+ "0010-10-1a 00:00:00",
+ ];
+ foreach (string date in invalidDates)
+ {
+ data.Add(date, default, IsNotValid);
+ }
+
+ return data;
+ }
+ }
+}