adamreeve commented on code in PR #379:
URL: https://github.com/apache/arrow-dotnet/pull/379#discussion_r3509990458
##########
Apache.Arrow.sln:
##########
@@ -1,3 +1,4 @@
+
Review Comment:
This extra blank line should be removed.
##########
Apache.Arrow.sln:
##########
@@ -264,10 +275,51 @@ Global
{73EBE132-AE05-4C32-9525-515F4768156B}.Release|x64.Build.0 =
Release|Any CPU
{73EBE132-AE05-4C32-9525-515F4768156B}.Release|x86.ActiveCfg =
Release|Any CPU
{73EBE132-AE05-4C32-9525-515F4768156B}.Release|x86.Build.0 =
Release|Any CPU
+ {3F9AEB1B-814A-4B82-8E0A-481AEA0CBDD0}.Debug|Any CPU.ActiveCfg
= Debug|Any CPU
+ {3F9AEB1B-814A-4B82-8E0A-481AEA0CBDD0}.Debug|Any CPU.Build.0 =
Debug|Any CPU
+ {3F9AEB1B-814A-4B82-8E0A-481AEA0CBDD0}.Debug|x64.ActiveCfg =
Debug|Any CPU
+ {3F9AEB1B-814A-4B82-8E0A-481AEA0CBDD0}.Debug|x64.Build.0 =
Debug|Any CPU
+ {3F9AEB1B-814A-4B82-8E0A-481AEA0CBDD0}.Debug|x86.ActiveCfg =
Debug|Any CPU
+ {3F9AEB1B-814A-4B82-8E0A-481AEA0CBDD0}.Debug|x86.Build.0 =
Debug|Any CPU
+ {3F9AEB1B-814A-4B82-8E0A-481AEA0CBDD0}.Release|Any
CPU.ActiveCfg = Release|Any CPU
+ {3F9AEB1B-814A-4B82-8E0A-481AEA0CBDD0}.Release|Any CPU.Build.0
= Release|Any CPU
+ {3F9AEB1B-814A-4B82-8E0A-481AEA0CBDD0}.Release|x64.ActiveCfg =
Release|Any CPU
+ {3F9AEB1B-814A-4B82-8E0A-481AEA0CBDD0}.Release|x64.Build.0 =
Release|Any CPU
+ {3F9AEB1B-814A-4B82-8E0A-481AEA0CBDD0}.Release|x86.ActiveCfg =
Release|Any CPU
+ {3F9AEB1B-814A-4B82-8E0A-481AEA0CBDD0}.Release|x86.Build.0 =
Release|Any CPU
+ {E183F970-90FA-49F1-B339-7827171A3B02}.Debug|Any CPU.ActiveCfg
= Debug|Any CPU
+ {E183F970-90FA-49F1-B339-7827171A3B02}.Debug|Any CPU.Build.0 =
Debug|Any CPU
+ {E183F970-90FA-49F1-B339-7827171A3B02}.Debug|x64.ActiveCfg =
Debug|Any CPU
+ {E183F970-90FA-49F1-B339-7827171A3B02}.Debug|x64.Build.0 =
Debug|Any CPU
+ {E183F970-90FA-49F1-B339-7827171A3B02}.Debug|x86.ActiveCfg =
Debug|Any CPU
+ {E183F970-90FA-49F1-B339-7827171A3B02}.Debug|x86.Build.0 =
Debug|Any CPU
+ {E183F970-90FA-49F1-B339-7827171A3B02}.Release|Any
CPU.ActiveCfg = Release|Any CPU
+ {E183F970-90FA-49F1-B339-7827171A3B02}.Release|Any CPU.Build.0
= Release|Any CPU
+ {E183F970-90FA-49F1-B339-7827171A3B02}.Release|x64.ActiveCfg =
Release|Any CPU
+ {E183F970-90FA-49F1-B339-7827171A3B02}.Release|x64.Build.0 =
Release|Any CPU
+ {E183F970-90FA-49F1-B339-7827171A3B02}.Release|x86.ActiveCfg =
Release|Any CPU
+ {E183F970-90FA-49F1-B339-7827171A3B02}.Release|x86.Build.0 =
Release|Any CPU
+ {C9657321-6DFA-411A-AD42-D51C51D0FAF0}.Debug|Any CPU.ActiveCfg
= Debug|Any CPU
+ {C9657321-6DFA-411A-AD42-D51C51D0FAF0}.Debug|Any CPU.Build.0 =
Debug|Any CPU
+ {C9657321-6DFA-411A-AD42-D51C51D0FAF0}.Debug|x64.ActiveCfg =
Debug|Any CPU
+ {C9657321-6DFA-411A-AD42-D51C51D0FAF0}.Debug|x64.Build.0 =
Debug|Any CPU
+ {C9657321-6DFA-411A-AD42-D51C51D0FAF0}.Debug|x86.ActiveCfg =
Debug|Any CPU
+ {C9657321-6DFA-411A-AD42-D51C51D0FAF0}.Debug|x86.Build.0 =
Debug|Any CPU
+ {C9657321-6DFA-411A-AD42-D51C51D0FAF0}.Release|Any
CPU.ActiveCfg = Release|Any CPU
+ {C9657321-6DFA-411A-AD42-D51C51D0FAF0}.Release|Any CPU.Build.0
= Release|Any CPU
+ {C9657321-6DFA-411A-AD42-D51C51D0FAF0}.Release|x64.ActiveCfg =
Release|Any CPU
+ {C9657321-6DFA-411A-AD42-D51C51D0FAF0}.Release|x64.Build.0 =
Release|Any CPU
+ {C9657321-6DFA-411A-AD42-D51C51D0FAF0}.Release|x86.ActiveCfg =
Release|Any CPU
+ {C9657321-6DFA-411A-AD42-D51C51D0FAF0}.Release|x86.Build.0 =
Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
+ GlobalSection(NestedProjects) = preSolution
Review Comment:
This nests the new projects under "src" and "test" folders, but no other
projects are nested like this. Can you either remove the nesting or make other
projects also nested for consistency.
##########
src/Apache.Arrow.Compute/Aggregations.cs:
##########
@@ -0,0 +1,420 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+#if NET8_0_OR_GREATER
+using System.Numerics;
+using System.Numerics.Tensors;
+#endif
+
+namespace Apache.Arrow.Compute
+{
+ /// <summary>
+ /// Aggregation kernels over <see cref="PrimitiveArray{T}"/>
(Sum/Min/Max/Mean).
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// Null handling follows LINQ semantics: null entries are skipped and do
not contribute to the
+ /// result. <c>Sum</c> of an empty or all-null array returns zero.
<c>Min</c>, <c>Max</c> and
+ /// <c>Mean</c> throw <see cref="InvalidOperationException"/> when the
array contains no non-null
+ /// elements, matching <see
cref="System.Linq.Enumerable.Min{TSource}(System.Collections.Generic.IEnumerable{TSource})"/>
+ /// and <see
cref="System.Linq.Enumerable.Average(System.Collections.Generic.IEnumerable{int})"/>.
+ /// </para>
+ /// <para>
+ /// On net8.0 and later the kernels are generic over <see
cref="INumber{TSelf}"/> and, when the
+ /// array has no nulls, dispatch to <see cref="TensorPrimitives"/> for a
SIMD-accelerated single
+ /// pass over the contiguous values buffer; when nulls are present they
fall back to a correct,
+ /// validity-aware scalar loop. On netstandard2.0 and net462 (where
generic math and
+ /// <see cref="TensorPrimitives"/> are unavailable) the kernels are
provided as per-type overloads
+ /// (<see cref="Int32Array"/>, <see cref="Int64Array"/>, <see
cref="FloatArray"/>,
+ /// <see cref="DoubleArray"/>) backed by scalar loops with the same null
semantics.
+ /// </para>
+ /// </remarks>
+ public static class Aggregations
+ {
+#if NET8_0_OR_GREATER
+ /// <summary>Sums the non-null elements. Returns zero for an empty or
all-null array.</summary>
+ public static T Sum<T>(this PrimitiveArray<T> array)
+ where T : unmanaged, INumber<T>
+ {
+ if (array is null) throw new ArgumentNullException(nameof(array));
+
+ ReadOnlySpan<T> values = array.Values;
+
+ if (array.NullCount == 0)
+ {
+ return TensorPrimitives.Sum(values);
+ }
+
+ T acc = T.Zero;
+ for (int i = 0; i < values.Length; i++)
+ {
+ if (array.IsValid(i))
+ {
+ acc += values[i];
+ }
+ }
+ return acc;
+ }
+
+ /// <summary>Returns the smallest non-null element.</summary>
+ /// <exception cref="InvalidOperationException">The array contains no
non-null elements.</exception>
+ public static T Min<T>(this PrimitiveArray<T> array)
+ where T : unmanaged, INumber<T>
+ {
+ if (array is null) throw new ArgumentNullException(nameof(array));
+
+ ReadOnlySpan<T> values = array.Values;
+
+ if (values.Length == 0 || array.Length - array.NullCount == 0)
+ {
+ throw new InvalidOperationException("Sequence contains no
non-null elements.");
+ }
+
+ if (array.NullCount == 0)
+ {
+ return TensorPrimitives.Min(values);
+ }
+
+ bool set = false;
+ T min = T.Zero;
Review Comment:
If you add an `IMinMaxValue<T>` constraint, you could initialize min to
`T.MaxValue` and not need `set`. And similarly for Max.
##########
src/Apache.Arrow.Compute/Aggregations.cs:
##########
@@ -0,0 +1,420 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+#if NET8_0_OR_GREATER
+using System.Numerics;
+using System.Numerics.Tensors;
+#endif
+
+namespace Apache.Arrow.Compute
+{
+ /// <summary>
+ /// Aggregation kernels over <see cref="PrimitiveArray{T}"/>
(Sum/Min/Max/Mean).
+ /// </summary>
+ /// <remarks>
+ /// <para>
+ /// Null handling follows LINQ semantics: null entries are skipped and do
not contribute to the
+ /// result. <c>Sum</c> of an empty or all-null array returns zero.
<c>Min</c>, <c>Max</c> and
+ /// <c>Mean</c> throw <see cref="InvalidOperationException"/> when the
array contains no non-null
+ /// elements, matching <see
cref="System.Linq.Enumerable.Min{TSource}(System.Collections.Generic.IEnumerable{TSource})"/>
+ /// and <see
cref="System.Linq.Enumerable.Average(System.Collections.Generic.IEnumerable{int})"/>.
+ /// </para>
+ /// <para>
+ /// On net8.0 and later the kernels are generic over <see
cref="INumber{TSelf}"/> and, when the
+ /// array has no nulls, dispatch to <see cref="TensorPrimitives"/> for a
SIMD-accelerated single
+ /// pass over the contiguous values buffer; when nulls are present they
fall back to a correct,
+ /// validity-aware scalar loop. On netstandard2.0 and net462 (where
generic math and
+ /// <see cref="TensorPrimitives"/> are unavailable) the kernels are
provided as per-type overloads
+ /// (<see cref="Int32Array"/>, <see cref="Int64Array"/>, <see
cref="FloatArray"/>,
+ /// <see cref="DoubleArray"/>) backed by scalar loops with the same null
semantics.
+ /// </para>
+ /// </remarks>
+ public static class Aggregations
+ {
+#if NET8_0_OR_GREATER
+ /// <summary>Sums the non-null elements. Returns zero for an empty or
all-null array.</summary>
+ public static T Sum<T>(this PrimitiveArray<T> array)
+ where T : unmanaged, INumber<T>
+ {
+ if (array is null) throw new ArgumentNullException(nameof(array));
+
+ ReadOnlySpan<T> values = array.Values;
+
+ if (array.NullCount == 0)
+ {
+ return TensorPrimitives.Sum(values);
+ }
+
+ T acc = T.Zero;
+ for (int i = 0; i < values.Length; i++)
+ {
+ if (array.IsValid(i))
+ {
+ acc += values[i];
+ }
+ }
+ return acc;
+ }
+
+ /// <summary>Returns the smallest non-null element.</summary>
+ /// <exception cref="InvalidOperationException">The array contains no
non-null elements.</exception>
+ public static T Min<T>(this PrimitiveArray<T> array)
+ where T : unmanaged, INumber<T>
+ {
+ if (array is null) throw new ArgumentNullException(nameof(array));
+
+ ReadOnlySpan<T> values = array.Values;
+
+ if (values.Length == 0 || array.Length - array.NullCount == 0)
+ {
+ throw new InvalidOperationException("Sequence contains no
non-null elements.");
+ }
+
+ if (array.NullCount == 0)
+ {
+ return TensorPrimitives.Min(values);
+ }
+
+ bool set = false;
+ T min = T.Zero;
+ for (int i = 0; i < values.Length; i++)
+ {
+ if (!array.IsValid(i)) continue;
+ if (!set) { min = values[i]; set = true; }
+ else if (values[i] < min) { min = values[i]; }
+ }
+ return min;
+ }
+
+ /// <summary>Returns the largest non-null element.</summary>
+ /// <exception cref="InvalidOperationException">The array contains no
non-null elements.</exception>
+ public static T Max<T>(this PrimitiveArray<T> array)
+ where T : unmanaged, INumber<T>
+ {
+ if (array is null) throw new ArgumentNullException(nameof(array));
+
+ ReadOnlySpan<T> values = array.Values;
+
+ if (values.Length == 0 || array.Length - array.NullCount == 0)
+ {
+ throw new InvalidOperationException("Sequence contains no
non-null elements.");
+ }
+
+ if (array.NullCount == 0)
+ {
+ return TensorPrimitives.Max(values);
+ }
+
+ bool set = false;
+ T max = T.Zero;
+ for (int i = 0; i < values.Length; i++)
+ {
+ if (!array.IsValid(i)) continue;
+ if (!set) { max = values[i]; set = true; }
+ else if (values[i] > max) { max = values[i]; }
+ }
+ return max;
+ }
+
+ /// <summary>Returns the arithmetic mean of the non-null elements as a
<see cref="double"/>.</summary>
+ /// <exception cref="InvalidOperationException">The array contains no
non-null elements.</exception>
+ public static double Mean<T>(this PrimitiveArray<T> array)
+ where T : unmanaged, INumber<T>
+ {
+ if (array is null) throw new ArgumentNullException(nameof(array));
+
+ long count = array.Length - array.NullCount;
+ if (count == 0)
+ {
+ throw new InvalidOperationException("Sequence contains no
non-null elements.");
+ }
+
+ T sum = array.Sum();
+ return double.CreateChecked(sum) / count;
+ }
+#else
+ // netstandard2.0 / net462 fallback: generic math and TensorPrimitives
are unavailable, so the
+ // kernels are provided as per-type overloads backed by validity-aware
scalar loops. The null
+ // semantics match the generic net8.0+ implementation above.
+
+ private const string NoElements = "Sequence contains no non-null
elements.";
Review Comment:
This could be moved above the `#if NET8_0_OR_GREATER` and reused for the
.NET 8 implementations too.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]