CurtHagenlocher commented on code in PR #257: URL: https://github.com/apache/arrow-dotnet/pull/257#discussion_r2780517768
########## src/Apache.Arrow.Operations/Apache.Arrow.Operations.csproj: ########## @@ -0,0 +1,16 @@ +<Project Sdk="Microsoft.NET.Sdk"> + + <PropertyGroup> + </PropertyGroup> + + <PropertyGroup> + <TargetFrameworks>net8.0</TargetFrameworks> Review Comment: Okay, we obviously wouldn't be able to use the `INumber`-based methods and would have to replace them with individual implementations by type. That could perhaps be done another time. ########## src/Apache.Arrow.Operations/Comparison.cs: ########## @@ -0,0 +1,390 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +using System; +using System.Numerics; +using Apache.Arrow; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Operations; + +public static class Comparison +{ + /// <summary> + /// Negate a boolean array, flipping true to false, false to true. Nulls remain null + /// </summary> + /// <param name="mask"></param> + /// <param name="allocator"></param> + /// <returns></returns> + public static BooleanArray Invert(BooleanArray mask, MemoryAllocator? 
allocator = null) + { + var builder = new BooleanArray.Builder(); + builder.Reserve(mask.Length); + foreach (var val in mask) + { + if (val != null) + { + builder.Append(!(bool)val); + } + else + { + builder.AppendNull(); + } + } + return builder.Build(allocator); + } + + /// <summary> + /// Perform a pairwise boolean AND operation. + /// </summary> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + /// <exception cref="InvalidOperationException"></exception> + public static BooleanArray And(BooleanArray lhs, BooleanArray rhs, MemoryAllocator? allocator = null) + { + if (lhs.Length != rhs.Length) throw new InvalidOperationException("Arrays must have the same length"); Review Comment: Maybe `ArgumentException` is more appropriate for this kind of error? (I'm not sure.) ########## src/Apache.Arrow.Operations/Comparison.cs: ########## @@ -0,0 +1,390 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + +using System; +using System.Numerics; +using Apache.Arrow; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Operations; + +public static class Comparison +{ + /// <summary> + /// Negate a boolean array, flipping true to false, false to true. Nulls remain null + /// </summary> + /// <param name="mask"></param> + /// <param name="allocator"></param> + /// <returns></returns> + public static BooleanArray Invert(BooleanArray mask, MemoryAllocator? allocator = null) + { + var builder = new BooleanArray.Builder(); + builder.Reserve(mask.Length); + foreach (var val in mask) + { + if (val != null) + { + builder.Append(!(bool)val); + } + else + { + builder.AppendNull(); + } + } + return builder.Build(allocator); + } + + /// <summary> + /// Perform a pairwise boolean AND operation. + /// </summary> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + /// <exception cref="InvalidOperationException"></exception> + public static BooleanArray And(BooleanArray lhs, BooleanArray rhs, MemoryAllocator? allocator = null) + { + if (lhs.Length != rhs.Length) throw new InvalidOperationException("Arrays must have the same length"); + var builder = new BooleanArray.Builder(); + builder.Reserve(lhs.Length); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetValue(i); + var b = rhs.GetValue(i); + if (a != null && b != null) + { + builder.Append((bool)a && (bool)b); + } + else + { + builder.AppendNull(); + } + } + return builder.Build(allocator); + } + + /// <summary> + /// Performa a pairwise boolean OR operation. + /// </summary> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + /// <exception cref="InvalidOperationException"></exception> + public static BooleanArray Or(BooleanArray lhs, BooleanArray rhs, MemoryAllocator? 
allocator = null) + { + if (lhs.Length != rhs.Length) throw new InvalidOperationException("Arrays must have the same length"); + var builder = new BooleanArray.Builder(); + builder.Reserve(lhs.Length); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetValue(i); + var b = rhs.GetValue(i); + if (a != null && b != null) + { + builder.Append((bool)a || (bool)b); + } + else + { + builder.AppendNull(); + } + } + return builder.Build(allocator); + } + + /// <summary> + /// Performa a pairwise boolean XOR operation. + /// </summary> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + /// <exception cref="InvalidOperationException"></exception> + public static BooleanArray Xor(BooleanArray lhs, BooleanArray rhs, MemoryAllocator? allocator = null) + { + if (lhs.Length != rhs.Length) throw new InvalidOperationException("Arrays must have the same length"); + var builder = new BooleanArray.Builder(); + builder.Reserve(lhs.Length); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetValue(i); + var b = rhs.GetValue(i); + if (a != null && b != null) + { + builder.Append((bool)a ^ (bool)b); + } + else + { + builder.AppendNull(); + } + } + return builder.Build(allocator); + } + + /// <summary> + /// Compare each value in `lhs` to a scalar `rhs`, returning boolean mask + /// </summary> + /// <typeparam name="T"></typeparam> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + public static BooleanArray Equal<T>(PrimitiveArray<T> lhs, T rhs, MemoryAllocator? 
allocator = null) where T : struct, INumber<T> + { + var cmp = new BooleanArray.Builder(); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetValue(i); + var flag = a == rhs; + cmp.Append(flag); + } + return cmp.Build(allocator); + } + + /// <summary> + /// Perform a pairwise comparison between each position in `lhs` and `rhs`, returning a boolean mask + /// </summary> + /// <typeparam name="T"></typeparam> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + /// <exception cref="InvalidOperationException"></exception> + public static BooleanArray Equal<T>(PrimitiveArray<T> lhs, PrimitiveArray<T> rhs, MemoryAllocator? allocator = null) where T : struct, INumber<T> + { + var cmp = new BooleanArray.Builder(); Review Comment: Nit: move the allocation to after the length check. Also applies to a few other methods. ########## src/Apache.Arrow.Operations/Comparison.cs: ########## @@ -0,0 +1,390 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ + +using System; +using System.Numerics; +using Apache.Arrow; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Operations; + +public static class Comparison +{ + /// <summary> + /// Negate a boolean array, flipping true to false, false to true. Nulls remain null + /// </summary> + /// <param name="mask"></param> + /// <param name="allocator"></param> + /// <returns></returns> + public static BooleanArray Invert(BooleanArray mask, MemoryAllocator? allocator = null) + { + var builder = new BooleanArray.Builder(); + builder.Reserve(mask.Length); + foreach (var val in mask) + { + if (val != null) + { + builder.Append(!(bool)val); + } + else + { + builder.AppendNull(); + } + } + return builder.Build(allocator); + } + + /// <summary> + /// Perform a pairwise boolean AND operation. + /// </summary> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + /// <exception cref="InvalidOperationException"></exception> + public static BooleanArray And(BooleanArray lhs, BooleanArray rhs, MemoryAllocator? allocator = null) + { + if (lhs.Length != rhs.Length) throw new InvalidOperationException("Arrays must have the same length"); + var builder = new BooleanArray.Builder(); + builder.Reserve(lhs.Length); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetValue(i); + var b = rhs.GetValue(i); + if (a != null && b != null) + { + builder.Append((bool)a && (bool)b); + } + else + { + builder.AppendNull(); + } + } + return builder.Build(allocator); + } + + /// <summary> + /// Performa a pairwise boolean OR operation. + /// </summary> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + /// <exception cref="InvalidOperationException"></exception> + public static BooleanArray Or(BooleanArray lhs, BooleanArray rhs, MemoryAllocator? 
allocator = null) + { + if (lhs.Length != rhs.Length) throw new InvalidOperationException("Arrays must have the same length"); + var builder = new BooleanArray.Builder(); + builder.Reserve(lhs.Length); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetValue(i); + var b = rhs.GetValue(i); + if (a != null && b != null) + { + builder.Append((bool)a || (bool)b); + } + else + { + builder.AppendNull(); + } + } + return builder.Build(allocator); + } + + /// <summary> + /// Performa a pairwise boolean XOR operation. + /// </summary> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + /// <exception cref="InvalidOperationException"></exception> + public static BooleanArray Xor(BooleanArray lhs, BooleanArray rhs, MemoryAllocator? allocator = null) + { + if (lhs.Length != rhs.Length) throw new InvalidOperationException("Arrays must have the same length"); + var builder = new BooleanArray.Builder(); + builder.Reserve(lhs.Length); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetValue(i); + var b = rhs.GetValue(i); + if (a != null && b != null) + { + builder.Append((bool)a ^ (bool)b); + } + else + { + builder.AppendNull(); + } + } + return builder.Build(allocator); + } + + /// <summary> + /// Compare each value in `lhs` to a scalar `rhs`, returning boolean mask + /// </summary> + /// <typeparam name="T"></typeparam> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + public static BooleanArray Equal<T>(PrimitiveArray<T> lhs, T rhs, MemoryAllocator? 
allocator = null) where T : struct, INumber<T> + { + var cmp = new BooleanArray.Builder(); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetValue(i); + var flag = a == rhs; + cmp.Append(flag); + } + return cmp.Build(allocator); + } + + /// <summary> + /// Perform a pairwise comparison between each position in `lhs` and `rhs`, returning a boolean mask + /// </summary> + /// <typeparam name="T"></typeparam> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + /// <exception cref="InvalidOperationException"></exception> + public static BooleanArray Equal<T>(PrimitiveArray<T> lhs, PrimitiveArray<T> rhs, MemoryAllocator? allocator = null) where T : struct, INumber<T> + { + var cmp = new BooleanArray.Builder(); + if (lhs.Length != rhs.Length) throw new InvalidOperationException("Arrays must have the same length"); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetValue(i); + var b = rhs.GetValue(i); + var flag = a == b; + cmp.Append(flag); + } + return cmp.Build(allocator); + } + + /// <summary> + /// Compare each value in `lhs` to a scalar `rhs`, returning boolean mask + /// </summary> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + public static BooleanArray Equal(StringArray lhs, string rhs, MemoryAllocator? 
allocator = null) + { + var cmp = new BooleanArray.Builder(); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetString(i); + var flag = a == rhs; + cmp.Append(flag); + } + return cmp.Build(allocator); + } + + /// <summary> + /// Perform a pairwise comparison between each position in `lhs` and `rhs`, returning a boolean mask + /// </summary> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + /// <exception cref="InvalidOperationException"></exception> + public static BooleanArray Equal(StringArray lhs, StringArray rhs, MemoryAllocator? allocator = null) + { + var cmp = new BooleanArray.Builder(); + if (lhs.Length != rhs.Length) throw new InvalidOperationException("Arrays must have the same length"); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetString(i); + var b = rhs.GetString(i); + var flag = a == b; + cmp.Append(flag); + } + return cmp.Build(allocator); + } + + /// <summary> + /// Compare each value in `lhs` to a scalar `rhs`, returning boolean mask + /// </summary> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + public static BooleanArray Equal(LargeStringArray lhs, string rhs, MemoryAllocator? allocator = null) + { + var cmp = new BooleanArray.Builder(); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetString(i); + var flag = a == rhs; + cmp.Append(flag); + } + return cmp.Build(allocator); + } + + /// <summary> + /// Perform a pairwise comparison between each position in `lhs` and `rhs`, returning a boolean mask + /// </summary> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + /// <exception cref="InvalidOperationException"></exception> + public static BooleanArray Equal(LargeStringArray lhs, LargeStringArray rhs, MemoryAllocator? 
allocator = null) + { + var cmp = new BooleanArray.Builder(); + if (lhs.Length != rhs.Length) throw new InvalidOperationException("Arrays must have the same length"); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetString(i); + var b = rhs.GetString(i); + var flag = a == b; + cmp.Append(flag); + } + return cmp.Build(allocator); + } + + /// <summary> + /// A dispatching comparison between a string array and a single string. If the `lhs` is not some flavor + /// of string array, an exception is thrown. + /// </summary> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + /// <exception cref="InvalidDataException"></exception> + public static BooleanArray Equal(IArrowArray lhs, string rhs, MemoryAllocator? allocator = null) + { + switch (lhs.Data.DataType.TypeId) + { + case ArrowTypeId.String: + return Equal((StringArray)lhs, rhs, allocator); + case ArrowTypeId.LargeString: + return Equal((LargeStringArray)lhs, rhs, allocator); + default: + throw new InvalidDataException("Unsupported data type " + lhs.Data.DataType.Name); + } + } + + public static BooleanArray GreaterThan<T>(PrimitiveArray<T> lhs, T rhs, MemoryAllocator? allocator = null) where T : struct, INumber<T> Review Comment: The `INumber`-based methods don't seem to take null values into account either. ########## src/Apache.Arrow.Operations/Apache.Arrow.Operations.csproj: ########## @@ -0,0 +1,16 @@ +<Project Sdk="Microsoft.NET.Sdk"> + + <PropertyGroup> + </PropertyGroup> + + <PropertyGroup> + <TargetFrameworks>net8.0</TargetFrameworks> + <ImplicitUsings>enable</ImplicitUsings> + <Nullable>enable</Nullable> Review Comment: The other projects don't enable `Nullable` yet but there's no good reason not to enable it here. They also don't enable `ImplicitUsings` but I don't have the experience to know why we would want or not want it. 
########## src/Apache.Arrow.Operations/Comparison.cs: ########## @@ -0,0 +1,390 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +using System; +using System.Numerics; +using Apache.Arrow; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Operations; + +public static class Comparison +{ + /// <summary> + /// Negate a boolean array, flipping true to false, false to true. Nulls remain null + /// </summary> + /// <param name="mask"></param> + /// <param name="allocator"></param> + /// <returns></returns> + public static BooleanArray Invert(BooleanArray mask, MemoryAllocator? allocator = null) + { + var builder = new BooleanArray.Builder(); + builder.Reserve(mask.Length); + foreach (var val in mask) + { + if (val != null) + { + builder.Append(!(bool)val); + } + else + { + builder.AppendNull(); + } + } + return builder.Build(allocator); + } + + /// <summary> + /// Perform a pairwise boolean AND operation. 
+ /// </summary> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + /// <exception cref="InvalidOperationException"></exception> + public static BooleanArray And(BooleanArray lhs, BooleanArray rhs, MemoryAllocator? allocator = null) + { + if (lhs.Length != rhs.Length) throw new InvalidOperationException("Arrays must have the same length"); + var builder = new BooleanArray.Builder(); + builder.Reserve(lhs.Length); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetValue(i); + var b = rhs.GetValue(i); + if (a != null && b != null) + { + builder.Append((bool)a && (bool)b); + } + else + { + builder.AppendNull(); + } + } + return builder.Build(allocator); + } + + /// <summary> + /// Performa a pairwise boolean OR operation. + /// </summary> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + /// <exception cref="InvalidOperationException"></exception> + public static BooleanArray Or(BooleanArray lhs, BooleanArray rhs, MemoryAllocator? allocator = null) + { + if (lhs.Length != rhs.Length) throw new InvalidOperationException("Arrays must have the same length"); + var builder = new BooleanArray.Builder(); + builder.Reserve(lhs.Length); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetValue(i); + var b = rhs.GetValue(i); + if (a != null && b != null) + { + builder.Append((bool)a || (bool)b); + } + else + { + builder.AppendNull(); + } + } + return builder.Build(allocator); + } + + /// <summary> + /// Performa a pairwise boolean XOR operation. + /// </summary> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + /// <exception cref="InvalidOperationException"></exception> + public static BooleanArray Xor(BooleanArray lhs, BooleanArray rhs, MemoryAllocator? 
allocator = null) + { + if (lhs.Length != rhs.Length) throw new InvalidOperationException("Arrays must have the same length"); + var builder = new BooleanArray.Builder(); + builder.Reserve(lhs.Length); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetValue(i); + var b = rhs.GetValue(i); + if (a != null && b != null) + { + builder.Append((bool)a ^ (bool)b); + } + else + { + builder.AppendNull(); + } + } + return builder.Build(allocator); + } + + /// <summary> + /// Compare each value in `lhs` to a scalar `rhs`, returning boolean mask + /// </summary> + /// <typeparam name="T"></typeparam> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + public static BooleanArray Equal<T>(PrimitiveArray<T> lhs, T rhs, MemoryAllocator? allocator = null) where T : struct, INumber<T> + { + var cmp = new BooleanArray.Builder(); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetValue(i); + var flag = a == rhs; + cmp.Append(flag); + } + return cmp.Build(allocator); + } + + /// <summary> + /// Perform a pairwise comparison between each position in `lhs` and `rhs`, returning a boolean mask + /// </summary> + /// <typeparam name="T"></typeparam> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + /// <exception cref="InvalidOperationException"></exception> + public static BooleanArray Equal<T>(PrimitiveArray<T> lhs, PrimitiveArray<T> rhs, MemoryAllocator? 
allocator = null) where T : struct, INumber<T> + { + var cmp = new BooleanArray.Builder(); + if (lhs.Length != rhs.Length) throw new InvalidOperationException("Arrays must have the same length"); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetValue(i); + var b = rhs.GetValue(i); + var flag = a == b; + cmp.Append(flag); + } + return cmp.Build(allocator); + } + + /// <summary> + /// Compare each value in `lhs` to a scalar `rhs`, returning boolean mask + /// </summary> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + public static BooleanArray Equal(StringArray lhs, string rhs, MemoryAllocator? allocator = null) Review Comment: Should `rhs` be nullable to allow a comparison to null? It's worth calling out in the documentation that this implements C# semantics, so `null == null`. These are different from SQL semantics where the value of `null == X` is `null`. I think that's the right thing to do, but it might be surprising to someone expecting more SQL-like behavior. ########## src/Apache.Arrow.Operations/Comparison.cs: ########## @@ -0,0 +1,390 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + +using System; +using System.Numerics; +using Apache.Arrow; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Operations; + +public static class Comparison +{ + /// <summary> + /// Negate a boolean array, flipping true to false, false to true. Nulls remain null + /// </summary> + /// <param name="mask"></param> + /// <param name="allocator"></param> + /// <returns></returns> + public static BooleanArray Invert(BooleanArray mask, MemoryAllocator? allocator = null) + { + var builder = new BooleanArray.Builder(); + builder.Reserve(mask.Length); + foreach (var val in mask) + { + if (val != null) + { + builder.Append(!(bool)val); + } + else + { + builder.AppendNull(); + } + } + return builder.Build(allocator); + } + + /// <summary> + /// Perform a pairwise boolean AND operation. + /// </summary> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + /// <exception cref="InvalidOperationException"></exception> + public static BooleanArray And(BooleanArray lhs, BooleanArray rhs, MemoryAllocator? allocator = null) + { + if (lhs.Length != rhs.Length) throw new InvalidOperationException("Arrays must have the same length"); + var builder = new BooleanArray.Builder(); + builder.Reserve(lhs.Length); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetValue(i); + var b = rhs.GetValue(i); + if (a != null && b != null) + { + builder.Append((bool)a && (bool)b); + } + else + { + builder.AppendNull(); + } + } + return builder.Build(allocator); + } + + /// <summary> + /// Performa a pairwise boolean OR operation. + /// </summary> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + /// <exception cref="InvalidOperationException"></exception> + public static BooleanArray Or(BooleanArray lhs, BooleanArray rhs, MemoryAllocator? 
allocator = null) + { + if (lhs.Length != rhs.Length) throw new InvalidOperationException("Arrays must have the same length"); + var builder = new BooleanArray.Builder(); + builder.Reserve(lhs.Length); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetValue(i); + var b = rhs.GetValue(i); + if (a != null && b != null) + { + builder.Append((bool)a || (bool)b); + } + else + { + builder.AppendNull(); + } + } + return builder.Build(allocator); + } + + /// <summary> + /// Performa a pairwise boolean XOR operation. + /// </summary> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + /// <exception cref="InvalidOperationException"></exception> + public static BooleanArray Xor(BooleanArray lhs, BooleanArray rhs, MemoryAllocator? allocator = null) + { + if (lhs.Length != rhs.Length) throw new InvalidOperationException("Arrays must have the same length"); + var builder = new BooleanArray.Builder(); + builder.Reserve(lhs.Length); + for (int i = 0; i < lhs.Length; i++) + { + var a = lhs.GetValue(i); + var b = rhs.GetValue(i); + if (a != null && b != null) + { + builder.Append((bool)a ^ (bool)b); + } + else + { + builder.AppendNull(); + } + } + return builder.Build(allocator); + } + + /// <summary> + /// Compare each value in `lhs` to a scalar `rhs`, returning boolean mask + /// </summary> + /// <typeparam name="T"></typeparam> + /// <param name="lhs"></param> + /// <param name="rhs"></param> + /// <param name="allocator"></param> + /// <returns></returns> + public static BooleanArray Equal<T>(PrimitiveArray<T> lhs, T rhs, MemoryAllocator? allocator = null) where T : struct, INumber<T> Review Comment: This doesn't seem to take null values into account. ########## src/Apache.Arrow.Operations/Comparison.cs: ########## @@ -0,0 +1,390 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. 
See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +using System; +using System.Numerics; +using Apache.Arrow; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Operations; + +public static class Comparison +{ + /// <summary> + /// Negate a boolean array, flipping true to false, false to true. Nulls remain null + /// </summary> + /// <param name="mask"></param> + /// <param name="allocator"></param> + /// <returns></returns> + public static BooleanArray Invert(BooleanArray mask, MemoryAllocator? allocator = null) + { + var builder = new BooleanArray.Builder(); + builder.Reserve(mask.Length); + foreach (var val in mask) Review Comment: The same approach could also be used for the binary operations, except that the validity buffers would need to be ANDed. In general, I think it's best to implement these primitive operations in terms of `ArrowBuffer` and then have the Array-based methods call into those. ########## src/Apache.Arrow.Operations/Comparison.cs: ########## @@ -0,0 +1,390 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. 
+// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +using System; +using System.Numerics; +using Apache.Arrow; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Operations; + +public static class Comparison +{ + /// <summary> + /// Negate a boolean array, flipping true to false, false to true. Nulls remain null + /// </summary> + /// <param name="mask"></param> + /// <param name="allocator"></param> + /// <returns></returns> + public static BooleanArray Invert(BooleanArray mask, MemoryAllocator? allocator = null) + { + var builder = new BooleanArray.Builder(); + builder.Reserve(mask.Length); Review Comment: Note to myself that I really need to add the initial size to the builder ctor. Or you could do it ;). ########## src/Apache.Arrow.Operations/Comparison.cs: ########## @@ -0,0 +1,390 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +using System; +using System.Numerics; +using Apache.Arrow; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Operations; + +public static class Comparison +{ + /// <summary> + /// Negate a boolean array, flipping true to false, false to true. Nulls remain null + /// </summary> + /// <param name="mask"></param> + /// <param name="allocator"></param> + /// <returns></returns> + public static BooleanArray Invert(BooleanArray mask, MemoryAllocator? allocator = null) + { + var builder = new BooleanArray.Builder(); + builder.Reserve(mask.Length); + foreach (var val in mask) Review Comment: This can be done much more efficiently by getting the underlying `ArrowBuffer`s from the source array and operating on them directly. If the validity buffer is set, it can be copied as-is (we don't have a way to share buffers, unfortunately). For the value buffer, we could allocate it with `ArrowBuffer.Builder` and then get its `Span` property. For both the `ReadOnlySpan` on the source buffer and the `Span` on the target buffer, we could then cast them to `(ReadOnly)Span<ulong>` and invert 64 bits at a time. To some degree this depends on how interested you are in going down the optimization rabbit hole. You could also use BenchmarkDotNet and try it both ways to see what kind of difference it makes. The extreme version of this kind of thing involves learning about some of the SIMD intrinsics that were added to .NET, like [Avx2](https://learn.microsoft.com/en-us/dotnet/api/system.runtime.intrinsics.x86.avx2?view=net-10.0).
I started down this path in 2023 but never ended up having the time for it ... . ########## src/Apache.Arrow.Operations/Apache.Arrow.Operations.csproj: ########## @@ -0,0 +1,16 @@ +<Project Sdk="Microsoft.NET.Sdk"> + + <PropertyGroup> + </PropertyGroup> + + <PropertyGroup> + <TargetFrameworks>net8.0</TargetFrameworks> Review Comment: Can we target netstandard2.0 also or do you expect it to be hard to support? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
