eerhardt commented on a change in pull request #7158:
URL: https://github.com/apache/arrow/pull/7158#discussion_r428142187



##########
File path: csharp/test/Apache.Arrow.Tests/BooleanArrayTests.cs
##########
@@ -18,6 +18,8 @@
 
 namespace Apache.Arrow.Tests
 {
+    using System.Linq;

Review comment:
       This should go at the top of the file with the rest of the usings.

##########
File path: csharp/src/Apache.Arrow/ArrowBuffer.Builder.cs
##########
@@ -22,51 +22,111 @@ namespace Apache.Arrow
 {
     public partial struct ArrowBuffer
     {
+        /// <summary>
+        /// The <see cref="Builder{T}"/> class is able to append value-type 
items, with fluent-style methods, to build
+        /// up an <see cref="ArrowBuffer"/> of contiguous items.
+        /// </summary>
+        /// <remarks>
+        /// Note that <see cref="bool"/> is not supported as a generic type 
argument for this class.  Please use
+        /// <see cref="BitPackedBuilder"/> instead.
+        /// </remarks>
+        /// <typeparam name="T">Value-type of item to build into a 
buffer.</typeparam>
         public class Builder<T>
             where T : struct
         {
             private const int DefaultCapacity = 8;
 
             private readonly int _size;
 
+            /// <summary>
+            /// Gets the number of items of current capacity.
+            /// </summary>
             public int Capacity => Memory.Length / _size;
+
+            /// <summary>
+            /// Gets the number of items currently appended.
+            /// </summary>
             public int Length { get; private set; }
+
+            /// <summary>
+            /// Gets the raw byte memory underpinning the builder.
+            /// </summary>
             public Memory<byte> Memory { get; private set; }
+
+            /// <summary>
+            /// Gets the span of memory underpinning the builder.
+            /// </summary>
             public Span<T> Span
             {
                 [MethodImpl(MethodImplOptions.AggressiveInlining)]
                 get => Memory.Span.CastTo<T>();
             }
 
+            /// <summary>
+            /// Creates an instance of the <see cref="Builder{T}"/> class.
+            /// </summary>
+            /// <param name="capacity">Number of items of initial capacity to 
reserve.</param>
             public Builder(int capacity = DefaultCapacity)
             {
+                // Using `bool` as the template argument, if used in an 
unrestricted fashion, would result in a buffer
+                // with inappropriate contents being produced.  Because C# 
does not support template specialisation,
+                // and because generic type constraints do not support 
negation, we will throw a runtime error to
+                // indicate that such a template type is not supported.
+                if (typeof(T) == typeof(bool))
+                {
+                    throw new ArgumentException(
+                        $"An instance of {nameof(Builder<T>)} cannot be 
instantiated, as `bool` is not an " +
+                        $"appropriate generic type to use with this class - 
please use {nameof(BitPackedBuilder)} " +
+                        $"instead");
+                }

Review comment:
       I think this approach is fine. We do similar things in places in the 
runtime, for example:
   
   
https://github.com/dotnet/runtime/blob/49f59a3cb935523c07187ce26bb03c7fa7fd66bd/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs#L349
   
   The only thing I would change here is to throw a `NotSupportedException` 
instead of an `ArgumentException`. `ArgumentException`s are for when an 
argument passed to the method is invalid, but here it is the generic type 
argument that is unsupported.

##########
File path: csharp/src/Apache.Arrow/ArrowBuffer.Builder.cs
##########
@@ -22,51 +22,111 @@ namespace Apache.Arrow
 {
     public partial struct ArrowBuffer
     {
+        /// <summary>
+        /// The <see cref="Builder{T}"/> class is able to append value-type 
items, with fluent-style methods, to build
+        /// up an <see cref="ArrowBuffer"/> of contiguous items.
+        /// </summary>
+        /// <remarks>
+        /// Note that <see cref="bool"/> is not supported as a generic type 
argument for this class.  Please use
+        /// <see cref="BitPackedBuilder"/> instead.
+        /// </remarks>
+        /// <typeparam name="T">Value-type of item to build into a 
buffer.</typeparam>
         public class Builder<T>
             where T : struct
         {
             private const int DefaultCapacity = 8;
 
             private readonly int _size;
 
+            /// <summary>
+            /// Gets the number of items of current capacity.

Review comment:
       I think I'd borrow from StringBuilder's comments here:
   
   
https://docs.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.capacity?view=netcore-3.1
   
   > Gets the maximum number of items that can be contained in the memory 
allocated by the current instance.

##########
File path: csharp/src/Apache.Arrow/ArrowBuffer.Builder.cs
##########
@@ -80,27 +140,59 @@ public Builder<T> AppendRange(IEnumerable<T> values)
                 return this;
             }
 
-            public Builder<T> Reserve(int capacity)
+            /// <summary>
+            /// Reserve a given number of items' additional capacity.
+            /// </summary>
+            /// <param name="additionalCapacity">Number of items of required 
additional capacity.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public Builder<T> Reserve(int additionalCapacity)
             {
-                EnsureCapacity(capacity);
+                if (additionalCapacity < 0)
+                {
+                    throw new 
ArgumentOutOfRangeException(nameof(additionalCapacity));
+                }
+
+                EnsureAdditionalCapacity(additionalCapacity);
                 return this;
             }
 
+            /// <summary>
+            /// Resize the buffer to a given size.
+            /// </summary>
+            /// <remarks>
+            /// Note that if the required capacity is smaller than the current 
length of the populated buffer so far,
+            /// the buffer will be truncated and items at the end of the 
buffer will be lost.
+            /// </remarks>
+            /// <remarks>
+            /// Note also that a negative capacity will result in the buffer 
being resized to zero.
+            /// </remarks>
+            /// <param name="capacity">Number of items of required 
capacity.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
             public Builder<T> Resize(int capacity)
             {
+                capacity = capacity < 0 ? 0 : capacity;
                 EnsureCapacity(capacity);
                 Length = Math.Max(0, capacity);
 
                 return this;
             }
 
+            /// <summary>
+            /// Clear all contents appended so far.
+            /// </summary>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
             public Builder<T> Clear()
             {
                 Span.Fill(default);
                 Length = 0;
                 return this;
             }
 
+            /// <summary>
+            /// Build an Arrow buffer from the appended contents so far.
+            /// </summary>
+            /// <param name="allocator">Optional memory allocator.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>

Review comment:
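       This `returns` is incorrect: per its summary, this method builds an 
Arrow buffer, so it does not return the builder for fluent-style composition.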

##########
File path: csharp/src/Apache.Arrow/ArrowBuffer.BitPackedBuilder.cs
##########
@@ -0,0 +1,268 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+namespace Apache.Arrow
+{
+    using System;
+    using System.Collections.Generic;
+    using System.Diagnostics;
+    using Apache.Arrow.Memory;
+
+    public partial struct ArrowBuffer
+    {
+        /// <summary>
+        /// The <see cref="ArrowBuffer.BitPackedBuilder"/> class is a 
complement to <see cref="ArrowBuffer.Builder{T}"/>
+        /// and is designed for boolean fields, which are efficiently 
bit-packed into byte-aligned memory.
+        /// </summary>
+        public class BitPackedBuilder

Review comment:
       What do you think about naming this `BitmapBuilder` or 
`BitVectorBuilder`? I haven't seen `BitPacked` in a lot of documentation.
   
   The docs use `bitmap`:
   
   https://arrow.apache.org/docs/format/Columnar.html#validity-bitmaps

##########
File path: csharp/src/Apache.Arrow/ArrowBuffer.BitPackedBuilder.cs
##########
@@ -0,0 +1,268 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+namespace Apache.Arrow
+{
+    using System;
+    using System.Collections.Generic;
+    using System.Diagnostics;
+    using Apache.Arrow.Memory;
+
+    public partial struct ArrowBuffer
+    {
+        /// <summary>
+        /// The <see cref="ArrowBuffer.BitPackedBuilder"/> class is a 
complement to <see cref="ArrowBuffer.Builder{T}"/>
+        /// and is designed for boolean fields, which are efficiently 
bit-packed into byte-aligned memory.
+        /// </summary>
+        public class BitPackedBuilder
+        {
+            private const int DefaultBitCapacity = 8;
+
+            /// <summary>
+            /// Gets the number of bits of current capacity.
+            /// </summary>
+            public int BitCapacity { get; private set; }
+
+            /// <summary>
+            /// Gets the number of bits currently appended.
+            /// </summary>
+            public int BitCount { get; private set; }
+
+            /// <summary>
+            /// Gets the raw byte memory underpinning the builder.
+            /// </summary>
+            public Memory<byte> Memory { get; private set; }
+
+            /// <summary>
+            /// Gets the span of (bit-packed byte) memory underpinning the 
builder.
+            /// </summary>
+            public Span<byte> Span => Memory.Span;
+
+            /// <summary>
+            /// Creates an instance of the <see cref="BitPackedBuilder"/> 
class.
+            /// </summary>
+            /// <param name="bitCapacity">Number of bits of initial capacity 
to reserve.</param>
+            public BitPackedBuilder(int bitCapacity = DefaultBitCapacity)
+            {
+                Memory = new byte[BitUtility.ByteCount(bitCapacity)];
+                BitCapacity = bitCapacity;
+                BitCount = 0;
+            }
+
+            /// <summary>
+            /// Append a single bit.
+            /// </summary>
+            /// <param name="bit">Bit to append.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public BitPackedBuilder Append(bool bit)
+            {
+                if (BitCount % 8 == 0)
+                {
+                    // Append a new byte to the buffer when needed.
+                    EnsureAdditionalCapacity(1);
+                    Span[BitCount / 8] = 0;
+                }
+
+                BitUtility.SetBit(Span, BitCount, bit);
+                BitCount++;
+                return this;
+            }
+
+            /// <summary>
+            /// Append multiple bits.
+            /// </summary>
+            /// <param name="bits">Bits to append.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public BitPackedBuilder AppendRange(IEnumerable<bool> bits)
+            {
+                if (bits != null)
+                {
+                    foreach (var v in bits)
+                    {
+                        Append(v);
+                    }
+                }
+
+                return this;
+            }
+
+            /// <summary>
+            /// Count the number of set bits (i.e. set to 1).
+            /// </summary>
+            /// <returns>Returns the number of set bits.</returns>
+            public int CountSetBits() => BitUtility.CountBits(this.Span);
+
+            /// <summary>
+            /// Count the number of unset bits (i.e. set to 0).
+            /// </summary>
+            /// <returns>Returns the number of unset bits.</returns>
+            public int CountUnsetBits() => this.BitCount - this.CountSetBits();
+
+            /// <summary>
+            /// Toggle the bit at a particular index.
+            /// </summary>
+            /// <param name="index">Index of bit to toggle.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public BitPackedBuilder Toggle(int index)
+            {
+                CheckIndex(index);
+                BitUtility.ToggleBit(Span, index);
+                return this;
+            }
+
+            /// <summary>
+            /// Set the bit at a particular index to 1.
+            /// </summary>
+            /// <param name="index">Index of bit to set.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public BitPackedBuilder Set(int index)
+            {
+                CheckIndex(index);
+                BitUtility.SetBit(Span, index);
+                return this;
+            }
+
+            /// <summary>
+            /// Set the bit at a particular index to a given value.
+            /// </summary>
+            /// <param name="index">Index of bit to set/unset.</param>
+            /// <param name="value">Value of bit.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public BitPackedBuilder Set(int index, bool value)
+            {
+                CheckIndex(index);
+                BitUtility.SetBit(Span, index, value);
+                return this;
+            }
+
+            /// <summary>
+            /// Swap the bits at two given indices.
+            /// </summary>
+            /// <param name="i">First index.</param>
+            /// <param name="j">Second index.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public BitPackedBuilder Swap(int i, int j)
+            {
+                CheckIndex(i);
+                CheckIndex(j);
+                var bi = BitUtility.GetBit(Span, i);
+                var bj = BitUtility.GetBit(Span, j);
+                BitUtility.SetBit(Span, i, bj);
+                BitUtility.SetBit(Span, j, bi);
+                return this;
+            }
+
+            /// <summary>
+            /// Reserve a given number of bits' additional capacity.
+            /// </summary>
+            /// <param name="bitAdditionalCapacity">Number of bits of required 
additional capacity.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public BitPackedBuilder Reserve(int bitAdditionalCapacity)

Review comment:
       As above, I would rather use "capacity" in this class, and not 
"BitCapacity". But if we aren't going to change that, I think this should be 
`additionalBitCapacity`; it seems to read better IMO.

##########
File path: csharp/src/Apache.Arrow/ArrowBuffer.BitPackedBuilder.cs
##########
@@ -0,0 +1,268 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+namespace Apache.Arrow
+{
+    using System;
+    using System.Collections.Generic;
+    using System.Diagnostics;
+    using Apache.Arrow.Memory;
+
+    public partial struct ArrowBuffer
+    {
+        /// <summary>
+        /// The <see cref="ArrowBuffer.BitPackedBuilder"/> class is a 
complement to <see cref="ArrowBuffer.Builder{T}"/>
+        /// and is designed for boolean fields, which are efficiently 
bit-packed into byte-aligned memory.
+        /// </summary>
+        public class BitPackedBuilder
+        {
+            private const int DefaultBitCapacity = 8;
+
+            /// <summary>
+            /// Gets the number of bits of current capacity.
+            /// </summary>
+            public int BitCapacity { get; private set; }
+
+            /// <summary>
+            /// Gets the number of bits currently appended.
+            /// </summary>
+            public int BitCount { get; private set; }
+
+            /// <summary>
+            /// Gets the raw byte memory underpinning the builder.
+            /// </summary>
+            public Memory<byte> Memory { get; private set; }
+
+            /// <summary>
+            /// Gets the span of (bit-packed byte) memory underpinning the 
builder.
+            /// </summary>
+            public Span<byte> Span => Memory.Span;
+
+            /// <summary>
+            /// Creates an instance of the <see cref="BitPackedBuilder"/> 
class.
+            /// </summary>
+            /// <param name="bitCapacity">Number of bits of initial capacity 
to reserve.</param>
+            public BitPackedBuilder(int bitCapacity = DefaultBitCapacity)
+            {
+                Memory = new byte[BitUtility.ByteCount(bitCapacity)];
+                BitCapacity = bitCapacity;
+                BitCount = 0;
+            }
+
+            /// <summary>
+            /// Append a single bit.
+            /// </summary>
+            /// <param name="bit">Bit to append.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public BitPackedBuilder Append(bool bit)
+            {
+                if (BitCount % 8 == 0)
+                {
+                    // Append a new byte to the buffer when needed.
+                    EnsureAdditionalCapacity(1);
+                    Span[BitCount / 8] = 0;

Review comment:
       Is `Span[BitCount / 8] = 0;` necessary? If `EnsureAdditionalCapacity` 
added more space, the new memory will be zeroed out by the runtime (since it is 
a new array and we only copy the existing memory to it).

##########
File path: csharp/src/Apache.Arrow/ArrowBuffer.Builder.cs
##########
@@ -80,27 +140,59 @@ public Builder<T> AppendRange(IEnumerable<T> values)
                 return this;
             }
 
-            public Builder<T> Reserve(int capacity)
+            /// <summary>
+            /// Reserve a given number of items' additional capacity.
+            /// </summary>
+            /// <param name="additionalCapacity">Number of items of required 
additional capacity.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public Builder<T> Reserve(int additionalCapacity)
             {
-                EnsureCapacity(capacity);
+                if (additionalCapacity < 0)
+                {
+                    throw new 
ArgumentOutOfRangeException(nameof(additionalCapacity));
+                }
+
+                EnsureAdditionalCapacity(additionalCapacity);
                 return this;
             }
 
+            /// <summary>
+            /// Resize the buffer to a given size.
+            /// </summary>
+            /// <remarks>
+            /// Note that if the required capacity is smaller than the current 
length of the populated buffer so far,
+            /// the buffer will be truncated and items at the end of the 
buffer will be lost.
+            /// </remarks>
+            /// <remarks>
+            /// Note also that a negative capacity will result in the buffer 
being resized to zero.
+            /// </remarks>
+            /// <param name="capacity">Number of items of required 
capacity.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
             public Builder<T> Resize(int capacity)
             {
+                capacity = capacity < 0 ? 0 : capacity;
                 EnsureCapacity(capacity);
                 Length = Math.Max(0, capacity);

Review comment:
       `capacity` can never be negative anymore after your change on line 173. 
So this can just be `Length = capacity;`

##########
File path: csharp/src/Apache.Arrow/ArrowBuffer.Builder.cs
##########
@@ -80,27 +140,59 @@ public Builder<T> AppendRange(IEnumerable<T> values)
                 return this;
             }
 
-            public Builder<T> Reserve(int capacity)
+            /// <summary>
+            /// Reserve a given number of items' additional capacity.
+            /// </summary>
+            /// <param name="additionalCapacity">Number of items of required 
additional capacity.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public Builder<T> Reserve(int additionalCapacity)
             {
-                EnsureCapacity(capacity);
+                if (additionalCapacity < 0)
+                {
+                    throw new 
ArgumentOutOfRangeException(nameof(additionalCapacity));
+                }
+
+                EnsureAdditionalCapacity(additionalCapacity);
                 return this;
             }
 
+            /// <summary>
+            /// Resize the buffer to a given size.
+            /// </summary>
+            /// <remarks>
+            /// Note that if the required capacity is smaller than the current 
length of the populated buffer so far,
+            /// the buffer will be truncated and items at the end of the 
buffer will be lost.
+            /// </remarks>
+            /// <remarks>
+            /// Note also that a negative capacity will result in the buffer 
being resized to zero.
+            /// </remarks>
+            /// <param name="capacity">Number of items of required 
capacity.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
             public Builder<T> Resize(int capacity)
             {
+                capacity = capacity < 0 ? 0 : capacity;
                 EnsureCapacity(capacity);

Review comment:
       Nice catch and fix.

##########
File path: csharp/src/Apache.Arrow/ArrowBuffer.BitPackedBuilder.cs
##########
@@ -0,0 +1,268 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+namespace Apache.Arrow
+{
+    using System;

Review comment:
       The usings should go outside the namespace (repeated elsewhere).

##########
File path: csharp/src/Apache.Arrow/ArrowBuffer.Builder.cs
##########
@@ -80,27 +140,59 @@ public Builder<T> AppendRange(IEnumerable<T> values)
                 return this;
             }
 
-            public Builder<T> Reserve(int capacity)
+            /// <summary>
+            /// Reserve a given number of items' additional capacity.
+            /// </summary>
+            /// <param name="additionalCapacity">Number of items of required 
additional capacity.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public Builder<T> Reserve(int additionalCapacity)
             {
-                EnsureCapacity(capacity);
+                if (additionalCapacity < 0)
+                {
+                    throw new 
ArgumentOutOfRangeException(nameof(additionalCapacity));
+                }
+
+                EnsureAdditionalCapacity(additionalCapacity);
                 return this;
             }
 
+            /// <summary>
+            /// Resize the buffer to a given size.
+            /// </summary>
+            /// <remarks>
+            /// Note that if the required capacity is smaller than the current 
length of the populated buffer so far,
+            /// the buffer will be truncated and items at the end of the 
buffer will be lost.
+            /// </remarks>
+            /// <remarks>
+            /// Note also that a negative capacity will result in the buffer 
being resized to zero.
+            /// </remarks>
+            /// <param name="capacity">Number of items of required 
capacity.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
             public Builder<T> Resize(int capacity)
             {
+                capacity = capacity < 0 ? 0 : capacity;

Review comment:
       Yes, I would keep this behavior. Especially since someone took the time 
to write that unit test.
   
   > I don't know why that is the requirement
   
   What else would you do? Throw an exception? I assume the thinking was "why 
throw when we can provide an acceptable non-throwing behavior?".

##########
File path: csharp/src/Apache.Arrow/ArrowBuffer.BitPackedBuilder.cs
##########
@@ -0,0 +1,268 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+namespace Apache.Arrow
+{
+    using System;
+    using System.Collections.Generic;
+    using System.Diagnostics;
+    using Apache.Arrow.Memory;
+
+    public partial struct ArrowBuffer
+    {
+        /// <summary>
+        /// The <see cref="ArrowBuffer.BitPackedBuilder"/> class is a 
complement to <see cref="ArrowBuffer.Builder{T}"/>
+        /// and is designed for boolean fields, which are efficiently 
bit-packed into byte-aligned memory.
+        /// </summary>
+        public class BitPackedBuilder
+        {
+            private const int DefaultBitCapacity = 8;

Review comment:
       I think I would multiply this by `4` (or possibly `8`). Allocating a 
single-byte array seems like it is too small. You aren't saving much 
memory-wise since .NET arrays are usually aligned in memory, which means 1 byte 
or 4 bytes are going to allocate the same amount of memory underneath.

##########
File path: csharp/src/Apache.Arrow/ArrowBuffer.BitPackedBuilder.cs
##########
@@ -0,0 +1,268 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+namespace Apache.Arrow
+{
+    using System;
+    using System.Collections.Generic;
+    using System.Diagnostics;
+    using Apache.Arrow.Memory;
+
+    public partial struct ArrowBuffer
+    {
+        /// <summary>
+        /// The <see cref="ArrowBuffer.BitPackedBuilder"/> class is a 
complement to <see cref="ArrowBuffer.Builder{T}"/>
+        /// and is designed for boolean fields, which are efficiently 
bit-packed into byte-aligned memory.
+        /// </summary>
+        public class BitPackedBuilder
+        {
+            private const int DefaultBitCapacity = 8;
+
+            /// <summary>
+            /// Gets the number of bits of current capacity.
+            /// </summary>
+            public int BitCapacity { get; private set; }
+
+            /// <summary>
+            /// Gets the number of bits currently appended.
+            /// </summary>
+            public int BitCount { get; private set; }
+
+            /// <summary>
+            /// Gets the raw byte memory underpinning the builder.
+            /// </summary>
+            public Memory<byte> Memory { get; private set; }
+
+            /// <summary>
+            /// Gets the span of (bit-packed byte) memory underpinning the 
builder.
+            /// </summary>
+            public Span<byte> Span => Memory.Span;
+
+            /// <summary>
+            /// Creates an instance of the <see cref="BitPackedBuilder"/> 
class.
+            /// </summary>
+            /// <param name="bitCapacity">Number of bits of initial capacity 
to reserve.</param>
+            public BitPackedBuilder(int bitCapacity = DefaultBitCapacity)
+            {
+                Memory = new byte[BitUtility.ByteCount(bitCapacity)];
+                BitCapacity = bitCapacity;
+                BitCount = 0;
+            }
+
+            /// <summary>
+            /// Append a single bit.
+            /// </summary>
+            /// <param name="bit">Bit to append.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public BitPackedBuilder Append(bool bit)

Review comment:
       ```suggestion
               public BitPackedBuilder Append(bool value)
   ```
   
   It is more typical to call an argument like this `value`.

##########
File path: csharp/src/Apache.Arrow/ArrowBuffer.BitPackedBuilder.cs
##########
@@ -0,0 +1,268 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+namespace Apache.Arrow
+{
+    using System;
+    using System.Collections.Generic;
+    using System.Diagnostics;
+    using Apache.Arrow.Memory;
+
+    public partial struct ArrowBuffer
+    {
+        /// <summary>
+        /// The <see cref="ArrowBuffer.BitPackedBuilder"/> class is a 
complement to <see cref="ArrowBuffer.Builder{T}"/>
+        /// and is designed for boolean fields, which are efficiently 
bit-packed into byte-aligned memory.
+        /// </summary>
+        public class BitPackedBuilder
+        {
+            private const int DefaultBitCapacity = 8;
+
+            /// <summary>
+            /// Gets the number of bits of current capacity.
+            /// </summary>
+            public int BitCapacity { get; private set; }
+
+            /// <summary>
+            /// Gets the number of bits currently appended.
+            /// </summary>
+            public int BitCount { get; private set; }
+
+            /// <summary>
+            /// Gets the raw byte memory underpinning the builder.
+            /// </summary>
+            public Memory<byte> Memory { get; private set; }
+
+            /// <summary>
+            /// Gets the span of (bit-packed byte) memory underpinning the 
builder.
+            /// </summary>
+            public Span<byte> Span => Memory.Span;
+
+            /// <summary>
+            /// Creates an instance of the <see cref="BitPackedBuilder"/> 
class.
+            /// </summary>
+            /// <param name="bitCapacity">Number of bits of initial capacity 
to reserve.</param>
+            public BitPackedBuilder(int bitCapacity = DefaultBitCapacity)
+            {
+                Memory = new byte[BitUtility.ByteCount(bitCapacity)];
+                BitCapacity = bitCapacity;
+                BitCount = 0;
+            }
+
+            /// <summary>
+            /// Append a single bit.
+            /// </summary>
+            /// <param name="bit">Bit to append.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public BitPackedBuilder Append(bool bit)
+            {
+                if (BitCount % 8 == 0)
+                {
+                    // Append a new byte to the buffer when needed.
+                    EnsureAdditionalCapacity(1);
+                    Span[BitCount / 8] = 0;
+                }
+
+                BitUtility.SetBit(Span, BitCount, bit);
+                BitCount++;
+                return this;
+            }
+
+            /// <summary>
+            /// Append multiple bits.
+            /// </summary>
+            /// <param name="bits">Bits to append.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public BitPackedBuilder AppendRange(IEnumerable<bool> bits)
+            {
+                if (bits != null)
+                {
+                    foreach (var v in bits)
+                    {
+                        Append(v);
+                    }
+                }
+
+                return this;
+            }
+
+            /// <summary>
+            /// Count the number of set bits (i.e. set to 1).
+            /// </summary>
+            /// <returns>Returns the number of set bits.</returns>
+            public int CountSetBits() => BitUtility.CountBits(this.Span);
+
+            /// <summary>
+            /// Count the number of unset bits (i.e. set to 0).
+            /// </summary>
+            /// <returns>Returns the number of unset bits.</returns>
+            public int CountUnsetBits() => this.BitCount - this.CountSetBits();
+
+            /// <summary>
+            /// Toggle the bit at a particular index.
+            /// </summary>
+            /// <param name="index">Index of bit to toggle.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public BitPackedBuilder Toggle(int index)
+            {
+                CheckIndex(index);
+                BitUtility.ToggleBit(Span, index);
+                return this;
+            }
+
+            /// <summary>
+            /// Set the bit at a particular index to 1.
+            /// </summary>
+            /// <param name="index">Index of bit to set.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public BitPackedBuilder Set(int index)
+            {
+                CheckIndex(index);
+                BitUtility.SetBit(Span, index);
+                return this;
+            }
+
+            /// <summary>
+            /// Set the bit at a particular index to a given value.
+            /// </summary>
+            /// <param name="index">Index of bit to set/unset.</param>
+            /// <param name="value">Value of bit.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public BitPackedBuilder Set(int index, bool value)
+            {
+                CheckIndex(index);
+                BitUtility.SetBit(Span, index, value);
+                return this;
+            }
+
+            /// <summary>
+            /// Swap the bits at two given indices.
+            /// </summary>
+            /// <param name="i">First index.</param>
+            /// <param name="j">Second index.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public BitPackedBuilder Swap(int i, int j)
+            {
+                CheckIndex(i);
+                CheckIndex(j);
+                var bi = BitUtility.GetBit(Span, i);
+                var bj = BitUtility.GetBit(Span, j);
+                BitUtility.SetBit(Span, i, bj);
+                BitUtility.SetBit(Span, j, bi);
+                return this;
+            }
+
+            /// <summary>
+            /// Reserve a given number of bits' additional capacity.
+            /// </summary>
+            /// <param name="bitAdditionalCapacity">Number of bits of required 
additional capacity.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public BitPackedBuilder Reserve(int bitAdditionalCapacity)
+            {
+                if (bitAdditionalCapacity < 0)
+                {
+                    throw new 
ArgumentOutOfRangeException(nameof(bitAdditionalCapacity));
+                }
+
+                EnsureAdditionalCapacity(bitAdditionalCapacity);
+                return this;
+            }
+
+            /// <summary>
+            /// Resize the buffer to a given size.
+            /// </summary>
+            /// <remarks>
+            /// Note that if the required capacity is smaller than the current 
length of the populated buffer so far,
+            /// the buffer will be truncated and items at the end of the 
buffer will be lost.
+            /// </remarks>
+            /// <remarks>
+            /// Note also that a negative capacity will result in the buffer 
being resized to zero.
+            /// </remarks>
+            /// <param name="bitCapacity">Number of bits of required 
capacity.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public BitPackedBuilder Resize(int bitCapacity)
+            {
+                bitCapacity = bitCapacity < 0 ? 0 : bitCapacity;
+                EnsureCapacity(bitCapacity);
+                BitCount = Math.Max(0, bitCapacity);
+
+                return this;
+            }
+
+            /// <summary>
+            /// Clear all contents appended so far.
+            /// </summary>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>
+            public BitPackedBuilder Clear()
+            {
+                Span.Fill(default);
+                BitCount = 0;
+                return this;
+            }
+
+            /// <summary>
+            /// Build an Arrow buffer from the appended contents so far.
+            /// </summary>
+            /// <param name="allocator">Optional memory allocator.</param>
+            /// <returns>Returns the builder (for fluent-style 
composition).</returns>

Review comment:
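       The `returns` text needs to be updated: it still says the builder is returned, but per the summary this method builds an Arrow buffer.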

##########
File path: csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
##########
@@ -56,16 +56,16 @@ public abstract class BuilderBase<TArray, TBuilder> : 
IArrowArrayBuilder<byte, T
             protected TBuilder Instance => this as TBuilder;
             protected ArrowBuffer.Builder<int> ValueOffsets { get; }
             protected ArrowBuffer.Builder<byte> ValueBuffer { get; }
-            protected BooleanArray.Builder ValidityBuffer { get; }
+            protected ArrowBuffer.BitPackedBuilder ValidityBuffer { get; }
             protected int Offset { get; set; }
-            protected int NullCount { get; private set; }
+            protected int NullCount => this.ValidityBuffer.CountUnsetBits();

Review comment:
       Since `NullCount` is a property, callers will expect it to be cheap to 
read (similar to accessing a field). However, this change makes it call a 
method whose running time grows as the buffer grows. I think we should keep 
this a property with a backing field, so callers don't fall into a 
performance trap where they read `NullCount` too often, thinking it has 
fast access (since it is a property).

##########
File path: csharp/src/Apache.Arrow/ArrowBuffer.BitPackedBuilder.cs
##########
@@ -0,0 +1,268 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+namespace Apache.Arrow
+{
+    using System;
+    using System.Collections.Generic;
+    using System.Diagnostics;
+    using Apache.Arrow.Memory;
+
+    public partial struct ArrowBuffer
+    {
+        /// <summary>
+        /// The <see cref="ArrowBuffer.BitPackedBuilder"/> class is a 
complement to <see cref="ArrowBuffer.Builder{T}"/>
+        /// and is designed for boolean fields, which are efficiently 
bit-packed into byte-aligned memory.
+        /// </summary>
+        public class BitPackedBuilder
+        {
+            private const int DefaultBitCapacity = 8;
+
+            /// <summary>
+            /// Gets the number of bits of current capacity.
+            /// </summary>
+            public int BitCapacity { get; private set; }
+
+            /// <summary>
+            /// Gets the number of bits currently appended.
+            /// </summary>
+            public int BitCount { get; private set; }

Review comment:
       My first reaction would be to keep these named `Capacity` and `Length` 
to align with the other builders. Even with the other builders, the `Length` of 
the builder isn't the same as the underlying Memory length, since you could be 
building a 4-byte integer buffer. In all cases "Length" is the number of 
elements added to the builder. Here "elements" just happens to be "bits".




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to