Platob commented on code in PR #35299:
URL: https://github.com/apache/arrow/pull/35299#discussion_r1186646427


##########
csharp/src/Apache.Arrow/Builder/BufferBuilder.cs:
##########
@@ -0,0 +1,356 @@
+using System;
+using System.Buffers;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using Apache.Arrow.Memory;
+
+namespace Apache.Arrow.Builder
+{
+    public class BufferBuilder : IBufferBuilder
+    {
+        public class BitBuffer
+        {
+            private readonly bool[] _bits;
+
+            public int Length { get; private set; }
+            public int AvailableLength => Capacity - Length;
+
+            public int Capacity;
+
+            public bool IsFull => Length == Capacity;
+            public byte ToByte(ref byte data) => BitUtility.ToByte(ref data, 
_bits);
+
+            public BitBuffer(int capacity = 8)
+            {
+                Capacity = capacity;
+                _bits = new bool[capacity];
+                Length = 0;
+            }
+
+            public void Append(bool bit) => _bits[Length++] = bit;
+            public void Fill(ReadOnlySpan<bool> bits)
+            {
+                bits.CopyTo(_bits.AsSpan().Slice(Length, bits.Length));
+                Length += bits.Length;
+            }
+
+            public void Reset()
+            {
+                for (int i = 0; i < _bits.Length; i++)
+                {
+                    _bits[i] = false;
+                }
+                Length = 0;
+            }
+        }
+
+        private const int DefaultCapacity = 64;
+        public int ByteLength { get; private set; }
+
+        public Memory<byte> Memory { get; private set; }
+        public BitBuffer BitOverhead { get; }

Review Comment:
   During its lifetime, the builder allocates the 8-bit BitOverhead buffer
   only once.
   
   On every append of one or more bits, it checks whether all 8 bits have been
   written (similar to the old bitmap builder's `% 8` check).
   
   If it is full (8 bits are written), it will write a new byte to the Memory
   buffer and reset all values in the BitOverhead to false, setting its length
   to 0 so that it is ready to receive more bits.
   
   ```csharp
   /// <summary>
   /// Ad-hoc timing comparison between <c>BufferBuilder</c> and
   /// <c>ArrowBuffer.BitmapBuilder</c> when appending single bits and bit ranges.
   /// </summary>
   public class BenchmarkBits
   {
       /// <summary>
       /// Invokes <paramref name="action"/> <paramref name="repetition"/> times under a
       /// single <c>Stopwatch</c> and returns the elapsed ticks divided by the repetition count.
       /// </summary>
       /// <param name="repetition">Number of invocations; must be non-zero because it is used as the divisor.</param>
       /// <param name="action">The code to time.</param>
       /// <returns>Total elapsed stopwatch ticks divided by <paramref name="repetition"/> (integer division).</returns>
       public static long ElapsedTicks(int repetition, Action action)
       {
           // NOTE(review): there is no warm-up call before timing starts, so
           // first-invocation costs are presumably included in the result.
           Stopwatch stopwatch = Stopwatch.StartNew();
   
           for (int i = 0; i < repetition; i++)
               action();
   
           stopwatch.Stop();
   
           // Integer division: any fractional part of the per-call average is truncated.
           return stopwatch.ElapsedTicks / repetition;
       }
   
       // Sink the results are written to; supplied through the constructor.
       private readonly ITestOutputHelper output;
   
       // Shared inputs, built once: 100 true values and 100 false values.
       private static readonly bool[] trueBits = Enumerable.Range(0, 
100).Select(_ => true).ToArray();
       private static readonly bool[] falseBits = Enumerable.Range(0, 
100).Select(_ => false).ToArray();
   
       /// <summary>Stores the output helper used by <see cref="Bench"/> to report results.</summary>
       public BenchmarkBits(ITestOutputHelper output)
       {
           this.output = output;
       }
   
       /// <summary>
       /// Times <see cref="MakeOld"/> and then <see cref="MakeNew"/>, 1,000,000 invocations
       /// each, and writes one "Elapsed ..." line per scenario.
       /// </summary>
       [Fact]
       public void Bench()
       {
           output.WriteLine($"Elapsed {ElapsedTicks(1000000, MakeOld)}");
           output.WriteLine($"Elapsed {ElapsedTicks(1000000, MakeNew)}");
       }
   
       /// <summary>
       /// New-implementation scenario: creates a <c>BufferBuilder</c>, appends two single
       /// true bits, then the 100 true bits and the 100 false bits, and calls <c>Build()</c>.
       /// </summary>
       private static void MakeNew()
       {
           // presumably 64 is the initial capacity; confirm against the BufferBuilder constructor
           var builder = new BufferBuilder(64);
   
           builder.AppendBit(true).AppendBit(true)
               .AppendBits(trueBits)
               .AppendBits(falseBits)
               .Build();
       }
   
       /// <summary>
       /// Old-implementation scenario: the same sequence of appends as <see cref="MakeNew"/>,
       /// issued through <c>ArrowBuffer.BitmapBuilder</c> (Append / AppendRange / Build).
       /// </summary>
       private static void MakeOld()
       {
           // presumably 64 is the initial capacity; confirm against the BitmapBuilder constructor
           var builder = new ArrowBuffer.BitmapBuilder(64);
   
           builder.Append(true).Append(true)
               .AppendRange(trueBits)
               .AppendRange(falseBits)
               .Build();
       }
   }
   ```
   
   Over 1 million iterations, the new implementation averages 168 ticks per call.
   Over 1 million iterations, the old implementation averages 312 ticks per call.



##########
csharp/src/Apache.Arrow/Builder/BufferBuilder.cs:
##########
@@ -0,0 +1,356 @@
+using System;
+using System.Buffers;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using Apache.Arrow.Memory;
+
+namespace Apache.Arrow.Builder
+{
+    public class BufferBuilder : IBufferBuilder
+    {
+        public class BitBuffer
+        {
+            private readonly bool[] _bits;
+
+            public int Length { get; private set; }
+            public int AvailableLength => Capacity - Length;
+
+            public int Capacity;
+
+            public bool IsFull => Length == Capacity;
+            public byte ToByte(ref byte data) => BitUtility.ToByte(ref data, 
_bits);
+
+            public BitBuffer(int capacity = 8)
+            {
+                Capacity = capacity;
+                _bits = new bool[capacity];
+                Length = 0;
+            }
+
+            public void Append(bool bit) => _bits[Length++] = bit;
+            public void Fill(ReadOnlySpan<bool> bits)
+            {
+                bits.CopyTo(_bits.AsSpan().Slice(Length, bits.Length));
+                Length += bits.Length;
+            }
+
+            public void Reset()
+            {
+                for (int i = 0; i < _bits.Length; i++)
+                {
+                    _bits[i] = false;
+                }
+                Length = 0;
+            }
+        }
+
+        private const int DefaultCapacity = 64;
+        public int ByteLength { get; private set; }
+
+        public Memory<byte> Memory { get; private set; }
+        public BitBuffer BitOverhead { get; }

Review Comment:
   The same holds for sequential appends: the new implementation looks better,
   so it scales well with a larger range of values.
   
   The old implementation averaged 1183 ticks vs. 938 ticks for the new one.
   
   ```csharp
   /// <summary>
   /// Times MakeOld and then MakeNew, 100,000 invocations each, and writes one
   /// "Elapsed ..." line per scenario with the value returned by ElapsedTicks.
   /// </summary>
   [Fact]
   public void Bench()
   {
       output.WriteLine($"Elapsed {ElapsedTicks(100000, MakeOld)}");
       output.WriteLine($"Elapsed {ElapsedTicks(100000, MakeNew)}");
   }
   
   /// <summary>
   /// New-implementation scenario: creates a <c>BufferBuilder</c>, appends a true bit
   /// 1000 times one call at a time, then calls <c>Build()</c>.
   /// </summary>
   private static void MakeNew()
   {
       // presumably 64 is the initial capacity; confirm against the BufferBuilder constructor
       var builder = new BufferBuilder(64);
   
       // Sequential single-bit appends; no range overload is used in this scenario.
       for (int i = 0; i < 1000; i++)
           builder.AppendBit(true);
   
       builder.Build();
   }
   
   /// <summary>
   /// Old-implementation scenario: creates an <c>ArrowBuffer.BitmapBuilder</c>, appends a
   /// true bit 1000 times one call at a time, then calls <c>Build()</c>.
   /// </summary>
   private static void MakeOld()
   {
       // presumably 64 is the initial capacity; confirm against the BitmapBuilder constructor
       var builder = new ArrowBuffer.BitmapBuilder(64);
   
       // Sequential single-bit appends; no range overload is used in this scenario.
       for (int i = 0; i < 1000; i++)
           builder.Append(true);
   
       builder.Build();
   }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to