[GitHub] [arrow] mr-smidge commented on a change in pull request #7671: ARROW-8344: [C#] Bug-fixes to binary array plus other improvements

2020-07-13 Thread GitBox


mr-smidge commented on a change in pull request #7671:
URL: https://github.com/apache/arrow/pull/7671#discussion_r453692211



##
File path: csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
##
@@ -66,87 +66,158 @@ protected BuilderBase(IArrowType dataType)
 ValueOffsets = new ArrowBuffer.Builder();
 ValueBuffer = new ArrowBuffer.Builder();
 ValidityBuffer = new ArrowBuffer.BitmapBuilder();
+
+// From the docs:
+//
+// The offsets buffer contains length + 1 signed integers 
(either 32-bit or 64-bit, depending on the
+// logical type), which encode the start position of each slot 
in the data buffer. The length of the
+// value in each slot is computed using the difference between 
the offset at that slot’s index and the
+// subsequent offset.
+//
+// In this builder, we choose to append the first offset 
(zero) upon construction, and each trailing
+// offset is then added after each individual item has been 
appended.
+ValueOffsets.Append(this.Offset);
 }
 
 protected abstract TArray Build(ArrayData data);
 
-public int Length => ValueOffsets.Length;
+/// 
+/// Gets the length of the array built so far.
+/// 
+public int Length => ValueOffsets.Length - 1;
 
+/// 
+/// Build an Arrow array from the appended contents so far.
+/// 
+/// Optional memory allocator.
+/// Returns an array of type .
 public TArray Build(MemoryAllocator allocator = default)
 {
-ValueOffsets.Append(Offset);
-
-ArrowBuffer validityBuffer = NullCount > 0
-? ValidityBuffer.Build(allocator)
-: ArrowBuffer.Empty;
-
-var data = new ArrayData(DataType, ValueOffsets.Length - 1, 
NullCount, 0,
-new[] { validityBuffer, ValueOffsets.Build(allocator), 
ValueBuffer.Build(allocator) });
+var bufs = new[]
+{
+NullCount > 0 ? ValidityBuffer.Build(allocator) : 
ArrowBuffer.Empty,
+ValueOffsets.Build(allocator),
+ValueBuffer.Build(allocator),
+};
+var data = new ArrayData(
+DataType,
+length: ValueOffsets.Length - 1,

Review comment:
   Yes it can - changed.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] mr-smidge commented on a change in pull request #7671: ARROW-8344: [C#] Bug-fixes to binary array plus other improvements

2020-07-13 Thread GitBox


mr-smidge commented on a change in pull request #7671:
URL: https://github.com/apache/arrow/pull/7671#discussion_r453690865



##
File path: csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
##
@@ -66,87 +66,158 @@ protected BuilderBase(IArrowType dataType)
 ValueOffsets = new ArrowBuffer.Builder();
 ValueBuffer = new ArrowBuffer.Builder();
 ValidityBuffer = new ArrowBuffer.BitmapBuilder();
+
+// From the docs:
+//
+// The offsets buffer contains length + 1 signed integers 
(either 32-bit or 64-bit, depending on the
+// logical type), which encode the start position of each slot 
in the data buffer. The length of the
+// value in each slot is computed using the difference between 
the offset at that slot’s index and the
+// subsequent offset.
+//
+// In this builder, we choose to append the first offset 
(zero) upon construction, and each trailing
+// offset is then added after each individual item has been 
appended.
+ValueOffsets.Append(this.Offset);
 }
 
 protected abstract TArray Build(ArrayData data);
 
-public int Length => ValueOffsets.Length;
+/// 
+/// Gets the length of the array built so far.
+/// 
+public int Length => ValueOffsets.Length - 1;
 
+/// 
+/// Build an Arrow array from the appended contents so far.
+/// 
+/// Optional memory allocator.
+/// Returns an array of type .
 public TArray Build(MemoryAllocator allocator = default)
 {
-ValueOffsets.Append(Offset);
-
-ArrowBuffer validityBuffer = NullCount > 0
-? ValidityBuffer.Build(allocator)
-: ArrowBuffer.Empty;
-
-var data = new ArrayData(DataType, ValueOffsets.Length - 1, 
NullCount, 0,
-new[] { validityBuffer, ValueOffsets.Build(allocator), 
ValueBuffer.Build(allocator) });
+var bufs = new[]
+{
+NullCount > 0 ? ValidityBuffer.Build(allocator) : 
ArrowBuffer.Empty,
+ValueOffsets.Build(allocator),
+ValueBuffer.Build(allocator),
+};
+var data = new ArrayData(
+DataType,
+length: ValueOffsets.Length - 1,
+NullCount,
+offset: 0,
+bufs);
 
 return Build(data);
 }
 
+/// 
+/// Append a single null value to the array.
+/// 
+/// Returns the builder (for fluent-style 
composition).
 public TBuilder AppendNull()
 {
-ValueOffsets.Append(Offset);
+// Do not add to the value buffer in the case of a null.
+// Note that we do not need to increment the offset as a 
result.
 ValidityBuffer.Append(false);
+ValueOffsets.Append(Offset);
 return Instance;
 }
 
+/// 
+/// Appends a value, consisting of a single byte, to the array.
+/// 
+/// Byte value to append.
+/// Returns the builder (for fluent-style 
composition).
 public TBuilder Append(byte value)
 {
-ValueOffsets.Append(Offset);
 ValueBuffer.Append(value);
-Offset++;
 ValidityBuffer.Append(true);
+Offset++;
+ValueOffsets.Append(Offset);
 return Instance;
 }
 
+/// 
+/// Append a value, consisting of a span of bytes, to the array.
+/// 
+/// 
+/// Note that a single value is added, which consists of 
arbitrarily many bytes.  If multiple values are
+/// to be added, use the  method.
+/// 
+/// Span of bytes to add.
+/// Returns the builder (for fluent-style 
composition).
 public TBuilder Append(ReadOnlySpan span)
 {
-ValueOffsets.Append(Offset);
 ValueBuffer.Append(span);
 ValidityBuffer.Append(true);
 Offset += span.Length;
+ValueOffsets.Append(Offset);
 return Instance;
 }
 
-public TBuilder AppendRange(IEnumerable values)
+/// 
+/// Append a value, consisting of an enumerable collection of 
bytes, to the array.
+/// 
+/// 
+/// Note that this method appends a single value, which may 
consist of arbitrarily many bytes.  If multiple
+ 

[GitHub] [arrow] mr-smidge commented on a change in pull request #7671: ARROW-8344: [C#] Bug-fixes to binary array plus other improvements

2020-07-13 Thread GitBox


mr-smidge commented on a change in pull request #7671:
URL: https://github.com/apache/arrow/pull/7671#discussion_r453690625



##
File path: csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
##
@@ -66,87 +66,158 @@ protected BuilderBase(IArrowType dataType)
 ValueOffsets = new ArrowBuffer.Builder();
 ValueBuffer = new ArrowBuffer.Builder();
 ValidityBuffer = new ArrowBuffer.BitmapBuilder();
+
+// From the docs:
+//
+// The offsets buffer contains length + 1 signed integers 
(either 32-bit or 64-bit, depending on the
+// logical type), which encode the start position of each slot 
in the data buffer. The length of the
+// value in each slot is computed using the difference between 
the offset at that slot’s index and the
+// subsequent offset.
+//
+// In this builder, we choose to append the first offset 
(zero) upon construction, and each trailing
+// offset is then added after each individual item has been 
appended.
+ValueOffsets.Append(this.Offset);
 }
 
 protected abstract TArray Build(ArrayData data);
 
-public int Length => ValueOffsets.Length;
+/// 
+/// Gets the length of the array built so far.
+/// 
+public int Length => ValueOffsets.Length - 1;
 
+/// 
+/// Build an Arrow array from the appended contents so far.
+/// 
+/// Optional memory allocator.
+/// Returns an array of type .
 public TArray Build(MemoryAllocator allocator = default)
 {
-ValueOffsets.Append(Offset);
-
-ArrowBuffer validityBuffer = NullCount > 0
-? ValidityBuffer.Build(allocator)
-: ArrowBuffer.Empty;
-
-var data = new ArrayData(DataType, ValueOffsets.Length - 1, 
NullCount, 0,
-new[] { validityBuffer, ValueOffsets.Build(allocator), 
ValueBuffer.Build(allocator) });
+var bufs = new[]
+{
+NullCount > 0 ? ValidityBuffer.Build(allocator) : 
ArrowBuffer.Empty,
+ValueOffsets.Build(allocator),
+ValueBuffer.Build(allocator),
+};
+var data = new ArrayData(
+DataType,
+length: ValueOffsets.Length - 1,
+NullCount,
+offset: 0,
+bufs);
 
 return Build(data);
 }
 
+/// 
+/// Append a single null value to the array.
+/// 
+/// Returns the builder (for fluent-style 
composition).
 public TBuilder AppendNull()
 {
-ValueOffsets.Append(Offset);
+// Do not add to the value buffer in the case of a null.
+// Note that we do not need to increment the offset as a 
result.
 ValidityBuffer.Append(false);
+ValueOffsets.Append(Offset);
 return Instance;
 }
 
+/// 
+/// Appends a value, consisting of a single byte, to the array.
+/// 
+/// Byte value to append.
+/// Returns the builder (for fluent-style 
composition).
 public TBuilder Append(byte value)
 {
-ValueOffsets.Append(Offset);
 ValueBuffer.Append(value);
-Offset++;
 ValidityBuffer.Append(true);
+Offset++;
+ValueOffsets.Append(Offset);
 return Instance;
 }
 
+/// 
+/// Append a value, consisting of a span of bytes, to the array.
+/// 
+/// 
+/// Note that a single value is added, which consists of 
arbitrarily many bytes.  If multiple values are
+/// to be added, use the  method.
+/// 
+/// Span of bytes to add.
+/// Returns the builder (for fluent-style 
composition).
 public TBuilder Append(ReadOnlySpan span)
 {
-ValueOffsets.Append(Offset);
 ValueBuffer.Append(span);
 ValidityBuffer.Append(true);
 Offset += span.Length;
+ValueOffsets.Append(Offset);
 return Instance;
 }
 
-public TBuilder AppendRange(IEnumerable values)
+/// 
+/// Append a value, consisting of an enumerable collection of 
bytes, to the array.
+/// 
+/// 
+/// Note that this method appends a single value, which may 
consist of arbitrarily many bytes.  If multiple
+ 

[GitHub] [arrow] mr-smidge commented on a change in pull request #7671: ARROW-8344: [C#] Bug-fixes to binary array plus other improvements

2020-07-13 Thread GitBox


mr-smidge commented on a change in pull request #7671:
URL: https://github.com/apache/arrow/pull/7671#discussion_r453690533



##
File path: csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
##
@@ -66,87 +66,158 @@ protected BuilderBase(IArrowType dataType)
 ValueOffsets = new ArrowBuffer.Builder();
 ValueBuffer = new ArrowBuffer.Builder();
 ValidityBuffer = new ArrowBuffer.BitmapBuilder();
+
+// From the docs:
+//
+// The offsets buffer contains length + 1 signed integers 
(either 32-bit or 64-bit, depending on the
+// logical type), which encode the start position of each slot 
in the data buffer. The length of the
+// value in each slot is computed using the difference between 
the offset at that slot’s index and the
+// subsequent offset.
+//
+// In this builder, we choose to append the first offset 
(zero) upon construction, and each trailing
+// offset is then added after each individual item has been 
appended.
+ValueOffsets.Append(this.Offset);
 }
 
 protected abstract TArray Build(ArrayData data);
 
-public int Length => ValueOffsets.Length;
+/// 
+/// Gets the length of the array built so far.
+/// 
+public int Length => ValueOffsets.Length - 1;
 
+/// 
+/// Build an Arrow array from the appended contents so far.
+/// 
+/// Optional memory allocator.
+/// Returns an array of type .
 public TArray Build(MemoryAllocator allocator = default)
 {
-ValueOffsets.Append(Offset);
-
-ArrowBuffer validityBuffer = NullCount > 0
-? ValidityBuffer.Build(allocator)
-: ArrowBuffer.Empty;
-
-var data = new ArrayData(DataType, ValueOffsets.Length - 1, 
NullCount, 0,
-new[] { validityBuffer, ValueOffsets.Build(allocator), 
ValueBuffer.Build(allocator) });
+var bufs = new[]
+{
+NullCount > 0 ? ValidityBuffer.Build(allocator) : 
ArrowBuffer.Empty,
+ValueOffsets.Build(allocator),
+ValueBuffer.Build(allocator),
+};
+var data = new ArrayData(
+DataType,
+length: ValueOffsets.Length - 1,
+NullCount,
+offset: 0,
+bufs);
 
 return Build(data);
 }
 
+/// 
+/// Append a single null value to the array.
+/// 
+/// Returns the builder (for fluent-style 
composition).
 public TBuilder AppendNull()
 {
-ValueOffsets.Append(Offset);
+// Do not add to the value buffer in the case of a null.
+// Note that we do not need to increment the offset as a 
result.
 ValidityBuffer.Append(false);
+ValueOffsets.Append(Offset);
 return Instance;
 }
 
+/// 
+/// Appends a value, consisting of a single byte, to the array.
+/// 
+/// Byte value to append.
+/// Returns the builder (for fluent-style 
composition).
 public TBuilder Append(byte value)
 {
-ValueOffsets.Append(Offset);
 ValueBuffer.Append(value);
-Offset++;
 ValidityBuffer.Append(true);
+Offset++;
+ValueOffsets.Append(Offset);
 return Instance;
 }
 
+/// 
+/// Append a value, consisting of a span of bytes, to the array.
+/// 
+/// 
+/// Note that a single value is added, which consists of 
arbitrarily many bytes.  If multiple values are
+/// to be added, use the  method.
+/// 
+/// Span of bytes to add.
+/// Returns the builder (for fluent-style 
composition).
 public TBuilder Append(ReadOnlySpan span)
 {
-ValueOffsets.Append(Offset);
 ValueBuffer.Append(span);
 ValidityBuffer.Append(true);
 Offset += span.Length;
+ValueOffsets.Append(Offset);
 return Instance;
 }
 
-public TBuilder AppendRange(IEnumerable values)
+/// 
+/// Append a value, consisting of an enumerable collection of 
bytes, to the array.
+/// 
+/// 
+/// Note that this method appends a single value, which may 
consist of arbitrarily many bytes.  If multiple
+ 

[GitHub] [arrow] mr-smidge commented on a change in pull request #7671: ARROW-8344: [C#] Bug-fixes to binary array plus other improvements

2020-07-07 Thread GitBox


mr-smidge commented on a change in pull request #7671:
URL: https://github.com/apache/arrow/pull/7671#discussion_r451216615



##
File path: csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
##
@@ -237,7 +329,9 @@ public ReadOnlySpan GetBytes(int index)
 
 if (IsNull(index))
 {
-return null;
+// Note that `return null;` is valid syntax, but would be 
misleading as `null` in the context of a span
+// is actually returned as an empty span.
+return ReadOnlySpan.Empty;

Review comment:
   This clause previously said `return null;`, which was misleading as it 
suggested that the method could be used to identify null values in the array.  
However, `null` in the context of a `ReadOnlySpan` is the same as the empty 
span, and so the method can't distinguish null values from empty ones:
   
   ```csharp
   Assert.True(ReadOnlySpan.Empty == null)
   ```
   
   The new code is clearer in intent.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] mr-smidge commented on a change in pull request #7671: ARROW-8344: [C#] Bug-fixes to binary array plus other improvements

2020-07-07 Thread GitBox


mr-smidge commented on a change in pull request #7671:
URL: https://github.com/apache/arrow/pull/7671#discussion_r451215646



##
File path: csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
##
@@ -173,11 +245,19 @@ public TBuilder Set(int index, byte value)
 throw new NotImplementedException();
 }
 
+/// 
+/// Clear all contents appended so far.
+/// 
+/// Returns the builder (for fluent-style 
composition).
 public TBuilder Clear()
 {
 ValueOffsets.Clear();
 ValueBuffer.Clear();
 ValidityBuffer.Clear();
+
+// Always write the first offset before anything has been 
written.
+Offset = 0;

Review comment:
   Not resetting the `Offset` member variable was the root cause of the 
string-clearing bug referenced in ARROW-8344.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] mr-smidge commented on a change in pull request #7671: ARROW-8344: [C#] Bug-fixes to binary array plus other improvements

2020-07-07 Thread GitBox


mr-smidge commented on a change in pull request #7671:
URL: https://github.com/apache/arrow/pull/7671#discussion_r451215414



##
File path: csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
##
@@ -66,87 +66,158 @@ protected BuilderBase(IArrowType dataType)
 ValueOffsets = new ArrowBuffer.Builder();
 ValueBuffer = new ArrowBuffer.Builder();
 ValidityBuffer = new ArrowBuffer.BitmapBuilder();
+
+// From the docs:
+//
+// The offsets buffer contains length + 1 signed integers 
(either 32-bit or 64-bit, depending on the
+// logical type), which encode the start position of each slot 
in the data buffer. The length of the
+// value in each slot is computed using the difference between 
the offset at that slot’s index and the
+// subsequent offset.
+//
+// In this builder, we choose to append the first offset 
(zero) upon construction, and each trailing
+// offset is then added after each individual item has been 
appended.
+ValueOffsets.Append(this.Offset);
 }
 
 protected abstract TArray Build(ArrayData data);
 
-public int Length => ValueOffsets.Length;
+/// 
+/// Gets the length of the array built so far.
+/// 
+public int Length => ValueOffsets.Length - 1;
 
+/// 
+/// Build an Arrow array from the appended contents so far.
+/// 
+/// Optional memory allocator.
+/// Returns an array of type .
 public TArray Build(MemoryAllocator allocator = default)
 {
-ValueOffsets.Append(Offset);
-
-ArrowBuffer validityBuffer = NullCount > 0
-? ValidityBuffer.Build(allocator)
-: ArrowBuffer.Empty;
-
-var data = new ArrayData(DataType, ValueOffsets.Length - 1, 
NullCount, 0,
-new[] { validityBuffer, ValueOffsets.Build(allocator), 
ValueBuffer.Build(allocator) });
+var bufs = new[]
+{
+NullCount > 0 ? ValidityBuffer.Build(allocator) : 
ArrowBuffer.Empty,
+ValueOffsets.Build(allocator),
+ValueBuffer.Build(allocator),
+};
+var data = new ArrayData(
+DataType,
+length: ValueOffsets.Length - 1,
+NullCount,
+offset: 0,
+bufs);
 
 return Build(data);
 }
 
+/// 
+/// Append a single null value to the array.
+/// 
+/// Returns the builder (for fluent-style 
composition).
 public TBuilder AppendNull()
 {
-ValueOffsets.Append(Offset);
+// Do not add to the value buffer in the case of a null.
+// Note that we do not need to increment the offset as a 
result.
 ValidityBuffer.Append(false);
+ValueOffsets.Append(Offset);
 return Instance;
 }
 
+/// 
+/// Appends a value, consisting of a single byte, to the array.
+/// 
+/// Byte value to append.
+/// Returns the builder (for fluent-style 
composition).
 public TBuilder Append(byte value)
 {
-ValueOffsets.Append(Offset);
 ValueBuffer.Append(value);
-Offset++;
 ValidityBuffer.Append(true);
+Offset++;
+ValueOffsets.Append(Offset);
 return Instance;
 }
 
+/// 
+/// Append a value, consisting of a span of bytes, to the array.
+/// 
+/// 
+/// Note that a single value is added, which consists of 
arbitrarily many bytes.  If multiple values are
+/// to be added, use the  method.
+/// 
+/// Span of bytes to add.
+/// Returns the builder (for fluent-style 
composition).
 public TBuilder Append(ReadOnlySpan span)
 {
-ValueOffsets.Append(Offset);
 ValueBuffer.Append(span);
 ValidityBuffer.Append(true);
 Offset += span.Length;
+ValueOffsets.Append(Offset);
 return Instance;
 }
 
-public TBuilder AppendRange(IEnumerable values)
+/// 
+/// Append a value, consisting of an enumerable collection of 
bytes, to the array.
+/// 
+/// 
+/// Note that this method appends a single value, which may 
consist of arbitrarily many bytes.  If multiple
+ 

[GitHub] [arrow] mr-smidge commented on a change in pull request #7671: ARROW-8344: [C#] Bug-fixes to binary array plus other improvements

2020-07-07 Thread GitBox


mr-smidge commented on a change in pull request #7671:
URL: https://github.com/apache/arrow/pull/7671#discussion_r451214556



##
File path: csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
##
@@ -66,87 +66,158 @@ protected BuilderBase(IArrowType dataType)
 ValueOffsets = new ArrowBuffer.Builder();
 ValueBuffer = new ArrowBuffer.Builder();
 ValidityBuffer = new ArrowBuffer.BitmapBuilder();
+
+// From the docs:
+//
+// The offsets buffer contains length + 1 signed integers 
(either 32-bit or 64-bit, depending on the
+// logical type), which encode the start position of each slot 
in the data buffer. The length of the
+// value in each slot is computed using the difference between 
the offset at that slot’s index and the
+// subsequent offset.
+//
+// In this builder, we choose to append the first offset 
(zero) upon construction, and each trailing
+// offset is then added after each individual item has been 
appended.
+ValueOffsets.Append(this.Offset);

Review comment:
   For an array with N items, there need to be N+1 offset values written.
   
   Previously, an offset was written with each `Append*()` call, with the extra 
one written when calling `Build()`.  This PR flips this around to add the extra 
offset upon construction (or calling `Clear()`), with the others written 
post-hoc during `Append*()` calls.  As such, `Build()` is now idempotent.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org