eerhardt commented on a change in pull request #10990: URL: https://github.com/apache/arrow/pull/10990#discussion_r717712297
########## File path: csharp/test/Apache.Arrow.Tests/ArrowArrayBuilderFactoryReflector.cs ########## @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Reflection; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Tests +{ + static class ArrayArrayBuilderFactoryReflector + { + private static readonly Type s_arrowArrayBuilderFactoryType = Assembly.Load("Apache.Arrow").GetType("Apache.Arrow.ArrowArrayBuilderFactory"); Review comment: Do we need to hold onto this type statically in a field? It could just be inlined into the MethodInfo call below and the field could be removed. ########## File path: csharp/src/Apache.Arrow/Arrays/ListArray.cs ########## @@ -77,7 +77,7 @@ public Builder AppendNull() public ListArray Build(MemoryAllocator allocator = default) { - Append(); + ValueOffsetsBufferBuilder.Append(ValueBuilder.Length); Review comment: Can you write a unit test for this to ensure it stays fixed? ########## File path: csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs ########## @@ -144,7 +154,7 @@ private void ReadDictionaryBatch(Flatbuf.DictionaryBatch dictionaryBatch, ByteBu if (dictionaryBatch.IsDelta) { - throw new NotImplementedException("Dictionary delta is not supported yet"); + DictionaryMemo.AddDeltaDictionary(id, arrays[0], _allocator); Review comment: Note, once my integration change #10973 is merged, you can enable the dictionary integration tests to make sure dictionaries are working across C# and the other languages. ########## File path: csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs ########## @@ -0,0 +1,240 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using Apache.Arrow.Memory; +using Apache.Arrow.Types; +using System; +using System.Collections.Generic; + +namespace Apache.Arrow +{ + static class ArrayDataConcatenator + { + internal static ArrayData Concatenate(IReadOnlyList<ArrayData> arrayDataList, MemoryAllocator allocator = default) + { + if (arrayDataList == null || arrayDataList.Count == 0) + { + return null; + } + + if (arrayDataList.Count == 1) + { + return arrayDataList[0]; + } + + var arrowArrayConcatenationVisitor = new ArrayDataConcatenationVisitor(arrayDataList, allocator); + + IArrowType type = arrayDataList[0].DataType; + type.Accept(arrowArrayConcatenationVisitor); + + return arrowArrayConcatenationVisitor.Result; + } + + private class ArrayDataConcatenationVisitor : + IArrowTypeVisitor<BooleanType>, + IArrowTypeVisitor<FixedWidthType>, + IArrowTypeVisitor<BinaryType>, + IArrowTypeVisitor<StringType>, + IArrowTypeVisitor<ListType>, + IArrowTypeVisitor<StructType> + { + public ArrayData Result { get; private set; } + private readonly IReadOnlyList<ArrayData> _arrayDataList; + private readonly int _totalLength; + private readonly int _totalNullCount; + private readonly MemoryAllocator _allocator; + + public ArrayDataConcatenationVisitor(IReadOnlyList<ArrayData> arrayDataList, MemoryAllocator allocator = default) + { + _arrayDataList = arrayDataList; + _allocator = allocator; + + foreach (ArrayData arrayData in _arrayDataList) + { + _totalLength += arrayData.Length; + _totalNullCount += arrayData.NullCount; + } + } + + public void Visit(BooleanType type) + { + CheckData(type, 2); + ArrowBuffer validityBuffer = ConcatenateValidityBuffer(); + ArrowBuffer valueBuffer = ConcatenateBitmapBuffer(1); + + Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, valueBuffer }); + } + + public void Visit(FixedWidthType type) + { + CheckData(type, 2); + ArrowBuffer validityBuffer = ConcatenateValidityBuffer(); + ArrowBuffer valueBuffer = ConcatenateFixedWidthTypeValueBuffer(type); + + Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, valueBuffer }); + } + + public void Visit(BinaryType type) => ConcatenateVariableBinaryArrayData(type); + + public void Visit(StringType type) => ConcatenateVariableBinaryArrayData(type); + + public void Visit(ListType type) + { + CheckData(type, 2); + ArrowBuffer validityBuffer = ConcatenateValidityBuffer(); + ArrowBuffer offsetBuffer = ConcateneteOffsetBuffer(); + ArrayData child = Concatenate(SelectChildren(0), _allocator); + + Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer }, new[] { child }); + } + + public void Visit(StructType type) + { + CheckData(type, 1); + List<ArrayData> children = new List<ArrayData>(type.Fields.Count); + + for (int i = 0; i < type.Fields.Count; i++) + { + children.Add(Concatenate(SelectChildren(i), _allocator)); + } + + Result = new ArrayData(type, _arrayDataList[0].Length, _arrayDataList[0].NullCount, 0, _arrayDataList[0].Buffers, children); + } + + public void Visit(IArrowType type) + { + throw new NotImplementedException($"Concatination for {type.Name} is not supported yet."); + } + + private void CheckData(IArrowType type, int expectedBufferCount) + { + foreach (ArrayData arrayData in _arrayDataList) + { + arrayData.EnsureDataType(type.TypeId); + arrayData.EnsureBufferCount(expectedBufferCount); + } + } + + private void ConcatenateVariableBinaryArrayData(IArrowType type) + { + CheckData(type, 3); + ArrowBuffer validityBuffer = ConcatenateValidityBuffer(); + ArrowBuffer offsetBuffer = ConcateneteOffsetBuffer(); + ArrowBuffer valueBuffer = ConcatenateVariableBinaryValueBuffer(); + + Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer, valueBuffer }); + } + + private ArrowBuffer ConcatenateValidityBuffer() + { + if (_totalNullCount == 0) + { + return ArrowBuffer.Empty; + } + + return ConcatenateBitmapBuffer(0); + } + + private ArrowBuffer ConcatenateBitmapBuffer(int bufferIndex) + { + var builder = new ArrowBuffer.BitmapBuilder(_totalLength); + + foreach (ArrayData arrayData in _arrayDataList) + { + int length = arrayData.Length; + ReadOnlySpan<byte> span = arrayData.Buffers[bufferIndex].Span; + + for (int i = 0; i < length; i++) + { + builder.Append(span.IsEmpty || BitUtility.GetBit(span, i)); + } + } + + return builder.Build(_allocator); + } + + private ArrowBuffer ConcatenateFixedWidthTypeValueBuffer(FixedWidthType type) + { + int typeByteWidth = type.BitWidth / 8; + var builder = new ArrowBuffer.Builder<byte>(_totalLength * typeByteWidth); + + foreach (ArrayData arrayData in _arrayDataList) + { + int length = arrayData.Length; + int byteLength = length * typeByteWidth; + + builder.Append(arrayData.Buffers[1].Span.Slice(0, byteLength)); + } + + return builder.Build(_allocator); + } + + private ArrowBuffer ConcatenateVariableBinaryValueBuffer() + { + var builder = new ArrowBuffer.Builder<byte>(); + + foreach (ArrayData arrayData in _arrayDataList) + { + int lastOffset = arrayData.Buffers[1].Span.CastTo<int>()[arrayData.Length]; + builder.Append(arrayData.Buffers[2].Span.Slice(0, lastOffset)); + } + + return builder.Build(_allocator); + } + + private ArrowBuffer ConcateneteOffsetBuffer() Review comment: ```suggestion private ArrowBuffer ConcatenateOffsetBuffer() ``` ########## File path: csharp/test/Apache.Arrow.Tests/ArrowArrayBuilderFactoryReflector.cs ########## @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Reflection; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Tests +{ + static class ArrayArrayBuilderFactoryReflector + { + private static readonly Type s_arrowArrayBuilderFactoryType = Assembly.Load("Apache.Arrow").GetType("Apache.Arrow.ArrowArrayBuilderFactory"); Review comment: ```suggestion private static readonly Type s_arrowArrayBuilderFactoryType = typeof(ArrayData).Assembly.GetType("Apache.Arrow.ArrowArrayBuilderFactory"); ``` ########## File path: csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs ########## @@ -0,0 +1,240 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using Apache.Arrow.Memory; +using Apache.Arrow.Types; +using System; +using System.Collections.Generic; + +namespace Apache.Arrow +{ + static class ArrayDataConcatenator + { + internal static ArrayData Concatenate(IReadOnlyList<ArrayData> arrayDataList, MemoryAllocator allocator = default) + { + if (arrayDataList == null || arrayDataList.Count == 0) + { + return null; + } + + if (arrayDataList.Count == 1) + { + return arrayDataList[0]; + } + + var arrowArrayConcatenationVisitor = new ArrayDataConcatenationVisitor(arrayDataList, allocator); + + IArrowType type = arrayDataList[0].DataType; + type.Accept(arrowArrayConcatenationVisitor); + + return arrowArrayConcatenationVisitor.Result; + } + + private class ArrayDataConcatenationVisitor : + IArrowTypeVisitor<BooleanType>, + IArrowTypeVisitor<FixedWidthType>, + IArrowTypeVisitor<BinaryType>, + IArrowTypeVisitor<StringType>, + IArrowTypeVisitor<ListType>, + IArrowTypeVisitor<StructType> + { + public ArrayData Result { get; private set; } + private readonly IReadOnlyList<ArrayData> _arrayDataList; + private readonly int _totalLength; + private readonly int _totalNullCount; + private readonly MemoryAllocator _allocator; + + public ArrayDataConcatenationVisitor(IReadOnlyList<ArrayData> arrayDataList, MemoryAllocator allocator = default) + { + _arrayDataList = arrayDataList; + _allocator = allocator; + + foreach (ArrayData arrayData in _arrayDataList) + { + _totalLength += arrayData.Length; + _totalNullCount += arrayData.NullCount; + } + } + + public void Visit(BooleanType type) + { + CheckData(type, 2); + ArrowBuffer validityBuffer = ConcatenateValidityBuffer(); + ArrowBuffer valueBuffer = ConcatenateBitmapBuffer(1); + + Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, valueBuffer }); + } + + public void Visit(FixedWidthType type) + { + CheckData(type, 2); + ArrowBuffer validityBuffer = ConcatenateValidityBuffer(); + ArrowBuffer valueBuffer = ConcatenateFixedWidthTypeValueBuffer(type); + + Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, valueBuffer }); + } + + public void Visit(BinaryType type) => ConcatenateVariableBinaryArrayData(type); + + public void Visit(StringType type) => ConcatenateVariableBinaryArrayData(type); + + public void Visit(ListType type) + { + CheckData(type, 2); + ArrowBuffer validityBuffer = ConcatenateValidityBuffer(); + ArrowBuffer offsetBuffer = ConcateneteOffsetBuffer(); + ArrayData child = Concatenate(SelectChildren(0), _allocator); + + Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer }, new[] { child }); + } + + public void Visit(StructType type) + { + CheckData(type, 1); + List<ArrayData> children = new List<ArrayData>(type.Fields.Count); + + for (int i = 0; i < type.Fields.Count; i++) + { + children.Add(Concatenate(SelectChildren(i), _allocator)); + } + + Result = new ArrayData(type, _arrayDataList[0].Length, _arrayDataList[0].NullCount, 0, _arrayDataList[0].Buffers, children); + } + + public void Visit(IArrowType type) + { + throw new NotImplementedException($"Concatination for {type.Name} is not supported yet."); Review comment: ```suggestion throw new NotImplementedException($"Concatenation for {type.Name} is not supported yet."); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
