This is an automated email from the ASF dual-hosted git repository. quinnj pushed a commit to branch jq-subarray-return in repository https://gitbox.apache.org/repos/asf/arrow-julia.git
commit 61c4c03af1c01dc92ed0ce5c887a392e181b9e7b Author: Jacob Quinn <[email protected]> AuthorDate: Wed May 31 22:35:31 2023 -0600 Return SubArrays when possible for arrow list types --- src/table.jl | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/table.jl b/src/table.jl index da23038..80d8fc6 100644 --- a/src/table.jl +++ b/src/table.jl @@ -625,6 +625,8 @@ function reinterp(::Type{T}, batch, buf, compression) where {T} end end +const SubVector{T, P} = SubArray{T, 1, P, Tuple{UnitRange{Int64}}, true} + function build(f::Meta.Field, L::ListTypes, batch, rb, de, nodeidx, bufferidx, convert) @debugv 2 "building array: L = $L" validity = buildbitmap(batch, rb, nodeidx, bufferidx) @@ -637,16 +639,23 @@ function build(f::Meta.Field, L::ListTypes, batch, rb, de, nodeidx, bufferidx, c bufferidx += 1 len = rb.nodes[nodeidx].length nodeidx += 1 + meta = buildmetadata(f.custom_metadata) if L isa Meta.Utf8 || L isa Meta.LargeUtf8 || L isa Meta.Binary || L isa Meta.LargeBinary buffer = rb.buffers[bufferidx] bytes, A = reinterp(UInt8, batch, buffer, rb.compression) bufferidx += 1 + T = juliaeltype(f, meta, convert) else bytes = UInt8[] A, nodeidx, bufferidx = build(f.children[1], batch, rb, de, nodeidx, bufferidx, convert) + T = juliaeltype(f, meta, convert) + # juliaeltype returns Vector for List, translate to SubArray + S = Base.nonmissingtype(T) + if S <: Vector + ST = SubVector{eltype(S), typeof(A)} + T = S == T ? ST : Union{Missing, ST} + end end - meta = buildmetadata(f.custom_metadata) - T = juliaeltype(f, meta, convert) return List{T, OT, typeof(A)}(bytes, validity, offsets, A, len, meta), nodeidx, bufferidx end
