This is an automated email from the ASF dual-hosted git repository.
quinnj pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git
The following commit(s) were added to refs/heads/main by this push:
new de59e48 Fix bug where inline bytes weren't valid for Utf8View data (#548)
de59e48 is described below
commit de59e485a4197b5e70537e3572a31734d3f9e4a9
Author: Jacob Quinn <[email protected]>
AuthorDate: Fri May 9 14:23:48 2025 -0600
Fix bug where inline bytes weren't valid for Utf8View data (#548)
Fixed a follow-up bug to #511
---
src/table.jl | 3 ++-
test/reject_reason_trimmed.arrow | Bin 0 -> 1144 bytes
test/runtests.jl | 10 ++++++++++
3 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/src/table.jl b/src/table.jl
index dc367d2..fe9206b 100644
--- a/src/table.jl
+++ b/src/table.jl
@@ -881,7 +881,8 @@ function build(
validity = buildbitmap(batch, rb, nodeidx, bufferidx)
bufferidx += 1
buffer = rb.buffers[bufferidx]
- inline, views = reinterp(ViewElement, batch, buffer, rb.compression)
+ _, views = reinterp(ViewElement, batch, buffer, rb.compression)
+ inline = reinterpret(UInt8, views) # reuse the (possibly realigned) memory backing `views`
bufferidx += 1
buffers = Vector{UInt8}[]
for i = 1:rb.variadicBufferCounts[varbufferidx]
diff --git a/test/reject_reason_trimmed.arrow b/test/reject_reason_trimmed.arrow
new file mode 100644
index 0000000..b6ac143
Binary files /dev/null and b/test/reject_reason_trimmed.arrow differ
diff --git a/test/runtests.jl b/test/runtests.jl
index d720fa0..1b3418e 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1069,6 +1069,16 @@ end
tbl = Arrow.Table(Arrow.tobuffer(tt[2]))
@test tbl.col16[1] == Dates.Time(0, 0, 0)
end
+
+ @testset "#511: Bug in reading Utf8View data" begin
+ t = Arrow.Table(
+ joinpath(
+ dirname(pathof(Arrow)),
+ "../test/reject_reason_trimmed.arrow",
+ ),
+ )
+ @test t.reject_reason[end] == "POST_ONLY"
+ end
end # @testset "misc"
@testset "DataAPI.metadata" begin