This is an automated email from the ASF dual-hosted git repository.
baumgold pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git
The following commit(s) were added to refs/heads/main by this push:
new 64fc730 Compatibility of schemas with nested types (#504)
64fc730 is described below
commit 64fc730f767de84835a5f1b4fc9b7831a3c2d15b
Author: Romain Poncet <[email protected]>
AuthorDate: Sun May 5 20:07:12 2024 +0100
Compatibility of schemas with nested types (#504)
Hi,
Here is a minimal example of the issue I've encountered.
```julia
struct A
x::Int
end
struct B
a::A
end
v = [B(A(i)) for i =1:3]
io = IOBuffer()
Arrow.write(io, v; file=false)
seekstart(io)
Arrow.append(io, v) # throws
```
I don't know if this is really necessary, or if I'm not using this
library properly, but this issue makes it difficult to append to Arrow
files with nested types.
Since I've only added more cases where the call to `append` can succeed,
I do not think that this creates backward-compatibility issues.
Thanks for the review!
---------
Co-authored-by: Ben Baumgold <[email protected]>
---
Project.toml | 2 +-
src/append.jl | 26 ++++++++++++++++++++++++--
test/runtests.jl | 19 +++++++++++++++++++
3 files changed, 44 insertions(+), 3 deletions(-)
diff --git a/Project.toml b/Project.toml
index b283dc9..8f0cdf7 100644
--- a/Project.toml
+++ b/Project.toml
@@ -17,7 +17,7 @@
name = "Arrow"
uuid = "69666777-d1a9-59fb-9406-91d4454c9d45"
authors = ["quinnj <[email protected]>"]
-version = "2.7.1"
+version = "2.7.2"
[deps]
ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd"
diff --git a/src/append.jl b/src/append.jl
index 5a9c259..64a638b 100644
--- a/src/append.jl
+++ b/src/append.jl
@@ -282,9 +282,31 @@ function is_equivalent_schema(sch1::Tables.Schema,
sch2::Tables.Schema)
for (t1, t2) in zip(sch1.types, sch2.types)
tt1 = Base.nonmissingtype(t1)
tt2 = Base.nonmissingtype(t2)
- if t1 == t2 ||
- (tt1 <: AbstractVector && tt2 <: AbstractVector && eltype(tt1) ==
eltype(tt2))
+ if t1 == t2
continue
+ elseif tt1 <: AbstractVector && tt2 <: AbstractVector && eltype(tt1)
== eltype(tt2)
+ continue
+ elseif isstructtype(tt1) && isstructtype(tt2)
+ is_equivalent_type_by_field(tt1, tt2)
+ else
+ return false
+ end
+ end
+ true
+end
+
+function is_equivalent_type_by_field(T1, T2)
+ n1 = fieldcount(T1)
+ n2 = fieldcount(T2)
+ n1 != n2 && return false
+
+ for i = 1:n1
+ fieldname(T1, i) == fieldname(T2, i) || return false
+
+ if fieldtype(T1, i) == fieldtype(T2, i)
+ continue
+ elseif isstructtype(T1) && isstructtype(T2)
+ is_equivalent_type_by_field(T1, T2) || continue
else
return false
end
diff --git a/test/runtests.jl b/test/runtests.jl
index ed288b3..6cf990a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1042,5 +1042,24 @@ end
@test tbl.f[2] === Foo493(4, 5)
end
end
+
+ @testset "# 504" begin
+ struct Foo504
+ x::Int
+ end
+
+ struct Bar504
+ a::Foo504
+ end
+
+ v = [Bar504(Foo504(i)) for i = 1:3]
+ io = IOBuffer()
+ Arrow.write(io, v; file=false)
+ seekstart(io)
+ Arrow.append(io, v) # testing the compatibility between the schema
of the arrow Table, and the "schema" of v (using the fallback mechanism of
Tables.jl)
+ seekstart(io)
+ t = Arrow.Table(io)
+ @test Arrow.Tables.rowcount(t) == 6
+ end
end # @testset "misc"
end