This is an automated email from the ASF dual-hosted git repository.

baumgold pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git


The following commit(s) were added to refs/heads/main by this push:
     new 64fc730  Compatibility of schemas with nested types (#504)
64fc730 is described below

commit 64fc730f767de84835a5f1b4fc9b7831a3c2d15b
Author: Romain Poncet <[email protected]>
AuthorDate: Sun May 5 20:07:12 2024 +0100

    Compatibility of schemas with nested types (#504)
    
    Hi,
    
    Here is a minimal example of the issue I've encountered.
    
    ```julia
    struct A
        x::Int
    end
    
    struct B
        a::A
    end
    
    v = [B(A(i)) for i =1:3]
    
    io = IOBuffer()
    Arrow.write(io, v; file=false)
    seekstart(io)
    Arrow.append(io, v) # throws
    ```
    
    I don't know if this is really necessary, or if I'm not using this
    library properly, but this issue makes it difficult to append to arrow
    files with nested types.
    
    Since I've only added more cases where the call to `append` can succeed,
    I do not think that this creates backward-compatibility issues.
    
    Thanks for the review!
    
    ---------
    
    Co-authored-by: Ben Baumgold <[email protected]>
---
 Project.toml     |  2 +-
 src/append.jl    | 26 ++++++++++++++++++++++++--
 test/runtests.jl | 19 +++++++++++++++++++
 3 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/Project.toml b/Project.toml
index b283dc9..8f0cdf7 100644
--- a/Project.toml
+++ b/Project.toml
@@ -17,7 +17,7 @@
 name = "Arrow"
 uuid = "69666777-d1a9-59fb-9406-91d4454c9d45"
 authors = ["quinnj <[email protected]>"]
-version = "2.7.1"
+version = "2.7.2"
 
 [deps]
 ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd"
diff --git a/src/append.jl b/src/append.jl
index 5a9c259..64a638b 100644
--- a/src/append.jl
+++ b/src/append.jl
@@ -282,9 +282,31 @@ function is_equivalent_schema(sch1::Tables.Schema, 
sch2::Tables.Schema)
     for (t1, t2) in zip(sch1.types, sch2.types)
         tt1 = Base.nonmissingtype(t1)
         tt2 = Base.nonmissingtype(t2)
-        if t1 == t2 ||
-           (tt1 <: AbstractVector && tt2 <: AbstractVector && eltype(tt1) == 
eltype(tt2))
+        if t1 == t2
             continue
+        elseif tt1 <: AbstractVector && tt2 <: AbstractVector && eltype(tt1) 
== eltype(tt2)
+            continue
+        elseif isstructtype(tt1) && isstructtype(tt2)
+            is_equivalent_type_by_field(tt1, tt2)
+        else
+            return false
+        end
+    end
+    true
+end
+
+function is_equivalent_type_by_field(T1, T2)
+    n1 = fieldcount(T1)
+    n2 = fieldcount(T2)
+    n1 != n2 && return false
+
+    for i = 1:n1
+        fieldname(T1, i) == fieldname(T2, i) || return false
+
+        if fieldtype(T1, i) == fieldtype(T2, i)
+            continue
+        elseif isstructtype(T1) && isstructtype(T2)
+            is_equivalent_type_by_field(T1, T2) || continue
         else
             return false
         end
diff --git a/test/runtests.jl b/test/runtests.jl
index ed288b3..6cf990a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1042,5 +1042,24 @@ end
                 @test tbl.f[2] === Foo493(4, 5)
             end
         end
+
+        @testset "# 504" begin
+            struct Foo504
+                x::Int
+            end
+
+            struct Bar504
+                a::Foo504
+            end
+
+            v = [Bar504(Foo504(i)) for i = 1:3]
+            io = IOBuffer()
+            Arrow.write(io, v; file=false)
+            seekstart(io)
+            Arrow.append(io, v) # testing the compatibility between the schema 
of the arrow Table, and the "schema" of v (using the fallback mechanism of 
Tables.jl)
+            seekstart(io)
+            t = Arrow.Table(io)
+            @test Arrow.Tables.rowcount(t) == 6
+        end
     end # @testset "misc"
 end

Reply via email to