This is an automated email from the ASF dual-hosted git repository.

kevingurney pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 152be67100 GH-37041: [MATLAB] Implement Feather V1 Reader using new 
MATLAB Interface APIs (#37044)
152be67100 is described below

commit 152be67100cdd367a3e6064988085e3b327ad0fe
Author: Kevin Gurney <[email protected]>
AuthorDate: Mon Aug 7 16:26:16 2023 -0400

    GH-37041: [MATLAB] Implement Feather V1 Reader using new MATLAB Interface 
APIs (#37044)
    
    ### Rationale for this change
    
    Now that we have the basic building blocks for tabular IO in the MATLAB 
Interface (Array, Schema, RecordBatch), we can implement a Feather V1 reader in 
terms of the new APIs.
    
    This is a follow-up to #37043, where a new Feather V1 internal `Writer` 
object was added.
    
    ### What changes are included in this PR?
    
    1. Added a new class called arrow.internal.io.feather.Reader which can be 
used to read Feather V1 files. It has one public property named `Filename` and 
one public method named `read`.
    
    **Example Usage:**
    
    ```matlab
    >> T = array2table(rand(3))
    
    T =
    
      3x3 table
    
         Var1        Var2       Var3
        _______    ________    _______
    
        0.79221    0.035712    0.67874
        0.95949     0.84913    0.75774
        0.65574     0.93399    0.74313
    
    >> filename = "test.feather";
    
    >> featherwrite(filename, T)
    
    >> reader = arrow.internal.io.feather.Reader(filename)
    
    reader =
    
      Reader with properties:
    
        Filename: "test.feather"
    
    >> T = reader.read()
    
    T =
    
      3x3 table
    
         Var1        Var2       Var3
        _______    ________    _______
    
        0.79221    0.035712    0.67874
        0.95949     0.84913    0.75774
        0.65574     0.93399    0.74313
    ```
    
    ### Are these changes tested?
    
    Yes.
    
    1. Added `Reader` to `feather/tRoundTrip.m`.
    
    ### Are there any user-facing changes?
    
    No.
    
    These are only internal objects right now.
    
    ### Future Directions
    
    1. Re-implement `featherread` in terms of the new `Reader` object.
    2. Remove legacy feather code and infrastructure.
    
    ### Notes
    
    1. For conciseness, I renamed the C++ Proxy class `FeatherWriter` to 
`Writer` since it is already inside of a `feather` namespace / "package".
    * Closes: #37041
    
    Authored-by: Kevin Gurney <[email protected]>
    Signed-off-by: Kevin Gurney <[email protected]>
---
 matlab/src/cpp/arrow/matlab/error/error.h          |  6 ++
 .../cpp/arrow/matlab/io/feather/proxy/reader.cc    | 98 ++++++++++++++++++++++
 .../feather/proxy/{feather_writer.h => reader.h}   | 20 ++---
 .../feather/proxy/{feather_writer.cc => writer.cc} | 16 ++--
 .../feather/proxy/{feather_writer.h => writer.h}   |  6 +-
 matlab/src/cpp/arrow/matlab/proxy/factory.cc       |  6 +-
 .../+internal/+io/+feather/{Writer.m => Reader.m}  | 30 ++++---
 .../matlab/+arrow/+internal/+io/+feather/Writer.m  |  4 +-
 matlab/test/arrow/io/feather/tRoundTrip.m          |  5 ++
 matlab/tools/cmake/BuildMatlabArrowInterface.cmake |  4 +-
 10 files changed, 154 insertions(+), 41 deletions(-)

diff --git a/matlab/src/cpp/arrow/matlab/error/error.h 
b/matlab/src/cpp/arrow/matlab/error/error.h
index e1d2982f28..deac5e26fc 100644
--- a/matlab/src/cpp/arrow/matlab/error/error.h
+++ b/matlab/src/cpp/arrow/matlab/error/error.h
@@ -181,7 +181,13 @@ namespace arrow::matlab::error {
     static const char* RECORD_BATCH_NUMERIC_INDEX_WITH_EMPTY_RECORD_BATCH = 
"arrow:tabular:recordbatch:NumericIndexWithEmptyRecordBatch";
     static const char* RECORD_BATCH_INVALID_NUMERIC_COLUMN_INDEX = 
"arrow:tabular:recordbatch:InvalidNumericColumnIndex";
     static const char* FAILED_TO_OPEN_FILE_FOR_WRITE = 
"arrow:io:FailedToOpenFileForWrite";
+    static const char* FAILED_TO_OPEN_FILE_FOR_READ = 
"arrow:io:FailedToOpenFileForRead";
     static const char* FEATHER_FAILED_TO_WRITE_TABLE = 
"arrow:io:feather:FailedToWriteTable";
     static const char* TABLE_FROM_RECORD_BATCH = "arrow:table:FromRecordBatch";
+    static const char* FEATHER_FAILED_TO_CREATE_READER = 
"arrow:io:feather:FailedToCreateReader";
+    static const char* FEATHER_VERSION_2 = "arrow:io:feather:FeatherVersion2";
+    static const char* FEATHER_VERSION_UNKNOWN = 
"arrow:io:feather:FeatherVersionUnknown";
+    static const char* FEATHER_FAILED_TO_READ_TABLE = 
"arrow:io:feather:FailedToReadTable";
+    static const char* FEATHER_FAILED_TO_READ_RECORD_BATCH = 
"arrow:io:feather:FailedToReadRecordBatch";
 
 }
diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc 
b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc
new file mode 100644
index 0000000000..a264d24ecb
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.cc
@@ -0,0 +1,98 @@
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "libmexclass/proxy/ProxyManager.h"
+
+#include "arrow/matlab/error/error.h"
+#include "arrow/matlab/io/feather/proxy/reader.h"
+#include "arrow/matlab/tabular/proxy/record_batch.h"
+
+#include "arrow/util/utf8.h"
+
+#include "arrow/result.h"
+
+#include "arrow/io/file.h"
+#include "arrow/ipc/feather.h"
+#include "arrow/table.h"
+
+namespace arrow::matlab::io::feather::proxy {
+
+    Reader::Reader(const std::string& filename) : filename{filename} {
+        REGISTER_METHOD(Reader, read);
+        REGISTER_METHOD(Reader, getFilename);
+    }
+
+    libmexclass::proxy::MakeResult Reader::make(const 
libmexclass::proxy::FunctionArguments& constructor_arguments) {
+        namespace mda = ::matlab::data;
+        using ReaderProxy = arrow::matlab::io::feather::proxy::Reader;
+
+        mda::StructArray args = constructor_arguments[0];
+        const mda::StringArray filename_utf16_mda = args[0]["Filename"];
+        const auto filename_utf16 = std::u16string(filename_utf16_mda[0]);
+        MATLAB_ASSIGN_OR_ERROR(const auto filename, 
arrow::util::UTF16StringToUTF8(filename_utf16), 
error::UNICODE_CONVERSION_ERROR_ID);
+
+        return std::make_shared<ReaderProxy>(filename);
+    }
+
+    void Reader::read(libmexclass::proxy::method::Context& context) {
+        namespace mda = ::matlab::data;
+        using namespace libmexclass::proxy;
+        using RecordBatchProxy = arrow::matlab::tabular::proxy::RecordBatch;
+
+        mda::ArrayFactory factory;
+
+        // Create a file input stream.
+        MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto source, 
arrow::io::ReadableFile::Open(filename, arrow::default_memory_pool()), context, 
error::FAILED_TO_OPEN_FILE_FOR_READ);
+
+        // Create a Reader from the file input stream.
+        MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto reader, 
arrow::ipc::feather::Reader::Open(source), context, 
error::FEATHER_FAILED_TO_CREATE_READER);
+
+        // Error if not Feather V1.
+        const auto version = reader->version();
+        if (version == ipc::feather::kFeatherV2Version) {
+            
MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(Status::NotImplemented("Support for Feather 
V2 has not been implemented."), context, error::FEATHER_VERSION_2);
+        } else if (version != ipc::feather::kFeatherV1Version) {
+            MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(Status::Invalid("Unknown 
Feather format version."), context, error::FEATHER_VERSION_UNKNOWN);
+        }
+
+        // Read a Table from the file.
+        std::shared_ptr<arrow::Table> table = nullptr;
+        MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(reader->Read(&table), context, 
error::FEATHER_FAILED_TO_READ_TABLE);
+
+        // Get the first RecordBatch from the Table.
+        arrow::TableBatchReader table_batch_reader{table};
+        std::shared_ptr<arrow::RecordBatch> record_batch = nullptr;
+        
MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(table_batch_reader.ReadNext(&record_batch), 
context, error::FEATHER_FAILED_TO_READ_RECORD_BATCH);
+
+        // Create a Proxy from the first RecordBatch.
+        auto record_batch_proxy = 
std::make_shared<RecordBatchProxy>(record_batch);
+        const auto record_batch_proxy_id = 
ProxyManager::manageProxy(record_batch_proxy);
+
+        const auto record_batch_proxy_id_mda = 
factory.createScalar(record_batch_proxy_id);
+
+        context.outputs[0] = record_batch_proxy_id_mda;
+    }
+
+    void Reader::getFilename(libmexclass::proxy::method::Context& context) {
+        namespace mda = ::matlab::data;
+        mda::ArrayFactory factory;
+
+        MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto filename_utf16, 
arrow::util::UTF8StringToUTF16(filename), context, 
error::UNICODE_CONVERSION_ERROR_ID);
+        auto filename_utf16_mda = factory.createScalar(filename_utf16);
+        context.outputs[0] = filename_utf16_mda;
+    }
+
+}
diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h 
b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.h
similarity index 73%
copy from matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h
copy to matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.h
index dadb479887..fb6c06de86 100644
--- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h
+++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/reader.h
@@ -17,25 +17,23 @@
 
 #pragma once
 
-#include "arrow/status.h"
-
 #include "libmexclass/proxy/Proxy.h"
 
 namespace arrow::matlab::io::feather::proxy {
 
-    class FeatherWriter : public libmexclass::proxy::Proxy {
+    class Reader : public libmexclass::proxy::Proxy {
         public:
-            FeatherWriter(const std::string& filename);
-        
-            ~FeatherWriter() {}
+            Reader(const std::string& filename);
+
+            virtual ~Reader() {}
+
+            static libmexclass::proxy::MakeResult make(const 
libmexclass::proxy::FunctionArguments& constructor_arguments);
 
-            static libmexclass::proxy::MakeResult make(const 
libmexclass::proxy::FunctionArguments& constructor_arguments);  
-        
         protected:
+            void read(libmexclass::proxy::method::Context& context);
             void getFilename(libmexclass::proxy::method::Context& context);
-            void write(libmexclass::proxy::method::Context& context);
 
-        private:
-            const std::string filename; 
+            const std::string filename;
     };
+
 }
diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc 
b/matlab/src/cpp/arrow/matlab/io/feather/proxy/writer.cc
similarity index 86%
rename from matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc
rename to matlab/src/cpp/arrow/matlab/io/feather/proxy/writer.cc
index a27e1fb0e6..c71c9ae7a5 100644
--- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc
+++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/writer.cc
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "arrow/matlab/io/feather/proxy/feather_writer.h"
+#include "arrow/matlab/io/feather/proxy/writer.h"
 #include "arrow/matlab/tabular/proxy/record_batch.h"
 #include "arrow/matlab/error/error.h"
 
@@ -30,12 +30,12 @@
 
 namespace arrow::matlab::io::feather::proxy {
 
-    FeatherWriter::FeatherWriter(const std::string& filename) : 
filename{filename} {
-        REGISTER_METHOD(FeatherWriter, getFilename);
-        REGISTER_METHOD(FeatherWriter, write);
+    Writer::Writer(const std::string& filename) : filename{filename} {
+        REGISTER_METHOD(Writer, getFilename);
+        REGISTER_METHOD(Writer, write);
     }
 
-    libmexclass::proxy::MakeResult FeatherWriter::make(const 
libmexclass::proxy::FunctionArguments& constructor_arguments) {
+    libmexclass::proxy::MakeResult Writer::make(const 
libmexclass::proxy::FunctionArguments& constructor_arguments) {
         namespace mda = ::matlab::data;
         mda::StructArray opts = constructor_arguments[0];
         const mda::StringArray filename_mda = opts[0]["Filename"];
@@ -45,10 +45,10 @@ namespace arrow::matlab::io::feather::proxy {
                                arrow::util::UTF16StringToUTF8(filename_utf16),
                                error::UNICODE_CONVERSION_ERROR_ID);
         
-        return std::make_shared<FeatherWriter>(filename_utf8);
+        return std::make_shared<Writer>(filename_utf8);
     }
 
-    void FeatherWriter::getFilename(libmexclass::proxy::method::Context& 
context) {
+    void Writer::getFilename(libmexclass::proxy::method::Context& context) {
         namespace mda = ::matlab::data;
         MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto utf16_filename,
                                             
arrow::util::UTF8StringToUTF16(filename), 
@@ -59,7 +59,7 @@ namespace arrow::matlab::io::feather::proxy {
         context.outputs[0] = str_mda;
     }
 
-    void FeatherWriter::write(libmexclass::proxy::method::Context& context) {
+    void Writer::write(libmexclass::proxy::method::Context& context) {
         namespace mda = ::matlab::data;
         mda::StructArray opts = context.inputs[0];
         const mda::TypedArray<uint64_t> record_batch_proxy_id_mda = 
opts[0]["RecordBatchProxyID"];
diff --git a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h 
b/matlab/src/cpp/arrow/matlab/io/feather/proxy/writer.h
similarity index 89%
rename from matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h
rename to matlab/src/cpp/arrow/matlab/io/feather/proxy/writer.h
index dadb479887..21dc70f432 100644
--- a/matlab/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.h
+++ b/matlab/src/cpp/arrow/matlab/io/feather/proxy/writer.h
@@ -23,11 +23,11 @@
 
 namespace arrow::matlab::io::feather::proxy {
 
-    class FeatherWriter : public libmexclass::proxy::Proxy {
+    class Writer : public libmexclass::proxy::Proxy {
         public:
-            FeatherWriter(const std::string& filename);
+            Writer(const std::string& filename);
         
-            ~FeatherWriter() {}
+            ~Writer() {}
 
             static libmexclass::proxy::MakeResult make(const 
libmexclass::proxy::FunctionArguments& constructor_arguments);  
         
diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc 
b/matlab/src/cpp/arrow/matlab/proxy/factory.cc
index 7a2a4f3192..bce875bb9f 100644
--- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc
+++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc
@@ -25,7 +25,8 @@
 #include "arrow/matlab/type/proxy/string_type.h"
 #include "arrow/matlab/type/proxy/timestamp_type.h"
 #include "arrow/matlab/type/proxy/field.h"
-#include "arrow/matlab/io/feather/proxy/feather_writer.h"
+#include "arrow/matlab/io/feather/proxy/writer.h"
+#include "arrow/matlab/io/feather/proxy/reader.h"
 
 #include "factory.h"
 
@@ -61,7 +62,8 @@ libmexclass::proxy::MakeResult Factory::make_proxy(const 
ClassName& class_name,
     REGISTER_PROXY(arrow.type.proxy.BooleanType    , 
arrow::matlab::type::proxy::PrimitiveCType<bool>);
     REGISTER_PROXY(arrow.type.proxy.StringType     , 
arrow::matlab::type::proxy::StringType);
     REGISTER_PROXY(arrow.type.proxy.TimestampType  , 
arrow::matlab::type::proxy::TimestampType);
-    REGISTER_PROXY(arrow.io.feather.proxy.FeatherWriter  , 
arrow::matlab::io::feather::proxy::FeatherWriter);
+    REGISTER_PROXY(arrow.io.feather.proxy.Writer   , 
arrow::matlab::io::feather::proxy::Writer);
+    REGISTER_PROXY(arrow.io.feather.proxy.Reader   , 
arrow::matlab::io::feather::proxy::Reader);
 
     return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, "Did not 
find matching C++ proxy for " + class_name};
 };
diff --git a/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m 
b/matlab/src/matlab/+arrow/+internal/+io/+feather/Reader.m
similarity index 63%
copy from matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m
copy to matlab/src/matlab/+arrow/+internal/+io/+feather/Reader.m
index 470c41fd5b..80da7294d2 100644
--- a/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m
+++ b/matlab/src/matlab/+arrow/+internal/+io/+feather/Reader.m
@@ -1,5 +1,3 @@
-%WRITER Class for writing feather V1 files.
-
 % Licensed to the Apache Software Foundation (ASF) under one or more
 % contributor license agreements.  See the NOTICE file distributed with
 % this work for additional information regarding copyright ownership.
@@ -14,35 +12,41 @@
 % WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 % implied.  See the License for the specific language governing
 % permissions and limitations under the License.
-classdef Writer < matlab.mixin.Scalar
 
-    properties(Hidden, SetAccess=private, GetAccess=public)
+classdef Reader
+%READER An internal Reader object for reading Feather files.
+
+    properties (GetAccess=public, SetAccess=private, Hidden)
         Proxy
     end
 
-    properties(Dependent)
+    properties (Dependent, SetAccess=private, GetAccess=public)
+        % Name of the file to read.
         Filename
     end
 
     methods
-        function obj = Writer(filename)
+
+        function obj = Reader(filename)
             arguments
                 filename(1, 1) {mustBeNonmissing, mustBeNonzeroLengthText}
             end
 
             args = struct(Filename=filename);
-            proxyName = "arrow.io.feather.proxy.FeatherWriter";
-            obj.Proxy = arrow.internal.proxy.create(proxyName, args);
+            obj.Proxy = 
arrow.internal.proxy.create("arrow.io.feather.proxy.Reader", args);
         end
 
-        function write(obj, T)
-            rb = arrow.recordbatch(T);
-            args = struct(RecordBatchProxyID=rb.Proxy.ID);
-            obj.Proxy.write(args);
+        function T = read(obj)
+            recordBatchProxyID = obj.Proxy.read();
+            proxy = 
libmexclass.proxy.Proxy(Name="arrow.tabular.proxy.RecordBatch", 
ID=recordBatchProxyID);
+            recordBatch = arrow.tabular.RecordBatch(proxy);
+            T = recordBatch.toMATLAB();
         end
 
         function filename = get.Filename(obj)
             filename = obj.Proxy.getFilename();
         end
+
     end
-end
\ No newline at end of file
+
+end
diff --git a/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m 
b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m
index 470c41fd5b..37c785f10a 100644
--- a/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m
+++ b/matlab/src/matlab/+arrow/+internal/+io/+feather/Writer.m
@@ -31,7 +31,7 @@ classdef Writer < matlab.mixin.Scalar
             end
 
             args = struct(Filename=filename);
-            proxyName = "arrow.io.feather.proxy.FeatherWriter";
+            proxyName = "arrow.io.feather.proxy.Writer";
             obj.Proxy = arrow.internal.proxy.create(proxyName, args);
         end
 
@@ -45,4 +45,4 @@ classdef Writer < matlab.mixin.Scalar
             filename = obj.Proxy.getFilename();
         end
     end
-end
\ No newline at end of file
+end
diff --git a/matlab/test/arrow/io/feather/tRoundTrip.m 
b/matlab/test/arrow/io/feather/tRoundTrip.m
index d56152be6d..e735d196c1 100644
--- a/matlab/test/arrow/io/feather/tRoundTrip.m
+++ b/matlab/test/arrow/io/feather/tRoundTrip.m
@@ -49,4 +49,9 @@ end
 function featherwrite(T, filename)
     writer = arrow.internal.io.feather.Writer(filename);
     writer.write(T);
+end
+
+function T = featherread(filename)
+    reader = arrow.internal.io.feather.Reader(filename);
+    T = reader.read();
 end
\ No newline at end of file
diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake 
b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
index 1d57999417..c19740f181 100644
--- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
+++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
@@ -56,8 +56,8 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES 
"${CMAKE_SOURCE_DIR}/src/cpp/a
                                                   
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/timestamp_type.cc"
                                                   
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/field.cc"
                                                   
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/wrap.cc"
-                                                  
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy/feather_writer.cc")
-
+                                                  
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy/writer.cc"
+                                                  
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/feather/proxy/reader.cc")
 
 
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR 
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy")

Reply via email to