This is an automated email from the ASF dual-hosted git repository.

kevingurney pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 91a492757a GH-37340: [MATLAB] The `column(index)` method of 
`arrow.tabular.RecordBatch` errors if `index` refers to an 
`arrow.array.Time32Array` column (#37347)
91a492757a is described below

commit 91a492757a2ebbe338258ecc1bcbd854e1fc7370
Author: sgilmore10 <[email protected]>
AuthorDate: Wed Aug 23 17:14:51 2023 -0400

    GH-37340: [MATLAB] The `column(index)` method of 
`arrow.tabular.RecordBatch` errors if `index` refers to an 
`arrow.array.Time32Array` column (#37347)
    
    
    
    ### Rationale for this change
    
    The `column(index)` method of `arrow.tabular.RecordBatch` errors  if 
`index` refers to an `arrow.array.Time32Array` column.
    
    ```matlab
    >> string = arrow.array(["A", "B", "C"]);
    >> time32 = arrow.array.Time32Array.fromMATLAB(seconds(1:3));
    >> rb = arrow.tabular.RecordBatch.fromArrays(string, time32, 
ColumnNames=["String", "Time32"])
    
    rb =
    
    String:   [
        "A",
        "B",
        "C"
      ]
    Time32:   [
        00:00:01,
        00:00:02,
        00:00:03
      ]
    
    >> time32Column = rb.column(2)
    
    Error using  .
    Unsupported DataType: time32[s]
    
    Error in arrow.tabular.RecordBatch/column (line 63)
                [proxyID, typeID] = obj.Proxy.getColumnByIndex(args);
    ```
    
    `column(index)` is throwing this error because the case for `Time32` was 
not added to `arrow::matlab::array::proxy::wrap(arrowArray)` in #37315. 
`column(index)` calls this function to create proxy objects around existing 
arrow arrays. Adding the case for `Time32` will resolve this bug.
    
    ### What changes are included in this PR?
    
    1. Updated `arrow::Result<proxy::Array> wrap(const 
std::shared_ptr<arrow::Array>& array)` to handle wrapping `arrow::Time32Array` 
instances within `proxy::Time32Array`s.
    2. Added a new test utility called 
`arrow.internal.test.tabular.createAllSupportedArrayTypes`, which returns two 
cell arrays: `arrowArrays` and `matlabData`. The `arrowArrays` cell array 
contains one instance of each concrete subclass of `arrow.array.Array`. The 
`matlabData` cell array contains the MATLAB arrays used to generate each array 
in `arrowArrays`. This utility can be used to create an 
`arrow.array.RecordBatch` that contains all the arrow array types that are 
supported by the [...]
    
    ### Are these changes tested?
    
    Yes. Updated the `fromArrays` test cases in `tRecordBatch` to verify the 
`column(index)` method of `arrow.type.RecordBatch` supports extracting columns 
of any arrow type (supported by the MATLAB Interface).
    
    ### Are there any user-facing changes?
    
    Yes. Fixed a bug in `arrow.tabular.RecordBatch`.
    
    ### Future Directions
    
    1. #37345
    
    * Closes: #37340
    
    Authored-by: Sarah Gilmore <[email protected]>
    Signed-off-by: Kevin Gurney <[email protected]>
---
 matlab/src/cpp/arrow/matlab/array/proxy/wrap.cc    |   2 +
 .../+test/+tabular/createAllSupportedArrayTypes.m  | 104 +++++++++++++++++++++
 matlab/test/arrow/tabular/tRecordBatch.m           |  39 ++++----
 3 files changed, 125 insertions(+), 20 deletions(-)

diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/wrap.cc 
b/matlab/src/cpp/arrow/matlab/array/proxy/wrap.cc
index dab0935959..e11e9bb766 100644
--- a/matlab/src/cpp/arrow/matlab/array/proxy/wrap.cc
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/wrap.cc
@@ -51,6 +51,8 @@ namespace arrow::matlab::array::proxy {
                 return 
std::make_shared<proxy::NumericArray<arrow::DoubleType>>(std::static_pointer_cast<arrow::DoubleArray>(array));
             case ID::TIMESTAMP:
                 return 
std::make_shared<proxy::NumericArray<arrow::TimestampType>>(std::static_pointer_cast<arrow::TimestampArray>(array));
+            case ID::TIME32:
+                return 
std::make_shared<proxy::NumericArray<arrow::Time32Type>>(std::static_pointer_cast<arrow::Time32Array>(array));
             case ID::STRING:
                 return 
std::make_shared<proxy::StringArray>(std::static_pointer_cast<arrow::StringArray>(array));
             default:
diff --git 
a/matlab/src/matlab/+arrow/+internal/+test/+tabular/createAllSupportedArrayTypes.m
 
b/matlab/src/matlab/+arrow/+internal/+test/+tabular/createAllSupportedArrayTypes.m
new file mode 100644
index 0000000000..7d3f36cb46
--- /dev/null
+++ 
b/matlab/src/matlab/+arrow/+internal/+test/+tabular/createAllSupportedArrayTypes.m
@@ -0,0 +1,104 @@
+%CREATEALLSUPPORTEDARRAYTYPES Creates a MATLAB cell array containing all 
+%the concrete subclasses of arrow.array.Array. Returns a cell array
+%containing the MATLAB data from which the arrow arrays were generated
+%as second output argument.
+
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements.  See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License.  You may obtain a copy of the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied.  See the License for the specific language governing
+% permissions and limitations under the License.
+
+function [arrowArrays, matlabData] = createAllSupportedArrayTypes(opts)
+    arguments
+        opts.NumRows(1, 1) {mustBeFinite, mustBeNonnegative} = 3;  
+    end
+
+    import arrow.type.ID
+    import arrow.array.*
+
+    classes = getArrayClassNames();
+    numClasses = numel(classes);
+    arrowArrays = cell(numClasses, 1);
+    matlabData  = cell(numClasses, 1);
+    
+    numericArrayToMatlabTypeDict = getArrowArrayToMatlabTypeDictionary();
+
+    for ii = 1:numel(classes)
+        name = classes(ii);
+        if name == "arrow.array.BooleanArray"
+            matlabData{ii} = randomLogicals(opts.NumRows);
+            arrowArrays{ii} = BooleanArray.fromMATLAB(matlabData{ii});
+        elseif isKey(numericArrayToMatlabTypeDict, name)
+            matlabType = numericArrayToMatlabTypeDict(name);
+            matlabData{ii} = randomNumbers(matlabType, opts.NumRows);
+            cmd = compose("%s.fromMATLAB(matlabData{ii})", name);
+            arrowArrays{ii} = eval(cmd);
+        elseif name == "arrow.array.StringArray"
+            matlabData{ii} = randomStrings(opts.NumRows);
+            arrowArrays{ii} = StringArray.fromMATLAB(matlabData{ii});
+        elseif name == "arrow.array.TimestampArray"
+            matlabData{ii} = randomDatetimes(opts.NumRows);
+            arrowArrays{ii} = TimestampArray.fromMATLAB(matlabData{ii});
+        elseif name == "arrow.array.Time32Array"
+            matlabData{ii} = randomDurations(opts.NumRows);
+            arrowArrays{ii} = Time32Array.fromMATLAB(matlabData{ii});
+        else
+            error("arrow:test:SupportedArrayCase", ...
+                "Missing if-branch for array class " + name); 
+        end
+    end
+end
+
+function classes = getArrayClassNames()
+    metaClass = meta.package.fromName("arrow.array").ClassList;
+
+    % Removes all Abstract classes from the list of all subclasses
+    abstract = [metaClass.Abstract];
+    metaClass(abstract) = [];
+    classes = string({metaClass.Name});
+end
+
+function dict = getArrowArrayToMatlabTypeDictionary()
+    pkg = "arrow.array";
+    unsignedTypes = compose("UInt%d", power(2, 3:6));
+    signedTypes = compose("Int%d", power(2, 3:6));
+    floatTypes = compose("Float%d", power(2, 5:6));
+    numericTypes = [unsignedTypes, signedTypes, floatTypes];
+    keys = compose("%s.%sArray", pkg, numericTypes);
+    
+    values = [lower([unsignedTypes, signedTypes]) "single" "double"];
+    dict = dictionary(keys, values);
+end
+
+function number = randomNumbers(numberType, numElements)
+    number = cast(randi(255, [numElements 1]), numberType);
+end
+
+function text = randomStrings(numElements)
+    text = string(randi(255, [numElements 1]));
+end
+
+function tf = randomLogicals(numElements)
+    number = randi(2, [numElements 1]) - 1;
+    tf = logical(number);
+end
+
+function times = randomDurations(numElements)
+    number = randi(255, [numElements 1]);
+    times = seconds(number);
+end
+
+function dates = randomDatetimes(numElements)
+    day = days(randi(255, [numElements 1]));
+    dates = datetime(2023, 8, 23) + day;
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/tabular/tRecordBatch.m 
b/matlab/test/arrow/tabular/tRecordBatch.m
index 027195d3d8..d9c3c98652 100644
--- a/matlab/test/arrow/tabular/tRecordBatch.m
+++ b/matlab/test/arrow/tabular/tRecordBatch.m
@@ -28,11 +28,17 @@ classdef tRecordBatch < matlab.unittest.TestCase
         function SupportedTypes(tc)
             % Create a table all supported MATLAB types.
             import arrow.internal.test.tabular.createTableWithSupportedTypes
+            import arrow.type.traits.traits
 
             TOriginal = createTableWithSupportedTypes();
             arrowRecordBatch = arrow.recordBatch(TOriginal);
             expectedColumnNames = string(TOriginal.Properties.VariableNames);
-            tc.verifyRecordBatch(arrowRecordBatch, expectedColumnNames, 
TOriginal);
+
+            % For each variable in the input MATLAB table, look up the 
+            % corresponding Arrow Array type using type traits.
+            expectedArrayClasses = varfun(@(var) 
traits(string(class(var))).ArrayClassName, ...
+                TOriginal, OutputFormat="uniform");
+            tc.verifyRecordBatch(arrowRecordBatch, expectedColumnNames, 
expectedArrayClasses, TOriginal);
         end
 
         function ToMATLAB(tc)
@@ -125,19 +131,16 @@ classdef tRecordBatch < matlab.unittest.TestCase
         % RecordBatch when given a comma-separated list of
         % arrow.array.Array values.
             import arrow.tabular.RecordBatch
-            import arrow.internal.test.tabular.createTableWithSupportedTypes
+            import arrow.internal.test.tabular.createAllSupportedArrayTypes
 
-            TOriginal = createTableWithSupportedTypes();
-
-            arrowArrays = cell([1 width(TOriginal)]);
-            for ii = 1:width(TOriginal)
-                arrowArrays{ii} = arrow.array(TOriginal.(ii));
-            end
+            [arrowArrays, matlabData] = createAllSupportedArrayTypes();
+            TOriginal = table(matlabData{:});
 
             arrowRecordBatch = RecordBatch.fromArrays(arrowArrays{:});
             expectedColumnNames = compose("Column%d", 1:width(TOriginal));
             TOriginal.Properties.VariableNames = expectedColumnNames;
-            tc.verifyRecordBatch(arrowRecordBatch, expectedColumnNames, 
TOriginal);
+            expectedArrayClasses = cellfun(@(c) string(class(c)), arrowArrays, 
UniformOutput=true);
+            tc.verifyRecordBatch(arrowRecordBatch, expectedColumnNames, 
expectedArrayClasses, TOriginal);
         end
 
         function FromArraysWithColumnNamesProvided(tc)
@@ -145,19 +148,16 @@ classdef tRecordBatch < matlab.unittest.TestCase
         % RecordBatch when given a comma-separated list of
         % arrow.array.Array values and the ColumnNames nv-pair is provided.
             import arrow.tabular.RecordBatch
-            import arrow.internal.test.tabular.createTableWithSupportedTypes
-
-            TOriginal = createTableWithSupportedTypes();
+            import arrow.internal.test.tabular.createAllSupportedArrayTypes
 
-            arrowArrays = cell([1 width(TOriginal)]);
-            for ii = 1:width(TOriginal)
-                arrowArrays{ii} = arrow.array(TOriginal.(ii));
-            end
+            [arrowArrays, matlabData] = createAllSupportedArrayTypes();
+            TOriginal = table(matlabData{:});
 
             columnNames = compose("MyVar%d", 1:numel(arrowArrays));
             arrowRecordBatch = RecordBatch.fromArrays(arrowArrays{:}, 
ColumnNames=columnNames);
             TOriginal.Properties.VariableNames = columnNames;
-            tc.verifyRecordBatch(arrowRecordBatch, columnNames, TOriginal);
+            expectedArrayClasses = cellfun(@(c) string(class(c)), arrowArrays, 
UniformOutput=true);
+            tc.verifyRecordBatch(arrowRecordBatch, columnNames, 
expectedArrayClasses, TOriginal);
         end
 
         function FromArraysUnequalArrayLengthsError(tc)
@@ -226,7 +226,7 @@ classdef tRecordBatch < matlab.unittest.TestCase
     end
 
     methods
-        function verifyRecordBatch(tc, recordBatch, expectedColumnNames, 
expectedTable)
+        function verifyRecordBatch(tc, recordBatch, expectedColumnNames, 
expectedArrayClasses, expectedTable)
             tc.verifyEqual(recordBatch.NumColumns, 
int32(width(expectedTable)));
             tc.verifyEqual(recordBatch.ColumnNames, expectedColumnNames);
             convertedTable = recordBatch.table();
@@ -234,8 +234,7 @@ classdef tRecordBatch < matlab.unittest.TestCase
              for ii = 1:recordBatch.NumColumns
                 column = recordBatch.column(ii);
                 tc.verifyEqual(column.toMATLAB(), expectedTable{:, ii});
-                traits = 
arrow.type.traits.traits(string(class(expectedTable{:, ii})));
-                tc.verifyInstanceOf(column, traits.ArrayClassName);
+                tc.verifyInstanceOf(column, expectedArrayClasses(ii));
              end
         end
     end

Reply via email to