This is an automated email from the ASF dual-hosted git repository.
kevingurney pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new f1d2fc92f9 GH-37628: [MATLAB] Implement `isequal` for the
`arrow.tabular.Table` MATLAB class (#37629)
f1d2fc92f9 is described below
commit f1d2fc92f9d898fc067d46a0d032d9b117a2d7fc
Author: sgilmore10 <[email protected]>
AuthorDate: Fri Sep 8 11:37:21 2023 -0400
GH-37628: [MATLAB] Implement `isequal` for the `arrow.tabular.Table` MATLAB
class (#37629)
### Rationale for this change
Following on to #37474, #37446, #37525, and #37627, we should implement
`isequal` for the `arrow.tabular.Table` MATLAB class.
### What changes are included in this PR?
1. Add new function `arrow.tabular.internal.isequal` that both
`arrow.tabular.RecordBatch` and `arrow.tabular.Table` can use to implement
their `isequal` methods.
2. Modified `arrow.tabular.RecordBatch` to use the new `isequal` package
function to implement its `isequal` method.
3. Implemented the `isequal` method for `arrow.tabular.Table` using the new
`isequal` package function.
### Are these changes tested?
Yes, added `isequal` unit tests to `tTable.m`.
### Are there any user-facing changes?
Yes. Users can now compare `arrow.tabular.Table`s using `isequal`:
```matlab
>> t1 = table(1, "A", false, VariableNames=["Number", "String",
"Logical"]);
>> t2 = table([1; 2], ["A"; "B"], [false; false], VariableNames=["Number",
"String", "Logical"]);
>> tbl1 = arrow.table(t1);
>> tbl2 = arrow.table(t2);
>> tbl3 = arrow.table(t1);
>> isequal(tbl1, tbl2)
ans =
logical
0
>> isequal(tbl1, tbl3)
ans =
logical
1
```
* Closes: #37628
Authored-by: Sarah Gilmore <[email protected]>
Signed-off-by: Kevin Gurney <[email protected]>
---
.../src/matlab/+arrow/+tabular/+internal/isequal.m | 60 +++++++++++++++++
matlab/src/matlab/+arrow/+tabular/RecordBatch.m | 38 +----------
matlab/src/matlab/+arrow/+tabular/Table.m | 4 ++
matlab/test/arrow/tabular/tTable.m | 75 +++++++++++++++++++++-
4 files changed, 138 insertions(+), 39 deletions(-)
diff --git a/matlab/src/matlab/+arrow/+tabular/+internal/isequal.m
b/matlab/src/matlab/+arrow/+tabular/+internal/isequal.m
new file mode 100644
index 0000000000..9457620e13
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+tabular/+internal/isequal.m
@@ -0,0 +1,60 @@
+%ISEQUAL Utility function used by both arrow.tabular.RecordBatch and
+%arrow.tabular.Table to implement the isequal method.
+
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+function tf = isequal(tabularObj, varargin)
+ narginchk(2, inf);
+ tf = false;
+
+ classType = string(class(tabularObj));
+
+ schemasToCompare = cell([1 numel(varargin)]);
+ for ii = 1:numel(varargin)
+ element = varargin{ii};
+ if ~isa(element, classType)
+ % If element is not an instance of classType, then it cannot
+ % be equal to tabularObj. Return false early.
+ return;
+ end
+ schemasToCompare{ii} = element.Schema;
+ end
+
+ if ~isequal(tabularObj.Schema, schemasToCompare{:})
+ % If the schemas are not equal, then the record batches (or tables)
+ % are not equal. Return false early.
+ return;
+ end
+
+ % Function that extracts the column stored at colIndex from the
+ % record batch (or table) stored at tabularIndex in varargin.
+ getColumnFcn = @(tabularIndex, colIndex)
varargin{tabularIndex}.column(colIndex);
+
+ tabularObjIndices = 1:numel(varargin);
+ for ii = 1:tabularObj.NumColumns
+ colIndices = repmat(ii, [1 numel(tabularObjIndices)]);
+ % Gather all columns at index ii across the record batches (or
+ % tables) stored in varargin. Compare these columns with the
+ % corresponding column in obj. If they are not equal, then the
+ % record batches (or tables) are not equal. Return false.
+ columnsToCompare = arrayfun(getColumnFcn, tabularObjIndices,
colIndices, UniformOutput=false);
+ if ~isequal(tabularObj.column(ii), columnsToCompare{:})
+ return;
+ end
+ end
+ tf = true;
+end
+
diff --git a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m
b/matlab/src/matlab/+arrow/+tabular/RecordBatch.m
index 32269e9114..725039e87e 100644
--- a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m
+++ b/matlab/src/matlab/+arrow/+tabular/RecordBatch.m
@@ -95,43 +95,7 @@ classdef RecordBatch < matlab.mixin.CustomDisplay & ...
end
function tf = isequal(obj, varargin)
- narginchk(2, inf);
- tf = false;
-
- schemasToCompare = cell([1 numel(varargin)]);
- for ii = 1:numel(varargin)
- rb = varargin{ii};
- if ~isa(rb, "arrow.tabular.RecordBatch")
- % If rb is not a RecordBatch, then it cannot be equal
- % to obj. Return false early.
- return;
- end
- schemasToCompare{ii} = rb.Schema;
- end
-
- if ~isequal(obj.Schema, schemasToCompare{:})
- % If the schemas are not equal, the record batches are not
- % equal. Return false early.
- return;
- end
-
- % Function that extracts the column stored at colIndex from the
- % record batch stored at rbIndex in varargin.
- getColumnFcn = @(rbIndex, colIndex)
varargin{rbIndex}.column(colIndex);
-
- rbIndices = 1:numel(varargin);
- for ii = 1:obj.NumColumns
- colIndices = repmat(ii, [1 numel(rbIndices)]);
- % Gather all columns at index ii across the record
- % batches stored in varargin. Compare these columns with
- % the corresponding column in obj. If they are not equal,
- % then the record batches are not equal. Return false.
- columnsToCompare = arrayfun(getColumnFcn, rbIndices,
colIndices, UniformOutput=false);
- if ~isequal(obj.column(ii), columnsToCompare{:})
- return;
- end
- end
- tf = true;
+ tf = arrow.tabular.internal.isequal(obj, varargin{:});
end
end
diff --git a/matlab/src/matlab/+arrow/+tabular/Table.m
b/matlab/src/matlab/+arrow/+tabular/Table.m
index d9eb4d8409..c2f7345040 100644
--- a/matlab/src/matlab/+arrow/+tabular/Table.m
+++ b/matlab/src/matlab/+arrow/+tabular/Table.m
@@ -97,6 +97,10 @@ classdef Table < matlab.mixin.CustomDisplay &
matlab.mixin.Scalar
T = obj.table();
end
+ function tf = isequal(obj, varargin)
+ tf = arrow.tabular.internal.isequal(obj, varargin{:});
+ end
+
end
methods (Access = private)
diff --git a/matlab/test/arrow/tabular/tTable.m
b/matlab/test/arrow/tabular/tTable.m
index 8c6b9aae73..7bb366a604 100644
--- a/matlab/test/arrow/tabular/tTable.m
+++ b/matlab/test/arrow/tabular/tTable.m
@@ -64,8 +64,8 @@ classdef tTable < matlab.unittest.TestCase
% Verify that the toMATLAB method converts
% an arrow.tabular.Table to a MATLAB table as expected.
TOriginal = table([1, 2, 3]');
- arrowRecordBatch = arrow.recordBatch(TOriginal);
- TConverted = table(arrowRecordBatch);
+ arrowTable = arrow.table(TOriginal);
+ TConverted = table(arrowTable);
testCase.verifyEqual(TOriginal, TConverted);
end
@@ -593,6 +593,77 @@ classdef tTable < matlab.unittest.TestCase
"MATLAB:class:SetProhibited");
end
+ function TestIsEqualTrue(testCase)
+ % Verify two tables are considered equal if:
+ % 1. They have the same schema
+ % 2. Their corresponding columns are equal
+ import arrow.tabular.Table
+
+ a1 = arrow.array([1 2 3]);
+ a2 = arrow.array(["A" "B" "C"]);
+ a3 = arrow.array([true true false]);
+
+ t1 = Table.fromArrays(a1, a2, a3, ...
+ ColumnNames=["A", "B", "C"]);
+ t2 = Table.fromArrays(a1, a2, a3, ...
+ ColumnNames=["A", "B", "C"]);
+ testCase.verifyTrue(isequal(t1, t2));
+
+ % Compare zero-column tables
+ t3 = Table.fromArrays();
+ t4 = Table.fromArrays();
+ testCase.verifyTrue(isequal(t3, t4));
+
+ % Compare zero-row tables
+ a4 = arrow.array([]);
+ a5 = arrow.array(strings(0, 0));
+ t5 = Table.fromArrays(a4, a5, ColumnNames=["D" "E"]);
+ t6 = Table.fromArrays(a4, a5, ColumnNames=["D" "E"]);
+ testCase.verifyTrue(isequal(t5, t6));
+
+ % Call isequal with more than two arguments
+ testCase.verifyTrue(isequal(t3, t4, t3, t4));
+ end
+
+ function TestIsEqualFalse(testCase)
+ % Verify isequal returns false when expected.
+ import arrow.tabular.Table
+
+ a1 = arrow.array([1 2 3]);
+ a2 = arrow.array(["A" "B" "C"]);
+ a3 = arrow.array([true true false]);
+ a4 = arrow.array(["A" missing "C"]);
+ a5 = arrow.array([1 2]);
+ a6 = arrow.array(["A" "B"]);
+ a7 = arrow.array([true true]);
+
+ t1 = Table.fromArrays(a1, a2, a3, ...
+ ColumnNames=["A", "B", "C"]);
+ t2 = Table.fromArrays(a1, a2, a3, ...
+ ColumnNames=["D", "E", "F"]);
+ t3 = Table.fromArrays(a1, a4, a3, ...
+ ColumnNames=["A", "B", "C"]);
+ t4 = Table.fromArrays(a5, a6, a7, ...
+ ColumnNames=["A", "B", "C"]);
+ t5 = Table.fromArrays(a1, a2, a3, a1, ...
+ ColumnNames=["A", "B", "C", "D"]);
+
+ % The column names are not equal
+ testCase.verifyFalse(isequal(t1, t2));
+
+ % The columns are not equal
+ testCase.verifyFalse(isequal(t1, t3));
+
+ % The number of rows are not equal
+ testCase.verifyFalse(isequal(t1, t4));
+
+ % The number of columns are not equal
+ testCase.verifyFalse(isequal(t1, t5));
+
+ % Call isequal with more than two arguments
+ testCase.verifyFalse(isequal(t1, t2, t3, t4));
+ end
+
end
methods