sgilmore10 commented on code in PR #37475:
URL: https://github.com/apache/arrow/pull/37475#discussion_r1310727987
##########
matlab/src/matlab/+arrow/+tabular/RecordBatch.m:
##########
@@ -54,13 +54,17 @@
function arrowArray = column(obj, idx)
import arrow.internal.validate.*
- idx = index.numeric(idx, "int32");
- % TODO: Consider vectorizing column() in the future to support
- % extracting multiple columns at once.
- validateattributes(idx, "int32", "scalar");
-
- args = struct(Index=idx);
- [proxyID, typeID] = obj.Proxy.getColumnByIndex(args);
+ idx = index.numericOrString(idx, "int32");
+
+ if isnumeric(idx)
+ validateattributes(idx, "int32", "scalar");
+ args = struct(Index=idx);
+ [proxyID, typeID] = obj.Proxy.getColumnByIndex(args);
+ else
+ validateattributes(idx, "string", "scalar");
Review Comment:
Same comment as above. You could use an `arguments` block.
##########
matlab/test/arrow/tabular/tRecordBatch.m:
##########
@@ -223,6 +223,169 @@ function SchemaNoSetter(tc)
"MATLAB:class:SetProhibited");
end
+ function GetColumnByName(testCase)
+ % Verify that columns can be accessed using a field name.
+ recordBatch = arrow.tabular.RecordBatch.fromArrays(...
+ arrow.array([1, 2, 3]), ...
+ arrow.array(["A", "B", "C"]), ...
+ arrow.array([true, false, true]), ...
+ ColumnNames=["A", "B", "C"] ...
+ );
+
+ expected = arrow.array([1, 2, 3]);
+ actual = recordBatch.column("A");
+ testCase.verifyEqual(actual, expected);
+
+ expected = arrow.array(["A", "B", "C"]);
+ actual = recordBatch.column("B");
+ testCase.verifyEqual(actual, expected);
+
+ expected = arrow.array([true, false, true]);
+ actual = recordBatch.column("C");
+ testCase.verifyEqual(actual, expected);
+ end
+
+ function GetColumnByNameWithEmptyString(testCase)
+ % Verify that a column whose Field name is the empty string ("")
+ % can be accessed using the column() method.
+ recordBatch = arrow.tabular.RecordBatch.fromArrays(...
+ arrow.array([1, 2, 3]), ...
+ arrow.array(["A", "B", "C"]), ...
+ arrow.array([true, false, true]), ...
+ ColumnNames=["A", "", "C"] ...
+ );
+
+ expected = arrow.array(["A", "B", "C"]);
+ actual = recordBatch.column("");
+ testCase.verifyEqual(actual, expected)
+ end
+
+ function GetColumnByNameWithWhitespace(testCase)
+ % Verify that a column whose Field name contains only whitespace
+ % characters can be accessed using the column() method.
+ recordBatch = arrow.tabular.RecordBatch.fromArrays(...
+ arrow.array([1, 2, 3]), ...
+ arrow.array(["A", "B", "C"]), ...
+ arrow.array([true, false, true]), ...
+ ColumnNames=[" ", "  ", "   "] ...
+ );
+
+ expected = arrow.array([1, 2, 3]);
+ actual = recordBatch.column(" ");
+ testCase.verifyEqual(actual, expected);
+
+ expected = arrow.array(["A", "B", "C"]);
+ actual = recordBatch.column("  ");
+ testCase.verifyEqual(actual, expected);
+
+ expected = arrow.array([true, false, true]);
+ actual = recordBatch.column("   ");
+ testCase.verifyEqual(actual, expected);
+ end
+
+ function ErrorIfColumnNameDoesNotExist(testCase)
+ % Verify that an error is thrown when trying to access a column
+ % with a Field name that is not part of the Schema of the RecordBatch.
+ recordBatch = arrow.tabular.RecordBatch.fromArrays(...
+ arrow.array([1, 2, 3]), ...
+ arrow.array(["A", "B", "C"]), ...
+ arrow.array([true, false, true]), ...
+ ColumnNames=["A", "B", "C"] ...
+ );
+
+ % Matching should be case sensitive.
+ name = "a";
+ testCase.verifyError(@() recordBatch.column(name), "arrow:tabular:schema:AmbiguousFieldName");
+
+ name = "aA";
+ testCase.verifyError(@() recordBatch.column(name), "arrow:tabular:schema:AmbiguousFieldName");
+
+ name = "D";
+ testCase.verifyError(@() recordBatch.column(name), "arrow:tabular:schema:AmbiguousFieldName");
+
+ name = "";
+ testCase.verifyError(@() recordBatch.column(name), "arrow:tabular:schema:AmbiguousFieldName");
+
+ name = " ";
+ testCase.verifyError(@() recordBatch.column(name), "arrow:tabular:schema:AmbiguousFieldName");
+ end
+
+ function ErrorIfAmbiguousColumnName(testCase)
+ % Verify that an error is thrown when trying to access a column
+ % with a name that is ambiguous / occurs more than once in the
+ % Schema of the RecordBatch.
+ recordBatch = arrow.tabular.RecordBatch.fromArrays(...
+ arrow.array([1, 2, 3]), ...
+ arrow.array(["A", "B", "C"]), ...
+ arrow.array([true, false, true]), ...
+ arrow.array([days(1), days(2), days(3)]), ...
+ ColumnNames=["A", "A", "B", "B"] ...
+ );
+
+ name = "A";
+ testCase.verifyError(@() recordBatch.column(name), "arrow:tabular:schema:AmbiguousFieldName");
+
+ name = "B";
+ testCase.verifyError(@() recordBatch.column(name), "arrow:tabular:schema:AmbiguousFieldName");
+ end
+
+ function GetColumnByNameWithChar(testCase)
+ % Verify that the column method works when supplied a char
+ % vector as input.
+ recordBatch = arrow.tabular.RecordBatch.fromArrays(...
+ arrow.array([1, 2, 3]), ...
+ arrow.array(["A", "B", "C"]), ...
+ arrow.array([true, false, true]), ...
+ ColumnNames=["", "B", "123"] ...
+ );
+
+ % Should match the first column whose name is the
+ % empty string ("").
+ name = char.empty(0, 0);
+ expected = arrow.array([1, 2, 3]);
+ actual = recordBatch.column(name);
+ testCase.verifyEqual(actual, expected);
+
+ name = char.empty(0, 1);
+ expected = arrow.array([1, 2, 3]);
+ actual = recordBatch.column(name);
+ testCase.verifyEqual(actual, expected);
+
+ name = char.empty(1, 0);
+ expected = arrow.array([1, 2, 3]);
+ actual = recordBatch.column(name);
+ testCase.verifyEqual(actual, expected);
+
+ % Should match the second column whose name is "B".
+ name = 'B';
+ expected = arrow.array(["A", "B", "C"]);
+ actual = recordBatch.column(name);
+ testCase.verifyEqual(actual, expected);
+
+ % Should match the third field whose name is "123".
Review Comment:
field -> column
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]