This is an automated email from the ASF dual-hosted git repository. sgilmore pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push: new 2bfbfc82ca GH-38532: [MATLAB] Add a `validate` method to all `arrow.array.Array` classes (#47059) 2bfbfc82ca is described below commit 2bfbfc82cab3998c3b751e515664893b9648d8e5 Author: Sarah Gilmore <74676073+sgilmor...@users.noreply.github.com> AuthorDate: Mon Jul 14 11:04:47 2025 -0400 GH-38532: [MATLAB] Add a `validate` method to all `arrow.array.Array` classes (#47059) ### Rationale for this change As a follow up to #38531 (see https://github.com/apache/arrow/pull/38531#discussion_r1377981403), we should consider adding a `validate` method to all `arrow.array.Array` classes, which would allow users to explicitly validate the contents of an `arrow.array.Array` after it is created. ### What changes are included in this PR? Added `validate()` as a method to `arrow.array.Array`. This method has one name-value pair which is called `ValidationMode`. `ValidationMode` can either be specified as `"minimal"` or `"full"`. By default, `ValidationMode="minimal"`. (Note: in the committed code shown in the diff below, this name-value argument is named `Mode`, e.g. `array.validate(Mode="full")`; the example that follows uses the earlier name `ValidationMode`.) **Example Usage:** ```matlab >> offsets = arrow.array(int32([0 1 0])); >> values = arrow.array(1:3); >> array = arrow.array.ListArray.fromArrays(offsets, values); >> array.validate(ValidationMode="minimal") >> array.validate(ValidationMode="full") Error using . (line 63) Offset invariant failure: non-monotonic offset at slot 2: 0 < 1 Error in arrow.array.Array/validate (line 68) obj.Proxy.validate(struct(ValidationMode=uint8(opts.ValidationMode))); ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ``` ### Are these changes tested? Yes. Added a MATLAB test class called `tValidateArray.m`. ### Are there any user-facing changes? Yes. There is a new public method that is accessible via any subclass of `arrow.array.Array`. 
* GitHub Issue: #38532 Lead-authored-by: Sarah Gilmore <74676073+sgilmor...@users.noreply.github.com> Co-authored-by: Sarah Gilmore <sgilm...@mathworks.com> Co-authored-by: Kevin Gurney <kevin.p.gur...@gmail.com> Signed-off-by: Sarah Gilmore <sgilm...@mathworks.com> --- matlab/src/cpp/arrow/matlab/array/proxy/array.cc | 35 +++++++ matlab/src/cpp/arrow/matlab/array/proxy/array.h | 2 + .../src/cpp/arrow/matlab/array/proxy/list_array.cc | 35 ------- .../src/cpp/arrow/matlab/array/proxy/list_array.h | 1 - matlab/src/matlab/+arrow/+array/Array.m | 14 +++ matlab/test/arrow/array/tValidateArray.m | 101 +++++++++++++++++++++ 6 files changed, 152 insertions(+), 36 deletions(-) diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc index 2fa82d27e3..753b15fe72 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc @@ -19,6 +19,7 @@ #include "arrow/util/utf8.h" #include "arrow/matlab/array/proxy/array.h" +#include "arrow/matlab/array/validation_mode.h" #include "arrow/matlab/bit/unpack.h" #include "arrow/matlab/error/error.h" #include "arrow/matlab/index/validate.h" @@ -41,6 +42,7 @@ Array::Array(std::shared_ptr<arrow::Array> array) : array{std::move(array)} { REGISTER_METHOD(Array, isEqual); REGISTER_METHOD(Array, slice); REGISTER_METHOD(Array, exportToC); + REGISTER_METHOD(Array, validate); } std::shared_ptr<arrow::Array> Array::unwrap() { return array; } @@ -175,4 +177,37 @@ void Array::exportToC(libmexclass::proxy::method::Context& context) { error::C_EXPORT_FAILED); } +void Array::validate(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + mda::StructArray args = context.inputs[0]; + const mda::TypedArray<std::uint8_t> validation_mode_mda = args[0]["ValidationMode"]; + const auto validation_mode_integer = uint8_t(validation_mode_mda[0]); + // Convert integer representation to ValidationMode enum. 
+ const auto validation_mode = static_cast<ValidationMode>(validation_mode_integer); + switch (validation_mode) { + case ValidationMode::None: { + // Do nothing. + break; + } + case ValidationMode::Minimal: { + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(array->Validate(), context, + error::ARRAY_VALIDATE_MINIMAL_FAILED); + break; + } + case ValidationMode::Full: { + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(array->ValidateFull(), context, + error::ARRAY_VALIDATE_FULL_FAILED); + break; + } + default: { + // Throw an error if an unsupported enumeration value is provided. + const auto msg = "Unsupported ValidationMode enumeration value: " + + std::to_string(validation_mode_integer); + context.error = + libmexclass::error::Error{error::ARRAY_VALIDATE_UNSUPPORTED_ENUM, msg}; + return; + } + } +} + } // namespace arrow::matlab::array::proxy diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.h b/matlab/src/cpp/arrow/matlab/array/proxy/array.h index c249693ac2..189fd2fea0 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.h @@ -47,6 +47,8 @@ class Array : public libmexclass::proxy::Proxy { void exportToC(libmexclass::proxy::method::Context& context); + void validate(libmexclass::proxy::method::Context& context); + std::shared_ptr<arrow::Array> array; }; diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/list_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/list_array.cc index 36b9a6a332..4d569e5eb3 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/list_array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/list_array.cc @@ -17,7 +17,6 @@ #include "arrow/matlab/array/proxy/list_array.h" #include "arrow/matlab/array/proxy/numeric_array.h" -#include "arrow/matlab/array/validation_mode.h" #include "arrow/matlab/error/error.h" #include "arrow/matlab/proxy/wrap.h" @@ -29,7 +28,6 @@ ListArray::ListArray(std::shared_ptr<arrow::ListArray> list_array) : proxy::Array{std::move(list_array)} { REGISTER_METHOD(ListArray, 
getValues); REGISTER_METHOD(ListArray, getOffsets); - REGISTER_METHOD(ListArray, validate); } libmexclass::proxy::MakeResult ListArray::make( @@ -98,37 +96,4 @@ void ListArray::getOffsets(libmexclass::proxy::method::Context& context) { context.outputs[0] = factory.createScalar(offsets_int32_array_proxy_id); } -void ListArray::validate(libmexclass::proxy::method::Context& context) { - namespace mda = ::matlab::data; - mda::StructArray args = context.inputs[0]; - const mda::TypedArray<std::uint8_t> validation_mode_mda = args[0]["ValidationMode"]; - const auto validation_mode_integer = uint8_t(validation_mode_mda[0]); - // Convert integer representation to ValidationMode enum. - const auto validation_mode = static_cast<ValidationMode>(validation_mode_integer); - switch (validation_mode) { - case ValidationMode::None: { - // Do nothing. - break; - } - case ValidationMode::Minimal: { - MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(array->Validate(), context, - error::ARRAY_VALIDATE_MINIMAL_FAILED); - break; - } - case ValidationMode::Full: { - MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(array->ValidateFull(), context, - error::ARRAY_VALIDATE_FULL_FAILED); - break; - } - default: { - // Throw an error if an unsupported enumeration value is provided. 
- const auto msg = "Unsupported ValidationMode enumeration value: " + - std::to_string(validation_mode_integer); - context.error = - libmexclass::error::Error{error::ARRAY_VALIDATE_UNSUPPORTED_ENUM, msg}; - return; - } - } -} - } // namespace arrow::matlab::array::proxy diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/list_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/list_array.h index 09f5fdb0cf..c2b4ce7cd7 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/list_array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/list_array.h @@ -32,7 +32,6 @@ class ListArray : public arrow::matlab::array::proxy::Array { protected: void getValues(libmexclass::proxy::method::Context& context); void getOffsets(libmexclass::proxy::method::Context& context); - void validate(libmexclass::proxy::method::Context& context); }; } // namespace arrow::matlab::array::proxy diff --git a/matlab/src/matlab/+arrow/+array/Array.m b/matlab/src/matlab/+arrow/+array/Array.m index 01bacdf575..ffd9ec8bd9 100644 --- a/matlab/src/matlab/+arrow/+array/Array.m +++ b/matlab/src/matlab/+arrow/+array/Array.m @@ -53,6 +53,20 @@ classdef (Abstract) Array < matlab.mixin.CustomDisplay & ... proxy = libmexclass.proxy.Proxy(Name=traits.TypeProxyClassName, ID=typeStruct.ProxyID); type = traits.TypeConstructor(proxy); end + + function validate(obj, opts) + arguments + obj + opts.Mode(1, 1) arrow.array.ValidationMode = arrow.array.ValidationMode.Minimal + end + + if opts.Mode == arrow.array.ValidationMode.None + id = "arrow:array:InvalidValidationMode"; + msg = "Invalid Mode. 
Mode must be ""Minimal"" or ""Full""."; + error(id, msg); + end + obj.Proxy.validate(struct(ValidationMode=uint8(opts.Mode))); + end end methods (Access = private) diff --git a/matlab/test/arrow/array/tValidateArray.m b/matlab/test/arrow/array/tValidateArray.m new file mode 100644 index 0000000000..80148dbb21 --- /dev/null +++ b/matlab/test/arrow/array/tValidateArray.m @@ -0,0 +1,101 @@ +%TVALIDATEARRAY Test class for the arrow.array.Array/validate() method. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +classdef tValidateArray < matlab.unittest.TestCase + + methods (Test) + + function InvalidModeInput(test) + % Verify arrow.array.Array/validate() throws an exception if + % provided an invalid value for the Mode name-value + % pair. 
+ array = arrow.array.Float64Array.fromMATLAB(1:5); + + % Cannot convert "abc" to a ValidationMode value + fcn = @() array.validate(Mode="abc"); + test.verifyError(fcn, "MATLAB:validation:UnableToConvert"); + + % Mode must be scalar + modes = [arrow.array.ValidationMode.Full arrow.array.ValidationMode.Minimal]; + fcn = @() array.validate(Mode=modes); + test.verifyError(fcn, "MATLAB:validation:IncompatibleSize"); + + % ValidationMode.None is not supported + mode = arrow.array.ValidationMode.None; + fcn = @() array.validate(Mode=mode); + test.verifyError(fcn, "arrow:array:InvalidValidationMode"); + end + + function ValidationModeMinimalFails(test) + % Verify arrow.array.Array/validate() throws an exception + % with the ID arrow:array:ValidateMinimalFailed if + % Mode="Minimal" and the array fails the "Minimal" + % validation checks. + offsets = arrow.array(int32([0 1 3 4 5])); + values = arrow.array([1 2 3]); + array = arrow.array.ListArray.fromArrays(offsets, values, ValidationMode="None"); + fcn = @() array.validate(Mode="Minimal"); + test.verifyError(fcn, "arrow:array:ValidateMinimalFailed") + end + + function ValidationModeMinimalPasses(test) + % Verify arrow.array.Array/validate() does not throw an + % exception if Mode="Minimal" and the array passes the + % "Minimal" validation checks. + offsets = arrow.array(int32([0 1 0])); + values = arrow.array([1 2 3]); + % NOTE: the array is actually invalid, but it passes the + % "Minimal" validation checks. + array = arrow.array.ListArray.fromArrays(offsets, values); + fcn = @() array.validate(Mode="Minimal"); + test.verifyWarningFree(fcn, "arrow:array:ValidateMinimalFailed") + end + + function ValidationModeFullFails(test) + % Verify arrow.array.Array/validate() throws an exception + % with the ID arrow:array:ValidateFullFailed if + % Mode="Full" and the array fails the "Full" + % validation checks. 
+ offsets = arrow.array(int32([0 1 0])); + values = arrow.array([1 2 3]); + array = arrow.array.ListArray.fromArrays(offsets, values); + fcn = @() array.validate(Mode="Full"); + test.verifyError(fcn, "arrow:array:ValidateFullFailed") + end + + function ValidationModeFullPasses(test) + % Verify arrow.array.Array/validate() does not throw an + % exception if Mode="Full" and the array passes + % the "full" validation checks. + offsets = arrow.array(int32([0 1 3])); + values = arrow.array([1 2 3]); + array = arrow.array.ListArray.fromArrays(offsets, values); + fcn = @() array.validate(Mode="Full"); + test.verifyWarningFree(fcn); + end + + function DefaultValidationModeIsMimimal(test) + % Verify the default Mode value is "Minimal". + offsets = arrow.array(int32([0 1 2 3])); + values = arrow.array([1 2 3]); + array = arrow.array.ListArray.fromArrays(offsets, values); + fcn = @() array.validate(); + test.verifyWarningFree(fcn, "arrow:array:ValidateMinimalFailed") + end + end + +end \ No newline at end of file