[
https://issues.apache.org/jira/browse/ARROW-1757?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16344026#comment-16344026
]
Panchen Xue commented on ARROW-1757:
------------------------------------
Does this code look ok?
{code}
/// \brief Populate this DictionaryArray from a dictionary type and an array of
/// indices, rejecting indices that fall outside the dictionary.
///
/// \param[in] type the dictionary type; type->id() must be Type::DICTIONARY
/// \param[in] indices the index array; its type id must match the index type
/// of the dictionary type
/// \return Status::OK() on success, Status::NotImplemented when the index type
/// is not INT8/INT16/INT32/INT64, Status::Invalid when SanityCheck reports an
/// index outside [0, dictionary length)
///
/// NOTE: the SanityCheck template must be declared before this definition for
/// the explicit-template-argument calls below to parse.
Status DictionaryArray::FromArrays(const std::shared_ptr<DataType>& type,
                                   const std::shared_ptr<Array>& indices) {
  // Type mismatches are treated as programmer errors and stay assertion-only.
  // Assert before the downcast so the cast is never relied on for a wrong type.
  DCHECK_EQ(type->id(), Type::DICTIONARY);
  dict_type_ = static_cast<const DictionaryType*>(type.get());
  DCHECK_EQ(indices->type_id(), dict_type_->index_type()->id());

  const int64_t range = dict_type_->dictionary()->length();

  // The range check guards against untrusted input, so it must be a real
  // runtime check: DCHECK_* macros are typically compiled out of release
  // builds, which would let bad indices through silently.
  bool in_range = false;
  switch (indices->type_id()) {
    case Type::INT8:
      in_range = SanityCheck<Int8Type>(indices, range);
      break;
    case Type::INT16:
      in_range = SanityCheck<Int16Type>(indices, range);
      break;
    case Type::INT32:
      in_range = SanityCheck<Int32Type>(indices, range);
      break;
    case Type::INT64:
      in_range = SanityCheck<Int64Type>(indices, range);
      break;
    default: {
      std::stringstream ss;
      ss << "Categorical index type not supported: "
         << indices->type()->ToString();
      return Status::NotImplemented(ss.str());
    }
  }
  if (!in_range) {
    return Status::Invalid("Dictionary has out-of-bound index [0, dict.length)");
  }

  // Reuse the indices' underlying data, re-tagged with the dictionary type.
  auto data = indices->data()->Copy();
  data->type = type;
  SetData(data);
  return Status::OK();
}
template <typename ArrowType>
bool SanityCheck(const std::shared_ptr<Array>& indices,
const int64_t range) {
using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
std::shared_ptr<ArrayType> array =
std::static_pointer_cast<ArrayType>(indices);
const auto data = array->raw_values();
const auto size = sizeof(data) / sizeof(data[0]);
// using decltype because C++11 doesn't allow auto in lambda
return std::all_of(data, data + size,
[&range](decltype(data[0]) value){return value >= 0 && value < range;});
}
{code}
> [C++] Add DictionaryArray::FromArrays alternate ctor that can check or
> sanitize "untrusted" indices
> ----------------------------------------------------------------------------------------------------
>
> Key: ARROW-1757
> URL: https://issues.apache.org/jira/browse/ARROW-1757
> Project: Apache Arrow
> Issue Type: New Feature
> Components: C++
> Reporter: Wes McKinney
> Priority: Major
> Fix For: 0.9.0
>
>
> Related to ARROW-1658. This is related to the offset sanitization in
> {{ListArray::FromArrays}}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)