[ 
https://issues.apache.org/jira/browse/ARROW-1757?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16344026#comment-16344026
 ] 

Panchen Xue commented on ARROW-1757:
------------------------------------

Does this code look ok? 
{code}
// Initializes this DictionaryArray from a dictionary `type` and an array of
// `indices`, validating the (potentially untrusted) indices first.
//
// \param type    expected to be a DictionaryType; its dictionary length bounds
//                the valid index range [0, length).
// \param indices integer array (INT8/INT16/INT32/INT64) whose type must match
//                the index type declared by `type`.
// \return Status::OK on success;
//         Status::Invalid if `type` is not a dictionary type, if the index
//         types disagree, or if any index is out of bounds;
//         Status::NotImplemented for unsupported index types.
//
// Validation failures are reported through Status rather than DCHECK:
// DCHECK-style assertions are normally debug-only, which would let bad
// indices through unchecked in release builds.
Status DictionaryArray::FromArrays(const std::shared_ptr<DataType>& type,
                                   const std::shared_ptr<Array>& indices) {
  // Confirm the dynamic type before down-casting to DictionaryType.
  if (type->id() != Type::DICTIONARY) {
    return Status::Invalid("Expected a dictionary type");
  }
  dict_type_ = static_cast<const DictionaryType*>(type.get());

  if (indices->type_id() != dict_type_->index_type()->id()) {
    return Status::Invalid("Indices type does not match the dictionary index type");
  }

  // Valid indices lie in [0, range).
  const int64_t range = dict_type_->dictionary()->length();

  bool in_bounds = false;
  switch (indices->type_id()) {
    case Type::INT8:
      in_bounds = SanityCheck<Int8Type>(indices, range);
      break;
    case Type::INT16:
      in_bounds = SanityCheck<Int16Type>(indices, range);
      break;
    case Type::INT32:
      in_bounds = SanityCheck<Int32Type>(indices, range);
      break;
    case Type::INT64:
      in_bounds = SanityCheck<Int64Type>(indices, range);
      break;
    default: {
      std::stringstream ss;
      ss << "Categorical index type not supported: "
         << indices->type()->ToString();
      return Status::NotImplemented(ss.str());
    }
  }

  if (!in_bounds) {
    return Status::Invalid("Dictionary has out-of-bound index [0, dict.length)");
  }

  // Reuse the indices' underlying data, re-tagged with the dictionary type.
  auto data = indices->data()->Copy();
  data->type = type;
  SetData(data);
  return Status::OK();
}

  template <typename ArrowType>
  bool SanityCheck(const std::shared_ptr<Array>& indices,
                   const int64_t range) {
    using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
    std::shared_ptr<ArrayType> array =
      std::static_pointer_cast<ArrayType>(indices);

    const auto data = array->raw_values();
    const auto size = sizeof(data) / sizeof(data[0]);
    
    // using decltype because C++11 doesn't allow auto in lambda
    return std::all_of(data, data + size,
      [&range](decltype(data[0]) value){return value >= 0 && value < range;});
  }
{code}

> [C++] Add DictionaryArray::FromArrays alternate ctor that can check or 
> sanitize "untrusted" indices
> ----------------------------------------------------------------------------------------------------
>
>                 Key: ARROW-1757
>                 URL: https://issues.apache.org/jira/browse/ARROW-1757
>             Project: Apache Arrow
>          Issue Type: New Feature
>          Components: C++
>            Reporter: Wes McKinney
>            Priority: Major
>             Fix For: 0.9.0
>
>
> Related to ARROW-1658. This is related to the offset sanitization in 
> {{ListArray::FromArrays}}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to