bkietz commented on a change in pull request #10397: URL: https://github.com/apache/arrow/pull/10397#discussion_r643577382
########## File path: cpp/src/arrow/compute/exec/expression.cc ########## @@ -510,7 +475,67 @@ Result<Expression> Expression::Bind(const Schema& in_schema, return Bind(ValueDescr::Array(struct_(in_schema.fields())), exec_context); } -Result<Datum> ExecuteScalarExpression(const Expression& expr, const Datum& input, +Result<ExecBatch> MakeExecBatch(const Schema& full_schema, const Datum& partial) { + ExecBatch out; + + if (partial.kind() == Datum::RECORD_BATCH) { + const auto& partial_batch = *partial.record_batch(); + out.length = partial_batch.num_rows(); + + for (const auto& field : full_schema.fields()) { + ARROW_ASSIGN_OR_RAISE(auto column, + FieldRef(field->name()).GetOneOrNone(partial_batch)); + + if (column) { + if (!column->type()->Equals(field->type())) { + // Referenced field was present but didn't have the expected type. + // This *should* be handled by readers, and will just be an error in the future. + ARROW_ASSIGN_OR_RAISE( + auto converted, + compute::Cast(column, field->type(), compute::CastOptions::Safe())); + column = converted.make_array(); + } + out.values.emplace_back(std::move(column)); + } else { + out.values.emplace_back(MakeNullScalar(field->type())); + } + } + return out; + } + + // wasteful but useful for testing: + if (partial.type()->id() == Type::STRUCT) { + if (partial.is_array()) { + ARROW_ASSIGN_OR_RAISE(auto partial_batch, + RecordBatch::FromStructArray(partial.make_array())); + + return MakeExecBatch(full_schema, partial_batch); + } + + if (partial.is_scalar()) { + ARROW_ASSIGN_OR_RAISE(auto partial_array, + MakeArrayFromScalar(*partial.scalar(), 1)); + ARROW_ASSIGN_OR_RAISE(auto out, MakeExecBatch(full_schema, partial_array)); + + for (Datum& value : out.values) { + if (value.is_scalar()) continue; + ARROW_ASSIGN_OR_RAISE(value, value.make_array()->GetScalar(0)); + } Review comment: This was as compact as I could write this case; if you see a way to compress/simplify it then I'll take it but the scalar/array cases are really just 
for testing purposes. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org