cpcloud commented on a change in pull request #11384:
URL: https://github.com/apache/arrow/pull/11384#discussion_r740127428
##########
File path: cpp/src/arrow/compute/exec/CMakeLists.txt
##########
@@ -31,3 +31,11 @@ add_arrow_compute_test(union_node_test PREFIX
"arrow-compute")
add_arrow_compute_test(util_test PREFIX "arrow-compute")
add_arrow_benchmark(expression_benchmark PREFIX "arrow-compute")
+
+add_arrow_compute_test(ir_test
+ PREFIX
+ "arrow-compute"
+ EXTRA_LINK_LIBS
+ ${GFLAGS_LIBRARIES}
+ TEST_ARGUMENTS
+
"-computeir_dir=${CMAKE_SOURCE_DIR}/../experimental/computeir")
Review comment:
Should `-computeir_dir` have an additional `-` on the front?
##########
File path: cpp/src/arrow/scalar.h
##########
@@ -302,7 +302,7 @@ struct TemporalScalar : internal::PrimitiveScalar<T> {
using internal::PrimitiveScalar<T>::PrimitiveScalar;
using ValueType = typename TemporalScalar<T>::ValueType;
- explicit TemporalScalar(ValueType value, std::shared_ptr<DataType> type)
+ TemporalScalar(ValueType value, std::shared_ptr<DataType> type)
Review comment:
Probably just my seriously rusty C++, but is this to allow copy-list
initialization somewhere else?
##########
File path: cpp/src/arrow/compute/exec/test_util.cc
##########
@@ -235,5 +239,181 @@ void AssertExecBatchesEqual(const
std::shared_ptr<Schema>& schema,
AssertTablesEqual(exp_tab, act_tab);
}
+template <typename T>
+static const T& OptionsAs(const ExecNodeOptions& opts) {
+ const auto& ptr = dynamic_cast<const T&>(opts);
+ return ptr;
+}
+
+template <typename T>
+static const T& OptionsAs(const Declaration& decl) {
Review comment:
Fine to leave as is if that's the convention in this file.
##########
File path: cpp/src/arrow/compute/exec/test_util.cc
##########
@@ -235,5 +239,181 @@ void AssertExecBatchesEqual(const
std::shared_ptr<Schema>& schema,
AssertTablesEqual(exp_tab, act_tab);
}
+template <typename T>
+static const T& OptionsAs(const ExecNodeOptions& opts) {
+ const auto& ptr = dynamic_cast<const T&>(opts);
+ return ptr;
+}
+
+template <typename T>
+static const T& OptionsAs(const Declaration& decl) {
+ return OptionsAs<T>(*decl.options);
+}
+
+bool operator==(const Declaration& l, const Declaration& r) {
Review comment:
How about `lhs` and `rhs` to avoid the `l` variable name? Unfortunately
not everyone uses a font that makes `l` unambiguous with the large number of
characters it resembles.
##########
File path: cpp/src/arrow/compute/exec/exec_plan.h
##########
@@ -351,13 +351,28 @@ struct ARROW_EXPORT Declaration {
options{std::make_shared<Options>(std::move(options))},
label{this->factory_name} {}
+ template <typename Options>
+ Declaration(std::string factory_name, std::vector<Input> inputs, Options
options,
+ std::string label)
+ : factory_name{std::move(factory_name)},
+ inputs{std::move(inputs)},
+ options{std::make_shared<Options>(std::move(options))},
+ label{std::move(label)} {}
+
template <typename Options>
Declaration(std::string factory_name, Options options)
: factory_name{std::move(factory_name)},
inputs{},
options{std::make_shared<Options>(std::move(options))},
label{this->factory_name} {}
+ template <typename Options>
+ Declaration(std::string factory_name, Options options, std::string label)
+ : factory_name{std::move(factory_name)},
+ inputs{},
+ options{std::make_shared<Options>(std::move(options))},
+ label{std::move(label)} {}
Review comment:
I'm guessing we can't use delegating constructors because of
`$SOME_COMPILER`?
##########
File path: cpp/src/arrow/compute/exec/ir_consumer.h
##########
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <flatbuffers/flatbuffers.h>
+
+#include "arrow/compute/exec/exec_plan.h"
+#include "arrow/compute/exec/expression.h"
+#include "arrow/compute/exec/options.h"
+#include "arrow/datum.h"
+#include "arrow/result.h"
+#include "arrow/util/visibility.h"
+
+#include "generated/Plan_generated.h"
+
+namespace arrow {
+
+namespace flatbuf = org::apache::arrow::flatbuf;
+
+namespace compute {
+
+namespace ir = org::apache::arrow::computeir::flatbuf;
+
+class ARROW_EXPORT CatalogSourceNodeOptions : public ExecNodeOptions {
Review comment:
Why make this a class if everything is going to be public? What's the
downside of exposing as little as possible?
##########
File path: cpp/cmake_modules/BuildUtils.cmake
##########
@@ -723,22 +724,27 @@ function(ADD_TEST_CASE REL_TEST_NAME)
add_dependencies(${TEST_NAME} ${ARG_EXTRA_DEPENDENCIES})
endif()
+ if(ARG_ENVIRONMENT)
+ message(STATUS "WTF ${ARG_ENVIRONMENT}")
Review comment:
Not sure `WTF` is informative here :sweat_smile:.
##########
File path: cpp/src/arrow/compute/exec/test_util.cc
##########
@@ -235,5 +239,181 @@ void AssertExecBatchesEqual(const
std::shared_ptr<Schema>& schema,
AssertTablesEqual(exp_tab, act_tab);
}
+template <typename T>
+static const T& OptionsAs(const ExecNodeOptions& opts) {
+ const auto& ptr = dynamic_cast<const T&>(opts);
Review comment:
Not sure if this is around in the codebase still, but is `checked_cast`
appropriate here?
##########
File path: cpp/src/arrow/ipc/metadata_internal.h
##########
@@ -125,38 +125,49 @@ inline std::string StringFromFlatbuffers(const
flatbuffers::String* s) {
// dictionary-encoded fields to a DictionaryMemo instance. May be
// expensive for very large schemas if you are only interested in a
// few fields
+ARROW_EXPORT
Review comment:
These are public now because they are needed in a dependent library (I'm
assuming that's whatever Arrow library contains the compute IR consumer
functionality?)
##########
File path: cpp/src/arrow/compute/exec/test_util.cc
##########
@@ -235,5 +239,181 @@ void AssertExecBatchesEqual(const
std::shared_ptr<Schema>& schema,
AssertTablesEqual(exp_tab, act_tab);
}
+template <typename T>
+static const T& OptionsAs(const ExecNodeOptions& opts) {
+ const auto& ptr = dynamic_cast<const T&>(opts);
+ return ptr;
+}
+
+template <typename T>
+static const T& OptionsAs(const Declaration& decl) {
Review comment:
Should these `static` functions be in an anonymous namespace?
##########
File path: cpp/src/arrow/compute/api_vector.h
##########
@@ -98,17 +98,17 @@ enum class NullPlacement {
/// \brief One sort key for PartitionNthIndices (TODO) and SortIndices
class ARROW_EXPORT SortKey : public util::EqualityComparable<SortKey> {
public:
- explicit SortKey(std::string name, SortOrder order = SortOrder::Ascending)
- : name(std::move(name)), order(order) {}
+ explicit SortKey(FieldRef target, SortOrder order = SortOrder::Ascending)
+ : target(std::move(target)), order(order) {}
using util::EqualityComparable<SortKey>::Equals;
using util::EqualityComparable<SortKey>::operator==;
using util::EqualityComparable<SortKey>::operator!=;
bool Equals(const SortKey& other) const;
std::string ToString() const;
- /// The name of the sort column.
- std::string name;
+ /// A FieldRef targetting the sort column.
+ FieldRef target;
Review comment:
This is technically a breaking change since `SortKey` is exported and
the fields are public. Perhaps it's time to make these fields private and give
them accessor methods, or just call this a `struct`?
##########
File path: cpp/src/arrow/compute/kernels/vector_sort.cc
##########
@@ -91,6 +104,18 @@ Result<std::vector<ResolvedSortKey>> ResolveSortKeys(
});
}
+std::shared_ptr<ChunkedArray> GetTableColumn(const Table& table, const
FieldRef& ref) {
Review comment:
Would you mind documenting the meaning of returning a `nullptr` here and
I assume the requirement that the caller is required to handle `nullptr`?
##########
File path: cpp/src/arrow/type.h
##########
@@ -188,6 +188,9 @@ class ARROW_EXPORT DataType : public
detail::Fingerprintable {
ARROW_EXPORT
std::ostream& operator<<(std::ostream& os, const DataType& type);
+inline bool operator==(const DataType& l, const DataType& r) { return
l.Equals(r); }
Review comment:
`lhs` and `rhs` here too would be nice :)
##########
File path: cpp/src/arrow/util/io_util.cc
##########
@@ -578,6 +607,19 @@ Result<std::vector<PlatformFilename>> ListDir(const
PlatformFilename& dir_path)
#endif
+Status SetWorkingDir(const PlatformFilename& dir_path) {
Review comment:
Setting the working directory is a fraught in the presence of multiple
threads.
You can almost always accomplish what you need to do after running `chdir`
in a thread-safe way without `chdir`.
So, with that in mind: why is this function necessary?
##########
File path: cpp/src/arrow/compute/kernels/vector_sort.cc
##########
@@ -51,6 +51,19 @@ struct SortField {
SortOrder order;
};
+Status CheckNonNested(const FieldRef& ref) {
+ if (ref.IsNested()) {
+ return Status::KeyError("Nested keys not supported for SortKeys");
Review comment:
To be clear this means anything that is a struct, array, map, or union
type is unsupported, correct?
##########
File path: cpp/src/arrow/util/io_util.h
##########
@@ -113,6 +114,10 @@ Result<bool> DeleteDirTree(const PlatformFilename&
dir_path, bool allow_not_foun
ARROW_EXPORT
Result<std::vector<PlatformFilename>> ListDir(const PlatformFilename&
dir_path);
+/// Set the process' current working directory.
+ARROW_EXPORT
+Status SetWorkingDir(const PlatformFilename& dir_path);
Review comment:
I really think we should avoid introducing this as a public API, even if
the publicity is limited to `arrow::internal`.
##########
File path: cpp/src/arrow/compute/api_vector.h
##########
@@ -98,17 +98,17 @@ enum class NullPlacement {
/// \brief One sort key for PartitionNthIndices (TODO) and SortIndices
class ARROW_EXPORT SortKey : public util::EqualityComparable<SortKey> {
public:
- explicit SortKey(std::string name, SortOrder order = SortOrder::Ascending)
- : name(std::move(name)), order(order) {}
+ explicit SortKey(FieldRef target, SortOrder order = SortOrder::Ascending)
+ : target(std::move(target)), order(order) {}
using util::EqualityComparable<SortKey>::Equals;
using util::EqualityComparable<SortKey>::operator==;
using util::EqualityComparable<SortKey>::operator!=;
bool Equals(const SortKey& other) const;
std::string ToString() const;
- /// The name of the sort column.
- std::string name;
+ /// A FieldRef targetting the sort column.
+ FieldRef target;
Review comment:
Thanks for the explanation, it isn't necessary.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]