lidavidm commented on code in PR #12672:
URL: https://github.com/apache/arrow/pull/12672#discussion_r860841793


##########
python/pyarrow/_substrait.pyx:
##########
@@ -0,0 +1,76 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: language_level = 3
+
+from pyarrow import Buffer
+from pyarrow.lib cimport *
+from pyarrow.includes.libarrow cimport *
+
+
+def run_query(plan):
+    """
+    Executes a substrait plan and returns a RecordBatchReader.

Review Comment:
   ```suggestion
       Execute a Substrait plan and read the results as a RecordBatchReader.
   ```



##########
python/pyarrow/_substrait.pyx:
##########
@@ -0,0 +1,76 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: language_level = 3
+
+from pyarrow import Buffer
+from pyarrow.lib cimport *
+from pyarrow.includes.libarrow cimport *
+
+
+def run_query(plan):
+    """
+    Executes a substrait plan and returns a RecordBatchReader.
+
+    Parameters
+    ----------
+    plan : Buffer
+        Substrait plan can be fed as a serialized plan (Buffer). 

Review Comment:
   ```suggestion
           The serialized Substrait plan to execute.
   ```



##########
cpp/src/arrow/engine/substrait/serde_test.cc:
##########
@@ -750,5 +755,64 @@ TEST(Substrait, ExtensionSetFromPlanMissingFunc) {
           &ext_set));
 }
 
+Result<std::string> GetSubstraitJSON() {
+  ARROW_ASSIGN_OR_RAISE(std::string dir_string,
+                        arrow::internal::GetEnvVar("PARQUET_TEST_DATA"));
+  auto file_name =
+      arrow::internal::PlatformFilename::FromString(dir_string)->Join("binary.parquet");
+  auto file_path = file_name->ToString();
+  std::string substrait_json = R"({
+    "relations": [
+      {"rel": {
+        "read": {
+          "base_schema": {
+            "struct": {
+              "types": [ 
+                         {"binary": {}}
+                       ]
+            },
+            "names": [
+                      "foo"
+                      ]
+          },
+          "local_files": {
+            "items": [
+              {
+                "uri_file": "FILENAME_PLACEHOLDER",
+                "format": "FILE_FORMAT_PARQUET"
+              }
+            ]
+          }
+        }
+      }}
+    ]
+  })";
+  std::string filename_placeholder = "FILENAME_PLACEHOLDER";
+  substrait_json.replace(substrait_json.find(filename_placeholder),
+                         filename_placeholder.size(), file_path);
+  return substrait_json;
+}
+
+TEST(Substrait, GetRecordBatchReader) {
+#ifdef _WIN32
+  GTEST_SKIP() << "Substrait File URI not supported for Windows";
+#else

Review Comment:
   This should work. Please reference the JIRA ID in the message.



##########
python/pyarrow/tests/test_substrait.py:
##########
@@ -0,0 +1,107 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import pathlib
+import pyarrow as pa
+from pyarrow.lib import tobytes
+from pyarrow.lib import ArrowInvalid
+
+try:
+    import pyarrow.parquet as pq
+except ImportError:
+    pq = None
+
+try:
+    import pyarrow.substrait as substrait
+except ImportError:
+    substrait = None
+
+import pytest
+import sys
+
+# Marks all of the tests in this module
+# Ignore these with pytest ... -m 'not engine'
+pytestmark = pytest.mark.substrait

Review Comment:
   ```suggestion
   pytestmark = [pytest.mark.parquet, pytest.mark.substrait]
   ```



##########
python/pyarrow/tests/test_substrait.py:
##########
@@ -0,0 +1,107 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import pathlib
+import pyarrow as pa
+from pyarrow.lib import tobytes
+from pyarrow.lib import ArrowInvalid
+
+try:
+    import pyarrow.parquet as pq
+except ImportError:
+    pq = None
+
+try:
+    import pyarrow.substrait as substrait
+except ImportError:
+    substrait = None
+
+import pytest
+import sys

Review Comment:
   Nit: please sort the imports properly (standard-library modules first, then
   third-party libraries, then first-party libraries).



##########
cpp/src/arrow/engine/substrait/util.cc:
##########
@@ -0,0 +1,137 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/engine/substrait/util.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/async_util.h"
+
+namespace arrow {
+
+namespace engine {
+
+/// \brief A SinkNodeConsumer specialized to output ExecBatches via PushGenerator
+class ARROW_ENGINE_EXPORT SubstraitSinkConsumer : public compute::SinkNodeConsumer {

Review Comment:
   Hmm, all of this can actually be in an anonymous namespace without
   ARROW_ENGINE_EXPORT, right? Just to avoid exporting the symbols.



##########
python/pyarrow/tests/test_substrait.py:
##########
@@ -0,0 +1,107 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import pathlib
+import pyarrow as pa
+from pyarrow.lib import tobytes
+from pyarrow.lib import ArrowInvalid
+
+try:
+    import pyarrow.parquet as pq
+except ImportError:
+    pq = None
+
+try:
+    import pyarrow.substrait as substrait
+except ImportError:
+    substrait = None
+
+import pytest
+import sys
+
+# Marks all of the tests in this module
+# Ignore these with pytest ... -m 'not engine'
+pytestmark = pytest.mark.substrait
+
+
+def resource_root():
+    """Get the path to the test resources directory."""
+    if not os.environ.get("PARQUET_TEST_DATA"):
+        raise RuntimeError("Test resources not found; set "
+                           "PARQUET_TEST_DATA to "
+                           "<repo root>/cpp/submodules/parquet-testing/data")
+    return pathlib.Path(os.environ["PARQUET_TEST_DATA"])
+
+
+@pytest.mark.skipif(sys.platform == 'win32',
+                    reason="file based URI is not fully supported for Windows")

Review Comment:
   Please reference the Jira ID instead.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to