AlenkaF commented on code in PR #37665:
URL: https://github.com/apache/arrow/pull/37665#discussion_r1369746231
##########
python/pyarrow/_parquet.pyx:
##########
@@ -1776,6 +2022,82 @@ cdef shared_ptr[ArrowWriterProperties] _create_arrow_writer_properties(
return arrow_properties
+cdef _name_to_index_map(Schema arrow_schema):
+ cdef:
+ shared_ptr[CSchema] sp_arrow_schema
+ shared_ptr[SchemaDescriptor] sp_parquet_schema
+ shared_ptr[WriterProperties] props = _create_writer_properties()
+ shared_ptr[ArrowWriterProperties] arrow_props = _create_arrow_writer_properties(
+ use_deprecated_int96_timestamps=False,
+ coerce_timestamps=None,
+ allow_truncated_timestamps=False,
+ writer_engine_version="V2"
+ )
+
+ sp_arrow_schema = pyarrow_unwrap_schema(arrow_schema)
+
+ with nogil:
+ check_status(ToParquetSchema(
+ sp_arrow_schema.get(), deref(props.get()), deref(arrow_props.get()), &sp_parquet_schema))
+
+ out = dict()
+
+ cdef SchemaDescriptor* parquet_schema = sp_parquet_schema.get()
+
+ for i in range(parquet_schema.num_columns()):
+ name = frombytes(parquet_schema.Column(i).path().get().ToDotString())
+ out[name] = i
+
+ return out
+
+
+def _sort_keys_to_sorting_columns(sort_keys, null_placement, Schema schema):
Review Comment:
It seems this is not used anywhere. Do you know whether it is meant to be a
helper function? It looks like a duplicate of `from_sort_order` to me.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]