iajoiner commented on a change in pull request #9702:
URL: https://github.com/apache/arrow/pull/9702#discussion_r787405490



##########
File path: python/pyarrow/_orc.pyx
##########
@@ -36,7 +36,233 @@ from pyarrow.lib cimport (check_status, _Weakrefable,
                           pyarrow_unwrap_table,
                           get_reader,
                           get_writer)
-from pyarrow.lib import tobytes
+from pyarrow.lib import frombytes, tobytes
+
+
+cdef compression_type_from_enum(CCompressionType compression_type_):
+    return {
+        CCompressionType_UNCOMPRESSED: 'UNCOMPRESSED',
+        CCompressionType_GZIP: 'ZLIB',
+        CCompressionType_SNAPPY: 'SNAPPY',
+        CCompressionType_LZ4: 'LZ4',
+        CCompressionType_ZSTD: 'ZSTD',
+    }.get(compression_type_, 'UNKNOWN')
+
+
+cdef CCompressionType compression_type_from_name(name) except *:
+    if not isinstance(name, str):
+        raise TypeError('compression must be a string')
+    name = name.upper()
+    if name == 'ZLIB':
+        return CCompressionType_GZIP
+    elif name == 'SNAPPY':
+        return CCompressionType_SNAPPY
+    elif name == 'LZ4':
+        return CCompressionType_LZ4
+    elif name == 'ZSTD':
+        return CCompressionType_ZSTD
+    elif name == 'UNCOMPRESSED':
+        return CCompressionType_UNCOMPRESSED
+    raise ValueError('Unknown CompressionKind: {0}'.format(name))
+
+
+cdef compression_strategy_from_enum(CompressionStrategy compression_strategy_):
+    return {
+        _CompressionStrategy_SPEED: 'SPEED',
+        _CompressionStrategy_COMPRESSION: 'COMPRESSION',
+    }.get(compression_strategy_, 'UNKNOWN')
+
+
+cdef CompressionStrategy compression_strategy_from_name(name) except *:
+    if not isinstance(name, str):
+        raise TypeError('compression strategy must be a string')
+    name = name.upper()
+    # SPEED is the default value in the ORC C++ implementation
+    if name == 'COMPRESSION':
+        return _CompressionStrategy_COMPRESSION
+    elif name == 'SPEED':
+        return _CompressionStrategy_SPEED
+    raise ValueError('Unknown CompressionStrategy: {0}'.format(name))
+
+
+cdef rle_version_from_enum(RleVersion rle_version_):
+    return {
+        _RleVersion_1: '1',
+        _RleVersion_2: '2',
+    }.get(rle_version_, 'UNKNOWN')
+
+
+cdef bloom_filter_version_from_enum(BloomFilterVersion bloom_filter_version_):
+    return {
+        _BloomFilterVersion_ORIGINAL: 'ORIGINAL',
+        _BloomFilterVersion_UTF8: 'UTF8',
+        _BloomFilterVersion_FUTURE: 'FUTURE',
+    }.get(bloom_filter_version_, 'UNKNOWN')
+
+
+cdef file_version_from_class(FileVersion file_version_):
+    cdef object file_version = file_version_.ToString()
+    return frombytes(file_version)
+
+
+cdef writer_id_from_enum(WriterId writer_id_):
+    return {
+        _WriterId_ORC_JAVA_WRITER: 'ORC_JAVA',
+        _WriterId_ORC_CPP_WRITER: 'ORC_CPP',
+        _WriterId_PRESTO_WRITER: 'PRESTO',
+        _WriterId_SCRITCHLEY_GO: 'SCRITCHLEY_GO',
+        _WriterId_TRINO_WRITER: 'TRINO',
+    }.get(writer_id_, 'UNKNOWN')
+
+
+cdef writer_version_from_enum(WriterVersion writer_version_):
+    return {
+        _WriterVersion_ORIGINAL: 'ORIGINAL',
+        _WriterVersion_HIVE_8732: 'HIVE_8732',
+        _WriterVersion_HIVE_4243: 'HIVE_4243',
+        _WriterVersion_HIVE_12055: 'HIVE_12055',
+        _WriterVersion_HIVE_13083: 'HIVE_13083',
+        _WriterVersion_ORC_101: 'ORC_101',
+        _WriterVersion_ORC_135: 'ORC_135',
+        _WriterVersion_ORC_517: 'ORC_517',
+        _WriterVersion_ORC_203: 'ORC_203',
+        _WriterVersion_ORC_14: 'ORC_14',
+    }.get(writer_version_, 'UNKNOWN')
+
+
+cdef shared_ptr[WriteOptions] _create_write_options(
+    file_version=None,
+    batch_size=None,
+    stripe_size=None,
+    compression=None,
+    compression_block_size=None,
+    compression_strategy=None,
+    row_index_stride=None,
+    padding_tolerance=None,
+    dictionary_key_size_threshold=None,
+    bloom_filter_columns=None,
+    bloom_filter_fpp=None
+) except *:
+    """General writer options"""
+    cdef:
+        shared_ptr[WriteOptions] options
+
+    options = make_shared[WriteOptions]()
+
+    # batch_size
+
+    if batch_size is not None:
+        if isinstance(batch_size, int) and batch_size > 0:
+            deref(options).batch_size = batch_size
+        else:
+            raise ValueError("Invalid ORC writer batch size: {0}"
+                             .format(batch_size))
+
+    # file_version
+
+    if file_version is not None:
+        if str(file_version) == "0.12":

Review comment:
       Technically, some people may pass a float there (e.g. `0.12` instead of 
`"0.12"`), especially as a consequence of forgetting the quotes?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to