[GitHub] [arrow] emkornfield commented on a change in pull request #9504: ARROW-2229: [C++][Python] Add WriteCsv functionality.

GitBox Tue, 23 Feb 2021 21:07:57 -0800


emkornfield commented on a change in pull request #9504:
URL: https://github.com/apache/arrow/pull/9504#discussion_r581622101




##########
File path: python/pyarrow/_csv.pyx
##########
@@ -763,3 +765,86 @@ def open_csv(input_file, read_options=None, parse_options=None,
                  move(c_convert_options),
                  maybe_unbox_memory_pool(memory_pool))
     return reader
+
+
+cdef class WriteOptions(_Weakrefable):
+    """
+    Options for writing CSV files.
+
+    Parameters
+    ----------
+    include_header : bool, optional (default True)
+        Whether to include the header
+    batch_size : int, optional (default 1024)
+        How many rows to process together when converting and writing
+        CSV
+    """
+    cdef:
+        CCSVWriteOptions options
+
+    # Avoid mistakingly creating attributes
+    __slots__ = ()
+
+    def __init__(self, *, include_header=None, batch_size=None):
+        self.options = CCSVWriteOptions.Defaults()
+        if include_header is not None:
+            self.options.include_header = include_header
+        if batch_size is not None:
+            self.options.batch_size = 1024
+
+
+cdef _get_write_options(WriteOptions write_options, CCSVWriteOptions* out):
+    if write_options is None:
+        out[0] = CCSVWriteOptions.Defaults()
+    else:
+        out[0] = write_options.options
+
+
+def write_csv(data, output_file, write_options=None,
+              MemoryPool memory_pool=None):
+    """
+

Review comment:
       added

##########
File path: python/pyarrow/_csv.pyx
##########
@@ -763,3 +765,86 @@ def open_csv(input_file, read_options=None, parse_options=None,
                  move(c_convert_options),
                  maybe_unbox_memory_pool(memory_pool))
     return reader
+
+
+cdef class WriteOptions(_Weakrefable):
+    """
+    Options for writing CSV files.
+
+    Parameters
+    ----------
+    include_header : bool, optional (default True)
+        Whether to include the header
+    batch_size : int, optional (default 1024)
+        How many rows to process together when converting and writing
+        CSV
+    """
+    cdef:
+        CCSVWriteOptions options
+
+    # Avoid mistakingly creating attributes
+    __slots__ = ()
+
+    def __init__(self, *, include_header=None, batch_size=None):
+        self.options = CCSVWriteOptions.Defaults()
+        if include_header is not None:
+            self.options.include_header = include_header
+        if batch_size is not None:
+            self.options.batch_size = 1024
+
+
+cdef _get_write_options(WriteOptions write_options, CCSVWriteOptions* out):
+    if write_options is None:
+        out[0] = CCSVWriteOptions.Defaults()
+    else:
+        out[0] = write_options.options
+
+
+def write_csv(data, output_file, write_options=None,
+              MemoryPool memory_pool=None):
+    """
+
+    Parameters
+    ----------
+    data: The data to write.
+        Either a pyarrow.RecordBatch or a pyarrow.Table
+    output_file: string, path, pyarrow.OutputStream or file-like object
+        The location of CSV data.
+    write_options: pyarrow.csv.WriteOptions
+        Options to configure writing the CSV file.
+    memory_pool: MemoryPool, optional
+        Pool for temporary allocations.
+
+    Returns
+    -------
+    None
+    """
+    cdef:
+        shared_ptr[COutputStream] stream
+        CCSVWriteOptions c_write_options
+        CMemoryPool* c_memory_pool
+        CRecordBatch* batch
+        CTable* table
+    _get_write_options(write_options, &c_write_options)
+
+    try:
+        where = _stringify_path(output_file)
+    except TypeError:
+        get_writer(output_file, &stream)
+    else:
+        c_where = tobytes(where)
+        stream = GetResultValue(FileOutputStream.Open(c_where))
+
+    c_memory_pool = maybe_unbox_memory_pool(memory_pool)
+    if isinstance(data, RecordBatch):
+        batch = (<RecordBatch>data).batch

Review comment:
       thanks.  changed.

##########
File path: python/pyarrow/_csv.pyx
##########
@@ -763,3 +765,86 @@ def open_csv(input_file, read_options=None, parse_options=None,
                  move(c_convert_options),
                  maybe_unbox_memory_pool(memory_pool))
     return reader
+
+
+cdef class WriteOptions(_Weakrefable):
+    """
+    Options for writing CSV files.
+
+    Parameters
+    ----------
+    include_header : bool, optional (default True)
+        Whether to include the header
+    batch_size : int, optional (default 1024)
+        How many rows to process together when converting and writing
+        CSV
+    """
+    cdef:
+        CCSVWriteOptions options
+
+    # Avoid mistakingly creating attributes
+    __slots__ = ()
+
+    def __init__(self, *, include_header=None, batch_size=None):
+        self.options = CCSVWriteOptions.Defaults()
+        if include_header is not None:
+            self.options.include_header = include_header
+        if batch_size is not None:
+            self.options.batch_size = 1024
+
+
+cdef _get_write_options(WriteOptions write_options, CCSVWriteOptions* out):
+    if write_options is None:
+        out[0] = CCSVWriteOptions.Defaults()
+    else:
+        out[0] = write_options.options
+
+
+def write_csv(data, output_file, write_options=None,
+              MemoryPool memory_pool=None):
+    """
+
+    Parameters
+    ----------
+    data: The data to write.
+        Either a pyarrow.RecordBatch or a pyarrow.Table
+    output_file: string, path, pyarrow.OutputStream or file-like object
+        The location of CSV data.
+    write_options: pyarrow.csv.WriteOptions
+        Options to configure writing the CSV file.
+    memory_pool: MemoryPool, optional
+        Pool for temporary allocations.
+
+    Returns
+    -------
+    None

Review comment:
       removed.

##########
File path: python/pyarrow/_csv.pyx
##########
@@ -763,3 +765,86 @@ def open_csv(input_file, read_options=None, parse_options=None,
                  move(c_convert_options),
                  maybe_unbox_memory_pool(memory_pool))
     return reader
+
+
+cdef class WriteOptions(_Weakrefable):
+    """
+    Options for writing CSV files.
+
+    Parameters
+    ----------
+    include_header : bool, optional (default True)
+        Whether to include the header
+    batch_size : int, optional (default 1024)
+        How many rows to process together when converting and writing
+        CSV
+    """
+    cdef:
+        CCSVWriteOptions options
+
+    # Avoid mistakingly creating attributes
+    __slots__ = ()
+
+    def __init__(self, *, include_header=None, batch_size=None):
+        self.options = CCSVWriteOptions.Defaults()
+        if include_header is not None:
+            self.options.include_header = include_header
+        if batch_size is not None:
+            self.options.batch_size = 1024
+
+
+cdef _get_write_options(WriteOptions write_options, CCSVWriteOptions* out):
+    if write_options is None:
+        out[0] = CCSVWriteOptions.Defaults()
+    else:
+        out[0] = write_options.options
+
+
+def write_csv(data, output_file, write_options=None,
+              MemoryPool memory_pool=None):
+    """
+
+    Parameters
+    ----------
+    data: The data to write.
+        Either a pyarrow.RecordBatch or a pyarrow.Table
+    output_file: string, path, pyarrow.OutputStream or file-like object
+        The location of CSV data.
+    write_options: pyarrow.csv.WriteOptions
+        Options to configure writing the CSV file.
+    memory_pool: MemoryPool, optional
+        Pool for temporary allocations.
+
+    Returns
+    -------
+    None
+    """
+    cdef:
+        shared_ptr[COutputStream] stream
+        CCSVWriteOptions c_write_options
+        CMemoryPool* c_memory_pool
+        CRecordBatch* batch
+        CTable* table
+    _get_write_options(write_options, &c_write_options)
+
+    try:
+        where = _stringify_path(output_file)
+    except TypeError:
+        get_writer(output_file, &stream)
+    else:
+        c_where = tobytes(where)
+        stream = GetResultValue(FileOutputStream.Open(c_where))

Review comment:
       I think I copied this from someplace.

##########
File path: python/pyarrow/_csv.pyx
##########
@@ -763,3 +765,86 @@ def open_csv(input_file, read_options=None, parse_options=None,
                  move(c_convert_options),
                  maybe_unbox_memory_pool(memory_pool))
     return reader
+
+
+cdef class WriteOptions(_Weakrefable):
+    """
+    Options for writing CSV files.
+
+    Parameters
+    ----------
+    include_header : bool, optional (default True)
+        Whether to include the header
+    batch_size : int, optional (default 1024)
+        How many rows to process together when converting and writing
+        CSV
+    """
+    cdef:
+        CCSVWriteOptions options
+
+    # Avoid mistakingly creating attributes
+    __slots__ = ()
+
+    def __init__(self, *, include_header=None, batch_size=None):
+        self.options = CCSVWriteOptions.Defaults()
+        if include_header is not None:
+            self.options.include_header = include_header
+        if batch_size is not None:
+            self.options.batch_size = 1024
+
+
+cdef _get_write_options(WriteOptions write_options, CCSVWriteOptions* out):
+    if write_options is None:
+        out[0] = CCSVWriteOptions.Defaults()
+    else:
+        out[0] = write_options.options
+
+
+def write_csv(data, output_file, write_options=None,
+              MemoryPool memory_pool=None):
+    """
+
+    Parameters
+    ----------
+    data: The data to write.
+        Either a pyarrow.RecordBatch or a pyarrow.Table
+    output_file: string, path, pyarrow.OutputStream or file-like object
+        The location of CSV data.
+    write_options: pyarrow.csv.WriteOptions
+        Options to configure writing the CSV file.
+    memory_pool: MemoryPool, optional
+        Pool for temporary allocations.
+
+    Returns
+    -------
+    None
+    """
+    cdef:
+        shared_ptr[COutputStream] stream
+        CCSVWriteOptions c_write_options
+        CMemoryPool* c_memory_pool
+        CRecordBatch* batch
+        CTable* table
+    _get_write_options(write_options, &c_write_options)
+
+    try:
+        where = _stringify_path(output_file)
+    except TypeError:
+        get_writer(output_file, &stream)
+    else:
+        c_where = tobytes(where)
+        stream = GetResultValue(FileOutputStream.Open(c_where))
+
+    c_memory_pool = maybe_unbox_memory_pool(memory_pool)
+    if isinstance(data, RecordBatch):
+        batch = (<RecordBatch>data).batch
+        with nogil:
+            check_status(WriteCSV(deref(batch), c_write_options, c_memory_pool,
+                                  stream.get()))
+    elif isinstance(data, Table):
+        table = (<Table>data).table
+        with nogil:
+            check_status(WriteCSV(deref(table), c_write_options, c_memory_pool,
+                                  stream.get()))
+    else:
+        raise ValueError(type(data))

Review comment:
       done.

##########
File path: python/pyarrow/_csv.pyx
##########
@@ -763,3 +765,86 @@ def open_csv(input_file, read_options=None, parse_options=None,
                  move(c_convert_options),
                  maybe_unbox_memory_pool(memory_pool))
     return reader
+
+
+cdef class WriteOptions(_Weakrefable):
+    """
+    Options for writing CSV files.
+
+    Parameters
+    ----------
+    include_header : bool, optional (default True)
+        Whether to include the header
+    batch_size : int, optional (default 1024)
+        How many rows to process together when converting and writing
+        CSV
+    """
+    cdef:
+        CCSVWriteOptions options
+
+    # Avoid mistakingly creating attributes
+    __slots__ = ()
+
+    def __init__(self, *, include_header=None, batch_size=None):
+        self.options = CCSVWriteOptions.Defaults()
+        if include_header is not None:
+            self.options.include_header = include_header
+        if batch_size is not None:
+            self.options.batch_size = 1024
+
+
+cdef _get_write_options(WriteOptions write_options, CCSVWriteOptions* out):
+    if write_options is None:
+        out[0] = CCSVWriteOptions.Defaults()
+    else:
+        out[0] = write_options.options
+
+
+def write_csv(data, output_file, write_options=None,
+              MemoryPool memory_pool=None):
+    """
+
+    Parameters
+    ----------
+    data: The data to write.
+        Either a pyarrow.RecordBatch or a pyarrow.Table
+    output_file: string, path, pyarrow.OutputStream or file-like object
+        The location of CSV data.
+    write_options: pyarrow.csv.WriteOptions
+        Options to configure writing the CSV file.
+    memory_pool: MemoryPool, optional
+        Pool for temporary allocations.
+
+    Returns
+    -------
+    None
+    """
+    cdef:
+        shared_ptr[COutputStream] stream
+        CCSVWriteOptions c_write_options
+        CMemoryPool* c_memory_pool
+        CRecordBatch* batch
+        CTable* table
+    _get_write_options(write_options, &c_write_options)
+
+    try:
+        where = _stringify_path(output_file)
+    except TypeError:
+        get_writer(output_file, &stream)
+    else:
+        c_where = tobytes(where)
+        stream = GetResultValue(FileOutputStream.Open(c_where))
+
+    c_memory_pool = maybe_unbox_memory_pool(memory_pool)
+    if isinstance(data, RecordBatch):
+        batch = (<RecordBatch>data).batch
+        with nogil:
+            check_status(WriteCSV(deref(batch), c_write_options, c_memory_pool,
+                                  stream.get()))
+    elif isinstance(data, Table):
+        table = (<Table>data).table

Review comment:
       thanks. changed.

##########
File path: python/pyarrow/includes/libarrow.pxd
##########
@@ -1634,6 +1641,15 @@ cdef extern from "arrow/csv/api.h" namespace "arrow::csv" nogil:
             CCSVReadOptions, CCSVParseOptions, CCSVConvertOptions)
 
 
+# Writer is included explicity to avoid having to set additional
+# C-Processor definitions in setup.py for cmake.

Review comment:
       removed.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

[GitHub] [arrow] emkornfield commented on a change in pull request #9504: ARROW-2229: [C++][Python] Add WriteCsv functionality.

Reply via email to