pitrou commented on code in PR #45084:
URL: https://github.com/apache/arrow/pull/45084#discussion_r1942959754
##########
python/pyarrow/_json.pyx:
##########
@@ -265,6 +267,37 @@ cdef _get_parse_options(ParseOptions parse_options,
CJSONParseOptions* out):
else:
out[0] = parse_options.options
+cdef class JSONStreamingReader(RecordBatchReader):
Review Comment:
There should probably be another blank line here to satisfy PEP 8, which asks for two blank lines before a top-level definition.
```suggestion
cdef class JSONStreamingReader(RecordBatchReader):
```
##########
python/pyarrow/tests/test_json.py:
##########
@@ -111,26 +112,20 @@ def test_parse_options(pickle_module):
unexpected_field_behavior="ignore")
-class BaseTestJSONRead:
-
+class BaseTestJSON(abc.ABC):
+ @abc.abstractmethod
def read_bytes(self, b, **kwargs):
- return self.read_json(pa.py_buffer(b), **kwargs)
+ """
+ :param b: bytes to be parsed
+ :param kwargs: arguments passed on to open the json file
+ :return: b parsed as a single RecordBatch
Review Comment:
```suggestion
:return: b parsed as a single Table
```
##########
python/pyarrow/tests/test_json.py:
##########
@@ -357,3 +608,17 @@ def read_json(self, *args, **kwargs):
table = read_json(*args, **kwargs)
table.validate(full=True)
return table
+
+
+class TestSerialStreamingJSONRead(BaseTestStreamingJSONRead,
unittest.TestCase):
+
+ @property
+ def use_threads(self):
+ return False
Review Comment:
Nit: I think this can simply be:
```suggestion
use_threads = False
```
##########
python/pyarrow/tests/test_json.py:
##########
@@ -357,3 +608,17 @@ def read_json(self, *args, **kwargs):
table = read_json(*args, **kwargs)
table.validate(full=True)
return table
+
+
+class TestSerialStreamingJSONRead(BaseTestStreamingJSONRead,
unittest.TestCase):
+
+ @property
+ def use_threads(self):
+ return False
+
+
+@pytest.mark.threading
+class TestThreadedStreamingJSONRead(BaseTestStreamingJSONRead,
unittest.TestCase):
+ @property
+ def use_threads(self):
+ return True
Review Comment:
Same here: this can simply be the class attribute `use_threads = True`.
##########
python/pyarrow/_json.pyx:
##########
@@ -308,3 +341,45 @@ def read_json(input_file, read_options=None,
parse_options=None,
table = GetResultValue(reader.get().Read())
return pyarrow_wrap_table(table)
+
+
+def open_json(input_file, read_options=None, parse_options=None,
+ MemoryPool memory_pool=None):
+ """
+ Open a streaming reader of JSON data.
+
+ Reading using this function is always single-threaded.
+
+ Parameters
+ ----------
+ input_file : string, path or file-like object
+ The location of JSON data. If a string or path, and if it ends
+ with a recognized compressed file extension (e.g. ".gz" or ".bz2"),
+ the data is automatically decompressed when reading.
+ read_options : pyarrow.json.ReadOptions, optional
+ Options for the JSON reader (see pyarrow.json.ReadOptions constructor
+ for defaults)
+ parse_options : pyarrow.json.ParseOptions, optional
+ Options for the JSON parser
+ (see pyarrow.json.ParseOptions constructor for defaults)
+ memory_pool : MemoryPool, optional
+ Pool to allocate Table memory from
Review Comment:
```suggestion
Pool to allocate RecordBatch memory from
```
##########
python/pyarrow/_json.pyx:
##########
@@ -265,6 +267,37 @@ cdef _get_parse_options(ParseOptions parse_options,
CJSONParseOptions* out):
else:
out[0] = parse_options.options
+cdef class JSONStreamingReader(RecordBatchReader):
+ """An object that reads record batches incrementally from a JSON file.
+
+ Should not be instantiated directly by user code.
+ """
+ cdef readonly:
+ Schema schema
+
+ def __init__(self):
+ raise TypeError("Do not call {}'s constructor directly, "
+ "use pyarrow.json.open_json() instead."
+ .format(self.__class__.__name__))
Review Comment:
Nit: we can use f-strings now
```suggestion
raise TypeError(f"Do not call {self.__class__.__name__}'s "
"constructor directly, "
"use pyarrow.json.open_json() instead.")
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]