Attachment inline:
import pyarrow as pa
import multiprocessing as mp
import numpy as np
def make_payload():
    """Common function - make data to send.

    Returns a 3-element list: a str tag, an int, and a random 4x4
    float array drawn uniformly from [-100, 100).
    """
    matrix = np.random.uniform(-100, 100, (4, 4))
    return ['message', 123, matrix]
def send_payload(payload, connection):
    """Common function - serialize & send data through a socket.

    Serializes *payload* with pyarrow, detaches the raw data buffers
    from the picklable component metadata, then sends: the metadata,
    each buffer in order, and finally an empty-bytes sentinel that
    marks end-of-buffers for the receiver.
    """
    components = pa.serialize(payload).to_components()
    # Detach the buffers; the remaining metadata dict is picklable.
    buffers = components.pop('data')
    connection.send(components)
    for buf in buffers:
        connection.send_bytes(buf)
    # Empty-bytes sentinel: tells the receiver there are no more buffers.
    connection.send_bytes(b'')
def recv_payload(connection):
    """Common function - recv data through a socket & deserialize.

    Mirror of send_payload: receives the component metadata, then reads
    byte buffers until the empty-bytes sentinel, and reassembles them
    for pyarrow deserialization.
    """
    components = connection.recv()
    buffers = []
    while True:
        chunk = connection.recv_bytes()
        if not chunk:
            # Empty-bytes sentinel -> all buffers received.
            break
        buffers.append(pa.py_buffer(chunk))
    components['data'] = buffers
    print('...deserialize')
    return pa.deserialize_components(components)
def run_same_process():
    """Same process: Send data down a socket, then read data from the
    matching socket"""
    print('run_same_process')
    # One-way pipe: write on one end, read the round-trip back on the other.
    reader, writer = mp.Pipe(duplex=False)
    original = make_payload()
    print(original)
    send_payload(original, writer)
    roundtripped = recv_payload(reader)
    print(roundtripped)
def receiver(recv_conn):
    """Separate process: runs in a different process, recv data & deserialize"""
    print('Receiver started')
    print(recv_payload(recv_conn))
def run_separate_process():
    """Separate process: launch the child process, then send data"""
    print('run_separate_process')
    reader, writer = mp.Pipe(duplex=False)
    # Child gets the read end; parent keeps the write end.
    child = mp.Process(target=receiver, args=(reader,))
    child.start()
    data = make_payload()
    print(data)
    send_payload(data, writer)
    child.join()
# Run the in-process round-trip first, then the cross-process variant.
if __name__ == '__main__':
    run_same_process()
    run_separate_process()
On Fri, Jul 6, 2018 at 2:42 PM Josh Quigley <[email protected]>
wrote:
> A reproducible program attached - it first runs serialize/deserialize from
> the same process, then it does the same work using a separate process for
> the deserialize.
>
> The behaviour seen (after the same-process code executes happily) is
> hanging / child-process crashing during the call to deserialize.
>
> Is this expected, and if not, is there a known workaround?
>
> Running Windows 10, conda distribution, with package versions listed
> below. I'll also see what happens if I run on *nix.
>
> - arrow-cpp=0.9.0=py36_vc14_7
> - boost-cpp=1.66.0=vc14_1
> - bzip2=1.0.6=vc14_1
> - hdf5=1.10.2=vc14_0
> - lzo=2.10=vc14_0
> - parquet-cpp=1.4.0=vc14_0
> - snappy=1.1.7=vc14_1
> - zlib=1.2.11=vc14_0
> - blas=1.0=mkl
> - blosc=1.14.3=he51fdeb_0
> - cython=0.28.3=py36hfa6e2cd_0
> - icc_rt=2017.0.4=h97af966_0
> - intel-openmp=2018.0.3=0
> - numexpr=2.6.5=py36hcd2f87e_0
> - numpy=1.14.5=py36h9fa60d3_2
> - numpy-base=1.14.5=py36h5c71026_2
> - pandas=0.23.1=py36h830ac7b_0
> - pyarrow=0.9.0=py36hfe5e424_2
> - pytables=3.4.4=py36he6f6034_0
> - python=3.6.6=hea74fb7_0
> - vc=14=h0510ff6_3
> - vs2015_runtime=14.0.25123=3
>
>