Can you reproduce it without all of the multiprocessing code? E.g., just call *pyarrow.serialize* in one interpreter. Then copy and paste the bytes into another interpreter and call *pyarrow.deserialize* or *pyarrow.deserialize_components*? On Thu, Jul 5, 2018 at 9:48 PM Josh Quigley <josh.quig...@lifetrading.com.au> wrote:
> Attachment inline: > > import pyarrow as pa > import multiprocessing as mp > import numpy as np > > def make_payload(): > """Common function - make data to send""" > return ['message', 123, np.random.uniform(-100, 100, (4, 4))] > > def send_payload(payload, connection): > """Common function - serialize & send data through a socket""" > s = pa.serialize(payload) > c = s.to_components() > > # Send > data = c.pop('data') > connection.send(c) > for d in data: > connection.send_bytes(d) > connection.send_bytes(b'') > > > def recv_payload(connection): > """Common function - recv data through a socket & deserialize""" > c = connection.recv() > c['data'] = [] > while True: > r = connection.recv_bytes() > if len(r) == 0: > break > c['data'].append(pa.py_buffer(r)) > > print('...deserialize') > return pa.deserialize_components(c) > > > def run_same_process(): > """Same process: Send data down a socket, then read data from the > matching socket""" > print('run_same_process') > recv_conn,send_conn = mp.Pipe(duplex=False) > payload = make_payload() > print(payload) > send_payload(payload, send_conn) > payload2 = recv_payload(recv_conn) > print(payload2) > > > def receiver(recv_conn): > """Separate process: runs in a different process, recv data & > deserialize""" > print('Receiver started') > payload = recv_payload(recv_conn) > print(payload) > > > def run_separate_process(): > """Separate process: launch the child process, then send data""" > > > print('run_separate_process') > recv_conn,send_conn = mp.Pipe(duplex=False) > process = mp.Process(target=receiver, args=(recv_conn,)) > process.start() > > payload = make_payload() > print(payload) > send_payload(payload, send_conn) > > process.join() > > if __name__ == '__main__': > run_same_process() > run_separate_process() > > > On Fri, Jul 6, 2018 at 2:42 PM Josh Quigley < > josh.quig...@lifetrading.com.au> > wrote: > > > A reproducible program attached - it first runs serialize/deserialize > from > > the same process, then it 
does the same work using a separate process for > > the deserialize. > > > > The behaviour we see (after the same process code executes happily) is > > hanging / child-process crashing during the call to deserialize. > > > > Is this expected, and if not, is there a known workaround? > > > > Running Windows 10, conda distribution, with package versions listed > > below. I'll also see what happens if I run on *nix. > > > > - arrow-cpp=0.9.0=py36_vc14_7 > > - boost-cpp=1.66.0=vc14_1 > > - bzip2=1.0.6=vc14_1 > > - hdf5=1.10.2=vc14_0 > > - lzo=2.10=vc14_0 > > - parquet-cpp=1.4.0=vc14_0 > > - snappy=1.1.7=vc14_1 > > - zlib=1.2.11=vc14_0 > > - blas=1.0=mkl > > - blosc=1.14.3=he51fdeb_0 > > - cython=0.28.3=py36hfa6e2cd_0 > > - icc_rt=2017.0.4=h97af966_0 > > - intel-openmp=2018.0.3=0 > > - numexpr=2.6.5=py36hcd2f87e_0 > > - numpy=1.14.5=py36h9fa60d3_2 > > - numpy-base=1.14.5=py36h5c71026_2 > > - pandas=0.23.1=py36h830ac7b_0 > > - pyarrow=0.9.0=py36hfe5e424_2 > > - pytables=3.4.4=py36he6f6034_0 > > - python=3.6.6=hea74fb7_0 > > - vc=14=h0510ff6_3 > > - vs2015_runtime=14.0.25123=3 > > > > >