Attachment inline:

import pyarrow as pa
import multiprocessing as mp
import numpy as np

def make_payload():
    """Common function - make data to send.

    Returns a heterogeneous list: a string tag, an int, and a random
    4x4 float matrix with entries drawn uniformly from [-100, 100).
    """
    matrix = np.random.uniform(-100, 100, (4, 4))
    return ['message', 123, matrix]

def send_payload(payload, connection):
    """Common function - serialize & send data through a socket.

    Wire protocol: one picklable metadata message (the serialized
    components minus their raw buffers), then one send_bytes per raw
    buffer, then an empty b'' frame as an end-of-buffers sentinel.
    """
    components = pa.serialize(payload).to_components()

    # Detach the raw buffers; the remaining dict travels via send().
    buffers = components.pop('data')
    connection.send(components)
    for buf in buffers:
        connection.send_bytes(buf)
    # Empty frame tells the receiver there are no more buffers.
    connection.send_bytes(b'')


def recv_payload(connection):
    """Common function - recv data through a socket & deserialize.

    Mirror of send_payload: read the metadata message, then collect
    buffer frames until the empty b'' sentinel, then reassemble.
    """
    components = connection.recv()
    # iter(callable, sentinel) stops as soon as recv_bytes() yields b''.
    buffers = [pa.py_buffer(frame)
               for frame in iter(connection.recv_bytes, b'')]
    components['data'] = buffers

    print('...deserialize')
    return pa.deserialize_components(components)


def run_same_process():
    """Same process: send a payload into one end of a pipe, then read
    it back from the other end, printing both sides for comparison."""
    print('run_same_process')
    recv_conn, send_conn = mp.Pipe(duplex=False)

    original = make_payload()
    print(original)
    send_payload(original, send_conn)

    round_tripped = recv_payload(recv_conn)
    print(round_tripped)


def receiver(recv_conn):
    """Separate process: child-process entry point — receive a payload
    from the pipe, deserialize it, and print the result."""
    print('Receiver started')
    print(recv_payload(recv_conn))


def run_separate_process():
    """Separate process: start a child that deserializes, then push a
    payload to it through the pipe and wait for it to finish."""
    print('run_separate_process')
    recv_conn, send_conn = mp.Pipe(duplex=False)

    child = mp.Process(target=receiver, args=(recv_conn,))
    child.start()

    payload = make_payload()
    print(payload)
    send_payload(payload, send_conn)

    child.join()

if __name__ == '__main__':
    # Run both scenarios: the in-process round-trip first, then the
    # cross-process one (the case the email reports as hanging/crashing
    # during deserialize on Windows).
    run_same_process()
    run_separate_process()


On Fri, Jul 6, 2018 at 2:42 PM Josh Quigley <josh.quig...@lifetrading.com.au>
wrote:

> A reproducible program attached - it first runs serialize/deserialize from
> the same process, then it does the same work using a separate process for
> the deserialize.
>
> The behaviour we see (after the same-process code executes happily) is
> hanging / child-process crashing during the call to deserialize.
>
> Is this expected, and if not, is there a known workaround?
>
> Running Windows 10, conda distribution, with the package versions listed
> below. I'll also see what happens if I run on *nix.
>
>   - arrow-cpp=0.9.0=py36_vc14_7
>   - boost-cpp=1.66.0=vc14_1
>   - bzip2=1.0.6=vc14_1
>   - hdf5=1.10.2=vc14_0
>   - lzo=2.10=vc14_0
>   - parquet-cpp=1.4.0=vc14_0
>   - snappy=1.1.7=vc14_1
>   - zlib=1.2.11=vc14_0
>   - blas=1.0=mkl
>   - blosc=1.14.3=he51fdeb_0
>   - cython=0.28.3=py36hfa6e2cd_0
>   - icc_rt=2017.0.4=h97af966_0
>   - intel-openmp=2018.0.3=0
>   - numexpr=2.6.5=py36hcd2f87e_0
>   - numpy=1.14.5=py36h9fa60d3_2
>   - numpy-base=1.14.5=py36h5c71026_2
>   - pandas=0.23.1=py36h830ac7b_0
>   - pyarrow=0.9.0=py36hfe5e424_2
>   - pytables=3.4.4=py36he6f6034_0
>   - python=3.6.6=hea74fb7_0
>   - vc=14=h0510ff6_3
>   - vs2015_runtime=14.0.25123=3
>
>

Reply via email to