Well, this is weird: when I run the attached code in Jupyter I get the 
GpuArrayException: b'an illegal memory access was encountered' 
exception.
When I run it as a standalone script I get 
pygpu.gpuarray.GpuArrayException: b'out of memory', which shouldn't 
happen, since running the theano function on a single GPU consumes 
2820 MiB of 4030 MiB (according to nvidia-smi).
I use THEANO_FLAGS="contexts=dev1->cuda1;dev2->cuda2" python3 
crash_sample.py to run the script.

The problem persists with cuDNN v5.0, 5.1 and 6.0.
Adding CUDA_LAUNCH_BLOCKING=1 doesn't seem to change anything.
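(For reference, that run simply combines both variables on the command line, 
roughly:
CUDA_LAUNCH_BLOCKING=1 THEANO_FLAGS="contexts=dev1->cuda1;dev2->cuda2" python3 crash_sample.py)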


On Wednesday, April 12, 2017 at 9:13:22 PM UTC+3, Adam Stooke wrote:
>
> Never mind about that libgpuarray GitHub issue... it was something else. But 
> running with CUDA_LAUNCH_BLOCKING=1 led me directly to the problem, which 
> turned out to be in an operation entirely different from where the original 
> illegal memory error appeared.
>
> On Wednesday, April 12, 2017 at 7:25:43 AM UTC-7, nouiz wrote:
>>
>> Do you have a script we could run to reproduce it? That would help us 
>> investigate. Also, please give us the Theano flags that you use to reproduce 
>> this.
>>
>> thanks
>>
>> Fred
>>
>> On Wed, Apr 5, 2017 at 12:54 PM Sergey Ovcharenko <ovchare...@gmail.com> 
>> wrote:
>>
>>> Hi,
>>>
>>> I'm struggling to get a Theano graph spread over two GPUs working, but 
>>> I keep encountering the GpuArrayException: b'an illegal memory access was 
>>> encountered' error (the full traceback is at the end of this email).
>>> The basic idea is to do a forward pass through two neural networks, each 
>>> located on a separate device, and combine the outputs.
>>>
>>> I'm using the latest Theano, libgpuarray and Lasagne to build the 
>>> networks, and have hacked Lasagne a bit to be able to pass target='device' to 
>>> the shared variable constructor during weight initialization.
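>>>
>>> The hack boils down to wrapping a Lasagne initializer so that the shared 
>>> variable is created on an explicit context. A minimal sketch, assuming a 
>>> standard callable Lasagne initializer:
>>>
>>> class ContextInitializer(object):
>>>     def __init__(self, context, lasagne_init=lasagne.init.GlorotUniform()):
>>>         self.initializer = lasagne_init
>>>         self.context = context
>>>
>>>     def __call__(self, shape):
>>>         # draw the initial values, then place the shared variable on the context
>>>         return theano.shared(self.initializer(shape), target=self.context)
>>>
>>> # used e.g. as W=ContextInitializer('dev1') when constructing a layer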
>>>
>>> I have THEANO_FLAGS="contexts=dev1->cuda1;dev2->cuda2" and the output 
>>> after theano import is:
>>> Using cuDNN version 5005 on context None 
>>> Mapped name None to device cuda: GeForce GTX 980 (0000:0A:00.0) 
>>> Using cuDNN version 5005 on context dev1 
>>> Mapped name dev1 to device cuda1: GeForce GTX 980 (0000:09:00.0) 
>>> Using cuDNN version 5005 on context dev2 
>>> Mapped name dev2 to device cuda2: GeForce GTX 980 (0000:06:00.0)
>>>
>>>
>>> The network definitions are quite lengthy (and the problem doesn't always 
>>> reproduce on toy graphs), so I'm providing a simplified example of what I'm doing. 
>>> inp_0 = T.tensor4('inp0')
>>> r0 = build_model('dev1', input_var=inp_0)
>>> inp_1 = T.tensor4('inp1')
>>> r1 = build_model("dev2", input_var=inp_1)
>>>
>>> r0_out = lasagne.layers.get_output(r0['fc6'], deterministic=False)
>>> r1_out = lasagne.layers.get_output(r1['fc6'], deterministic=False)
>>>
>>> train_r0 = theano.function(
>>>     [inp_0, inp_1],
>>>     [r0_out, r1_out]
>>> )
>>>
>>> result0 = train_r0(x, x2)
>>> This code fails with the aforementioned error.
>>>
>>> I've also tried to compile a separate function for each of the networks, 
>>> like
>>> train_r0 = theano.function(
>>>     [inp_0],
>>>     [r0_out]
>>> )
>>>
>>> train_r1 = theano.function(
>>>     [inp_1],
>>>     [r1_out]
>>> )
>>>
>>> With both compiled, running either train_r0 or train_r1 fails. But compiling 
>>> and running just a single function (whether train_r0 or train_r1) works fine.
>>> Could someone help me debug this? Please let me know if I should provide 
>>> additional code/info.
>>>
>>> Thanks,
>>> Sergey.
>>>
>>> The full traceback:
>>>
>>> RuntimeError                              Traceback (most recent call last)
>>> /home/facenx/.virtualenvs/multitheano/lib/python3.5/site-packages/theano/compile/function_module.py
>>>  in __call__(self, *args, **kwargs)
>>>     883             outputs =\
>>> --> 884                 self.fn() if output_subset is None else\
>>>     885                 self.fn(output_subset=output_subset)
>>>
>>> RuntimeError: Error in the elemwise call
>>>
>>> During handling of the above exception, another exception occurred:
>>>
>>> GpuArrayException                         Traceback (most recent call last)
>>> <ipython-input-11-902c3b4617f7> in <module>()
>>> ----> 1 result0 = train_r0(x, x2)
>>>       2 #result1 = train_r1(x2)
>>>
>>> /home/facenx/.virtualenvs/multitheano/lib/python3.5/site-packages/theano/compile/function_module.py
>>>  in __call__(self, *args, **kwargs)
>>>     896                     node=self.fn.nodes[self.fn.position_of_error],
>>>     897                     thunk=thunk,
>>> --> 898                     storage_map=getattr(self.fn, 'storage_map', 
>>> None))
>>>     899             else:
>>>     900                 # old-style linkers raise their own exceptions
>>>
>>> /home/facenx/.virtualenvs/multitheano/lib/python3.5/site-packages/theano/gof/link.py
>>>  in raise_with_op(node, thunk, exc_info, storage_map)
>>>     139 
>>>     140     hints = []
>>> --> 141     detailed_err_msg = "\nApply node that caused the error: " + 
>>> str(node)
>>>     142     if exc_value.__applynode_index__ is not None:
>>>     143         detailed_err_msg += "\nToposort index: %d" % node_index
>>>
>>> /home/facenx/.virtualenvs/multitheano/lib/python3.5/site-packages/theano/gof/graph.py
>>>  in __str__(self)
>>>     178 
>>>     179     def __str__(self):
>>> --> 180         return op_as_string(self.inputs, self)
>>>     181 
>>>     182     def __repr__(self):
>>>
>>> /home/facenx/.virtualenvs/multitheano/lib/python3.5/site-packages/theano/gof/graph.py
>>>  in op_as_string(i, op, leaf_formatter, node_formatter)
>>>    1256     between i and o
>>>    1257     """
>>> -> 1258     strs = as_string(i, op.inputs, leaf_formatter, node_formatter)
>>>    1259     return node_formatter(op, strs)
>>>    1260 
>>>
>>> /home/facenx/.virtualenvs/multitheano/lib/python3.5/site-packages/theano/gof/graph.py
>>>  in as_string(i, o, leaf_formatter, node_formatter)
>>>    1336             return leaf_formatter(r)
>>>    1337 
>>> -> 1338     return [describe(output) for output in o]
>>>    1339 
>>>    1340 
>>>
>>> /home/facenx/.virtualenvs/multitheano/lib/python3.5/site-packages/theano/gof/graph.py
>>>  in <listcomp>(.0)
>>>    1336             return leaf_formatter(r)
>>>    1337 
>>> -> 1338     return [describe(output) for output in o]
>>>    1339 
>>>    1340 
>>>
>>> /home/facenx/.virtualenvs/multitheano/lib/python3.5/site-packages/theano/gof/graph.py
>>>  in describe(r)
>>>    1334                     return s
>>>    1335         else:
>>> -> 1336             return leaf_formatter(r)
>>>    1337 
>>>    1338     return [describe(output) for output in o]
>>>
>>> /home/facenx/.virtualenvs/multitheano/lib/python3.5/site-packages/theano/gpuarray/type.py
>>>  in __str__(self)
>>>     604         except gpuarray.GpuArrayException:
>>>     605             np_data = self.data
>>> --> 606         return "GpuArrayConstant{%s}" % np_data
>>>     607 
>>>     608 
>>>
>>> pygpu/gpuarray.pyx in pygpu.gpuarray.GpuArray.__str__ 
>>> (pygpu/gpuarray.c:28703)()
>>>
>>> /home/facenx/.virtualenvs/multitheano/lib/python3.5/site-packages/numpy/core/numeric.py
>>>  in asarray(a, dtype, order)
>>>     529 
>>>     530     """
>>> --> 531     return array(a, dtype, copy=False, order=order)
>>>     532 
>>>     533 
>>>
>>> pygpu/gpuarray.pyx in pygpu.gpuarray.GpuArray.__array__ 
>>> (pygpu/gpuarray.c:21616)()
>>>
>>> pygpu/gpuarray.pyx in pygpu.gpuarray._pygpu_as_ndarray 
>>> (pygpu/gpuarray.c:18322)()
>>>
>>> pygpu/gpuarray.pyx in pygpu.gpuarray.array_read (pygpu/gpuarray.c:6923)()
>>>
>>> GpuArrayException: b'an illegal memory access was encountered'
>>>
>>>
>>>
>>

# crash_sample.py -- minimal reproduction: two Lasagne networks, one per GPU context.
import theano
import theano.tensor as T
import lasagne
from lasagne.layers import InputLayer, DenseLayer
from lasagne.layers import Conv2DLayer as ConvLayer, Pool2DLayer as PoolLayer
import numpy as np


class ContextInitializer():
    """Wrap a Lasagne initializer so that the resulting shared variable
    is created on a specific context via target=..."""

    def __init__(self, context, lasagne_init=lasagne.init.GlorotUniform()):
        self.initializer = lasagne_init
        self.context = context

    def __call__(self, shape):
        # Draw the initial values, then allocate the shared variable on the target context.
        values = self.initializer(shape)
        return theano.shared(values, target=self.context)


def build_model(target, input_var=None):
    # Build a small conv net whose parameters all live on the given context.
    net = {}
    xavier_init = ContextInitializer(target)
    constant_init = ContextInitializer(target, lasagne_init=lasagne.init.Constant())

    net['input'] = InputLayer((32, 3, 96, 96), input_var=input_var)
    net['conv1a'] = ConvLayer(net['input'], num_filters=768, filter_size=3, pad=0, flip_filters=False,
                              W=xavier_init, b=constant_init)
    net['conv1b'] = ConvLayer(net['conv1a'], num_filters=768, filter_size=3, pad=0, flip_filters=False,
                              W=xavier_init, b=constant_init)
    net['pool1b'] = PoolLayer(net['conv1b'], pool_size=2, stride=2, mode='max', ignore_border=True)

    net['fc1'] = DenseLayer(net['pool1b'], num_units=160, nonlinearity=None, W=xavier_init, b=constant_init)

    net['fc2'] = DenseLayer(net['fc1'], num_units=1000, nonlinearity=None, W=xavier_init, b=constant_init)
    return net


# Dummy input batches; the labels y/y2 are not used in this reproduction.
x, y = np.zeros((32, 3, 96, 96), dtype=np.float32), np.arange(32)
x2, y2 = np.zeros((32, 3, 96, 96), dtype=np.float32), np.arange(32)

# One copy of the network on each GPU context.
inp_0 = T.tensor4('inp0')
r0 = build_model('dev1', input_var=inp_0)
inp_1 = T.tensor4('inp1')
r1 = build_model("dev2", input_var=inp_1)

r0_out = lasagne.layers.get_output(r0['fc2'])
r1_out = lasagne.layers.get_output(r1['fc2'])

# Function that only touches the dev1 network.
train_r0 = theano.function(
    [inp_0],
    [r0_out],
    allow_input_downcast=True
)
# Function that combines outputs from both contexts.
train_r1 = theano.function(
    [inp_0, inp_1],
    [r1_out, r0_out],
    allow_input_downcast=True
)

result0 = train_r0(x)
print(result0[0].sum())

result1 = train_r1(x, x2)
print(result1[0].sum())
