Hello,

I still haven't managed to track down the error. Below is a shorter example 
that triggers it. It seems Theano tries to create a variable for the output 
gradient of a node through which I do not backpropagate. At some point it 
hits a DisconnectedType instance and raises an error.

import numpy as np
import theano.tensor as T
import theano


def make_ops():
    x_var = T.vector()
    m_var = T.bvector()

    r = m_var.sum().astype('floatX')
    z = x_var * m_var / r


    def grad_op1(inputs, output_gradients):
        return [
            output_gradients[0],  # computation delegated to op2
            theano.gradient.DisconnectedType()()
        ]


    op1 = theano.OpFromGraph(
        inputs=[x_var, m_var],
        outputs=[z, r],
        grad_overrides=grad_op1,
        inline=True,
        name="op1")

    return op1


op1 = make_ops()
x_var = T.vector()
m_var = T.bvector()
z, r = op1(x_var, m_var)

g = theano.grad(T.sum(z), wrt=x_var)
print(g.eval({x_var: np.array([1., .3, .0, .2], dtype=np.float32),
              m_var: np.array([1, 0, 1, 1], dtype=np.int8)}))

Output:
TypeError: Cannot convert Type DisconnectedType (of Variable <DisconnectedType>)
into Type TensorType(float32, scalar). You can try to manually convert
<DisconnectedType> into a TensorType(float32, scalar).

Process finished with exit code 1
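
For reference, as far as I can tell the same computation without OpFromGraph 
differentiates fine, which makes me suspect the problem lies in how 
OpFromGraph handles the disconnected output gradient for r. A minimal sanity 
check (it reuses the names above and assumes floatX=float32, as in the 
example):

import numpy as np
import theano.tensor as T
import theano

x_var = T.vector()
m_var = T.bvector()

r = m_var.sum().astype('floatX')
z = x_var * m_var / r

# Plain graph: the gradient of sum(z) wrt x is simply m / r.
g = theano.grad(T.sum(z), wrt=x_var)
print(g.eval({x_var: np.array([1., .3, .0, .2], dtype=np.float32),
              m_var: np.array([1, 0, 1, 1], dtype=np.int8)}))
# expected: [1/3, 0, 1/3, 1/3]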



On Thursday, July 13, 2017 at 13:03:28 UTC+2, [email protected] wrote:
>
> Hi,
>
> Thank you for the suggestion; inlining actually makes more sense for what 
> I am trying to do. 
>
> However, a casting issue arises when trying to compute the derivative wrt 
> the continuous input. If I understood correctly, a DisconnectedType 
> instance should be returned as the gradient for integer inputs (or inputs 
> wrt which I don't need the derivative), right?
>
> Below is the slightly modified code which illustrates this new issue:
>
> import numpy as np
> import theano.tensor as T
> import theano
>
>
> def make_ops():
>     x_var = T.vector()
>     m_var = T.bvector()
>
>     r = m_var.sum().astype('floatX')
>     z = x_var * m_var / r
>
>
>     def grad_op1(inputs, output_gradients):
>         return [
>             output_gradients[0],  # computation delegated to op2
>             theano.gradient.DisconnectedType()(),
>         ]
>
>
>     op1 = theano.OpFromGraph(
>         inputs=[x_var, m_var],
>         outputs=[z, r],
>         grad_overrides=grad_op1,
>         inline=True)
>
>
>     z_var = T.vector()
>     r_var = T.scalar()
>
>     def grad_op2(inputs, output_gradients):
>         _, m_, r_ = inputs
>         return [
>             m_ * r_,
>             theano.gradient.DisconnectedType()(),
>             theano.gradient.DisconnectedType()()
>         ]
>
>     op2 = theano.OpFromGraph(
>         inputs=[z_var, m_var, r_var],
>         outputs=[z_var],
>         grad_overrides=grad_op2,
>         inline=True)
>
>     return op1, op2
>
>
> op1, op2 = make_ops()
> x_var = T.vector()
> m_var = T.bvector()
> z_, r = op1(x_var, m_var)
> z = op2(z_, m_var, r)
>
> g = theano.grad(T.sum(z), wrt=x_var)
> print(g.eval({x_var: np.array([1., .3, .0, .2], dtype=np.float32),
>               m_var: np.array([1, 0, 1, 1], dtype=np.int8)}))
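>
> One workaround I am considering (an untested sketch with the hypothetical 
> name grad_op2_zeros, assuming Theano accepts an explicit float zero 
> gradient here): return explicit zeros instead of DisconnectedType for the 
> inputs I don't differentiate, at the cost of a few spurious zeros:
>
> def grad_op2_zeros(inputs, output_gradients):
>     _, m_, r_ = inputs
>     return [
>         m_ * r_,
>         T.zeros_like(m_, dtype=theano.config.floatX),  # zeros instead of DisconnectedType
>         T.zeros_like(r_)  # r_ is already a float scalar
>     ]
>
> I am not sure whether this actually sidesteps the casting issue, though.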
>
>
>
> On Tuesday, July 11, 2017 at 11:32:50 UTC+2, [email protected] wrote:
>>
>> Hi,
>>
>> I am trying to split a computation across two ops in order to avoid 
>> spurious computations when computing the gradient.
>> My current attempt uses a first op which returns the desired result for 
>> the forward pass plus extra intermediate results. The second op just 
>> forwards the desired result, but its grad is overridden to compute the 
>> gradient based on the intermediate results.
>>
>> In this configuration, Theano complains about unused inputs because the 
>> intermediate results are not used in the forward computation of the 
>> second op.
>>
>> Is this expected behaviour or a bug?
>>
>> ----
>>
>> import numpy as np
>> import theano.tensor as T
>> import theano
>>
>>
>> def make_ops():
>>     x_var = T.vector()
>>     m_var = T.bvector()
>>
>>     r = m_var.sum().astype('floatX')
>>     z = x_var * m_var / r
>>
>>
>>     def grad_op1(inputs, output_gradients):
>>         return [
>>             output_gradients[0],  # computation delegated to op2
>>             theano.gradient.DisconnectedType()()
>>         ]
>>
>>
>>     op1 = theano.OpFromGraph(
>>         inputs=[x_var, m_var],
>>         outputs=[z, r],
>>         grad_overrides=grad_op1)
>>
>>
>>     z_var = T.vector()
>>     r_var = T.scalar()
>>
>>     def grad_op2(inputs, output_gradients):
>>         _, m_, r_ = inputs
>>         return [
>>             m_ * r_,
>>             theano.gradient.DisconnectedType()(),
>>             theano.gradient.DisconnectedType()()
>>         ]
>>
>>     op2 = theano.OpFromGraph(
>>         inputs=[z_var, m_var, r_var],
>>         outputs=[z_var],
>>         grad_overrides=grad_op2)
>>
>>     return op1, op2
>>
>>
>> op1, op2 = make_ops()
>> x_var = T.vector()
>> m_var = T.bvector()
>> z_, r = op1(x_var, m_var)
>> z = op2(z_, m_var, r)
>>
>> print(z_.eval({x_var: np.array([1., .3, .0, .2], dtype=np.float32),
>>                m_var: np.array([1, 0, 1, 1], dtype=np.int8)}))
>>
>> f = theano.function([x_var, m_var], [z], on_unused_input='ignore')  # raises anyway
>>
>> print(f(np.array([1., .3, .0, .2], dtype=np.float32),
>>       np.array([1, 0, 1, 1], dtype=np.int8)))
>>
>> # g = theano.grad(T.sum(z), wrt=x_var)
>> # print(g.eval({x_var: np.array([1., .3, .0, .2], dtype=np.float32),
>> #               m_var: np.array([1, 0, 1, 1], dtype=np.int8)}))
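>>
>> (If I remember correctly, OpFromGraph forwards extra keyword arguments to 
>> the inner theano.function when it is not inlined, so passing 
>> on_unused_input there, rather than to the outer function, might be worth 
>> a try. This is an untested guess:
>>
>> op2 = theano.OpFromGraph(
>>     inputs=[z_var, m_var, r_var],
>>     outputs=[z_var],
>>     grad_overrides=grad_op2,
>>     on_unused_input='ignore')  # forwarded to the inner function compile, if kwargs are passed through
>> )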
>>
>>
>>
