Hi,

I am trying to split an computation over two ops in order to avoid spurious 
computations when computing the gradient.
My current attempt uses a first op which returns the desired result for the 
forward part and extra intermediate results. The second op just forwards 
the desired result, but its grad is overriden to compute the gradient based 
on the intermediate results.

In this configuration, Theano complains about unused inputs in the forward 
computation because the intermediate results are not used for the forward 
method of the second op.

Is this an expected behaviour or a bug?

----

import numpy as np
import theano.tensor as T
import theano


def make_ops():
    x_var = T.vector()
    m_var = T.bvector()

    r = m_var.sum().astype('floatX')
    z = x_var * m_var / r


    def grad_op1(inputs, output_gradients):
        return [
            output_gradients[0],  # computation delegated to op2
            theano.gradient.DisconnectedType()()
        ]


    op1 = theano.OpFromGraph(
        inputs=[x_var, m_var],
        outputs=[z, r],
        grad_overrides=grad_op1)


    z_var = T.vector()
    r_var = T.scalar()

    def grad_op2(inputs, output_gradients):
        _, m_, r_ = inputs
        return [
            m_ * r_,
            theano.gradient.DisconnectedType()(),
            theano.gradient.DisconnectedType()()
        ]

    op2 = theano.OpFromGraph(
        inputs=[z_var, m_var, r_var],
        outputs=[z_var],
        grad_overrides=grad_op2)

    return op1, op2


op1, op2 = make_ops()
x_var = T.vector()
m_var = T.bvector()
z_, r = op1(x_var, m_var)
z = op2(z_, m_var, r)

print(z_.eval({x_var: np.array([1., .3, .0, .2], dtype=np.float32),
               m_var: np.array([1, 0, 1, 1], dtype=np.int8)}))

f = theano.function([x_var, m_var], [z], on_unused_input='ignore')  # 
raises anyway

print(f(np.array([1., .3, .0, .2], dtype=np.float32),
      np.array([1, 0, 1, 1], dtype=np.int8)))

# g = theano.grad(T.sum(z), wrt=x_var)
# print(g.eval({x_var: np.array([1., .3, .0, .2], dtype=np.float32),
#               m_var: np.array([1, 0, 1, 1], dtype=np.int8)}))


-- 

--- 
You received this message because you are subscribed to the Google Groups 
"theano-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/d/optout.

Reply via email to