Hi,

I'm close to a working PoC for the generalized elemwise Op (CPU for now). 
However, it appears the Op is not getting properly fused with other
elemwise Ops.

There are two new scalar Ops, ElemIdx and ElemAt, with respective Elemwise
subclasses: TensorIdx and TensorAt.

The definitions of the new Ops:

from theano.scalar import ScalarOp, int32
from theano.tensor.elemwise import Elemwise

class ElemIdx(ScalarOp):
    '''
    Gives the index of each tensor element along an axis. All indices are
    computed on the fly during the elemwise loop, so memory consumption is
    much lower than materializing an index tensor. This operates on tensor
    variables while still being able to fuse with elemwise.
    This is similar to threadIdx.* in CUDA.
    '''
    # TODO
    # - finish DOCS
    # - should be 0 inps -> 1 outs, like constant,
    #   however theano is not happy with 0 inps for now
    # - support negative axis
    # - make axis symbolic var?
    # - implement numpy.intp for output type?
    __props__ = ('axis',)
    nin = 1
    nout = 1

    def __init__(self, axis, **kwargs):
        super(ElemIdx, self).__init__(**kwargs)
        self.axis = axis

    def c_code(self, node, name, inputs, outputs, sub):
        inp, = inputs
        out, = outputs
        axis = self.axis
        # protect substitutions at Elemwise
        l_sub = '%(l_sub)s'
        r_sub = '%(r_sub)s'
        idx_var = 'IDX_%(inp)s_%(axis)d' % locals()
        code = '''
        #ifdef TENSOR_ELEMWISE
        %(out)s = %(l_sub)s%(idx_var)s%(r_sub)s;
        #endif
        ''' % locals()
        return code

    # TODO def c_code_contiguous(self):
    def c_code_cache_version(self):
        return (0,)

    def do_constant_folding(self, node):
        return False

    def output_types(self, inp_types):
        return (int32,)

class ElemAt(ScalarOp):
    '''
    Similar to advanced subtensor, but works within elemwise.
    This is the inverse of ElemIdx: it reads the element at the given
    per-axis indices.
    '''
    # TODO finish DOCS
    nout = 1

    def __init__(self, ndim, **kwargs):
        super(ElemAt, self).__init__(**kwargs)
        self.nin = 1+ndim

    def c_code(self, node, name, inputs, outputs, sub):
        inp = inputs[0]
        out, = outputs
        idxs = inputs[1:]
        code = '%(out)s = %(inp)ster[' % locals()
        terms = []
        # protect nested substitutions at Elemwise
        l_sub = '%(l_sub)s'
        r_sub = '%(r_sub)s'
        for axis, idx in enumerate(idxs):
            strd_var = 'STRD_%(inp)s_%(axis)d' % locals()
            terms.append(
                '%(idx)s*%(l_sub)s%(strd_var)s%(r_sub)s' % locals())
        code += ' + '.join(terms) + '];\n'
        return '''
        #ifdef TENSOR_ELEMWISE
        %s
        #endif\n''' % code

    def c_code_cache_version(self):
        return (0,)

    def do_constant_folding(self, node):
        return False

    def output_types(self, inp_types):
        return inp_types[:1]
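
Aside, since it may look odd: the l_sub/r_sub strings in both c_code
methods implement a two-stage %-substitution, so that the (hacked)
Elemwise can fill in the index/stride variables later. A toy
illustration (the names and the stage-two values here are made up, not
the actual Elemwise internals):

out, idx_var = 'V5', 'IDX_V3_0'
l_sub, r_sub = '%(l_sub)s', '%(r_sub)s'  # survive the first substitution
stage1 = '%(out)s = %(l_sub)s%(idx_var)s%(r_sub)s;' % locals()
# stage1 == 'V5 = %(l_sub)sIDX_V3_0%(r_sub)s;'
stage2 = stage1 % dict(l_sub='(', r_sub=')')  # hypothetical second pass
# stage2 == 'V5 = (IDX_V3_0);'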

class TensorIdx(Elemwise):
    # TODO DOCS
    __props__ = Elemwise.__props__

    def __init__(self, axis, **kwargs):
        super(TensorIdx, self).__init__(
            scalar_op=ElemIdx(axis),
            **kwargs)

    def __str__(self):
        name = 'idx' if self.name is None else self.name
        axis = self.scalar_op.axis
        return '%(name)s{%(axis)d}' % locals()

    def do_constant_folding(self, node):
        return False

class TensorAt(Elemwise):
    # TODO DOCS
    __props__ = Elemwise.__props__

    def __init__(self, ndim, **kwargs):
        super(TensorAt, self).__init__(
            scalar_op=ElemAt(ndim),
            **kwargs)

    def __str__(self):
        name = 'at' if self.name is None else self.name
        ndim = self.scalar_op.nin - 1
        return '%(name)s{%(ndim)dD}' % locals()

    def do_constant_folding(self, node):
        return False

def idx(x, axis):
    if not isinstance(axis, int):
        raise TypeError('axis must be an integer')
    return TensorIdx(axis)(x)

def at_idx(x, *idxs):
    return TensorAt(x.ndim)(x, *idxs)
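
In case the intended semantics aren't obvious from the C code, here is a
rough NumPy equivalent of what idx and at_idx are meant to compute (just
a reference sketch of the semantics, not how the Ops work internally):

import numpy as np

def idx_ref(x, axis):
    # index of every element along `axis`; same shape as x
    return np.indices(x.shape)[axis].astype(np.int32)

def at_idx_ref(x, *idxs):
    # gather: read x at one index tensor per axis
    return x[tuple(idxs)]

x = np.arange(6, dtype='float32').reshape(2, 3)
idx_ref(x, 0)                                # [[0 0 0], [1 1 1]]
idx_ref(x, 1)                                # [[0 1 2], [0 1 2]]
at_idx_ref(x, idx_ref(x, 0), idx_ref(x, 1))  # == x, identity gather

The difference is that the real Ops never materialize these index
tensors; the indices are computed inside the elemwise loop, like
threadIdx.* in CUDA.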

There are also quite a few hacks in elemwise.py and elemwise_cgen.py to
make this work (branch:
<https://github.com/khaotik/Theano/blob/padop/theano/tensor/elemwise.py>;
highly hacky/unstable though).

When building a graph:

import theano
import theano.tensor as T

x = T.fmatrix()
i0 = idx(x, 0)
i1 = idx(x, 1)

fn0 = theano.function([x], i0 + i1)
fn1 = theano.function([x], idx(i0 + i1, 0))  # doesn't make sense, just for testing
fn2 = theano.function([x], at_idx(x, i0, i1))


dp = theano.printing.debugprint
dp(fn0)
dp(fn1)
dp(fn2)

This gives:

Elemwise{Composite{(ElemIdx{axis=0}(i0) + ElemIdx{axis=1}(i0))}} [id A] ''   0
 |<TensorType(float32, matrix)> [id B]


idx{0} [id A] ''   1
 |Elemwise{Composite{(ElemIdx{axis=0}(i0) + ElemIdx{axis=1}(i0))}} [id B] ''   0
   |<TensorType(float32, matrix)> [id C]


at{2D} [id A] ''   2
 |<TensorType(float32, matrix)> [id B]
 |idx{0} [id C] ''   1
 | |<TensorType(float32, matrix)> [id B]
 |idx{1} [id D] ''   0
   |<TensorType(float32, matrix)> [id B]
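
For comparison, what I was hoping fn2 would compile to is a single fused
node, roughly like this (hand-written, not actual debugprint output):

Elemwise{Composite{ElemAt(i0, ElemIdx{axis=0}(i0), ElemIdx{axis=1}(i0))}} [id A] ''   0
 |<TensorType(float32, matrix)> [id B]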

It looks like the custom Ops won't fuse with their own subtrees (idx{0}
and at{2D} stay separate from the elemwise nodes below them), although
they can be fused as children of a built-in elemwise Op, as in fn0. Any
clue what causes this?

Thanks.
