Hi,
I'm close to a working PoC for the generalized elemwise Op (CPU for now).
However, it appears the Op is not getting properly fused with other elemwise
Ops.
There are two new scalar Ops, ElemIdx and ElemAt, with their respective
Elemwise subclasses TensorIdx and TensorAt.
The definitions of the new Ops:
from theano.scalar import ScalarOp, int32
from theano.tensor.elemwise import Elemwise


class ElemIdx(ScalarOp):
    '''
    Gives the tensor index along an axis. All the indices are computed
    on the fly during elemwise, hence much less memory consumption.
    This operates on tensor objects while still being able to fuse with
    elemwise. It is similar to threadIdx.* in CUDA.
    '''
    # TODO
    # - finish docs
    # - should be 0 inps -> 1 outs, like a constant,
    #   however Theano is not happy with 0 inps for now
    # - support negative axis
    # - make axis a symbolic var?
    # - use numpy.intp for the output type?
    __props__ = ('axis',)
    nin = 1
    nout = 1

    def __init__(self, axis, **kwargs):
        super(ElemIdx, self).__init__(**kwargs)
        self.axis = axis

    def c_code(self, node, name, inputs, outputs, sub):
        inp, = inputs
        out, = outputs
        axis = self.axis
        # protect substitutions at Elemwise (see the note after this class)
        l_sub = '%(l_sub)s'
        r_sub = '%(r_sub)s'
        idx_var = 'IDX_%(inp)s_%(axis)d' % locals()
        code = '''
        #ifdef TENSOR_ELEMWISE
        %(out)s = %(l_sub)s%(idx_var)s%(r_sub)s;
        #endif
        ''' % locals()
        return code

    # TODO def c_code_contiguous(self):

    def c_code_cache_version(self):
        return (0,)

    def do_constant_folding(self, node):
        return False

    def output_types(self, *inp_types):
        return (int32,)
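To clarify the "protect substitutions" comment above (the same trick is used
in ElemAt below): the string returned by c_code deliberately still contains
literal %(l_sub)s / %(r_sub)s markers after the first % locals() pass, so the
hacked Elemwise can fill them in during its own interpolation later. A quick
sanity check (the variable names 'V3' and 'V5' are made up; the node/name/sub
arguments are unused by this c_code):

snippet = ElemIdx(axis=0).c_code(None, 'node0', ['V3'], ['V5'], {})
assert 'V5 = %(l_sub)sIDX_V3_0%(r_sub)s;' in snippet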
class ElemAt(ScalarOp):
    '''
    Similar to advanced subtensor, but works within elemwise.
    This is the opposite of ElemIdx.
    '''
    # TODO finish docs
    nout = 1

    def __init__(self, ndim, **kwargs):
        super(ElemAt, self).__init__(**kwargs)
        self.nin = 1 + ndim

    def c_code(self, node, name, inputs, outputs, sub):
        inp = inputs[0]
        out, = outputs
        idxs = inputs[1:]
        code = '%(out)s = %(inp)ster[' % locals()
        terms = []
        # protect nested substitutions at Elemwise
        l_sub = '%(l_sub)s'
        r_sub = '%(r_sub)s'
        for axis, idx in enumerate(idxs):
            strd_var = 'STRD_%(inp)s_%(axis)d' % locals()
            terms.append(
                '%(idx)s*%(l_sub)s%(strd_var)s%(r_sub)s' % locals())
        code += ' + '.join(terms) + '];\n'
        return '''
        #ifdef TENSOR_ELEMWISE
        %s
        #endif\n''' % code

    def c_code_cache_version(self):
        return (0,)

    def do_constant_folding(self, node):
        return False

    def output_types(self, inp_types):
        # pdb.set_trace()
        return inp_types[:1]
class TensorIdx(Elemwise):
    # TODO DOCS
    __props__ = Elemwise.__props__

    def __init__(self, axis, **kwargs):
        super(TensorIdx, self).__init__(
            scalar_op=ElemIdx(axis),
            **kwargs)

    def __str__(self):
        name = 'idx' if self.name is None else self.name
        axis = self.scalar_op.axis
        return '%(name)s{%(axis)d}' % locals()

    def do_constant_folding(self, node):
        return False


class TensorAt(Elemwise):
    # TODO DOCS
    __props__ = Elemwise.__props__

    def __init__(self, ndim, **kwargs):
        super(TensorAt, self).__init__(
            scalar_op=ElemAt(ndim),
            **kwargs)

    def __str__(self):
        name = 'at' if self.name is None else self.name
        ndim = self.scalar_op.nin - 1
        return '%(name)s{%(ndim)dD}' % locals()

    def do_constant_folding(self, node):
        return False
def idx(x, axis):
    if not isinstance(axis, int):
        raise TypeError('axis must be integer')
    return TensorIdx(axis)(x)


def at_idx(x, *idxs):
    return TensorAt(x.ndim)(x, *idxs)
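For clarity, here is what these are meant to compute, as a rough NumPy
reference (this obviously ignores the whole point of computing indices on the
fly; it only pins down the values, and idx_ref/at_idx_ref are just names for
this sketch):

import numpy as np

def idx_ref(x, axis):
    # idx(x, axis): the index along `axis` at every position, broadcast to x's shape
    shape = [1] * x.ndim
    shape[axis] = x.shape[axis]
    ar = np.arange(x.shape[axis], dtype=np.int32).reshape(shape)
    return np.broadcast_to(ar, x.shape)

def at_idx_ref(x, *idxs):
    # at_idx(x, i0, i1, ...): elementwise gather, i.e. advanced indexing with full index tensors
    return x[tuple(idxs)]

In particular, at_idx(x, idx(x, 0), idx(x, 1)) should just give back x, which
is what fn2 below computes.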
There are also quite a few hacks to elemwise.py and elemwise_cgen.py to make
this work (branch:
<https://github.com/khaotik/Theano/blob/padop/theano/tensor/elemwise.py>,
still highly hacky/unstable).
When building a graph:
x = T.imatrix()
i0 = idx(x, 0)
i1 = idx(x, 1)
fn0 = theano.function([x], i0+i1)
fn1 = theano.function([x], idx(i0 + i1, 0))  # doesn't make sense, just for testing
fn2 = theano.function([x], at_idx(x, i0, i1))
dp = theano.printing.debugprint
dp(fn0)
dp(fn1)
dp(fn2)
This gives:
Elemwise{Composite{(ElemIdx{axis=0}(i0) + ElemIdx{axis=1}(i0))}} [id A] ''   0
 |<TensorType(float32, matrix)> [id B]

idx{0} [id A] ''   1
 |Elemwise{Composite{(ElemIdx{axis=0}(i0) + ElemIdx{axis=1}(i0))}} [id B] ''   0
 | |<TensorType(float32, matrix)> [id C]

at{2D} [id A] ''   2
 |<TensorType(float32, matrix)> [id B]
 |idx{0} [id C] ''   1
 | |<TensorType(float32, matrix)> [id B]
 |idx{1} [id D] ''   0
 | |<TensorType(float32, matrix)> [id B]
It looks like the custom Op won't fuse its own subtree (fn1, fn2), although it
does get fused as a child of a built-in elemwise Op (fn0). Any clue what might
be causing this?
Thanks.