Re: [Numpy-discussion] array vs. matrix performance

Charles R Harris Mon, 21 May 2007 07:10:25 -0700

On 5/21/07, David Cournapeau <[EMAIL PROTECTED]> wrote:


Nils Wagner wrote:
> Robert Cimrman wrote:
>> I have come to a case where using a matrix would be easier than an
>> array. The code uses lots of dot products, so I tested scipy.dot()
>> performance with the code below and found that the array version is
much
>> faster (about 3 times for the given shape). What is the reason for
this?
>> Or is something wrong with my measurement?
>>
>> regards,
>> r.
>>
>> ---
>> import timeit
>> setup = """
>> import numpy as nm
>> import scipy as sc
>> X = nm.random.rand( 100, 3 )
>> X = nm.asmatrix( X ) # (un)comment this line.
>> print X.shape
>> """
>> tt = timeit.Timer( 'sc.dot( X.T, X )', setup )
>> print tt.timeit()
>> _______________________________________________
>> Numpy-discussion mailing list
>> [email protected]
>> http://projects.scipy.org/mailman/listinfo/numpy-discussion
>>
> Confirmed, but for what reason ?
>
> 0.5.3.dev3020
> 1.0.3.dev3792
> Array version
> 6.84843301773
> Matrix version
> 17.1273219585
>
My guess would be that for such small matrices, the cost of matrix
wrapping is not negligeable against the actual computation. This
difference disappears for bigger matrices (for example, try 1000 and
5000 instead of 100 for the first dimension).



Hmmm, I wonder how Tim's matrix version works. I've attached the code. I am
rather fond of the approach and have used it a few times myself. Tim uses
the dot product is written so: a(b). That is, the () operator is used
instead of the *.

Chuck

from numpy import *

_ULT = 1 # Utlimate AKA last
_PEN = 2 # Penultimate AKA second to last

# I'm sure there's a better way to do this and this is probably buggy...
def _reduce(key, ndim):
    if isinstance(key, tuple):
        m = len(key)
        if m == 0:
            return 0
        elif m == 1:
            key = key[0]
        else:
            if Ellipsis in key:
                key = (None,)*(ndim-m) + key
            if len(key) == ndim:
                return isinstance(key[-1], int) * _ULT + isinstance(key[-2], int) * _PEN
            if len(key) == ndim-1:
                return isinstance(key[-1], int) * _PEN
    if isinstance(key, int):
        if ndim == 1:
           return _ULT
        if ndim == 2:
            return _PEN
    return 0

class ndmatrix(ndarray):
    def __array_finalize__(self, obj):
        if obj.ndim < 2:
            raise ValueError("matrices must have dimension of at least 2")
    def __getitem__(self, key):
        rmask = _reduce(key, self.ndim)
        value = ndarray.__getitem__(self, key)
        if isinstance(value, ndmatrix):
            if rmask == _ULT | _PEN:
                return value.view(ndarray)
            if rmask == _ULT:
                return value.view(ndcolumn)
            if rmask == _PEN:
                return value.view(ndrow)
        return value
    def __call__(self, other):
        if isinstance(other, ndcolumn):
            return sum(self * other[...,newaxis,:], -1).view(ndcolumn)
        if isinstance(other, ndmatrix):
            return sum(self[...,newaxis] * other[...,newaxis,:], -2).view(ndmatrix)
        else:
            raise TypeError("Can only matrix multiply matrices by matrices or vectors")
    def transpose_vector(self):
        return self.swapaxes(-1,-2)
    t = property(transpose_vector)
       
class ndcolumn(ndarray):
    def __array_finalize__(self, obj):
        if obj.ndim < 1:
            raise ValueError("vectors must have dimension of at least 1")
    def __getitem__(self, key):
        rmask = _reduce(key, self.ndim)
        value = ndarray.__getitem__(self, key)
        if isinstance(value, ndmatrix):
            if rmask == _ULT:
                return value.view(ndarray)
        return value
    def __call__(self, other):
        raise TypeError("Cannot matrix multiply columns with anything")
    def transpose_vector(self):
        return self.view(ndrow)
    t = property(transpose_vector)

class ndrow(ndarray):
    def __array_finalize__(self, obj):
        if obj.ndim < 1:
            raise ValueError("vectors must have dimension of at least 1")
    def __getitem__(self, key):
        rmask = _reduce(key, self.ndim)
        value = ndarray.__getitem__(self, key)
        if isinstance(value, ndmatrix):
            if rmask == _ULT:
                return value.view(ndarray)
        return value
    def __call__(self, other):
        if isinstance(other, ndcolumn):
            return sum(self * other, -1).view(ndarray)
        if isinstance(othe, ndmatrix):
            raise notimplemented
        else:
            raise TypeError("Can only matrix multiply rows with matrices or columns")
    def transpose_vector(self):
        return self.view(ndcolumn)
    t = property(transpose_vector)



def matrix(*args, **kwargs):
    m = array(*args, **kwargs)
    return m.view(ndmatrix)
   
def column(*args, **kwargs):
    v = array(*args, **kwargs)
    return v.view(ndcolumn)
   
def row(*args, **kwargs):
    v = array(*args, **kwargs)
    return v.view(ndrow)
   
   
if __name__ == "__main__":
    import doctest, matrix
    doctest.testmod(matrix)

_______________________________________________
Numpy-discussion mailing list
[email protected]
http://projects.scipy.org/mailman/listinfo/numpy-discussion

Re: [Numpy-discussion] array vs. matrix performance

Reply via email to