Jaroslav Hajek wrote:
I think I looked into those sources briefly (because I also noticed
that it implements basic ops), but indeed there seems to be nothing
magical (read: no assembler). Some operations we need (such as
transpose-multiply or diagonal scaling) were not even there, I think.
That doesn't mean it's a bad idea to defer some operations to CXSparse
(as you said, it is a dependency already), but I don't expect big
gains unless there are some better algorithms.

I didn't mean to imply that the current built-in sparse implementation
is somehow slow (although perhaps there is room for optimizations).
Block sparse storage is simply bound to beat the plain sparse one when
the blocks get large enough.
It's just that I didn't expect 10x10 blocks to be enough, given the
m-code implementation.


Well, it's not hard to reimplement the cs_multiply mex-file from CXSparse as an oct-file, and I attach such an oct-file (mymultiply) to test. Running the example

n = [1e2, 5e2, 1e3, 1e3, 1e4, 1e4];
d = [0.01, 0.01, 0.001, 0.01, 0.001, 0.005];
for i = 1:length (n)
  fprintf ("N = %i, d = %f\n", n(i), d(i));
  A = sprand (n(i), n(i), d(i));
  B = sprand (n(i), n(i), d(i));
  tic; C0 = A * B; toc
  tic; C1 = mymultiply (A, B); toc
endfor

I see the following results (times in seconds; built-in A * B versus mymultiply):

  N      d      A * B (built-in)  mymultiply
  100    0.010  0.000342958       0.00414899
  500    0.010  0.00301409        0.00264595
  1000   0.001  0.000446094       0.000560952
  1000   0.010  0.0157239         0.0209789
  10000  0.001  0.317249          0.374798
  10000  0.005  5.38696           9.66393

so it doesn't appear that deferring the multiplication to cs_multiply gives any advantage over the built-in sparse multiply.

D.

#include <octave/config.h>
#include <octave/oct.h>
#include <octave/oct-sparse.h>

#ifdef IDX_TYPE_LONG
#define CXSPARSE_DNAME(name) cs_dl ## name
#define CXSPARSE_ZNAME(name) cs_cl ## name
#else
#define CXSPARSE_DNAME(name) cs_di ## name
#define CXSPARSE_ZNAME(name) cs_ci ## name
#endif

#if defined(CS_VER) && (((CS_VER == 2) && (CS_SUBVER < 2)) || (CS_VER < 2))
typedef double _Complex cs_complex_t;
#endif
	      
// The code below is a copy of the Sparse<T>::transpose()
// method so that we can transpose directly from the CXSPARSE
// matrix D into an Octave SparseMatrix
template <class A, class B, class T>
B
transpose_cs (A D)
{
  octave_idx_type nr = D->m;
  octave_idx_type nc = D->n;
  octave_idx_type nz = D->nzmax;
  B c (nc, nr, nz);

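  // Count the entries in each row of D; row r of D becomes column r of c.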
  for (octave_idx_type i = 0; i < nz; i++)
    c.xcidx (D->i [i] + 1)++;
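  // Turn the per-column counts into column start offsets (cumulative sum).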
  nz = 0;
  for (octave_idx_type i = 1; i <= nr; i++)
    {
      const octave_idx_type tmp = c.xcidx (i);
      c.xcidx (i) = nz;
      nz += tmp;
    }
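  // Walk the columns of D and append each entry to its destination column
  // of c; since j increases monotonically, the row indices come out sorted.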
  for (octave_idx_type j = 0; j < nc; j++)
    for (octave_idx_type k = D->p[j]; k < D->p[j+1]; k++)
      {
	octave_idx_type q = c.xcidx (D->i[k] + 1)++;
	c.xridx (q) = j;
	c.xdata (q) = D->x[k];
	//c.xdata (q) = reinterpret_cast<T> (D->x[k]);
      }
  return c;
}

DEFUN_DLD (mymultiply, args, , "C = mymultiply (A, B): sparse matrix product computed with CXSparse's cs_multiply")
{
  octave_value retval;
  int nargin = args.length ();

  if (nargin != 2)
    error ("incorrect number of arguments to the binary operator '*'");
  else
    {
      if (args(0).is_complex_type () || args(1).is_complex_type ())
	{
	  SparseComplexMatrix a = args(0).sparse_complex_matrix_value (). transpose ();
	  SparseComplexMatrix b = args(1).sparse_complex_matrix_value (). transpose ();

	  if (!error_state)
	    {
	      CXSPARSE_ZNAME () A, B, *D; 

	      A.nzmax = a.nnz ();
	      A.m = a.rows ();
	      A.n = a.cols ();
	      B.nzmax = b.nnz ();
	      B.m = b.rows ();
	      B.n = b.cols ();
	      // Cast away const on A and B, with full knowledge that CSparse
	      // won't touch it. Prevents the methods below making a copy 
	      // of the data.
	      A.p = const_cast<octave_idx_type *>(a.cidx ());
	      A.i = const_cast<octave_idx_type *>(a.ridx ());
	      A.x = const_cast<cs_complex_t *>
		(reinterpret_cast<const cs_complex_t *> (a.data ()));
	      A.nz = -1;
	      B.p = const_cast<octave_idx_type *>(b.cidx ());
	      B.i = const_cast<octave_idx_type *>(b.ridx ());
	      B.x = const_cast<cs_complex_t *>
		(reinterpret_cast<const cs_complex_t *> (b.data ()));
	      B.nz = -1;

	      BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE;
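	      // a and b hold the transposed inputs, so the product below is
	      // (args(0) * args(1))'; transpose_cs transposes it back and,
	      // as a side effect, leaves the row indices of each column sorted.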
	      // D = B' * A'
	      D = CXSPARSE_ZNAME (_multiply) (&B, &A);
	      // drop zeros from D 
	      CXSPARSE_ZNAME (_dropzeros) (D);
	      END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE;

	      retval = transpose_cs <CXSPARSE_ZNAME () *, SparseComplexMatrix, Complex> (D);
	      CXSPARSE_ZNAME (_spfree) (D);
	    }
	}
      else
	{
	  SparseMatrix a = args(0).sparse_matrix_value (). transpose ();
	  SparseMatrix b = args(1).sparse_matrix_value (). transpose ();

	  if (!error_state)
	    {
	      CXSPARSE_DNAME () A, B, *D; 

	      A.nzmax = a.nnz ();
	      A.m = a.rows ();
	      A.n = a.cols ();
	      B.nzmax = b.nnz ();
	      B.m = b.rows ();
	      B.n = b.cols ();
	      // Cast away const on A and B, with full knowledge that CSparse
	      // won't touch it. Prevents the methods below making a copy 
	      // of the data.
	      A.p = const_cast<octave_idx_type *>(a.cidx ());
	      A.i = const_cast<octave_idx_type *>(a.ridx ());
	      A.x = const_cast<double *>(a.data ());
	      A.nz = -1;
	      B.p = const_cast<octave_idx_type *>(b.cidx ());
	      B.i = const_cast<octave_idx_type *>(b.ridx ());
	      B.x = const_cast<double *>(b.data ());
	      B.nz = -1;

	      BEGIN_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE;
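	      // Same transpose trick as in the complex branch above.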
	      // D = B' * A'
	      D = CXSPARSE_DNAME (_multiply) (&B, &A);
	      // drop zeros from D 
	      CXSPARSE_DNAME (_dropzeros) (D);
	      END_INTERRUPT_IMMEDIATELY_IN_FOREIGN_CODE;

	      retval = transpose_cs <CXSPARSE_DNAME () *, SparseMatrix, double> (D);
	      CXSPARSE_DNAME (_spfree) (D);
	    }
	}
    }

  return retval;
}
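
For reference, the oct-file should build with mkoctfile; the lines below are a minimal, untested sketch that assumes the CXSparse headers Octave was configured against are on the include path (the explicit -lcxsparse may or may not be needed, depending on how CXSparse is installed on your system):

mkoctfile mymultiply.cc
## or, if the CXSparse symbols are not picked up automatically:
mkoctfile mymultiply.cc -lcxsparse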
