Hi,

I have attached my code for the MAGMA solver in the 2 patch files. Also 
attached is an example/test code for testing the solver.

Thanks,
Harshad
From 0acefb8391f04b59bf9f43c179b927ffd34936f7 Mon Sep 17 00:00:00 2001
From: Harshad Sahasrabudhe <[email protected]>
Date: Tue, 8 Oct 2013 14:33:01 -0500
Subject: [PATCH 1/2] Added MAGMA solver for dense matrices. Currently, it
 only does LU and Cholesky factorization.

---
 config/PETSc/packages/magma.py | 131 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)
 create mode 100644 config/PETSc/packages/magma.py

diff --git a/config/PETSc/packages/magma.py b/config/PETSc/packages/magma.py
new file mode 100644
index 0000000..3206888
--- /dev/null
+++ b/config/PETSc/packages/magma.py
@@ -0,0 +1,131 @@
+import PETSc.package
+
+class Configure(PETSc.package.NewPackage):
+  def __init__(self, framework):
+    PETSc.package.NewPackage.__init__(self, framework)
+    self.download     = ['http://icl.cs.utk.edu/projectsfiles/magma/downloads/magma-1.4.0.tar.gz']
+    self.functions    = ['magma_zgetrf','magma_dgetrf']
+    self.includes     = ['magma.h']
+    self.liblist      = [['libmagma.a'],['libmagma.a','libmagmablas.a']]
+    self.double       = 0
+    self.complex      = 1
+
+    self.worksonWindows   = 0
+    self.downloadonWindows= 0
+    return
+
+  def setupDependencies(self, framework):
+    PETSc.package.NewPackage.setupDependencies(self, framework)
+    PETSc.package.NewPackage.setupDependencies(self, framework)
+    self.blasLapack = self.framework.require('config.packages.BlasLapack',self)
+    # NOTE: CUDA dependency is only here if using CUBLAS
+    self.cuda       = self.framework.require('PETSc.packages.cuda',self)
+    self.deps       = [self.blasLapack, self.cuda]
+    return
+
+  def setupHelp(self, help):
+    import nargs
+    PETSc.package.NewPackage.setupHelp(self, help)
+    help.addArgument('MAGMA', '-with-magma-device=<HAVE_CUBLAS or HAVE_clAmdBlas or HAVE_MIC>', nargs.ArgString(None, "HAVE_CUBLAS", 'Type of device to use'))
+    return
+
+  def Install(self):
+    import os
+    import re
+    import sys
+
+    # set blas name mangling
+    if self.blasLapack.mangling == 'underscore':
+      self.mangling   = '-DADD_'
+    elif self.blasLapack.mangling == 'caps':
+      self.mangling   = '-DUPCASE'
+    else:
+      self.mangling   = '-DNOCHANGE'
+    
+    self.isshared     = self.framework.argDB['with-shared-libraries']
+
+    
+    g = open(os.path.join(self.packageDir,'make.inc'),'w')
+
+    # NOTE: LIB has to be set differently depending on -with-magma-device
+    g.write('LIB          = '+self.libraries.toString(self.cuda.lib)+' '+self.libraries.toString(self.blasLapack.dlib)+'\n')
+    g.write('ARCH         = '+self.setCompilers.AR+'\n')
+    g.write('ARCHFLAGS    = '+self.setCompilers.AR_FLAGS+'\n')
+    g.write('RANLIB       = '+self.setCompilers.RANLIB+'\n')
+
+    self.setCompilers.pushLanguage('C')
+    g.write('CC           = '+self.setCompilers.getCompiler()+'\n')
+    self.CCFLAGS_MAGMA    = self.setCompilers.getCompilerFlags()
+    if self.isshared and '-fPIC' not in self.CCFLAGS_MAGMA:
+        self.CCFLAGS_MAGMA = self.CCFLAGS_MAGMA + ' -fPIC'
+    
+    if self.setCompilers.isLinux():
+      g.write('OPTS         = '+self.CCFLAGS_MAGMA+' '+self.mangling+' -DMAGMA_SETAFFINITY\n')
+    else:
+      g.write('OPTS         = '+self.CCFLAGS_MAGMA+' '+self.mangling+'\n')
+    
+    self.setCompilers.popLanguage()
+    self.setCompilers.pushLanguage('FC')
+    g.write('FORT         = '+self.setCompilers.getCompiler()+'\n')
+    self.FCFLAGS_MAGMA    = self.setCompilers.getCompilerFlags()
+    if self.isshared and '-fPIC' not in self.FCFLAGS_MAGMA:
+        self.FCFLAGS_MAGMA = self.FCFLAGS_MAGMA + ' -fPIC'
+
+    g.write('F77OPTS      = '+self.FCFLAGS_MAGMA+' '+self.mangling+' \n')
+    g.write('FOPTS        = '+self.FCFLAGS_MAGMA+' '+self.mangling+' -x f95-cpp-input\n')
+
+    self.setCompilers.popLanguage()
+    # NOTE: next bunch of stuff only set for CUBLAS
+    self.setCompilers.pushLanguage('CUDA')
+    g.write('NVCC         = '+self.setCompilers.getCompiler()+'\n')
+    #  Set the GPU_TARGET for MAGMA depending on the GPU detected by PETSc
+    self.CUDAFLAGS=self.setCompilers.getCompilerFlags()
+    if '-arch=sm_13' in self.CUDAFLAGS:
+      g.write('GPU_TARGET   = Tesla\n')
+      self.CUDAFLAGS=self.CUDAFLAGS.replace('-arch=sm_13','')
+    elif '-arch=sm_20' in self.CUDAFLAGS:
+      g.write('GPU_TARGET   = Fermi\n')
+      self.CUDAFLAGS=self.CUDAFLAGS.replace('-arch=sm_20','')
+    elif  '-arch=sm_30' in self.CUDAFLAGS:
+      g.write('GPU_TARGET   = Kepler\n')
+      self.CUDAFLAGS=self.CUDAFLAGS.replace('-arch=sm_30','')
+    elif  '-arch=sm_35' in self.CUDAFLAGS:
+      g.write('GPU_TARGET   = Kepler\n')
+      self.CUDAFLAGS=self.CUDAFLAGS.replace('-arch=sm_35','')
+    else:
+      raise RuntimeError('MAGMA error: GPU_TARGET must be one of Tesla, Fermi, or Kepler.')
+    if self.isshared:
+        g.write('NVOPTS       = '+self.CUDAFLAGS+' '+self.mangling+' -Xcompiler \"-fno-strict-aliasing -fPIC\"\n')
+    else:
+        g.write('NVOPTS       = '+self.CUDAFLAGS+' '+self.mangling+' -Xcompiler -fno-strict-aliasing\n')
+    
+    g.write('LDOPTS       = -fopenmp\n')
+    self.setCompilers.popLanguage()
+    g.write('CUDADIR      = '+self.cuda.directory+'\n')
+    g.write('INC          = -I'+self.cuda.directory+'/include\n')
+    g.write('LIBDIR       = -L'+self.cuda.directory+'/lib\n')
+
+    g.close()
+    if self.installNeeded('make.inc'):
+
+      try:
+        self.logPrintBox('Compiling MAGMA; this may take several minutes')
+
+        output,err,ret = PETSc.package.NewPackage.executeShellCommand('cd '+self.packageDir+' && make clean && make'+' lib', timeout=2500, log = self.framework.log)
+        libDir     = os.path.join(self.installDir, self.libdir)
+        includeDir = os.path.join(self.installDir, self.includedir)
+        
+        output,err,ret = PETSc.package.NewPackage.executeShellCommand('cd '+self.packageDir+' && mv -f lib/*.* '+libDir+'/. && cp -f include/*.* '+includeDir+'/.', timeout=2500, log = self.framework.log)
+      except RuntimeError, e:
+        raise RuntimeError('Error running make on MAGMA: '+str(e))
+      self.postInstall(output+err,'make.inc')
+    return self.installDir
+
+  def configureLibrary(self):
+    ''' Add the magma needed blas flag'''
+    flagsArg = self.setCompilers.getPreprocessorFlagsArg()
+    oldFlags = getattr(self.setCompilers, flagsArg)
+    setattr(self.setCompilers, flagsArg, oldFlags+' -D'+self.framework.argDB['with-magma-device'])
+    PETSc.package.NewPackage.configureLibrary(self)
+    setattr(self.setCompilers, flagsArg, oldFlags)
+    return
-- 
1.8.0

From 4ddb100f646a04646f7eb071894dfafd6a103b68 Mon Sep 17 00:00:00 2001
From: Harshad Sahasrabudhe <[email protected]>
Date: Tue, 8 Oct 2013 14:35:00 -0500
Subject: [PATCH 2/2] Added LU and Cholesky factorization through MAGMA mat
 solver package for dense matrices.

---
 include/petscmat.h               |   1 +
 src/mat/impls/dense/seq/dense.c  | 162 ++++++++++++++++++++++++++++++++++++++-
 src/mat/impls/dense/seq/makefile |   3 +-
 3 files changed, 163 insertions(+), 3 deletions(-)

diff --git a/include/petscmat.h b/include/petscmat.h
index a168ccb..1b6acf4 100644
--- a/include/petscmat.h
+++ b/include/petscmat.h
@@ -115,6 +115,7 @@ J*/
 #define MATSOLVERSBSTRM       "sbstrm"
 #define MATSOLVERELEMENTAL    "elemental"
 #define MATSOLVERCLIQUE       "clique"
+#define MATSOLVERMAGMA        "magma"
 
 /*E
     MatFactorType - indicates what type of factorization is requested
diff --git a/src/mat/impls/dense/seq/dense.c b/src/mat/impls/dense/seq/dense.c
index cca5781..c5d57d0 100644
--- a/src/mat/impls/dense/seq/dense.c
+++ b/src/mat/impls/dense/seq/dense.c
@@ -8,6 +8,13 @@
 
 #include <../src/mat/impls/aij/seq/aij.h>
 
+#if defined(PETSC_HAVE_MAGMA)
+#if !defined(HAVE_CUBLAS)
+#define HAVE_CUBLAS 1
+#endif
+#include "magma.h"
+#endif
+
 #undef __FUNCT__
 #define __FUNCT__ "MatConvert_SeqDense_SeqAIJ"
 PETSC_EXTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
@@ -193,6 +200,21 @@ PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact,Mat A,const MatFactorInfo *i
   PetscFunctionReturn(0);
 }
 
+extern PetscErrorCode MatLUFactor_SeqDense_magma(Mat,IS,IS,const MatFactorInfo*);
+
+#undef __FUNCT__
+#define __FUNCT__ "MatLUFactorNumeric_SeqDense_magma"
+PetscErrorCode MatLUFactorNumeric_SeqDense_magma(Mat fact,Mat A,const MatFactorInfo *info_dummy)
+{
+  MatFactorInfo  info;
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = MatDuplicateNoCreate_SeqDense(fact,A,MAT_COPY_VALUES);CHKERRQ(ierr);
+  ierr = MatLUFactor_SeqDense_magma(fact,0,0,&info);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
 #undef __FUNCT__
 #define __FUNCT__ "MatSolve_SeqDense"
 PetscErrorCode MatSolve_SeqDense(Mat A,Vec xx,Vec yy)
@@ -445,7 +467,49 @@ PetscErrorCode MatLUFactor_SeqDense(Mat A,IS row,IS col,const MatFactorInfo *min
   A->factortype             = MAT_FACTOR_LU;
 
   ierr = PetscLogFlops((2.0*A->cmap->n*A->cmap->n*A->cmap->n)/3);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+/* ---------------------------------------------------------------*/
+/* COMMENT: I have chosen to hide row permutation in the pivots,
+   rather than put it in the Mat->row slot.*/
+#undef __FUNCT__
+#define __FUNCT__ "MatLUFactor_SeqDense_magma"
+PetscErrorCode MatLUFactor_SeqDense_magma(Mat A,IS row,IS col,const MatFactorInfo *minfo)
+{
+  Mat_SeqDense   *mat = (Mat_SeqDense*)A->data;
+  PetscErrorCode ierr;
+  PetscBLASInt   n,m,info;
+
+  PetscFunctionBegin;
+  ierr = PetscBLASIntCast(A->cmap->n,&n);CHKERRQ(ierr);
+  ierr = PetscBLASIntCast(A->rmap->n,&m);CHKERRQ(ierr);
+  if (!mat->pivots) {
+    ierr = PetscMalloc((A->rmap->n+1)*sizeof(PetscBLASInt),&mat->pivots);CHKERRQ(ierr);
+    ierr = PetscLogObjectMemory((PetscObject)A,A->rmap->n*sizeof(PetscBLASInt));CHKERRQ(ierr);
+  }
+  if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0);
+  ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
+#if defined(PETSC_HAVE_MAGMA)
+  magma_init();
+#if defined(PETSC_USE_COMPLEX)
+  magma_zgetrf(m,n,mat->v,mat->lda,mat->pivots,&info);
+#else
+  magma_dgetrf(m,n,mat->v,mat->lda,mat->pivots,&info);
 #endif
+  magma_finalize();
+#endif
+  ierr = PetscFPTrapPop();CHKERRQ(ierr);
+
+  if (info<0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Bad argument to LU factorization");
+  if (info>0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Bad LU factorization");
+  A->ops->solve             = MatSolve_SeqDense;
+  A->ops->solvetranspose    = MatSolveTranspose_SeqDense;
+  A->ops->solveadd          = MatSolveAdd_SeqDense;
+  A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqDense;
+  A->factortype             = MAT_FACTOR_LU;
+
+  ierr = PetscLogFlops((2.0*A->cmap->n*A->cmap->n*A->cmap->n)/3);CHKERRQ(ierr);
   PetscFunctionReturn(0);
 }
 
@@ -479,6 +543,44 @@ PetscErrorCode MatCholeskyFactor_SeqDense(Mat A,IS perm,const MatFactorInfo *fac
   PetscFunctionReturn(0);
 }
 
+#undef __FUNCT__
+#define __FUNCT__ "MatCholeskyFactor_SeqDense_magma"
+PetscErrorCode MatCholeskyFactor_SeqDense_magma(Mat A,IS perm,const MatFactorInfo *factinfo)
+{
+  Mat_SeqDense   *mat = (Mat_SeqDense*)A->data;
+  PetscErrorCode ierr;
+  PetscBLASInt   info,n;
+#if defined(PETSC_HAVE_MAGMA)
+  magma_uplo_t uplo='L';
+#endif
+
+  PetscFunctionBegin;
+  ierr = PetscBLASIntCast(A->cmap->n,&n);CHKERRQ(ierr);
+  ierr = PetscFree(mat->pivots);CHKERRQ(ierr);
+
+  if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0);
+  PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&n,mat->v,&mat->lda,&info));
+#if defined(PETSC_HAVE_MAGMA)
+  magma_init();
+#if defined(PETSC_USE_COMPLEX)
+  magma_zpotrf(uplo,n,mat->v,mat->lda,&info);
+#else
+  magma_dpotrf(uplo,n,mat->v,mat->lda,&info);
+#endif
+  magma_finalize();
+#endif
+  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Bad factorization: zero pivot in row %D",(PetscInt)info-1);
+  A->ops->solve             = MatSolve_SeqDense;
+  A->ops->solvetranspose    = MatSolveTranspose_SeqDense;
+  A->ops->solveadd          = MatSolveAdd_SeqDense;
+  A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqDense;
+  A->factortype             = MAT_FACTOR_CHOLESKY;
+
+  ierr = PetscLogFlops((A->cmap->n*A->cmap->n*A->cmap->n)/3.0);CHKERRQ(ierr);
+#endif
+  PetscFunctionReturn(0);
+}
+
 
 #undef __FUNCT__
 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqDense"
@@ -496,6 +598,21 @@ PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact,Mat A,const MatFactorI
 }
 
 #undef __FUNCT__
+#define __FUNCT__ "MatCholeskyFactorNumeric_SeqDense_magma"
+PetscErrorCode MatCholeskyFactorNumeric_SeqDense_magma(Mat fact,Mat A,const MatFactorInfo *info_dummy)
+{
+  PetscErrorCode ierr;
+  MatFactorInfo  info;
+
+  PetscFunctionBegin;
+  info.fill = 1.0;
+
+  ierr = MatDuplicateNoCreate_SeqDense(fact,A,MAT_COPY_VALUES);CHKERRQ(ierr);
+  ierr = MatCholeskyFactor_SeqDense_magma(fact,0,&info);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqDense"
 PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact,Mat A,IS row,const MatFactorInfo *info)
 {
@@ -507,6 +624,17 @@ PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact,Mat A,IS row,const Ma
 }
 
 #undef __FUNCT__
+#define __FUNCT__ "MatCholeskyFactorSymbolic_SeqDense_magma"
+PetscErrorCode MatCholeskyFactorSymbolic_SeqDense_magma(Mat fact,Mat A,IS row,const MatFactorInfo *info)
+{
+  PetscFunctionBegin;
+  fact->assembled                  = PETSC_TRUE;
+  fact->preallocated               = PETSC_TRUE;
+  fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense_magma;
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
 #define __FUNCT__ "MatLUFactorSymbolic_SeqDense"
 PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
 {
@@ -518,6 +646,17 @@ PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact,Mat A,IS row,IS col,const M
 }
 
 #undef __FUNCT__
+#define __FUNCT__ "MatLUFactorSymbolic_SeqDense_magma"
+PetscErrorCode MatLUFactorSymbolic_SeqDense_magma(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
+{
+  PetscFunctionBegin;
+  fact->preallocated         = PETSC_TRUE;
+  fact->assembled            = PETSC_TRUE;
+  fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense_magma;
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
 #define __FUNCT__ "MatGetFactor_seqdense_petsc"
 PETSC_EXTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A,MatFactorType ftype,Mat *fact)
 {
@@ -536,6 +675,25 @@ PETSC_EXTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A,MatFactorType ftyp
   PetscFunctionReturn(0);
 }
 
+#undef __FUNCT__
+#define __FUNCT__ "MatGetFactor_seqdense_magma"
+PETSC_EXTERN PetscErrorCode MatGetFactor_seqdense_magma(Mat A,MatFactorType ftype,Mat *fact)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = MatCreate(PetscObjectComm((PetscObject)A),fact);CHKERRQ(ierr);
+  ierr = MatSetSizes(*fact,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr);
+  ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr);
+  if (ftype == MAT_FACTOR_LU) {
+    (*fact)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqDense_magma;
+  } else {
+    (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense_magma;
+  }
+  (*fact)->factortype = ftype;
+  PetscFunctionReturn(0);
+}
+
 /* ------------------------------------------------------------------*/
 #undef __FUNCT__
 #define __FUNCT__ "MatSOR_SeqDense"
@@ -2100,7 +2258,6 @@ static struct _MatOps MatOps_Values = { MatSetValues_SeqDense,
                                         0,
                                         0,
                                 /*139*/ 0,
-                                        0,
                                         0
 };
 
@@ -2283,6 +2440,9 @@ PETSC_EXTERN PetscErrorCode MatCreate_SeqDense(Mat B)
 
   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqdense_seqaij_C",MatConvert_SeqDense_SeqAIJ);CHKERRQ(ierr);
   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqdense_petsc);CHKERRQ(ierr);
+#if defined(PETSC_HAVE_MAGMA)
+  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_magma_C",MatGetFactor_seqdense_magma);CHKERRQ(ierr);
+#endif
   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqDenseSetPreallocation_C",MatSeqDenseSetPreallocation_SeqDense);CHKERRQ(ierr);
   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaij_seqdense_C",MatMatMult_SeqAIJ_SeqDense);CHKERRQ(ierr);
   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqaij_seqdense_C",MatMatMultSymbolic_SeqAIJ_SeqDense);CHKERRQ(ierr);
diff --git a/src/mat/impls/dense/seq/makefile b/src/mat/impls/dense/seq/makefile
index 17ebfe4..5240248 100644
--- a/src/mat/impls/dense/seq/makefile
+++ b/src/mat/impls/dense/seq/makefile
@@ -1,7 +1,6 @@
-
 ALL: lib
 
-CFLAGS   =
+CFLAGS   = ${MAGMA_INCLUDE}
 FFLAGS   =
 SOURCEC  = dense.c
 SOURCEF  =
-- 
1.8.0

static char help[] = "Testing LU factorization of SeqDense matrices using MAGMA.\n";

#include <petscmat.h>
#include <petscksp.h>

#undef __FUNCT__
#define __FUNCT__ "main"
int main(int argc,char **argv)
{
  Mat            A, F, F1;
  PC             pc, pc1;
  KSP            ksp, ksp1;
  PetscScalar    *a,diff,min,*x1,*x;
  PetscErrorCode ierr;
  PetscInt       size=8,i,j;
  double t1, t2;
  Vec X, X1, B;
  PetscBool sizeset;

  PetscInitialize(&argc,&argv,0,help);
  PetscOptionsInt("-size","Size of mxm array to be factored","Size of mxm array to be factored",8,&size,&sizeset);

  ierr = PetscMalloc(size*size*sizeof(PetscScalar),&a);CHKERRQ(ierr);

  for (i=0; i<size; i++) {
    for (j=0; j<size; j++) {
      a[i+j*size] = rand();
    }
  }
 
  ierr = MatCreate(MPI_COMM_SELF,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,size,size,size,size);CHKERRQ(ierr);
  ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(A,a);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = KSPCreate(MPI_COMM_SELF,&ksp);CHKERRQ(ierr);
  ierr = KSPSetOperators(ksp,A,A,SAME_PRECONDITIONER);CHKERRQ(ierr);

  ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
  ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
  ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
  ierr = PCFactorSetMatSolverPackage(pc,MATSOLVERMAGMA);CHKERRQ(ierr);

  ierr = PCFactorSetUpMatSolverPackage(pc);CHKERRQ(ierr); /* call MatGetFactor() to create F */
  ierr = PCFactorGetMatrix(pc,&F);CHKERRQ(ierr);
  
  ierr = VecCreate(MPI_COMM_SELF,&X);CHKERRQ(ierr);
  ierr = VecSetType(X, VECSEQ);CHKERRQ(ierr);
  ierr = VecSetSizes(X,size,size);CHKERRQ(ierr);
  ierr = VecCreate(MPI_COMM_SELF,&X1);CHKERRQ(ierr);
  ierr = VecSetType(X1, VECSEQ);CHKERRQ(ierr);
  ierr = VecSetSizes(X1,size,size);CHKERRQ(ierr);
  ierr = VecCreate(MPI_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = VecSetType(B, VECSEQ);CHKERRQ(ierr);
  ierr = VecSetSizes(B,size,size);CHKERRQ(ierr);

   
  ierr = VecSet(B, 0);
  
  t1 = MPI_Wtime();
  KSPSolve(ksp,B,X);
  t2 = MPI_Wtime();
  MatSolve(F,B,X);
  printf("MAGMA KSPSolve time = %f\n", t2-t1);

  ierr = KSPCreate(MPI_COMM_SELF,&ksp1);CHKERRQ(ierr);
  ierr = KSPSetOperators(ksp1,A,A,SAME_PRECONDITIONER);CHKERRQ(ierr);

  ierr = KSPSetType(ksp1,KSPPREONLY);CHKERRQ(ierr);
  ierr = KSPGetPC(ksp1,&pc1);CHKERRQ(ierr);
  ierr = PCSetType(pc1,PCLU);CHKERRQ(ierr);
  ierr = PCFactorSetMatSolverPackage(pc1,"petsc");CHKERRQ(ierr);

  ierr = PCFactorSetUpMatSolverPackage(pc1);CHKERRQ(ierr); /* call MatGetFactor() to create F */
  ierr = PCFactorGetMatrix(pc1,&F1);CHKERRQ(ierr);

  t1 = MPI_Wtime();
  KSPSolve(ksp1,B,X1);
  t2 = MPI_Wtime();
  MatSolve(F1,B,X1);
  printf("PETSc KSPSolve time = %f\n", t2-t1);

  PetscFinalize();

}

Reply via email to