Hi,
I have attached my code for the MAGMA solver in the 2 patch files. Also
attached is an example/test code for testing the solver.
Thanks,
Harshad
From 0acefb8391f04b59bf9f43c179b927ffd34936f7 Mon Sep 17 00:00:00 2001
From: Harshad Sahasrabudhe <[email protected]>
Date: Tue, 8 Oct 2013 14:33:01 -0500
Subject: [PATCH 1/2] Added MAGMA solver for dense matrices. Currently, it
only does LU and Cholesky factorization.
---
config/PETSc/packages/magma.py | 131 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 131 insertions(+)
create mode 100644 config/PETSc/packages/magma.py
diff --git a/config/PETSc/packages/magma.py b/config/PETSc/packages/magma.py
new file mode 100644
index 0000000..3206888
--- /dev/null
+++ b/config/PETSc/packages/magma.py
@@ -0,0 +1,131 @@
+import PETSc.package
+
+class Configure(PETSc.package.NewPackage):
+ def __init__(self, framework):
+ PETSc.package.NewPackage.__init__(self, framework)
+ self.download = ['http://icl.cs.utk.edu/projectsfiles/magma/downloads/magma-1.4.0.tar.gz']
+ self.functions = ['magma_zgetrf','magma_dgetrf']
+ self.includes = ['magma.h']
+ self.liblist = [['libmagma.a'],['libmagma.a','libmagmablas.a']]
+ self.double = 0
+ self.complex = 1
+
+ self.worksonWindows = 0
+ self.downloadonWindows= 0
+ return
+
+ def setupDependencies(self, framework):
+ PETSc.package.NewPackage.setupDependencies(self, framework)
+ PETSc.package.NewPackage.setupDependencies(self, framework)
+ self.blasLapack = self.framework.require('config.packages.BlasLapack',self)
+ # NOTE: CUDA dependency is only here if using CUBLAS
+ self.cuda = self.framework.require('PETSc.packages.cuda',self)
+ self.deps = [self.blasLapack, self.cuda]
+ return
+
+ def setupHelp(self, help):
+ import nargs
+ PETSc.package.NewPackage.setupHelp(self, help)
+ help.addArgument('MAGMA', '-with-magma-device=<HAVE_CUBLAS or HAVE_clAmdBlas or HAVE_MIC>', nargs.ArgString(None, "HAVE_CUBLAS", 'Type of device to use'))
+ return
+
+ def Install(self):
+ import os
+ import re
+ import sys
+
+ # set blas name mangling
+ if self.blasLapack.mangling == 'underscore':
+ self.mangling = '-DADD_'
+ elif self.blasLapack.mangling == 'caps':
+ self.mangling = '-DUPCASE'
+ else:
+ self.mangling = '-DNOCHANGE'
+
+ self.isshared = self.framework.argDB['with-shared-libraries']
+
+
+ g = open(os.path.join(self.packageDir,'make.inc'),'w')
+
+ # NOTE: LIB has to be set differently depending on -with-magma-device
+ g.write('LIB = '+self.libraries.toString(self.cuda.lib)+' '+self.libraries.toString(self.blasLapack.dlib)+'\n')
+ g.write('ARCH = '+self.setCompilers.AR+'\n')
+ g.write('ARCHFLAGS = '+self.setCompilers.AR_FLAGS+'\n')
+ g.write('RANLIB = '+self.setCompilers.RANLIB+'\n')
+
+ self.setCompilers.pushLanguage('C')
+ g.write('CC = '+self.setCompilers.getCompiler()+'\n')
+ self.CCFLAGS_MAGMA = self.setCompilers.getCompilerFlags()
+ if self.isshared and '-fPIC' not in self.CCFLAGS_MAGMA:
+ self.CCFLAGS_MAGMA = self.CCFLAGS_MAGMA + ' -fPIC'
+
+ if self.setCompilers.isLinux():
+ g.write('OPTS = '+self.CCFLAGS_MAGMA+' '+self.mangling+' -DMAGMA_SETAFFINITY\n')
+ else:
+ g.write('OPTS = '+self.CCFLAGS_MAGMA+' '+self.mangling+'\n')
+
+ self.setCompilers.popLanguage()
+ self.setCompilers.pushLanguage('FC')
+ g.write('FORT = '+self.setCompilers.getCompiler()+'\n')
+ self.FCFLAGS_MAGMA = self.setCompilers.getCompilerFlags()
+ if self.isshared and '-fPIC' not in self.FCFLAGS_MAGMA:
+ self.FCFLAGS_MAGMA = self.FCFLAGS_MAGMA + ' -fPIC'
+
+ g.write('F77OPTS = '+self.FCFLAGS_MAGMA+' '+self.mangling+' \n')
+ g.write('FOPTS = '+self.FCFLAGS_MAGMA+' '+self.mangling+' -x f95-cpp-input\n')
+
+ self.setCompilers.popLanguage()
+ # NOTE: next bunch of stuff only set for CUBLAS
+ self.setCompilers.pushLanguage('CUDA')
+ g.write('NVCC = '+self.setCompilers.getCompiler()+'\n')
+ # Set the GPU_TARGET for MAGMA depending on the GPU detected by PETSc
+ self.CUDAFLAGS=self.setCompilers.getCompilerFlags()
+ if '-arch=sm_13' in self.CUDAFLAGS:
+ g.write('GPU_TARGET = Tesla\n')
+ self.CUDAFLAGS=self.CUDAFLAGS.replace('-arch=sm_13','')
+ elif '-arch=sm_20' in self.CUDAFLAGS:
+ g.write('GPU_TARGET = Fermi\n')
+ self.CUDAFLAGS=self.CUDAFLAGS.replace('-arch=sm_20','')
+ elif '-arch=sm_30' in self.CUDAFLAGS:
+ g.write('GPU_TARGET = Kepler\n')
+ self.CUDAFLAGS=self.CUDAFLAGS.replace('-arch=sm_30','')
+ elif '-arch=sm_35' in self.CUDAFLAGS:
+ g.write('GPU_TARGET = Kepler\n')
+ self.CUDAFLAGS=self.CUDAFLAGS.replace('-arch=sm_35','')
+ else:
+ raise RuntimeError('MAGMA error: GPU_TARGET must be one of Tesla, Fermi, or Kepler.')
+ if self.isshared:
+ g.write('NVOPTS = '+self.CUDAFLAGS+' '+self.mangling+' -Xcompiler \"-fno-strict-aliasing -fPIC\"\n')
+ else:
+ g.write('NVOPTS = '+self.CUDAFLAGS+' '+self.mangling+' -Xcompiler -fno-strict-aliasing\n')
+
+ g.write('LDOPTS = -fopenmp\n')
+ self.setCompilers.popLanguage()
+ g.write('CUDADIR = '+self.cuda.directory+'\n')
+ g.write('INC = -I'+self.cuda.directory+'/include\n')
+ g.write('LIBDIR = -L'+self.cuda.directory+'/lib\n')
+
+ g.close()
+ if self.installNeeded('make.inc'):
+
+ try:
+ self.logPrintBox('Compiling MAGMA; this may take several minutes')
+
+ output,err,ret = PETSc.package.NewPackage.executeShellCommand('cd '+self.packageDir+' && make clean && make'+' lib', timeout=2500, log = self.framework.log)
+ libDir = os.path.join(self.installDir, self.libdir)
+ includeDir = os.path.join(self.installDir, self.includedir)
+
+ output,err,ret = PETSc.package.NewPackage.executeShellCommand('cd '+self.packageDir+' && mv -f lib/*.* '+libDir+'/. && cp -f include/*.* '+includeDir+'/.', timeout=2500, log = self.framework.log)
+ except RuntimeError, e:
+ raise RuntimeError('Error running make on MAGMA: '+str(e))
+ self.postInstall(output+err,'make.inc')
+ return self.installDir
+
+ def configureLibrary(self):
+ ''' Add the magma needed blas flag'''
+ flagsArg = self.setCompilers.getPreprocessorFlagsArg()
+ oldFlags = getattr(self.setCompilers, flagsArg)
+ setattr(self.setCompilers, flagsArg, oldFlags+' -D'+self.framework.argDB['with-magma-device'])
+ PETSc.package.NewPackage.configureLibrary(self)
+ setattr(self.setCompilers, flagsArg, oldFlags)
+ return
--
1.8.0
From 4ddb100f646a04646f7eb071894dfafd6a103b68 Mon Sep 17 00:00:00 2001
From: Harshad Sahasrabudhe <[email protected]>
Date: Tue, 8 Oct 2013 14:35:00 -0500
Subject: [PATCH 2/2] Added LU and Cholesky factorization through MAGMA mat
solver package for dense matrices.
---
include/petscmat.h | 1 +
src/mat/impls/dense/seq/dense.c | 162 ++++++++++++++++++++++++++++++++++++++-
src/mat/impls/dense/seq/makefile | 3 +-
3 files changed, 163 insertions(+), 3 deletions(-)
diff --git a/include/petscmat.h b/include/petscmat.h
index a168ccb..1b6acf4 100644
--- a/include/petscmat.h
+++ b/include/petscmat.h
@@ -115,6 +115,7 @@ J*/
#define MATSOLVERSBSTRM "sbstrm"
#define MATSOLVERELEMENTAL "elemental"
#define MATSOLVERCLIQUE "clique"
+#define MATSOLVERMAGMA "magma"
/*E
MatFactorType - indicates what type of factorization is requested
diff --git a/src/mat/impls/dense/seq/dense.c b/src/mat/impls/dense/seq/dense.c
index cca5781..c5d57d0 100644
--- a/src/mat/impls/dense/seq/dense.c
+++ b/src/mat/impls/dense/seq/dense.c
@@ -8,6 +8,13 @@
#include <../src/mat/impls/aij/seq/aij.h>
+#if defined(PETSC_HAVE_MAGMA)
+#if !defined(HAVE_CUBLAS)
+#define HAVE_CUBLAS 1
+#endif
+#include "magma.h"
+#endif
+
#undef __FUNCT__
#define __FUNCT__ "MatConvert_SeqDense_SeqAIJ"
PETSC_EXTERN PetscErrorCode MatConvert_SeqDense_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
@@ -193,6 +200,21 @@ PetscErrorCode MatLUFactorNumeric_SeqDense(Mat fact,Mat A,const MatFactorInfo *i
PetscFunctionReturn(0);
}
+extern PetscErrorCode MatLUFactor_SeqDense_magma(Mat,IS,IS,const MatFactorInfo*);
+
+#undef __FUNCT__
+#define __FUNCT__ "MatLUFactorNumeric_SeqDense_magma"
+PetscErrorCode MatLUFactorNumeric_SeqDense_magma(Mat fact,Mat A,const MatFactorInfo *info_dummy)
+{
+ MatFactorInfo info;
+ PetscErrorCode ierr;
+
+ PetscFunctionBegin;
+ ierr = MatDuplicateNoCreate_SeqDense(fact,A,MAT_COPY_VALUES);CHKERRQ(ierr);
+ ierr = MatLUFactor_SeqDense_magma(fact,0,0,&info);CHKERRQ(ierr);
+ PetscFunctionReturn(0);
+}
+
#undef __FUNCT__
#define __FUNCT__ "MatSolve_SeqDense"
PetscErrorCode MatSolve_SeqDense(Mat A,Vec xx,Vec yy)
@@ -445,7 +467,49 @@ PetscErrorCode MatLUFactor_SeqDense(Mat A,IS row,IS col,const MatFactorInfo *min
A->factortype = MAT_FACTOR_LU;
ierr = PetscLogFlops((2.0*A->cmap->n*A->cmap->n*A->cmap->n)/3);CHKERRQ(ierr);
+ PetscFunctionReturn(0);
+}
+
+/* ---------------------------------------------------------------*/
+/* COMMENT: I have chosen to hide row permutation in the pivots,
+ rather than put it in the Mat->row slot.*/
+#undef __FUNCT__
+#define __FUNCT__ "MatLUFactor_SeqDense_magma"
+PetscErrorCode MatLUFactor_SeqDense_magma(Mat A,IS row,IS col,const MatFactorInfo *minfo)
+{
+ Mat_SeqDense *mat = (Mat_SeqDense*)A->data;
+ PetscErrorCode ierr;
+ PetscBLASInt n,m,info;
+
+ PetscFunctionBegin;
+ ierr = PetscBLASIntCast(A->cmap->n,&n);CHKERRQ(ierr);
+ ierr = PetscBLASIntCast(A->rmap->n,&m);CHKERRQ(ierr);
+ if (!mat->pivots) {
+ ierr = PetscMalloc((A->rmap->n+1)*sizeof(PetscBLASInt),&mat->pivots);CHKERRQ(ierr);
+ ierr = PetscLogObjectMemory((PetscObject)A,A->rmap->n*sizeof(PetscBLASInt));CHKERRQ(ierr);
+ }
+ if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0);
+ ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
+#if defined(PETSC_HAVE_MAGMA)
+ magma_init();
+#if defined(PETSC_USE_COMPLEX)
+ magma_zgetrf(m,n,mat->v,mat->lda,mat->pivots,&info);
+#else
+ magma_dgetrf(m,n,mat->v,mat->lda,mat->pivots,&info);
#endif
+ magma_finalize();
+#endif
+ ierr = PetscFPTrapPop();CHKERRQ(ierr);
+
+ if (info<0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Bad argument to LU factorization");
+ if (info>0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Bad LU factorization");
+ A->ops->solve = MatSolve_SeqDense;
+ A->ops->solvetranspose = MatSolveTranspose_SeqDense;
+ A->ops->solveadd = MatSolveAdd_SeqDense;
+ A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqDense;
+ A->factortype = MAT_FACTOR_LU;
+
+ ierr = PetscLogFlops((2.0*A->cmap->n*A->cmap->n*A->cmap->n)/3);CHKERRQ(ierr);
PetscFunctionReturn(0);
}
@@ -479,6 +543,44 @@ PetscErrorCode MatCholeskyFactor_SeqDense(Mat A,IS perm,const MatFactorInfo *fac
PetscFunctionReturn(0);
}
+#undef __FUNCT__
+#define __FUNCT__ "MatCholeskyFactor_SeqDense_magma"
+PetscErrorCode MatCholeskyFactor_SeqDense_magma(Mat A,IS perm,const MatFactorInfo *factinfo)
+{
+ Mat_SeqDense *mat = (Mat_SeqDense*)A->data;
+ PetscErrorCode ierr;
+ PetscBLASInt info,n;
+#if defined(PETSC_HAVE_MAGMA)
+ magma_uplo_t uplo='L';
+#endif
+
+ PetscFunctionBegin;
+ ierr = PetscBLASIntCast(A->cmap->n,&n);CHKERRQ(ierr);
+ ierr = PetscFree(mat->pivots);CHKERRQ(ierr);
+
+ if (!A->rmap->n || !A->cmap->n) PetscFunctionReturn(0);
+ PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&n,mat->v,&mat->lda,&info));
+#if defined(PETSC_HAVE_MAGMA)
+ magma_init();
+#if defined(PETSC_USE_COMPLEX)
+ magma_zpotrf(uplo,n,mat->v,mat->lda,&info);
+#else
+ magma_dpotrf(uplo,n,mat->v,mat->lda,&info);
+#endif
+ magma_finalize();
+#endif
+ if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Bad factorization: zero pivot in row %D",(PetscInt)info-1);
+ A->ops->solve = MatSolve_SeqDense;
+ A->ops->solvetranspose = MatSolveTranspose_SeqDense;
+ A->ops->solveadd = MatSolveAdd_SeqDense;
+ A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqDense;
+ A->factortype = MAT_FACTOR_CHOLESKY;
+
+ ierr = PetscLogFlops((A->cmap->n*A->cmap->n*A->cmap->n)/3.0);CHKERRQ(ierr);
+#endif
+ PetscFunctionReturn(0);
+}
+
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorNumeric_SeqDense"
@@ -496,6 +598,21 @@ PetscErrorCode MatCholeskyFactorNumeric_SeqDense(Mat fact,Mat A,const MatFactorI
}
#undef __FUNCT__
+#define __FUNCT__ "MatCholeskyFactorNumeric_SeqDense_magma"
+PetscErrorCode MatCholeskyFactorNumeric_SeqDense_magma(Mat fact,Mat A,const MatFactorInfo *info_dummy)
+{
+ PetscErrorCode ierr;
+ MatFactorInfo info;
+
+ PetscFunctionBegin;
+ info.fill = 1.0;
+
+ ierr = MatDuplicateNoCreate_SeqDense(fact,A,MAT_COPY_VALUES);CHKERRQ(ierr);
+ ierr = MatCholeskyFactor_SeqDense_magma(fact,0,&info);CHKERRQ(ierr);
+ PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_SeqDense"
PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact,Mat A,IS row,const MatFactorInfo *info)
{
@@ -507,6 +624,17 @@ PetscErrorCode MatCholeskyFactorSymbolic_SeqDense(Mat fact,Mat A,IS row,const Ma
}
#undef __FUNCT__
+#define __FUNCT__ "MatCholeskyFactorSymbolic_SeqDense_magma"
+PetscErrorCode MatCholeskyFactorSymbolic_SeqDense_magma(Mat fact,Mat A,IS row,const MatFactorInfo *info)
+{
+ PetscFunctionBegin;
+ fact->assembled = PETSC_TRUE;
+ fact->preallocated = PETSC_TRUE;
+ fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqDense_magma;
+ PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_SeqDense"
PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
{
@@ -518,6 +646,17 @@ PetscErrorCode MatLUFactorSymbolic_SeqDense(Mat fact,Mat A,IS row,IS col,const M
}
#undef __FUNCT__
+#define __FUNCT__ "MatLUFactorSymbolic_SeqDense_magma"
+PetscErrorCode MatLUFactorSymbolic_SeqDense_magma(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
+{
+ PetscFunctionBegin;
+ fact->preallocated = PETSC_TRUE;
+ fact->assembled = PETSC_TRUE;
+ fact->ops->lufactornumeric = MatLUFactorNumeric_SeqDense_magma;
+ PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_seqdense_petsc"
PETSC_EXTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A,MatFactorType ftype,Mat *fact)
{
@@ -536,6 +675,25 @@ PETSC_EXTERN PetscErrorCode MatGetFactor_seqdense_petsc(Mat A,MatFactorType ftyp
PetscFunctionReturn(0);
}
+#undef __FUNCT__
+#define __FUNCT__ "MatGetFactor_seqdense_magma"
+PETSC_EXTERN PetscErrorCode MatGetFactor_seqdense_magma(Mat A,MatFactorType ftype,Mat *fact)
+{
+ PetscErrorCode ierr;
+
+ PetscFunctionBegin;
+ ierr = MatCreate(PetscObjectComm((PetscObject)A),fact);CHKERRQ(ierr);
+ ierr = MatSetSizes(*fact,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);CHKERRQ(ierr);
+ ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr);
+ if (ftype == MAT_FACTOR_LU) {
+ (*fact)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqDense_magma;
+ } else {
+ (*fact)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqDense_magma;
+ }
+ (*fact)->factortype = ftype;
+ PetscFunctionReturn(0);
+}
+
/* ------------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatSOR_SeqDense"
@@ -2100,7 +2258,6 @@ static struct _MatOps MatOps_Values = { MatSetValues_SeqDense,
0,
0,
/*139*/ 0,
- 0,
0
};
@@ -2283,6 +2440,9 @@ PETSC_EXTERN PetscErrorCode MatCreate_SeqDense(Mat B)
ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqdense_seqaij_C",MatConvert_SeqDense_SeqAIJ);CHKERRQ(ierr);
ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqdense_petsc);CHKERRQ(ierr);
+#if defined(PETSC_HAVE_MAGMA)
+ ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_magma_C",MatGetFactor_seqdense_magma);CHKERRQ(ierr);
+#endif
ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqDenseSetPreallocation_C",MatSeqDenseSetPreallocation_SeqDense);CHKERRQ(ierr);
ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaij_seqdense_C",MatMatMult_SeqAIJ_SeqDense);CHKERRQ(ierr);
ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqaij_seqdense_C",MatMatMultSymbolic_SeqAIJ_SeqDense);CHKERRQ(ierr);
diff --git a/src/mat/impls/dense/seq/makefile b/src/mat/impls/dense/seq/makefile
index 17ebfe4..5240248 100644
--- a/src/mat/impls/dense/seq/makefile
+++ b/src/mat/impls/dense/seq/makefile
@@ -1,7 +1,6 @@
-
ALL: lib
-CFLAGS =
+CFLAGS = ${MAGMA_INCLUDE}
FFLAGS =
SOURCEC = dense.c
SOURCEF =
--
1.8.0
static char help[] = "Testing LU factorization of SeqDense matrices using MAGMA.\n";
#include <petscmat.h>
#include <petscksp.h>
#undef __FUNCT__
#define __FUNCT__ "main"
int main(int argc,char **argv)
{
Mat A, F, F1;
PC pc, pc1;
KSP ksp, ksp1;
PetscScalar *a,diff,min,*x1,*x;
PetscErrorCode ierr;
PetscInt size=8,i,j;
double t1, t2;
Vec X, X1, B;
PetscBool sizeset;
PetscInitialize(&argc,&argv,0,help);
PetscOptionsInt("-size","Size of mxm array to be factored","Size of mxm array to be factored",8,&size,&sizeset);
ierr = PetscMalloc(size*size*sizeof(PetscScalar),&a);CHKERRQ(ierr);
for (i=0; i<size; i++) {
for (j=0; j<size; j++) {
a[i+j*size] = rand();
}
}
ierr = MatCreate(MPI_COMM_SELF,&A);CHKERRQ(ierr);
ierr = MatSetSizes(A,size,size,size,size);CHKERRQ(ierr);
ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
ierr = MatSeqDenseSetPreallocation(A,a);CHKERRQ(ierr);
ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr = KSPCreate(MPI_COMM_SELF,&ksp);CHKERRQ(ierr);
ierr = KSPSetOperators(ksp,A,A,SAME_PRECONDITIONER);CHKERRQ(ierr);
ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
ierr = PCFactorSetMatSolverPackage(pc,MATSOLVERMAGMA);CHKERRQ(ierr);
ierr = PCFactorSetUpMatSolverPackage(pc);CHKERRQ(ierr); /* call MatGetFactor() to create F */
ierr = PCFactorGetMatrix(pc,&F);CHKERRQ(ierr);
ierr = VecCreate(MPI_COMM_SELF,&X);CHKERRQ(ierr);
ierr = VecSetType(X, VECSEQ);CHKERRQ(ierr);
ierr = VecSetSizes(X,size,size);CHKERRQ(ierr);
ierr = VecCreate(MPI_COMM_SELF,&X1);CHKERRQ(ierr);
ierr = VecSetType(X1, VECSEQ);CHKERRQ(ierr);
ierr = VecSetSizes(X1,size,size);CHKERRQ(ierr);
ierr = VecCreate(MPI_COMM_SELF,&B);CHKERRQ(ierr);
ierr = VecSetType(B, VECSEQ);CHKERRQ(ierr);
ierr = VecSetSizes(B,size,size);CHKERRQ(ierr);
ierr = VecSet(B, 0);
t1 = MPI_Wtime();
KSPSolve(ksp,B,X);
t2 = MPI_Wtime();
MatSolve(F,B,X);
printf("MAGMA KSPSolve time = %f\n", t2-t1);
ierr = KSPCreate(MPI_COMM_SELF,&ksp1);CHKERRQ(ierr);
ierr = KSPSetOperators(ksp1,A,A,SAME_PRECONDITIONER);CHKERRQ(ierr);
ierr = KSPSetType(ksp1,KSPPREONLY);CHKERRQ(ierr);
ierr = KSPGetPC(ksp1,&pc1);CHKERRQ(ierr);
ierr = PCSetType(pc1,PCLU);CHKERRQ(ierr);
ierr = PCFactorSetMatSolverPackage(pc1,"petsc");CHKERRQ(ierr);
ierr = PCFactorSetUpMatSolverPackage(pc1);CHKERRQ(ierr); /* call MatGetFactor() to create F */
ierr = PCFactorGetMatrix(pc1,&F1);CHKERRQ(ierr);
t1 = MPI_Wtime();
KSPSolve(ksp1,B,X1);
t2 = MPI_Wtime();
MatSolve(F1,B,X1);
printf("PETSc KSPSolve time = %f\n", t2-t1);
PetscFinalize();
}