Thanks for the clarification, Mark. I tried such an implementation, but since I could not find the equivalent of https://petsc.org/main/manualpages/Mat/MatMPIAIJGetLocalMat/ for petsc4py, I used A.getLocalSubMatrix to do so, which returns a 'localref' object that I cannot then use to get my local 'seqaij' matrix. I also tried A.getDenseLocalMatrix(), but it does not seem suitable for my case. I couldn't find an example in the petsc4py source code or anything else relevant. Do you have any ideas?
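In the meantime, the closest workaround I can think of is to copy the locally owned rows of A into a fresh 'seqaij' matrix by hand. This is only a rough sketch (the helper name is mine, calling getRow row by row is certainly not optimal, and it assumes the owned rows fit in local memory), but the product with B_seq from the script below should then go through on COMM_SELF:

from firedrake.petsc import PETSc

def mpiaij_to_local_seqaij(A):
    """Hand-rolled stand-in for MatMPIAIJGetLocalMat: copy the locally
    owned rows of an 'mpiaij' matrix into a 'seqaij' matrix on COMM_SELF."""
    rstart, rend = A.getOwnershipRange()
    _, n = A.getSize()  # global number of columns
    A_local = PETSc.Mat().createAIJ(size=(rend - rstart, n), comm=PETSc.COMM_SELF)
    A_local.setUp()
    for i in range(rstart, rend):
        cols, vals = A.getRow(i)  # global column indices and values of row i
        A_local.setValues(i - rstart, cols, vals, addv=False)
    A_local.assemblyBegin()
    A_local.assemblyEnd()
    return A_local

# e.g. local_C = mpiaij_to_local_seqaij(A).matMult(B_seq)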
"""Experimenting with petsc mat-mat multiplication""" # import pdb import numpy as np from firedrake import COMM_WORLD from firedrake.petsc import PETSc from numpy.testing import assert_array_almost_equal import pdb nproc = COMM_WORLD.size rank = COMM_WORLD.rank def Print(x: str): """Prints the string only on the root process Args: x (str): String to be printed """ PETSc.Sys.Print(x) def create_petsc_matrix_seq(input_array): """Building a sequential petsc matrix from an array Args: input_array (np array): Input array Returns: seq mat: PETSc matrix """ assert len(input_array.shape) == 2 m, n = input_array.shape matrix = PETSc.Mat().createAIJ(size=(m, n), comm=PETSc.COMM_SELF) matrix.setUp() matrix.setValues(range(m), range(n), input_array, addv=False) # Assembly the matrix to compute the final structure matrix.assemblyBegin() matrix.assemblyEnd() return matrix def create_petsc_matrix(input_array, partition_like=None): """Create a PETSc matrix from an input_array Args: input_array (np array): Input array partition_like (petsc mat, optional): Petsc matrix. Defaults to None. sparse (bool, optional): Toggle for sparese or dense. Defaults to True. Returns: petsc mat: PETSc matrix """ # Check if input_array is 1D and reshape if necessary assert len(input_array.shape) == 2, "Input array should be 2-dimensional" global_rows, global_cols = input_array.shape comm = COMM_WORLD if partition_like is not None: local_rows_start, local_rows_end = partition_like.getOwnershipRange() local_rows = local_rows_end - local_rows_start # No parallelization in the columns, set local_cols = None to parallelize size = ((local_rows, global_rows), (global_cols, global_cols)) else: size = ((None, global_rows), (global_cols, global_cols)) matrix = PETSc.Mat().createAIJ(size=size, comm=comm) matrix.setUp() local_rows_start, local_rows_end = matrix.getOwnershipRange() for counter, i in enumerate(range(local_rows_start, local_rows_end)): # Calculate the correct row in the array for the current process row_in_array = counter + local_rows_start matrix.setValues( i, range(global_cols), input_array[row_in_array, :], addv=False ) # Assembly the matrix to compute the final structure matrix.assemblyBegin() matrix.assemblyEnd() return matrix m, k = 10, 3 # Generate the random numpy matrices np.random.seed(0) # sets the seed to 0 A_np = np.random.randint(low=0, high=6, size=(m, k)) B_np = np.random.randint(low=0, high=6, size=(k, k)) # Create B as a sequential matrix on each process B_seq = create_petsc_matrix_seq(B_np) Print(B_seq.getType()) Print(B_seq.getSizes()) A = create_petsc_matrix(A_np) print("A type:", A.getType()) print("A sizes:", A.getSizes()) print("A local ownership range:", A.getOwnershipRange()) # pdb.set_trace() # Create a local sequential matrix for A using the local submatrix local_rows_start, local_rows_end = A.getOwnershipRange() local_rows = local_rows_end - local_rows_start print("local_rows_start:", local_rows_start) print("local_rows_end:", local_rows_end) print("local_rows:", local_rows) local_A = PETSc.Mat().createAIJ(size=(local_rows, k), comm=PETSc.COMM_SELF) # pdb.set_trace() comm = A.getComm() rows = PETSc.IS().createStride(local_rows, first=0, step=1, comm=comm) cols = PETSc.IS().createStride(k, first=0, step=1, comm=comm) print("rows indices:", rows.getIndices()) print("cols indices:", cols.getIndices()) # pdb.set_trace() # Create the local to global mapping for rows and columns l2g_rows = PETSc.LGMap().create(rows.getIndices(), comm=comm) l2g_cols = PETSc.LGMap().create(cols.getIndices(), comm=comm) 
print("l2g_rows type:", type(l2g_rows)) print("l2g_rows:", l2g_rows.view()) print("l2g_rows type:", type(l2g_cols)) print("l2g_cols:", l2g_cols.view()) # pdb.set_trace() # Set the local-to-global mapping for the matrix A.setLGMap(l2g_rows, l2g_cols) # pdb.set_trace() # Now you can get the local submatrix local_A = A.getLocalSubMatrix(rows, cols) # Assembly the matrix to compute the final structure local_A.assemblyBegin() local_A.assemblyEnd() Print(local_A.getType()) Print(local_A.getSizes()) # pdb.set_trace() # Multiply the two matrices local_C = local_A.matMult(B_seq) > On 23 Aug 2023, at 12:56, Mark Adams <mfad...@lbl.gov> wrote: > > > > On Wed, Aug 23, 2023 at 5:36 AM Thanasis Boutsikakis > <thanasis.boutsika...@corintis.com > <mailto:thanasis.boutsika...@corintis.com>> wrote: >> Thanks for the suggestion Pierre. >> >> Yes B is duplicated by all processes. >> >> In this case, should B be created as a sequential sparse matrix using >> COMM_SELF? > > Yes, that is what Pierre said, > > Mark > >> I guess if not, the multiplication of B with the output of >> https://petsc.org/main/manualpages/Mat/MatMPIAIJGetLocalMat/ would not go >> through, right? >> >> Thanks, >> Thanos >> >>> On 23 Aug 2023, at 10:47, Pierre Jolivet <pierre.joli...@lip6.fr >>> <mailto:pierre.joli...@lip6.fr>> wrote: >>> >>> >>> >>>> On 23 Aug 2023, at 5:35 PM, Thanasis Boutsikakis >>>> <thanasis.boutsika...@corintis.com >>>> <mailto:thanasis.boutsika...@corintis.com>> wrote: >>>> >>>> Hi all, >>>> >>>> I am trying to multiply two Petsc matrices as C = A * B, where A is a tall >>>> matrix and B is a relatively small matrix. >>>> >>>> I have taken the decision to create A as (row-)partitioned matrix and B as >>>> a non-partitioned matrix that it is entirely shared by all procs (to avoid >>>> unnecessary communication). >>>> >>>> Here is my code: >>>> >>>> import numpy as np >>>> from firedrake import COMM_WORLD >>>> from firedrake.petsc import PETSc >>>> from numpy.testing import assert_array_almost_equal >>>> >>>> nproc = COMM_WORLD.size >>>> rank = COMM_WORLD.rank >>>> >>>> def create_petsc_matrix_non_partitioned(input_array): >>>> """Building a mpi non-partitioned petsc matrix from an array >>>> >>>> Args: >>>> input_array (np array): Input array >>>> sparse (bool, optional): Toggle for sparese or dense. Defaults to >>>> True. >>>> >>>> Returns: >>>> mpi mat: PETSc matrix >>>> """ >>>> assert len(input_array.shape) == 2 >>>> >>>> m, n = input_array.shape >>>> >>>> matrix = PETSc.Mat().createAIJ(size=((m, n), (m, n)), comm=COMM_WORLD) >>>> >>>> # Set the values of the matrix >>>> matrix.setValues(range(m), range(n), input_array[:, :], addv=False) >>>> >>>> # Assembly the matrix to compute the final structure >>>> matrix.assemblyBegin() >>>> matrix.assemblyEnd() >>>> >>>> return matrix >>>> >>>> >>>> def create_petsc_matrix(input_array, partition_like=None): >>>> """Create a PETSc matrix from an input_array >>>> >>>> Args: >>>> input_array (np array): Input array >>>> partition_like (petsc mat, optional): Petsc matrix. Defaults to >>>> None. >>>> sparse (bool, optional): Toggle for sparese or dense. Defaults to >>>> True. 
>>>> 
>>>>     Returns:
>>>>         petsc mat: PETSc matrix
>>>>     """
>>>>     # Check if input_array is 1D and reshape if necessary
>>>>     assert len(input_array.shape) == 2, "Input array should be 2-dimensional"
>>>>     global_rows, global_cols = input_array.shape
>>>> 
>>>>     comm = COMM_WORLD
>>>>     if partition_like is not None:
>>>>         local_rows_start, local_rows_end = partition_like.getOwnershipRange()
>>>>         local_rows = local_rows_end - local_rows_start
>>>> 
>>>>         # No parallelization in the columns, set local_cols = None to parallelize
>>>>         size = ((local_rows, global_rows), (global_cols, global_cols))
>>>>     else:
>>>>         size = ((None, global_rows), (global_cols, global_cols))
>>>> 
>>>>     matrix = PETSc.Mat().createAIJ(size=size, comm=comm)
>>>>     matrix.setUp()
>>>> 
>>>>     local_rows_start, local_rows_end = matrix.getOwnershipRange()
>>>> 
>>>>     for counter, i in enumerate(range(local_rows_start, local_rows_end)):
>>>>         # Calculate the correct row in the array for the current process
>>>>         row_in_array = counter + local_rows_start
>>>>         matrix.setValues(
>>>>             i, range(global_cols), input_array[row_in_array, :], addv=False
>>>>         )
>>>> 
>>>>     # Assembly the matrix to compute the final structure
>>>>     matrix.assemblyBegin()
>>>>     matrix.assemblyEnd()
>>>> 
>>>>     return matrix
>>>> 
>>>> 
>>>> m, k = 10, 3
>>>> # Generate the random numpy matrices
>>>> np.random.seed(0)  # sets the seed to 0
>>>> A_np = np.random.randint(low=0, high=6, size=(m, k))
>>>> B_np = np.random.randint(low=0, high=6, size=(k, k))
>>>> 
>>>> 
>>>> A = create_petsc_matrix(A_np)
>>>> 
>>>> B = create_petsc_matrix_non_partitioned(B_np)
>>>> 
>>>> # Now perform the multiplication
>>>> C = A * B
>>>> 
>>>> The problem with this is that there is a mismatch between the local rows of A (depend on the partitioning) and the global rows of B (3 for all procs), so that the multiplication cannot happen in parallel. Here is the error:
>>>> 
>>>> [0]PETSC ERROR: ------------------------------------------------------------------------
>>>> [0]PETSC ERROR: Caught signal number 11 SEGV: Segmentation Violation, probably memory access out of range
>>>> [0]PETSC ERROR: Try option -start_in_debugger or -on_error_attach_debugger
>>>> [0]PETSC ERROR: or see https://petsc.org/release/faq/#valgrind and https://petsc.org/release/faq/
>>>> [0]PETSC ERROR: configure using --with-debugging=yes, recompile, link, and run
>>>> [0]PETSC ERROR: to get more information on the crash.
>>>> [0]PETSC ERROR: Run with -malloc_debug to check if memory corruption is causing the crash.
>>>> application called MPI_Abort(MPI_COMM_WORLD, 59) - process 0
>>>> [unset]: write_line error; fd=-1 buf=:cmd=abort exitcode=59
>>>> :
>>>> system msg for write_line failure : Bad file descriptor
>>>> 
>>>> 
>>>> Is there a standard way to achieve this?
>>> 
>>> Your B is duplicated by all processes?
>>> If so, then, call https://petsc.org/main/manualpages/Mat/MatMPIAIJGetLocalMat/, do a sequential product with B on COMM_SELF, not COMM_WORLD, and use https://petsc.org/main/manualpages/Mat/MatCreateMPIMatConcatenateSeqMat/ with the output.
>>> 
>>> Thanks,
>>> Pierre
>>> 
>>>> Thanks,
>>>> Thanos
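PS: I could not spot a petsc4py binding for https://petsc.org/main/manualpages/Mat/MatCreateMPIMatConcatenateSeqMat/ either, so assuming it is indeed missing, I suppose the final concatenation step of Pierre's recipe could be emulated by hand along these lines (again only a sketch; the helper name is mine, and it assumes each process holds exactly its own block of rows of the result, in the row ordering of the partitioning matrix):

from firedrake import COMM_WORLD
from firedrake.petsc import PETSc

def concatenate_local_mats(C_local, partition_like):
    """Hand-rolled stand-in for MatCreateMPIMatConcatenateSeqMat: stack the
    per-process 'seqaij' blocks into one 'mpiaij' matrix whose row
    partitioning matches partition_like."""
    rstart, rend = partition_like.getOwnershipRange()
    M = partition_like.getSize()[0]  # global number of rows
    m_local, n = C_local.getSize()   # this process contributes m_local rows
    assert m_local == rend - rstart
    C = PETSc.Mat().createAIJ(size=((m_local, M), (None, n)), comm=COMM_WORLD)
    C.setUp()
    for i in range(m_local):
        cols, vals = C_local.getRow(i)
        C.setValues(rstart + i, cols, vals, addv=False)  # global row index
    C.assemblyBegin()
    C.assemblyEnd()
    return C

# e.g. C = concatenate_local_mats(local_C, A)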