Was PETSc built with debugging turned off, i.e. with ./configure --with-debugging=0?

  Can you run with the equivalent of -log_view to get information about the
time spent in the various operations and send that output? The data it
generates is the best starting point for determining where the code is
spending its time.
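
  For example, since the script already calls petsc4py.init(args=sys.argv),
-log_view can simply be given on the command line when running the script, or,
as a minimal sketch, appended to the arguments before PETSc is initialized:

import sys
import petsc4py
# forward the command-line options and add -log_view, which makes PETSc print
# a performance summary of the logged operations when it is finalized
petsc4py.init(args=sys.argv + ["-log_view"])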

   Thanks

   Barry


> On Aug 9, 2023, at 9:40 AM, Niclas Götting <ngoett...@itp.uni-bremen.de> 
> wrote:
> 
> Hi all,
> 
> I'm currently trying to convert a quantum simulation from scipy to PETSc. The
> problem itself is extremely simple and of the form \dot{u}(t) = (A_const +
> f(t)*B_const)*u(t), where f(t) in this simple test case is a square pulse.
> The matrices A_const and B_const are extremely sparse, so I thought the
> problem would be well suited for PETSc. Currently, I solve the ODE with the
> following procedure in scipy (I can provide the necessary data files if
> needed, but they are just some trace-preserving, very sparse matrices):
> 
> import numpy as np
> import scipy.sparse
> import scipy.integrate
> 
> from tqdm import tqdm
> 
> 
> l = np.load("../liouvillian.npy")
> pump = np.load("../pump_operator.npy")
> state = np.load("../initial_state.npy")
> 
> l = scipy.sparse.csr_array(l)
> pump = scipy.sparse.csr_array(pump)
> 
> def f(t, y, *args):
>     return (l + 0.5 * (5 < t < 10) * pump) @ y
>     #return l @ y # Uncomment for f(t) = 0
> 
> dt = 0.1
> NUM_STEPS = 200
> res = np.empty((NUM_STEPS, 4096), dtype=np.complex128)
> solver = scipy.integrate.ode(f).set_integrator("zvode").set_initial_value(state)
> times = []
> for i in tqdm(range(NUM_STEPS)):
>     res[i, :] = solver.integrate(solver.t + dt)
>     times.append(solver.t)
> 
> Here, A_const = l, B_const = pump, and f(t) = 5 < t < 10. tqdm reports about
> 330it/s on my machine. When converting the code to PETSc (following the
> chapter https://petsc.org/main/manual/ts/#special-cases), I arrived at the
> following:
> 
> import sys
> import petsc4py
> petsc4py.init(args=sys.argv)
> import numpy as np
> import scipy.sparse
> 
> from tqdm import tqdm
> from petsc4py import PETSc
> 
> comm = PETSc.COMM_WORLD
> 
> 
> def mat_to_real(arr):
>     # embed the complex N x N matrix as a real 2N x 2N block matrix
>     return np.block([[arr.real, -arr.imag],
>                      [arr.imag, arr.real]]).astype(np.float64)
> 
> def mat_to_petsc_aij(arr):
>     arr_sc_sp = scipy.sparse.csr_array(arr)
>     mat = PETSc.Mat().createAIJ(arr.shape[0], comm=comm)
>     rstart, rend = mat.getOwnershipRange()
>     print(rstart, rend)
>     print(arr.shape[0])
>     print(mat.sizes)
>     I = arr_sc_sp.indptr[rstart : rend + 1] - arr_sc_sp.indptr[rstart]
>     J = arr_sc_sp.indices[arr_sc_sp.indptr[rstart] : arr_sc_sp.indptr[rend]]
>     V = arr_sc_sp.data[arr_sc_sp.indptr[rstart] : arr_sc_sp.indptr[rend]]
> 
>     print(I.shape, J.shape, V.shape)
>     mat.setValuesCSR(I, J, V)
>     mat.assemble()
>     return mat
> 
> 
> l = np.load("../liouvillian.npy")
> l = mat_to_real(l)
> pump = np.load("../pump_operator.npy")
> pump = mat_to_real(pump)
> state = np.load("../initial_state.npy")
> state = np.hstack([state.real, state.imag]).astype(np.float64)
> 
> l = mat_to_petsc_aij(l)
> pump = mat_to_petsc_aij(pump)
> 
> 
> # Jacobian work matrix: sparsity pattern of l plus explicit diagonal entries
> jac = l.duplicate()
> for i in range(8192):
>     jac.setValue(i, i, 0)
> jac.assemble()
> jac += l
> 
> vec = l.createVecRight()
> vec.setValues(np.arange(state.shape[0], dtype=np.int32), state)
> vec.assemble()
> 
> 
> dt = 0.1
> 
> ts = PETSc.TS().create(comm=comm)
> ts.setFromOptions()
> ts.setProblemType(ts.ProblemType.LINEAR)
> ts.setEquationType(ts.EquationType.ODE_EXPLICIT)
> ts.setType(ts.Type.RK)
> ts.setRKType(ts.RKType.RK3BS)
> ts.setTime(0)
> print("KSP:", ts.getKSP().getType())
> print("KSP PC:",ts.getKSP().getPC().getType())
> print("SNES :", ts.getSNES().getType())
> 
> def jacobian(ts, t, u, Amat, Pmat):
>     Amat.zeroEntries()
>     Amat.aypx(1, l, structure=PETSc.Mat.Structure.SUBSET_NONZERO_PATTERN)
>     Amat.axpy(0.5 * (5 < t < 10), pump,
>               structure=PETSc.Mat.Structure.SUBSET_NONZERO_PATTERN)
> 
> ts.setRHSFunction(PETSc.TS.computeRHSFunctionLinear)
> #ts.setRHSJacobian(PETSc.TS.computeRHSJacobianConstant, l, l)  # Uncomment for f(t) = 0
> ts.setRHSJacobian(jacobian, jac)
> 
> NUM_STEPS = 200
> res = np.empty((NUM_STEPS, 8192), dtype=np.float64)
> times = []
> rstart, rend = vec.getOwnershipRange()
> for i in tqdm(range(NUM_STEPS)):
>     time = ts.getTime()
>     ts.setMaxTime(time + dt)
>     ts.solve(vec)
>     res[i, rstart:rend] = vec.getArray()[:]
>     times.append(time)
> 
> I decomposed the complex ODE into a larger real ODE so that I can easily
> switch to GPU computation later on. The solutions of both scripts are
> essentially identical, but PETSc runs about 3 times slower, at roughly
> 120it/s on my machine. I am not using MPI for PETSc yet.
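> 
> (For clarity, the real embedding built by mat_to_real is the standard one:
> writing u = x + i*y and M = M_r + i*M_i, the complex ODE \dot{u} = M*u
> becomes
> 
>     d/dt [x; y] = [[M_r, -M_i]; [M_i, M_r]] * [x; y],
> 
> which doubles the problem size from 4096 to 8192.)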
> 
> I strongly suspect that the problem lies in the Jacobian definition, since
> PETSc is about 3 times *faster* than scipy when f(t) = 0 and the Jacobian is
> therefore constant.
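> 
> (As an untested illustration of what I mean by the Jacobian update cost:
> since f(t) only takes the values 0 and 1, one could in principle precompute
> both Jacobians once and only copy in the callback, e.g.
> 
> jac_off = l.copy()                # Jacobian for f(t) = 0
> jac_on = l.copy()                 # Jacobian for f(t) = 1, i.e. l + 0.5*pump
> jac_on.axpy(0.5, pump, structure=PETSc.Mat.Structure.DIFFERENT_NONZERO_PATTERN)
> 
> def jacobian(ts, t, u, Amat, Pmat):
>     # copy the precomputed Jacobian for the current pulse state into Amat
>     src = jac_on if 5 < t < 10 else jac_off
>     src.copy(Amat, structure=PETSc.Mat.Structure.DIFFERENT_NONZERO_PATTERN)
> 
> I have not benchmarked this variant; it is only meant to show where I suspect
> the time is going.)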
> 
> Thank you in advance.
> 
> All the best,
> Niclas
> 
> 
