Hei,

For a project I wanted to combine boost::odeint for timestepping and
PETSc-based vectors and matrices for calculating the right hand side. As
comparison for both timing and correctness I set up an armadillo-based
right hand side (with the main-function being in *main.cpp*, and the
test code in *test_timestepping_clean.cpp*)

In theory, the code works fine, but I have some issues with cleaning up
afterwards in my struct /Petsc_RHS_state_clean/. My initial intention
was to set up all involved matrices and vectors within the constructor,
and free the memory in the destructor. To avoid freeing vectors I have
not used, I initially set them to /PETSC_NULL/ and check whether this value
has been changed before calling /VecDestroy()/. However, when doing so
I get the following error:

[0]PETSC ERROR:
------------------------------------------------------------------------
[0]PETSC ERROR: Caught signal number 11 SEGV: Segmentation Violation,
probably memory access out of range
[0]PETSC ERROR: Try option -start_in_debugger or -on_error_attach_debugger
[0]PETSC ERROR: or see https://petsc.org/release/faq/#valgrind
[0]PETSC ERROR: or try http://valgrind.org on GNU/linux and Apple Mac OS
X to find memory corruption errors
[0]PETSC ERROR: configure using --with-debugging=yes, recompile, link,
and run  
[0]PETSC ERROR: to get more information on the crash.
[0]PETSC ERROR: --------------------- Error Message
--------------------------------------------------------------

If I comment out that code in ~Petsc_RHS_state_clean(), the program
runs, but uses ~17 GByte of RAM during runtime. As the memory is not
allocated in full immediately, but rather grows while the program is
running, I assume there is a memory leak somewhere. Where does it come
from, and how can I avoid it?

Thanks!

Regards,

Roland Richter
#include <complex>
//#include <helper_functions.hpp>
#include <iostream>
#include <test_timestepping_clean.hpp>
#include <vector>

/**
 * Entry point: initializes PETSc (which also initializes MPI if needed),
 * runs the PETSc-vs-armadillo timestepping comparison, and finalizes PETSc.
 */
int main(int argc, char **args) {
	PetscMPIInt rank, size;
	// nullptr instead of the C-style "(char*) 0" / NULL — file is C++11.
	PetscInitialize(&argc, &args, nullptr, nullptr);

	// Print the active C++ standard, useful when checking compiler flags.
	std::cout << __cplusplus << '\n';

	MPI_Comm_size(PETSC_COMM_WORLD, &size);
	MPI_Comm_rank(PETSC_COMM_WORLD, &rank);

	//test_scaling_with_MPI (4, 8);
	//test_span_copy (5, 5);
	test_RK4_solvers_clean (16, 64, 1, false);
	//test_scaling (3, 1000, true);
	//	test_scaling (1024, 1024, false);
	//test_scaling (1024, 8192, false);
	PetscFinalize();
	return 0;
}
#include "mpi.h"
#include <complex>
#include <functional>
#include <test_timestepping_clean.hpp>
#include <vector>

using namespace boost::numeric::odeint;
// C++11 alias declarations instead of typedef (the file already relies on
// C++11 features, e.g. in-class member initializers below).
using data_type      = std::complex<double>;   // one complex state entry
using value_type     = std::vector<data_type>; // flat storage of all entries
using state_type     = value_type;             // odeint state representation
using mpi_state_type = mpi_state<value_type>;  // MPI-distributed odeint state

struct Petsc_RHS_state_clean {
	Petsc_RHS_state_clean(MPI_Comm &communicator, const int global_size) : communicator(communicator) {
		VecCreate(communicator, &local_vec);
		VecSetSizes(local_vec, PETSC_DECIDE, global_size);
		VecSetFromOptions(local_vec);
		VecZeroEntries(local_vec);
		VecAssemblyBegin(local_vec);
		VecAssemblyEnd(local_vec);

		VecCreate(communicator, &scaling_vec);
		VecSetSizes(scaling_vec, PETSC_DECIDE, global_size);
		VecSetFromOptions(scaling_vec);
		PetscScalar set_val = 1;
		for(int i = 0; i < global_size; ++i)
			VecSetValues(scaling_vec, 1, &i, &set_val, INSERT_VALUES);
		VecAssemblyBegin(scaling_vec);
		VecAssemblyEnd(scaling_vec);

		MatCreateDense(communicator, PETSC_DECIDE, PETSC_DECIDE, global_size, global_size, NULL, &diagonal_mat);
		MatSetFromOptions(diagonal_mat);
		MatZeroEntries(diagonal_mat);
		MatAssemblyBegin(diagonal_mat, MAT_FINAL_ASSEMBLY);
		MatAssemblyEnd(diagonal_mat, MAT_FINAL_ASSEMBLY);
	}

	~Petsc_RHS_state_clean(){
				 if(local_vec != PETSC_NULL) {
					PetscInt vecSize = 0;
					PetscErrorCode ierr;
					std::cerr << local_vec << '\n';
					ierr = VecGetSize(local_vec, &vecSize);
					std::cerr << vecSize << '\t' << ierr << '\n';
					ierr = VecDestroy(&local_vec);
					std::cerr << vecSize << '\t' << ierr << '\n';
					local_vec = PETSC_NULL;
				 }
				 if(scaling_vec != PETSC_NULL)
					VecDestroy(&scaling_vec);
				 if(diagonal_mat != PETSC_NULL)
					MatDestroy(&diagonal_mat);
	};

	void operator()(const state_type &x, state_type &dxdt, double t) const {
		//std::cout << t << '\n';
		const size_t M = x.size();
		PetscScalar *local_ptr;
		VecGetArray(local_vec, &local_ptr);
		for(size_t m = 0; m < M; ++m) {
			*(local_ptr + m) = x[m];
		}
		VecRestoreArray(local_vec, &local_ptr);
		VecPointwiseMult(local_vec, scaling_vec, local_vec);
		MatDiagonalScale(diagonal_mat, NULL, local_vec);
		VecGetArray(local_vec, &local_ptr);
		for(size_t m = 0; m < M; ++m) {
			*(local_ptr + m) = calculation_function(*(local_ptr + m), t);
		}
		for(size_t m = 0; m < M; ++m) {
			dxdt[m] = *(local_ptr + m);
		}
		VecRestoreArray(local_vec, &local_ptr);
	}

	template <typename T>
	T calculation_function (const T &x, const double t) const {
		return 3. / (2 * t * t) + x / (2 * t);
	}

	MPI_Comm communicator;
	Vec local_vec = PETSC_NULL;
	Vec scaling_vec = PETSC_NULL;
	Mat diagonal_mat = PETSC_NULL;
};

struct RHS_state_clean {
	RHS_state_clean(){}

	void operator()(const state_type &x, state_type &dxdt, double t) const {
		const size_t M = x.size();
		for(size_t m = 0 ; m < M ; ++m)
		{
			dxdt[m] = calculation_function(x[m], t);
		}
	}

	data_type calculation_function (const data_type &x, const double t) const {
		return 3. / (2 * t * t) + x / (2 * t);
	}
};

/**
 * Integrates the test ODE with the pure armadillo/STL right-hand side.
 *
 * @param initial_condition_vec  vectorised initial state (read only)
 * @param solution_vec           receives the integrated state
 * @param dt                     initial step size for the adaptive stepper
 * @param t_min, t_max           integration interval
 *
 * Fix: removed the unused local `runge_kutta4<state_type> stepper` — the
 * integration actually uses a Bulirsch-Stoer stepper constructed inline.
 */
void test_ts_arma_clean (const size_t /*matrix_size_rows*/, const size_t /*matrix_size_cols*/,
						 const arma::cx_colvec &initial_condition_vec,
						 arma::cx_colvec &solution_vec,
						 const double dt,
						 const double t_min, const double t_max) {
	state_type local_solution = arma::conv_to<state_type>::from(initial_condition_vec);
	// Adaptive Bulirsch-Stoer stepper, abs/rel tolerances 1e-8 / 1e-6.
	integrate_adaptive(bulirsch_stoer<state_type>(1e-8, 1e-6),
					   RHS_state_clean(),
					   local_solution,
					   t_min,
					   t_max,
					   dt);
	solution_vec = arma::conv_to<arma::cx_colvec>::from(local_solution);
}

/**
 * Integrates the test ODE with the PETSc-backed right-hand side.
 *
 * The initial condition lives in a dense, distributed PETSc matrix; each
 * rank integrates its locally owned rows as a std::vector state, the result
 * is gathered on every rank via MatCreateRedundantMatrix and finally
 * broadcast, so all ranks return the same solution_vec.
 *
 * Fixes: removed the unused `runge_kutta4<state_type> stepper` local, and
 * the RHS functor is now passed to integrate_adaptive via std::ref — odeint
 * otherwise takes the system by value and copies it internally, which with
 * the shallow-copying Petsc_RHS_state_clean led either to a double
 * VecDestroy (SEGV) or, with its destructor disabled, to the observed
 * multi-GByte memory leak during benchmarking.
 */
void test_ts_arma_with_pure_petsc_preconfigured_clean (const size_t matrix_size_rows, const size_t matrix_size_cols,
													   const arma::cx_colvec &initial_condition_vec,
													   arma::cx_colvec &solution_vec,
													   const double dt,
													   const double t_min, const double t_max) {
	PetscMPIInt rank, size;

	MPI_Comm_size(PETSC_COMM_WORLD, &size);
	MPI_Comm_rank(PETSC_COMM_WORLD, &rank);

	boost::mpi::communicator boost_comm(PETSC_COMM_WORLD, boost::mpi::comm_attach);

	state_type x_initial = arma::conv_to<value_type>::from(initial_condition_vec);

	// Zero-initialized dense matrix holding the distributed state.
	Mat input_mat;
	MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, matrix_size_rows, matrix_size_cols, NULL, &input_mat);
	MatZeroEntries(input_mat);
	MatAssemblyBegin(input_mat, MAT_FINAL_ASSEMBLY);
	MatAssemblyEnd(input_mat, MAT_FINAL_ASSEMBLY);

	PetscInt local_cols, global_cols, local_rows, global_rows;
	PetscInt first_row, last_row;
	MatGetLocalSize(input_mat, &local_rows, &local_cols);
	MatGetSize(input_mat, &global_rows, &global_cols);
	MatGetOwnershipRange(input_mat, &first_row, &last_row);

	// Copy the locally owned rows (column-major layout) into the odeint state.
	state_type x_split((last_row - first_row) * global_cols);

	const PetscScalar *read_ptr;
	MatDenseGetArrayRead (input_mat, &read_ptr);
	for(int j = 0; j < global_cols; ++j) {
		for(int i = 0; i < last_row - first_row; ++i) {
			const int cur_pos = i + j * (last_row - first_row);
			x_split[cur_pos] = *(read_ptr + cur_pos);
		}
	}
	MatDenseRestoreArrayRead (input_mat, &read_ptr);

	Petsc_RHS_state_clean local_calculator(PETSC_COMM_WORLD, matrix_size_cols * matrix_size_rows);

	// std::ref keeps odeint from copying the functor (see function comment).
	integrate_adaptive(bulirsch_stoer<state_type>(1e-8, 1e-6),
					   std::ref(local_calculator),
					   x_split,
					   t_min,
					   t_max,
					   dt);

	// Write the integrated local state back into the distributed matrix.
	PetscScalar *write_ptr;
	MatDenseGetArrayWrite (input_mat, &write_ptr);
	for(int j = 0; j < global_cols; ++j) {
		for(int i = 0; i < last_row - first_row; ++i) {
			const int cur_pos = i + j * (last_row - first_row);
			*(write_ptr + cur_pos) = x_split[cur_pos];
		}
	}
	MatDenseRestoreArrayWrite (input_mat, &write_ptr);

	// Gather a full redundant copy of the matrix for local read-out.
	Mat local_mat;
	MatCreateRedundantMatrix(input_mat, size, MPI_COMM_NULL, MAT_INITIAL_MATRIX, &local_mat);
	MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY);
	MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY);

	const PetscScalar *local_mat_read_ptr;
	MatDenseGetArrayRead(local_mat, &local_mat_read_ptr);
	for(int i = 0; i < (int)matrix_size_cols; ++i) {
		for (int j = 0; j < (int)matrix_size_rows; ++j) {
			const PetscInt cur_pos = j + i * matrix_size_rows;
			x_initial[cur_pos] = *(local_mat_read_ptr + cur_pos);
		}
	}
	MatDenseRestoreArrayRead(local_mat, &local_mat_read_ptr);
	MatDestroy(&local_mat);

	// Each complex<double> is broadcast as two MPI_DOUBLEs from rank 0.
	MPI_Bcast(x_initial.data(), 2 * x_initial.size(), MPI_DOUBLE, 0, PETSC_COMM_WORLD);

	solution_vec = arma::conv_to<arma::cx_colvec>::from(x_initial);
	MatDestroy(&input_mat);
}


// Runs both solver variants (PETSc-backed and pure armadillo) on the same
// zero initial condition, compares both against the analytic solution
// sqrt(t) - 1/t, and — since BENCHMARK is defined — times repeated runs
// of each variant.
void test_RK4_solvers_clean(const size_t matrix_size_rows, const size_t matrix_size_cols,
							const size_t num_steps,
							const bool print_matrices) {
	using clock_type = std::chrono::high_resolution_clock;

	// Both solvers start from the same vectorised zero matrix.
	arma::cx_mat initial_condition_mat = arma::zeros<arma::cx_mat>(matrix_size_rows, matrix_size_cols);
	arma::cx_colvec initial_condition_petsc = arma::vectorise(initial_condition_mat),
			initial_condition_arma = arma::vectorise(initial_condition_mat);
	arma::cx_colvec solution_petsc = arma::vectorise(initial_condition_mat),
			solution_arma = arma::vectorise(initial_condition_mat);

	// Short integration interval, split into num_steps initial steps.
	const double t_0 = 1, t_1 = 1.001;
	const double dt = (t_1 - t_0) / num_steps;

	test_ts_arma_with_pure_petsc_preconfigured_clean (matrix_size_rows, matrix_size_cols,
													  initial_condition_petsc, solution_petsc,
													  dt, t_0, t_1);

	test_ts_arma_clean (matrix_size_rows, matrix_size_cols,
						initial_condition_arma, solution_arma,
						dt, t_0, t_1);

	// Reshape the flat solutions back to matrix form for comparison/printing.
	arma::cx_mat solution_mat_petsc = arma::reshape(solution_petsc, matrix_size_rows, matrix_size_cols),
			solution_mat_arma = arma::reshape(solution_arma, matrix_size_rows, matrix_size_cols);

	// Expected analytic solution at t_1 (same value in every entry).
	arma::cx_mat test_solution = arma::conv_to<arma::cx_mat>::from(arma::ones<arma::mat>(matrix_size_rows, matrix_size_cols) * sqrt(t_1) - 1 / (t_1));

	if(print_matrices) {
		std::cout << "Solving done, result from PETSc is\n" << solution_mat_petsc << "\n";
		std::cout << "Solving done, result from Arma is\n" << solution_mat_arma << "\n";
		std::cout << "Expected solution is \n" << test_solution << '\n';
		std::cout << "Difference between PETSc and Solution:\n" << solution_mat_petsc - test_solution << '\n';
	}
	std::cout << "Arma and solution are equal: " << arma::approx_equal(solution_mat_arma, test_solution, "absdiff", 5e-3) << '\n';
	std::cout << "PETSc and solution are equal: " << arma::approx_equal(solution_mat_petsc, test_solution, "absdiff", 5e-3) << '\n';

#define BENCHMARK
#ifdef BENCHMARK
	// One untimed PETSc run as warmup before benchmarking.
	test_ts_arma_with_pure_petsc_preconfigured_clean (matrix_size_rows, matrix_size_cols,
													  initial_condition_petsc, solution_petsc,
													  dt, t_0, t_1);

	std::cout << "Warmup complete\n";

	// Time bench_rounds repetitions of the armadillo variant ...
	const clock_type::time_point arma_begin = clock_type::now();
	for(size_t round = 0; round < bench_rounds; ++round) {
		test_ts_arma_clean (matrix_size_rows, matrix_size_cols,
							initial_condition_arma, solution_arma,
							dt, t_0, t_1);
	}
	const clock_type::time_point arma_end = clock_type::now();

	// ... and of the PETSc variant.
	const clock_type::time_point petsc_begin = clock_type::now();
	for(size_t round = 0; round < bench_rounds; ++round) {
		test_ts_arma_with_pure_petsc_preconfigured_clean (matrix_size_rows, matrix_size_cols,
														  initial_condition_petsc, solution_petsc,
														  dt, t_0, t_1);
	}
	const clock_type::time_point petsc_end = clock_type::now();

	const auto arma_ms = std::chrono::duration_cast<std::chrono::milliseconds>( arma_end - arma_begin ).count();
	const auto petsc_ms = std::chrono::duration_cast<std::chrono::milliseconds>( petsc_end - petsc_begin ).count();
	std::cout << "Arma-time:\t" << arma_ms << '\n';
	std::cout << "PETSc-time:\t" << petsc_ms << '\n';
#endif
}
#ifndef TEST_TIMESTEPPING_CLEAN_HPP
#define TEST_TIMESTEPPING_CLEAN_HPP

#include "helper_functions.hpp"

// Boost.Serialization support for the complex/vector state types, plus the
// odeint MPI state adapter used by the timestepping tests.
#include <boost/serialization/utility.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/serialization/complex.hpp>
#include <boost/numeric/odeint/external/mpi/mpi.hpp>

// Runs the PETSc-based and armadillo-based timesteppers on a
// matrix_size_rows x matrix_size_cols problem using num_steps initial time
// steps, compares both against the analytic solution, and benchmarks them.
// Set print_matrices to also print the full solution matrices.
void test_RK4_solvers_clean(const size_t matrix_size_rows, const size_t matrix_size_cols,
					  const size_t num_steps,
					  const bool print_matrices);

#endif // TEST_TIMESTEPPING_CLEAN_HPP

Reply via email to