Hei,
For a project I wanted to combine boost::odeint for timestepping and
PETSc-based vectors and matrices for calculating the right hand side. As
comparison for both timing and correctness I set up an armadillo-based
right hand side (with the main-function being in *main.cpp*, and the
test code in *test_timestepping_clean.cpp*)
In theory, the code works fine, but I have some issues with cleaning up
afterwards in my struct /Petsc_RHS_state_clean/. My initial intention
was to set up all involved matrices and vectors within the constructor,
and free the memory in the destructor. To avoid freeing vectors I have
not used, I initially set them to /PETSC_NULL/, and check whether this value
has been changed before calling /VecDestroy()/. However, when doing that
I get the following error:
[0]PETSC ERROR:
------------------------------------------------------------------------
[0]PETSC ERROR: Caught signal number 11 SEGV: Segmentation Violation,
probably memory access out of range
[0]PETSC ERROR: Try option -start_in_debugger or -on_error_attach_debugger
[0]PETSC ERROR: or see https://petsc.org/release/faq/#valgrind
[0]PETSC ERROR: or try http://valgrind.org on GNU/linux and Apple Mac OS
X to find memory corruption errors
[0]PETSC ERROR: configure using --with-debugging=yes, recompile, link,
and run
[0]PETSC ERROR: to get more information on the crash.
[0]PETSC ERROR: --------------------- Error Message
--------------------------------------------------------------
If I comment out that code in ~Petsc_RHS_state_clean(), the program
runs, but uses ~17 GByte of RAM during runtime. As the memory is not
allocated in full immediately, but rather grows while the program is
running, I assume there is a memory leak somewhere. Where does it come
from, and how can I avoid it?
Thanks!
Regards,
Roland Richter
#include <complex>
//#include <helper_functions.hpp>
#include <iostream>
#include <test_timestepping_clean.hpp>
#include <vector>
/**
 * Driver: initialize PETSc (which also sets up MPI if necessary), run the
 * clean RK4/Bulirsch-Stoer comparison test, and finalize PETSc again.
 *
 * Changes vs. original: the unused `rank`/`size` locals and the dead
 * commented-out test calls were removed, and the C-style `(char*) 0`
 * argument was replaced by `nullptr`.
 */
int main(int argc, char **args) {
    PetscInitialize(&argc, &args, nullptr, nullptr);
    // Print the active C++ standard version for diagnostics.
    std::cout << __cplusplus << '\n';
    // 16x64 system, a single suggested step, no matrix printing.
    test_RK4_solvers_clean(16, 64, 1, false);
    PetscFinalize();
    return 0;
}
#include "mpi.h"

#include <complex>
#include <functional> // std::ref
#include <vector>

#include <test_timestepping_clean.hpp>
using namespace boost::numeric::odeint;
// Scalar and state-type aliases shared by every stepper in this file
// (modernized from `typedef` to `using`).
using data_type = std::complex<double>;        // complex scalar
using value_type = std::vector<data_type>;     // contiguous complex vector
using state_type = value_type;                 // odeint state type
using mpi_state_type = mpi_state<value_type>;  // MPI-split odeint state
struct Petsc_RHS_state_clean {
Petsc_RHS_state_clean(MPI_Comm &communicator, const int global_size) : communicator(communicator) {
VecCreate(communicator, &local_vec);
VecSetSizes(local_vec, PETSC_DECIDE, global_size);
VecSetFromOptions(local_vec);
VecZeroEntries(local_vec);
VecAssemblyBegin(local_vec);
VecAssemblyEnd(local_vec);
VecCreate(communicator, &scaling_vec);
VecSetSizes(scaling_vec, PETSC_DECIDE, global_size);
VecSetFromOptions(scaling_vec);
PetscScalar set_val = 1;
for(int i = 0; i < global_size; ++i)
VecSetValues(scaling_vec, 1, &i, &set_val, INSERT_VALUES);
VecAssemblyBegin(scaling_vec);
VecAssemblyEnd(scaling_vec);
MatCreateDense(communicator, PETSC_DECIDE, PETSC_DECIDE, global_size, global_size, NULL, &diagonal_mat);
MatSetFromOptions(diagonal_mat);
MatZeroEntries(diagonal_mat);
MatAssemblyBegin(diagonal_mat, MAT_FINAL_ASSEMBLY);
MatAssemblyEnd(diagonal_mat, MAT_FINAL_ASSEMBLY);
}
~Petsc_RHS_state_clean(){
if(local_vec != PETSC_NULL) {
PetscInt vecSize = 0;
PetscErrorCode ierr;
std::cerr << local_vec << '\n';
ierr = VecGetSize(local_vec, &vecSize);
std::cerr << vecSize << '\t' << ierr << '\n';
ierr = VecDestroy(&local_vec);
std::cerr << vecSize << '\t' << ierr << '\n';
local_vec = PETSC_NULL;
}
if(scaling_vec != PETSC_NULL)
VecDestroy(&scaling_vec);
if(diagonal_mat != PETSC_NULL)
MatDestroy(&diagonal_mat);
};
void operator()(const state_type &x, state_type &dxdt, double t) const {
//std::cout << t << '\n';
const size_t M = x.size();
PetscScalar *local_ptr;
VecGetArray(local_vec, &local_ptr);
for(size_t m = 0; m < M; ++m) {
*(local_ptr + m) = x[m];
}
VecRestoreArray(local_vec, &local_ptr);
VecPointwiseMult(local_vec, scaling_vec, local_vec);
MatDiagonalScale(diagonal_mat, NULL, local_vec);
VecGetArray(local_vec, &local_ptr);
for(size_t m = 0; m < M; ++m) {
*(local_ptr + m) = calculation_function(*(local_ptr + m), t);
}
for(size_t m = 0; m < M; ++m) {
dxdt[m] = *(local_ptr + m);
}
VecRestoreArray(local_vec, &local_ptr);
}
template <typename T>
T calculation_function (const T &x, const double t) const {
return 3. / (2 * t * t) + x / (2 * t);
}
MPI_Comm communicator;
Vec local_vec = PETSC_NULL;
Vec scaling_vec = PETSC_NULL;
Mat diagonal_mat = PETSC_NULL;
};
struct RHS_state_clean {
RHS_state_clean(){}
void operator()(const state_type &x, state_type &dxdt, double t) const {
const size_t M = x.size();
for(size_t m = 0 ; m < M ; ++m)
{
dxdt[m] = calculation_function(x[m], t);
}
}
data_type calculation_function (const data_type &x, const double t) const {
return 3. / (2 * t * t) + x / (2 * t);
}
};
/**
 * Pure Armadillo/std::vector reference path: integrate the test ODE with the
 * adaptive Bulirsch-Stoer stepper and return the result in solution_vec.
 *
 * @param initial_condition_vec  flattened initial state
 * @param solution_vec           out: state at t_max
 * @param dt                     initial step-size suggestion for the
 *                               adaptive integrator (not a fixed step)
 * @param t_min,t_max            integration interval
 *
 * Change vs. original: the declared-but-unused runge_kutta4 stepper local
 * was removed (integration uses bulirsch_stoer).
 */
void test_ts_arma_clean (const size_t /*matrix_size_rows*/, const size_t /*matrix_size_cols*/,
                         const arma::cx_colvec &initial_condition_vec,
                         arma::cx_colvec &solution_vec,
                         const double dt,
                         const double t_min, const double t_max) {
    // Convert the armadillo vector into the std::vector state odeint expects.
    state_type local_solution = arma::conv_to<state_type>::from(initial_condition_vec);
    integrate_adaptive(bulirsch_stoer<state_type>(1e-8, 1e-6),
                       RHS_state_clean(),
                       local_solution,
                       t_min,
                       t_max,
                       dt);
    // Convert the integrated state back for the caller.
    solution_vec = arma::conv_to<arma::cx_colvec>::from(local_solution);
}
/**
 * PETSc-backed path: stage the flattened state in a distributed dense
 * matrix, integrate the locally owned rows with the Bulirsch-Stoer stepper
 * and the PETSc-based RHS functor, then gather the full result on every
 * rank and return it in solution_vec.
 *
 * BUG FIX: the RHS functor owns PETSc handles, and odeint copies the system
 * object it is given. The functor is now passed through std::ref so only the
 * single local instance is ever constructed/destructed — without this, the
 * copies double-destroyed (or, with the destructor disabled, leaked) the
 * PETSc objects. Unused locals (`rank`, `boost_comm`, the runge_kutta4
 * stepper) were removed.
 */
void test_ts_arma_with_pure_petsc_preconfigured_clean (const size_t matrix_size_rows, const size_t matrix_size_cols,
                                                       const arma::cx_colvec &initial_condition_vec,
                                                       arma::cx_colvec &solution_vec,
                                                       const double dt,
                                                       const double t_min, const double t_max) {
    PetscMPIInt size;
    MPI_Comm_size(PETSC_COMM_WORLD, &size);
    state_type x_initial = arma::conv_to<value_type>::from(initial_condition_vec);
    // Distributed dense matrix that stages the (flattened) state; zeroed.
    Mat input_mat;
    MatCreateDense(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, matrix_size_rows, matrix_size_cols, NULL, &input_mat);
    MatZeroEntries(input_mat);
    MatAssemblyBegin(input_mat, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(input_mat, MAT_FINAL_ASSEMBLY);
    PetscInt local_cols, global_cols, local_rows, global_rows;
    PetscInt first_row, last_row;
    MatGetLocalSize(input_mat, &local_rows, &local_cols);
    MatGetSize(input_mat, &global_rows, &global_cols);
    MatGetOwnershipRange(input_mat, &first_row, &last_row);
    // Copy the locally owned rows (column-major, local leading dimension)
    // into a per-rank odeint state vector.
    state_type x_split((last_row - first_row) * global_cols);
    const PetscScalar *read_ptr;
    MatDenseGetArrayRead (input_mat, &read_ptr);
    for(int j = 0; j < global_cols; ++j) {
        for(int i = 0; i < last_row - first_row; ++i) {
            const int cur_pos = i + j * (last_row - first_row);
            x_split[cur_pos] = *(read_ptr + cur_pos);
        }
    }
    MatDenseRestoreArrayRead (input_mat, &read_ptr);
    Petsc_RHS_state_clean local_calculator(PETSC_COMM_WORLD, matrix_size_cols * matrix_size_rows);
    // std::ref keeps odeint from copying the handle-owning functor.
    integrate_adaptive(bulirsch_stoer<state_type>(1e-8, 1e-6),
                       std::ref(local_calculator),
                       x_split,
                       t_min,
                       t_max,
                       dt);
    // Write the integrated local rows back into the distributed matrix.
    PetscScalar *write_ptr;
    MatDenseGetArrayWrite (input_mat, &write_ptr);
    for(int j = 0; j < global_cols; ++j) {
        for(int i = 0; i < last_row - first_row; ++i) {
            const int cur_pos = i + j * (last_row - first_row);
            *(write_ptr + cur_pos) = x_split[cur_pos];
        }
    }
    MatDenseRestoreArrayWrite (input_mat, &write_ptr);
    // Replicate the full matrix on every rank, then read it out sequentially.
    Mat local_mat;
    MatCreateRedundantMatrix(input_mat, size, MPI_COMM_NULL, MAT_INITIAL_MATRIX, &local_mat);
    MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY);
    const PetscScalar *local_mat_read_ptr;
    MatDenseGetArrayRead(local_mat, &local_mat_read_ptr);
    for(int i = 0; i < (int)matrix_size_cols; ++i) {
        for (int j = 0; j < (int)matrix_size_rows; ++j) {
            const PetscInt cur_pos = j + i * matrix_size_rows;
            x_initial[cur_pos] = *(local_mat_read_ptr + cur_pos);
        }
    }
    MatDenseRestoreArrayRead(local_mat, &local_mat_read_ptr);
    MatDestroy(&local_mat);
    // Broadcast rank 0's result: each complex<double> travels as 2 doubles.
    MPI_Bcast(x_initial.data(), 2 * x_initial.size(), MPI_DOUBLE, 0, PETSC_COMM_WORLD);
    solution_vec = arma::conv_to<arma::cx_colvec>::from(x_initial);
    MatDestroy(&input_mat);
}
// Compares the PETSc-based and the pure-Armadillo time steppers on the same
// ODE (dx/dt = 3/(2 t^2) + x/(2 t)), checks both against the analytic
// solution x(t) = sqrt(t) - 1/t (which satisfies x(1) = 0, matching the
// zero initial condition with t_0 = 1), and then benchmarks both variants.
void test_RK4_solvers_clean(const size_t matrix_size_rows, const size_t matrix_size_cols,
const size_t num_steps,
const bool print_matrices) {
// Zero initial condition, flattened column-wise for the vector-based solvers.
arma::cx_mat initial_condition_mat = arma::zeros<arma::cx_mat>(matrix_size_rows, matrix_size_cols);
arma::cx_colvec initial_condition_petsc = arma::vectorise(initial_condition_mat),
initial_condition_arma = arma::vectorise(initial_condition_mat);
arma::cx_colvec solution_petsc = arma::vectorise(initial_condition_mat),
solution_arma = arma::vectorise(initial_condition_mat);
// Short interval; dt is the initial step-size suggestion for the adaptive
// integrator, not a fixed step.
const double t_0 = 1, t_1 = 1.001;
const double dt = (t_1 - t_0) / num_steps;
test_ts_arma_with_pure_petsc_preconfigured_clean (matrix_size_rows, matrix_size_cols,
initial_condition_petsc, solution_petsc,
dt, t_0, t_1);
test_ts_arma_clean (matrix_size_rows, matrix_size_cols,
initial_condition_arma, solution_arma,
dt, t_0, t_1);
// Reshape the flat solutions back into matrices for comparison/printing.
arma::cx_mat solution_mat_petsc = arma::reshape(solution_petsc, matrix_size_rows, matrix_size_cols),
solution_mat_arma = arma::reshape(solution_arma, matrix_size_rows, matrix_size_cols);
// Analytic solution evaluated at t_1.
arma::cx_mat test_solution = arma::conv_to<arma::cx_mat>::from(arma::ones<arma::mat>(matrix_size_rows, matrix_size_cols) * sqrt(t_1) - 1 / (t_1));
if(print_matrices) {
std::cout << "Solving done, result from PETSc is\n" << solution_mat_petsc << "\n";
std::cout << "Solving done, result from Arma is\n" << solution_mat_arma << "\n";
std::cout << "Expected solution is \n" << test_solution << '\n';
std::cout << "Difference between PETSc and Solution:\n" << solution_mat_petsc - test_solution << '\n';
}
// approx_equal prints 1 (equal within 5e-3 absolute) or 0.
std::cout << "Arma and solution are equal: " << arma::approx_equal(solution_mat_arma, test_solution, "absdiff", 5e-3) << '\n';
std::cout << "PETSc and solution are equal: " << arma::approx_equal(solution_mat_petsc, test_solution, "absdiff", 5e-3) << '\n';
// Benchmarking is always compiled in; remove the define to skip it.
#define BENCHMARK
#ifdef BENCHMARK
//Warmup
test_ts_arma_with_pure_petsc_preconfigured_clean (matrix_size_rows, matrix_size_cols,
initial_condition_petsc, solution_petsc,
dt, t_0, t_1);
std::cout << "Warmup complete\n";
//Benchmarking: bench_rounds is defined outside this file (presumably in
//helper_functions.hpp) — TODO confirm.
auto t1 = std::chrono::high_resolution_clock::now();
for(size_t i = 0; i < bench_rounds; ++i) {
test_ts_arma_clean (matrix_size_rows, matrix_size_cols,
initial_condition_arma, solution_arma,
dt, t_0, t_1);
}
auto t2 = std::chrono::high_resolution_clock::now();
auto t3 = std::chrono::high_resolution_clock::now();
for(size_t i = 0; i < bench_rounds; ++i) {
test_ts_arma_with_pure_petsc_preconfigured_clean (matrix_size_rows, matrix_size_cols,
initial_condition_petsc, solution_petsc,
dt, t_0, t_1);
}
auto t4 = std::chrono::high_resolution_clock::now();
// Wall-clock durations reported in milliseconds.
std::cout << "Arma-time:\t" << std::chrono::duration_cast<std::chrono::milliseconds>( t2 - t1 ).count() << '\n';
std::cout << "PETSc-time:\t" << std::chrono::duration_cast<std::chrono::milliseconds>( t4 - t3 ).count() << '\n';
#endif
}
#ifndef TEST_TIMESTEPPING_CLEAN_HPP
#define TEST_TIMESTEPPING_CLEAN_HPP
// Project helpers (presumably provides bench_rounds and the armadillo/PETSc
// includes used by the .cpp — TODO confirm).
#include "helper_functions.hpp"
// Boost.Serialization support required by odeint's MPI state wrapper below.
#include <boost/serialization/utility.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/serialization/complex.hpp>
#include <boost/numeric/odeint/external/mpi/mpi.hpp>
// Runs the PETSc-based and Armadillo-based time steppers on the same ODE,
// compares both against the analytic solution, and benchmarks them.
void test_RK4_solvers_clean(const size_t matrix_size_rows, const size_t matrix_size_cols,
const size_t num_steps,
const bool print_matrices);
#endif // TEST_TIMESTEPPING_CLEAN_HPP