For approximating the inverse mass matrix I followed step-48 with some 
modifications:
In the reinit-subroutine I added 
        data.initialize_dof_vector(inv_mass_matrix);
        FEEvaluation<dim, degree, n_points_1d, 1, Number> fe_eval(data);
        const unsigned int           n_q_points = fe_eval.n_q_points;
        for (unsigned int cell = 0; cell < data.n_cell_batches(); ++cell)
        {
            fe_eval.reinit(cell);
            for (unsigned int q = 0; q < n_q_points; ++q)
                fe_eval.submit_value(make_vectorized_array(1.), q);
            fe_eval.integrate(true, false);
            fe_eval.distribute_local_to_global(inv_mass_matrix);
        }
        inv_mass_matrix.compress(VectorOperation::add);
        for (unsigned int k = 0; k < inv_mass_matrix.local_size(); ++k)
            if (inv_mass_matrix.local_element(k) > 1e-15)
                inv_mass_matrix.local_element(k) =
                        1. / inv_mass_matrix.local_element(k);
            else
                inv_mass_matrix.local_element(k) = 1;

and for applying the matrix I wrote
    template <int dim, int degree, int n_points_1d>
    void LaplaceOperator<dim, degree, 
n_points_1d>::local_apply_inverse_mass_matrix(
            const MatrixFree<dim, Number> &                   data,
            LinearAlgebra::distributed::Vector<Number> &      dst,
            const LinearAlgebra::distributed::Vector<Number> &src,
            const std::pair<unsigned int, unsigned int> &     cell_range) 
const
    {
        dst.reinit(src);
        dst = src;
        dst.scale(inv_mass_matrix);
    }
so that I could reuse the cell_loops from the original code. Still, the 
results indicate that something goes wrong. Should I handle the inverse 
matrix somehow different?
Thanks!
Am Dienstag, 14. Januar 2020 12:43:40 UTC+1 schrieb Martin Kronbichler:
>
> Dear Maxi,
>
> It looks like you are using a scalar finite element (FE_Q<dim>), but you 
> set FEEvaluation to operate on a vectorial field, i.e., 
>
>         FEEvaluation<dim, degree, n_points_1d, dim, Number> phi(data);
>
> Notice the 4th template parameter "dim", which should be one. I agree it 
> is unfortunate that we do not provide a better error message, so I opened 
> an issue for it, https://github.com/dealii/dealii/issues/9312
>
> If you switch the parameter to 1, it should work. However, I want to point 
> out that you use a continuous element with CellwiseInverseMassMatrix - this 
> will not give you a valid matrix inverse. You need to either use a diagonal 
> mass matrix (see step-48) or use the cellwise inverse as a preconditioner 
> (when combined with suitable scaling) and solve the mass matrix iteratively.
>
> Best,
> Martin
> On 14.01.20 10:49, 'Maxi Miller' via deal.II User Group wrote:
>
> I could narrow it down to the function
>   
>  // same as above for has_partitioners_are_compatible == true
>   template <
>     typename VectorType,
>     typename std::enable_if<has_partitioners_are_compatible<VectorType>::
> value,
>                             VectorType>::type * = nullptr>
>   inline void
>   check_vector_compatibility(
>     const VectorType &                            vec,
>     const internal::MatrixFreeFunctions::DoFInfo &dof_info)
>   {
>     (void)vec;
>     (void)dof_info;
>     Assert(vec.partitioners_are_compatible(*dof_info.vector_partitioner),
>            ExcMessage(
>              "The parallel layout of the given vector is not "
>              "compatible with the parallel partitioning in MatrixFree. "
>              "Use MatrixFree::initialize_dof_vector to get a "
>              "compatible vector."));
>   }
>
> located in vector_access_internal.h. Apparently I have a segfault in the 
> function 
>     
>  template <typename Number, typename MemorySpaceType>
>     inline bool
>     Vector<Number, MemorySpaceType>::partitioners_are_compatible(
>       const Utilities::MPI::Partitioner &part) const
>     {
>       return partitioner->is_compatible(part);
>     }
>
> located in la_parallel_vector.templates.h, and thereby directly stopping 
> the program without triggering the assert()-function. The direct gdb output 
> for this function is 
> #3  0x00007ffff00c0c5c in 
> dealii::LinearAlgebra::distributed::Vector<double, 
> dealii::MemorySpace::Host>::partitioners_are_compatible (this=0x0, 
> part=...) at 
> ~Downloads/git-files/dealii/include/deal.II/lac/la_parallel_vector.templates.h:2021
>
> I am not sure what that means. Could it point to a nullpointer somewhere 
> (after this=0x0)? Though, when putting a breakpoint to the first function 
> and printing the involved variables there, I get
> (gdb) print dof_info.vector_partitioner
> $2 = std::shared_ptr<const dealii::Utilities::MPI::Partitioner> (use 
> count 3, weak count 0) = {get() = 0x7a60c0}
> (gdb) print vec.partitioner
> $3 = std::shared_ptr<const dealii::Utilities::MPI::Partitioner> (use 
> count 3, weak count 0) = {get() = 0x7a60c0}
>
> i.e. no null ptr. Could the problem be there, or somewhere else?
> Thanks!
>
> Am Montag, 13. Januar 2020 21:41:54 UTC+1 schrieb Wolfgang Bangerth: 
>>
>> On 1/13/20 8:41 AM, 'Maxi Miller' via deal.II User Group wrote: 
>> > Therefore, I assume that I have some bugs in my code, but am not 
>> experienced 
>> > enough in writing matrix-free code for being able to debug it myself. 
>> What 
>> > kind of approach could I do now, or is there simply a glaring bug which 
>> I did 
>> > not see yet? 
>>
>> I haven't even looked at the code yet, but for segfaults there is a 
>> relatively 
>> simple cure: Run your code in a debugger. A segmentation fault is a 
>> problem 
>> where some piece of code tries to access data at an address (typically 
>> through 
>> a pointer) that is not readable or writable. The general solution to 
>> finding 
>> out what the issue is is to run the code in a debugger -- the debugger 
>> will 
>> then stop at the place where the problem happens, and you can inspect all 
>> of 
>> the values of the pointers and variables that are live at that location. 
>> Once 
>> you see what variable is at fault, the problem is either already obvious, 
>> or 
>> you can start to think about how a pointer got the value it has and why. 
>>
>> Best 
>>   W. 
>>
>> -- 
>> ------------------------------------------------------------------------ 
>> Wolfgang Bangerth          email:                 [email protected] 
>>                             www: http://www.math.colostate.edu/~bangerth/ 
>>
>> -- 
> The deal.II project is located at http://www.dealii.org/
> For mailing list/forum options, see 
> https://groups.google.com/d/forum/dealii?hl=en
> --- 
> You received this message because you are subscribed to the Google Groups 
> "deal.II User Group" group.
> To unsubscribe from this group and stop receiving emails from it, send an 
> email to [email protected] <javascript:>.
> To view this discussion on the web visit 
> https://groups.google.com/d/msgid/dealii/d55e53b2-a0c8-4cc0-ab84-7521f8b23d81%40googlegroups.com
>  
> <https://groups.google.com/d/msgid/dealii/d55e53b2-a0c8-4cc0-ab84-7521f8b23d81%40googlegroups.com?utm_medium=email&utm_source=footer>
> .
>
>

-- 
The deal.II project is located at http://www.dealii.org/
For mailing list/forum options, see 
https://groups.google.com/d/forum/dealii?hl=en
--- 
You received this message because you are subscribed to the Google Groups 
"deal.II User Group" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/dealii/7f3caf40-3e6b-445e-b428-20ea6c060847%40googlegroups.com.
/* ---------------------------------------------------------------------
 *
 * Copyright (C) 2009 - 2019 by the deal.II authors
 *
 * This file is part of the deal.II library.
 *
 * The deal.II library is free software; you can use it, redistribute
 * it, and/or modify it under the terms of the GNU Lesser General
 * Public License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * The full text of the license can be found in the file LICENSE.md at
 * the top level directory of deal.II.
 *
 * ---------------------------------------------------------------------
 *
 * Author: Wolfgang Bangerth, Texas A&M University, 2009, 2010
 *         Timo Heister, University of Goettingen, 2009, 2010
 */
#include <deal.II/base/quadrature_lib.h>
#include <deal.II/base/function.h>
#include <deal.II/base/timer.h>
#include <deal.II/lac/generic_linear_algebra.h>

#include <deal.II/base/time_stepping.h>

#include <deal.II/base/utilities.h>
#include <deal.II/base/conditional_ostream.h>
#include <deal.II/base/index_set.h>

namespace LA
{
#undef DEAL_II_WITH_PETSC
#if defined(DEAL_II_WITH_PETSC) && !defined(DEAL_II_PETSC_WITH_COMPLEX) && \
	!(defined(DEAL_II_WITH_TRILINOS) && defined(FORCE_USE_OF_TRILINOS))
	using namespace dealii::LinearAlgebraPETSc;
#  define USE_PETSC_LA
#elif defined(DEAL_II_WITH_TRILINOS)
	using namespace dealii::LinearAlgebraTrilinos;
#else
#  error DEAL_II_WITH_PETSC or DEAL_II_WITH_TRILINOS required
#endif
} // namespace LA
#include <deal.II/lac/vector.h>
#include <deal.II/lac/full_matrix.h>
#include <deal.II/lac/solver_cg.h>
#include <deal.II/lac/affine_constraints.h>
#include <deal.II/lac/dynamic_sparsity_pattern.h>
#include <deal.II/lac/sparsity_tools.h>
#include <deal.II/lac/linear_operator.h>
#include <deal.II/lac/parpack_solver.h>
#include <deal.II/lac/solver_gmres.h>
#include <deal.II/lac/sparse_direct.h>

#include <deal.II/grid/grid_generator.h>
#include <deal.II/grid/tria_accessor.h>
#include <deal.II/grid/tria_iterator.h>

#include <deal.II/dofs/dof_handler.h>
#include <deal.II/dofs/dof_accessor.h>
#include <deal.II/dofs/dof_tools.h>

#include <deal.II/fe/fe_values.h>
#include <deal.II/fe/fe_q.h>

#include <deal.II/numerics/vector_tools.h>
#include <deal.II/numerics/data_out.h>
#include <deal.II/numerics/error_estimator.h>

#include <deal.II/matrix_free/fe_evaluation.h>
#include <deal.II/matrix_free/matrix_free.h>
#include <deal.II/matrix_free/operators.h>

#include <deal.II/differentiation/ad.h>

#include <deal.II/distributed/tria.h>
#include <deal.II/distributed/grid_refinement.h>

#include <fstream>
#include <iostream>

constexpr double time_step = 0.1;

constexpr unsigned int fe_degree            = 2;
constexpr unsigned int n_q_points_1d        = fe_degree + 2;
using Number                                = double;

enum LowStorageRungeKuttaScheme
{
	stage_3_order_3, /* Kennedy, Carpenter, Lewis, 2000 */
	stage_5_order_4, /* Kennedy, Carpenter, Lewis, 2000 */
	stage_7_order_4, /* Tselios, Simos, 2007 */
	stage_9_order_5, /* Kennedy, Carpenter, Lewis, 2000 */
};
constexpr LowStorageRungeKuttaScheme lsrk_scheme = stage_5_order_4;

namespace Step40
{
	using namespace dealii;

	class LowStorageRungeKuttaIntegrator
	{
	public:
		LowStorageRungeKuttaIntegrator(const LowStorageRungeKuttaScheme scheme)
		{
			if (scheme == stage_3_order_3)
			{
				bi = {{0.245170287303492, 0.184896052186740, 0.569933660509768}};
				ai = {{0.755726351946097, 0.386954477304099}};
			}
			else if (scheme == stage_5_order_4)
			{
				bi = {{1153189308089. / 22510343858157.,
					   1772645290293. / 4653164025191.,
					   -1672844663538. / 4480602732383.,
					   2114624349019. / 3568978502595.,
					   5198255086312. / 14908931495163.}};
				ai = {{970286171893. / 4311952581923.,
					   6584761158862. / 12103376702013.,
					   2251764453980. / 15575788980749.,
					   26877169314380. / 34165994151039.}};
			}
			else if (scheme == stage_7_order_4)
			{
				bi = {{0.0941840925477795334,
					   0.149683694803496998,
					   0.285204742060440058,
					   -0.122201846148053668,
					   0.0605151571191401122,
					   0.345986987898399296,
					   0.186627171718797670}};
				ai = {{0.241566650129646868 + bi[0],
					   0.0423866513027719953 + bi[1],
					   0.215602732678803776 + bi[2],
					   0.232328007537583987 + bi[3],
					   0.256223412574146438 + bi[4],
					   0.0978694102142697230 + bi[5]}};
			}
			else if (scheme == stage_9_order_5)
			{
				bi = {{2274579626619. / 23610510767302.,
					   693987741272. / 12394497460941.,
					   -347131529483. / 15096185902911.,
					   1144057200723. / 32081666971178.,
					   1562491064753. / 11797114684756.,
					   13113619727965. / 44346030145118.,
					   393957816125. / 7825732611452.,
					   720647959663. / 6565743875477.,
					   3559252274877. / 14424734981077.}};
				ai = {{1107026461565. / 5417078080134.,
					   38141181049399. / 41724347789894.,
					   493273079041. / 11940823631197.,
					   1851571280403. / 6147804934346.,
					   11782306865191. / 62590030070788.,
					   9452544825720. / 13648368537481.,
					   4435885630781. / 26285702406235.,
					   2357909744247. / 11371140753790.}};
			}
			else
				AssertThrow(false, ExcNotImplemented());
		}

		unsigned int n_stages() const
		{
			return bi.size();
		}

		template <typename VectorType, typename Operator>
		void perform_time_step(const Operator &pde_operator,
							   const double    current_time,
							   const double    time_step,
							   VectorType &    solution,
							   VectorType &    vec_Ti,
							   VectorType &    vec_Ki)
		{
			AssertDimension(ai.size() + 1, bi.size());
			pde_operator.perform_stage(current_time,
									   bi[0] * time_step,
					ai[0] * time_step,
					solution,
					vec_Ti,
					solution,
					vec_Ti);
			double sum_previous_bi = 0;
			for (unsigned int stage = 1; stage < bi.size(); ++stage)
			{
				const double c_i = sum_previous_bi + ai[stage - 1];
				pde_operator.perform_stage(current_time + c_i * time_step,
										   bi[stage] * time_step,
										   (stage == bi.size() - 1 ?
												0 :
												ai[stage] * time_step),
										   vec_Ti,
										   vec_Ki,
										   solution,
										   vec_Ti);
				sum_previous_bi += bi[stage - 1];
			}
		}

	private:
		std::vector<double> bi;
		std::vector<double> ai;
	};

	template <int dim, int degree, int n_points_1d>
	class LaplaceOperator
	{
	public:
		static constexpr unsigned int n_quadrature_points_1d = n_points_1d;

		LaplaceOperator() : computing_times(6){

		};

		void reinit(const Mapping<dim> &   mapping,
					const DoFHandler<dim> &dof_handler,
					const AffineConstraints<double> &hanging_node_constraints);

		void print_computing_times() const;

		void apply(const double                                      current_time,
				   const LinearAlgebra::distributed::Vector<Number> &src,
				   LinearAlgebra::distributed::Vector<Number> &      dst) const;

		void
		perform_stage(const Number cur_time,
					  const Number factor_solution,
					  const Number factor_ai,
					  const LinearAlgebra::distributed::Vector<Number> &current_Ti,
					  LinearAlgebra::distributed::Vector<Number> &      vec_Ki,
					  LinearAlgebra::distributed::Vector<Number> &      solution,
					  LinearAlgebra::distributed::Vector<Number> &next_Ti) const;

		void project(const Function<dim> &                       function,
					 LinearAlgebra::distributed::Vector<Number> &solution) const;

		void
		initialize_vector(LinearAlgebra::distributed::Vector<Number> &vector) const;

	private:
		MatrixFree<dim, Number>     data;
		mutable std::vector<double> computing_times;

		LinearAlgebra::distributed::Vector<double> inv_mass_matrix;

		void local_apply_inverse_mass_matrix(
				const MatrixFree<dim, Number> &                   data,
				LinearAlgebra::distributed::Vector<Number> &      dst,
				const LinearAlgebra::distributed::Vector<Number> &src,
				const std::pair<unsigned int, unsigned int> &     cell_range) const;

		void local_apply_cell(
				const MatrixFree<dim, Number> &                   data,
				LinearAlgebra::distributed::Vector<Number> &      dst,
				const LinearAlgebra::distributed::Vector<Number> &src,
				const std::pair<unsigned int, unsigned int> &     cell_range) const;
	};

	template <int dim, int degree, int n_points_1d>
	void LaplaceOperator<dim, degree, n_points_1d>::reinit(
			const Mapping<dim> &mapping,
			const DoFHandler<dim> &dof_handler,
			const AffineConstraints<double> &hanging_node_constraints){
		std::vector<const DoFHandler<dim> *>           dof_handlers({&dof_handler});
		AffineConstraints<double>                      dummy(hanging_node_constraints);
		std::vector<const AffineConstraints<double> *> constraints({&hanging_node_constraints});
		std::vector<Quadrature<1>>                     quadratures(
		{QGauss<1>(n_q_points_1d),
		 QGauss<1>(fe_degree + 1)});
		typename MatrixFree<dim, Number>::AdditionalData additional_data;
		additional_data.mapping_update_flags =
				(update_gradients | update_JxW_values | update_quadrature_points |
				 update_values);

		additional_data.tasks_parallel_scheme =
				MatrixFree<dim, Number>::AdditionalData::none;

		data.reinit(mapping,
					dof_handlers,
					constraints,
					quadratures,
					additional_data);

		data.initialize_dof_vector(inv_mass_matrix);
		FEEvaluation<dim, degree, n_points_1d, 1, Number> fe_eval(data);
		const unsigned int           n_q_points = fe_eval.n_q_points;
		for (unsigned int cell = 0; cell < data.n_cell_batches(); ++cell)
		{
			fe_eval.reinit(cell);
			for (unsigned int q = 0; q < n_q_points; ++q)
				fe_eval.submit_value(make_vectorized_array(1.), q);
			fe_eval.integrate(true, false);
			fe_eval.distribute_local_to_global(inv_mass_matrix);
		}
		inv_mass_matrix.compress(VectorOperation::add);
		for (unsigned int k = 0; k < inv_mass_matrix.local_size(); ++k)
			if (inv_mass_matrix.local_element(k) > 1e-15)
				inv_mass_matrix.local_element(k) =
						1. / inv_mass_matrix.local_element(k);
			else
				inv_mass_matrix.local_element(k) = 1;
	}

	template <int dim, int degree, int n_points_1d>
	void LaplaceOperator<dim, degree, n_points_1d>::initialize_vector(
			LinearAlgebra::distributed::Vector<Number> &vector) const
	{
		data.initialize_dof_vector(vector);
	}

	template <int dim, int degree, int n_points_1d>
	void LaplaceOperator<dim, degree, n_points_1d>::print_computing_times() const
	{
		ConditionalOStream        pcout(std::cout,
										Utilities::MPI::this_mpi_process(MPI_COMM_WORLD) ==
										0);
		Utilities::MPI::MinMaxAvg data;
		if (computing_times[2] + computing_times[3] > 0)
		{
			std::ios_base::fmtflags flags(std::cout.flags());
			std::cout << std::setprecision(3);
			pcout << "   Apply called "
				  << static_cast<std::size_t>(computing_times[3]) << " times"
																  << std::endl;
			pcout << "   RK update called "
				  << static_cast<std::size_t>(computing_times[2]) << " times"
																  << std::endl;
			pcout << "   Evaluate L_h:            ";
			data = Utilities::MPI::min_max_avg(computing_times[0], MPI_COMM_WORLD);
			pcout << std::scientific << std::setw(10) << data.min << " (p"
				  << std::setw(4) << data.min_index << ") " << std::setw(10)
				  << data.avg << std::setw(10) << data.max << " (p" << std::setw(4)
				  << data.max_index << ")" << std::endl;
			pcout << "   Inverse mass (+RK upd):  ";
			data = Utilities::MPI::min_max_avg(computing_times[1], MPI_COMM_WORLD);
			pcout << std::scientific << std::setw(10) << data.min << " (p"
				  << std::setw(4) << data.min_index << ") " << std::setw(10)
				  << data.avg << std::setw(10) << data.max << " (p" << std::setw(4)
				  << data.max_index << ")" << std::endl;
			pcout << "   Compute transport speed: ";
			data = Utilities::MPI::min_max_avg(computing_times[4], MPI_COMM_WORLD);
			pcout << std::scientific << std::setw(10) << data.min << " (p"
				  << std::setw(4) << data.min_index << ") " << std::setw(10)
				  << data.avg << std::setw(10) << data.max << " (p" << std::setw(4)
				  << data.max_index << ")" << std::endl;
			pcout << "   Compute errors/norms:    ";
			data = Utilities::MPI::min_max_avg(computing_times[5], MPI_COMM_WORLD);
			pcout << std::scientific << std::setw(10) << data.min << " (p"
				  << std::setw(4) << data.min_index << ") " << std::setw(10)
				  << data.avg << std::setw(10) << data.max << " (p" << std::setw(4)
				  << data.max_index << ")" << std::endl;
			std::cout.flags(flags);
		}
	}

	template <int dim, int degree, int n_points_1d>
	void LaplaceOperator<dim, degree, n_points_1d>::local_apply_cell(
			const MatrixFree<dim, Number> &                   data,
			LinearAlgebra::distributed::Vector<Number> &      dst,
			const LinearAlgebra::distributed::Vector<Number> &src,
			const std::pair<unsigned int, unsigned int> &     cell_range) const
	{
		FEEvaluation<dim, degree, n_points_1d, 1, Number> phi(data);

		for (unsigned int cell = cell_range.first; cell < cell_range.second; ++cell)
		{
			phi.reinit(cell);
			phi.gather_evaluate(src, false, true);
			for (unsigned int q = 0; q < phi.n_q_points; ++q)
			{
				phi.submit_gradient(-1. * phi.get_gradient(q), q);
			}
			phi.integrate_scatter(false, true, dst);
		}
	}

	template <int dim, int degree, int n_points_1d>
	void LaplaceOperator<dim, degree, n_points_1d>::local_apply_inverse_mass_matrix(
			const MatrixFree<dim, Number> &                   data,
			LinearAlgebra::distributed::Vector<Number> &      dst,
			const LinearAlgebra::distributed::Vector<Number> &src,
			const std::pair<unsigned int, unsigned int> &     cell_range) const
	{
		dst.reinit(src);
		dst = src;
		dst.scale(inv_mass_matrix);
	}

	template <int dim, int degree, int n_points_1d>
	void LaplaceOperator<dim, degree, n_points_1d>::apply(
			const double                                      current_time,
			const LinearAlgebra::distributed::Vector<Number> &src,
			LinearAlgebra::distributed::Vector<Number> &      dst) const
	{
		Timer timer;

		data.cell_loop(&LaplaceOperator::local_apply_cell,
					   this,
					   dst,
					   src,
					   true);
		computing_times[0] += timer.wall_time();
		timer.restart();

		data.cell_loop(&LaplaceOperator::local_apply_inverse_mass_matrix,
					   this,
					   dst,
					   dst);
		computing_times[1] += timer.wall_time();

		computing_times[3] += 1.;
	}

	template <int dim, int degree, int n_points_1d>
	void LaplaceOperator<dim, degree, n_points_1d>::perform_stage(
			const Number                                      current_time,
			const Number                                      factor_solution,
			const Number                                      factor_ai,
			const LinearAlgebra::distributed::Vector<Number> &current_Ti,
			LinearAlgebra::distributed::Vector<Number> &      vec_Ki,
			LinearAlgebra::distributed::Vector<Number> &      solution,
			LinearAlgebra::distributed::Vector<Number> &      next_Ti) const
	{
		Timer timer;

		data.cell_loop(&LaplaceOperator::local_apply_cell,
					   this,
					   vec_Ki,
					   current_Ti,
					   true);
		computing_times[0] += timer.wall_time();

		timer.restart();

		data.cell_loop(&LaplaceOperator::local_apply_inverse_mass_matrix,
					   this,
					   next_Ti,
					   vec_Ki,
					   std::function<void(const unsigned int, const unsigned int)>(),
					   [&](const unsigned int start_range, const unsigned int end_range) {
			const Number ai = factor_ai;
			const Number bi = factor_solution;
			if (ai == Number())
			{
				DEAL_II_OPENMP_SIMD_PRAGMA
						for (unsigned int i = start_range; i < end_range; ++i)
				{
					const Number K_i          = next_Ti.local_element(i);
					const Number sol_i        = solution.local_element(i);
					solution.local_element(i) = sol_i + bi * K_i;
				}
			}
			else
			{
				DEAL_II_OPENMP_SIMD_PRAGMA
						for (unsigned int i = start_range; i < end_range; ++i)
				{
					const Number K_i          = next_Ti.local_element(i);
					const Number sol_i        = solution.local_element(i);
					solution.local_element(i) = sol_i + bi * K_i;
					next_Ti.local_element(i)  = sol_i + ai * K_i;
				}
			}
		});

		computing_times[1] += timer.wall_time();

		computing_times[2] += 1.;
	}


	template <int dim>
	class InitialCondition : public Function<dim>
	{
	public:
		InitialCondition() : Function<dim>(1){

		}
		virtual double value(const Point<dim> &p, const unsigned int component) const override;
		virtual Tensor<1, dim> gradient(const Point<dim> &p, const unsigned int component) const override;
	};

	template <int dim>
	double InitialCondition<dim>::value(const Point<dim> &p, const unsigned int) const{
		const double x = p[0];
		const double y = p[1];

		return sin(M_PI * x) * sin(M_PI * y);
	}

	template <int dim>
	Tensor<1, dim> InitialCondition<dim>::gradient(const Point<dim> &p, const unsigned int) const{
		const double x = p[0];
		const double y = p[1];

		Tensor<1, dim> return_value;
		return_value[0] = M_PI * cos(M_PI * x) * sin(M_PI * y);
		return_value[1] = M_PI * sin(M_PI * x) * cos(M_PI * y);

		return return_value;
	}

	template <int dim>
	class LaplaceProblem
	{
	public:
		LaplaceProblem();
		void run_with_MF();
	private:
		void setup_system();

		void output_results(const unsigned int cycle) const;

		MPI_Comm mpi_communicator;
		parallel::distributed::Triangulation<dim> triangulation;
		FE_Q<dim>       fe;
		MappingQGeneric<dim> mapping;
		DoFHandler<dim> dof_handler;
		AffineConstraints<double> constraints;
		LaplaceOperator<dim, fe_degree, n_q_points_1d> laplace_operator;
		LinearAlgebra::distributed::Vector<Number> locally_relevant_solution_MF;

		ConditionalOStream pcout;
		TimerOutput        computing_timer;
	};

	template <int dim>
	LaplaceProblem<dim>::LaplaceProblem()
		: mpi_communicator(MPI_COMM_WORLD)
		, triangulation(mpi_communicator,
						typename Triangulation<dim>::MeshSmoothing(
							Triangulation<dim>::smoothing_on_refinement |
							Triangulation<dim>::smoothing_on_coarsening))
		, fe(fe_degree)
		, mapping(fe.degree + 1)
		, dof_handler(triangulation)
		, pcout(std::cout,
				(Utilities::MPI::this_mpi_process(mpi_communicator) == 0))
		, computing_timer(mpi_communicator,
						  pcout,
						  TimerOutput::summary,
						  TimerOutput::wall_times)
	{}

	template <int dim>
	void LaplaceProblem<dim>::setup_system()
	{
		TimerOutput::Scope t(computing_timer, "setup");
		dof_handler.distribute_dofs(fe);

		IndexSet locally_relevant_dofs;
		DoFTools::extract_locally_relevant_dofs(dof_handler, locally_relevant_dofs);

		constraints.clear();
		constraints.reinit(locally_relevant_dofs);
		DoFTools::make_hanging_node_constraints(dof_handler, constraints);
		VectorTools::interpolate_boundary_values(dof_handler,
												 0,
												 Functions::ZeroFunction<dim>(),
												 constraints);
		constraints.close();

		laplace_operator.reinit(mapping, dof_handler, constraints);
		laplace_operator.initialize_vector(locally_relevant_solution_MF);
	}

	template <int dim>
	void LaplaceProblem<dim>::output_results(const unsigned int cycle) const
	{
		DataOut<dim> data_out;
		data_out.attach_dof_handler(dof_handler);
		data_out.add_data_vector(locally_relevant_solution_MF, "u_MF");
		Vector<float> subdomain(triangulation.n_active_cells());
		for (unsigned int i = 0; i < subdomain.size(); ++i)
			subdomain(i) = triangulation.locally_owned_subdomain();
		data_out.add_data_vector(subdomain, "subdomain");
		data_out.build_patches();

		data_out.write_vtu_with_pvtu_record("", "solution", cycle);
	}

	template <int dim>
	void LaplaceProblem<dim>::run_with_MF(){
		{
			const unsigned int n_vect_number =
					VectorizedArray<Number>::n_array_elements;
			const unsigned int n_vect_bits = 8 * sizeof(Number) * n_vect_number;

			pcout << "Running with "
				  << Utilities::MPI::n_mpi_processes(MPI_COMM_WORLD)
				  << " MPI processes" << std::endl;
			pcout << "Vectorization over " << n_vect_number << " "
				  << (std::is_same<Number, double>::value ? "doubles" : "floats")
				  << " = " << n_vect_bits << " bits ("
				  << Utilities::System::get_current_vectorization_level()
				  << "), VECTORIZATION_LEVEL=" << DEAL_II_COMPILER_VECTORIZATION_LEVEL
				  << std::endl;
		}

		const unsigned int n_cycles = 8;

		double local_time = 0., local_end_time = 0.;

		GridGenerator::hyper_cube(triangulation);
		triangulation.refine_global(5);

		setup_system();

		for (unsigned int cycle = 0; cycle < n_cycles; ++cycle)
		{
			local_time = cycle * time_step;
			local_end_time = (cycle + 1) * time_step;
			pcout << "Cycle " << cycle << ':' << std::endl;

			//triangulation.refine_global(1);


			if(cycle == 0){

				LinearAlgebra::distributed::Vector<Number> local_solution;
				laplace_operator.initialize_vector(local_solution);

				VectorTools::project(dof_handler,
									 constraints,
									 QGauss<dim>(fe.degree + 1),
									 InitialCondition<dim>(),
									 local_solution);

				locally_relevant_solution_MF = local_solution;
			}

			LowStorageRungeKuttaIntegrator integrator(lsrk_scheme);

			LinearAlgebra::distributed::Vector<Number> rk_register_1;
			LinearAlgebra::distributed::Vector<Number> rk_register_2;
			rk_register_1.reinit(locally_relevant_solution_MF);
			rk_register_2.reinit(locally_relevant_solution_MF);

			double local_time_step = 0.01;

			//			if (Utilities::MPI::n_mpi_processes(mpi_communicator) <= 32 && cycle == 0)
			//			{
			//				TimerOutput::Scope t(computing_timer, "output");
			//				output_results(cycle);
			//			}

			pcout << "   Number of active cells:       "
				  << triangulation.n_global_active_cells() << std::endl
				  << "   Number of degrees of freedom: " << dof_handler.n_dofs()
				  << std::endl;

			Timer        timer;

			double       wtime           = 0;
			double       output_time     = 0;
			unsigned int timestep_number = 1;

			while (local_time < (local_end_time - 1e-12))
			{
				timer.restart();
				++timestep_number;

				integrator.perform_time_step(laplace_operator,
											 local_time,
											 local_time_step,
											 locally_relevant_solution_MF,
											 rk_register_1,
											 rk_register_2);

				constraints.distribute(locally_relevant_solution_MF);

				local_time += local_time_step;

				wtime += timer.wall_time();

				timer.restart();

				//				if (static_cast<int>(time / output_tick) !=
				//						static_cast<int>((time - local_time_step) / output_tick) ||
				//						time >= time_step - 1e-12)
				//					output_results(
				//								static_cast<unsigned int>(std::round(time / output_tick)));
				output_time += timer.wall_time();
			}

			pcout << "Number of time steps: " << timestep_number << '\n';
			timestep_number = 1;
			if (Utilities::MPI::n_mpi_processes(mpi_communicator) <= 32)
			{
				TimerOutput::Scope t(computing_timer, "output");
				output_results(cycle + 1);
			}
			computing_timer.print_summary();
			computing_timer.reset();
			pcout << std::endl;
		}

		local_time = 0.;
	}
} // namespace Step40
int main(int argc, char *argv[])
{
	std::cout << "Hello World\n";
	try
	{
		using namespace dealii;
		using namespace Step40;
#ifdef DEAL_II_WITH_PETSC
		Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv, 1);
#else
		Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv, dealii::numbers::invalid_unsigned_int);
#endif
		LaplaceProblem<2> laplace_problem_2d;
		//laplace_problem_2d.run();
		laplace_problem_2d.run_with_MF();
	}
	catch (std::exception &exc)
	{
		std::cerr << std::endl
				  << std::endl
				  << "----------------------------------------------------"
				  << std::endl;
		std::cerr << "Exception on processing: " << std::endl
				  << exc.what() << std::endl
				  << "Aborting!" << std::endl
				  << "----------------------------------------------------"
				  << std::endl;
		return 1;
	}
	catch (...)
	{
		std::cerr << std::endl
				  << std::endl
				  << "----------------------------------------------------"
				  << std::endl;
		std::cerr << "Unknown exception!" << std::endl
				  << "Aborting!" << std::endl
				  << "----------------------------------------------------"
				  << std::endl;
		return 1;
	}
	return 0;
}

Reply via email to