Hello, I found one kind of treatment in
dealii/tests/matrix_free/stokes_computation.cc (lines 884-919):
it uses FEEvaluation<...>::begin_dof_values() to loop over dofs_per_cell
rather than over dofs_per_component.

FEEvaluation<dim, degree_v, degree_v + 1, dim, number> velocity(data, 0);
    for (unsigned int cell = cell_range.first; cell < cell_range.second; ++
cell)
      {
        velocity.reinit(cell);
        AlignedVector<VectorizedArray<number>> diagonal(velocity.
dofs_per_cell);
        for (unsigned int i = 0; i < velocity.dofs_per_cell; ++i)
          {
            for (unsigned int j = 0; j < velocity.dofs_per_cell; ++j)
              velocity.begin_dof_values()[j] = VectorizedArray<number>();
            velocity.begin_dof_values()[i] = make_vectorized_array<number>(
1.);

            velocity.evaluate(false, true, false);
            for (unsigned int q = 0; q < velocity.n_q_points; ++q)
              {
                velocity.submit_symmetric_gradient(
                  viscosity_x_2(cell, q) * velocity.get_symmetric_gradient
(q),
                  q);
              }
            velocity.integrate(false, true);

            diagonal[i] = velocity.begin_dof_values()[i];
          }

        for (unsigned int i = 0; i < velocity.dofs_per_cell; ++i)
          velocity.begin_dof_values()[i] = diagonal[i];
        velocity.distribute_local_to_global(dst);
      }
I used this method to deal with my case, but it still does not work well
(it performs worse than PreconditionIdentity).
here is my code attached.

best,
m.

-- 
The deal.II project is located at http://www.dealii.org/
For mailing list/forum options, see 
https://groups.google.com/d/forum/dealii?hl=en
--- 
You received this message because you are subscribed to the Google Groups 
"deal.II User Group" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/dealii/d7490391-33b0-4d99-b029-a1e0fa0c9ef9%40googlegroups.com.
// First include the necessary files from the deal.II library.
#include <deal.II/base/quadrature_lib.h>
#include <deal.II/base/function.h>
#include <deal.II/base/timer.h>

#include <deal.II/lac/affine_constraints.h>
#include <deal.II/lac/solver_cg.h>
#include <deal.II/lac/la_parallel_vector.h>
#include <deal.II/lac/precondition.h>

#include <deal.II/fe/fe_q.h>
#include <deal.II/fe/fe_system.h>

#include <deal.II/grid/tria.h>
#include <deal.II/grid/grid_generator.h>

#include <deal.II/multigrid/multigrid.h>
#include <deal.II/multigrid/mg_transfer_matrix_free.h>
#include <deal.II/multigrid/mg_tools.h>
#include <deal.II/multigrid/mg_coarse.h>
#include <deal.II/multigrid/mg_smoother.h>
#include <deal.II/multigrid/mg_matrix.h>

#include <deal.II/numerics/data_out.h>
#include <deal.II/numerics/vector_tools.h>

// This includes the data structures for the efficient implementation of
// matrix-free methods or more generic finite element operators with the class
// MatrixFree.
#include <deal.II/matrix_free/matrix_free.h>
#include <deal.II/matrix_free/operators.h>
#include <deal.II/matrix_free/fe_evaluation.h>

#include <iostream>
#include <fstream>

namespace MyMechanics
{
  using namespace dealii;

  // Polynomial degree of the finite element, the space dimension, and the
  // number of vector components of the displacement field (one component
  // per space dimension for this elasticity problem).
  const unsigned int degree_finite_element = 2;
  const unsigned int dimension             = 3;
  const unsigned int n_components          = 3;

  // Get cubic elastic tensor with dim = 2/3
  // Input:
  //       three independent constants C11, C12, C44
  // Output:
  //       4 rank 2/3 dim symmetric tensor
  // Build the rank-4 elasticity tensor for a cubic material from its three
  // independent constants C11, C12 and C44:
  //   C_iiii = C11, C_iijj = C12 (i != j), C_ijij = C44 (i != j).
  // Because SymmetricTensor stores only the symmetric part, assigning the
  // (i,k,j,l) entry also covers the (i,l,j,k) symmetry; only dim = 2 or 3
  // is supported.
  template <int dim>
  SymmetricTensor<4,dim>
  get_stress_strain_tensor(const double C11, const double C12, const double C44)
  {
    Assert((dim == 3) || (dim == 2), ExcInternalError());
    SymmetricTensor<4,dim> elasticity;
    for (unsigned int i = 0; i < dim; ++i)
      for (unsigned int j = 0; j < dim; ++j)
        for (unsigned int k = 0; k < dim; ++k)
          for (unsigned int l = 0; l < dim; ++l)
            {
              if (i == j && k == l)
                elasticity[i][j][k][l] = (i == k) ? C11 : C12;
              else if (i == k && j == l)
                elasticity[i][j][k][l] = C44;
            }
    return elasticity;
  }

  // Matrix-free operator for linear elasticity with a constant stiffness
  // tensor Cmatx. Derives from MatrixFreeOperators::Base, which provides
  // vmult() in terms of apply_add() and the multigrid interface routines.
  template <int dim, int fe_degree, int n_components, typename number>
  class MechanicsOperator
    : public MatrixFreeOperators::
      Base<dim, LinearAlgebra::distributed::Vector<number>>
  {
  public:
    using value_type = number;

    // Initializes Cmatx via get_stress_strain_tensor() (see the definition).
    MechanicsOperator();

    // Computes dst = -K * src, reading src through the no-Dirichlet DoF
    // block (index 1 of the MatrixFree storage) and distributing through
    // the Dirichlet-constrained block (index 0).
    void compute_residual (LinearAlgebra::distributed::Vector<number> &dst,
                           const LinearAlgebra::distributed::Vector<number> &src) const;                           

    // Releases the underlying MatrixFree data (forwards to the base class).
    void clear();

    // Fills this->inverse_diagonal_entries with the entry-wise inverse of
    // the operator diagonal, for use as a Jacobi/Chebyshev preconditioner.
    virtual void compute_diagonal();

  private:
    // Adds K * src to dst (called by the base class's vmult()).
    virtual 
    void 
    apply_add(LinearAlgebra::distributed::Vector<number> &dst,
              const LinearAlgebra::distributed::Vector<number> &src) const;
    // Cell-range worker for apply_add(), run through MatrixFree::cell_loop.
    void
    local_apply(const MatrixFree<dim, number>                    &data,
                LinearAlgebra::distributed::Vector<number>       &dst,
                const LinearAlgebra::distributed::Vector<number> &src,
                const std::pair<unsigned int, unsigned int>      &cell_range) const;

    // Cell-range worker for compute_residual().
    void
    local_compute_residual
      (const MatrixFree<dim, number>                    &data,
       LinearAlgebra::distributed::Vector<number>       &dst,
       const LinearAlgebra::distributed::Vector<number> &src,
       const std::pair<unsigned int, unsigned int>      &cell_range) const;

    // Cell-range worker for compute_diagonal(); `dummy` is an unused
    // placeholder required by the cell_loop signature.
    void 
    local_compute_diagonal 
      (const MatrixFree<dim,number>               &data,
       LinearAlgebra::distributed::Vector<number> &dst,
       const unsigned int                         &dummy,
       const std::pair<unsigned int,unsigned int> &cell_range) const;
    
    // Constant rank-4 elasticity tensor applied at every quadrature point.
    SymmetricTensor<4,dim> Cmatx;
  };

  // Constructor: set up the cubic elasticity tensor with fixed material
  // constants. (An alternative, softer material would be e.g.
  // get_stress_strain_tensor<dim>(57.2, 36.1, 35.9).)
  template <int dim, int fe_degree, int n_components, typename number>
  MechanicsOperator<dim, fe_degree, n_components, number>::MechanicsOperator()
    : MatrixFreeOperators::Base<dim,
                                LinearAlgebra::distributed::Vector<number>>()
    , Cmatx(get_stress_strain_tensor<dim>(2322.98, 409.938, 956.522))
  {}

  // Release the MatrixFree data held by the base class.
  template <int dim, int fe_degree, int n_components, typename number>
  void
  MechanicsOperator<dim, fe_degree, n_components, number>::clear()
  {
    MatrixFreeOperators::Base<dim,
      LinearAlgebra::distributed::Vector<number>>::clear();
  }

  // Apply the elasticity operator on one range of (vectorized) cells:
  // for each cell, dst += B^T C B src in the usual matrix-free fashion.
  template <int dim, int fe_degree, int n_components, typename number>
  void
  MechanicsOperator<dim, fe_degree, n_components, number>::
  local_apply(const MatrixFree<dim,number>                     &data,
              LinearAlgebra::distributed::Vector<number>       &dst,
              const LinearAlgebra::distributed::Vector<number> &src,
              const std::pair<unsigned int,unsigned int>       &cell_range) const
  {
    FEEvaluation<dim, fe_degree, fe_degree + 1, n_components, number>
      fe_eval(data);

    for (unsigned int cell = cell_range.first; cell < cell_range.second;
         ++cell)
      {
        fe_eval.reinit(cell);
        fe_eval.read_dof_values(src);
        // Only gradients are needed: evaluate(values=false, gradients=true).
        fe_eval.evaluate(false, true);
        for (unsigned int q = 0; q < fe_eval.n_q_points; ++q)
          fe_eval.submit_symmetric_gradient(
            Cmatx * fe_eval.get_symmetric_gradient(q), q);
        fe_eval.integrate(false, true);
        fe_eval.distribute_local_to_global(dst);
      }
  }

  // Entry point used by the base class's vmult(): run local_apply() over
  // all cells via the MatrixFree cell loop, accumulating into dst.
  template <int dim, int fe_degree, int n_components, typename number>
  void
  MechanicsOperator<dim, fe_degree, n_components, number>::
  apply_add(LinearAlgebra::distributed::Vector<number>       &dst,
            const LinearAlgebra::distributed::Vector<number> &src) const
  {
    this->data->cell_loop(&MechanicsOperator::local_apply, this, dst, src);
  }

  // Compute dst = -K * src (the residual contribution of the current
  // solution); dst is zeroed first so the cell loop starts from scratch.
  template <int dim, int fe_degree, int n_components, typename number>
  void
  MechanicsOperator<dim, fe_degree, n_components, number>::
  compute_residual(LinearAlgebra::distributed::Vector<number>       &dst,
                   const LinearAlgebra::distributed::Vector<number> &src) const
  {
    dst = 0;
    this->data->cell_loop(&MechanicsOperator::local_compute_residual,
                          this, dst, src);
  }

  // Cell worker for compute_residual(): the solution is read through DoF
  // block 1 (constraints without Dirichlet conditions) so inhomogeneous
  // boundary values are seen, while the result is tested and written back
  // through DoF block 0 (with Dirichlet conditions).
  template <int dim, int fe_degree, int n_components, typename number>
  void
  MechanicsOperator<dim, fe_degree, n_components, number>::
  local_compute_residual
  (const MatrixFree<dim, number>                    &,
   LinearAlgebra::distributed::Vector<number>       &dst,
   const LinearAlgebra::distributed::Vector<number> &solution,
   const std::pair<unsigned int, unsigned int>      &cell_range) const
  {
    FEEvaluation<dim, fe_degree, fe_degree + 1, n_components, number>
      eval_constrained(*this->data, 0);
    FEEvaluation<dim, fe_degree, fe_degree + 1, n_components, number>
      eval_full(*this->data, 1);

    for (unsigned int cell = cell_range.first; cell < cell_range.second;
         ++cell)
      {
        eval_constrained.reinit(cell);
        eval_full.reinit(cell);
        eval_full.read_dof_values(solution);
        eval_full.evaluate(false, true);
        for (unsigned int q = 0; q < eval_constrained.n_q_points; ++q)
          eval_constrained.submit_symmetric_gradient(
            -Cmatx * eval_full.get_symmetric_gradient(q), q);
        eval_constrained.integrate(false, true);
        eval_constrained.distribute_local_to_global(dst);
      }
  }

  // Assemble the entry-wise inverse of the operator diagonal into
  // this->inverse_diagonal_entries (used as a Jacobi-type preconditioner
  // inside the Chebyshev smoother).
  template <int dim, int fe_degree, int n_components, typename number>
  void
  MechanicsOperator<dim, fe_degree, n_components, number>::compute_diagonal()
  {
    this->inverse_diagonal_entries.reset(
      new DiagonalMatrix<LinearAlgebra::distributed::Vector<number>>());
    LinearAlgebra::distributed::Vector<number> &inverse_diagonal =
      this->inverse_diagonal_entries->get_vector();
    this->data->initialize_dof_vector(inverse_diagonal);

    // Accumulate the raw diagonal cell by cell; the dummy argument only
    // satisfies the cell_loop signature.
    unsigned int dummy = 0;
    this->data->cell_loop(&MechanicsOperator::local_compute_diagonal, this,
                          inverse_diagonal, dummy);

    // Constrained rows get a unit diagonal entry.
    this->set_constrained_entries_to_one(inverse_diagonal);

    // Invert entry-wise; a non-positive entry would signal a defective
    // (non-SPD) operator.
    for (unsigned int idx = 0; idx < inverse_diagonal.local_size(); ++idx)
      {
        Assert(inverse_diagonal.local_element(idx) > 0.,
               ExcMessage("No diagonal entry in a positive definite operator "
                          "should be zero"));
        inverse_diagonal.local_element(idx) =
          1. / inverse_diagonal.local_element(idx);
      }
  }

  // Cell worker for compute_diagonal(): computes the diagonal of the local
  // cell matrix by applying the operator to each unit basis vector in turn
  // (the begin_dof_values()/dofs_per_cell technique from
  // dealii/tests/matrix_free/stokes_computation.cc) and keeping only the
  // i-th entry of the result.
  template <int dim, int fe_degree, int n_components, typename number>
  void
  MechanicsOperator<dim, fe_degree, n_components, number>::
  local_compute_diagonal 
  (const MatrixFree<dim,number>               &data,
   LinearAlgebra::distributed::Vector<number> &dst,
   const unsigned int &,
   const std::pair<unsigned int,unsigned int> &cell_range) const
  {
    FEEvaluation<dim,fe_degree,fe_degree+1,n_components,number> 
      phi (data, this->selected_rows[0]);
    for (unsigned int cell = cell_range.first; cell < cell_range.second; ++cell)
    {
      phi.reinit(cell);
      AlignedVector<VectorizedArray<number>> diagonal(phi.dofs_per_cell);
      // std::cout << "phi.sym_grad[0]: " << phi.get_symmetric_gradient(0) << '\n';
      for (unsigned int i=0; i<phi.dofs_per_cell; ++i)
      {
        // Set the local dof vector to the i-th unit vector...
        for (unsigned int j=0; j<phi.dofs_per_cell; ++j)
          phi.begin_dof_values()[j] = VectorizedArray<number>();
        phi.begin_dof_values()[i] = make_vectorized_array<number>(1.);

        // ...apply the cell operator to it (gradients only)...
        phi.evaluate(false, true);
        // std::cout << "after evaluate phi.sym_grad[0]: " 
        //   << phi.get_symmetric_gradient(0) << '\n';
        for (unsigned int q=0; q<phi.n_q_points; ++q)
        {
          phi.submit_symmetric_gradient(Cmatx*phi.get_symmetric_gradient(q),q);
        }
        phi.integrate(false, true);
        // ...and keep the i-th entry of the result: that is (K_cell)_ii.
        diagonal[i] = phi.begin_dof_values()[i];
        // std::cout << "diagonal[i]: " << diagonal[i] << std::endl;
      }

      // Load the collected diagonal back into the evaluator and distribute
      // it to the global vector (accumulating over cells).
      for (unsigned int i=0; i<phi.dofs_per_cell; ++i)
      {
        phi.begin_dof_values()[i] = diagonal[i];
        // phi.begin_dof_values()[i] = make_vectorized_array<number>(1.);
        // std::cout << "diagonal[i]: " << diagonal[i] << std::endl;
      }
      phi.distribute_local_to_global(dst);
    }
    // NOTE(review): the alternative implementation below iterates per
    // component via submit_dof_value()/dofs_per_component; it was left
    // disabled by the original author and is kept for reference.
    // AlignedVector<Tensor<1,n_components,VectorizedArray<number>>>
    //   diagonal(phi.dofs_per_component);

    // for (unsigned int cell = cell_range.first; cell < cell_range.second; ++cell)
    // {
    //   phi.reinit(cell);
    //   for (unsigned int i = 0; i < phi.dofs_per_component; ++i)
    //   {     
    //     for (unsigned int j = 0; j < phi.dofs_per_component; ++j)
    //     {
    //       phi.submit_dof_value(Tensor<1,dim,VectorizedArray<number>>(), j);
    //     }           
    //     for (unsigned int d=0; d<dim; d++)
    //     {
    //       for (unsigned int j = 0; j < phi.dofs_per_component; ++j)
    //       {
    //         phi.submit_dof_value(Tensor<1,dim,VectorizedArray<number>>(), j);
    //       }
    //       std::cout << "d: " << d << '\n';
    //       Tensor<1,dim,VectorizedArray<number>> diagonal_tensor;
    //       diagonal_tensor[d] = 1.;          
    //       phi.submit_dof_value(diagonal_tensor, i);
    //       std::cout << "get_dof_value(i): " << phi.get_dof_value(i) << '\n';
    //       std::cout << "get_gradient(0): " << phi.get_symmetric_gradient(0) << '\n';
    //       phi.evaluate(false, true);
    //       for (unsigned int q = 0; q < phi.n_q_points; ++q)
    //       {
    //         // phi.submit_symmetric_gradient(Cmatx*phi.get_symmetric_gradient(q),q);
    //         phi.submit_symmetric_gradient(1000*Cmatx*phi.get_symmetric_gradient(q),q);
    //       }   
    //       std::cout << "get_gradient(0): " << phi.get_symmetric_gradient(0) << '\n';
    //       phi.integrate(false,true);
    //       std::cout << "get_dof_value(i): " << phi.get_dof_value(i) << '\n';
    //       diagonal[i][d] = phi.get_dof_value(i)[d]; 
    //       std::cout << "diagonal[i][d]: " << diagonal[i][d] << std::endl;
    //     }               
    //   }
      
    //   for (unsigned int i = 0; i < phi.dofs_per_component; ++i)
    //     phi.submit_dof_value(diagonal[i],i);
              
    //   phi.distribute_local_to_global(dst);
    // }
  }

  // Driver class for the elasticity problem: owns the mesh, DoF handler,
  // constraints, the matrix-free system operator, and the geometric
  // multigrid level operators used for preconditioning.
  template <int dim>
  class MechanicsProblem
  {
  public:
    MechanicsProblem(const double C11, 
                     const double C12, 
                     const double C44);
    void run();

  private:
    // Distributes DoFs, builds constraints, and initializes the
    // matrix-free storage on the active mesh and on every MG level.
    void setup_system();
    // Assembles the (here: zero body-force) right-hand side vector.
    void assemble_rhs();
    // Imposes the inhomogeneous Dirichlet values on the solution vector.
    void interpolate_boundary_values();
    void solve();
    void output_results(const unsigned int cycle) const;

#ifdef DEAL_II_WITH_P4EST
    parallel::distributed::Triangulation<dim> triangulation;
#else
    Triangulation<dim> triangulation;
#endif

    // Vector-valued displacement element: dim copies of FE_Q.
    // FE_Q<dim>       fe;
    FESystem<dim>   fe;
    DoFHandler<dim> dof_handler;

    // Hanging-node + Dirichlet constraints, and a variant with only
    // hanging-node constraints (used when evaluating the residual).
    AffineConstraints<double>                 constraints;
    AffineConstraints<double>                 constraints_without_dirichlet;

    std::shared_ptr<MatrixFree<dim,double>>   system_matrix_free;
    using SystemMatrixType =
      MechanicsOperator<dim, degree_finite_element, n_components, double>;
    SystemMatrixType system_matrix;

    // Multigrid level operators are stored in single precision.
    MGConstrainedDoFs mg_constrained_dofs;
    using LevelMatrixType = 
      MechanicsOperator<dim, degree_finite_element, n_components, float>;
    MGLevelObject<LevelMatrixType> mg_matrices;

    LinearAlgebra::distributed::Vector<double> solution;
    LinearAlgebra::distributed::Vector<double> solution_update;
    LinearAlgebra::distributed::Vector<double> system_rhs;

    double             setup_time;
    ConditionalOStream pcout;
    ConditionalOStream time_details;

    SymmetricTensor<4,dim> Cmatx;
  };

  // Constructor: builds the (possibly distributed) triangulation, the
  // vector-valued FE_Q element, and the material tensor from the given
  // cubic elastic constants.
  template <int dim>
  MechanicsProblem<dim>::MechanicsProblem(const double C11, 
                                          const double C12, 
                                          const double C44)
    :
#ifdef DEAL_II_WITH_P4EST
    triangulation(
      MPI_COMM_WORLD,
      Triangulation<dim>::limit_level_difference_at_vertices,
      parallel::distributed::Triangulation<dim>::construct_multigrid_hierarchy)
    ,
#else
    triangulation(Triangulation<dim>::limit_level_difference_at_vertices)
    ,
#endif
    fe(FE_Q<dim>(degree_finite_element),dim)
    , dof_handler(triangulation)
    , setup_time(0.)
    , pcout(std::cout, Utilities::MPI::this_mpi_process(MPI_COMM_WORLD) == 0)
    // The MechanicsProblem class holds an additional output stream that
    // collects detailed timings about the setup phase. This stream, called
    // time_details, is disabled by default through the @p false argument
    // specified here. For detailed timings, removing the @p false argument
    // prints all the details.
    , time_details(std::cout,
                 false && Utilities::MPI::this_mpi_process(MPI_COMM_WORLD) == 0)
    , Cmatx(get_stress_strain_tensor<dim>(C11,C12,C44))
  {}

  // Distribute DoFs, build the constraint sets, and initialize the
  // matrix-free storage on the active mesh and on all multigrid levels.
  //
  // Two fixes compared to the original version:
  //  1. The multigrid zero-boundary constraints are now registered for
  //     boundary ids 10 and 20 (x-component only), matching the fine-level
  //     Dirichlet conditions. The original inserted boundary id 0, which is
  //     not a Dirichlet boundary here, so the level matrices were
  //     inconsistent with the fine-level operator — a likely cause of the
  //     multigrid preconditioner performing worse than PreconditionIdentity.
  //  2. The system MatrixFree object is built and attached exactly once;
  //     the original built a second, identical MatrixFree and
  //     re-initialized system_matrix with it, discarding the first setup.
  template <int dim>
  void MechanicsProblem<dim>::setup_system()
  {
    Timer time;
    setup_time = 0;

    system_matrix_free.reset(new MatrixFree<dim,double>());
    system_matrix.clear();
    mg_matrices.clear_elements();

    dof_handler.distribute_dofs(fe);
    dof_handler.distribute_mg_dofs();

    pcout << "Number of degrees of freedom: " << dof_handler.n_dofs()
          << std::endl;

    IndexSet locally_relevant_dofs;
    DoFTools::extract_locally_relevant_dofs(dof_handler, locally_relevant_dofs);

    // Constraints including Dirichlet conditions: the x-displacement is
    // fixed on the faces with boundary ids 10 (left) and 20 (right).
    constraints.clear();
    constraints.reinit(locally_relevant_dofs);
    DoFTools::make_hanging_node_constraints(dof_handler, constraints);
    const FEValuesExtractors::Scalar disp_x(0);
    VectorTools::interpolate_boundary_values(dof_handler,
                                             10, // left face
                                             Functions::ZeroFunction<dim>(dim),
                                             constraints,
                                             fe.component_mask(disp_x));
    VectorTools::interpolate_boundary_values(dof_handler,
                                             20, // right face
                                             Functions::ZeroFunction<dim>(dim),
                                             constraints,
                                             fe.component_mask(disp_x));
    constraints.close();

    // A second constraint set with only hanging-node constraints; it is
    // used as DoF block 1 so the residual evaluation can read the solution
    // including its inhomogeneous boundary values.
    constraints_without_dirichlet.clear();
    constraints_without_dirichlet.reinit(locally_relevant_dofs);
    DoFTools::make_hanging_node_constraints(dof_handler,
                                            constraints_without_dirichlet);
    constraints_without_dirichlet.close();

    setup_time += time.wall_time();
    time_details << "Distribute DoFs & B.C.     (CPU/wall) " << time.cpu_time()
                 << "s/" << time.wall_time() << "s" << std::endl;
    time.restart();

    // The MatrixFree storage carries the same DoFHandler twice, once with
    // each constraint set (block 0: with Dirichlet, block 1: without).
    std::vector<const DoFHandler<dim> *> dof_handlers(2, &dof_handler);
    {
      std::vector<const AffineConstraints<double> *> constraint(2);
      constraint[0] = &constraints;
      constraint[1] = &constraints_without_dirichlet;
      typename MatrixFree<dim, double>::AdditionalData additional_data;
      additional_data.tasks_parallel_scheme =
        MatrixFree<dim, double>::AdditionalData::none;
      additional_data.mapping_update_flags =
        (update_gradients | update_JxW_values | update_quadrature_points);
      // Build the storage once and attach it to the system operator, which
      // acts on block 0 only.
      system_matrix_free->reinit(dof_handlers, constraint,
                                 QGauss<1>(fe.degree + 1), additional_data);
      std::vector<unsigned int> selected_blocks(1, 0);
      system_matrix.initialize(system_matrix_free, selected_blocks);
    }

    system_matrix.initialize_dof_vector(solution);
    system_matrix.initialize_dof_vector(solution_update);
    system_matrix.initialize_dof_vector(system_rhs);

    setup_time += time.wall_time();
    time_details << "Setup matrix-free system   (CPU/wall) " << time.cpu_time()
                 << "s/" << time.wall_time() << "s" << std::endl;
    time.restart();

    // Next, initialize the matrices for the multigrid method on all the
    // levels, following the same pattern as on the active mesh. The
    // MGConstrainedDoFs object records which level DoFs carry homogeneous
    // Dirichlet conditions and which sit on refinement-level interfaces.
    const unsigned int nlevels = triangulation.n_global_levels();
    mg_matrices.resize(0, nlevels - 1);

    // The level boundary constraints must mirror the fine-level Dirichlet
    // conditions: x-component on boundary ids 10 and 20.
    std::set<types::boundary_id> dirichlet_boundary;
    dirichlet_boundary.insert(10);
    dirichlet_boundary.insert(20);
    mg_constrained_dofs.initialize(dof_handler);
    mg_constrained_dofs.make_zero_boundary_constraints(
      dof_handler, dirichlet_boundary, fe.component_mask(disp_x));

    for (unsigned int level = 0; level < nlevels; ++level)
      {
        IndexSet relevant_dofs;
        DoFTools::extract_locally_relevant_level_dofs(dof_handler,
                                                      level,
                                                      relevant_dofs);
        AffineConstraints<double> level_constraints;
        level_constraints.reinit(relevant_dofs);
        level_constraints.add_lines(
          mg_constrained_dofs.get_boundary_indices(level));
        level_constraints.close();
        // Block 1 on the levels carries no constraints at all.
        std::vector<const AffineConstraints<double> *> constraint(2);
        AffineConstraints<double> dummy;
        dummy.close();
        constraint[0] = &level_constraints;
        constraint[1] = &dummy;

        typename MatrixFree<dim, float>::AdditionalData additional_data;
        additional_data.tasks_parallel_scheme =
          MatrixFree<dim, float>::AdditionalData::none;
        additional_data.mapping_update_flags =
          (update_gradients | update_JxW_values | update_quadrature_points);
        additional_data.level_mg_handler = level;
        std::shared_ptr<MatrixFree<dim, float>> mg_mf_storage_level(
          new MatrixFree<dim, float>());

        std::vector<unsigned int> selected_blocks(1, 0);
        mg_mf_storage_level->reinit(dof_handlers,
                                    constraint,
                                    QGauss<1>(fe.degree + 1),
                                    additional_data);

        mg_matrices[level].initialize(mg_mf_storage_level,
                                      mg_constrained_dofs,
                                      level,
                                      selected_blocks);
      }
    setup_time += time.wall_time();
    time_details << "Setup matrix-free levels   (CPU/wall) " << time.cpu_time()
                 << "s/" << time.wall_time() << "s" << std::endl;
  }

  // Assemble the right-hand side. The body force is zero here, so every
  // quadrature point contributes a zero value; the loading enters the
  // problem only through the inhomogeneous Dirichlet values.
  template <int dim>
  void MechanicsProblem<dim>::assemble_rhs()
  {
    Timer time;

    system_rhs = 0;

    FEEvaluation<dim, degree_finite_element, degree_finite_element + 1,
                 n_components>
      rhs_eval(*system_matrix.get_matrix_free());

    const unsigned int n_cell_batches =
      system_matrix.get_matrix_free()->n_macro_cells();
    for (unsigned int cell = 0; cell < n_cell_batches; ++cell)
      {
        rhs_eval.reinit(cell);
        for (unsigned int q = 0; q < rhs_eval.n_q_points; ++q)
          rhs_eval.submit_value(Tensor<1, dim, VectorizedArray<double>>(), q);
        rhs_eval.integrate(true, false);
        rhs_eval.distribute_local_to_global(system_rhs);
      }
    system_rhs.compress(VectorOperation::add);

    setup_time += time.wall_time();
    time_details << "Assemble right hand side   (CPU/wall) " << time.cpu_time()
                 << "s/" << time.wall_time() << "s" << std::endl;
  }

  // Impose the inhomogeneous Dirichlet data: x-displacement of 0.1 on the
  // face with boundary id 20. The values are written into the locally
  // owned part of the solution vector, then hanging-node constraints are
  // applied so the field stays conforming.
  template <int dim>
  void MechanicsProblem<dim>::interpolate_boundary_values()
  {
    const FEValuesExtractors::Scalar disp_x(0);
    std::map<types::global_dof_index, double> boundary_values;
    VectorTools::interpolate_boundary_values(dof_handler, 20,
      Functions::ConstantFunction<dim>(0.1, dim), boundary_values,
      fe.component_mask(disp_x));
    for (const auto &dof_and_value : boundary_values)
      if (solution.locally_owned_elements().is_element(dof_and_value.first))
        solution(dof_and_value.first) = dof_and_value.second;
    constraints_without_dirichlet.distribute(solution);
  }

  template <int dim>
  void MechanicsProblem<dim>::solve()
  {
    Timer                            time;
    MGTransferMatrixFree<dim, float> mg_transfer(mg_constrained_dofs);
    mg_transfer.build(dof_handler);
    setup_time += time.wall_time();
    time_details << "MG build transfer time     (CPU/wall) " << time.cpu_time()
                 << "s/" << time.wall_time() << "s\n";
    time.restart();

    // As a smoother, this tutorial program uses a Chebyshev iteration instead
    // of SOR in step-16. (SOR would be very difficult to implement because we
    // do not have the matrix elements available explicitly, and it is
    // difficult to make it work efficiently in %parallel.)  The smoother is
    // initialized with our level matrices and the mandatory additional data
    // for the Chebyshev smoother. We use a relatively high degree here (5),
    // since matrix-vector products are comparably cheap. We choose to smooth
    // out a range of $[1.2 \hat{\lambda}_{\max}/15,1.2 \hat{\lambda}_{\max}]$
    // in the smoother where $\hat{\lambda}_{\max}$ is an estimate of the
    // largest eigenvalue (the factor 1.2 is applied inside
    // PreconditionChebyshev). In order to compute that eigenvalue, the
    // Chebyshev initialization performs a few steps of a CG algorithm
    // without preconditioner. Since the highest eigenvalue is usually the
    // easiest one to find and a rough estimate is enough, we choose 10
    // iterations. Finally, we also set the inner preconditioner type in the
    // Chebyshev method which is a Jacobi iteration. This is represented by
    // the DiagonalMatrix class that gets the inverse diagonal entry provided
    // by our MechanicsOperator class.
    //
    // On level zero, we initialize the smoother differently because we want
    // to use the Chebyshev iteration as a solver. PreconditionChebyshev
    // allows the user to switch to solver mode where the number of iterations
    // is internally chosen to the correct value. In the additional data
    // object, this setting is activated by choosing the polynomial degree to
    // @p numbers::invalid_unsigned_int. The algorithm will then attack all
    // eigenvalues between the smallest and largest one in the coarse level
    // matrix. The number of steps in the Chebyshev smoother are chosen such
    // that the Chebyshev convergence estimates guarantee to reduce the
    // residual by the number specified in the variable @p
    // smoothing_range. Note that for solving, @p smoothing_range is a
    // relative tolerance and chosen smaller than one, in this case, we select
    // three orders of magnitude, whereas it is a number larger than 1 when
    // only selected eigenvalues are smoothed.
    //
    // From a computational point of view, the Chebyshev iteration is a very
    // attractive coarse grid solver as long as the coarse size is
    // moderate. This is because the Chebyshev method performs only
    // matrix-vector products and vector updates, which typically parallelize
    // better to the largest cluster size with more than a few tens of
    // thousands of cores than inner product involved in other iterative
    // methods. The former involves only local communication between neighbors
    // in the (coarse) mesh, whereas the latter requires global communication
    // over all processors.
    using SmootherType =
      PreconditionChebyshev<LevelMatrixType,
                            LinearAlgebra::distributed::Vector<float>>;
    mg::SmootherRelaxation<SmootherType,
                           LinearAlgebra::distributed::Vector<float>>
                                                         mg_smoother;
    MGLevelObject<typename SmootherType::AdditionalData> smoother_data;
    smoother_data.resize(0, triangulation.n_global_levels() - 1);
    for (unsigned int level = 0; level < triangulation.n_global_levels();
         ++level)
      {
        if (level > 0)
          {
            smoother_data[level].smoothing_range     = 15.;
            smoother_data[level].degree              = 5;
            smoother_data[level].eig_cg_n_iterations = 10;
          }
        else
          {
            smoother_data[0].smoothing_range = 1e-3;
            smoother_data[0].degree          = numbers::invalid_unsigned_int;
            smoother_data[0].eig_cg_n_iterations = mg_matrices[0].m();
          }
        mg_matrices[level].compute_diagonal();
        smoother_data[level].preconditioner =
          mg_matrices[level].get_matrix_diagonal_inverse();
      }
    mg_smoother.initialize(mg_matrices, smoother_data);

    MGCoarseGridApplySmoother<LinearAlgebra::distributed::Vector<float>>
      mg_coarse;
    mg_coarse.initialize(mg_smoother);

    // The next step is to set up the interface matrices that are needed for the
    // case with hanging nodes. The adaptive multigrid realization in deal.II
    // implements an approach called local smoothing. This means that the
    // smoothing on the finest level only covers the local part of the mesh
    // defined by the fixed (finest) grid level and ignores parts of the
    // computational domain where the terminal cells are coarser than this
    // level. As the method progresses to coarser levels, more and more of the
    // global mesh will be covered. At some coarser level, the whole mesh will
    // be covered. Since all level matrices in the multigrid method cover a
    // single level in the mesh, no hanging nodes appear on the level matrices.
    // At the interface between multigrid levels, homogeneous Dirichlet boundary
    // conditions are set while smoothing. When the residual is transferred to
    // the next coarser level, however, the coupling over the multigrid
    // interface needs to be taken into account. This is done by the so-called
    // interface (or edge) matrices that compute the part of the residual that
    // is missed by the level matrix with
    // homogeneous Dirichlet conditions. We refer to the @ref mg_paper
    // "Multigrid paper by Janssen and Kanschat" for more details.
    //
    // For the implementation of those interface matrices, there is already a
    // pre-defined class MatrixFreeOperators::MGInterfaceOperator that wraps
    // the routines MatrixFreeOperators::Base::vmult_interface_down() and
    // MatrixFreeOperators::Base::vmult_interface_up() in a new class with @p
    // vmult() and @p Tvmult() operations (that were originally written for
    // matrices, hence expecting those names). Note that vmult_interface_down
    // is used during the restriction phase of the multigrid V-cycle, whereas
    // vmult_interface_up is used during the prolongation phase.
    //
    // Once the interface matrix is created, we set up the remaining Multigrid
    // preconditioner infrastructure in complete analogy to step-16 to obtain
    // a @p preconditioner object that can be applied to a matrix.
    mg::Matrix<LinearAlgebra::distributed::Vector<float>> mg_matrix(
      mg_matrices);

    MGLevelObject<MatrixFreeOperators::MGInterfaceOperator<LevelMatrixType>>
      mg_interface_matrices;
    mg_interface_matrices.resize(0, triangulation.n_global_levels() - 1);
    for (unsigned int level = 0; level < triangulation.n_global_levels();
         ++level)
      mg_interface_matrices[level].initialize(mg_matrices[level]);
    mg::Matrix<LinearAlgebra::distributed::Vector<float>> mg_interface(
      mg_interface_matrices);

    Multigrid<LinearAlgebra::distributed::Vector<float>> mg(
      mg_matrix, mg_coarse, mg_transfer, mg_smoother, mg_smoother);
    mg.set_edge_matrices(mg_interface, mg_interface);

    PreconditionMG<dim,
                   LinearAlgebra::distributed::Vector<float>,
                   MGTransferMatrixFree<dim, float>>
      preconditioner(dof_handler, mg, mg_transfer);

    // The setup of the multigrid routines is quite easy and one cannot see
    // any difference in the solve process as compared to step-16. All the
    // magic is hidden behind the implementation of the MechanicsOperator::vmult
    // operation. Note that we print out the solve time and the accumulated
    // setup time through standard out, i.e., in any case, whereas detailed
    // times for the setup operations are only printed in case the flag for
    // detail_times in the constructor is changed.

    SolverControl solver_control(10*system_matrix.m(), 1e-10);
    SolverCG<LinearAlgebra::distributed::Vector<double>> cg(solver_control);
    setup_time += time.wall_time();
    time_details << "MG build smoother time     (CPU/wall) " << time.cpu_time()
                 << "s/" << time.wall_time() << "s\n";
    pcout << "Total setup time               (wall) " << setup_time << "s\n";
    LinearAlgebra::distributed::Vector<double> residual;
    system_matrix.initialize_dof_vector(residual);
    system_matrix.compute_residual(residual, solution);
    system_rhs += residual;

    time.reset();
    time.start();
    // solve solution_update in inhomogeneous Dirichlet B. C.
    // constraints.set_zero(solution);
    solution_update = 0;
    // constraints.set_zero(solution_update);
    // cg.solve(system_matrix, solution_update, system_rhs, preconditioner);
    cg.solve(system_matrix, solution_update, system_rhs, PreconditionIdentity());

    constraints.distribute(solution_update);
    solution += solution_update;

    pcout << "Time solve (" << solver_control.last_step() << " iterations)"
          << (solver_control.last_step() < 10 ? "  " : " ") << "(CPU/wall) "
          << time.cpu_time() << "s/" << time.wall_time() << "s\n";

    pcout << "rhs l2_norm: " << system_rhs.l2_norm() << '\n';
    pcout << "Initial error: " << solver_control.initial_value() << '\n';
    pcout << "Last error: " << solver_control.last_value() << '\n';
  }

  template <int dim>
  void MechanicsProblem<dim>::output_results(const unsigned int cycle) const
  {
    // Write the distributed solution as one .vtu file per MPI rank plus a
    // .pvtu master record (written by rank 0 only) that references them.
    // For very large meshes we skip output entirely to keep I/O bounded.
    Timer time;
    if (triangulation.n_global_active_cells() > 1000000)
      return;

    DataOut<dim> data_out;

    // Ghost entries must be current before building patches on cells that
    // touch the subdomain boundary.
    solution.update_ghost_values();
    data_out.attach_dof_handler(dof_handler);
    data_out.add_data_vector(solution, "solution");
    data_out.build_patches();

    std::ofstream output(
      "solution-" + std::to_string(cycle) + "." +
      std::to_string(Utilities::MPI::this_mpi_process(MPI_COMM_WORLD)) +
      ".vtu");
    // Favor fast compression over small files for this diagnostic output.
    DataOutBase::VtkFlags flags;
    flags.compression_level = DataOutBase::VtkFlags::best_speed;
    data_out.set_flags(flags);
    data_out.write_vtu(output);

    if (Utilities::MPI::this_mpi_process(MPI_COMM_WORLD) == 0)
      {
        // Rebuild the list of per-rank filenames exactly as written above.
        std::vector<std::string> filenames;
        for (unsigned int i = 0;
             i < Utilities::MPI::n_mpi_processes(MPI_COMM_WORLD);
             ++i)
          filenames.emplace_back("solution-" + std::to_string(cycle) + "." +
                                 std::to_string(i) + ".vtu");

        // Consistency fix: use std::to_string for the cycle number, exactly
        // as in the per-rank filenames above (previously Utilities::to_string
        // was mixed in; the rendered digits are identical for unsigned int).
        const std::string master_name =
          "solution-" + std::to_string(cycle) + ".pvtu";
        std::ofstream master_output(master_name);
        data_out.write_pvtu_record(master_output, filenames);
      }

    time_details << "Time write output          (CPU/wall) " << time.cpu_time()
                 << "s/" << time.wall_time() << "s\n";
  }

  template <int dim>
  void MechanicsProblem<dim>::run()
  {
    // Report the SIMD width this build vectorizes over, once at startup.
    {
      const unsigned int n_vect_doubles =
        VectorizedArray<double>::n_array_elements;
      const unsigned int n_vect_bits = 8 * sizeof(double) * n_vect_doubles;

      pcout << "Vectorization over " << n_vect_doubles
            << " doubles = " << n_vect_bits << " bits ("
            << Utilities::System::get_current_vectorization_level()
            << "), VECTORIZATION_LEVEL=" << DEAL_II_COMPILER_VECTORIZATION_LEVEL
            << std::endl;
    }

    // Each cycle refines the mesh once and re-solves; the first cycle also
    // creates the coarse mesh and tags the two x-faces of the unit cube
    // (x=0 -> boundary id 10, x=1 -> boundary id 20) before pre-refining.
    const unsigned int n_cycles = 9 - dim;
    for (unsigned int cycle = 0; cycle < n_cycles; ++cycle)
      {
        pcout << "Cycle " << cycle << std::endl;

        if (cycle == 0)
          {
            GridGenerator::hyper_cube(triangulation, 0., 1);
            for (typename Triangulation<dim>::active_cell_iterator it =
                   triangulation.begin_active();
                 it != triangulation.end();
                 ++it)
              {
                // Guard clause instead of nesting: only touch owned cells.
                if (!it->is_locally_owned())
                  continue;
                for (unsigned int f = 0;
                     f < GeometryInfo<dim>::faces_per_cell;
                     ++f)
                  {
                    const auto face_center = it->face(f)->center();
                    if (std::abs(face_center(0) - 0) < 1e-12)
                      it->face(f)->set_boundary_id(10);
                    else if (std::abs(face_center(0) - 1) < 1e-12)
                      it->face(f)->set_boundary_id(20);
                  }
              }
            triangulation.refine_global(3 - dim);
          }

        triangulation.refine_global(1);
        setup_system();
        assemble_rhs();
        interpolate_boundary_values();
        solve();
        output_results(cycle);
        pcout << std::endl;
      }
  }
} // END namespace MyMechanics

int main(int argc, char *argv[])
{
  try
    {
      using namespace MyMechanics;

      Utilities::MPI::MPI_InitFinalize mpi_init(argc, argv, 1);

      std::cout << "Test." << std::endl;

      // const double C11(57.2), C12(36.1), C44(35.9);

      const double C11(2322.98), C12(409.938), C44(956.522);

      // const unsigned int dim = 3;

      // SymmetricTensor<4,dim> Cmatx = get_stress_strain_tensor<dim>(C11,C12,C44);

      // MechanicsOperator<dimension,2,double> 
      //   mf();

      MechanicsProblem<dimension> Mechanics_problem(C11,C12,C44);
      Mechanics_problem.run();

      // MechanicsProblem<dimension> mechanics_problem;
      // machanics_problem.run();
    }
  catch (std::exception &exc)
    {
      std::cerr << std::endl
                << std::endl
                << "----------------------------------------------------"
                << std::endl;
      std::cerr << "Exception on processing: " << std::endl
                << exc.what() << std::endl
                << "Aborting!" << std::endl
                << "----------------------------------------------------"
                << std::endl;
      return 1;
    }
  catch (...)
    {
      std::cerr << std::endl
                << std::endl
                << "----------------------------------------------------"
                << std::endl;
      std::cerr << "Unknown exception!" << std::endl
                << "Aborting!" << std::endl
                << "----------------------------------------------------"
                << std::endl;
      return 1;
    }

  return 0;
}

// Reply via email to