Hi,

I finally managed to run ViennaCL with a Block-ILU0 pre-conditioner on the CPU 
using OpenMP. I use the code attached to this email for a matrix that solves 
the "heat equation" on a 2D square. What puzzles me is that : 

- I get the best timing with nb_blocks = 1. I was expecting timing to go down 
when the number of blocks increase because of room for parallelisation.
- During the construction of the ILU0 preconditionner I was expecting the CPU 
to go to 100% with one block, 200% for 2 blocks, etc. I don’t get that. It 
seems that the CPU goes up to 200% for a very short time and then goes back to 
100%. This behavior does not depend upon nb_blocks. Is it expected?

François Fayard
Founder & Consultant - Inside Loop
Tel: +33 (0)6 01 44 06 93 <tel:+33%206%2001%2044%2006%2093>
Web: www.insideloop.io <http://www.insideloop.io/>
Twitter: @insideloop_io

#include <iostream>
#include <chrono>
#include <vector>
#include <map>

#define VIENNACL_WITH_OPENMP
#include <viennacl/scalar.hpp>
#include <viennacl/vector.hpp>
#include <viennacl/compressed_matrix.hpp>
#include <viennacl/linalg/bicgstab.hpp>
#include <viennacl/linalg/ilu.hpp>


int main(int argc, const char *argv[])
{
  typedef double ScalarType;
  auto size = std::vector<std::size_t>{10000000};
  auto side = std::vector<std::size_t>(size.size());
  const auto nb_blocks = std::size_t{1};
  for (auto i = std::size_t{0}; i < size.size(); i++) {
    side[i] = static_cast<std::size_t>(sqrt(size[i]));
  }

  for (auto n : side) {
    auto matrix = std::vector<std::map<std::size_t, ScalarType>>(n * n);
    for (auto i = std::size_t{0}; i < n; i++) {
      for (auto j = std::size_t{0}; j < n; j++) {
        auto left_border = bool{j == 0};
        auto right_border = bool{j == n - 1};
        auto upper_border = bool{i == 0};
        auto lower_border = bool{i == n - 1};
        auto nb_neighbors = std::size_t{1};
        if (!upper_border) {
          matrix[n * i + j][n * (i - 1) + j] = -1.0;
        }
        if (!left_border) {
          matrix[n * i + j][n * i + (j - 1)] = -1.0;
        }
        matrix[n * i + j][n * i + j] = 5.0;
        if (!right_border) {
          matrix[n * i + j][n * i + (j + 1)] = -1.0;
        }
        if (!lower_border) {
          matrix[n * i + j][n * (i + 1) + j] = -1.0;
        }
      }
    }
    auto vcl_matrix = viennacl::compressed_matrix<ScalarType>(n * n, n * n);
    viennacl::copy(matrix, vcl_matrix);

    auto vcl_rhs = viennacl::vector<ScalarType>(n * n);
    for (std::size_t i = 0; i < n * n; i++) {
      vcl_rhs[i] = 1.0;
    }

    std::cout << "Side: " << n << " " << n * n << std::endl;

    auto custom_bicgstab = viennacl::linalg::bicgstab_tag{1.0e-4, 150};
    std::cout << "Start solving " << std::endl;
    auto start_time = std::chrono::high_resolution_clock::now();
    std::cout << "Start Ilu0 " << std::endl;
    auto vcl_block_ilu0 = 
viennacl::linalg::block_ilu_precond<viennacl::compressed_matrix<ScalarType>, 
viennacl::linalg::ilu0_tag>(vcl_matrix, viennacl::linalg::ilu0_tag(), 
nb_blocks);
    auto vcl_result = viennacl::vector<ScalarType>(n * n);

    vcl_result = viennacl::linalg::solve(vcl_matrix, vcl_rhs, custom_bicgstab, 
vcl_block_ilu0); 
    auto end_time = std::chrono::high_resolution_clock::now();
    auto time = std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - 
start_time).count();

    std::cout << "Time elapsed: " << time << std::endl;

    std::cout << 0 << " " << vcl_result[0] << std::endl;
    std::cout << n * n - 1 << " " << vcl_result[n * n - 1] << std::endl;
    std::cout << "Nb of iterations: " << custom_bicgstab.iters() << std::endl;
    std::cout << "Estimated erros: " << custom_bicgstab.error() << std::endl;
    std::cout << std::endl;
  }

  return 0;
}
------------------------------------------------------------------------------
_______________________________________________
ViennaCL-support mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/viennacl-support

Reply via email to