Hi,
I finally managed to run ViennaCL with a Block-ILU0 pre-conditioner on the CPU
using OpenMP. I use the code attached to this email for a matrix that solves
the "heat equation" on a 2D square. What puzzles me is that :
- I get the best timing with nb_blocks = 1. I was expecting timing to go down
when the number of blocks increase because of room for parallelisation.
- During the construction of the ILU0 preconditionner I was expecting the CPU
to go to 100% with one block, 200% for 2 blocks, etc. I don’t get that. It
seems that the CPU goes up to 200% for a very short time and then goes back to
100%. This behavior does not depend upon nb_blocks. Is it expected?
François Fayard
Founder & Consultant - Inside Loop
Tel: +33 (0)6 01 44 06 93 <tel:+33%206%2001%2044%2006%2093>
Web: www.insideloop.io <http://www.insideloop.io/>
Twitter: @insideloop_io
#include <iostream>
#include <chrono>
#include <vector>
#include <map>
#define VIENNACL_WITH_OPENMP
#include <viennacl/scalar.hpp>
#include <viennacl/vector.hpp>
#include <viennacl/compressed_matrix.hpp>
#include <viennacl/linalg/bicgstab.hpp>
#include <viennacl/linalg/ilu.hpp>
int main(int argc, const char *argv[])
{
typedef double ScalarType;
auto size = std::vector<std::size_t>{10000000};
auto side = std::vector<std::size_t>(size.size());
const auto nb_blocks = std::size_t{1};
for (auto i = std::size_t{0}; i < size.size(); i++) {
side[i] = static_cast<std::size_t>(sqrt(size[i]));
}
for (auto n : side) {
auto matrix = std::vector<std::map<std::size_t, ScalarType>>(n * n);
for (auto i = std::size_t{0}; i < n; i++) {
for (auto j = std::size_t{0}; j < n; j++) {
auto left_border = bool{j == 0};
auto right_border = bool{j == n - 1};
auto upper_border = bool{i == 0};
auto lower_border = bool{i == n - 1};
auto nb_neighbors = std::size_t{1};
if (!upper_border) {
matrix[n * i + j][n * (i - 1) + j] = -1.0;
}
if (!left_border) {
matrix[n * i + j][n * i + (j - 1)] = -1.0;
}
matrix[n * i + j][n * i + j] = 5.0;
if (!right_border) {
matrix[n * i + j][n * i + (j + 1)] = -1.0;
}
if (!lower_border) {
matrix[n * i + j][n * (i + 1) + j] = -1.0;
}
}
}
auto vcl_matrix = viennacl::compressed_matrix<ScalarType>(n * n, n * n);
viennacl::copy(matrix, vcl_matrix);
auto vcl_rhs = viennacl::vector<ScalarType>(n * n);
for (std::size_t i = 0; i < n * n; i++) {
vcl_rhs[i] = 1.0;
}
std::cout << "Side: " << n << " " << n * n << std::endl;
auto custom_bicgstab = viennacl::linalg::bicgstab_tag{1.0e-4, 150};
std::cout << "Start solving " << std::endl;
auto start_time = std::chrono::high_resolution_clock::now();
std::cout << "Start Ilu0 " << std::endl;
auto vcl_block_ilu0 =
viennacl::linalg::block_ilu_precond<viennacl::compressed_matrix<ScalarType>,
viennacl::linalg::ilu0_tag>(vcl_matrix, viennacl::linalg::ilu0_tag(),
nb_blocks);
auto vcl_result = viennacl::vector<ScalarType>(n * n);
vcl_result = viennacl::linalg::solve(vcl_matrix, vcl_rhs, custom_bicgstab,
vcl_block_ilu0);
auto end_time = std::chrono::high_resolution_clock::now();
auto time = std::chrono::duration_cast<std::chrono::nanoseconds>(end_time -
start_time).count();
std::cout << "Time elapsed: " << time << std::endl;
std::cout << 0 << " " << vcl_result[0] << std::endl;
std::cout << n * n - 1 << " " << vcl_result[n * n - 1] << std::endl;
std::cout << "Nb of iterations: " << custom_bicgstab.iters() << std::endl;
std::cout << "Estimated erros: " << custom_bicgstab.error() << std::endl;
std::cout << std::endl;
}
return 0;
}------------------------------------------------------------------------------
_______________________________________________
ViennaCL-support mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/viennacl-support