http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/common.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/common.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/common.hpp new file mode 100644 index 0000000..93b0cba --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/common.hpp @@ -0,0 +1,263 @@ +#ifndef VIENNACL_LINALG_DETAIL_ILU_COMMON_HPP_ +#define VIENNACL_LINALG_DETAIL_ILU_COMMON_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/linalg/detail/ilu/common.hpp + @brief Common routines used within ILU-type preconditioners +*/ + +#include <vector> +#include <cmath> +#include <iostream> +#include <map> +#include <list> + +#include "viennacl/forwards.h" +#include "viennacl/tools/tools.hpp" +#include "viennacl/backend/memory.hpp" + +#include "viennacl/linalg/host_based/common.hpp" +#include "viennacl/linalg/misc_operations.hpp" + +namespace viennacl +{ +namespace linalg +{ +namespace detail +{ + + +// +// Level Scheduling Setup for ILU: +// + +template<typename NumericT, unsigned int AlignmentV> +void level_scheduling_setup_impl(viennacl::compressed_matrix<NumericT, AlignmentV> const & LU, + viennacl::vector<NumericT> const & diagonal_LU, + std::list<viennacl::backend::mem_handle> & row_index_arrays, + std::list<viennacl::backend::mem_handle> & row_buffers, + std::list<viennacl::backend::mem_handle> & col_buffers, + std::list<viennacl::backend::mem_handle> & element_buffers, + std::list<vcl_size_t> & row_elimination_num_list, + bool setup_U) +{ + NumericT const * diagonal_buf = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(diagonal_LU.handle()); + NumericT const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(LU.handle()); + unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU.handle1()); + unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU.handle2()); + + // + // Step 1: Determine row elimination order for each row and build up meta information about the number of entries taking part in each elimination step: + // + std::vector<vcl_size_t> row_elimination(LU.size1()); + std::map<vcl_size_t, std::map<vcl_size_t, vcl_size_t> > row_entries_per_elimination_step; + + vcl_size_t max_elimination_runs = 0; + for (vcl_size_t row2 = 0; row2 < LU.size1(); ++row2) + { + vcl_size_t row = setup_U ? (LU.size1() - row2) - 1 : row2; + + vcl_size_t row_begin = row_buffer[row]; + vcl_size_t row_end = row_buffer[row+1]; + vcl_size_t elimination_index = 0; //Note: first run corresponds to elimination_index = 1 (otherwise, type issues with int <-> unsigned int would arise + for (vcl_size_t i = row_begin; i < row_end; ++i) + { + unsigned int col = col_buffer[i]; + if ( (!setup_U && col < row) || (setup_U && col > row) ) + { + elimination_index = std::max<vcl_size_t>(elimination_index, row_elimination[col]); + row_entries_per_elimination_step[row_elimination[col]][row] += 1; + } + } + row_elimination[row] = elimination_index + 1; + max_elimination_runs = std::max<vcl_size_t>(max_elimination_runs, elimination_index + 1); + } + + //std::cout << "Number of elimination runs: " << max_elimination_runs << std::endl; + + // + // Step 2: Build row-major elimination matrix for each elimination step + // + + //std::cout << "Elimination order: " << std::endl; + //for (vcl_size_t i=0; i<row_elimination.size(); ++i) + // std::cout << row_elimination[i] << ", "; + //std::cout << std::endl; + + //vcl_size_t summed_rows = 0; + for (vcl_size_t elimination_run = 1; elimination_run <= max_elimination_runs; ++elimination_run) + { + std::map<vcl_size_t, vcl_size_t> const & current_elimination_info = row_entries_per_elimination_step[elimination_run]; + + // count cols and entries handled in this elimination step + vcl_size_t num_tainted_cols = current_elimination_info.size(); + vcl_size_t num_entries = 0; + + for (std::map<vcl_size_t, vcl_size_t>::const_iterator it = current_elimination_info.begin(); + it != current_elimination_info.end(); + ++it) + num_entries += it->second; + + //std::cout << "num_entries: " << num_entries << std::endl; + //std::cout << "num_tainted_cols: " << num_tainted_cols << std::endl; + + if (num_tainted_cols > 0) + { + row_index_arrays.push_back(viennacl::backend::mem_handle()); + viennacl::backend::switch_memory_context<unsigned int>(row_index_arrays.back(), viennacl::traits::context(LU)); + viennacl::backend::typesafe_host_array<unsigned int> elim_row_index_array(row_index_arrays.back(), num_tainted_cols); + + row_buffers.push_back(viennacl::backend::mem_handle()); + viennacl::backend::switch_memory_context<unsigned int>(row_buffers.back(), viennacl::traits::context(LU)); + viennacl::backend::typesafe_host_array<unsigned int> elim_row_buffer(row_buffers.back(), num_tainted_cols + 1); + + col_buffers.push_back(viennacl::backend::mem_handle()); + viennacl::backend::switch_memory_context<unsigned int>(col_buffers.back(), viennacl::traits::context(LU)); + viennacl::backend::typesafe_host_array<unsigned int> elim_col_buffer(col_buffers.back(), num_entries); + + element_buffers.push_back(viennacl::backend::mem_handle()); + viennacl::backend::switch_memory_context<NumericT>(element_buffers.back(), viennacl::traits::context(LU)); + std::vector<NumericT> elim_elements_buffer(num_entries); + + row_elimination_num_list.push_back(num_tainted_cols); + + vcl_size_t k=0; + vcl_size_t nnz_index = 0; + elim_row_buffer.set(0, 0); + + for (std::map<vcl_size_t, vcl_size_t>::const_iterator it = current_elimination_info.begin(); + it != current_elimination_info.end(); + ++it) + { + //vcl_size_t col = setup_U ? (elimination_matrix.size() - it->first) - 1 : col2; + vcl_size_t row = it->first; + elim_row_index_array.set(k, row); + + vcl_size_t row_begin = row_buffer[row]; + vcl_size_t row_end = row_buffer[row+1]; + for (vcl_size_t i = row_begin; i < row_end; ++i) + { + unsigned int col = col_buffer[i]; + if ( (!setup_U && col < row) || (setup_U && col > row) ) //entry of L/U + { + if (row_elimination[col] == elimination_run) // this entry is substituted in this run + { + elim_col_buffer.set(nnz_index, col); + elim_elements_buffer[nnz_index] = setup_U ? elements[i] / diagonal_buf[it->first] : elements[i]; + ++nnz_index; + } + } + } + + elim_row_buffer.set(++k, nnz_index); + } + + // + // Wrap in memory_handles: + // + viennacl::backend::memory_create(row_index_arrays.back(), elim_row_index_array.raw_size(), viennacl::traits::context(row_index_arrays.back()), elim_row_index_array.get()); + viennacl::backend::memory_create(row_buffers.back(), elim_row_buffer.raw_size(), viennacl::traits::context(row_buffers.back()), elim_row_buffer.get()); + viennacl::backend::memory_create(col_buffers.back(), elim_col_buffer.raw_size(), viennacl::traits::context(col_buffers.back()), elim_col_buffer.get()); + viennacl::backend::memory_create(element_buffers.back(), sizeof(NumericT) * elim_elements_buffer.size(), viennacl::traits::context(element_buffers.back()), &(elim_elements_buffer[0])); + } + + // Print some info: + //std::cout << "Eliminated columns in run " << elimination_run << ": " << num_tainted_cols << " (tainted columns: " << num_tainted_cols << ")" << std::endl; + //summed_rows += eliminated_rows_in_run; + //if (eliminated_rows_in_run == 0) + // break; + } + //std::cout << "Eliminated rows: " << summed_rows << " out of " << row_elimination.size() << std::endl; +} + + +template<typename NumericT, unsigned int AlignmentV> +void level_scheduling_setup_L(viennacl::compressed_matrix<NumericT, AlignmentV> const & LU, + viennacl::vector<NumericT> const & diagonal_LU, + std::list<viennacl::backend::mem_handle> & row_index_arrays, + std::list<viennacl::backend::mem_handle> & row_buffers, + std::list<viennacl::backend::mem_handle> & col_buffers, + std::list<viennacl::backend::mem_handle> & element_buffers, + std::list<vcl_size_t> & row_elimination_num_list) +{ + level_scheduling_setup_impl(LU, diagonal_LU, row_index_arrays, row_buffers, col_buffers, element_buffers, row_elimination_num_list, false); +} + + +// +// Multifrontal setup of U: +// + +template<typename NumericT, unsigned int AlignmentV> +void level_scheduling_setup_U(viennacl::compressed_matrix<NumericT, AlignmentV> const & LU, + viennacl::vector<NumericT> const & diagonal_LU, + std::list<viennacl::backend::mem_handle> & row_index_arrays, + std::list<viennacl::backend::mem_handle> & row_buffers, + std::list<viennacl::backend::mem_handle> & col_buffers, + std::list<viennacl::backend::mem_handle> & element_buffers, + std::list<vcl_size_t> & row_elimination_num_list) +{ + level_scheduling_setup_impl(LU, diagonal_LU, row_index_arrays, row_buffers, col_buffers, element_buffers, row_elimination_num_list, true); +} + + +// +// Multifrontal substitution (both L and U). Will partly be moved to single_threaded/opencl/cuda implementations +// +template<typename NumericT> +void level_scheduling_substitute(viennacl::vector<NumericT> & vec, + std::list<viennacl::backend::mem_handle> const & row_index_arrays, + std::list<viennacl::backend::mem_handle> const & row_buffers, + std::list<viennacl::backend::mem_handle> const & col_buffers, + std::list<viennacl::backend::mem_handle> const & element_buffers, + std::list<vcl_size_t> const & row_elimination_num_list) +{ + typedef typename std::list< viennacl::backend::mem_handle >::const_iterator ListIterator; + ListIterator row_index_array_it = row_index_arrays.begin(); + ListIterator row_buffers_it = row_buffers.begin(); + ListIterator col_buffers_it = col_buffers.begin(); + ListIterator element_buffers_it = element_buffers.begin(); + typename std::list< vcl_size_t>::const_iterator row_elimination_num_it = row_elimination_num_list.begin(); + for (vcl_size_t i=0; i<row_index_arrays.size(); ++i) + { + viennacl::linalg::detail::level_scheduling_substitute(vec, *row_index_array_it, *row_buffers_it, *col_buffers_it, *element_buffers_it, *row_elimination_num_it); + + ++row_index_array_it; + ++row_buffers_it; + ++col_buffers_it; + ++element_buffers_it; + ++row_elimination_num_it; + } +} + + + + + +} // namespace detail +} // namespace linalg +} // namespace viennacl + + + + +#endif + + +
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilu0.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilu0.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilu0.hpp new file mode 100644 index 0000000..1c3191a --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilu0.hpp @@ -0,0 +1,379 @@ +#ifndef VIENNACL_LINALG_DETAIL_ILU0_HPP_ +#define VIENNACL_LINALG_DETAIL_ILU0_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/linalg/detail/ilu/ilu0.hpp + @brief Implementations of incomplete factorization preconditioners with static nonzero pattern. + + Contributed by Evan Bollig. + + ILU0 (Incomplete LU with zero fill-in) + - All preconditioner nonzeros exist at locations that were nonzero in the input matrix. + - The number of nonzeros in the output preconditioner are exactly the same number as the input matrix + + Evan Bollig 3/30/12 + + Adapted from viennacl/linalg/detail/ilut.hpp + + Low-level reimplementation by Karl Rupp in Nov 2012, increasing performance substantially. Also added level-scheduling. + +*/ + +#include <vector> +#include <cmath> +#include <iostream> +#include "viennacl/forwards.h" +#include "viennacl/tools/tools.hpp" +#include "viennacl/linalg/detail/ilu/common.hpp" +#include "viennacl/compressed_matrix.hpp" +#include "viennacl/backend/memory.hpp" + +#include "viennacl/linalg/host_based/common.hpp" + +#include <map> + +namespace viennacl +{ +namespace linalg +{ + +/** @brief A tag for incomplete LU factorization with static pattern (ILU0) +*/ +class ilu0_tag +{ +public: + ilu0_tag(bool with_level_scheduling = false) : use_level_scheduling_(with_level_scheduling) {} + + bool use_level_scheduling() const { return use_level_scheduling_; } + void use_level_scheduling(bool b) { use_level_scheduling_ = b; } + +private: + bool use_level_scheduling_; +}; + + +/** @brief Implementation of a ILU-preconditioner with static pattern. Optimized version for CSR matrices. + * + * refer to the Algorithm in Saad's book (1996 edition) + * + * @param A The sparse matrix matrix. The result is directly written to A. + */ +template<typename NumericT> +void precondition(viennacl::compressed_matrix<NumericT> & A, ilu0_tag const & /* tag */) +{ + assert( (A.handle1().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") ); + assert( (A.handle2().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") ); + assert( (A.handle().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") ); + + NumericT * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle()); + unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1()); + unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2()); + + // Note: Line numbers in the following refer to the algorithm in Saad's book + + for (vcl_size_t i=1; i<A.size1(); ++i) // Line 1 + { + unsigned int row_i_begin = row_buffer[i]; + unsigned int row_i_end = row_buffer[i+1]; + for (unsigned int buf_index_k = row_i_begin; buf_index_k < row_i_end; ++buf_index_k) //Note: We do not assume that the column indices within a row are sorted + { + unsigned int k = col_buffer[buf_index_k]; + if (k >= i) + continue; //Note: We do not assume that the column indices within a row are sorted + + unsigned int row_k_begin = row_buffer[k]; + unsigned int row_k_end = row_buffer[k+1]; + + // get a_kk: + NumericT a_kk = 0; + for (unsigned int buf_index_akk = row_k_begin; buf_index_akk < row_k_end; ++buf_index_akk) + { + if (col_buffer[buf_index_akk] == k) + { + a_kk = elements[buf_index_akk]; + break; + } + } + + NumericT & a_ik = elements[buf_index_k]; + a_ik /= a_kk; //Line 3 + + for (unsigned int buf_index_j = row_i_begin; buf_index_j < row_i_end; ++buf_index_j) //Note: We do not assume that the column indices within a row are sorted + { + unsigned int j = col_buffer[buf_index_j]; + if (j <= k) + continue; + + // determine a_kj: + NumericT a_kj = 0; + for (unsigned int buf_index_akj = row_k_begin; buf_index_akj < row_k_end; ++buf_index_akj) + { + if (col_buffer[buf_index_akj] == j) + { + a_kj = elements[buf_index_akj]; + break; + } + } + + //a_ij -= a_ik * a_kj + elements[buf_index_j] -= a_ik * a_kj; //Line 5 + } + } + } + +} + + +/** @brief ILU0 preconditioner class, can be supplied to solve()-routines +*/ +template<typename MatrixT> +class ilu0_precond +{ + typedef typename MatrixT::value_type NumericType; + +public: + ilu0_precond(MatrixT const & mat, ilu0_tag const & tag) : tag_(tag), LU_() + { + //initialize preconditioner: + //std::cout << "Start CPU precond" << std::endl; + init(mat); + //std::cout << "End CPU precond" << std::endl; + } + + template<typename VectorT> + void apply(VectorT & vec) const + { + unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_.handle1()); + unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_.handle2()); + NumericType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericType>(LU_.handle()); + + viennacl::linalg::host_based::detail::csr_inplace_solve<NumericType>(row_buffer, col_buffer, elements, vec, LU_.size2(), unit_lower_tag()); + viennacl::linalg::host_based::detail::csr_inplace_solve<NumericType>(row_buffer, col_buffer, elements, vec, LU_.size2(), upper_tag()); + } + +private: + void init(MatrixT const & mat) + { + viennacl::context host_context(viennacl::MAIN_MEMORY); + viennacl::switch_memory_context(LU_, host_context); + + viennacl::copy(mat, LU_); + viennacl::linalg::precondition(LU_, tag_); + } + + ilu0_tag tag_; + viennacl::compressed_matrix<NumericType> LU_; +}; + + +/** @brief ILU0 preconditioner class, can be supplied to solve()-routines. +* +* Specialization for compressed_matrix +*/ +template<typename NumericT, unsigned int AlignmentV> +class ilu0_precond< viennacl::compressed_matrix<NumericT, AlignmentV> > +{ + typedef viennacl::compressed_matrix<NumericT, AlignmentV> MatrixType; + +public: + ilu0_precond(MatrixType const & mat, ilu0_tag const & tag) + : tag_(tag), + LU_(mat.size1(), mat.size2(), viennacl::traits::context(mat)) + { + //initialize preconditioner: + //std::cout << "Start GPU precond" << std::endl; + init(mat); + //std::cout << "End GPU precond" << std::endl; + } + + void apply(viennacl::vector<NumericT> & vec) const + { + viennacl::context host_context(viennacl::MAIN_MEMORY); + if (vec.handle().get_active_handle_id() != viennacl::MAIN_MEMORY) + { + if (tag_.use_level_scheduling()) + { + //std::cout << "Using multifrontal on GPU..." << std::endl; + detail::level_scheduling_substitute(vec, + multifrontal_L_row_index_arrays_, + multifrontal_L_row_buffers_, + multifrontal_L_col_buffers_, + multifrontal_L_element_buffers_, + multifrontal_L_row_elimination_num_list_); + + vec = viennacl::linalg::element_div(vec, multifrontal_U_diagonal_); + + detail::level_scheduling_substitute(vec, + multifrontal_U_row_index_arrays_, + multifrontal_U_row_buffers_, + multifrontal_U_col_buffers_, + multifrontal_U_element_buffers_, + multifrontal_U_row_elimination_num_list_); + } + else + { + viennacl::context old_context = viennacl::traits::context(vec); + viennacl::switch_memory_context(vec, host_context); + viennacl::linalg::inplace_solve(LU_, vec, unit_lower_tag()); + viennacl::linalg::inplace_solve(LU_, vec, upper_tag()); + viennacl::switch_memory_context(vec, old_context); + } + } + else //apply ILU0 directly on CPU + { + if (tag_.use_level_scheduling()) + { + //std::cout << "Using multifrontal..." << std::endl; + detail::level_scheduling_substitute(vec, + multifrontal_L_row_index_arrays_, + multifrontal_L_row_buffers_, + multifrontal_L_col_buffers_, + multifrontal_L_element_buffers_, + multifrontal_L_row_elimination_num_list_); + + vec = viennacl::linalg::element_div(vec, multifrontal_U_diagonal_); + + detail::level_scheduling_substitute(vec, + multifrontal_U_row_index_arrays_, + multifrontal_U_row_buffers_, + multifrontal_U_col_buffers_, + multifrontal_U_element_buffers_, + multifrontal_U_row_elimination_num_list_); + } + else + { + viennacl::linalg::inplace_solve(LU_, vec, unit_lower_tag()); + viennacl::linalg::inplace_solve(LU_, vec, upper_tag()); + } + } + } + + vcl_size_t levels() const { return multifrontal_L_row_index_arrays_.size(); } + +private: + void init(MatrixType const & mat) + { + viennacl::context host_context(viennacl::MAIN_MEMORY); + viennacl::switch_memory_context(LU_, host_context); + LU_ = mat; + viennacl::linalg::precondition(LU_, tag_); + + if (!tag_.use_level_scheduling()) + return; + + // multifrontal part: + viennacl::switch_memory_context(multifrontal_U_diagonal_, host_context); + multifrontal_U_diagonal_.resize(LU_.size1(), false); + host_based::detail::row_info(LU_, multifrontal_U_diagonal_, viennacl::linalg::detail::SPARSE_ROW_DIAGONAL); + + detail::level_scheduling_setup_L(LU_, + multifrontal_U_diagonal_, //dummy + multifrontal_L_row_index_arrays_, + multifrontal_L_row_buffers_, + multifrontal_L_col_buffers_, + multifrontal_L_element_buffers_, + multifrontal_L_row_elimination_num_list_); + + + detail::level_scheduling_setup_U(LU_, + multifrontal_U_diagonal_, + multifrontal_U_row_index_arrays_, + multifrontal_U_row_buffers_, + multifrontal_U_col_buffers_, + multifrontal_U_element_buffers_, + multifrontal_U_row_elimination_num_list_); + + // + // Bring to device if necessary: + // + + // L: + for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_index_arrays_.begin(); + it != multifrontal_L_row_index_arrays_.end(); + ++it) + viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); + + for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_buffers_.begin(); + it != multifrontal_L_row_buffers_.end(); + ++it) + viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); + + for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_col_buffers_.begin(); + it != multifrontal_L_col_buffers_.end(); + ++it) + viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); + + for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_element_buffers_.begin(); + it != multifrontal_L_element_buffers_.end(); + ++it) + viennacl::backend::switch_memory_context<NumericT>(*it, viennacl::traits::context(mat)); + + + // U: + + viennacl::switch_memory_context(multifrontal_U_diagonal_, viennacl::traits::context(mat)); + + for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_index_arrays_.begin(); + it != multifrontal_U_row_index_arrays_.end(); + ++it) + viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); + + for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_buffers_.begin(); + it != multifrontal_U_row_buffers_.end(); + ++it) + viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); + + for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_col_buffers_.begin(); + it != multifrontal_U_col_buffers_.end(); + ++it) + viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); + + for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_element_buffers_.begin(); + it != multifrontal_U_element_buffers_.end(); + ++it) + viennacl::backend::switch_memory_context<NumericT>(*it, viennacl::traits::context(mat)); + + } + + ilu0_tag tag_; + viennacl::compressed_matrix<NumericT> LU_; + + std::list<viennacl::backend::mem_handle> multifrontal_L_row_index_arrays_; + std::list<viennacl::backend::mem_handle> multifrontal_L_row_buffers_; + std::list<viennacl::backend::mem_handle> multifrontal_L_col_buffers_; + std::list<viennacl::backend::mem_handle> multifrontal_L_element_buffers_; + std::list<vcl_size_t> multifrontal_L_row_elimination_num_list_; + + viennacl::vector<NumericT> multifrontal_U_diagonal_; + std::list<viennacl::backend::mem_handle> multifrontal_U_row_index_arrays_; + std::list<viennacl::backend::mem_handle> multifrontal_U_row_buffers_; + std::list<viennacl::backend::mem_handle> multifrontal_U_col_buffers_; + std::list<viennacl::backend::mem_handle> multifrontal_U_element_buffers_; + std::list<vcl_size_t> multifrontal_U_row_elimination_num_list_; + +}; + +} // namespace linalg +} // namespace viennacl + + +#endif + + + http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilut.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilut.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilut.hpp new file mode 100644 index 0000000..11ab842 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilut.hpp @@ -0,0 +1,597 @@ +#ifndef VIENNACL_LINALG_DETAIL_ILUT_HPP_ +#define VIENNACL_LINALG_DETAIL_ILUT_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/linalg/detail/ilu/ilut.hpp + @brief Implementations of an incomplete factorization preconditioner with threshold (ILUT) +*/ + +#include <vector> +#include <cmath> +#include <iostream> +#include "viennacl/forwards.h" +#include "viennacl/tools/tools.hpp" + +#include "viennacl/linalg/detail/ilu/common.hpp" +#include "viennacl/compressed_matrix.hpp" + +#include "viennacl/linalg/host_based/common.hpp" + +#include <map> + +namespace viennacl +{ +namespace linalg +{ + +/** @brief A tag for incomplete LU factorization with threshold (ILUT) +*/ +class ilut_tag +{ + public: + /** @brief The constructor. + * + * @param entries_per_row Number of nonzero entries per row in L and U. Note that L and U are stored in a single matrix, thus there are 2*entries_per_row in total. + * @param drop_tolerance The drop tolerance for ILUT + * @param with_level_scheduling Flag for enabling level scheduling on GPUs. + */ + ilut_tag(unsigned int entries_per_row = 20, + double drop_tolerance = 1e-4, + bool with_level_scheduling = false) + : entries_per_row_(entries_per_row), + drop_tolerance_(drop_tolerance), + use_level_scheduling_(with_level_scheduling) {} + + void set_drop_tolerance(double tol) + { + if (tol > 0) + drop_tolerance_ = tol; + } + double get_drop_tolerance() const { return drop_tolerance_; } + + void set_entries_per_row(unsigned int e) + { + if (e > 0) + entries_per_row_ = e; + } + + unsigned int get_entries_per_row() const { return entries_per_row_; } + + bool use_level_scheduling() const { return use_level_scheduling_; } + void use_level_scheduling(bool b) { use_level_scheduling_ = b; } + + private: + unsigned int entries_per_row_; + double drop_tolerance_; + bool use_level_scheduling_; +}; + + +namespace detail +{ + /** @brief Helper struct for holding a sparse vector in linear memory. For internal use only. + * + * Unfortunately, the 'naive' implementation using a std::map<> is almost always too slow. + * + */ + template<typename NumericT> + struct ilut_sparse_vector + { + ilut_sparse_vector(vcl_size_t alloc_size = 0) : size_(0), col_indices_(alloc_size), elements_(alloc_size) {} + + void resize_if_bigger(vcl_size_t s) + { + if (s > elements_.size()) + { + col_indices_.resize(s); + elements_.resize(s); + } + size_ = s; + } + + vcl_size_t size_; + std::vector<unsigned int> col_indices_; + std::vector<NumericT> elements_; + }; + + /** @brief Subtracts a scaled sparse vector u from a sparse vector w and writes the output to z: z = w - alpha * u + * + * Sparsity pattern of u and w are usually different. + * + * @return Length of new vector + */ + template<typename IndexT, typename NumericT> + IndexT merge_subtract_sparse_rows(IndexT const * w_coords, NumericT const * w_elements, IndexT w_size, + IndexT const * u_coords, NumericT const * u_elements, IndexT u_size, NumericT alpha, + IndexT * z_coords, NumericT * z_elements) + { + IndexT index_w = 0; + IndexT index_u = 0; + IndexT index_z = 0; + + while (1) + { + if (index_w < w_size && index_u < u_size) + { + if (w_coords[index_w] < u_coords[index_u]) + { + z_coords[index_z] = w_coords[index_w]; + z_elements[index_z++] = w_elements[index_w++]; + } + else if (w_coords[index_w] == u_coords[index_u]) + { + z_coords[index_z] = w_coords[index_w]; + z_elements[index_z++] = w_elements[index_w++] - alpha * u_elements[index_u++]; + } + else + { + z_coords[index_z] = u_coords[index_u]; + z_elements[index_z++] = - alpha * u_elements[index_u++]; + } + } + else if (index_w == w_size && index_u < u_size) + { + z_coords[index_z] = u_coords[index_u]; + z_elements[index_z++] = - alpha * u_elements[index_u++]; + } + else if (index_w < w_size && index_u == u_size) + { + z_coords[index_z] = w_coords[index_w]; + z_elements[index_z++] = w_elements[index_w++]; + } + else + return index_z; + } + } + + template<typename SizeT, typename NumericT> + void insert_with_value_sort(std::vector<std::pair<SizeT, NumericT> > & map, + SizeT index, NumericT value) + { + NumericT abs_value = std::fabs(value); + if (abs_value > 0) + { + // find first element with smaller absolute value: + std::size_t first_smaller_index = 0; + while (first_smaller_index < map.size() && std::fabs(map[first_smaller_index].second) > abs_value) + ++first_smaller_index; + + std::pair<SizeT, NumericT> tmp(index, value); + for (std::size_t j=first_smaller_index; j<map.size(); ++j) + std::swap(map[j], tmp); + } + } + +} + +/** @brief Implementation of a ILU-preconditioner with threshold. Optimized implementation for compressed_matrix. +* +* refer to Algorithm 10.6 by Saad's book (1996 edition) +* +* @param A The input matrix. Either a compressed_matrix or of type std::vector< std::map<T, U> > +* @param L The output matrix for L. +* @param U The output matrix for U. +* @param tag An ilut_tag in order to dispatch among several other preconditioners. +*/ +template<typename NumericT> +void precondition(viennacl::compressed_matrix<NumericT> const & A, + viennacl::compressed_matrix<NumericT> & L, + viennacl::compressed_matrix<NumericT> & U, + ilut_tag const & tag) +{ + assert(A.size1() == L.size1() && bool("Output matrix size mismatch") ); + assert(A.size1() == U.size1() && bool("Output matrix size mismatch") ); + + L.reserve( tag.get_entries_per_row() * A.size1()); + U.reserve((tag.get_entries_per_row() + 1) * A.size1()); + + vcl_size_t avg_nnz_per_row = static_cast<vcl_size_t>(A.nnz() / A.size1()); + detail::ilut_sparse_vector<NumericT> w1(tag.get_entries_per_row() * (avg_nnz_per_row + 10)); + detail::ilut_sparse_vector<NumericT> w2(tag.get_entries_per_row() * (avg_nnz_per_row + 10)); + detail::ilut_sparse_vector<NumericT> * w_in = &w1; + detail::ilut_sparse_vector<NumericT> * w_out = &w2; + std::vector<NumericT> diagonal_U(A.size1()); + + NumericT const * elements_A = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle()); + unsigned int const * row_buffer_A = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1()); + unsigned int const * col_buffer_A = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2()); + + NumericT * elements_L = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(L.handle()); + unsigned int * row_buffer_L = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.handle1()); row_buffer_L[0] = 0; + unsigned int * col_buffer_L = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.handle2()); + + NumericT * elements_U = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(U.handle()); + unsigned int * row_buffer_U = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.handle1()); row_buffer_U[0] = 0; + unsigned int * col_buffer_U = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.handle2()); + + std::vector<std::pair<unsigned int, NumericT> > sorted_entries_L(tag.get_entries_per_row()); + std::vector<std::pair<unsigned int, NumericT> > sorted_entries_U(tag.get_entries_per_row()); + + for (vcl_size_t i=0; i<viennacl::traits::size1(A); ++i) // Line 1 + { + std::fill(sorted_entries_L.begin(), sorted_entries_L.end(), std::pair<unsigned int, NumericT>(0, NumericT(0))); + std::fill(sorted_entries_U.begin(), sorted_entries_U.end(), std::pair<unsigned int, NumericT>(0, NumericT(0))); + + //line 2: set up w + w_in->resize_if_bigger(row_buffer_A[i+1] - row_buffer_A[i]); + NumericT row_norm = 0; + unsigned int k = 0; + for (unsigned int j = row_buffer_A[i]; j < row_buffer_A[i+1]; ++j, ++k) + { + w_in->col_indices_[k] = col_buffer_A[j]; + NumericT entry = elements_A[j]; + w_in->elements_[k] = entry; + row_norm += entry * entry; + } + row_norm = std::sqrt(row_norm); + NumericT tau_i = static_cast<NumericT>(tag.get_drop_tolerance()) * row_norm; + + //line 3: Iterate over lower diagonal parts of A: + k = 0; + unsigned int current_col = (row_buffer_A[i+1] > row_buffer_A[i]) ? w_in->col_indices_[k] : static_cast<unsigned int>(i); // mind empty rows here! + while (current_col < i) + { + //line 4: + NumericT a_kk = diagonal_U[current_col]; + + NumericT w_k_entry = w_in->elements_[k] / a_kk; + w_in->elements_[k] = w_k_entry; + + //lines 5,6: (dropping rule to w_k) + if ( std::fabs(w_k_entry) > tau_i) + { + //line 7: + unsigned int row_U_begin = row_buffer_U[current_col]; + unsigned int row_U_end = row_buffer_U[current_col + 1]; + + if (row_U_end > row_U_begin) + { + w_out->resize_if_bigger(w_in->size_ + (row_U_end - row_U_begin) - 1); + w_out->size_ = detail::merge_subtract_sparse_rows(&(w_in->col_indices_[0]), &(w_in->elements_[0]), static_cast<unsigned int>(w_in->size_), + col_buffer_U + row_U_begin + 1, elements_U + row_U_begin + 1, (row_U_end - row_U_begin) - 1, w_k_entry, + &(w_out->col_indices_[0]), &(w_out->elements_[0]) + ); + ++k; + } + } + else // drop element + { + w_out->resize_if_bigger(w_in->size_ - 1); + for (unsigned int r = 0; r < k; ++r) + { + w_out->col_indices_[r] = w_in->col_indices_[r]; + w_out->elements_[r] = w_in->elements_[r]; + } + for (unsigned int r = k+1; r < w_in->size_; ++r) + { + w_out->col_indices_[r-1] = w_in->col_indices_[r]; + w_out->elements_[r-1] = w_in->elements_[r]; + } + + // Note: No increment to k here, element was dropped! + } + + // swap pointers to w1 and w2 + std::swap(w_in, w_out); + + // process next entry: + current_col = (k < w_in->size_) ? w_in->col_indices_[k] : static_cast<unsigned int>(i); + } // while() + + // Line 10: Apply a dropping rule to w + // To do so, we write values to a temporary array + for (unsigned int r = 0; r < w_in->size_; ++r) + { + unsigned int col = w_in->col_indices_[r]; + NumericT value = w_in->elements_[r]; + + if (col < i) // entry for L: + detail::insert_with_value_sort(sorted_entries_L, col, value); + else if (col == i) // do not drop diagonal element + { + diagonal_U[i] = value; + if (value <= 0 && value >= 0) + { + std::cerr << "ViennaCL: FATAL ERROR in ILUT(): Diagonal entry computed to zero (" << value << ") in row " << i << "!" << std::endl; + throw zero_on_diagonal_exception("ILUT zero diagonal!"); + } + } + else // entry for U: + detail::insert_with_value_sort(sorted_entries_U, col, value); + } + + //Lines 10-12: Apply a dropping rule to w, write the largest p values to L and U + unsigned int offset_L = row_buffer_L[i]; + std::sort(sorted_entries_L.begin(), sorted_entries_L.end()); + for (unsigned int j=0; j<tag.get_entries_per_row(); ++j) + if (std::fabs(sorted_entries_L[j].second) > 0) + { + col_buffer_L[offset_L] = sorted_entries_L[j].first; + elements_L[offset_L] = sorted_entries_L[j].second; + ++offset_L; + } + row_buffer_L[i+1] = offset_L; + + unsigned int offset_U = row_buffer_U[i]; + col_buffer_U[offset_U] = static_cast<unsigned int>(i); + elements_U[offset_U] = diagonal_U[i]; + ++offset_U; + std::sort(sorted_entries_U.begin(), sorted_entries_U.end()); + for (unsigned int j=0; j<tag.get_entries_per_row(); ++j) + if (std::fabs(sorted_entries_U[j].second) > 0) + { + col_buffer_U[offset_U] = sorted_entries_U[j].first; + elements_U[offset_U] = sorted_entries_U[j].second; + ++offset_U; + } + row_buffer_U[i+1] = offset_U; + + } //for i +} + + +/** @brief ILUT preconditioner class, can be supplied to solve()-routines +*/ +template<typename MatrixT> +class ilut_precond +{ + typedef typename MatrixT::value_type NumericType; + +public: + ilut_precond(MatrixT const & mat, ilut_tag const & tag) : tag_(tag), L_(mat.size1(), mat.size2()), U_(mat.size1(), mat.size2()) + { + //initialize preconditioner: + //std::cout << "Start CPU precond" << std::endl; + init(mat); + //std::cout << "End CPU precond" << std::endl; + } + + template<typename VectorT> + void apply(VectorT & vec) const + { + //Note: Since vec can be a rather arbitrary vector type, we call the more generic version in the backend manually: + { + unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_.handle1()); + unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_.handle2()); + NumericType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericType>(L_.handle()); + + viennacl::linalg::host_based::detail::csr_inplace_solve<NumericType>(row_buffer, col_buffer, elements, vec, L_.size2(), unit_lower_tag()); + } + { + unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_.handle1()); + unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_.handle2()); + NumericType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericType>(U_.handle()); + + viennacl::linalg::host_based::detail::csr_inplace_solve<NumericType>(row_buffer, col_buffer, elements, vec, U_.size2(), upper_tag()); + } + } + +private: + void init(MatrixT const & mat) + { + viennacl::context host_context(viennacl::MAIN_MEMORY); + viennacl::compressed_matrix<NumericType> temp; + viennacl::switch_memory_context(temp, host_context); + viennacl::switch_memory_context(L_, host_context); + viennacl::switch_memory_context(U_, host_context); + + viennacl::copy(mat, temp); + + viennacl::linalg::precondition(temp, L_, U_, tag_); + } + + ilut_tag tag_; + viennacl::compressed_matrix<NumericType> L_; + viennacl::compressed_matrix<NumericType> U_; +}; + + +/** @brief ILUT preconditioner class, can be supplied to solve()-routines. +* +* Specialization for compressed_matrix +*/ +template<typename NumericT, unsigned int AlignmentV> +class ilut_precond< viennacl::compressed_matrix<NumericT, AlignmentV> > +{ +typedef viennacl::compressed_matrix<NumericT, AlignmentV> MatrixType; + +public: + ilut_precond(MatrixType const & mat, ilut_tag const & tag) + : tag_(tag), + L_(mat.size1(), mat.size2(), viennacl::traits::context(mat)), + U_(mat.size1(), mat.size2(), viennacl::traits::context(mat)) + { + //initialize preconditioner: + //std::cout << "Start GPU precond" << std::endl; + init(mat); + //std::cout << "End GPU precond" << std::endl; + } + + void apply(viennacl::vector<NumericT> & vec) const + { + if (vec.handle().get_active_handle_id() != viennacl::MAIN_MEMORY) + { + if (tag_.use_level_scheduling()) + { + //std::cout << "Using multifrontal on GPU..." << std::endl; + detail::level_scheduling_substitute(vec, + multifrontal_L_row_index_arrays_, + multifrontal_L_row_buffers_, + multifrontal_L_col_buffers_, + multifrontal_L_element_buffers_, + multifrontal_L_row_elimination_num_list_); + + vec = viennacl::linalg::element_div(vec, multifrontal_U_diagonal_); + + detail::level_scheduling_substitute(vec, + multifrontal_U_row_index_arrays_, + multifrontal_U_row_buffers_, + multifrontal_U_col_buffers_, + multifrontal_U_element_buffers_, + multifrontal_U_row_elimination_num_list_); + + } + else + { + viennacl::context host_context(viennacl::MAIN_MEMORY); + viennacl::context old_context = viennacl::traits::context(vec); + viennacl::switch_memory_context(vec, host_context); + viennacl::linalg::inplace_solve(L_, vec, unit_lower_tag()); + viennacl::linalg::inplace_solve(U_, vec, upper_tag()); + viennacl::switch_memory_context(vec, old_context); + } + } + else //apply ILUT directly: + { + viennacl::linalg::inplace_solve(L_, vec, unit_lower_tag()); + viennacl::linalg::inplace_solve(U_, vec, upper_tag()); + } + } + +private: + void init(MatrixType const & mat) + { + viennacl::context host_context(viennacl::MAIN_MEMORY); + viennacl::switch_memory_context(L_, host_context); + viennacl::switch_memory_context(U_, host_context); + + if (viennacl::traits::context(mat).memory_type() == viennacl::MAIN_MEMORY) + { + viennacl::linalg::precondition(mat, L_, U_, tag_); + } + else //we need to copy to CPU + { + viennacl::compressed_matrix<NumericT> cpu_mat(mat.size1(), mat.size2(), viennacl::traits::context(mat)); + viennacl::switch_memory_context(cpu_mat, host_context); + + cpu_mat = mat; + + viennacl::linalg::precondition(cpu_mat, L_, U_, tag_); + } + + if (!tag_.use_level_scheduling()) + return; + + // + // multifrontal part: + // + + viennacl::switch_memory_context(multifrontal_U_diagonal_, host_context); + multifrontal_U_diagonal_.resize(U_.size1(), false); + host_based::detail::row_info(U_, multifrontal_U_diagonal_, viennacl::linalg::detail::SPARSE_ROW_DIAGONAL); + + detail::level_scheduling_setup_L(L_, + multifrontal_U_diagonal_, //dummy + multifrontal_L_row_index_arrays_, + multifrontal_L_row_buffers_, + multifrontal_L_col_buffers_, + multifrontal_L_element_buffers_, + multifrontal_L_row_elimination_num_list_); + + + detail::level_scheduling_setup_U(U_, + multifrontal_U_diagonal_, + multifrontal_U_row_index_arrays_, + multifrontal_U_row_buffers_, + multifrontal_U_col_buffers_, + multifrontal_U_element_buffers_, + multifrontal_U_row_elimination_num_list_); + + // + // Bring to device if necessary: + // + + // L: + + for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_index_arrays_.begin(); + it != multifrontal_L_row_index_arrays_.end(); + ++it) + viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); + + for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_buffers_.begin(); + it != multifrontal_L_row_buffers_.end(); + ++it) + viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); + + for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_col_buffers_.begin(); + it != multifrontal_L_col_buffers_.end(); + ++it) + viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); + + for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_element_buffers_.begin(); + it != multifrontal_L_element_buffers_.end(); + ++it) + viennacl::backend::switch_memory_context<NumericT>(*it, viennacl::traits::context(mat)); + + + // U: + + viennacl::switch_memory_context(multifrontal_U_diagonal_, viennacl::traits::context(mat)); + + for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_index_arrays_.begin(); + it != multifrontal_U_row_index_arrays_.end(); + ++it) + viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); + + for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_buffers_.begin(); + it != multifrontal_U_row_buffers_.end(); + ++it) + viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); + + for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_col_buffers_.begin(); + it != multifrontal_U_col_buffers_.end(); + ++it) + viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); + + for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_element_buffers_.begin(); + it != multifrontal_U_element_buffers_.end(); + ++it) + viennacl::backend::switch_memory_context<NumericT>(*it, viennacl::traits::context(mat)); + + + } + + ilut_tag tag_; + viennacl::compressed_matrix<NumericT> L_; + viennacl::compressed_matrix<NumericT> U_; + + std::list<viennacl::backend::mem_handle> multifrontal_L_row_index_arrays_; + std::list<viennacl::backend::mem_handle> multifrontal_L_row_buffers_; + std::list<viennacl::backend::mem_handle> multifrontal_L_col_buffers_; + std::list<viennacl::backend::mem_handle> multifrontal_L_element_buffers_; + std::list<vcl_size_t > multifrontal_L_row_elimination_num_list_; + + viennacl::vector<NumericT> multifrontal_U_diagonal_; + std::list<viennacl::backend::mem_handle> multifrontal_U_row_index_arrays_; + std::list<viennacl::backend::mem_handle> multifrontal_U_row_buffers_; + std::list<viennacl::backend::mem_handle> multifrontal_U_col_buffers_; + std::list<viennacl::backend::mem_handle> multifrontal_U_element_buffers_; + std::list<vcl_size_t > multifrontal_U_row_elimination_num_list_; +}; + +} // namespace linalg +} // namespace viennacl + + + + +#endif + + + http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_applier.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_applier.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_applier.hpp new file mode 100644 index 0000000..0e2abb0 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_applier.hpp @@ -0,0 +1,103 @@ +#ifndef VIENNACL_LINALG_DETAIL_OP_APPLIER_HPP +#define VIENNACL_LINALG_DETAIL_OP_APPLIER_HPP + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/linalg/detail/op_applier.hpp + * + * @brief Defines the action of certain unary and binary operators and its arguments (for host execution). +*/ + +#include "viennacl/forwards.h" +#include <cmath> + +namespace viennacl +{ +namespace linalg +{ +namespace detail +{ + +/** @brief Worker class for decomposing expression templates. + * + * @tparam A Type to which is assigned to + * @tparam OP One out of {op_assign, op_inplace_add, op_inplace_sub} + @ @tparam T Right hand side of the assignment +*/ +template<typename OpT> +struct op_applier +{ + typedef typename OpT::ERROR_UNKNOWN_OP_TAG_PROVIDED error_type; +}; + +/** \cond */ +template<> +struct op_applier<op_element_binary<op_prod> > +{ + template<typename T> + static void apply(T & result, T const & x, T const & y) { result = x * y; } +}; + +template<> +struct op_applier<op_element_binary<op_div> > +{ + template<typename T> + static void apply(T & result, T const & x, T const & y) { result = x / y; } +}; + +template<> +struct op_applier<op_element_binary<op_pow> > +{ + template<typename T> + static void apply(T & result, T const & x, T const & y) { result = std::pow(x, y); } +}; + +#define VIENNACL_MAKE_UNARY_OP_APPLIER(funcname) \ +template<> \ +struct op_applier<op_element_unary<op_##funcname> > \ +{ \ + template<typename T> \ + static void apply(T & result, T const & x) { using namespace std; result = funcname(x); } \ +} + +VIENNACL_MAKE_UNARY_OP_APPLIER(abs); +VIENNACL_MAKE_UNARY_OP_APPLIER(acos); +VIENNACL_MAKE_UNARY_OP_APPLIER(asin); +VIENNACL_MAKE_UNARY_OP_APPLIER(atan); +VIENNACL_MAKE_UNARY_OP_APPLIER(ceil); +VIENNACL_MAKE_UNARY_OP_APPLIER(cos); +VIENNACL_MAKE_UNARY_OP_APPLIER(cosh); +VIENNACL_MAKE_UNARY_OP_APPLIER(exp); +VIENNACL_MAKE_UNARY_OP_APPLIER(fabs); +VIENNACL_MAKE_UNARY_OP_APPLIER(floor); +VIENNACL_MAKE_UNARY_OP_APPLIER(log); +VIENNACL_MAKE_UNARY_OP_APPLIER(log10); +VIENNACL_MAKE_UNARY_OP_APPLIER(sin); +VIENNACL_MAKE_UNARY_OP_APPLIER(sinh); +VIENNACL_MAKE_UNARY_OP_APPLIER(sqrt); +VIENNACL_MAKE_UNARY_OP_APPLIER(tan); +VIENNACL_MAKE_UNARY_OP_APPLIER(tanh); + +#undef VIENNACL_MAKE_UNARY_OP_APPLIER +/** \endcond */ + +} +} +} + +#endif // VIENNACL_LINALG_DETAIL_OP_EXECUTOR_HPP http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_executor.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_executor.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_executor.hpp new file mode 100644 index 0000000..bd49b3b --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_executor.hpp @@ -0,0 +1,86 @@ +#ifndef VIENNACL_LINALG_DETAIL_OP_EXECUTOR_HPP +#define VIENNACL_LINALG_DETAIL_OP_EXECUTOR_HPP + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/linalg/detail/op_executor.hpp + * + * @brief Defines the worker class for decomposing an expression tree into small chunks, which can be processed by the predefined operations in ViennaCL. +*/ + +#include "viennacl/forwards.h" + +namespace viennacl +{ +namespace linalg +{ +namespace detail +{ + +template<typename NumericT, typename B> +bool op_aliasing(vector_base<NumericT> const & /*lhs*/, B const & /*b*/) +{ + return false; +} + +template<typename NumericT> +bool op_aliasing(vector_base<NumericT> const & lhs, vector_base<NumericT> const & b) +{ + return lhs.handle() == b.handle(); +} + +template<typename NumericT, typename LhsT, typename RhsT, typename OpT> +bool op_aliasing(vector_base<NumericT> const & lhs, vector_expression<const LhsT, const RhsT, OpT> const & rhs) +{ + return op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()); +} + + +template<typename NumericT, typename B> +bool op_aliasing(matrix_base<NumericT> const & /*lhs*/, B const & /*b*/) +{ + return false; +} + +template<typename NumericT> +bool op_aliasing(matrix_base<NumericT> const & lhs, matrix_base<NumericT> const & b) +{ + return lhs.handle() == b.handle(); +} + +template<typename NumericT, typename LhsT, typename RhsT, typename OpT> +bool op_aliasing(matrix_base<NumericT> const & lhs, matrix_expression<const LhsT, const RhsT, OpT> const & rhs) +{ + return op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()); +} + + +/** @brief Worker class for decomposing expression templates. + * + * @tparam A Type to which is assigned to + * @tparam OP One out of {op_assign, op_inplace_add, op_inplace_sub} + @ @tparam T Right hand side of the assignment +*/ +template<typename A, typename OP, typename T> +struct op_executor {}; + +} +} +} + +#endif // VIENNACL_LINALG_DETAIL_OP_EXECUTOR_HPP http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_matrix.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_matrix.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_matrix.hpp new file mode 100644 index 0000000..12ff77b --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_matrix.hpp @@ -0,0 +1,86 @@ +#ifndef VIENNACL_LINALG_DETAIL_SPAI_BLOCK_MATRIX_HPP +#define VIENNACL_LINALG_DETAIL_SPAI_BLOCK_MATRIX_HPP + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +#include <utility> +#include <iostream> +#include <fstream> +#include <string> +#include <algorithm> +#include <vector> +#include "viennacl/ocl/backend.hpp" +#include "viennacl/tools/tools.hpp" + +/** @file viennacl/linalg/detail/spai/block_matrix.hpp + @brief Implementation of a bunch of (small) matrices on GPU. Experimental. + + SPAI code contributed by Nikolay Lukash +*/ + +namespace viennacl +{ +namespace linalg +{ +namespace detail +{ +namespace spai +{ + +/** +* @brief Represents contigious matrices on GPU +*/ + +class block_matrix +{ +public: + + ////////// non-const + + /** @brief Returns a handle to the elements */ + viennacl::ocl::handle<cl_mem>& handle(){ return elements_; } + + /** @brief Returns a handle to the matrix dimensions */ + viennacl::ocl::handle<cl_mem>& handle1() { return matrix_dimensions_; } + + /** @brief Returns a handle to the start indices of matrix */ + viennacl::ocl::handle<cl_mem>& handle2() { return start_block_inds_; } + + ////////// const + + /** @brief Returns a handle to the const elements */ + const viennacl::ocl::handle<cl_mem>& handle() const { return elements_; } + + /** @brief Returns a handle to the const matrix dimensions */ + const viennacl::ocl::handle<cl_mem>& handle1() const { return matrix_dimensions_; } + + /** @brief Returns a handle to the const start indices of matrix */ + const viennacl::ocl::handle<cl_mem>& handle2() const { return start_block_inds_; } + +private: + viennacl::ocl::handle<cl_mem> elements_; + viennacl::ocl::handle<cl_mem> matrix_dimensions_; + viennacl::ocl::handle<cl_mem> start_block_inds_; +}; + + +} +} +} +} +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_vector.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_vector.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_vector.hpp new file mode 100644 index 0000000..eee6aef --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_vector.hpp @@ -0,0 +1,77 @@ +#ifndef VIENNACL_LINALG_DETAIL_SPAI_BLOCK_VECTOR_HPP +#define VIENNACL_LINALG_DETAIL_SPAI_BLOCK_VECTOR_HPP + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +#include <utility> +#include <iostream> +#include <fstream> +#include <string> +#include <algorithm> +#include <vector> +#include "viennacl/ocl/backend.hpp" +#include "viennacl/tools/tools.hpp" + +/** @file viennacl/linalg/detail/spai/block_vector.hpp + @brief Implementation of a bunch of vectors on GPU. Experimental. + + SPAI code contributed by Nikolay Lukash +*/ + +namespace viennacl +{ +namespace linalg +{ +namespace detail +{ +namespace spai +{ + +/** +* @brief Represents a contiguous vector on the GPU to represent a concatentation of small vectors +*/ +class block_vector +{ +public: + + ///////////// non-const + + /** @brief Return handle to the elements */ + viennacl::ocl::handle<cl_mem> & handle(){ return elements_; } + + /** @brief Return handle to start indices */ + viennacl::ocl::handle<cl_mem> & handle1() { return start_block_inds_; } + + ///////////// const + + /** @brief Return handle to the const elements */ + const viennacl::ocl::handle<cl_mem> & handle() const { return elements_; } + + /** @brief Return handle to const start indices */ + const viennacl::ocl::handle<cl_mem> & handle1() const { return start_block_inds_; } + +private: + viennacl::ocl::handle<cl_mem> elements_; + viennacl::ocl::handle<cl_mem> start_block_inds_; +}; + +} +} +} +} +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/fspai.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/fspai.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/fspai.hpp new file mode 100644 index 0000000..fab81d7 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/fspai.hpp @@ -0,0 +1,402 @@ +#ifndef VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP +#define VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +#include <utility> +#include <iostream> +#include <fstream> +#include <string> +#include <algorithm> +#include <vector> +#include <math.h> +#include <map> + +//boost includes +#include "boost/numeric/ublas/vector.hpp" +#include "boost/numeric/ublas/matrix.hpp" +#include "boost/numeric/ublas/matrix_proxy.hpp" +#include "boost/numeric/ublas/vector_proxy.hpp" +#include "boost/numeric/ublas/storage.hpp" +#include "boost/numeric/ublas/io.hpp" +#include "boost/numeric/ublas/lu.hpp" +#include "boost/numeric/ublas/triangular.hpp" +#include "boost/numeric/ublas/matrix_expression.hpp" + +// ViennaCL includes +#include "viennacl/linalg/prod.hpp" +#include "viennacl/matrix.hpp" +#include "viennacl/compressed_matrix.hpp" +#include "viennacl/linalg/sparse_matrix_operations.hpp" +#include "viennacl/linalg/matrix_operations.hpp" +#include "viennacl/scalar.hpp" +#include "viennacl/linalg/cg.hpp" +#include "viennacl/linalg/inner_prod.hpp" +#include "viennacl/linalg/ilu.hpp" +//#include <omp.h> + +/** @file viennacl/linalg/detail/spai/fspai.hpp + @brief Implementation of FSPAI. Experimental. +*/ + +namespace viennacl +{ +namespace linalg +{ +namespace detail +{ +namespace spai +{ + +/** @brief A tag for FSPAI. Experimental. +* +* Contains values for the algorithm. +* Must be passed to spai_precond constructor +*/ +class fspai_tag +{ +public: + /** @brief Constructor + * + * @param residual_norm_threshold Calculate until the norm of the residual falls below this threshold + * @param iteration_limit maximum number of iterations + * @param is_static determines if static version of SPAI should be used + * @param is_right determines if left or right preconditioner should be used + */ + fspai_tag( + double residual_norm_threshold = 1e-3, + unsigned int iteration_limit = 5, + bool is_static = false, + bool is_right = false) + : residual_norm_threshold_(residual_norm_threshold), + iteration_limit_(iteration_limit), + is_static_(is_static), + is_right_(is_right) {} + + inline double getResidualNormThreshold() const { return residual_norm_threshold_; } + inline unsigned long getIterationLimit () const { return iteration_limit_; } + inline bool getIsStatic() const { return is_static_; } + inline bool getIsRight() const { return is_right_; } + inline void setResidualNormThreshold(double residual_norm_threshold) + { + if (residual_norm_threshold > 0) + residual_norm_threshold_ = residual_norm_threshold; + } + inline void setIterationLimit(unsigned long iteration_limit) + { + if (iteration_limit > 0) + iteration_limit_ = iteration_limit; + } + inline void setIsRight(bool is_right) { is_right_ = is_right; } + inline void setIsStatic(bool is_static) { is_static_ = is_static; } + +private: + double residual_norm_threshold_; + unsigned long iteration_limit_; + bool is_static_; + bool is_right_; +}; + + +// +// Helper: Store A in an STL container of type, exploiting symmetry +// Reason: ublas interface does not allow to iterate over nonzeros of a particular row without starting an iterator1 from the very beginning of the matrix... +// +template<typename MatrixT, typename NumericT> +void sym_sparse_matrix_to_stl(MatrixT const & A, std::vector<std::map<unsigned int, NumericT> > & STL_A) +{ + STL_A.resize(A.size1()); + for (typename MatrixT::const_iterator1 row_it = A.begin1(); + row_it != A.end1(); + ++row_it) + { + for (typename MatrixT::const_iterator2 col_it = row_it.begin(); + col_it != row_it.end(); + ++col_it) + { + if (col_it.index1() >= col_it.index2()) + STL_A[col_it.index1()][static_cast<unsigned int>(col_it.index2())] = *col_it; + else + break; //go to next row + } + } +} + + +// +// Generate index sets J_k, k=0,...,N-1 +// +template<typename MatrixT> +void generateJ(MatrixT const & A, std::vector<std::vector<vcl_size_t> > & J) +{ + for (typename MatrixT::const_iterator1 row_it = A.begin1(); + row_it != A.end1(); + ++row_it) + { + for (typename MatrixT::const_iterator2 col_it = row_it.begin(); + col_it != row_it.end(); + ++col_it) + { + if (col_it.index1() > col_it.index2()) //Matrix is symmetric, thus only work on lower triangular part + { + J[col_it.index2()].push_back(col_it.index1()); + J[col_it.index1()].push_back(col_it.index2()); + } + else + break; //go to next row + } + } +} + + +// +// Extracts the blocks A(\tilde{J}_k, \tilde{J}_k) from A +// Sets up y_k = A(\tilde{J}_k, k) for the inplace-solution after Cholesky-factoriation +// +template<typename NumericT, typename MatrixT, typename VectorT> +void fill_blocks(std::vector< std::map<unsigned int, NumericT> > & A, + std::vector<MatrixT> & blocks, + std::vector<std::vector<vcl_size_t> > const & J, + std::vector<VectorT> & Y) +{ + for (vcl_size_t k=0; k<A.size(); ++k) + { + std::vector<vcl_size_t> const & Jk = J[k]; + VectorT & yk = Y[k]; + MatrixT & block_k = blocks[k]; + + yk.resize(Jk.size()); + block_k.resize(Jk.size(), Jk.size()); + block_k.clear(); + + for (vcl_size_t i=0; i<Jk.size(); ++i) + { + vcl_size_t row_index = Jk[i]; + std::map<unsigned int, NumericT> & A_row = A[row_index]; + + //fill y_k: + yk[i] = A_row[static_cast<unsigned int>(k)]; + + for (vcl_size_t j=0; j<Jk.size(); ++j) + { + vcl_size_t col_index = Jk[j]; + if (col_index <= row_index && A_row.find(static_cast<unsigned int>(col_index)) != A_row.end()) //block is symmetric, thus store only lower triangular part + block_k(i, j) = A_row[static_cast<unsigned int>(col_index)]; + } + } + } +} + + +// +// Perform Cholesky factorization of A inplace. Cf. Schwarz: Numerische Mathematik, vol 5, p. 58 +// +template<typename MatrixT> +void cholesky_decompose(MatrixT & A) +{ + for (vcl_size_t k=0; k<A.size2(); ++k) + { + if (A(k,k) <= 0) + { + std::cout << "k: " << k << std::endl; + std::cout << "A(k,k): " << A(k,k) << std::endl; + } + + assert(A(k,k) > 0 && bool("Matrix not positive definite in Cholesky factorization.")); + + A(k,k) = std::sqrt(A(k,k)); + + for (vcl_size_t i=k+1; i<A.size1(); ++i) + { + A(i,k) /= A(k,k); + for (vcl_size_t j=k+1; j<=i; ++j) + A(i,j) -= A(i,k) * A(j,k); + } + } +} + + +// +// Compute x in Ax = b, where A is already Cholesky factored (A = L L^T) +// +template<typename MatrixT, typename VectorT> +void cholesky_solve(MatrixT const & L, VectorT & b) +{ + // inplace forward solve L x = b + for (vcl_size_t i=0; i<L.size1(); ++i) + { + for (vcl_size_t j=0; j<i; ++j) + b[i] -= L(i,j) * b[j]; + b[i] /= L(i,i); + } + + // inplace backward solve L^T x = b: + for (vcl_size_t i=L.size1()-1;; --i) + { + for (vcl_size_t k=i+1; k<L.size1(); ++k) + b[i] -= L(k,i) * b[k]; + b[i] /= L(i,i); + + if (i==0) //vcl_size_t might be unsigned, therefore manual check for equality with zero here + break; + } +} + + + +// +// Compute the Cholesky factor L from the sparse vectors y_k +// +template<typename MatrixT, typename VectorT> +void computeL(MatrixT const & A, + MatrixT & L, + MatrixT & L_trans, + std::vector<VectorT> & Y, + std::vector<std::vector<vcl_size_t> > & J) +{ + typedef typename VectorT::value_type NumericType; + typedef std::vector<std::map<unsigned int, NumericType> > STLSparseMatrixType; + + STLSparseMatrixType L_temp(A.size1()); + + for (vcl_size_t k=0; k<A.size1(); ++k) + { + std::vector<vcl_size_t> const & Jk = J[k]; + VectorT const & yk = Y[k]; + + //compute L(k,k): + NumericType Lkk = A(k,k); + for (vcl_size_t i=0; i<Jk.size(); ++i) + Lkk -= A(Jk[i],k) * yk[i]; + + Lkk = NumericType(1) / std::sqrt(Lkk); + L_temp[k][static_cast<unsigned int>(k)] = Lkk; + L_trans(k,k) = Lkk; + + //write lower diagonal entries: + for (vcl_size_t i=0; i<Jk.size(); ++i) + { + L_temp[Jk[i]][static_cast<unsigned int>(k)] = -Lkk * yk[i]; + L_trans(k, Jk[i]) = -Lkk * yk[i]; + } + } //for k + + + //build L from L_temp + for (vcl_size_t i=0; i<L_temp.size(); ++i) + for (typename std::map<unsigned int, NumericType>::const_iterator it = L_temp[i].begin(); + it != L_temp[i].end(); + ++it) + L(i, it->first) = it->second; +} + + +// +// Top level FSPAI function +// +template<typename MatrixT> +void computeFSPAI(MatrixT const & A, + MatrixT const & PatternA, + MatrixT & L, + MatrixT & L_trans, + fspai_tag) +{ + typedef typename MatrixT::value_type NumericT; + typedef boost::numeric::ublas::matrix<NumericT> DenseMatrixType; + typedef std::vector<std::map<unsigned int, NumericT> > SparseMatrixType; + + // + // preprocessing: Store A in a STL container: + // + //std::cout << "Transferring to STL container:" << std::endl; + std::vector<std::vector<NumericT> > y_k(A.size1()); + SparseMatrixType STL_A(A.size1()); + sym_sparse_matrix_to_stl(A, STL_A); + + + // + // Step 1: Generate pattern indices + // + //std::cout << "computeFSPAI(): Generating pattern..." << std::endl; + std::vector<std::vector<vcl_size_t> > J(A.size1()); + generateJ(PatternA, J); + + // + // Step 2: Set up matrix blocks + // + //std::cout << "computeFSPAI(): Setting up matrix blocks..." << std::endl; + std::vector<DenseMatrixType> subblocks_A(A.size1()); + fill_blocks(STL_A, subblocks_A, J, y_k); + STL_A.clear(); //not needed anymore + + // + // Step 3: Cholesky-factor blocks + // + //std::cout << "computeFSPAI(): Cholesky-factorization..." << std::endl; + for (vcl_size_t i=0; i<subblocks_A.size(); ++i) + { + //std::cout << "Block before: " << subblocks_A[i] << std::endl; + cholesky_decompose(subblocks_A[i]); + //std::cout << "Block after: " << subblocks_A[i] << std::endl; + } + + + /*vcl_size_t num_bytes = 0; + for (vcl_size_t i=0; i<subblocks_A.size(); ++i) + num_bytes += 8*subblocks_A[i].size1()*subblocks_A[i].size2();*/ + //std::cout << "Memory for FSPAI matrix: " << num_bytes / (1024.0 * 1024.0) << " MB" << std::endl; + + // + // Step 4: Solve for y_k + // + //std::cout << "computeFSPAI(): Cholesky-solve..." << std::endl; + for (vcl_size_t i=0; i<y_k.size(); ++i) + { + if (subblocks_A[i].size1() > 0) //block might be empty... + { + //y_k[i].resize(subblocks_A[i].size1()); + //std::cout << "y_k[" << i << "]: "; + //for (vcl_size_t j=0; j<y_k[i].size(); ++j) + // std::cout << y_k[i][j] << " "; + //std::cout << std::endl; + cholesky_solve(subblocks_A[i], y_k[i]); + } + } + + + // + // Step 5: Set up Cholesky factors L and L_trans + // + //std::cout << "computeFSPAI(): Computing L..." << std::endl; + L.resize(A.size1(), A.size2(), false); + L.reserve(A.nnz(), false); + L_trans.resize(A.size1(), A.size2(), false); + L_trans.reserve(A.nnz(), false); + computeL(A, L, L_trans, y_k, J); + + //std::cout << "L: " << L << std::endl; +} + + + +} +} +} +} + +#endif
