http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/common.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/common.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/common.hpp deleted file mode 100644 index 93b0cba..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/common.hpp +++ /dev/null @@ -1,263 +0,0 @@ -#ifndef VIENNACL_LINALG_DETAIL_ILU_COMMON_HPP_ -#define VIENNACL_LINALG_DETAIL_ILU_COMMON_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/linalg/detail/ilu/common.hpp - @brief Common routines used within ILU-type preconditioners -*/ - -#include <vector> -#include <cmath> -#include <iostream> -#include <map> -#include <list> - -#include "viennacl/forwards.h" -#include "viennacl/tools/tools.hpp" -#include "viennacl/backend/memory.hpp" - -#include "viennacl/linalg/host_based/common.hpp" -#include "viennacl/linalg/misc_operations.hpp" - -namespace viennacl -{ -namespace linalg -{ -namespace detail -{ - - -// -// Level Scheduling Setup for ILU: -// - -template<typename NumericT, unsigned int AlignmentV> -void level_scheduling_setup_impl(viennacl::compressed_matrix<NumericT, AlignmentV> const & LU, - viennacl::vector<NumericT> const & diagonal_LU, - std::list<viennacl::backend::mem_handle> & row_index_arrays, - std::list<viennacl::backend::mem_handle> & row_buffers, - std::list<viennacl::backend::mem_handle> & col_buffers, - std::list<viennacl::backend::mem_handle> & element_buffers, - std::list<vcl_size_t> & row_elimination_num_list, - bool setup_U) -{ - NumericT const * diagonal_buf = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(diagonal_LU.handle()); - NumericT const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(LU.handle()); - unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU.handle1()); - unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU.handle2()); - - // - // Step 1: Determine row elimination order for each row and build up meta information about the number of entries taking part in each elimination step: - // - std::vector<vcl_size_t> row_elimination(LU.size1()); - std::map<vcl_size_t, std::map<vcl_size_t, vcl_size_t> > row_entries_per_elimination_step; - - vcl_size_t max_elimination_runs = 0; - for (vcl_size_t row2 = 0; row2 < LU.size1(); ++row2) - { - vcl_size_t row = setup_U ? (LU.size1() - row2) - 1 : row2; - - vcl_size_t row_begin = row_buffer[row]; - vcl_size_t row_end = row_buffer[row+1]; - vcl_size_t elimination_index = 0; //Note: first run corresponds to elimination_index = 1 (otherwise, type issues with int <-> unsigned int would arise - for (vcl_size_t i = row_begin; i < row_end; ++i) - { - unsigned int col = col_buffer[i]; - if ( (!setup_U && col < row) || (setup_U && col > row) ) - { - elimination_index = std::max<vcl_size_t>(elimination_index, row_elimination[col]); - row_entries_per_elimination_step[row_elimination[col]][row] += 1; - } - } - row_elimination[row] = elimination_index + 1; - max_elimination_runs = std::max<vcl_size_t>(max_elimination_runs, elimination_index + 1); - } - - //std::cout << "Number of elimination runs: " << max_elimination_runs << std::endl; - - // - // Step 2: Build row-major elimination matrix for each elimination step - // - - //std::cout << "Elimination order: " << std::endl; - //for (vcl_size_t i=0; i<row_elimination.size(); ++i) - // std::cout << row_elimination[i] << ", "; - //std::cout << std::endl; - - //vcl_size_t summed_rows = 0; - for (vcl_size_t elimination_run = 1; elimination_run <= max_elimination_runs; ++elimination_run) - { - std::map<vcl_size_t, vcl_size_t> const & current_elimination_info = row_entries_per_elimination_step[elimination_run]; - - // count cols and entries handled in this elimination step - vcl_size_t num_tainted_cols = current_elimination_info.size(); - vcl_size_t num_entries = 0; - - for (std::map<vcl_size_t, vcl_size_t>::const_iterator it = current_elimination_info.begin(); - it != current_elimination_info.end(); - ++it) - num_entries += it->second; - - //std::cout << "num_entries: " << num_entries << std::endl; - //std::cout << "num_tainted_cols: " << num_tainted_cols << std::endl; - - if (num_tainted_cols > 0) - { - row_index_arrays.push_back(viennacl::backend::mem_handle()); - viennacl::backend::switch_memory_context<unsigned int>(row_index_arrays.back(), viennacl::traits::context(LU)); - viennacl::backend::typesafe_host_array<unsigned int> elim_row_index_array(row_index_arrays.back(), num_tainted_cols); - - row_buffers.push_back(viennacl::backend::mem_handle()); - viennacl::backend::switch_memory_context<unsigned int>(row_buffers.back(), viennacl::traits::context(LU)); - viennacl::backend::typesafe_host_array<unsigned int> elim_row_buffer(row_buffers.back(), num_tainted_cols + 1); - - col_buffers.push_back(viennacl::backend::mem_handle()); - viennacl::backend::switch_memory_context<unsigned int>(col_buffers.back(), viennacl::traits::context(LU)); - viennacl::backend::typesafe_host_array<unsigned int> elim_col_buffer(col_buffers.back(), num_entries); - - element_buffers.push_back(viennacl::backend::mem_handle()); - viennacl::backend::switch_memory_context<NumericT>(element_buffers.back(), viennacl::traits::context(LU)); - std::vector<NumericT> elim_elements_buffer(num_entries); - - row_elimination_num_list.push_back(num_tainted_cols); - - vcl_size_t k=0; - vcl_size_t nnz_index = 0; - elim_row_buffer.set(0, 0); - - for (std::map<vcl_size_t, vcl_size_t>::const_iterator it = current_elimination_info.begin(); - it != current_elimination_info.end(); - ++it) - { - //vcl_size_t col = setup_U ? (elimination_matrix.size() - it->first) - 1 : col2; - vcl_size_t row = it->first; - elim_row_index_array.set(k, row); - - vcl_size_t row_begin = row_buffer[row]; - vcl_size_t row_end = row_buffer[row+1]; - for (vcl_size_t i = row_begin; i < row_end; ++i) - { - unsigned int col = col_buffer[i]; - if ( (!setup_U && col < row) || (setup_U && col > row) ) //entry of L/U - { - if (row_elimination[col] == elimination_run) // this entry is substituted in this run - { - elim_col_buffer.set(nnz_index, col); - elim_elements_buffer[nnz_index] = setup_U ? elements[i] / diagonal_buf[it->first] : elements[i]; - ++nnz_index; - } - } - } - - elim_row_buffer.set(++k, nnz_index); - } - - // - // Wrap in memory_handles: - // - viennacl::backend::memory_create(row_index_arrays.back(), elim_row_index_array.raw_size(), viennacl::traits::context(row_index_arrays.back()), elim_row_index_array.get()); - viennacl::backend::memory_create(row_buffers.back(), elim_row_buffer.raw_size(), viennacl::traits::context(row_buffers.back()), elim_row_buffer.get()); - viennacl::backend::memory_create(col_buffers.back(), elim_col_buffer.raw_size(), viennacl::traits::context(col_buffers.back()), elim_col_buffer.get()); - viennacl::backend::memory_create(element_buffers.back(), sizeof(NumericT) * elim_elements_buffer.size(), viennacl::traits::context(element_buffers.back()), &(elim_elements_buffer[0])); - } - - // Print some info: - //std::cout << "Eliminated columns in run " << elimination_run << ": " << num_tainted_cols << " (tainted columns: " << num_tainted_cols << ")" << std::endl; - //summed_rows += eliminated_rows_in_run; - //if (eliminated_rows_in_run == 0) - // break; - } - //std::cout << "Eliminated rows: " << summed_rows << " out of " << row_elimination.size() << std::endl; -} - - -template<typename NumericT, unsigned int AlignmentV> -void level_scheduling_setup_L(viennacl::compressed_matrix<NumericT, AlignmentV> const & LU, - viennacl::vector<NumericT> const & diagonal_LU, - std::list<viennacl::backend::mem_handle> & row_index_arrays, - std::list<viennacl::backend::mem_handle> & row_buffers, - std::list<viennacl::backend::mem_handle> & col_buffers, - std::list<viennacl::backend::mem_handle> & element_buffers, - std::list<vcl_size_t> & row_elimination_num_list) -{ - level_scheduling_setup_impl(LU, diagonal_LU, row_index_arrays, row_buffers, col_buffers, element_buffers, row_elimination_num_list, false); -} - - -// -// Multifrontal setup of U: -// - -template<typename NumericT, unsigned int AlignmentV> -void level_scheduling_setup_U(viennacl::compressed_matrix<NumericT, AlignmentV> const & LU, - viennacl::vector<NumericT> const & diagonal_LU, - std::list<viennacl::backend::mem_handle> & row_index_arrays, - std::list<viennacl::backend::mem_handle> & row_buffers, - std::list<viennacl::backend::mem_handle> & col_buffers, - std::list<viennacl::backend::mem_handle> & element_buffers, - std::list<vcl_size_t> & row_elimination_num_list) -{ - level_scheduling_setup_impl(LU, diagonal_LU, row_index_arrays, row_buffers, col_buffers, element_buffers, row_elimination_num_list, true); -} - - -// -// Multifrontal substitution (both L and U). Will partly be moved to single_threaded/opencl/cuda implementations -// -template<typename NumericT> -void level_scheduling_substitute(viennacl::vector<NumericT> & vec, - std::list<viennacl::backend::mem_handle> const & row_index_arrays, - std::list<viennacl::backend::mem_handle> const & row_buffers, - std::list<viennacl::backend::mem_handle> const & col_buffers, - std::list<viennacl::backend::mem_handle> const & element_buffers, - std::list<vcl_size_t> const & row_elimination_num_list) -{ - typedef typename std::list< viennacl::backend::mem_handle >::const_iterator ListIterator; - ListIterator row_index_array_it = row_index_arrays.begin(); - ListIterator row_buffers_it = row_buffers.begin(); - ListIterator col_buffers_it = col_buffers.begin(); - ListIterator element_buffers_it = element_buffers.begin(); - typename std::list< vcl_size_t>::const_iterator row_elimination_num_it = row_elimination_num_list.begin(); - for (vcl_size_t i=0; i<row_index_arrays.size(); ++i) - { - viennacl::linalg::detail::level_scheduling_substitute(vec, *row_index_array_it, *row_buffers_it, *col_buffers_it, *element_buffers_it, *row_elimination_num_it); - - ++row_index_array_it; - ++row_buffers_it; - ++col_buffers_it; - ++element_buffers_it; - ++row_elimination_num_it; - } -} - - - - - -} // namespace detail -} // namespace linalg -} // namespace viennacl - - - - -#endif - - -
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilu0.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilu0.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilu0.hpp deleted file mode 100644 index 1c3191a..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilu0.hpp +++ /dev/null @@ -1,379 +0,0 @@ -#ifndef VIENNACL_LINALG_DETAIL_ILU0_HPP_ -#define VIENNACL_LINALG_DETAIL_ILU0_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/linalg/detail/ilu/ilu0.hpp - @brief Implementations of incomplete factorization preconditioners with static nonzero pattern. - - Contributed by Evan Bollig. - - ILU0 (Incomplete LU with zero fill-in) - - All preconditioner nonzeros exist at locations that were nonzero in the input matrix. - - The number of nonzeros in the output preconditioner are exactly the same number as the input matrix - - Evan Bollig 3/30/12 - - Adapted from viennacl/linalg/detail/ilut.hpp - - Low-level reimplementation by Karl Rupp in Nov 2012, increasing performance substantially. Also added level-scheduling. - -*/ - -#include <vector> -#include <cmath> -#include <iostream> -#include "viennacl/forwards.h" -#include "viennacl/tools/tools.hpp" -#include "viennacl/linalg/detail/ilu/common.hpp" -#include "viennacl/compressed_matrix.hpp" -#include "viennacl/backend/memory.hpp" - -#include "viennacl/linalg/host_based/common.hpp" - -#include <map> - -namespace viennacl -{ -namespace linalg -{ - -/** @brief A tag for incomplete LU factorization with static pattern (ILU0) -*/ -class ilu0_tag -{ -public: - ilu0_tag(bool with_level_scheduling = false) : use_level_scheduling_(with_level_scheduling) {} - - bool use_level_scheduling() const { return use_level_scheduling_; } - void use_level_scheduling(bool b) { use_level_scheduling_ = b; } - -private: - bool use_level_scheduling_; -}; - - -/** @brief Implementation of a ILU-preconditioner with static pattern. Optimized version for CSR matrices. - * - * refer to the Algorithm in Saad's book (1996 edition) - * - * @param A The sparse matrix matrix. The result is directly written to A. - */ -template<typename NumericT> -void precondition(viennacl::compressed_matrix<NumericT> & A, ilu0_tag const & /* tag */) -{ - assert( (A.handle1().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") ); - assert( (A.handle2().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") ); - assert( (A.handle().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") ); - - NumericT * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle()); - unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1()); - unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2()); - - // Note: Line numbers in the following refer to the algorithm in Saad's book - - for (vcl_size_t i=1; i<A.size1(); ++i) // Line 1 - { - unsigned int row_i_begin = row_buffer[i]; - unsigned int row_i_end = row_buffer[i+1]; - for (unsigned int buf_index_k = row_i_begin; buf_index_k < row_i_end; ++buf_index_k) //Note: We do not assume that the column indices within a row are sorted - { - unsigned int k = col_buffer[buf_index_k]; - if (k >= i) - continue; //Note: We do not assume that the column indices within a row are sorted - - unsigned int row_k_begin = row_buffer[k]; - unsigned int row_k_end = row_buffer[k+1]; - - // get a_kk: - NumericT a_kk = 0; - for (unsigned int buf_index_akk = row_k_begin; buf_index_akk < row_k_end; ++buf_index_akk) - { - if (col_buffer[buf_index_akk] == k) - { - a_kk = elements[buf_index_akk]; - break; - } - } - - NumericT & a_ik = elements[buf_index_k]; - a_ik /= a_kk; //Line 3 - - for (unsigned int buf_index_j = row_i_begin; buf_index_j < row_i_end; ++buf_index_j) //Note: We do not assume that the column indices within a row are sorted - { - unsigned int j = col_buffer[buf_index_j]; - if (j <= k) - continue; - - // determine a_kj: - NumericT a_kj = 0; - for (unsigned int buf_index_akj = row_k_begin; buf_index_akj < row_k_end; ++buf_index_akj) - { - if (col_buffer[buf_index_akj] == j) - { - a_kj = elements[buf_index_akj]; - break; - } - } - - //a_ij -= a_ik * a_kj - elements[buf_index_j] -= a_ik * a_kj; //Line 5 - } - } - } - -} - - -/** @brief ILU0 preconditioner class, can be supplied to solve()-routines -*/ -template<typename MatrixT> -class ilu0_precond -{ - typedef typename MatrixT::value_type NumericType; - -public: - ilu0_precond(MatrixT const & mat, ilu0_tag const & tag) : tag_(tag), LU_() - { - //initialize preconditioner: - //std::cout << "Start CPU precond" << std::endl; - init(mat); - //std::cout << "End CPU precond" << std::endl; - } - - template<typename VectorT> - void apply(VectorT & vec) const - { - unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_.handle1()); - unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_.handle2()); - NumericType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericType>(LU_.handle()); - - viennacl::linalg::host_based::detail::csr_inplace_solve<NumericType>(row_buffer, col_buffer, elements, vec, LU_.size2(), unit_lower_tag()); - viennacl::linalg::host_based::detail::csr_inplace_solve<NumericType>(row_buffer, col_buffer, elements, vec, LU_.size2(), upper_tag()); - } - -private: - void init(MatrixT const & mat) - { - viennacl::context host_context(viennacl::MAIN_MEMORY); - viennacl::switch_memory_context(LU_, host_context); - - viennacl::copy(mat, LU_); - viennacl::linalg::precondition(LU_, tag_); - } - - ilu0_tag tag_; - viennacl::compressed_matrix<NumericType> LU_; -}; - - -/** @brief ILU0 preconditioner class, can be supplied to solve()-routines. -* -* Specialization for compressed_matrix -*/ -template<typename NumericT, unsigned int AlignmentV> -class ilu0_precond< viennacl::compressed_matrix<NumericT, AlignmentV> > -{ - typedef viennacl::compressed_matrix<NumericT, AlignmentV> MatrixType; - -public: - ilu0_precond(MatrixType const & mat, ilu0_tag const & tag) - : tag_(tag), - LU_(mat.size1(), mat.size2(), viennacl::traits::context(mat)) - { - //initialize preconditioner: - //std::cout << "Start GPU precond" << std::endl; - init(mat); - //std::cout << "End GPU precond" << std::endl; - } - - void apply(viennacl::vector<NumericT> & vec) const - { - viennacl::context host_context(viennacl::MAIN_MEMORY); - if (vec.handle().get_active_handle_id() != viennacl::MAIN_MEMORY) - { - if (tag_.use_level_scheduling()) - { - //std::cout << "Using multifrontal on GPU..." << std::endl; - detail::level_scheduling_substitute(vec, - multifrontal_L_row_index_arrays_, - multifrontal_L_row_buffers_, - multifrontal_L_col_buffers_, - multifrontal_L_element_buffers_, - multifrontal_L_row_elimination_num_list_); - - vec = viennacl::linalg::element_div(vec, multifrontal_U_diagonal_); - - detail::level_scheduling_substitute(vec, - multifrontal_U_row_index_arrays_, - multifrontal_U_row_buffers_, - multifrontal_U_col_buffers_, - multifrontal_U_element_buffers_, - multifrontal_U_row_elimination_num_list_); - } - else - { - viennacl::context old_context = viennacl::traits::context(vec); - viennacl::switch_memory_context(vec, host_context); - viennacl::linalg::inplace_solve(LU_, vec, unit_lower_tag()); - viennacl::linalg::inplace_solve(LU_, vec, upper_tag()); - viennacl::switch_memory_context(vec, old_context); - } - } - else //apply ILU0 directly on CPU - { - if (tag_.use_level_scheduling()) - { - //std::cout << "Using multifrontal..." << std::endl; - detail::level_scheduling_substitute(vec, - multifrontal_L_row_index_arrays_, - multifrontal_L_row_buffers_, - multifrontal_L_col_buffers_, - multifrontal_L_element_buffers_, - multifrontal_L_row_elimination_num_list_); - - vec = viennacl::linalg::element_div(vec, multifrontal_U_diagonal_); - - detail::level_scheduling_substitute(vec, - multifrontal_U_row_index_arrays_, - multifrontal_U_row_buffers_, - multifrontal_U_col_buffers_, - multifrontal_U_element_buffers_, - multifrontal_U_row_elimination_num_list_); - } - else - { - viennacl::linalg::inplace_solve(LU_, vec, unit_lower_tag()); - viennacl::linalg::inplace_solve(LU_, vec, upper_tag()); - } - } - } - - vcl_size_t levels() const { return multifrontal_L_row_index_arrays_.size(); } - -private: - void init(MatrixType const & mat) - { - viennacl::context host_context(viennacl::MAIN_MEMORY); - viennacl::switch_memory_context(LU_, host_context); - LU_ = mat; - viennacl::linalg::precondition(LU_, tag_); - - if (!tag_.use_level_scheduling()) - return; - - // multifrontal part: - viennacl::switch_memory_context(multifrontal_U_diagonal_, host_context); - multifrontal_U_diagonal_.resize(LU_.size1(), false); - host_based::detail::row_info(LU_, multifrontal_U_diagonal_, viennacl::linalg::detail::SPARSE_ROW_DIAGONAL); - - detail::level_scheduling_setup_L(LU_, - multifrontal_U_diagonal_, //dummy - multifrontal_L_row_index_arrays_, - multifrontal_L_row_buffers_, - multifrontal_L_col_buffers_, - multifrontal_L_element_buffers_, - multifrontal_L_row_elimination_num_list_); - - - detail::level_scheduling_setup_U(LU_, - multifrontal_U_diagonal_, - multifrontal_U_row_index_arrays_, - multifrontal_U_row_buffers_, - multifrontal_U_col_buffers_, - multifrontal_U_element_buffers_, - multifrontal_U_row_elimination_num_list_); - - // - // Bring to device if necessary: - // - - // L: - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_index_arrays_.begin(); - it != multifrontal_L_row_index_arrays_.end(); - ++it) - viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); - - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_buffers_.begin(); - it != multifrontal_L_row_buffers_.end(); - ++it) - viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); - - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_col_buffers_.begin(); - it != multifrontal_L_col_buffers_.end(); - ++it) - viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); - - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_element_buffers_.begin(); - it != multifrontal_L_element_buffers_.end(); - ++it) - viennacl::backend::switch_memory_context<NumericT>(*it, viennacl::traits::context(mat)); - - - // U: - - viennacl::switch_memory_context(multifrontal_U_diagonal_, viennacl::traits::context(mat)); - - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_index_arrays_.begin(); - it != multifrontal_U_row_index_arrays_.end(); - ++it) - viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); - - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_buffers_.begin(); - it != multifrontal_U_row_buffers_.end(); - ++it) - viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); - - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_col_buffers_.begin(); - it != multifrontal_U_col_buffers_.end(); - ++it) - viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); - - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_element_buffers_.begin(); - it != multifrontal_U_element_buffers_.end(); - ++it) - viennacl::backend::switch_memory_context<NumericT>(*it, viennacl::traits::context(mat)); - - } - - ilu0_tag tag_; - viennacl::compressed_matrix<NumericT> LU_; - - std::list<viennacl::backend::mem_handle> multifrontal_L_row_index_arrays_; - std::list<viennacl::backend::mem_handle> multifrontal_L_row_buffers_; - std::list<viennacl::backend::mem_handle> multifrontal_L_col_buffers_; - std::list<viennacl::backend::mem_handle> multifrontal_L_element_buffers_; - std::list<vcl_size_t> multifrontal_L_row_elimination_num_list_; - - viennacl::vector<NumericT> multifrontal_U_diagonal_; - std::list<viennacl::backend::mem_handle> multifrontal_U_row_index_arrays_; - std::list<viennacl::backend::mem_handle> multifrontal_U_row_buffers_; - std::list<viennacl::backend::mem_handle> multifrontal_U_col_buffers_; - std::list<viennacl::backend::mem_handle> multifrontal_U_element_buffers_; - std::list<vcl_size_t> multifrontal_U_row_elimination_num_list_; - -}; - -} // namespace linalg -} // namespace viennacl - - -#endif - - - http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilut.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilut.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilut.hpp deleted file mode 100644 index 11ab842..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilut.hpp +++ /dev/null @@ -1,597 +0,0 @@ -#ifndef VIENNACL_LINALG_DETAIL_ILUT_HPP_ -#define VIENNACL_LINALG_DETAIL_ILUT_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/linalg/detail/ilu/ilut.hpp - @brief Implementations of an incomplete factorization preconditioner with threshold (ILUT) -*/ - -#include <vector> -#include <cmath> -#include <iostream> -#include "viennacl/forwards.h" -#include "viennacl/tools/tools.hpp" - -#include "viennacl/linalg/detail/ilu/common.hpp" -#include "viennacl/compressed_matrix.hpp" - -#include "viennacl/linalg/host_based/common.hpp" - -#include <map> - -namespace viennacl -{ -namespace linalg -{ - -/** @brief A tag for incomplete LU factorization with threshold (ILUT) -*/ -class ilut_tag -{ - public: - /** @brief The constructor. - * - * @param entries_per_row Number of nonzero entries per row in L and U. Note that L and U are stored in a single matrix, thus there are 2*entries_per_row in total. - * @param drop_tolerance The drop tolerance for ILUT - * @param with_level_scheduling Flag for enabling level scheduling on GPUs. - */ - ilut_tag(unsigned int entries_per_row = 20, - double drop_tolerance = 1e-4, - bool with_level_scheduling = false) - : entries_per_row_(entries_per_row), - drop_tolerance_(drop_tolerance), - use_level_scheduling_(with_level_scheduling) {} - - void set_drop_tolerance(double tol) - { - if (tol > 0) - drop_tolerance_ = tol; - } - double get_drop_tolerance() const { return drop_tolerance_; } - - void set_entries_per_row(unsigned int e) - { - if (e > 0) - entries_per_row_ = e; - } - - unsigned int get_entries_per_row() const { return entries_per_row_; } - - bool use_level_scheduling() const { return use_level_scheduling_; } - void use_level_scheduling(bool b) { use_level_scheduling_ = b; } - - private: - unsigned int entries_per_row_; - double drop_tolerance_; - bool use_level_scheduling_; -}; - - -namespace detail -{ - /** @brief Helper struct for holding a sparse vector in linear memory. For internal use only. - * - * Unfortunately, the 'naive' implementation using a std::map<> is almost always too slow. - * - */ - template<typename NumericT> - struct ilut_sparse_vector - { - ilut_sparse_vector(vcl_size_t alloc_size = 0) : size_(0), col_indices_(alloc_size), elements_(alloc_size) {} - - void resize_if_bigger(vcl_size_t s) - { - if (s > elements_.size()) - { - col_indices_.resize(s); - elements_.resize(s); - } - size_ = s; - } - - vcl_size_t size_; - std::vector<unsigned int> col_indices_; - std::vector<NumericT> elements_; - }; - - /** @brief Subtracts a scaled sparse vector u from a sparse vector w and writes the output to z: z = w - alpha * u - * - * Sparsity pattern of u and w are usually different. - * - * @return Length of new vector - */ - template<typename IndexT, typename NumericT> - IndexT merge_subtract_sparse_rows(IndexT const * w_coords, NumericT const * w_elements, IndexT w_size, - IndexT const * u_coords, NumericT const * u_elements, IndexT u_size, NumericT alpha, - IndexT * z_coords, NumericT * z_elements) - { - IndexT index_w = 0; - IndexT index_u = 0; - IndexT index_z = 0; - - while (1) - { - if (index_w < w_size && index_u < u_size) - { - if (w_coords[index_w] < u_coords[index_u]) - { - z_coords[index_z] = w_coords[index_w]; - z_elements[index_z++] = w_elements[index_w++]; - } - else if (w_coords[index_w] == u_coords[index_u]) - { - z_coords[index_z] = w_coords[index_w]; - z_elements[index_z++] = w_elements[index_w++] - alpha * u_elements[index_u++]; - } - else - { - z_coords[index_z] = u_coords[index_u]; - z_elements[index_z++] = - alpha * u_elements[index_u++]; - } - } - else if (index_w == w_size && index_u < u_size) - { - z_coords[index_z] = u_coords[index_u]; - z_elements[index_z++] = - alpha * u_elements[index_u++]; - } - else if (index_w < w_size && index_u == u_size) - { - z_coords[index_z] = w_coords[index_w]; - z_elements[index_z++] = w_elements[index_w++]; - } - else - return index_z; - } - } - - template<typename SizeT, typename NumericT> - void insert_with_value_sort(std::vector<std::pair<SizeT, NumericT> > & map, - SizeT index, NumericT value) - { - NumericT abs_value = std::fabs(value); - if (abs_value > 0) - { - // find first element with smaller absolute value: - std::size_t first_smaller_index = 0; - while (first_smaller_index < map.size() && std::fabs(map[first_smaller_index].second) > abs_value) - ++first_smaller_index; - - std::pair<SizeT, NumericT> tmp(index, value); - for (std::size_t j=first_smaller_index; j<map.size(); ++j) - std::swap(map[j], tmp); - } - } - -} - -/** @brief Implementation of a ILU-preconditioner with threshold. Optimized implementation for compressed_matrix. -* -* refer to Algorithm 10.6 by Saad's book (1996 edition) -* -* @param A The input matrix. Either a compressed_matrix or of type std::vector< std::map<T, U> > -* @param L The output matrix for L. -* @param U The output matrix for U. -* @param tag An ilut_tag in order to dispatch among several other preconditioners. -*/ -template<typename NumericT> -void precondition(viennacl::compressed_matrix<NumericT> const & A, - viennacl::compressed_matrix<NumericT> & L, - viennacl::compressed_matrix<NumericT> & U, - ilut_tag const & tag) -{ - assert(A.size1() == L.size1() && bool("Output matrix size mismatch") ); - assert(A.size1() == U.size1() && bool("Output matrix size mismatch") ); - - L.reserve( tag.get_entries_per_row() * A.size1()); - U.reserve((tag.get_entries_per_row() + 1) * A.size1()); - - vcl_size_t avg_nnz_per_row = static_cast<vcl_size_t>(A.nnz() / A.size1()); - detail::ilut_sparse_vector<NumericT> w1(tag.get_entries_per_row() * (avg_nnz_per_row + 10)); - detail::ilut_sparse_vector<NumericT> w2(tag.get_entries_per_row() * (avg_nnz_per_row + 10)); - detail::ilut_sparse_vector<NumericT> * w_in = &w1; - detail::ilut_sparse_vector<NumericT> * w_out = &w2; - std::vector<NumericT> diagonal_U(A.size1()); - - NumericT const * elements_A = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle()); - unsigned int const * row_buffer_A = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1()); - unsigned int const * col_buffer_A = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2()); - - NumericT * elements_L = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(L.handle()); - unsigned int * row_buffer_L = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.handle1()); row_buffer_L[0] = 0; - unsigned int * col_buffer_L = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L.handle2()); - - NumericT * elements_U = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(U.handle()); - unsigned int * row_buffer_U = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.handle1()); row_buffer_U[0] = 0; - unsigned int * col_buffer_U = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U.handle2()); - - std::vector<std::pair<unsigned int, NumericT> > sorted_entries_L(tag.get_entries_per_row()); - std::vector<std::pair<unsigned int, NumericT> > sorted_entries_U(tag.get_entries_per_row()); - - for (vcl_size_t i=0; i<viennacl::traits::size1(A); ++i) // Line 1 - { - std::fill(sorted_entries_L.begin(), sorted_entries_L.end(), std::pair<unsigned int, NumericT>(0, NumericT(0))); - std::fill(sorted_entries_U.begin(), sorted_entries_U.end(), std::pair<unsigned int, NumericT>(0, NumericT(0))); - - //line 2: set up w - w_in->resize_if_bigger(row_buffer_A[i+1] - row_buffer_A[i]); - NumericT row_norm = 0; - unsigned int k = 0; - for (unsigned int j = row_buffer_A[i]; j < row_buffer_A[i+1]; ++j, ++k) - { - w_in->col_indices_[k] = col_buffer_A[j]; - NumericT entry = elements_A[j]; - w_in->elements_[k] = entry; - row_norm += entry * entry; - } - row_norm = std::sqrt(row_norm); - NumericT tau_i = static_cast<NumericT>(tag.get_drop_tolerance()) * row_norm; - - //line 3: Iterate over lower diagonal parts of A: - k = 0; - unsigned int current_col = (row_buffer_A[i+1] > row_buffer_A[i]) ? w_in->col_indices_[k] : static_cast<unsigned int>(i); // mind empty rows here! - while (current_col < i) - { - //line 4: - NumericT a_kk = diagonal_U[current_col]; - - NumericT w_k_entry = w_in->elements_[k] / a_kk; - w_in->elements_[k] = w_k_entry; - - //lines 5,6: (dropping rule to w_k) - if ( std::fabs(w_k_entry) > tau_i) - { - //line 7: - unsigned int row_U_begin = row_buffer_U[current_col]; - unsigned int row_U_end = row_buffer_U[current_col + 1]; - - if (row_U_end > row_U_begin) - { - w_out->resize_if_bigger(w_in->size_ + (row_U_end - row_U_begin) - 1); - w_out->size_ = detail::merge_subtract_sparse_rows(&(w_in->col_indices_[0]), &(w_in->elements_[0]), static_cast<unsigned int>(w_in->size_), - col_buffer_U + row_U_begin + 1, elements_U + row_U_begin + 1, (row_U_end - row_U_begin) - 1, w_k_entry, - &(w_out->col_indices_[0]), &(w_out->elements_[0]) - ); - ++k; - } - } - else // drop element - { - w_out->resize_if_bigger(w_in->size_ - 1); - for (unsigned int r = 0; r < k; ++r) - { - w_out->col_indices_[r] = w_in->col_indices_[r]; - w_out->elements_[r] = w_in->elements_[r]; - } - for (unsigned int r = k+1; r < w_in->size_; ++r) - { - w_out->col_indices_[r-1] = w_in->col_indices_[r]; - w_out->elements_[r-1] = w_in->elements_[r]; - } - - // Note: No increment to k here, element was dropped! - } - - // swap pointers to w1 and w2 - std::swap(w_in, w_out); - - // process next entry: - current_col = (k < w_in->size_) ? w_in->col_indices_[k] : static_cast<unsigned int>(i); - } // while() - - // Line 10: Apply a dropping rule to w - // To do so, we write values to a temporary array - for (unsigned int r = 0; r < w_in->size_; ++r) - { - unsigned int col = w_in->col_indices_[r]; - NumericT value = w_in->elements_[r]; - - if (col < i) // entry for L: - detail::insert_with_value_sort(sorted_entries_L, col, value); - else if (col == i) // do not drop diagonal element - { - diagonal_U[i] = value; - if (value <= 0 && value >= 0) - { - std::cerr << "ViennaCL: FATAL ERROR in ILUT(): Diagonal entry computed to zero (" << value << ") in row " << i << "!" << std::endl; - throw zero_on_diagonal_exception("ILUT zero diagonal!"); - } - } - else // entry for U: - detail::insert_with_value_sort(sorted_entries_U, col, value); - } - - //Lines 10-12: Apply a dropping rule to w, write the largest p values to L and U - unsigned int offset_L = row_buffer_L[i]; - std::sort(sorted_entries_L.begin(), sorted_entries_L.end()); - for (unsigned int j=0; j<tag.get_entries_per_row(); ++j) - if (std::fabs(sorted_entries_L[j].second) > 0) - { - col_buffer_L[offset_L] = sorted_entries_L[j].first; - elements_L[offset_L] = sorted_entries_L[j].second; - ++offset_L; - } - row_buffer_L[i+1] = offset_L; - - unsigned int offset_U = row_buffer_U[i]; - col_buffer_U[offset_U] = static_cast<unsigned int>(i); - elements_U[offset_U] = diagonal_U[i]; - ++offset_U; - std::sort(sorted_entries_U.begin(), sorted_entries_U.end()); - for (unsigned int j=0; j<tag.get_entries_per_row(); ++j) - if (std::fabs(sorted_entries_U[j].second) > 0) - { - col_buffer_U[offset_U] = sorted_entries_U[j].first; - elements_U[offset_U] = sorted_entries_U[j].second; - ++offset_U; - } - row_buffer_U[i+1] = offset_U; - - } //for i -} - - -/** @brief ILUT preconditioner class, can be supplied to solve()-routines -*/ -template<typename MatrixT> -class ilut_precond -{ - typedef typename MatrixT::value_type NumericType; - -public: - ilut_precond(MatrixT const & mat, ilut_tag const & tag) : tag_(tag), L_(mat.size1(), mat.size2()), U_(mat.size1(), mat.size2()) - { - //initialize preconditioner: - //std::cout << "Start CPU precond" << std::endl; - init(mat); - //std::cout << "End CPU precond" << std::endl; - } - - template<typename VectorT> - void apply(VectorT & vec) const - { - //Note: Since vec can be a rather arbitrary vector type, we call the more generic version in the backend manually: - { - unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_.handle1()); - unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_.handle2()); - NumericType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericType>(L_.handle()); - - viennacl::linalg::host_based::detail::csr_inplace_solve<NumericType>(row_buffer, col_buffer, elements, vec, L_.size2(), unit_lower_tag()); - } - { - unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_.handle1()); - unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_.handle2()); - NumericType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericType>(U_.handle()); - - viennacl::linalg::host_based::detail::csr_inplace_solve<NumericType>(row_buffer, col_buffer, elements, vec, U_.size2(), upper_tag()); - } - } - -private: - void init(MatrixT const & mat) - { - viennacl::context host_context(viennacl::MAIN_MEMORY); - viennacl::compressed_matrix<NumericType> temp; - viennacl::switch_memory_context(temp, host_context); - viennacl::switch_memory_context(L_, host_context); - viennacl::switch_memory_context(U_, host_context); - - viennacl::copy(mat, temp); - - viennacl::linalg::precondition(temp, L_, U_, tag_); - } - - ilut_tag tag_; - viennacl::compressed_matrix<NumericType> L_; - viennacl::compressed_matrix<NumericType> U_; -}; - - -/** @brief ILUT preconditioner class, can be supplied to solve()-routines. -* -* Specialization for compressed_matrix -*/ -template<typename NumericT, unsigned int AlignmentV> -class ilut_precond< viennacl::compressed_matrix<NumericT, AlignmentV> > -{ -typedef viennacl::compressed_matrix<NumericT, AlignmentV> MatrixType; - -public: - ilut_precond(MatrixType const & mat, ilut_tag const & tag) - : tag_(tag), - L_(mat.size1(), mat.size2(), viennacl::traits::context(mat)), - U_(mat.size1(), mat.size2(), viennacl::traits::context(mat)) - { - //initialize preconditioner: - //std::cout << "Start GPU precond" << std::endl; - init(mat); - //std::cout << "End GPU precond" << std::endl; - } - - void apply(viennacl::vector<NumericT> & vec) const - { - if (vec.handle().get_active_handle_id() != viennacl::MAIN_MEMORY) - { - if (tag_.use_level_scheduling()) - { - //std::cout << "Using multifrontal on GPU..." << std::endl; - detail::level_scheduling_substitute(vec, - multifrontal_L_row_index_arrays_, - multifrontal_L_row_buffers_, - multifrontal_L_col_buffers_, - multifrontal_L_element_buffers_, - multifrontal_L_row_elimination_num_list_); - - vec = viennacl::linalg::element_div(vec, multifrontal_U_diagonal_); - - detail::level_scheduling_substitute(vec, - multifrontal_U_row_index_arrays_, - multifrontal_U_row_buffers_, - multifrontal_U_col_buffers_, - multifrontal_U_element_buffers_, - multifrontal_U_row_elimination_num_list_); - - } - else - { - viennacl::context host_context(viennacl::MAIN_MEMORY); - viennacl::context old_context = viennacl::traits::context(vec); - viennacl::switch_memory_context(vec, host_context); - viennacl::linalg::inplace_solve(L_, vec, unit_lower_tag()); - viennacl::linalg::inplace_solve(U_, vec, upper_tag()); - viennacl::switch_memory_context(vec, old_context); - } - } - else //apply ILUT directly: - { - viennacl::linalg::inplace_solve(L_, vec, unit_lower_tag()); - viennacl::linalg::inplace_solve(U_, vec, upper_tag()); - } - } - -private: - void init(MatrixType const & mat) - { - viennacl::context host_context(viennacl::MAIN_MEMORY); - viennacl::switch_memory_context(L_, host_context); - viennacl::switch_memory_context(U_, host_context); - - if (viennacl::traits::context(mat).memory_type() == viennacl::MAIN_MEMORY) - { - viennacl::linalg::precondition(mat, L_, U_, tag_); - } - else //we need to copy to CPU - { - viennacl::compressed_matrix<NumericT> cpu_mat(mat.size1(), mat.size2(), viennacl::traits::context(mat)); - viennacl::switch_memory_context(cpu_mat, host_context); - - cpu_mat = mat; - - viennacl::linalg::precondition(cpu_mat, L_, U_, tag_); - } - - if (!tag_.use_level_scheduling()) - return; - - // - // multifrontal part: - // - - viennacl::switch_memory_context(multifrontal_U_diagonal_, host_context); - multifrontal_U_diagonal_.resize(U_.size1(), false); - host_based::detail::row_info(U_, multifrontal_U_diagonal_, viennacl::linalg::detail::SPARSE_ROW_DIAGONAL); - - detail::level_scheduling_setup_L(L_, - multifrontal_U_diagonal_, //dummy - multifrontal_L_row_index_arrays_, - multifrontal_L_row_buffers_, - multifrontal_L_col_buffers_, - multifrontal_L_element_buffers_, - multifrontal_L_row_elimination_num_list_); - - - detail::level_scheduling_setup_U(U_, - multifrontal_U_diagonal_, - multifrontal_U_row_index_arrays_, - multifrontal_U_row_buffers_, - multifrontal_U_col_buffers_, - multifrontal_U_element_buffers_, - multifrontal_U_row_elimination_num_list_); - - // - // Bring to device if necessary: - // - - // L: - - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_index_arrays_.begin(); - it != multifrontal_L_row_index_arrays_.end(); - ++it) - viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); - - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_buffers_.begin(); - it != multifrontal_L_row_buffers_.end(); - ++it) - viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); - - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_col_buffers_.begin(); - it != multifrontal_L_col_buffers_.end(); - ++it) - viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); - - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_element_buffers_.begin(); - it != multifrontal_L_element_buffers_.end(); - ++it) - viennacl::backend::switch_memory_context<NumericT>(*it, viennacl::traits::context(mat)); - - - // U: - - viennacl::switch_memory_context(multifrontal_U_diagonal_, viennacl::traits::context(mat)); - - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_index_arrays_.begin(); - it != multifrontal_U_row_index_arrays_.end(); - ++it) - viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); - - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_buffers_.begin(); - it != multifrontal_U_row_buffers_.end(); - ++it) - viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); - - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_col_buffers_.begin(); - it != multifrontal_U_col_buffers_.end(); - ++it) - viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat)); - - for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_element_buffers_.begin(); - it != multifrontal_U_element_buffers_.end(); - ++it) - viennacl::backend::switch_memory_context<NumericT>(*it, viennacl::traits::context(mat)); - - - } - - ilut_tag tag_; - viennacl::compressed_matrix<NumericT> L_; - viennacl::compressed_matrix<NumericT> U_; - - std::list<viennacl::backend::mem_handle> multifrontal_L_row_index_arrays_; - std::list<viennacl::backend::mem_handle> multifrontal_L_row_buffers_; - std::list<viennacl::backend::mem_handle> multifrontal_L_col_buffers_; - std::list<viennacl::backend::mem_handle> multifrontal_L_element_buffers_; - std::list<vcl_size_t > multifrontal_L_row_elimination_num_list_; - - viennacl::vector<NumericT> multifrontal_U_diagonal_; - std::list<viennacl::backend::mem_handle> multifrontal_U_row_index_arrays_; - std::list<viennacl::backend::mem_handle> multifrontal_U_row_buffers_; - std::list<viennacl::backend::mem_handle> multifrontal_U_col_buffers_; - std::list<viennacl::backend::mem_handle> multifrontal_U_element_buffers_; - std::list<vcl_size_t > multifrontal_U_row_elimination_num_list_; -}; - -} // namespace linalg -} // namespace viennacl - - - - -#endif - - - http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_applier.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_applier.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_applier.hpp deleted file mode 100644 index 0e2abb0..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_applier.hpp +++ /dev/null @@ -1,103 +0,0 @@ -#ifndef VIENNACL_LINALG_DETAIL_OP_APPLIER_HPP -#define VIENNACL_LINALG_DETAIL_OP_APPLIER_HPP - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/linalg/detail/op_applier.hpp - * - * @brief Defines the action of certain unary and binary operators and its arguments (for host execution). -*/ - -#include "viennacl/forwards.h" -#include <cmath> - -namespace viennacl -{ -namespace linalg -{ -namespace detail -{ - -/** @brief Worker class for decomposing expression templates. - * - * @tparam A Type to which is assigned to - * @tparam OP One out of {op_assign, op_inplace_add, op_inplace_sub} - @ @tparam T Right hand side of the assignment -*/ -template<typename OpT> -struct op_applier -{ - typedef typename OpT::ERROR_UNKNOWN_OP_TAG_PROVIDED error_type; -}; - -/** \cond */ -template<> -struct op_applier<op_element_binary<op_prod> > -{ - template<typename T> - static void apply(T & result, T const & x, T const & y) { result = x * y; } -}; - -template<> -struct op_applier<op_element_binary<op_div> > -{ - template<typename T> - static void apply(T & result, T const & x, T const & y) { result = x / y; } -}; - -template<> -struct op_applier<op_element_binary<op_pow> > -{ - template<typename T> - static void apply(T & result, T const & x, T const & y) { result = std::pow(x, y); } -}; - -#define VIENNACL_MAKE_UNARY_OP_APPLIER(funcname) \ -template<> \ -struct op_applier<op_element_unary<op_##funcname> > \ -{ \ - template<typename T> \ - static void apply(T & result, T const & x) { using namespace std; result = funcname(x); } \ -} - -VIENNACL_MAKE_UNARY_OP_APPLIER(abs); -VIENNACL_MAKE_UNARY_OP_APPLIER(acos); -VIENNACL_MAKE_UNARY_OP_APPLIER(asin); -VIENNACL_MAKE_UNARY_OP_APPLIER(atan); -VIENNACL_MAKE_UNARY_OP_APPLIER(ceil); -VIENNACL_MAKE_UNARY_OP_APPLIER(cos); -VIENNACL_MAKE_UNARY_OP_APPLIER(cosh); -VIENNACL_MAKE_UNARY_OP_APPLIER(exp); -VIENNACL_MAKE_UNARY_OP_APPLIER(fabs); -VIENNACL_MAKE_UNARY_OP_APPLIER(floor); -VIENNACL_MAKE_UNARY_OP_APPLIER(log); -VIENNACL_MAKE_UNARY_OP_APPLIER(log10); -VIENNACL_MAKE_UNARY_OP_APPLIER(sin); -VIENNACL_MAKE_UNARY_OP_APPLIER(sinh); -VIENNACL_MAKE_UNARY_OP_APPLIER(sqrt); -VIENNACL_MAKE_UNARY_OP_APPLIER(tan); -VIENNACL_MAKE_UNARY_OP_APPLIER(tanh); - -#undef VIENNACL_MAKE_UNARY_OP_APPLIER -/** \endcond */ - -} -} -} - -#endif // VIENNACL_LINALG_DETAIL_OP_EXECUTOR_HPP http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_executor.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_executor.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_executor.hpp deleted file mode 100644 index bd49b3b..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_executor.hpp +++ /dev/null @@ -1,86 +0,0 @@ -#ifndef VIENNACL_LINALG_DETAIL_OP_EXECUTOR_HPP -#define VIENNACL_LINALG_DETAIL_OP_EXECUTOR_HPP - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/linalg/detail/op_executor.hpp - * - * @brief Defines the worker class for decomposing an expression tree into small chunks, which can be processed by the predefined operations in ViennaCL. -*/ - -#include "viennacl/forwards.h" - -namespace viennacl -{ -namespace linalg -{ -namespace detail -{ - -template<typename NumericT, typename B> -bool op_aliasing(vector_base<NumericT> const & /*lhs*/, B const & /*b*/) -{ - return false; -} - -template<typename NumericT> -bool op_aliasing(vector_base<NumericT> const & lhs, vector_base<NumericT> const & b) -{ - return lhs.handle() == b.handle(); -} - -template<typename NumericT, typename LhsT, typename RhsT, typename OpT> -bool op_aliasing(vector_base<NumericT> const & lhs, vector_expression<const LhsT, const RhsT, OpT> const & rhs) -{ - return op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()); -} - - -template<typename NumericT, typename B> -bool op_aliasing(matrix_base<NumericT> const & /*lhs*/, B const & /*b*/) -{ - return false; -} - -template<typename NumericT> -bool op_aliasing(matrix_base<NumericT> const & lhs, matrix_base<NumericT> const & b) -{ - return lhs.handle() == b.handle(); -} - -template<typename NumericT, typename LhsT, typename RhsT, typename OpT> -bool op_aliasing(matrix_base<NumericT> const & lhs, matrix_expression<const LhsT, const RhsT, OpT> const & rhs) -{ - return op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs()); -} - - -/** @brief Worker class for decomposing expression templates. - * - * @tparam A Type to which is assigned to - * @tparam OP One out of {op_assign, op_inplace_add, op_inplace_sub} - @ @tparam T Right hand side of the assignment -*/ -template<typename A, typename OP, typename T> -struct op_executor {}; - -} -} -} - -#endif // VIENNACL_LINALG_DETAIL_OP_EXECUTOR_HPP http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_matrix.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_matrix.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_matrix.hpp deleted file mode 100644 index 12ff77b..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_matrix.hpp +++ /dev/null @@ -1,86 +0,0 @@ -#ifndef VIENNACL_LINALG_DETAIL_SPAI_BLOCK_MATRIX_HPP -#define VIENNACL_LINALG_DETAIL_SPAI_BLOCK_MATRIX_HPP - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -#include <utility> -#include <iostream> -#include <fstream> -#include <string> -#include <algorithm> -#include <vector> -#include "viennacl/ocl/backend.hpp" -#include "viennacl/tools/tools.hpp" - -/** @file viennacl/linalg/detail/spai/block_matrix.hpp - @brief Implementation of a bunch of (small) matrices on GPU. Experimental. - - SPAI code contributed by Nikolay Lukash -*/ - -namespace viennacl -{ -namespace linalg -{ -namespace detail -{ -namespace spai -{ - -/** -* @brief Represents contigious matrices on GPU -*/ - -class block_matrix -{ -public: - - ////////// non-const - - /** @brief Returns a handle to the elements */ - viennacl::ocl::handle<cl_mem>& handle(){ return elements_; } - - /** @brief Returns a handle to the matrix dimensions */ - viennacl::ocl::handle<cl_mem>& handle1() { return matrix_dimensions_; } - - /** @brief Returns a handle to the start indices of matrix */ - viennacl::ocl::handle<cl_mem>& handle2() { return start_block_inds_; } - - ////////// const - - /** @brief Returns a handle to the const elements */ - const viennacl::ocl::handle<cl_mem>& handle() const { return elements_; } - - /** @brief Returns a handle to the const matrix dimensions */ - const viennacl::ocl::handle<cl_mem>& handle1() const { return matrix_dimensions_; } - - /** @brief Returns a handle to the const start indices of matrix */ - const viennacl::ocl::handle<cl_mem>& handle2() const { return start_block_inds_; } - -private: - viennacl::ocl::handle<cl_mem> elements_; - viennacl::ocl::handle<cl_mem> matrix_dimensions_; - viennacl::ocl::handle<cl_mem> start_block_inds_; -}; - - -} -} -} -} -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_vector.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_vector.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_vector.hpp deleted file mode 100644 index eee6aef..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_vector.hpp +++ /dev/null @@ -1,77 +0,0 @@ -#ifndef VIENNACL_LINALG_DETAIL_SPAI_BLOCK_VECTOR_HPP -#define VIENNACL_LINALG_DETAIL_SPAI_BLOCK_VECTOR_HPP - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -#include <utility> -#include <iostream> -#include <fstream> -#include <string> -#include <algorithm> -#include <vector> -#include "viennacl/ocl/backend.hpp" -#include "viennacl/tools/tools.hpp" - -/** @file viennacl/linalg/detail/spai/block_vector.hpp - @brief Implementation of a bunch of vectors on GPU. Experimental. - - SPAI code contributed by Nikolay Lukash -*/ - -namespace viennacl -{ -namespace linalg -{ -namespace detail -{ -namespace spai -{ - -/** -* @brief Represents a contiguous vector on the GPU to represent a concatentation of small vectors -*/ -class block_vector -{ -public: - - ///////////// non-const - - /** @brief Return handle to the elements */ - viennacl::ocl::handle<cl_mem> & handle(){ return elements_; } - - /** @brief Return handle to start indices */ - viennacl::ocl::handle<cl_mem> & handle1() { return start_block_inds_; } - - ///////////// const - - /** @brief Return handle to the const elements */ - const viennacl::ocl::handle<cl_mem> & handle() const { return elements_; } - - /** @brief Return handle to const start indices */ - const viennacl::ocl::handle<cl_mem> & handle1() const { return start_block_inds_; } - -private: - viennacl::ocl::handle<cl_mem> elements_; - viennacl::ocl::handle<cl_mem> start_block_inds_; -}; - -} -} -} -} -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/fspai.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/fspai.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/fspai.hpp deleted file mode 100644 index fab81d7..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/fspai.hpp +++ /dev/null @@ -1,402 +0,0 @@ -#ifndef VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP -#define VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -#include <utility> -#include <iostream> -#include <fstream> -#include <string> -#include <algorithm> -#include <vector> -#include <math.h> -#include <map> - -//boost includes -#include "boost/numeric/ublas/vector.hpp" -#include "boost/numeric/ublas/matrix.hpp" -#include "boost/numeric/ublas/matrix_proxy.hpp" -#include "boost/numeric/ublas/vector_proxy.hpp" -#include "boost/numeric/ublas/storage.hpp" -#include "boost/numeric/ublas/io.hpp" -#include "boost/numeric/ublas/lu.hpp" -#include "boost/numeric/ublas/triangular.hpp" -#include "boost/numeric/ublas/matrix_expression.hpp" - -// ViennaCL includes -#include "viennacl/linalg/prod.hpp" -#include "viennacl/matrix.hpp" -#include "viennacl/compressed_matrix.hpp" -#include "viennacl/linalg/sparse_matrix_operations.hpp" -#include "viennacl/linalg/matrix_operations.hpp" -#include "viennacl/scalar.hpp" -#include "viennacl/linalg/cg.hpp" -#include "viennacl/linalg/inner_prod.hpp" -#include "viennacl/linalg/ilu.hpp" -//#include <omp.h> - -/** @file viennacl/linalg/detail/spai/fspai.hpp - @brief Implementation of FSPAI. Experimental. -*/ - -namespace viennacl -{ -namespace linalg -{ -namespace detail -{ -namespace spai -{ - -/** @brief A tag for FSPAI. Experimental. -* -* Contains values for the algorithm. -* Must be passed to spai_precond constructor -*/ -class fspai_tag -{ -public: - /** @brief Constructor - * - * @param residual_norm_threshold Calculate until the norm of the residual falls below this threshold - * @param iteration_limit maximum number of iterations - * @param is_static determines if static version of SPAI should be used - * @param is_right determines if left or right preconditioner should be used - */ - fspai_tag( - double residual_norm_threshold = 1e-3, - unsigned int iteration_limit = 5, - bool is_static = false, - bool is_right = false) - : residual_norm_threshold_(residual_norm_threshold), - iteration_limit_(iteration_limit), - is_static_(is_static), - is_right_(is_right) {} - - inline double getResidualNormThreshold() const { return residual_norm_threshold_; } - inline unsigned long getIterationLimit () const { return iteration_limit_; } - inline bool getIsStatic() const { return is_static_; } - inline bool getIsRight() const { return is_right_; } - inline void setResidualNormThreshold(double residual_norm_threshold) - { - if (residual_norm_threshold > 0) - residual_norm_threshold_ = residual_norm_threshold; - } - inline void setIterationLimit(unsigned long iteration_limit) - { - if (iteration_limit > 0) - iteration_limit_ = iteration_limit; - } - inline void setIsRight(bool is_right) { is_right_ = is_right; } - inline void setIsStatic(bool is_static) { is_static_ = is_static; } - -private: - double residual_norm_threshold_; - unsigned long iteration_limit_; - bool is_static_; - bool is_right_; -}; - - -// -// Helper: Store A in an STL container of type, exploiting symmetry -// Reason: ublas interface does not allow to iterate over nonzeros of a particular row without starting an iterator1 from the very beginning of the matrix... -// -template<typename MatrixT, typename NumericT> -void sym_sparse_matrix_to_stl(MatrixT const & A, std::vector<std::map<unsigned int, NumericT> > & STL_A) -{ - STL_A.resize(A.size1()); - for (typename MatrixT::const_iterator1 row_it = A.begin1(); - row_it != A.end1(); - ++row_it) - { - for (typename MatrixT::const_iterator2 col_it = row_it.begin(); - col_it != row_it.end(); - ++col_it) - { - if (col_it.index1() >= col_it.index2()) - STL_A[col_it.index1()][static_cast<unsigned int>(col_it.index2())] = *col_it; - else - break; //go to next row - } - } -} - - -// -// Generate index sets J_k, k=0,...,N-1 -// -template<typename MatrixT> -void generateJ(MatrixT const & A, std::vector<std::vector<vcl_size_t> > & J) -{ - for (typename MatrixT::const_iterator1 row_it = A.begin1(); - row_it != A.end1(); - ++row_it) - { - for (typename MatrixT::const_iterator2 col_it = row_it.begin(); - col_it != row_it.end(); - ++col_it) - { - if (col_it.index1() > col_it.index2()) //Matrix is symmetric, thus only work on lower triangular part - { - J[col_it.index2()].push_back(col_it.index1()); - J[col_it.index1()].push_back(col_it.index2()); - } - else - break; //go to next row - } - } -} - - -// -// Extracts the blocks A(\tilde{J}_k, \tilde{J}_k) from A -// Sets up y_k = A(\tilde{J}_k, k) for the inplace-solution after Cholesky-factoriation -// -template<typename NumericT, typename MatrixT, typename VectorT> -void fill_blocks(std::vector< std::map<unsigned int, NumericT> > & A, - std::vector<MatrixT> & blocks, - std::vector<std::vector<vcl_size_t> > const & J, - std::vector<VectorT> & Y) -{ - for (vcl_size_t k=0; k<A.size(); ++k) - { - std::vector<vcl_size_t> const & Jk = J[k]; - VectorT & yk = Y[k]; - MatrixT & block_k = blocks[k]; - - yk.resize(Jk.size()); - block_k.resize(Jk.size(), Jk.size()); - block_k.clear(); - - for (vcl_size_t i=0; i<Jk.size(); ++i) - { - vcl_size_t row_index = Jk[i]; - std::map<unsigned int, NumericT> & A_row = A[row_index]; - - //fill y_k: - yk[i] = A_row[static_cast<unsigned int>(k)]; - - for (vcl_size_t j=0; j<Jk.size(); ++j) - { - vcl_size_t col_index = Jk[j]; - if (col_index <= row_index && A_row.find(static_cast<unsigned int>(col_index)) != A_row.end()) //block is symmetric, thus store only lower triangular part - block_k(i, j) = A_row[static_cast<unsigned int>(col_index)]; - } - } - } -} - - -// -// Perform Cholesky factorization of A inplace. Cf. Schwarz: Numerische Mathematik, vol 5, p. 58 -// -template<typename MatrixT> -void cholesky_decompose(MatrixT & A) -{ - for (vcl_size_t k=0; k<A.size2(); ++k) - { - if (A(k,k) <= 0) - { - std::cout << "k: " << k << std::endl; - std::cout << "A(k,k): " << A(k,k) << std::endl; - } - - assert(A(k,k) > 0 && bool("Matrix not positive definite in Cholesky factorization.")); - - A(k,k) = std::sqrt(A(k,k)); - - for (vcl_size_t i=k+1; i<A.size1(); ++i) - { - A(i,k) /= A(k,k); - for (vcl_size_t j=k+1; j<=i; ++j) - A(i,j) -= A(i,k) * A(j,k); - } - } -} - - -// -// Compute x in Ax = b, where A is already Cholesky factored (A = L L^T) -// -template<typename MatrixT, typename VectorT> -void cholesky_solve(MatrixT const & L, VectorT & b) -{ - // inplace forward solve L x = b - for (vcl_size_t i=0; i<L.size1(); ++i) - { - for (vcl_size_t j=0; j<i; ++j) - b[i] -= L(i,j) * b[j]; - b[i] /= L(i,i); - } - - // inplace backward solve L^T x = b: - for (vcl_size_t i=L.size1()-1;; --i) - { - for (vcl_size_t k=i+1; k<L.size1(); ++k) - b[i] -= L(k,i) * b[k]; - b[i] /= L(i,i); - - if (i==0) //vcl_size_t might be unsigned, therefore manual check for equality with zero here - break; - } -} - - - -// -// Compute the Cholesky factor L from the sparse vectors y_k -// -template<typename MatrixT, typename VectorT> -void computeL(MatrixT const & A, - MatrixT & L, - MatrixT & L_trans, - std::vector<VectorT> & Y, - std::vector<std::vector<vcl_size_t> > & J) -{ - typedef typename VectorT::value_type NumericType; - typedef std::vector<std::map<unsigned int, NumericType> > STLSparseMatrixType; - - STLSparseMatrixType L_temp(A.size1()); - - for (vcl_size_t k=0; k<A.size1(); ++k) - { - std::vector<vcl_size_t> const & Jk = J[k]; - VectorT const & yk = Y[k]; - - //compute L(k,k): - NumericType Lkk = A(k,k); - for (vcl_size_t i=0; i<Jk.size(); ++i) - Lkk -= A(Jk[i],k) * yk[i]; - - Lkk = NumericType(1) / std::sqrt(Lkk); - L_temp[k][static_cast<unsigned int>(k)] = Lkk; - L_trans(k,k) = Lkk; - - //write lower diagonal entries: - for (vcl_size_t i=0; i<Jk.size(); ++i) - { - L_temp[Jk[i]][static_cast<unsigned int>(k)] = -Lkk * yk[i]; - L_trans(k, Jk[i]) = -Lkk * yk[i]; - } - } //for k - - - //build L from L_temp - for (vcl_size_t i=0; i<L_temp.size(); ++i) - for (typename std::map<unsigned int, NumericType>::const_iterator it = L_temp[i].begin(); - it != L_temp[i].end(); - ++it) - L(i, it->first) = it->second; -} - - -// -// Top level FSPAI function -// -template<typename MatrixT> -void computeFSPAI(MatrixT const & A, - MatrixT const & PatternA, - MatrixT & L, - MatrixT & L_trans, - fspai_tag) -{ - typedef typename MatrixT::value_type NumericT; - typedef boost::numeric::ublas::matrix<NumericT> DenseMatrixType; - typedef std::vector<std::map<unsigned int, NumericT> > SparseMatrixType; - - // - // preprocessing: Store A in a STL container: - // - //std::cout << "Transferring to STL container:" << std::endl; - std::vector<std::vector<NumericT> > y_k(A.size1()); - SparseMatrixType STL_A(A.size1()); - sym_sparse_matrix_to_stl(A, STL_A); - - - // - // Step 1: Generate pattern indices - // - //std::cout << "computeFSPAI(): Generating pattern..." << std::endl; - std::vector<std::vector<vcl_size_t> > J(A.size1()); - generateJ(PatternA, J); - - // - // Step 2: Set up matrix blocks - // - //std::cout << "computeFSPAI(): Setting up matrix blocks..." << std::endl; - std::vector<DenseMatrixType> subblocks_A(A.size1()); - fill_blocks(STL_A, subblocks_A, J, y_k); - STL_A.clear(); //not needed anymore - - // - // Step 3: Cholesky-factor blocks - // - //std::cout << "computeFSPAI(): Cholesky-factorization..." << std::endl; - for (vcl_size_t i=0; i<subblocks_A.size(); ++i) - { - //std::cout << "Block before: " << subblocks_A[i] << std::endl; - cholesky_decompose(subblocks_A[i]); - //std::cout << "Block after: " << subblocks_A[i] << std::endl; - } - - - /*vcl_size_t num_bytes = 0; - for (vcl_size_t i=0; i<subblocks_A.size(); ++i) - num_bytes += 8*subblocks_A[i].size1()*subblocks_A[i].size2();*/ - //std::cout << "Memory for FSPAI matrix: " << num_bytes / (1024.0 * 1024.0) << " MB" << std::endl; - - // - // Step 4: Solve for y_k - // - //std::cout << "computeFSPAI(): Cholesky-solve..." << std::endl; - for (vcl_size_t i=0; i<y_k.size(); ++i) - { - if (subblocks_A[i].size1() > 0) //block might be empty... - { - //y_k[i].resize(subblocks_A[i].size1()); - //std::cout << "y_k[" << i << "]: "; - //for (vcl_size_t j=0; j<y_k[i].size(); ++j) - // std::cout << y_k[i][j] << " "; - //std::cout << std::endl; - cholesky_solve(subblocks_A[i], y_k[i]); - } - } - - - // - // Step 5: Set up Cholesky factors L and L_trans - // - //std::cout << "computeFSPAI(): Computing L..." << std::endl; - L.resize(A.size1(), A.size2(), false); - L.reserve(A.nnz(), false); - L_trans.resize(A.size1(), A.size2(), false); - L_trans.reserve(A.nnz(), false); - computeL(A, L, L_trans, y_k, J); - - //std::cout << "L: " << L << std::endl; -} - - - -} -} -} -} - -#endif
