mahout#241"

apalumbo Fri, 10 Jun 2016 09:52:35 -0700

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/amg/amg_base.hpp
----------------------------------------------------------------------
diff --git 
a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/amg/amg_base.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/amg/amg_base.hpp
deleted file mode 100644
index 8361308..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/amg/amg_base.hpp
+++ /dev/null
@@ -1,208 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_AMG_AMG_BASE_HPP_
-#define VIENNACL_LINALG_DETAIL_AMG_AMG_BASE_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-/** @file amg_base.hpp
-    @brief Helper classes and functions for the AMG preconditioner. 
Experimental.
-
-    AMG code contributed by Markus Wagner
-*/
-
-#include <cmath>
-#include <set>
-#include <list>
-#include <stdexcept>
-#include <algorithm>
-
-#include <map>
-#ifdef VIENNACL_WITH_OPENMP
-#include <omp.h>
-#endif
-
-#include "viennacl/context.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-
-/** @brief Enumeration of coarsening methods for algebraic multigrid. */
-enum amg_coarsening_method
-{
-  AMG_COARSENING_METHOD_ONEPASS = 1,
-  AMG_COARSENING_METHOD_AGGREGATION,
-  AMG_COARSENING_METHOD_MIS2_AGGREGATION
-};
-
-/** @brief Enumeration of interpolation methods for algebraic multigrid. */
-enum amg_interpolation_method
-{
-  AMG_INTERPOLATION_METHOD_DIRECT = 1,
-  AMG_INTERPOLATION_METHOD_AGGREGATION,
-  AMG_INTERPOLATION_METHOD_SMOOTHED_AGGREGATION
-};
-
-
-/** @brief A tag for algebraic multigrid (AMG). Used to transport information 
from the user to the implementation.
-*/
-class amg_tag
-{
-public:
-  /** @brief The constructor, setting default values for the various 
parameters.
-    *
-    * Default coarsening routine: Aggreggation based on maximum independent 
sets of distance (MIS-2)
-    * Default interpolation routine: Smoothed aggregation
-    * Default threshold for strong connections: 0.1 (customizations are 
recommeded!)
-    * Default weight for Jacobi smoother: 1.0
-    * Default number of pre-smooth operations: 2
-    * Default number of post-smooth operations: 2
-    * Default number of coarse levels: 0 (this indicates that as many coarse 
levels as needed are constructed until the cutoff is reached)
-    * Default coarse grid size for direct solver (coarsening cutoff): 50
-    */
-  amg_tag()
-  : coarsening_method_(AMG_COARSENING_METHOD_MIS2_AGGREGATION), 
interpolation_method_(AMG_INTERPOLATION_METHOD_AGGREGATION),
-    strong_connection_threshold_(0.1), jacobi_weight_(1.0),
-    presmooth_steps_(2), postsmooth_steps_(2),
-    coarse_levels_(0), coarse_cutoff_(50) {}
-
-  // Getter-/Setter-Functions
-  /** @brief Sets the strategy used for constructing coarse grids  */
-  void set_coarsening_method(amg_coarsening_method s) { coarsening_method_ = 
s; }
-  /** @brief Returns the current coarsening strategy */
-  amg_coarsening_method get_coarsening_method() const { return 
coarsening_method_; }
-
-  /** @brief Sets the interpolation method to the provided method */
-  void set_interpolation_method(amg_interpolation_method interpol) { 
interpolation_method_ = interpol; }
-  /** @brief Returns the current interpolation method */
-  amg_interpolation_method get_interpolation_method() const { return 
interpolation_method_; }
-
-  /** @brief Sets the strong connection threshold. Customizations by the user 
essential for best results!
-    *
-    * With classical interpolation, a connection is considered strong if 
|a_ij| >= threshold * max_k(|a_ik|)
-    * Strength of connection currently ignored for aggregation-based 
coarsening (to be added in the future).
-    */
-  void set_strong_connection_threshold(double threshold) { if (threshold > 0) 
strong_connection_threshold_ = threshold; }
-  /** @brief Returns the strong connection threshold parameter.
-    *
-    * @see set_strong_connection_threshold() for an explanation of the 
threshold parameter
-    */
-  double get_strong_connection_threshold() const { return 
strong_connection_threshold_; }
-
-  /** @brief Sets the weight (damping) for the Jacobi smoother.
-    *
-    * The optimal value depends on the problem at hand. Values of 0.67 or 1.0 
are usually a good starting point for further experiments.
-    */
-  void set_jacobi_weight(double w) { if (w > 0) jacobi_weight_ = w; }
-  /** @brief Returns the Jacobi smoother weight (damping). */
-  double get_jacobi_weight() const { return jacobi_weight_; }
-
-  /** @brief Sets the number of smoother applications on the fine level before 
restriction to the coarser level. */
-  void set_presmooth_steps(vcl_size_t steps) { presmooth_steps_ = steps; }
-  /** @brief Returns the number of smoother applications on the fine level 
before restriction to the coarser level. */
-  vcl_size_t get_presmooth_steps() const { return presmooth_steps_; }
-
-  /** @brief Sets the number of smoother applications on the coarse level 
before interpolation to the finer level. */
-  void set_postsmooth_steps(vcl_size_t steps) { postsmooth_steps_ = steps; }
-  /** @brief Returns the number of smoother applications on the coarse level 
before interpolation to the finer level. */
-  vcl_size_t get_postsmooth_steps() const { return postsmooth_steps_; }
-
-  /** @brief Sets the number of coarse levels. If set to zero, then coarse 
levels are constructed until the cutoff size is reached. */
-  void set_coarse_levels(vcl_size_t levels)  { coarse_levels_ = levels; }
-  /** @brief Returns the number of coarse levels. If zero, then coarse levels 
are constructed until the cutoff size is reached. */
-  vcl_size_t get_coarse_levels() const { return coarse_levels_; }
-
-  /** @brief Sets the coarse grid size for which the recursive multigrid 
scheme is stopped and a direct solver is used. */
-  void set_coarsening_cutoff(vcl_size_t size)  { coarse_cutoff_ = size; }
-  /** @brief Returns the coarse grid size for which the recursive multigrid 
scheme is stopped and a direct solver is used. */
-  vcl_size_t get_coarsening_cutoff() const { return coarse_cutoff_; }
-
-  /** @brief Sets the ViennaCL context for the setup stage. Set this to a host 
context if you want to run the setup on the host.
-    *
-    * Set the ViennaCL context for the solver application via 
set_target_context().
-    * Target and setup context can be different.
-    */
-  void set_setup_context(viennacl::context ctx)  { setup_ctx_ = ctx; }
-  /** @brief Returns the ViennaCL context for the preconditioenr setup. */
-  viennacl::context const & get_setup_context() const { return setup_ctx_; }
-
-  /** @brief Sets the ViennaCL context for the solver cycle stage (i.e. 
preconditioner applications).
-    *
-    * Since the cycle stage easily benefits from accelerators, you usually 
want to set this to a CUDA or OpenCL-enabled context.
-    */
-  void set_target_context(viennacl::context ctx)  { target_ctx_ = ctx; }
-  /** @brief Returns the ViennaCL context for the solver cycle stage (i.e. 
preconditioner applications). */
-  viennacl::context const & get_target_context() const { return target_ctx_; }
-
-private:
-  amg_coarsening_method coarsening_method_;
-  amg_interpolation_method interpolation_method_;
-  double strong_connection_threshold_, jacobi_weight_;
-  vcl_size_t presmooth_steps_, postsmooth_steps_, coarse_levels_, 
coarse_cutoff_;
-  viennacl::context setup_ctx_, target_ctx_;
-};
-
-
-namespace detail
-{
-namespace amg
-{
-
-
-  struct amg_level_context
-  {
-    void resize(vcl_size_t num_points, vcl_size_t max_nnz)
-    {
-      influence_jumper_.resize(num_points + 1, false);
-      influence_ids_.resize(max_nnz, false);
-      influence_values_.resize(num_points, false);
-      point_types_.resize(num_points, false);
-      coarse_id_.resize(num_points, false);
-    }
-
-    void switch_context(viennacl::context ctx)
-    {
-      influence_jumper_.switch_memory_context(ctx);
-      influence_ids_.switch_memory_context(ctx);
-      influence_values_.switch_memory_context(ctx);
-      point_types_.switch_memory_context(ctx);
-      coarse_id_.switch_memory_context(ctx);
-    }
-
-    enum
-    {
-      POINT_TYPE_UNDECIDED = 0,
-      POINT_TYPE_COARSE,
-      POINT_TYPE_FINE
-    } amg_point_types;
-
-    viennacl::vector<unsigned int> influence_jumper_; // similar to row_buffer 
for CSR matrices
-    viennacl::vector<unsigned int> influence_ids_;    // IDs of influencing 
points
-    viennacl::vector<unsigned int> influence_values_; // Influence measure for 
each point
-    viennacl::vector<unsigned int> point_types_;      // 0: undecided, 1: 
coarse point, 2: fine point. Using char here because type for enum might be a 
larger type
-    viennacl::vector<unsigned int> coarse_id_;        // coarse ID used on the 
next level. Only valid for coarse points. Fine points may (ab)use their entry 
for something else.
-    unsigned int num_coarse_;
-  };
-
-
-} //namespace amg
-}
-}
-}
-
-#endif


http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp
----------------------------------------------------------------------
diff --git 
a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp
 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp
deleted file mode 100644
index 8308f77..0000000
--- 
a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp
+++ /dev/null
@@ -1,191 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_BISECT_KERNEL_CALLS_HPP_
-#define VIENNACL_LINALG_DETAIL_BISECT_KERNEL_CALLS_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-/** @file viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp
-    @brief Kernel calls for the bisection algorithm
-
-    Implementation based on the sample provided with the CUDA 6.0 SDK, for 
which
-    the creation of derivative works is allowed by including the following 
statement:
-    "This software contains source code provided by NVIDIA Corporation."
-*/
-
-
-#include "viennacl/forwards.h"
-#include "viennacl/scalar.hpp"
-#include "viennacl/vector.hpp"
-#include "viennacl/vector_proxy.hpp"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/meta/enable_if.hpp"
-#include "viennacl/meta/predicate.hpp"
-#include "viennacl/meta/result_of.hpp"
-#include "viennacl/traits/size.hpp"
-#include "viennacl/traits/start.hpp"
-#include "viennacl/traits/handle.hpp"
-#include "viennacl/traits/stride.hpp"
-
-#include "viennacl/linalg/detail/bisect/structs.hpp"
-#ifdef VIENNACL_WITH_OPENCL
-   #include "viennacl/linalg/opencl/bisect_kernel_calls.hpp"
-#endif
-
-#ifdef VIENNACL_WITH_CUDA
-  #include "viennacl/linalg/cuda/bisect_kernel_calls.hpp"
-#endif
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
- template<typename NumericT>
- void bisectSmall(const InputData<NumericT> &input, ResultDataSmall<NumericT> 
&result,
-                  const unsigned int mat_size,
-                  const NumericT lg, const NumericT ug,
-                  const NumericT precision)
-  {
-    switch (viennacl::traits::handle(input.g_a).get_active_handle_id())
-    {
-#ifdef VIENNACL_WITH_OPENCL
-      case viennacl::OPENCL_MEMORY:
-        viennacl::linalg::opencl::bisectSmall(input, result,
-                                             mat_size,
-                                             lg,ug,
-                                             precision);
-        break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-      case viennacl::CUDA_MEMORY:
-        viennacl::linalg::cuda::bisectSmall(input, result,
-                                             mat_size,
-                                             lg,ug,
-                                             precision);
-        break;
-#endif
-      case viennacl::MEMORY_NOT_INITIALIZED:
-        throw memory_exception("not initialised!");
-      default:
-        throw memory_exception("not implemented");
-    }
-  }
-
-
-
-
- template<typename NumericT>
- void bisectLarge(const InputData<NumericT> &input, ResultDataLarge<NumericT> 
&result,
-                    const unsigned int mat_size,
-                    const NumericT lg, const NumericT ug,
-                    const NumericT precision)
-  {
-    switch (viennacl::traits::handle(input.g_a).get_active_handle_id())
-    {
-#ifdef VIENNACL_WITH_OPENCL
-      case viennacl::OPENCL_MEMORY:
-        viennacl::linalg::opencl::bisectLarge(input, result,
-                                             mat_size,
-                                             lg,ug,
-                                             precision);
-        break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-      case viennacl::CUDA_MEMORY:
-        viennacl::linalg::cuda::bisectLarge(input, result,
-                                             mat_size,
-                                             lg,ug,
-                                             precision);
-        break;
-#endif
-      case viennacl::MEMORY_NOT_INITIALIZED:
-        throw memory_exception("not initialised!");
-      default:
-        throw memory_exception("not implemented");
-    }
-  }
-
-
-
-
-
- template<typename NumericT>
- void bisectLarge_OneIntervals(const InputData<NumericT> &input, 
ResultDataLarge<NumericT> &result,
-                    const unsigned int mat_size,
-                    const NumericT precision)
-  {
-    switch (viennacl::traits::handle(input.g_a).get_active_handle_id())
-    {
-#ifdef VIENNACL_WITH_OPENCL
-      case viennacl::OPENCL_MEMORY:
-        viennacl::linalg::opencl::bisectLargeOneIntervals(input, result,
-                                             mat_size,
-                                             precision);
-        break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-      case viennacl::CUDA_MEMORY:
-        viennacl::linalg::cuda::bisectLarge_OneIntervals(input, result,
-                                             mat_size,
-                                             precision);
-
-        break;
-#endif
-      case viennacl::MEMORY_NOT_INITIALIZED:
-        throw memory_exception("not initialised!");
-      default:
-        throw memory_exception("not implemented");
-    }
-  }
-
-
-
-
- template<typename NumericT>
- void bisectLarge_MultIntervals(const InputData<NumericT> &input, 
ResultDataLarge<NumericT> &result,
-                    const unsigned int mat_size,
-                    const NumericT precision)
-  {
-    switch (viennacl::traits::handle(input.g_a).get_active_handle_id())
-    {
-#ifdef VIENNACL_WITH_OPENCL
-      case viennacl::OPENCL_MEMORY:
-      viennacl::linalg::opencl::bisectLargeMultIntervals(input, result,
-                                           mat_size,
-                                           precision);
-        break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-      case viennacl::CUDA_MEMORY:
-        viennacl::linalg::cuda::bisectLarge_MultIntervals(input, result,
-                                             mat_size,
-                                             precision);
-        break;
-#endif
-      case viennacl::MEMORY_NOT_INITIALIZED:
-        throw memory_exception("not initialised!");
-      default:
-        throw memory_exception("not implemented");
-    }
-  }
-} // namespace detail
-} // namespace linalg
-} //namespace viennacl
-
-
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_large.hpp
----------------------------------------------------------------------
diff --git 
a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_large.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_large.hpp
deleted file mode 100755
index 337858f..0000000
--- 
a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_large.hpp
+++ /dev/null
@@ -1,142 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_BISECT_BISECT_LARGE_HPP_
-#define VIENNACL_LINALG_DETAIL_BISECT_BISECT_LARGE_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-
-/** @file viennacl/linalg/detail//bisect/bisect_large.hpp
-    @brief Computation of eigenvalues of a large symmetric, tridiagonal matrix
-
-    Implementation based on the sample provided with the CUDA 6.0 SDK, for 
which
-    the creation of derivative works is allowed by including the following 
statement:
-    "This software contains source code provided by NVIDIA Corporation."
-*/
-
-
-// includes, system
-#include <iostream>
-#include <iomanip>  
-#include <stdlib.h>
-#include <stdio.h>
-
-// includes, project
-#include "viennacl/linalg/detail/bisect/config.hpp"
-#include "viennacl/linalg/detail/bisect/structs.hpp"
-#include "viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-
-////////////////////////////////////////////////////////////////////////////////
-//! Run the kernels to compute the eigenvalues for large matrices
-//! @param  input   handles to input data
-//! @param  result  handles to result data
-//! @param  mat_size  matrix size
-//! @param  precision  desired precision of eigenvalues
-//! @param  lg  lower limit of Gerschgorin interval
-//! @param  ug  upper limit of Gerschgorin interval
-////////////////////////////////////////////////////////////////////////////////
-template<typename NumericT>
-void
-computeEigenvaluesLargeMatrix(InputData<NumericT> &input, 
ResultDataLarge<NumericT> &result,
-                              const unsigned int mat_size,
-                              const NumericT lg, const NumericT ug,  const 
NumericT precision)
-{
-
-
-  // First kernel call: decide on which intervals bisect_Large_OneIntervals/
-  // bisect_Large_MultIntervals is executed
-  viennacl::linalg::detail::bisectLarge(input, result, mat_size, lg, ug, 
precision);
-
-  // compute eigenvalues for intervals that contained only one eigenvalue
-  // after the first processing step
-  viennacl::linalg::detail::bisectLarge_OneIntervals(input, result, mat_size, 
precision);
-
-  // process intervals that contained more than one eigenvalue after
-  // the first processing step
-  viennacl::linalg::detail::bisectLarge_MultIntervals(input, result, mat_size, 
precision);
-
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Process the result, that is obtain result from device and do simple sanity
-//! checking
-//! @param  result  handles to result data
-//! @param  mat_size  matrix size
-////////////////////////////////////////////////////////////////////////////////
-template<typename NumericT>
-bool
-processResultDataLargeMatrix(ResultDataLarge<NumericT> &result,
-                             const unsigned int mat_size)
-{
-    bool bCompareResult = true;
-    // copy data from intervals that contained more than one eigenvalue after
-    // the first processing step
-    std::vector<NumericT> lambda_mult(mat_size);
-    viennacl::copy(result.g_lambda_mult, lambda_mult);
-
-    std::vector<unsigned int> pos_mult(mat_size);
-    viennacl::copy(result.g_pos_mult, pos_mult);
-
-    std::vector<unsigned int> blocks_mult_sum(mat_size);
-    viennacl::copy(result.g_blocks_mult_sum, blocks_mult_sum);
-
-    unsigned int num_one_intervals = result.g_num_one;
-    unsigned int sum_blocks_mult = mat_size - num_one_intervals;
-
-
-    // copy data for intervals that contained one eigenvalue after the first
-    // processing step
-    std::vector<NumericT> left_one(mat_size);
-    std::vector<NumericT> right_one(mat_size);
-    std::vector<unsigned int> pos_one(mat_size);
-
-    viennacl::copy(result.g_left_one, left_one);
-    viennacl::copy(result.g_right_one, right_one);
-    viennacl::copy(result.g_pos_one, pos_one);
-
-
-    // singleton intervals generated in the second step
-    for (unsigned int i = 0; i < sum_blocks_mult; ++i)
-    {
-      if (pos_mult[i] != 0)
-        result.std_eigenvalues[pos_mult[i] - 1] = lambda_mult[i];
-
-      else
-      {
-        throw memory_exception("Invalid array index! Are there more than 256 
equal eigenvalues?");
-      }
-    }
-
-    // singleton intervals generated in the first step
-    unsigned int index = 0;
-
-    for (unsigned int i = 0; i < num_one_intervals; ++i, ++index)
-    {
-        result.std_eigenvalues[pos_one[i] - 1] = left_one[i];
-    }
-    return bCompareResult;
-}
-} // namespace detail
-}  // namespace linalg
-}  // namespace viennacl
-#endif //VIENNACL_LINALG_DETAIL_BISECT_LARGE_HPP_

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_small.hpp
----------------------------------------------------------------------
diff --git 
a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_small.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_small.hpp
deleted file mode 100755
index 144640b..0000000
--- 
a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_small.hpp
+++ /dev/null
@@ -1,96 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_BISECT_SMALL_HPP_
-#define VIENNACL_LINALG_DETAIL_BISECT_SMALL_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-
-/** @file viennacl/linalg/detail//bisect/bisect_small.hpp
-    @brief Computation of eigenvalues of a small symmetric, tridiagonal matrix
-
-    Implementation based on the sample provided with the CUDA 6.0 SDK, for 
which
-    the creation of derivative works is allowed by including the following 
statement:
-    "This software contains source code provided by NVIDIA Corporation."
-*/
-
-// includes, system
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-
-// includes, project
-
-#include "viennacl/linalg/detail/bisect/structs.hpp"
-
-// includes, kernels
-#include "viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-////////////////////////////////////////////////////////////////////////////////
-//! Determine eigenvalues for matrices smaller than MAX_SMALL_MATRIX
-//! @param  input  handles to input data of kernel
-//! @param  result handles to result of kernel
-//! @param  mat_size  matrix size
-//! @param  lg  lower limit of Gerschgorin interval
-//! @param  ug  upper limit of Gerschgorin interval
-//! @param  precision  desired precision of eigenvalues
-////////////////////////////////////////////////////////////////////////////////
-template<typename NumericT>
-void
-computeEigenvaluesSmallMatrix(const InputData<NumericT> &input, 
ResultDataSmall<NumericT> &result,
-                              const unsigned int mat_size,
-                              const NumericT lg, const NumericT ug,
-                              const NumericT precision)
-{
-  viennacl::linalg::detail::bisectSmall( input, result, mat_size, lg, ug, 
precision);
-}
-
-
-////////////////////////////////////////////////////////////////////////////////
-//! Process the result obtained on the device, that is transfer to host and
-//! perform basic sanity checking
-//! @param  result  handles to result data
-//! @param  mat_size   matrix size
-////////////////////////////////////////////////////////////////////////////////
-template<typename NumericT>
-void
-processResultSmallMatrix(ResultDataSmall<NumericT> &result,
-                         const unsigned int mat_size)
-{
-  // copy data back to host
-  std::vector<NumericT> left(mat_size);
-  std::vector<unsigned int> left_count(mat_size);
-
-  viennacl::copy(result.vcl_g_left, left);
-  viennacl::copy(result.vcl_g_left_count, left_count);
-
-  for (unsigned int i = 0; i < mat_size; ++i)
-  {
-      result.std_eigenvalues[left_count[i]] = left[i];
-  }
-}
-}  // namespace detail
-}  // namespace linalg
-} // namespace viennacl
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/config.hpp
----------------------------------------------------------------------
diff --git 
a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/config.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/config.hpp
deleted file mode 100755
index 3afa509..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/config.hpp
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_BISECT_CONFIG_HPP_
-#define VIENNACL_LINALG_DETAIL_BISECT_CONFIG_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-
-
-/** @file viennacl/linalg/detail//bisect/config.hpp
- *     @brief Global configuration parameters
- *
- *         Implementation based on the sample provided with the CUDA 6.0 SDK, 
for which
- *             the creation of derivative works is allowed by including the 
following statement:
- *                 "This software contains source code provided by NVIDIA 
Corporation."
- *                 */
-
-// should be power of two
-#define  VIENNACL_BISECT_MAX_THREADS_BLOCK                256
-
-#ifdef VIENNACL_WITH_OPENCL
-#  define VIENNACL_BISECT_MAX_SMALL_MATRIX                 256
-#  define VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX   256
-#else                                                          // if CUDA is 
used
-#  define VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX   512 // change to 
256 if errors occur
-#  define VIENNACL_BISECT_MAX_SMALL_MATRIX                 512 // change to 
256 if errors occur
-#endif
-
- #define  VIENNACL_BISECT_MIN_ABS_INTERVAL                 5.0e-37
-
-#endif 

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/gerschgorin.hpp
----------------------------------------------------------------------
diff --git 
a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/gerschgorin.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/gerschgorin.hpp
deleted file mode 100755
index 53cd863..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/gerschgorin.hpp
+++ /dev/null
@@ -1,94 +0,0 @@
-#ifndef _VIENNACL_LINALG_DETAIL_BISECT_GERSCHORIN_HPP_
-#define _VIENNACL_LINALG_DETAIL_BISECT_GERSCHORIN_HPP_
-
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-
-/** @file viennacl/linalg/detail//bisect/gerschgorin.hpp
-    @brief  Computation of Gerschgorin interval for symmetric, tridiagonal 
matrix
-
-    Implementation based on the sample provided with the CUDA 6.0 SDK, for 
which
-    the creation of derivative works is allowed by including the following 
statement:
-    "This software contains source code provided by NVIDIA Corporation."
-*/
-
-#include <cstdio>
-#include <cstdlib>
-#include <cmath>
-#include <cfloat>
-
-#include "viennacl/linalg/detail/bisect/util.hpp"
-#include "viennacl/vector.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-  
////////////////////////////////////////////////////////////////////////////////
-  //! Compute Gerschgorin interval for symmetric, tridiagonal matrix
-  //! @param  d  diagonal elements
-  //! @param  s  superdiagonal elements
-  //! @param  n  size of matrix
-  //! @param  lg  lower limit of Gerschgorin interval
-  //! @param  ug  upper limit of Gerschgorin interval
-  
////////////////////////////////////////////////////////////////////////////////
-  template<typename NumericT>
-  void
-  computeGerschgorin(std::vector<NumericT> & d, std::vector<NumericT> & s, 
unsigned int n, NumericT &lg, NumericT &ug)
-  {
-      // compute bounds
-      for (unsigned int i = 1; i < (n - 1); ++i)
-      {
-
-          // sum over the absolute values of all elements of row i
-          NumericT sum_abs_ni = fabsf(s[i]) + fabsf(s[i + 1]);
-
-          lg = min(lg, d[i] - sum_abs_ni);
-          ug = max(ug, d[i] + sum_abs_ni);
-      }
-
-      // first and last row, only one superdiagonal element
-
-      // first row
-      lg = min(lg, d[0] - fabsf(s[1]));
-      ug = max(ug, d[0] + fabsf(s[1]));
-
-      // last row
-      lg = min(lg, d[n-1] - fabsf(s[n-1]));
-      ug = max(ug, d[n-1] + fabsf(s[n-1]));
-
-      // increase interval to avoid side effects of fp arithmetic
-      NumericT bnorm = max(fabsf(ug), fabsf(lg));
-
-      // these values depend on the implmentation of floating count that is
-      // employed in the following
-      NumericT psi_0 = 11 * FLT_EPSILON * bnorm;
-      NumericT psi_n = 11 * FLT_EPSILON * bnorm;
-
-      lg = lg - bnorm * 2 * static_cast<NumericT>(n) * FLT_EPSILON - psi_0;
-      ug = ug + bnorm * 2 * static_cast<NumericT>(n) * FLT_EPSILON + psi_n;
-
-      ug = max(lg, ug);
-  }
-}  // namespace detail
-}  // namespace linalg
-} // namespace viennacl
-#endif  // _VIENNACL_LINALG_DETAIL_GERSCHORIN_H_

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/structs.hpp
----------------------------------------------------------------------
diff --git 
a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/structs.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/structs.hpp
deleted file mode 100755
index 1943da3..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/structs.hpp
+++ /dev/null
@@ -1,182 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_BISECT_STRUCTS_HPP_
-#define VIENNACL_LINALG_DETAIL_BISECT_STRUCTS_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-
-/** @file viennacl/linalg/detail//bisect/structs.hpp
-    @brief  Helper structures to simplify variable handling
-
-    Implementation based on the sample provided with the CUDA 6.0 SDK, for 
which
-    the creation of derivative works is allowed by including the following 
statement:
-    "This software contains source code provided by NVIDIA Corporation."
-*/
-
-
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-#include "viennacl/vector.hpp"
-#include "viennacl/matrix.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-
-/////////////////////////////////////////////////////////////////////////////////
-//! In this class the input matrix is stored
-/////////////////////////////////////////////////////////////////////////////////
-template<typename NumericT>
-struct InputData
-{
-  //! host side representation of diagonal
-  std::vector<NumericT> std_a;
-  //! host side representation superdiagonal
-  std::vector<NumericT> std_b;
-  //! device side representation of diagonal
-  viennacl::vector<NumericT> g_a;
-  //!device side representation of superdiagonal
-  viennacl::vector<NumericT> g_b;
-
-  /** @brief Initialize the input data to the algorithm
-   *
-   * @param diagonal        vector with the diagonal elements
-   * @param superdiagonal   vector with the superdiagonal elements
-   * @param sz              size of the matrix
-   */
-  InputData(std::vector<NumericT> diagonal, std::vector<NumericT> 
superdiagonal, const unsigned int sz) :
-              std_a(sz), std_b(sz), g_a(sz), g_b(sz)
-  {
-   std_a = diagonal;
-   std_b = superdiagonal;
-
-   viennacl::copy(std_b, g_b);
-   viennacl::copy(std_a, g_a);
-  }
-
-  InputData(viennacl::vector<NumericT> diagonal, viennacl::vector<NumericT> 
superdiagonal, const unsigned int sz) :
-              std_a(sz), std_b(sz), g_a(sz), g_b(sz)
-  {
-   g_a = diagonal;
-   g_b = superdiagonal;
-
-   viennacl::copy(g_a, std_a);
-   viennacl::copy(g_b, std_b);
-  }
-};
-
-
-/////////////////////////////////////////////////////////////////////////////////
-//! In this class the data of the result for small matrices is stored
-/////////////////////////////////////////////////////////////////////////////////
-template<typename NumericT>
-struct ResultDataSmall
-{
-  //! eigenvalues (host side)
-  std::vector<NumericT> std_eigenvalues;
-  //! left interval limits at the end of the computation
-  viennacl::vector<NumericT> vcl_g_left;
-  //! right interval limits at the end of the computation
-  viennacl::vector<NumericT> vcl_g_right;
-  //! number of eigenvalues smaller than the left interval limit
-  viennacl::vector<unsigned int> vcl_g_left_count;
-  //! number of eigenvalues bigger than the right interval limit
-  viennacl::vector<unsigned int> vcl_g_right_count;
-
-
-  
////////////////////////////////////////////////////////////////////////////////
-  //! Initialize variables and memory for the result for small matrices
-  
////////////////////////////////////////////////////////////////////////////////
-  ResultDataSmall(const unsigned int mat_size) :
-    std_eigenvalues(mat_size), vcl_g_left(mat_size), vcl_g_right(mat_size), 
vcl_g_left_count(mat_size), vcl_g_right_count(mat_size) {}
-};
-
-
-
-
-
-/////////////////////////////////////////////////////////////////////////////////
-//! In this class the data of the result for large matrices is stored
-/////////////////////////////////////////////////////////////////////////////////
-template<typename NumericT>
-struct ResultDataLarge
-{
-//! eigenvalues
-  std::vector<NumericT> std_eigenvalues;
-
-  //! number of intervals containing one eigenvalue after the first step
-  viennacl::scalar<unsigned int> g_num_one;
-
-  //! number of (thread) blocks of intervals containing multiple eigenvalues 
after the first steo
-  viennacl::scalar<unsigned int> g_num_blocks_mult;
-
-  //! left interval limits of intervals containing one eigenvalue after the 
first iteration step
-  viennacl::vector<NumericT> g_left_one;
-
-  //! right interval limits of intervals containing one eigenvalue after the 
first iteration step
-  viennacl::vector<NumericT> g_right_one;
-
-  //! interval indices (position in sorted listed of eigenvalues) of intervals 
containing one eigenvalue after the first iteration step
-  viennacl::vector<unsigned int> g_pos_one;
-
-  //! left interval limits of intervals containing multiple eigenvalues after 
the first iteration step
-  viennacl::vector<NumericT> g_left_mult;
-  //! right interval limits of intervals containing multiple eigenvalues after 
the first iteration step
-  viennacl::vector<NumericT> g_right_mult;
-
-  //! number of eigenvalues less than the left limit of the eigenvalue 
intervals containing multiple eigenvalues
-  viennacl::vector<unsigned int> g_left_count_mult;
-
-  //! number of eigenvalues less than the right limit of the eigenvalue 
intervals containing multiple eigenvalues
-  viennacl::vector<unsigned int> g_right_count_mult;
-  //! start addresses in g_left_mult etc. of blocks of intervals containing 
more than one eigenvalue after the first step
-  viennacl::vector<unsigned int> g_blocks_mult;
-
-  //! accumulated number of intervals in g_left_mult etc. of blocks of 
intervals containing more than one eigenvalue after the first step
-  viennacl::vector<unsigned int> g_blocks_mult_sum;
-
-  //! eigenvalues that have been generated in the second step from intervals 
that still contained multiple eigenvalues after the first step
-  viennacl::vector<NumericT> g_lambda_mult;
-
-  //! eigenvalue index of intervals that have been generated in the second 
processing step
-  viennacl::vector<unsigned int> g_pos_mult;
-
-  /** @brief Initialize variables and memory for result
-   *
-   * @param  mat_size  size of the matrix
-   */
-  ResultDataLarge(unsigned int mat_size) :
-    std_eigenvalues(mat_size), g_num_one(0), g_num_blocks_mult(0),
-    g_left_one(mat_size), g_right_one(mat_size), g_pos_one(mat_size),
-    g_left_mult(mat_size), g_right_mult(mat_size), 
g_left_count_mult(mat_size), g_right_count_mult(mat_size),
-    g_blocks_mult(mat_size), g_blocks_mult_sum(mat_size), 
g_lambda_mult(mat_size), g_pos_mult(mat_size) {}
-
-};
-} // namespace detail
-} // namespace linalg
-} // namespace viennacl
-#endif // #ifndef VIENNACL_LINALG_DETAIL_STRUCTS_HPP_
-

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/util.hpp
----------------------------------------------------------------------
diff --git 
a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/util.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/util.hpp
deleted file mode 100755
index 883d202..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/util.hpp
+++ /dev/null
@@ -1,106 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_BISECT_UTIL_HPP_
-#define VIENNACL_LINALG_DETAIL_BISECT_UTIL_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-
-/** @file viennacl/linalg/detail//bisect/util.hpp
-    @brief Utility functions
-
-    Implementation based on the sample provided with the CUDA 6.0 SDK, for 
which
-    the creation of derivative works is allowed by including the following 
statement:
-    "This software contains source code provided by NVIDIA Corporation."
-*/
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-
-////////////////////////////////////////////////////////////////////////////////
-//! Minimum
-////////////////////////////////////////////////////////////////////////////////
-template<class T>
-#ifdef __CUDACC__
-__host__  __device__
-#endif
-T
-min(const T &lhs, const T &rhs)
-{
-
-    return (lhs < rhs) ? lhs : rhs;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Maximum
-////////////////////////////////////////////////////////////////////////////////
-template<class T>
-#ifdef __CUDACC__
-__host__  __device__
-#endif
-T
-max(const T &lhs, const T &rhs)
-{
-
-    return (lhs < rhs) ? rhs : lhs;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Sign of number (float)
-////////////////////////////////////////////////////////////////////////////////
-#ifdef __CUDACC__
-__host__  __device__
-#endif
-inline float
-sign_f(const float &val)
-{
-    return (val < 0.0f) ? -1.0f : 1.0f;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Sign of number (double)
-////////////////////////////////////////////////////////////////////////////////
-#ifdef __CUDACC__
-__host__  __device__
-#endif
-inline double
-sign_d(const double &val)
-{
-    return (val < 0.0) ? -1.0 : 1.0;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-//! Get the number of blocks that are required to process \a num_threads with
-//! \a num_threads_blocks threads per block
-///////////////////////////////////////////////////////////////////////////////
-extern "C"
-inline
-unsigned int
-getNumBlocksLinear(const unsigned int num_threads,
-                   const unsigned int num_threads_block)
-{
-    const unsigned int block_rem =
-        ((num_threads % num_threads_block) != 0) ? 1 : 0;
-    return (num_threads / num_threads_block) + block_rem;
-}
-} // namespace detail
-} // namespace linalg
-} // namespace viennacl
-#endif // #ifndef VIENNACL_LINALG_DETAIL_UTIL_HPP_

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/block_ilu.hpp
----------------------------------------------------------------------
diff --git 
a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/block_ilu.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/block_ilu.hpp
deleted file mode 100644
index 1540e2d..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/block_ilu.hpp
+++ /dev/null
@@ -1,617 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_
-#define VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-/** @file viennacl/linalg/detail/ilu/block_ilu.hpp
-    @brief Implementations of incomplete block factorization preconditioners
-*/
-
-#include <vector>
-#include <cmath>
-#include "viennacl/forwards.h"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/linalg/detail/ilu/common.hpp"
-#include "viennacl/linalg/detail/ilu/ilu0.hpp"
-#include "viennacl/linalg/detail/ilu/ilut.hpp"
-
-#include <map>
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-  /** @brief Helper range class for representing a subvector of a larger 
buffer. */
-  template<typename VectorT, typename NumericT, typename SizeT = vcl_size_t>
-  class ilu_vector_range
-  {
-  public:
-    ilu_vector_range(VectorT & v,
-                     SizeT start_index,
-                     SizeT vec_size
-                    ) : vec_(v), start_(start_index), size_(vec_size) {}
-
-    NumericT & operator()(SizeT index)
-    {
-      assert(index < size_ && bool("Index out of bounds!"));
-      return vec_[start_ + index];
-    }
-
-    NumericT & operator[](SizeT index)
-    {
-      assert(index < size_ && bool("Index out of bounds!"));
-      return vec_[start_ + index];
-    }
-
-    SizeT size() const { return size_; }
-
-  private:
-    VectorT & vec_;
-    SizeT start_;
-    SizeT size_;
-  };
-
-  /** @brief Extracts a diagonal block from a larger system matrix
-    *
-    * @param A                   The full matrix
-    * @param diagonal_block_A    The output matrix, to which the extracted 
block is written to
-    * @param start_index         First row- and column-index of the block
-    * @param stop_index          First row- and column-index beyond the block
-    */
-  template<typename NumericT>
-  void extract_block_matrix(viennacl::compressed_matrix<NumericT> const & A,
-                            viennacl::compressed_matrix<NumericT>       & 
diagonal_block_A,
-                            vcl_size_t start_index,
-                            vcl_size_t stop_index
-                            )
-  {
-    assert( (A.handle1().get_active_handle_id() == viennacl::MAIN_MEMORY) && 
bool("System matrix must reside in main memory for ILU0") );
-    assert( (A.handle2().get_active_handle_id() == viennacl::MAIN_MEMORY) && 
bool("System matrix must reside in main memory for ILU0") );
-    assert( (A.handle().get_active_handle_id() == viennacl::MAIN_MEMORY) && 
bool("System matrix must reside in main memory for ILU0") );
-
-    NumericT     const * A_elements   = 
viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle());
-    unsigned int const * A_row_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(A.handle1());
-    unsigned int const * A_col_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(A.handle2());
-
-    NumericT     * output_elements   = 
viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(diagonal_block_A.handle());
-    unsigned int * output_row_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(diagonal_block_A.handle1());
-    unsigned int * output_col_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(diagonal_block_A.handle2());
-
-    vcl_size_t output_counter = 0;
-    for (vcl_size_t row = start_index; row < stop_index; ++row)
-    {
-      unsigned int buffer_col_start = A_row_buffer[row];
-      unsigned int buffer_col_end   = A_row_buffer[row+1];
-
-      output_row_buffer[row - start_index] = static_cast<unsigned 
int>(output_counter);
-
-      for (unsigned int buf_index = buffer_col_start; buf_index < 
buffer_col_end; ++buf_index)
-      {
-        unsigned int col = A_col_buffer[buf_index];
-        if (col < start_index)
-          continue;
-
-        if (col >= static_cast<unsigned int>(stop_index))
-          continue;
-
-        output_col_buffer[output_counter] = static_cast<unsigned int>(col - 
start_index);
-        output_elements[output_counter] = A_elements[buf_index];
-        ++output_counter;
-      }
-      output_row_buffer[row - start_index + 1] = static_cast<unsigned 
int>(output_counter);
-    }
-  }
-
-} // namespace detail
-
-
-
-/** @brief A block ILU preconditioner class, can be supplied to 
solve()-routines
- *
- * @tparam MatrixType   Type of the system matrix
- * @tparam ILUTag       Type of the tag identifiying the ILU preconditioner to 
be used on each block.
-*/
-template<typename MatrixT, typename ILUTag>
-class block_ilu_precond
-{
-typedef typename MatrixT::value_type      ScalarType;
-
-public:
-  typedef std::vector<std::pair<vcl_size_t, vcl_size_t> >    
index_vector_type;   //the pair refers to index range [a, b) of each block
-
-
-  block_ilu_precond(MatrixT const & mat,
-                    ILUTag const & tag,
-                    vcl_size_t num_blocks = 8
-                   ) : tag_(tag), L_blocks(num_blocks), U_blocks(num_blocks)
-  {
-    // Set up vector of block indices:
-    block_indices_.resize(num_blocks);
-    for (vcl_size_t i=0; i<num_blocks; ++i)
-    {
-      vcl_size_t start_index = (   i  * mat.size1()) / num_blocks;
-      vcl_size_t stop_index  = ((i+1) * mat.size1()) / num_blocks;
-
-      block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, 
stop_index);
-    }
-
-    //initialize preconditioner:
-    //std::cout << "Start CPU precond" << std::endl;
-    init(mat);
-    //std::cout << "End CPU precond" << std::endl;
-  }
-
-  block_ilu_precond(MatrixT const & mat,
-                    ILUTag const & tag,
-                    index_vector_type const & block_boundaries
-                   ) : tag_(tag), block_indices_(block_boundaries), 
L_blocks(block_boundaries.size()), U_blocks(block_boundaries.size())
-  {
-    //initialize preconditioner:
-    //std::cout << "Start CPU precond" << std::endl;
-    init(mat);
-    //std::cout << "End CPU precond" << std::endl;
-  }
-
-
-  template<typename VectorT>
-  void apply(VectorT & vec) const
-  {
-    for (vcl_size_t i=0; i<block_indices_.size(); ++i)
-      apply_dispatch(vec, i, ILUTag());
-  }
-
-private:
-  void init(MatrixT const & A)
-  {
-    viennacl::context host_context(viennacl::MAIN_MEMORY);
-    viennacl::compressed_matrix<ScalarType> mat(host_context);
-
-    viennacl::copy(A, mat);
-
-    unsigned int const * row_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(mat.handle1());
-
-#ifdef VIENNACL_WITH_OPENMP
-    #pragma omp parallel for
-#endif
-    for (long i2=0; i2<static_cast<long>(block_indices_.size()); ++i2)
-    {
-      vcl_size_t i = static_cast<vcl_size_t>(i2);
-      // Step 1: Extract blocks
-      vcl_size_t block_size = block_indices_[i].second - 
block_indices_[i].first;
-      vcl_size_t block_nnz  = row_buffer[block_indices_[i].second] - 
row_buffer[block_indices_[i].first];
-      viennacl::compressed_matrix<ScalarType> mat_block(block_size, 
block_size, block_nnz, host_context);
-
-      detail::extract_block_matrix(mat, mat_block, block_indices_[i].first, 
block_indices_[i].second);
-
-      // Step 2: Precondition blocks:
-      viennacl::switch_memory_context(L_blocks[i], host_context);
-      viennacl::switch_memory_context(U_blocks[i], host_context);
-      init_dispatch(mat_block, L_blocks[i], U_blocks[i], tag_);
-    }
-
-  }
-
-  void init_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block,
-                     viennacl::compressed_matrix<ScalarType> & L,
-                     viennacl::compressed_matrix<ScalarType> & U,
-                     viennacl::linalg::ilu0_tag)
-  {
-    (void)U;
-    L = mat_block;
-    viennacl::linalg::precondition(L, tag_);
-  }
-
-  void init_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block,
-                     viennacl::compressed_matrix<ScalarType> & L,
-                     viennacl::compressed_matrix<ScalarType> & U,
-                     viennacl::linalg::ilut_tag)
-  {
-    L.resize(mat_block.size1(), mat_block.size2());
-    U.resize(mat_block.size1(), mat_block.size2());
-    viennacl::linalg::precondition(mat_block, L, U, tag_);
-  }
-
-  template<typename VectorT>
-  void apply_dispatch(VectorT & vec, vcl_size_t i, viennacl::linalg::ilu0_tag) 
const
-  {
-    detail::ilu_vector_range<VectorT, ScalarType> vec_range(vec, 
block_indices_[i].first, L_blocks[i].size2());
-
-    unsigned int const * row_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(L_blocks[i].handle1());
-    unsigned int const * col_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(L_blocks[i].handle2());
-    ScalarType   const * elements   = 
viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L_blocks[i].handle());
-
-    
viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, 
col_buffer, elements, vec_range, L_blocks[i].size2(), unit_lower_tag());
-    
viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, 
col_buffer, elements, vec_range, L_blocks[i].size2(), upper_tag());
-  }
-
-  template<typename VectorT>
-  void apply_dispatch(VectorT & vec, vcl_size_t i, viennacl::linalg::ilut_tag) 
const
-  {
-    detail::ilu_vector_range<VectorT, ScalarType> vec_range(vec, 
block_indices_[i].first, L_blocks[i].size2());
-
-    {
-      unsigned int const * row_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(L_blocks[i].handle1());
-      unsigned int const * col_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(L_blocks[i].handle2());
-      ScalarType   const * elements   = 
viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L_blocks[i].handle());
-
-      
viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, 
col_buffer, elements, vec_range, L_blocks[i].size2(), unit_lower_tag());
-    }
-
-    {
-      unsigned int const * row_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(U_blocks[i].handle1());
-      unsigned int const * col_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(U_blocks[i].handle2());
-      ScalarType   const * elements   = 
viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(U_blocks[i].handle());
-
-      
viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, 
col_buffer, elements, vec_range, U_blocks[i].size2(), upper_tag());
-    }
-  }
-
-  ILUTag tag_;
-  index_vector_type block_indices_;
-  std::vector< viennacl::compressed_matrix<ScalarType> > L_blocks;
-  std::vector< viennacl::compressed_matrix<ScalarType> > U_blocks;
-};
-
-
-
-
-
-/** @brief ILUT preconditioner class, can be supplied to solve()-routines.
-*
-*  Specialization for compressed_matrix
-*/
-template<typename NumericT, unsigned int AlignmentV, typename ILUTagT>
-class block_ilu_precond< compressed_matrix<NumericT, AlignmentV>, ILUTagT>
-{
-  typedef compressed_matrix<NumericT, AlignmentV>        MatrixType;
-
-public:
-  typedef std::vector<std::pair<vcl_size_t, vcl_size_t> >    
index_vector_type;   //the pair refers to index range [a, b) of each block
-
-
-  block_ilu_precond(MatrixType const & mat,
-                    ILUTagT const & tag,
-                    vcl_size_t num_blocks = 8
-                   ) : tag_(tag),
-                       block_indices_(num_blocks),
-                       gpu_block_indices_(),
-                       gpu_L_trans_(0, 0, 
viennacl::context(viennacl::MAIN_MEMORY)),
-                       gpu_U_trans_(0, 0, 
viennacl::context(viennacl::MAIN_MEMORY)),
-                       gpu_D_(mat.size1(), 
viennacl::context(viennacl::MAIN_MEMORY)),
-                       L_blocks_(num_blocks),
-                       U_blocks_(num_blocks)
-  {
-    // Set up vector of block indices:
-    block_indices_.resize(num_blocks);
-    for (vcl_size_t i=0; i<num_blocks; ++i)
-    {
-      vcl_size_t start_index = (   i  * mat.size1()) / num_blocks;
-      vcl_size_t stop_index  = ((i+1) * mat.size1()) / num_blocks;
-
-      block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, 
stop_index);
-    }
-
-    //initialize preconditioner:
-    //std::cout << "Start CPU precond" << std::endl;
-    init(mat);
-    //std::cout << "End CPU precond" << std::endl;
-  }
-
-  block_ilu_precond(MatrixType const & mat,
-                    ILUTagT const & tag,
-                    index_vector_type const & block_boundaries
-                   ) : tag_(tag),
-                       block_indices_(block_boundaries),
-                       gpu_block_indices_(),
-                       gpu_L_trans_(0, 0, 
viennacl::context(viennacl::MAIN_MEMORY)),
-                       gpu_U_trans_(0, 0, 
viennacl::context(viennacl::MAIN_MEMORY)),
-                       gpu_D_(mat.size1(), 
viennacl::context(viennacl::MAIN_MEMORY)),
-                       L_blocks_(block_boundaries.size()),
-                       U_blocks_(block_boundaries.size())
-  {
-    //initialize preconditioner:
-    //std::cout << "Start CPU precond" << std::endl;
-    init(mat);
-    //std::cout << "End CPU precond" << std::endl;
-  }
-
-
-  void apply(vector<NumericT> & vec) const
-  {
-    viennacl::linalg::detail::block_inplace_solve(trans(gpu_L_trans_), 
gpu_block_indices_, block_indices_.size(), gpu_D_,
-                                                  vec,
-                                                  
viennacl::linalg::unit_lower_tag());
-
-    viennacl::linalg::detail::block_inplace_solve(trans(gpu_U_trans_), 
gpu_block_indices_, block_indices_.size(), gpu_D_,
-                                                  vec,
-                                                  
viennacl::linalg::upper_tag());
-
-    //apply_cpu(vec);
-  }
-
-
-private:
-
-  void init(MatrixType const & A)
-  {
-    viennacl::context host_context(viennacl::MAIN_MEMORY);
-    viennacl::compressed_matrix<NumericT> mat(host_context);
-
-    mat = A;
-
-    unsigned int const * row_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(mat.handle1());
-
-#ifdef VIENNACL_WITH_OPENMP
-    #pragma omp parallel for
-#endif
-    for (long i=0; i<static_cast<long>(block_indices_.size()); ++i)
-    {
-      // Step 1: Extract blocks
-      vcl_size_t block_size = 
block_indices_[static_cast<vcl_size_t>(i)].second - 
block_indices_[static_cast<vcl_size_t>(i)].first;
-      vcl_size_t block_nnz  = 
row_buffer[block_indices_[static_cast<vcl_size_t>(i)].second] - 
row_buffer[block_indices_[static_cast<vcl_size_t>(i)].first];
-      viennacl::compressed_matrix<NumericT> mat_block(block_size, block_size, 
block_nnz, host_context);
-
-      detail::extract_block_matrix(mat, mat_block, 
block_indices_[static_cast<vcl_size_t>(i)].first, 
block_indices_[static_cast<vcl_size_t>(i)].second);
-
-      // Step 2: Precondition blocks:
-      viennacl::switch_memory_context(L_blocks_[static_cast<vcl_size_t>(i)], 
host_context);
-      viennacl::switch_memory_context(U_blocks_[static_cast<vcl_size_t>(i)], 
host_context);
-      init_dispatch(mat_block, L_blocks_[static_cast<vcl_size_t>(i)], 
U_blocks_[static_cast<vcl_size_t>(i)], tag_);
-    }
-
-    /*
-     * copy resulting preconditioner back to GPU:
-     */
-    viennacl::backend::typesafe_host_array<unsigned int> 
block_indices_uint(gpu_block_indices_, 2 * block_indices_.size());
-    for (vcl_size_t i=0; i<block_indices_.size(); ++i)
-    {
-      block_indices_uint.set(2*i,     block_indices_[i].first);
-      block_indices_uint.set(2*i + 1, block_indices_[i].second);
-    }
-
-    viennacl::backend::memory_create(gpu_block_indices_, 
block_indices_uint.raw_size(), viennacl::traits::context(A), 
block_indices_uint.get());
-
-    blocks_to_device(A);
-
-  }
-
-  // Copy computed preconditioned blocks to OpenCL device
-  void blocks_to_device(MatrixType const & A)
-  {
-    gpu_L_trans_.resize(A.size1(), A.size2());
-    gpu_U_trans_.resize(A.size1(), A.size2());
-    gpu_D_.resize(A.size1());
-
-    unsigned int * L_trans_row_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(gpu_L_trans_.handle1());
-    unsigned int * U_trans_row_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(gpu_U_trans_.handle1());
-
-    //
-    // Count elements per row
-    //
-#ifdef VIENNACL_WITH_OPENMP
-    #pragma omp parallel for
-#endif
-    for (long block_index2 = 0; block_index2 < 
static_cast<long>(L_blocks_.size()); ++block_index2)
-    {
-      vcl_size_t block_index = vcl_size_t(block_index2);
-
-      unsigned int block_start = static_cast<unsigned 
int>(block_indices_[block_index].first);
-      unsigned int block_stop  = static_cast<unsigned 
int>(block_indices_[block_index].second);
-
-      unsigned int const * L_row_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(L_blocks_[block_index].handle1());
-      unsigned int const * L_col_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(L_blocks_[block_index].handle2());
-
-      // zero row array of L:
-      std::fill(L_trans_row_buffer + block_start,
-                L_trans_row_buffer + block_stop,
-                static_cast<unsigned int>(0));
-
-      // count number of elements per row:
-      for (vcl_size_t row = 0; row < L_blocks_[block_index].size1(); ++row)
-      {
-        unsigned int col_start = L_row_buffer[row];
-        unsigned int col_end   = L_row_buffer[row+1];
-
-        for (unsigned int j = col_start; j < col_end; ++j)
-        {
-          unsigned int col = L_col_buffer[j];
-          if (col < static_cast<unsigned int>(row))
-            L_trans_row_buffer[col + block_start] += 1;
-        }
-      }
-
-      ////// same for U
-
-      unsigned int const * U_row_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(U_blocks_[block_index].handle1());
-      unsigned int const * U_col_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(U_blocks_[block_index].handle2());
-
-      // zero row array of U:
-      std::fill(U_trans_row_buffer + block_start,
-                U_trans_row_buffer + block_stop,
-                static_cast<unsigned int>(0));
-
-      // count number of elements per row:
-      for (vcl_size_t row = 0; row < U_blocks_[block_index].size1(); ++row)
-      {
-        unsigned int col_start = U_row_buffer[row];
-        unsigned int col_end   = U_row_buffer[row+1];
-
-        for (unsigned int j = col_start; j < col_end; ++j)
-        {
-          unsigned int col = U_col_buffer[j];
-          if (col > row)
-            U_trans_row_buffer[col + block_start] += 1;
-        }
-      }
-    }
-
-
-    //
-    // Exclusive scan on row buffer (feel free to add parallelization here)
-    //
-    unsigned int current_value = 0;
-    for (vcl_size_t i=0; i<gpu_L_trans_.size1(); ++i)
-    {
-      unsigned int tmp = L_trans_row_buffer[i];
-      L_trans_row_buffer[i] = current_value;
-      current_value += tmp;
-    }
-    gpu_L_trans_.reserve(current_value);
-
-    current_value = 0;
-    for (vcl_size_t i=0; i<gpu_U_trans_.size1(); ++i)
-    {
-      unsigned int tmp = U_trans_row_buffer[i];
-      U_trans_row_buffer[i] = current_value;
-      current_value += tmp;
-    }
-    gpu_U_trans_.reserve(current_value);
-
-
-    //
-    // Fill with data
-    //
-    unsigned int       * L_trans_col_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(gpu_L_trans_.handle2());
-    NumericT           * L_trans_elements   = 
viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_L_trans_.handle());
-
-    unsigned int       * U_trans_col_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(gpu_U_trans_.handle2());
-    NumericT           * U_trans_elements   = 
viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_U_trans_.handle());
-
-    NumericT           * D_elements         = 
viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_D_.handle());
-
-    std::vector<unsigned int> offset_L(gpu_L_trans_.size1());
-    std::vector<unsigned int> offset_U(gpu_U_trans_.size1());
-
-#ifdef VIENNACL_WITH_OPENMP
-    #pragma omp parallel for
-#endif
-    for (long block_index2 = 0; block_index2 < 
static_cast<long>(L_blocks_.size()); ++block_index2)
-    {
-      vcl_size_t   block_index = vcl_size_t(block_index2);
-      unsigned int block_start = static_cast<unsigned 
int>(block_indices_[block_index].first);
-
-      unsigned int const * L_row_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(L_blocks_[block_index].handle1());
-      unsigned int const * L_col_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(L_blocks_[block_index].handle2());
-      NumericT     const * L_elements   = 
viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT    
>(L_blocks_[block_index].handle());
-
-
-      // write L_trans:
-      for (vcl_size_t row = 0; row < L_blocks_[block_index].size1(); ++row)
-      {
-        unsigned int col_start = L_row_buffer[row];
-        unsigned int col_end   = L_row_buffer[row+1];
-
-        for (unsigned int j = col_start; j < col_end; ++j)
-        {
-          unsigned int col = L_col_buffer[j];
-          if (col < row)
-          {
-            unsigned int row_trans = col + block_start;
-            unsigned int k = L_trans_row_buffer[row_trans] + 
offset_L[row_trans];
-            offset_L[row_trans] += 1;
-
-            L_trans_col_buffer[k] = static_cast<unsigned int>(row) + 
block_start;
-            L_trans_elements[k]   = L_elements[j];
-          }
-        }
-      }
-
-      unsigned int const * U_row_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(U_blocks_[block_index].handle1());
-      unsigned int const * U_col_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(U_blocks_[block_index].handle2());
-      NumericT     const * U_elements   = 
viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT    
>(U_blocks_[block_index].handle());
-
-      // write U_trans and D:
-      for (vcl_size_t row = 0; row < U_blocks_[block_index].size1(); ++row)
-      {
-        unsigned int col_start = U_row_buffer[row];
-        unsigned int col_end   = U_row_buffer[row+1];
-
-        for (unsigned int j = col_start; j < col_end; ++j)
-        {
-          unsigned int row_trans = U_col_buffer[j] + block_start;
-          unsigned int k = U_trans_row_buffer[row_trans] + offset_U[row_trans];
-
-          if (row_trans == row + block_start) // entry for D
-          {
-            D_elements[row_trans] = U_elements[j];
-          }
-          else if (row_trans > row + block_start) //entry for U
-          {
-            offset_U[row_trans] += 1;
-
-            U_trans_col_buffer[k] = static_cast<unsigned int>(row) + 
block_start;
-            U_trans_elements[k]   = U_elements[j];
-          }
-        }
-      }
-
-    }
-
-    //
-    // Send to destination device:
-    //
-    viennacl::switch_memory_context(gpu_L_trans_, 
viennacl::traits::context(A));
-    viennacl::switch_memory_context(gpu_U_trans_, 
viennacl::traits::context(A));
-    viennacl::switch_memory_context(gpu_D_,       
viennacl::traits::context(A));
-  }
-
-  void init_dispatch(viennacl::compressed_matrix<NumericT> const & mat_block,
-                     viennacl::compressed_matrix<NumericT> & L,
-                     viennacl::compressed_matrix<NumericT> & U,
-                     viennacl::linalg::ilu0_tag)
-  {
-    L = mat_block;
-    viennacl::linalg::precondition(L, tag_);
-    U = L; // fairly poor workaround...
-  }
-
-  void init_dispatch(viennacl::compressed_matrix<NumericT> const & mat_block,
-                     viennacl::compressed_matrix<NumericT> & L,
-                     viennacl::compressed_matrix<NumericT> & U,
-                     viennacl::linalg::ilut_tag)
-  {
-    L.resize(mat_block.size1(), mat_block.size2());
-    U.resize(mat_block.size1(), mat_block.size2());
-    viennacl::linalg::precondition(mat_block, L, U, tag_);
-  }
-
-
-  ILUTagT                               tag_;
-  index_vector_type                     block_indices_;
-  viennacl::backend::mem_handle         gpu_block_indices_;
-  viennacl::compressed_matrix<NumericT> gpu_L_trans_;
-  viennacl::compressed_matrix<NumericT> gpu_U_trans_;
-  viennacl::vector<NumericT>            gpu_D_;
-
-  std::vector<MatrixType> L_blocks_;
-  std::vector<MatrixType> U_blocks_;
-};
-
-
-}
-}
-
-
-
-
-#endif
-
-
-

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/chow_patel_ilu.hpp
----------------------------------------------------------------------
diff --git 
a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/chow_patel_ilu.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/chow_patel_ilu.hpp
deleted file mode 100644
index 7628cdb..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/chow_patel_ilu.hpp
+++ /dev/null
@@ -1,316 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_CHOW_PATEL_ILU_HPP_
-#define VIENNACL_LINALG_DETAIL_CHOW_PATEL_ILU_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the PDF manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-/** @file viennacl/linalg/detail/ilu/chow_patel_ilu.hpp
-  @brief Implementations of incomplete factorization preconditioners with 
fine-grained parallelism.
-
-  Based on "Fine-Grained Parallel Incomplete LU Factorization" by Chow and 
Patel, SIAM J. Sci. Comput., vol. 37, no. 2, pp. C169-C193
-*/
-
-#include <vector>
-#include <cmath>
-#include <iostream>
-#include "viennacl/forwards.h"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/linalg/detail/ilu/common.hpp"
-#include "viennacl/linalg/ilu_operations.hpp"
-#include "viennacl/linalg/prod.hpp"
-#include "viennacl/backend/memory.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-
-/** @brief A tag for incomplete LU and incomplete Cholesky factorization with 
static pattern (Parallel-ILU0, Parallel ICC0)
-*/
-class chow_patel_tag
-{
-public:
-  /** @brief Constructor allowing to set the number of sweeps and Jacobi 
iterations.
-    *
-    * @param num_sweeps        Number of sweeps in setup phase
-    * @param num_jacobi_iters  Number of Jacobi iterations for each triangular 
'solve' when applying the preconditioner to a vector
-    */
-  chow_patel_tag(vcl_size_t num_sweeps = 3, vcl_size_t num_jacobi_iters = 2) : 
sweeps_(num_sweeps), jacobi_iters_(num_jacobi_iters) {}
-
-  /** @brief Returns the number of sweeps (i.e. number of nonlinear 
iterations) in the solver setup stage */
-  vcl_size_t sweeps() const { return sweeps_; }
-  /** @brief Sets the number of sweeps (i.e. number of nonlinear iterations) 
in the solver setup stage */
-  void       sweeps(vcl_size_t num) { sweeps_ = num; }
-
-  /** @brief Returns the number of Jacobi iterations (i.e. applications of 
x_{k+1} = (I - D^{-1}R)x_k + D^{-1} b) for each of the solves y = U^{-1} x and 
z = L^{-1} y) for each preconditioner application. */
-  vcl_size_t jacobi_iters() const { return jacobi_iters_; }
-  /** @brief Sets the number of Jacobi iterations for each triangular 'solve' 
when applying the preconditioner to a vector. */
-  void       jacobi_iters(vcl_size_t num) { jacobi_iters_ = num; }
-
-private:
-  vcl_size_t sweeps_;
-  vcl_size_t jacobi_iters_;
-};
-
-namespace detail
-{
-  /** @brief Implementation of the parallel ICC0 factorization, Algorithm 3 in 
Chow-Patel paper.
-   *
-   *  Rather than dealing with a column-major upper triangular matrix U, we 
use the lower-triangular matrix L such that A is approximately given by LL^T.
-   *  The advantage is that L is readily available in row-major format.
-   */
-  template<typename NumericT>
-  void precondition(viennacl::compressed_matrix<NumericT> const & A,
-                    viennacl::compressed_matrix<NumericT>       & L,
-                    viennacl::vector<NumericT>                  & diag_L,
-                    viennacl::compressed_matrix<NumericT>       & L_trans,
-                    chow_patel_tag const & tag)
-  {
-    // make sure L and U have correct dimensions:
-    L.resize(A.size1(), A.size2(), false);
-
-    // initialize L and U from values in A:
-    viennacl::linalg::extract_L(A, L);
-
-    // diagonally scale values from A in L:
-    viennacl::linalg::icc_scale(A, L);
-
-    viennacl::vector<NumericT> aij_L(L.nnz(), viennacl::traits::context(A));
-    viennacl::backend::memory_copy(L.handle(), aij_L.handle(), 0, 0, 
sizeof(NumericT) * L.nnz());
-
-    // run sweeps:
-    for (vcl_size_t i=0; i<tag.sweeps(); ++i)
-      viennacl::linalg::icc_chow_patel_sweep(L, aij_L);
-
-    // transpose L to obtain L_trans:
-    viennacl::linalg::ilu_transpose(L, L_trans);
-
-    // form (I - D_L^{-1}L) and (I - D_U^{-1} U), with U := L_trans
-    viennacl::linalg::ilu_form_neumann_matrix(L,       diag_L);
-    viennacl::linalg::ilu_form_neumann_matrix(L_trans, diag_L);
-  }
-
-
-  /** @brief Implementation of the parallel ILU0 factorization, Algorithm 2 in 
Chow-Patel paper. */
-  template<typename NumericT>
-  void precondition(viennacl::compressed_matrix<NumericT> const & A,
-                    viennacl::compressed_matrix<NumericT>       & L,
-                    viennacl::vector<NumericT>                  & diag_L,
-                    viennacl::compressed_matrix<NumericT>       & U,
-                    viennacl::vector<NumericT>                  & diag_U,
-                    chow_patel_tag const & tag)
-  {
-    // make sure L and U have correct dimensions:
-    L.resize(A.size1(), A.size2(), false);
-    U.resize(A.size1(), A.size2(), false);
-
-    // initialize L and U from values in A:
-    viennacl::linalg::extract_LU(A, L, U);
-
-    // diagonally scale values from A in L and U:
-    viennacl::linalg::ilu_scale(A, L, U);
-
-    // transpose storage layout of U from CSR to CSC via transposition
-    viennacl::compressed_matrix<NumericT> U_trans;
-    viennacl::linalg::ilu_transpose(U, U_trans);
-
-    // keep entries of a_ij for the sweeps
-    viennacl::vector<NumericT> aij_L      (L.nnz(),       
viennacl::traits::context(A));
-    viennacl::vector<NumericT> aij_U_trans(U_trans.nnz(), 
viennacl::traits::context(A));
-
-    viennacl::backend::memory_copy(      L.handle(), aij_L.handle(),       0, 
0, sizeof(NumericT) * L.nnz());
-    viennacl::backend::memory_copy(U_trans.handle(), aij_U_trans.handle(), 0, 
0, sizeof(NumericT) * U_trans.nnz());
-
-    // run sweeps:
-    for (vcl_size_t i=0; i<tag.sweeps(); ++i)
-      viennacl::linalg::ilu_chow_patel_sweep(L, aij_L, U_trans, aij_U_trans);
-
-    // transpose U_trans back:
-    viennacl::linalg::ilu_transpose(U_trans, U);
-
-    // form (I - D_L^{-1}L) and (I - D_U^{-1} U)
-    viennacl::linalg::ilu_form_neumann_matrix(L, diag_L);
-    viennacl::linalg::ilu_form_neumann_matrix(U, diag_U);
-  }
-
-}
-
-
-
-
-/** @brief Parallel Chow-Patel ILU preconditioner class, can be supplied to 
solve()-routines
-*/
-template<typename MatrixT>
-class chow_patel_icc_precond
-{
-  // only works with compressed_matrix!
-  typedef typename MatrixT::CHOW_PATEL_ICC_ONLY_WORKS_WITH_COMPRESSED_MATRIX  
error_type;
-};
-
-
-/** @brief Parallel Chow-Patel ILU preconditioner class, can be supplied to 
solve()-routines.
-*
-*  Specialization for compressed_matrix
-*/
-template<typename NumericT, unsigned int AlignmentV>
-class chow_patel_icc_precond< viennacl::compressed_matrix<NumericT, 
AlignmentV> >
-{
-
-public:
-  chow_patel_icc_precond(viennacl::compressed_matrix<NumericT, AlignmentV> 
const & A, chow_patel_tag const & tag)
-    : tag_(tag),
-      L_(0, 0, 0, viennacl::traits::context(A)),
-      diag_L_(A.size1(), viennacl::traits::context(A)),
-      L_trans_(0, 0, 0, viennacl::traits::context(A)),
-      x_k_(A.size1(), viennacl::traits::context(A)),
-      b_(A.size1(), viennacl::traits::context(A))
-  {
-    viennacl::linalg::detail::precondition(A, L_, diag_L_, L_trans_, tag_);
-  }
-
-  /** @brief Preconditioner application: LL^Tx = b, computed via Ly = b, L^Tx 
= y using Jacobi iterations.
-    *
-    * L contains (I - D_L^{-1}L), L_trans contains (I - D_L^{-1}L^T) where D 
denotes the respective diagonal matrix
-    */
-  template<typename VectorT>
-  void apply(VectorT & vec) const
-  {
-    //
-    // y = L^{-1} b through Jacobi iteration y_{k+1} = (I - D^{-1}L)y_k + 
D^{-1}x
-    //
-    b_ = viennacl::linalg::element_div(vec, diag_L_);
-    x_k_ = b_;
-    for (unsigned int i=0; i<tag_.jacobi_iters(); ++i)
-    {
-      vec = viennacl::linalg::prod(L_, x_k_);
-      x_k_ = vec + b_;
-    }
-
-    //
-    // x = U^{-1} y through Jacobi iteration x_{k+1} = (I - D^{-1}L^T)x_k + 
D^{-1}b
-    //
-    b_ = viennacl::linalg::element_div(x_k_, diag_L_);
-    x_k_ = b_; // x_1 if x_0 \equiv 0
-    for (unsigned int i=0; i<tag_.jacobi_iters(); ++i)
-    {
-      vec = viennacl::linalg::prod(L_trans_, x_k_);
-      x_k_ = vec + b_;
-    }
-
-    // return result:
-    vec = x_k_;
-  }
-
-private:
-  chow_patel_tag                          tag_;
-  viennacl::compressed_matrix<NumericT>   L_;
-  viennacl::vector<NumericT>              diag_L_;
-  viennacl::compressed_matrix<NumericT>   L_trans_;
-
-  mutable viennacl::vector<NumericT>      x_k_;
-  mutable viennacl::vector<NumericT>      b_;
-};
-
-
-
-
-
-
-/** @brief Parallel Chow-Patel ILU preconditioner class, can be supplied to 
solve()-routines
-*/
-template<typename MatrixT>
-class chow_patel_ilu_precond
-{
-  // only works with compressed_matrix!
-  typedef typename MatrixT::CHOW_PATEL_ILU_ONLY_WORKS_WITH_COMPRESSED_MATRIX  
error_type;
-};
-
-
-/** @brief Parallel Chow-Patel ILU preconditioner class, can be supplied to 
solve()-routines.
-*
-*  Specialization for compressed_matrix
-*/
-template<typename NumericT, unsigned int AlignmentV>
-class chow_patel_ilu_precond< viennacl::compressed_matrix<NumericT, 
AlignmentV> >
-{
-
-public:
-  chow_patel_ilu_precond(viennacl::compressed_matrix<NumericT, AlignmentV> 
const & A, chow_patel_tag const & tag)
-    : tag_(tag),
-      L_(0, 0, 0, viennacl::traits::context(A)),
-      diag_L_(A.size1(), viennacl::traits::context(A)),
-      U_(0, 0, 0, viennacl::traits::context(A)),
-      diag_U_(A.size1(), viennacl::traits::context(A)),
-      x_k_(A.size1(), viennacl::traits::context(A)),
-      b_(A.size1(), viennacl::traits::context(A))
-  {
-    viennacl::linalg::detail::precondition(A, L_, diag_L_, U_, diag_U_, tag_);
-  }
-
-  /** @brief Preconditioner application: LUx = b, computed via Ly = b, Ux = y 
using Jacobi iterations.
-    *
-    * L_ contains (I - D_L^{-1}L), U_ contains (I - D_U^{-1}U) where D denotes 
the respective diagonal matrix
-    */
-  template<typename VectorT>
-  void apply(VectorT & vec) const
-  {
-    //
-    // y = L^{-1} b through Jacobi iteration y_{k+1} = (I - D^{-1}L)y_k + 
D^{-1}x
-    //
-    b_ = viennacl::linalg::element_div(vec, diag_L_);
-    x_k_ = b_;
-    for (unsigned int i=0; i<tag_.jacobi_iters(); ++i)
-    {
-      vec = viennacl::linalg::prod(L_, x_k_);
-      x_k_ = vec + b_;
-    }
-
-    //
-    // x = U^{-1} y through Jacobi iteration x_{k+1} = (I - D^{-1}U)x_k + 
D^{-1}b
-    //
-    b_ = viennacl::linalg::element_div(x_k_, diag_U_);
-    x_k_ = b_; // x_1 if x_0 \equiv 0
-    for (unsigned int i=0; i<tag_.jacobi_iters(); ++i)
-    {
-      vec = viennacl::linalg::prod(U_, x_k_);
-      x_k_ = vec + b_;
-    }
-
-    // return result:
-    vec = x_k_;
-  }
-
-private:
-  chow_patel_tag                          tag_;
-  viennacl::compressed_matrix<NumericT>   L_;
-  viennacl::vector<NumericT>              diag_L_;
-  viennacl::compressed_matrix<NumericT>   U_;
-  viennacl::vector<NumericT>              diag_U_;
-
-  mutable viennacl::vector<NumericT>      x_k_;
-  mutable viennacl::vector<NumericT>      b_;
-};
-
-
-} // namespace linalg
-} // namespace viennacl
-
-
-#endif
-
-
-

[27/51] [partial] mahout git commit: Revert "(nojira) add native-viennaCL module to codebase. closes apache/mahout#241"

Reply via email to