http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/device_specific/templates/utils.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/templates/utils.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/templates/utils.hpp deleted file mode 100644 index 67d089a..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/device_specific/templates/utils.hpp +++ /dev/null @@ -1,105 +0,0 @@ -#ifndef VIENNACL_DEVICE_SPECIFIC_TEMPLATES_REDUCTION_UTILS_HPP -#define VIENNACL_DEVICE_SPECIFIC_TEMPLATES_REDUCTION_UTILS_HPP - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - - -/** @file viennacl/device_specific/templates/utils.hpp - * - * A collection of utilities for the device specific execution templates. -*/ - -#include <vector> - -#include "viennacl/scheduler/forwards.h" - -#include "viennacl/device_specific/tree_parsing.hpp" -#include "viennacl/device_specific/utils.hpp" - - -namespace viennacl -{ -namespace device_specific -{ - -inline void compute_reduction(utils::kernel_generation_stream & os, std::string acc, std::string cur, scheduler::op_element const & op) -{ - if (utils::elementwise_function(op)) - os << acc << "=" << tree_parsing::evaluate(op.type) << "(" << acc << "," << cur << ");" << std::endl; - else - os << acc << "= (" << acc << ")" << tree_parsing::evaluate(op.type) << "(" << cur << ");" << std::endl; -} - -inline void compute_index_reduction(utils::kernel_generation_stream & os, std::string acc, std::string cur, std::string const & acc_value, std::string const & cur_value, scheduler::op_element const & op) -{ - // os << acc << " = " << cur_value << ">" << acc_value << "?" << cur << ":" << acc << ";" << std::endl; - os << acc << "= select(" << acc << "," << cur << "," << cur_value << ">" << acc_value << ");" << std::endl; - os << acc_value << "="; - if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE) os << "fmax"; - if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE) os << "max"; - if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE) os << "fmin"; - if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE) os << "min"; - os << "(" << acc_value << "," << cur_value << ");"<< std::endl; -} - -inline void process_all(std::string const & type_key, std::string const & str, - utils::kernel_generation_stream & stream, std::vector<mapping_type> const & mappings) -{ - for (std::vector<mapping_type>::const_iterator mit = mappings.begin(); mit != mappings.end(); ++mit) - for (mapping_type::const_iterator mmit = mit->begin(); mmit != mit->end(); ++mmit) - if (mmit->second->type_key()==type_key) - stream << mmit->second->process(str) << std::endl; -} - - -inline void process_all_at(std::string const & type_key, std::string const & str, - utils::kernel_generation_stream & stream, std::vector<mapping_type> const & mappings, - vcl_size_t root_idx, leaf_t leaf) -{ - for (std::vector<mapping_type>::const_iterator mit = mappings.begin(); mit != mappings.end(); ++mit) - { - mapped_object * obj = at(*mit, mapping_key(root_idx, leaf)).get(); - if (obj->type_key()==type_key) - stream << obj->process(str) << std::endl; - } -} - -inline std::string neutral_element(scheduler::op_element const & op) -{ - switch (op.type) - { - case scheduler::OPERATION_BINARY_ADD_TYPE : return "0"; - case scheduler::OPERATION_BINARY_MULT_TYPE : return "1"; - case scheduler::OPERATION_BINARY_DIV_TYPE : return "1"; - case scheduler::OPERATION_BINARY_ELEMENT_FMAX_TYPE : return "-INFINITY"; - case scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE : return "-INFINITY"; - case scheduler::OPERATION_BINARY_ELEMENT_MAX_TYPE : return "-INFINITY"; - case scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE : return "-INFINITY"; - case scheduler::OPERATION_BINARY_ELEMENT_FMIN_TYPE : return "INFINITY"; - case scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE : return "INFINITY"; - case scheduler::OPERATION_BINARY_ELEMENT_MIN_TYPE : return "INFINITY"; - case scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE : return "INFINITY"; - - default: throw generator_not_supported_exception("Unsupported reduction operator : no neutral element known"); - } -} - -} -} - -#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/device_specific/tree_parsing.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/tree_parsing.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/tree_parsing.hpp deleted file mode 100644 index f9cc8a8..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/device_specific/tree_parsing.hpp +++ /dev/null @@ -1,512 +0,0 @@ -#ifndef VIENNACL_DEVICE_SPECIFIC_TREE_PARSING_HPP -#define VIENNACL_DEVICE_SPECIFIC_TREE_PARSING_HPP - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - - -/** @file viennacl/device_specific/tree_parsing.hpp - @brief Code for parsing the expression trees. -*/ - -#include <set> - -#include "viennacl/forwards.h" - -#include "viennacl/scheduler/forwards.h" - -#include "viennacl/device_specific/forwards.h" -#include "viennacl/device_specific/utils.hpp" -#include "viennacl/device_specific/mapped_objects.hpp" - -namespace viennacl -{ -namespace device_specific -{ -namespace tree_parsing -{ - -/** @brief base functor class for traversing a statement */ -class traversal_functor -{ -public: - void call_before_expansion(scheduler::statement const &, vcl_size_t) const { } - void call_after_expansion(scheduler::statement const &, vcl_size_t) const { } -}; - -/** @brief Recursively execute a functor on a statement */ -template<class Fun> -inline void traverse(scheduler::statement const & statement, vcl_size_t root_idx, Fun const & fun, bool inspect) -{ - scheduler::statement_node const & root_node = statement.array()[root_idx]; - bool recurse = utils::node_leaf(root_node.op)?inspect:true; - - fun.call_before_expansion(statement, root_idx); - - //Lhs: - if (recurse) - { - if (root_node.lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) - traverse(statement, root_node.lhs.node_index, fun, inspect); - if (root_node.lhs.type_family != scheduler::INVALID_TYPE_FAMILY) - fun(statement, root_idx, LHS_NODE_TYPE); - } - - //Self: - fun(statement, root_idx, PARENT_NODE_TYPE); - - //Rhs: - if (recurse && root_node.rhs.type_family!=scheduler::INVALID_TYPE_FAMILY) - { - if (root_node.rhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) - traverse(statement, root_node.rhs.node_index, fun, inspect); - if (root_node.rhs.type_family != scheduler::INVALID_TYPE_FAMILY) - fun(statement, root_idx, RHS_NODE_TYPE); - } - - fun.call_after_expansion(statement, root_idx); -} - -class filter : public traversal_functor -{ -public: - typedef bool (*pred_t)(scheduler::statement_node const & node); - - filter(pred_t pred, std::vector<vcl_size_t> & out) : pred_(pred), out_(out){ } - - void operator()(scheduler::statement const & statement, vcl_size_t root_idx, leaf_t) const - { - scheduler::statement_node const * root_node = &statement.array()[root_idx]; - if (pred_(*root_node)) - out_.push_back(root_idx); - } -private: - pred_t pred_; - std::vector<vcl_size_t> & out_; -}; - -class filter_elements : public traversal_functor -{ -public: - filter_elements(scheduler::statement_node_subtype subtype, std::vector<scheduler::lhs_rhs_element> & out) : subtype_(subtype), out_(out) { } - - void operator()(scheduler::statement const & statement, vcl_size_t root_idx, leaf_t) const - { - scheduler::statement_node const * root_node = &statement.array()[root_idx]; - if (root_node->lhs.subtype==subtype_) - out_.push_back(root_node->lhs); - if (root_node->rhs.subtype==subtype_) - out_.push_back(root_node->rhs); - } -private: - scheduler::statement_node_subtype subtype_; - std::vector<scheduler::lhs_rhs_element> & out_; -}; - -/** @brief generate a string from an operation_node_type */ -inline const char * evaluate(scheduler::operation_node_type type) -{ - using namespace scheduler; - // unary expression - switch (type) - { - //Function - case OPERATION_UNARY_ABS_TYPE : return "abs"; - case OPERATION_UNARY_ACOS_TYPE : return "acos"; - case OPERATION_UNARY_ASIN_TYPE : return "asin"; - case OPERATION_UNARY_ATAN_TYPE : return "atan"; - case OPERATION_UNARY_CEIL_TYPE : return "ceil"; - case OPERATION_UNARY_COS_TYPE : return "cos"; - case OPERATION_UNARY_COSH_TYPE : return "cosh"; - case OPERATION_UNARY_EXP_TYPE : return "exp"; - case OPERATION_UNARY_FABS_TYPE : return "fabs"; - case OPERATION_UNARY_FLOOR_TYPE : return "floor"; - case OPERATION_UNARY_LOG_TYPE : return "log"; - case OPERATION_UNARY_LOG10_TYPE : return "log10"; - case OPERATION_UNARY_SIN_TYPE : return "sin"; - case OPERATION_UNARY_SINH_TYPE : return "sinh"; - case OPERATION_UNARY_SQRT_TYPE : return "sqrt"; - case OPERATION_UNARY_TAN_TYPE : return "tan"; - case OPERATION_UNARY_TANH_TYPE : return "tanh"; - - case OPERATION_UNARY_CAST_CHAR_TYPE : return "(char)"; - case OPERATION_UNARY_CAST_UCHAR_TYPE : return "(uchar)"; - case OPERATION_UNARY_CAST_SHORT_TYPE : return "(short)"; - case OPERATION_UNARY_CAST_USHORT_TYPE : return "(ushort)"; - case OPERATION_UNARY_CAST_INT_TYPE : return "(int)"; - case OPERATION_UNARY_CAST_UINT_TYPE : return "(uint)"; - case OPERATION_UNARY_CAST_LONG_TYPE : return "(long)"; - case OPERATION_UNARY_CAST_ULONG_TYPE : return "(ulong)"; - case OPERATION_UNARY_CAST_HALF_TYPE : return "(half)"; - case OPERATION_UNARY_CAST_FLOAT_TYPE : return "(float)"; - case OPERATION_UNARY_CAST_DOUBLE_TYPE : return "(double)"; - - case OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE : return "argfmax"; - case OPERATION_BINARY_ELEMENT_ARGMAX_TYPE : return "argmax"; - case OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE : return "argfmin"; - case OPERATION_BINARY_ELEMENT_ARGMIN_TYPE : return "argmin"; - case OPERATION_BINARY_ELEMENT_POW_TYPE : return "pow"; - - //Arithmetic - case OPERATION_UNARY_MINUS_TYPE : return "-"; - case OPERATION_BINARY_ASSIGN_TYPE : return "="; - case OPERATION_BINARY_INPLACE_ADD_TYPE : return "+="; - case OPERATION_BINARY_INPLACE_SUB_TYPE : return "-="; - case OPERATION_BINARY_ADD_TYPE : return "+"; - case OPERATION_BINARY_SUB_TYPE : return "-"; - case OPERATION_BINARY_MULT_TYPE : return "*"; - case OPERATION_BINARY_ELEMENT_PROD_TYPE : return "*"; - case OPERATION_BINARY_DIV_TYPE : return "/"; - case OPERATION_BINARY_ELEMENT_DIV_TYPE : return "/"; - case OPERATION_BINARY_ACCESS_TYPE : return "[]"; - - //Relational - case OPERATION_BINARY_ELEMENT_EQ_TYPE : return "isequal"; - case OPERATION_BINARY_ELEMENT_NEQ_TYPE : return "isnotequal"; - case OPERATION_BINARY_ELEMENT_GREATER_TYPE : return "isgreater"; - case OPERATION_BINARY_ELEMENT_GEQ_TYPE : return "isgreaterequal"; - case OPERATION_BINARY_ELEMENT_LESS_TYPE : return "isless"; - case OPERATION_BINARY_ELEMENT_LEQ_TYPE : return "islessequal"; - - case OPERATION_BINARY_ELEMENT_FMAX_TYPE : return "fmax"; - case OPERATION_BINARY_ELEMENT_FMIN_TYPE : return "fmin"; - case OPERATION_BINARY_ELEMENT_MAX_TYPE : return "max"; - case OPERATION_BINARY_ELEMENT_MIN_TYPE : return "min"; - //Unary - case OPERATION_UNARY_TRANS_TYPE : return "trans"; - - //Binary - case OPERATION_BINARY_INNER_PROD_TYPE : return "iprod"; - case OPERATION_BINARY_MAT_MAT_PROD_TYPE : return "mmprod"; - case OPERATION_BINARY_MAT_VEC_PROD_TYPE : return "mvprod"; - case OPERATION_BINARY_VECTOR_DIAG_TYPE : return "vdiag"; - case OPERATION_BINARY_MATRIX_DIAG_TYPE : return "mdiag"; - case OPERATION_BINARY_MATRIX_ROW_TYPE : return "row"; - case OPERATION_BINARY_MATRIX_COLUMN_TYPE : return "col"; - - default : throw generator_not_supported_exception("Unsupported operator"); - } -} - -inline const char * operator_string(scheduler::operation_node_type type) -{ - using namespace scheduler; - switch (type) - { - case OPERATION_UNARY_CAST_CHAR_TYPE : return "char"; - case OPERATION_UNARY_CAST_UCHAR_TYPE : return "uchar"; - case OPERATION_UNARY_CAST_SHORT_TYPE : return "short"; - case OPERATION_UNARY_CAST_USHORT_TYPE : return "ushort"; - case OPERATION_UNARY_CAST_INT_TYPE : return "int"; - case OPERATION_UNARY_CAST_UINT_TYPE : return "uint"; - case OPERATION_UNARY_CAST_LONG_TYPE : return "long"; - case OPERATION_UNARY_CAST_ULONG_TYPE : return "ulong"; - case OPERATION_UNARY_CAST_HALF_TYPE : return "half"; - case OPERATION_UNARY_CAST_FLOAT_TYPE : return "float"; - case OPERATION_UNARY_CAST_DOUBLE_TYPE : return "double"; - - case OPERATION_UNARY_MINUS_TYPE : return "umin"; - case OPERATION_BINARY_ASSIGN_TYPE : return "assign"; - case OPERATION_BINARY_INPLACE_ADD_TYPE : return "ip_add"; - case OPERATION_BINARY_INPLACE_SUB_TYPE : return "ip_sub"; - case OPERATION_BINARY_ADD_TYPE : return "add"; - case OPERATION_BINARY_SUB_TYPE : return "sub"; - case OPERATION_BINARY_MULT_TYPE : return "mult"; - case OPERATION_BINARY_ELEMENT_PROD_TYPE : return "eprod"; - case OPERATION_BINARY_DIV_TYPE : return "div"; - case OPERATION_BINARY_ELEMENT_DIV_TYPE : return "ediv"; - case OPERATION_BINARY_ACCESS_TYPE : return "acc"; - default : return evaluate(type); - } -} - -/** @brief functor for generating the expression string from a statement */ -class evaluate_expression_traversal: public tree_parsing::traversal_functor -{ -private: - std::map<std::string, std::string> const & accessors_; - std::string & str_; - mapping_type const & mapping_; - -public: - evaluate_expression_traversal(std::map<std::string, std::string> const & accessors, std::string & str, mapping_type const & mapping) : accessors_(accessors), str_(str), mapping_(mapping){ } - - void call_before_expansion(scheduler::statement const & statement, vcl_size_t root_idx) const - { - scheduler::statement_node const & root_node = statement.array()[root_idx]; - if ((root_node.op.type_family==scheduler::OPERATION_UNARY_TYPE_FAMILY || utils::elementwise_function(root_node.op)) - && !utils::node_leaf(root_node.op)) - str_+=tree_parsing::evaluate(root_node.op.type); - str_+="("; - - } - - void call_after_expansion(scheduler::statement const & /*statement*/, vcl_size_t /*root_idx*/) const - { - str_+=")"; - } - - void operator()(scheduler::statement const & statement, vcl_size_t root_idx, leaf_t leaf) const - { - scheduler::statement_node const & root_node = statement.array()[root_idx]; - mapping_type::key_type key = std::make_pair(root_idx, leaf); - if (leaf==PARENT_NODE_TYPE) - { - if (utils::node_leaf(root_node.op)) - str_ += at(mapping_, key)->evaluate(accessors_); - else if (utils::elementwise_operator(root_node.op)) - str_ += tree_parsing::evaluate(root_node.op.type); - else if (root_node.op.type_family!=scheduler::OPERATION_UNARY_TYPE_FAMILY && utils::elementwise_function(root_node.op)) - str_ += ","; - } - else - { - if (leaf==LHS_NODE_TYPE) - { - if (root_node.lhs.type_family!=scheduler::COMPOSITE_OPERATION_FAMILY) - str_ += at(mapping_, key)->evaluate(accessors_); - } - - if (leaf==RHS_NODE_TYPE) - { - if (root_node.rhs.type_family!=scheduler::COMPOSITE_OPERATION_FAMILY) - str_ += at(mapping_, key)->evaluate(accessors_); - } - } - } -}; - -inline std::string evaluate(leaf_t leaf, std::map<std::string, std::string> const & accessors, - scheduler::statement const & statement, vcl_size_t root_idx, mapping_type const & mapping) -{ - std::string res; - evaluate_expression_traversal traversal_functor(accessors, res, mapping); - scheduler::statement_node const & root_node = statement.array()[root_idx]; - - if (leaf==RHS_NODE_TYPE) - { - if (root_node.rhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) - tree_parsing::traverse(statement, root_node.rhs.node_index, traversal_functor, false); - else - traversal_functor(statement, root_idx, leaf); - } - else if (leaf==LHS_NODE_TYPE) - { - if (root_node.lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) - tree_parsing::traverse(statement, root_node.lhs.node_index, traversal_functor, false); - else - traversal_functor(statement, root_idx, leaf); - } - else - tree_parsing::traverse(statement, root_idx, traversal_functor, false); - - return res; -} - -inline void evaluate(utils::kernel_generation_stream & stream, leaf_t leaf, std::map<std::string, std::string> const & accessors, - statements_container const & statements, std::vector<mapping_type> const & mappings) -{ - statements_container::data_type::const_iterator sit; - std::vector<mapping_type>::const_iterator mit; - - for (mit = mappings.begin(), sit = statements.data().begin(); sit != statements.data().end(); ++mit, ++sit) - stream << evaluate(leaf, accessors, *sit, sit->root(), *mit) << ";" << std::endl; -} - - -/** @brief functor for fetching or writing-back the elements in a statement */ -class process_traversal : public tree_parsing::traversal_functor -{ -public: - process_traversal(std::string const & type_key, std::string const & to_process, utils::kernel_generation_stream & stream, - mapping_type const & mapping, std::set<std::string> & already_processed) : type_key_(type_key), to_process_(to_process), stream_(stream), mapping_(mapping), already_processed_(already_processed){ } - - void operator()(scheduler::statement const & /*statement*/, vcl_size_t root_idx, leaf_t leaf) const - { - mapping_type::const_iterator it = mapping_.find(std::make_pair(root_idx, leaf)); - if (it!=mapping_.end()) - { - mapped_object * obj = it->second.get(); - if (obj->type_key()==type_key_) - { - if (already_processed_.insert(obj->process("#name")).second) - stream_ << obj->process(to_process_) << std::endl; - } - } - } - -private: - std::string const & type_key_; - std::string const & to_process_; - utils::kernel_generation_stream & stream_; - mapping_type const & mapping_; - std::set<std::string> & already_processed_; -}; - -inline void process(utils::kernel_generation_stream & stream, leaf_t leaf, std::string const & type_key, std::string const & to_process, - scheduler::statement const & statement, vcl_size_t root_idx, mapping_type const & mapping, std::set<std::string> & already_processed) -{ - process_traversal traversal_functor(type_key, to_process, stream, mapping, already_processed); - scheduler::statement_node const & root_node = statement.array()[root_idx]; - - if (leaf==RHS_NODE_TYPE) - { - if (root_node.rhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) - tree_parsing::traverse(statement, root_node.rhs.node_index, traversal_functor, true); - else - traversal_functor(statement, root_idx, leaf); - } - else if (leaf==LHS_NODE_TYPE) - { - if (root_node.lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) - tree_parsing::traverse(statement, root_node.lhs.node_index, traversal_functor, true); - else - traversal_functor(statement, root_idx, leaf); - } - else - { - tree_parsing::traverse(statement, root_idx, traversal_functor, true); - } -} - -inline void process(utils::kernel_generation_stream & stream, leaf_t leaf, std::string const & type_key, std::string const & to_process, - statements_container const & statements, std::vector<mapping_type> const & mappings) -{ - statements_container::data_type::const_iterator sit; - std::vector<mapping_type>::const_iterator mit; - std::set<std::string> already_processed; - - for (mit = mappings.begin(), sit = statements.data().begin(); sit != statements.data().end(); ++mit, ++sit) - process(stream, leaf, type_key, to_process, *sit, sit->root(), *mit, already_processed); -} - - -class statement_representation_functor : public traversal_functor{ -private: - static void append_id(char * & ptr, unsigned int val) - { - if (val==0) - *ptr++='0'; - else - while (val>0) - { - *ptr++= (char)('0' + (val % 10)); - val /= 10; - } - } - -public: - typedef void result_type; - - statement_representation_functor(symbolic_binder & binder, char *& ptr) : binder_(binder), ptr_(ptr){ } - - template<class NumericT> - inline result_type operator()(NumericT const & /*scal*/) const - { - *ptr_++='h'; //host - *ptr_++='s'; //scalar - *ptr_++=utils::first_letter_of_type<NumericT>::value(); - } - - /** @brief Scalar mapping */ - template<class NumericT> - inline result_type operator()(scalar<NumericT> const & scal) const - { - *ptr_++='s'; //scalar - *ptr_++=utils::first_letter_of_type<NumericT>::value(); - append_id(ptr_, binder_.get(&traits::handle(scal))); - } - - /** @brief Vector mapping */ - template<class NumericT> - inline result_type operator()(vector_base<NumericT> const & vec) const - { - *ptr_++='v'; //vector - *ptr_++=utils::first_letter_of_type<NumericT>::value(); - append_id(ptr_, binder_.get(&traits::handle(vec))); - } - - /** @brief Implicit vector mapping */ - template<class NumericT> - inline result_type operator()(implicit_vector_base<NumericT> const & /*vec*/) const - { - *ptr_++='i'; //implicit - *ptr_++='v'; //vector - *ptr_++=utils::first_letter_of_type<NumericT>::value(); - } - - /** @brief Matrix mapping */ - template<class NumericT> - inline result_type operator()(matrix_base<NumericT> const & mat) const - { - *ptr_++='m'; //Matrix - *ptr_++=mat.row_major()?'r':'c'; - *ptr_++=utils::first_letter_of_type<NumericT>::value(); - append_id(ptr_, binder_.get(&traits::handle(mat))); - } - - /** @brief Implicit matrix mapping */ - template<class NumericT> - inline result_type operator()(implicit_matrix_base<NumericT> const & /*mat*/) const - { - *ptr_++='i'; //implicit - *ptr_++='m'; //matrix - *ptr_++=utils::first_letter_of_type<NumericT>::value(); - } - - static inline void append(char*& p, const char * str) - { - vcl_size_t n = std::strlen(str); - std::memcpy(p, str, n); - p+=n; - } - - inline void operator()(scheduler::statement const & statement, vcl_size_t root_idx, leaf_t leaf_t) const - { - scheduler::statement_node const & root_node = statement.array()[root_idx]; - if (leaf_t==LHS_NODE_TYPE && root_node.lhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY) - utils::call_on_element(root_node.lhs, *this); - else if (root_node.op.type_family==scheduler::OPERATION_BINARY_TYPE_FAMILY && leaf_t==RHS_NODE_TYPE && root_node.rhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY) - utils::call_on_element(root_node.rhs, *this); - else if (leaf_t==PARENT_NODE_TYPE) - append_id(ptr_,root_node.op.type); - } - -private: - symbolic_binder & binder_; - char *& ptr_; -}; - -inline std::string statements_representation(statements_container const & statements, binding_policy_t binding_policy) -{ - std::vector<char> program_name_vector(256); - char* program_name = &(program_name_vector[0]); - if (statements.order()==statements_container::INDEPENDENT) - *program_name++='i'; - else - *program_name++='s'; - tools::shared_ptr<symbolic_binder> binder = make_binder(binding_policy); - for (statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it) - tree_parsing::traverse(*it, it->root(), tree_parsing::statement_representation_functor(*binder, program_name),true); - *program_name='\0'; - return std::string(&(program_name_vector[0])); -} - -} -} -} -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/device_specific/utils.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/utils.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/utils.hpp deleted file mode 100644 index 1f1fc60..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/device_specific/utils.hpp +++ /dev/null @@ -1,568 +0,0 @@ -#ifndef VIENNACL_DEVICE_SPECIFIC_UTILS_HPP -#define VIENNACL_DEVICE_SPECIFIC_UTILS_HPP - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - - -/** @file viennacl/device_specific/utils.hpp - @brief Internal utils -*/ - -#include <sstream> - -#include "viennacl/detail/matrix_def.hpp" -#include "viennacl/detail/vector_def.hpp" - -#include "viennacl/device_specific/forwards.h" -#include "viennacl/ocl/forwards.h" - -#include "viennacl/scheduler/forwards.h" - -#include "viennacl/traits/size.hpp" -#include "viennacl/traits/handle.hpp" -#include "viennacl/traits/row_major.hpp" - -#include "viennacl/tools/tools.hpp" - -namespace viennacl -{ -namespace device_specific -{ -namespace utils -{ - -//CUDA Conversion -inline std::string opencl_source_to_cuda_source(std::string const & opencl_src) -{ - std::string res = opencl_src; - - viennacl::tools::find_and_replace(res,"__attribute__","//__attribute__"); - - //Pointer - viennacl::tools::find_and_replace(res, "__global float*", "float*"); - viennacl::tools::find_and_replace(res, "__local float*", "float*"); - - viennacl::tools::find_and_replace(res, "__global double*", "double*"); - viennacl::tools::find_and_replace(res, "__local double*", "double*"); - - //Qualifiers - viennacl::tools::find_and_replace(res,"__global","__device__"); - viennacl::tools::find_and_replace(res,"__kernel","__global__"); - viennacl::tools::find_and_replace(res,"__constant","__constant__"); - viennacl::tools::find_and_replace(res,"__local","__shared__"); - - //Indexing - viennacl::tools::find_and_replace(res,"get_num_groups(0)","gridDim.x"); - viennacl::tools::find_and_replace(res,"get_num_groups(1)","gridDim.y"); - - viennacl::tools::find_and_replace(res,"get_local_size(0)","blockDim.x"); - viennacl::tools::find_and_replace(res,"get_local_size(1)","blockDim.y"); - - viennacl::tools::find_and_replace(res,"get_group_id(0)","blockIdx.x"); - viennacl::tools::find_and_replace(res,"get_group_id(1)","blockIdx.y"); - - viennacl::tools::find_and_replace(res,"get_local_id(0)","threadIdx.x"); - viennacl::tools::find_and_replace(res,"get_local_id(1)","threadIdx.y"); - - viennacl::tools::find_and_replace(res,"get_global_id(0)","(blockIdx.x*blockDim.x + threadIdx.x)"); - viennacl::tools::find_and_replace(res,"get_global_id(1)","(blockIdx.y*blockDim.y + threadIdx.y)"); - - //Synchronization - viennacl::tools::find_and_replace(res,"barrier(CLK_LOCAL_MEM_FENCE)","__syncthreads()"); - viennacl::tools::find_and_replace(res,"barrier(CLK_GLOBAL_MEM_FENCE)","__syncthreads()"); - - - return res; -} - -static std::string numeric_type_to_string(scheduler::statement_node_numeric_type const & type){ - switch (type) - { - //case scheduler::CHAR_TYPE: return "char"; - //case scheduler::UCHAR_TYPE: return "unsigned char"; - //case scheduler::SHORT_TYPE: return "short"; - //case scheduler::USHORT_TYPE: return "unsigned short"; - case scheduler::INT_TYPE: return "int"; - case scheduler::UINT_TYPE: return "unsigned int"; - case scheduler::LONG_TYPE: return "long"; - case scheduler::ULONG_TYPE: return "unsigned long"; - case scheduler::FLOAT_TYPE : return "float"; - case scheduler::DOUBLE_TYPE : return "double"; - default : throw generator_not_supported_exception("Unsupported Scalartype"); - } -} - - -template<class Fun> -static typename Fun::result_type call_on_host_scalar(scheduler::lhs_rhs_element element, Fun const & fun){ - assert(element.type_family == scheduler::SCALAR_TYPE_FAMILY && bool("Must be called on a host scalar")); - switch (element.numeric_type) - { - //case scheduler::CHAR_TYPE: return fun(element.host_char); - //case scheduler::UCHAR_TYPE: return fun(element.host_uchar); - //case scheduler::SHORT_TYPE: return fun(element.host_short); - //case scheduler::USHORT_TYPE: return fun(element.host_ushort); - case scheduler::INT_TYPE: return fun(element.host_int); - case scheduler::UINT_TYPE: return fun(element.host_uint); - case scheduler::LONG_TYPE: return fun(element.host_long); - case scheduler::ULONG_TYPE: return fun(element.host_ulong); - case scheduler::FLOAT_TYPE : return fun(element.host_float); - case scheduler::DOUBLE_TYPE : return fun(element.host_double); - default : throw generator_not_supported_exception("Unsupported Scalartype"); - } -} - -template<class Fun> -static typename Fun::result_type call_on_scalar(scheduler::lhs_rhs_element element, Fun const & fun){ - assert(element.type_family == scheduler::SCALAR_TYPE_FAMILY && bool("Must be called on a scalar")); - switch (element.numeric_type) - { - //case scheduler::CHAR_TYPE: return fun(*element.scalar_char); - //case scheduler::UCHAR_TYPE: return fun(*element.scalar_uchar); - //case scheduler::SHORT_TYPE: return fun(*element.scalar_short); - //case scheduler::USHORT_TYPE: return fun(*element.scalar_ushort); - case scheduler::INT_TYPE: return fun(*element.scalar_int); - case scheduler::UINT_TYPE: return fun(*element.scalar_uint); - case scheduler::LONG_TYPE: return fun(*element.scalar_long); - case scheduler::ULONG_TYPE: return fun(*element.scalar_ulong); - case scheduler::FLOAT_TYPE : return fun(*element.scalar_float); - case scheduler::DOUBLE_TYPE : return fun(*element.scalar_double); - default : throw generator_not_supported_exception("Unsupported Scalartype"); - } -} - -template<class Fun> -static typename Fun::result_type call_on_vector(scheduler::lhs_rhs_element element, Fun const & fun){ - assert(element.type_family == scheduler::VECTOR_TYPE_FAMILY && bool("Must be called on a vector")); - switch (element.numeric_type) - { - //case scheduler::CHAR_TYPE: return fun(*element.vector_char); - //case scheduler::UCHAR_TYPE: return fun(*element.vector_uchar); - //case scheduler::SHORT_TYPE: return fun(*element.vector_short); - //case scheduler::USHORT_TYPE: return fun(*element.vector_ushort); - case scheduler::INT_TYPE: return fun(*element.vector_int); - case scheduler::UINT_TYPE: return fun(*element.vector_uint); - case scheduler::LONG_TYPE: return fun(*element.vector_long); - case scheduler::ULONG_TYPE: return fun(*element.vector_ulong); - case scheduler::FLOAT_TYPE : return fun(*element.vector_float); - case scheduler::DOUBLE_TYPE : return fun(*element.vector_double); - default : throw generator_not_supported_exception("Unsupported Scalartype"); - } -} - -template<class Fun> -static typename Fun::result_type call_on_implicit_vector(scheduler::lhs_rhs_element element, Fun const & fun){ - assert(element.type_family == scheduler::VECTOR_TYPE_FAMILY && bool("Must be called on a implicit_vector")); - assert(element.subtype == scheduler::IMPLICIT_VECTOR_TYPE && bool("Must be called on a implicit_vector")); - switch (element.numeric_type) - { - //case scheduler::CHAR_TYPE: return fun(*element.implicit_vector_char); - //case scheduler::UCHAR_TYPE: return fun(*element.implicit_vector_uchar); - //case scheduler::SHORT_TYPE: return fun(*element.implicit_vector_short); - //case scheduler::USHORT_TYPE: return fun(*element.implicit_vector_ushort); - case scheduler::INT_TYPE: return fun(*element.implicit_vector_int); - case scheduler::UINT_TYPE: return fun(*element.implicit_vector_uint); - case scheduler::LONG_TYPE: return fun(*element.implicit_vector_long); - case scheduler::ULONG_TYPE: return fun(*element.implicit_vector_ulong); - case scheduler::FLOAT_TYPE : return fun(*element.implicit_vector_float); - case scheduler::DOUBLE_TYPE : return fun(*element.implicit_vector_double); - default : throw generator_not_supported_exception("Unsupported Scalartype"); - } -} - -template<class Fun> -static typename Fun::result_type call_on_matrix(scheduler::lhs_rhs_element element, Fun const & fun){ - assert(element.type_family == scheduler::MATRIX_TYPE_FAMILY && bool("Must be called on a matrix")); - switch (element.numeric_type) - { - //case scheduler::CHAR_TYPE: return fun(*element.matrix_char); - //case scheduler::UCHAR_TYPE: return fun(*element.matrix_uchar); - //case scheduler::SHORT_TYPE: return fun(*element.matrix_short); - //case scheduler::USHORT_TYPE: return fun(*element.matrix_ushort); - case scheduler::INT_TYPE: return fun(*element.matrix_int); - case scheduler::UINT_TYPE: return fun(*element.matrix_uint); - case scheduler::LONG_TYPE: return fun(*element.matrix_long); - case scheduler::ULONG_TYPE: return fun(*element.matrix_ulong); - case scheduler::FLOAT_TYPE : return fun(*element.matrix_float); - case scheduler::DOUBLE_TYPE : return fun(*element.matrix_double); - default : throw generator_not_supported_exception("Unsupported Scalartype"); - } -} - - -template<class Fun> -static typename Fun::result_type call_on_implicit_matrix(scheduler::lhs_rhs_element element, Fun const & fun){ - assert(element.subtype == scheduler::IMPLICIT_MATRIX_TYPE && bool("Must be called on a implicit matrix")); - switch (element.numeric_type) - { - //case scheduler::CHAR_TYPE: return fun(*element.implicit_matrix_char); - //case scheduler::UCHAR_TYPE: return fun(*element.implicit_matrix_uchar); - //case scheduler::SHORT_TYPE: return fun(*element.implicit_matrix_short); - //case scheduler::USHORT_TYPE: return fun(*element.implicit_matrix_ushort); - case scheduler::INT_TYPE: return fun(*element.implicit_matrix_int); - case scheduler::UINT_TYPE: return fun(*element.implicit_matrix_uint); - case scheduler::LONG_TYPE: return fun(*element.implicit_matrix_long); - case scheduler::ULONG_TYPE: return fun(*element.implicit_matrix_ulong); - case scheduler::FLOAT_TYPE : return fun(*element.implicit_matrix_float); - case scheduler::DOUBLE_TYPE : return fun(*element.implicit_matrix_double); - default : throw generator_not_supported_exception("Unsupported Scalartype"); - } -} - -template<class Fun> -static typename Fun::result_type call_on_element(scheduler::lhs_rhs_element const & element, Fun const & fun){ - switch (element.type_family) - { - case scheduler::SCALAR_TYPE_FAMILY: - if (element.subtype == scheduler::HOST_SCALAR_TYPE) - return call_on_host_scalar(element, fun); - else - return call_on_scalar(element, fun); - case scheduler::VECTOR_TYPE_FAMILY : - if (element.subtype == scheduler::IMPLICIT_VECTOR_TYPE) - return call_on_implicit_vector(element, fun); - else - return call_on_vector(element, fun); - case scheduler::MATRIX_TYPE_FAMILY: - if (element.subtype == scheduler::IMPLICIT_MATRIX_TYPE) - return call_on_implicit_matrix(element, fun); - else - return call_on_matrix(element,fun); - default: - throw generator_not_supported_exception("Unsupported datastructure type : Not among {Scalar, Vector, Matrix}"); - } -} - -struct scalartype_size_fun -{ - typedef vcl_size_t result_type; - result_type operator()(float const &) const { return sizeof(float); } - result_type operator()(double const &) const { return sizeof(double); } - template<class T> result_type operator()(T const &) const { return sizeof(typename viennacl::result_of::cpu_value_type<T>::type); } -}; - -struct internal_size_fun -{ - typedef vcl_size_t result_type; - template<class T> result_type operator()(T const &t) const { return viennacl::traits::internal_size(t); } -}; - -struct size_fun -{ - typedef vcl_size_t result_type; - template<class T> result_type operator()(T const &t) const { return viennacl::traits::size(t); } -}; - -struct stride_fun -{ - typedef vcl_size_t result_type; - template<class T> result_type operator()(T const &t) const { return viennacl::traits::stride(t); } -}; - -struct start1_fun -{ - typedef vcl_size_t result_type; - template<class T> result_type operator()(T const &t) const { return viennacl::traits::start1(t); } -}; - -struct start2_fun -{ - typedef vcl_size_t result_type; - template<class T> result_type operator()(T const &t) const { return viennacl::traits::start2(t); } -}; - -struct leading_stride -{ - typedef vcl_size_t result_type; - template<class T> result_type operator()(T const &t) const { return viennacl::traits::row_major(t)?viennacl::traits::stride2(t):viennacl::traits::stride1(t); } -}; - -struct leading_start -{ - typedef vcl_size_t result_type; - template<class T> result_type operator()(T const &t) const { return viennacl::traits::row_major(t)?viennacl::traits::start2(t):viennacl::traits::start1(t); } -}; - -struct stride1_fun -{ - typedef vcl_size_t result_type; - template<class T> result_type operator()(T const &t) const { return viennacl::traits::stride1(t); } -}; - -struct stride2_fun -{ - typedef vcl_size_t result_type; - template<class T> result_type operator()(T const &t) const { return viennacl::traits::stride2(t); } -}; - -struct handle_fun -{ - typedef cl_mem result_type; - template<class T> - result_type operator()(T const &t) const { return viennacl::traits::opencl_handle(t); } -}; - -struct internal_size1_fun -{ - typedef vcl_size_t result_type; - template<class T> - result_type operator()(T const &t) const { return viennacl::traits::internal_size1(t); } -}; - -struct row_major_fun -{ - typedef bool result_type; - template<class T> - result_type operator()(T const &t) const { return viennacl::traits::row_major(t); } -}; - -struct internal_size2_fun -{ - typedef vcl_size_t result_type; - template<class T> - result_type operator()(T const &t) const { return viennacl::traits::internal_size2(t); } -}; - -struct size1_fun -{ - typedef vcl_size_t result_type; - template<class T> - result_type operator()(T const &t) const { return viennacl::traits::size1(t); } -}; - -struct size2_fun -{ - typedef vcl_size_t result_type; - template<class T> - result_type operator()(T const &t) const { return viennacl::traits::size2(t); } -}; - -template<class T, class U> -struct is_same_type { enum { value = 0 }; }; - -template<class T> -struct is_same_type<T,T> { enum { value = 1 }; }; - -inline bool is_reduction(scheduler::statement_node const & node) -{ - return node.op.type_family==scheduler::OPERATION_VECTOR_REDUCTION_TYPE_FAMILY - || node.op.type_family==scheduler::OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY - || node.op.type_family==scheduler::OPERATION_ROWS_REDUCTION_TYPE_FAMILY - || node.op.type==scheduler::OPERATION_BINARY_INNER_PROD_TYPE - || node.op.type==scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE; -} - -inline bool is_index_reduction(scheduler::op_element const & op) -{ - return op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE - || op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE - || op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE - || op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE; -} -template<class T> -struct type_to_string; -template<> struct type_to_string<unsigned char> { static const char * value() { return "uchar"; } }; -template<> struct type_to_string<char> { static const char * value() { return "char"; } }; -template<> struct type_to_string<unsigned short> { static const char * value() { return "ushort"; } }; -template<> struct type_to_string<short> { static const char * value() { return "short"; } }; -template<> struct type_to_string<unsigned int> { static const char * value() { return "uint"; } }; -template<> struct type_to_string<int> { static const char * value() { return "int"; } }; -template<> struct type_to_string<unsigned long> { static const char * value() { return "ulong"; } }; -template<> struct type_to_string<long> { static const char * value() { return "long"; } }; -template<> struct type_to_string<float> { static const char * value() { return "float"; } }; -template<> struct type_to_string<double> { static const char * value() { return "double"; } }; - - -template<class T> -struct first_letter_of_type; -template<> struct first_letter_of_type<char> { static char value() { return 'c'; } }; -template<> struct first_letter_of_type<unsigned char> { static char value() { return 'd'; } }; -template<> struct first_letter_of_type<short> { static char value() { return 's'; } }; -template<> struct first_letter_of_type<unsigned short> { static char value() { return 't'; } }; -template<> struct first_letter_of_type<int> { static char value() { return 'i'; } }; -template<> struct first_letter_of_type<unsigned int> { static char value() { return 'j'; } }; -template<> struct first_letter_of_type<long> { static char value() { return 'l'; } }; -template<> struct first_letter_of_type<unsigned long> { static char value() { return 'm'; } }; -template<> struct first_letter_of_type<float> { static char value() { return 'f'; } }; -template<> struct first_letter_of_type<double> { static char value() { return 'd'; } }; - -class kernel_generation_stream : public std::ostream -{ - class kgenstream : public std::stringbuf - { - public: - kgenstream(std::ostringstream& osstream,unsigned int const & tab_count) : oss_(osstream), tab_count_(tab_count){ } - int sync() { - for (unsigned int i=0; i<tab_count_;++i) - oss_ << " "; - oss_ << str(); - str(""); - return !oss_; - } -#if defined(_MSC_VER) - ~kgenstream() throw() { pubsync(); } -#else - ~kgenstream() { pubsync(); } -#endif - private: - std::ostream& oss_; - unsigned int const & tab_count_; - }; - -public: - kernel_generation_stream() : std::ostream(new kgenstream(oss,tab_count_)), tab_count_(0){ } -#if defined(_MSC_VER) - ~kernel_generation_stream() throw() { delete rdbuf(); } -#else - ~kernel_generation_stream(){ delete rdbuf(); } -#endif - - std::string str(){ return oss.str(); } - void inc_tab(){ ++tab_count_; } - void dec_tab(){ --tab_count_; } -private: - unsigned int tab_count_; - std::ostringstream oss; -}; - -inline bool node_leaf(scheduler::op_element const & op) -{ - using namespace scheduler; - return op.type==OPERATION_UNARY_NORM_1_TYPE - || op.type==OPERATION_UNARY_NORM_2_TYPE - || op.type==OPERATION_UNARY_NORM_INF_TYPE - || op.type==OPERATION_UNARY_TRANS_TYPE - || op.type==OPERATION_BINARY_MAT_VEC_PROD_TYPE - || op.type==OPERATION_BINARY_MAT_MAT_PROD_TYPE - || op.type==OPERATION_BINARY_INNER_PROD_TYPE - || op.type==OPERATION_BINARY_MATRIX_DIAG_TYPE - || op.type==OPERATION_BINARY_VECTOR_DIAG_TYPE - || op.type==OPERATION_BINARY_MATRIX_ROW_TYPE - || op.type==OPERATION_BINARY_MATRIX_COLUMN_TYPE - || op.type_family==OPERATION_VECTOR_REDUCTION_TYPE_FAMILY - || op.type_family==OPERATION_ROWS_REDUCTION_TYPE_FAMILY - || op.type_family==OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY; -} - -inline bool elementwise_operator(scheduler::op_element const & op) -{ - using namespace scheduler; - return op.type== OPERATION_BINARY_ASSIGN_TYPE - || op.type== OPERATION_BINARY_INPLACE_ADD_TYPE - || op.type== OPERATION_BINARY_INPLACE_SUB_TYPE - || op.type== OPERATION_BINARY_ADD_TYPE - || op.type== OPERATION_BINARY_SUB_TYPE - || op.type== OPERATION_BINARY_ELEMENT_PROD_TYPE - || op.type== OPERATION_BINARY_ELEMENT_DIV_TYPE - || op.type== OPERATION_BINARY_MULT_TYPE - || op.type== OPERATION_BINARY_DIV_TYPE; -} - -inline bool elementwise_function(scheduler::op_element const & op) -{ - using namespace scheduler; - return - - op.type == OPERATION_UNARY_CAST_CHAR_TYPE - || op.type == OPERATION_UNARY_CAST_UCHAR_TYPE - || op.type == OPERATION_UNARY_CAST_SHORT_TYPE - || op.type == OPERATION_UNARY_CAST_USHORT_TYPE - || op.type == OPERATION_UNARY_CAST_INT_TYPE - || op.type == OPERATION_UNARY_CAST_UINT_TYPE - || op.type == OPERATION_UNARY_CAST_LONG_TYPE - || op.type == OPERATION_UNARY_CAST_ULONG_TYPE - || op.type == OPERATION_UNARY_CAST_HALF_TYPE - || op.type == OPERATION_UNARY_CAST_FLOAT_TYPE - || op.type == OPERATION_UNARY_CAST_DOUBLE_TYPE - - || op.type== OPERATION_UNARY_ABS_TYPE - || op.type== OPERATION_UNARY_ACOS_TYPE - || op.type== OPERATION_UNARY_ASIN_TYPE - || op.type== OPERATION_UNARY_ATAN_TYPE - || op.type== OPERATION_UNARY_CEIL_TYPE - || op.type== OPERATION_UNARY_COS_TYPE - || op.type== OPERATION_UNARY_COSH_TYPE - || op.type== OPERATION_UNARY_EXP_TYPE - || op.type== OPERATION_UNARY_FABS_TYPE - || op.type== OPERATION_UNARY_FLOOR_TYPE - || op.type== OPERATION_UNARY_LOG_TYPE - || op.type== OPERATION_UNARY_LOG10_TYPE - || op.type== OPERATION_UNARY_SIN_TYPE - || op.type== OPERATION_UNARY_SINH_TYPE - || op.type== OPERATION_UNARY_SQRT_TYPE - || op.type== OPERATION_UNARY_TAN_TYPE - || op.type== OPERATION_UNARY_TANH_TYPE - - || op.type== OPERATION_BINARY_ELEMENT_POW_TYPE - || op.type== OPERATION_BINARY_ELEMENT_EQ_TYPE - || op.type== OPERATION_BINARY_ELEMENT_NEQ_TYPE - || op.type== OPERATION_BINARY_ELEMENT_GREATER_TYPE - || op.type== OPERATION_BINARY_ELEMENT_LESS_TYPE - || op.type== OPERATION_BINARY_ELEMENT_GEQ_TYPE - || op.type== OPERATION_BINARY_ELEMENT_LEQ_TYPE - || op.type== OPERATION_BINARY_ELEMENT_FMAX_TYPE - || op.type== OPERATION_BINARY_ELEMENT_FMIN_TYPE - || op.type== OPERATION_BINARY_ELEMENT_MAX_TYPE - || op.type== OPERATION_BINARY_ELEMENT_MIN_TYPE; - -} - -inline scheduler::lhs_rhs_element & lhs_rhs_element(scheduler::statement const & st, vcl_size_t idx, leaf_t leaf) -{ - using namespace tree_parsing; - assert(leaf==LHS_NODE_TYPE || leaf==RHS_NODE_TYPE); - if (leaf==LHS_NODE_TYPE) - return const_cast<scheduler::lhs_rhs_element &>(st.array()[idx].lhs); - return const_cast<scheduler::lhs_rhs_element &>(st.array()[idx].rhs); -} - -inline unsigned int size_of(scheduler::statement_node_numeric_type type) -{ - using namespace scheduler; - switch (type) - { - case UCHAR_TYPE: - case CHAR_TYPE: return 1; - - case USHORT_TYPE: - case SHORT_TYPE: - case HALF_TYPE: return 2; - - case UINT_TYPE: - case INT_TYPE: - case FLOAT_TYPE: return 4; - - case ULONG_TYPE: - case LONG_TYPE: - case DOUBLE_TYPE: return 8; - - default: throw generator_not_supported_exception("Unsupported scalartype"); - } -} - -inline std::string append_width(std::string const & str, unsigned int width) -{ - if (width==1) - return str; - return str + tools::to_string(width); -} - -} -} -} -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/ell_matrix.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/ell_matrix.hpp b/native-viennaCL/src/main/cpp/viennacl/ell_matrix.hpp deleted file mode 100644 index 3c3a428..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/ell_matrix.hpp +++ /dev/null @@ -1,362 +0,0 @@ -#ifndef VIENNACL_ELL_MATRIX_HPP_ -#define VIENNACL_ELL_MATRIX_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/ell_matrix.hpp - @brief Implementation of the ell_matrix class - - Contributed by Volodymyr Kysenko. -*/ - - -#include "viennacl/forwards.h" -#include "viennacl/vector.hpp" - -#include "viennacl/tools/tools.hpp" - -#include "viennacl/linalg/sparse_matrix_operations.hpp" - -namespace viennacl -{ -/** @brief Sparse matrix class using the ELLPACK format for storing the nonzeros. - * - * This format works best for matrices where the number of nonzeros per row is mostly the same. - * Finite element and finite difference methods on nicely shaped domains often result in such a nonzero pattern. - * For a matrix - * - * (1 2 0 0 0) - * (2 3 4 0 0) - * (0 5 6 0 7) - * (0 0 8 9 0) - * - * the entries are layed out in chunks of size 3 as - * (1 2 5 8; 2 3 6 9; 0 4 7 0) - * Note that this is a 'transposed' representation in order to maximize coalesced memory access. - */ -template<typename NumericT, unsigned int AlignmentV /* see forwards.h for default argument */> -class ell_matrix -{ -public: - typedef viennacl::backend::mem_handle handle_type; - typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<NumericT>::ResultType> value_type; - typedef vcl_size_t size_type; - - ell_matrix() : rows_(0), cols_(0), maxnnz_(0) {} - - ell_matrix(viennacl::context ctx) : rows_(0), cols_(0), maxnnz_(0) - { - coords_.switch_active_handle_id(ctx.memory_type()); - elements_.switch_active_handle_id(ctx.memory_type()); - -#ifdef VIENNACL_WITH_OPENCL - if (ctx.memory_type() == OPENCL_MEMORY) - { - coords_.opencl_handle().context(ctx.opencl_context()); - elements_.opencl_handle().context(ctx.opencl_context()); - } -#endif - } - - /** @brief Resets all entries in the matrix back to zero without changing the matrix size. Resets the sparsity pattern. */ - void clear() - { - maxnnz_ = 0; - - viennacl::backend::typesafe_host_array<unsigned int> host_coords_buffer(coords_, internal_size1()); - std::vector<NumericT> host_elements(internal_size1()); - - viennacl::backend::memory_create(coords_, host_coords_buffer.element_size() * internal_size1(), viennacl::traits::context(coords_), host_coords_buffer.get()); - viennacl::backend::memory_create(elements_, sizeof(NumericT) * internal_size1(), viennacl::traits::context(elements_), &(host_elements[0])); - } - - vcl_size_t internal_size1() const { return viennacl::tools::align_to_multiple<vcl_size_t>(rows_, AlignmentV); } - vcl_size_t internal_size2() const { return viennacl::tools::align_to_multiple<vcl_size_t>(cols_, AlignmentV); } - - vcl_size_t size1() const { return rows_; } - vcl_size_t size2() const { return cols_; } - - vcl_size_t internal_maxnnz() const {return viennacl::tools::align_to_multiple<vcl_size_t>(maxnnz_, AlignmentV); } - vcl_size_t maxnnz() const { return maxnnz_; } - - vcl_size_t nnz() const { return rows_ * maxnnz_; } - vcl_size_t internal_nnz() const { return internal_size1() * internal_maxnnz(); } - - handle_type & handle() { return elements_; } - const handle_type & handle() const { return elements_; } - - handle_type & handle2() { return coords_; } - const handle_type & handle2() const { return coords_; } - -#if defined(_MSC_VER) && _MSC_VER < 1500 //Visual Studio 2005 needs special treatment - template<typename CPUMatrixT> - friend void copy(const CPUMatrixT & cpu_matrix, ell_matrix & gpu_matrix ); -#else - template<typename CPUMatrixT, typename T, unsigned int ALIGN> - friend void copy(const CPUMatrixT & cpu_matrix, ell_matrix<T, ALIGN> & gpu_matrix ); -#endif - -private: - vcl_size_t rows_; - vcl_size_t cols_; - vcl_size_t maxnnz_; - - handle_type coords_; - handle_type elements_; -}; - -template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV> -void copy(const CPUMatrixT& cpu_matrix, ell_matrix<NumericT, AlignmentV>& gpu_matrix ) -{ - assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); - assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); - - if (cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0) - { - //determine max capacity for row - vcl_size_t max_entries_per_row = 0; - for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) - { - vcl_size_t num_entries = 0; - for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) - ++num_entries; - - max_entries_per_row = std::max(max_entries_per_row, num_entries); - } - - //setup GPU matrix - gpu_matrix.maxnnz_ = max_entries_per_row; - gpu_matrix.rows_ = cpu_matrix.size1(); - gpu_matrix.cols_ = cpu_matrix.size2(); - - vcl_size_t nnz = gpu_matrix.internal_nnz(); - - viennacl::backend::typesafe_host_array<unsigned int> coords(gpu_matrix.handle2(), nnz); - std::vector<NumericT> elements(nnz, 0); - - // std::cout << "ELL_MATRIX copy " << gpu_matrix.maxnnz_ << " " << gpu_matrix.rows_ << " " << gpu_matrix.cols_ << " " - // << gpu_matrix.internal_maxnnz() << "\n"; - - for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) - { - vcl_size_t data_index = 0; - - for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) - { - coords.set(gpu_matrix.internal_size1() * data_index + col_it.index1(), col_it.index2()); - elements[gpu_matrix.internal_size1() * data_index + col_it.index1()] = *col_it; - //std::cout << *col_it << "\n"; - data_index++; - } - } - - viennacl::backend::memory_create(gpu_matrix.handle2(), coords.raw_size(), traits::context(gpu_matrix.handle2()), coords.get()); - viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(NumericT) * elements.size(), traits::context(gpu_matrix.handle()), &(elements[0])); - } -} - - - -/** @brief Copies a sparse matrix from the host to the compute device. The host type is the std::vector< std::map < > > format . - * - * @param cpu_matrix A sparse matrix on the host composed of an STL vector and an STL map. - * @param gpu_matrix The sparse ell_matrix from ViennaCL - */ -template<typename IndexT, typename NumericT, unsigned int AlignmentV> -void copy(std::vector< std::map<IndexT, NumericT> > const & cpu_matrix, - ell_matrix<NumericT, AlignmentV> & gpu_matrix) -{ - vcl_size_t max_col = 0; - for (vcl_size_t i=0; i<cpu_matrix.size(); ++i) - { - if (cpu_matrix[i].size() > 0) - max_col = std::max<vcl_size_t>(max_col, (cpu_matrix[i].rbegin())->first); - } - - viennacl::copy(tools::const_sparse_matrix_adapter<NumericT, IndexT>(cpu_matrix, cpu_matrix.size(), max_col + 1), gpu_matrix); -} - - - - - - -template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV> -void copy(const ell_matrix<NumericT, AlignmentV>& gpu_matrix, CPUMatrixT& cpu_matrix) -{ - assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); - assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); - - if (gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0) - { - std::vector<NumericT> elements(gpu_matrix.internal_nnz()); - viennacl::backend::typesafe_host_array<unsigned int> coords(gpu_matrix.handle2(), gpu_matrix.internal_nnz()); - - viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT) * elements.size(), &(elements[0])); - viennacl::backend::memory_read(gpu_matrix.handle2(), 0, coords.raw_size(), coords.get()); - - for (vcl_size_t row = 0; row < gpu_matrix.size1(); row++) - { - for (vcl_size_t ind = 0; ind < gpu_matrix.internal_maxnnz(); ind++) - { - vcl_size_t offset = gpu_matrix.internal_size1() * ind + row; - - NumericT val = elements[offset]; - if (val <= 0 && val >= 0) // val == 0 without compiler warnings - continue; - - if (coords[offset] >= gpu_matrix.size2()) - { - std::cerr << "ViennaCL encountered invalid data " << offset << " " << ind << " " << row << " " << coords[offset] << " " << gpu_matrix.size2() << std::endl; - return; - } - - cpu_matrix(row, coords[offset]) = val; - } - } - } -} - - -/** @brief Copies a sparse matrix from the compute device to the host. The host type is the std::vector< std::map < > > format . - * - * @param gpu_matrix The sparse ell_matrix from ViennaCL - * @param cpu_matrix A sparse matrix on the host composed of an STL vector and an STL map. - */ -template<typename NumericT, unsigned int AlignmentV, typename IndexT> -void copy(const ell_matrix<NumericT, AlignmentV> & gpu_matrix, - std::vector< std::map<IndexT, NumericT> > & cpu_matrix) -{ - if (cpu_matrix.size() == 0) - cpu_matrix.resize(gpu_matrix.size1()); - - assert(cpu_matrix.size() == gpu_matrix.size1() && bool("Matrix dimension mismatch!")); - - tools::sparse_matrix_adapter<NumericT, IndexT> temp(cpu_matrix, gpu_matrix.size1(), gpu_matrix.size2()); - viennacl::copy(gpu_matrix, temp); -} - -// -// Specify available operations: -// - -/** \cond */ - -namespace linalg -{ -namespace detail -{ - // x = A * y - template<typename T, unsigned int A> - struct op_executor<vector_base<T>, op_assign, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) - { - // check for the special case x = A * x - if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) - { - viennacl::vector<T> temp(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); - lhs = temp; - } - else - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(0)); - } - }; - - template<typename T, unsigned int A> - struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) - { - // check for the special case x += A * x - if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) - { - viennacl::vector<T> temp(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); - lhs += temp; - } - else - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(1)); - } - }; - - template<typename T, unsigned int A> - struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) - { - // check for the special case x -= A * x - if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) - { - viennacl::vector<T> temp(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); - lhs -= temp; - } - else - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(-1), lhs, T(1)); - } - }; - - - // x = A * vec_op - template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> - struct op_executor<vector_base<T>, op_assign, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) - { - viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); - viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs); - } - }; - - // x = A * vec_op - template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> - struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) - { - viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); - viennacl::vector<T> temp_result(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); - lhs += temp_result; - } - }; - - // x = A * vec_op - template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> - struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) - { - viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); - viennacl::vector<T> temp_result(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); - lhs -= temp_result; - } - }; - -} // namespace detail -} // namespace linalg - -/** \endcond */ -} - -#endif - - http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/fft.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/fft.hpp b/native-viennaCL/src/main/cpp/viennacl/fft.hpp deleted file mode 100644 index bacd911..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/fft.hpp +++ /dev/null @@ -1,282 +0,0 @@ -#ifndef VIENNACL_FFT_HPP -#define VIENNACL_FFT_HPP - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory - ============================================================================= */ - -/** @file viennacl/fft.hpp - @brief All routines related to the Fast Fourier Transform. Experimental. - */ - -#include <viennacl/vector.hpp> -#include <viennacl/matrix.hpp> - -#include "viennacl/linalg/fft_operations.hpp" -#include "viennacl/traits/handle.hpp" - -#include <cmath> - -#include <stdexcept> -/// @cond -namespace viennacl -{ -namespace detail -{ -namespace fft -{ - inline bool is_radix2(vcl_size_t data_size) - { - return !((data_size > 2) && (data_size & (data_size - 1))); - } -} //namespace fft -} //namespace detail - -/** - * @brief Generic inplace version of 1-D Fourier transformation. - * - * @param input Input vector, result will be stored here. - * @param batch_num Number of items in batch - * @param sign Sign of exponent, default is -1.0 - */ -template<class NumericT, unsigned int AlignmentV> -void inplace_fft(viennacl::vector<NumericT, AlignmentV>& input, vcl_size_t batch_num = 1, - NumericT sign = -1.0) -{ - vcl_size_t size = (input.size() >> 1) / batch_num; - - if (!viennacl::detail::fft::is_radix2(size)) - { - viennacl::vector<NumericT, AlignmentV> output(input.size()); - viennacl::linalg::direct(input, output, size, size, batch_num, sign); - viennacl::copy(output, input); - } - else - viennacl::linalg::radix2(input, size, size, batch_num, sign); -} - -/** - * @brief Generic version of 1-D Fourier transformation. - * - * @param input Input vector. - * @param output Output vector. - * @param batch_num Number of items in batch. - * @param sign Sign of exponent, default is -1.0 - */ -template<class NumericT, unsigned int AlignmentV> -void fft(viennacl::vector<NumericT, AlignmentV>& input, - viennacl::vector<NumericT, AlignmentV>& output, vcl_size_t batch_num = 1, NumericT sign = -1.0) -{ - vcl_size_t size = (input.size() >> 1) / batch_num; - if (viennacl::detail::fft::is_radix2(size)) - { - viennacl::copy(input, output); - viennacl::linalg::radix2(output, size, size, batch_num, sign); - } - else - viennacl::linalg::direct(input, output, size, size, batch_num, sign); -} - -/** - * @brief Generic inplace version of 2-D Fourier transformation. - * - * @param input Input matrix, result will be stored here. - * @param sign Sign of exponent, default is -1.0 - */ -template<class NumericT, unsigned int AlignmentV> -void inplace_fft(viennacl::matrix<NumericT, viennacl::row_major, AlignmentV>& input, - NumericT sign = -1.0) -{ - vcl_size_t rows_num = input.size1(); - vcl_size_t cols_num = input.size2() >> 1; - - vcl_size_t cols_int = input.internal_size2() >> 1; - - // batch with rows - if (viennacl::detail::fft::is_radix2(cols_num)) - viennacl::linalg::radix2(input, cols_num, cols_int, rows_num, sign, - viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR); - else - { - viennacl::matrix<NumericT, viennacl::row_major, AlignmentV> output(input.size1(), - input.size2()); - - viennacl::linalg::direct(input, output, cols_num, cols_int, rows_num, sign, - viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR); - - input = output; - } - - // batch with cols - if (viennacl::detail::fft::is_radix2(rows_num)) - viennacl::linalg::radix2(input, rows_num, cols_int, cols_num, sign, - viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::COL_MAJOR); - else - { - viennacl::matrix<NumericT, viennacl::row_major, AlignmentV> output(input.size1(), - input.size2()); - - viennacl::linalg::direct(input, output, rows_num, cols_int, cols_num, sign, - viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::COL_MAJOR); - - input = output; - } - -} - -/** - * @brief Generic version of 2-D Fourier transformation. - * - * @param input Input vector. - * @param output Output vector. - * @param sign Sign of exponent, default is -1.0 - */ -template<class NumericT, unsigned int AlignmentV> -void fft(viennacl::matrix<NumericT, viennacl::row_major, AlignmentV>& input, //TODO - viennacl::matrix<NumericT, viennacl::row_major, AlignmentV>& output, NumericT sign = -1.0) -{ - - vcl_size_t rows_num = input.size1(); - vcl_size_t cols_num = input.size2() >> 1; - vcl_size_t cols_int = input.internal_size2() >> 1; - - // batch with rows - if (viennacl::detail::fft::is_radix2(cols_num)) - { - output = input; - viennacl::linalg::radix2(output, cols_num, cols_int, rows_num, sign, - viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR); - } - else - viennacl::linalg::direct(input, output, cols_num, cols_int, rows_num, sign, - viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR); - - // batch with cols - if (viennacl::detail::fft::is_radix2(rows_num)) - { - //std::cout<<"output"<<output<<std::endl; - - viennacl::linalg::radix2(output, rows_num, cols_int, cols_num, sign, - viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::COL_MAJOR); - } - else - { - viennacl::matrix<NumericT, viennacl::row_major, AlignmentV> tmp(output.size1(), - output.size2()); - tmp = output; - //std::cout<<"tmp"<<tmp<<std::endl; - viennacl::linalg::direct(tmp, output, rows_num, cols_int, cols_num, sign, - viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::COL_MAJOR); - } -} - -/** - * @brief Generic inplace version of inverse 1-D Fourier transformation. - * - * Shorthand function for fft(sign = 1.0) - * - * @param input Input vector. - * @param batch_num Number of items in batch. - * @param sign Sign of exponent, default is -1.0 - */ -template<class NumericT, unsigned int AlignmentV> -void inplace_ifft(viennacl::vector<NumericT, AlignmentV>& input, vcl_size_t batch_num = 1) -{ - viennacl::inplace_fft(input, batch_num, NumericT(1.0)); - viennacl::linalg::normalize(input); -} - -/** - * @brief Generic version of inverse 1-D Fourier transformation. - * - * Shorthand function for fft(sign = 1.0) - * - * @param input Input vector. - * @param output Output vector. - * @param batch_num Number of items in batch. - * @param sign Sign of exponent, default is -1.0 - */ -template<class NumericT, unsigned int AlignmentV> -void ifft(viennacl::vector<NumericT, AlignmentV>& input, - viennacl::vector<NumericT, AlignmentV>& output, vcl_size_t batch_num = 1) -{ - viennacl::fft(input, output, batch_num, NumericT(1.0)); - viennacl::linalg::normalize(output); -} - -namespace linalg -{ - /** - * @brief 1-D convolution of two vectors. - * - * This function does not make any changes to input vectors - * - * @param input1 Input vector #1. - * @param input2 Input vector #2. - * @param output Output vector. - */ - template<class NumericT, unsigned int AlignmentV> - void convolve(viennacl::vector<NumericT, AlignmentV>& input1, - viennacl::vector<NumericT, AlignmentV>& input2, - viennacl::vector<NumericT, AlignmentV>& output) - { - assert(input1.size() == input2.size()); - assert(input1.size() == output.size()); - //temporal arrays - viennacl::vector<NumericT, AlignmentV> tmp1(input1.size()); - viennacl::vector<NumericT, AlignmentV> tmp2(input2.size()); - viennacl::vector<NumericT, AlignmentV> tmp3(output.size()); - - // align input arrays to equal size - // FFT of input data - viennacl::fft(input1, tmp1); - viennacl::fft(input2, tmp2); - - // multiplication of input data - viennacl::linalg::multiply_complex(tmp1, tmp2, tmp3); - // inverse FFT of input data - viennacl::ifft(tmp3, output); - } - - /** - * @brief 1-D convolution of two vectors. - * - * This function can make changes to input vectors to avoid additional memory allocations. - * - * @param input1 Input vector #1. - * @param input2 Input vector #2. - * @param output Output vector. - */ - template<class NumericT, unsigned int AlignmentV> - void convolve_i(viennacl::vector<NumericT, AlignmentV>& input1, - viennacl::vector<NumericT, AlignmentV>& input2, - viennacl::vector<NumericT, AlignmentV>& output) - { - assert(input1.size() == input2.size()); - assert(input1.size() == output.size()); - - viennacl::inplace_fft(input1); - viennacl::inplace_fft(input2); - - viennacl::linalg::multiply_complex(input1, input2, output); - - viennacl::inplace_ifft(output); - } -} //namespace linalg -} //namespace viennacl - -/// @endcond -#endif
