http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/templates/utils.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/templates/utils.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/templates/utils.hpp new file mode 100644 index 0000000..67d089a --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/templates/utils.hpp @@ -0,0 +1,105 @@ +#ifndef VIENNACL_DEVICE_SPECIFIC_TEMPLATES_REDUCTION_UTILS_HPP +#define VIENNACL_DEVICE_SPECIFIC_TEMPLATES_REDUCTION_UTILS_HPP + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + + +/** @file viennacl/device_specific/templates/utils.hpp + * + * A collection of utilities for the device specific execution templates. +*/ + +#include <vector> + +#include "viennacl/scheduler/forwards.h" + +#include "viennacl/device_specific/tree_parsing.hpp" +#include "viennacl/device_specific/utils.hpp" + + +namespace viennacl +{ +namespace device_specific +{ + +inline void compute_reduction(utils::kernel_generation_stream & os, std::string acc, std::string cur, scheduler::op_element const & op) +{ + if (utils::elementwise_function(op)) + os << acc << "=" << tree_parsing::evaluate(op.type) << "(" << acc << "," << cur << ");" << std::endl; + else + os << acc << "= (" << acc << ")" << tree_parsing::evaluate(op.type) << "(" << cur << ");" << std::endl; +} + +inline void compute_index_reduction(utils::kernel_generation_stream & os, std::string acc, std::string cur, std::string const & acc_value, std::string const & cur_value, scheduler::op_element const & op) +{ + // os << acc << " = " << cur_value << ">" << acc_value << "?" << cur << ":" << acc << ";" << std::endl; + os << acc << "= select(" << acc << "," << cur << "," << cur_value << ">" << acc_value << ");" << std::endl; + os << acc_value << "="; + if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE) os << "fmax"; + if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE) os << "max"; + if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE) os << "fmin"; + if (op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE) os << "min"; + os << "(" << acc_value << "," << cur_value << ");"<< std::endl; +} + +inline void process_all(std::string const & type_key, std::string const & str, + utils::kernel_generation_stream & stream, std::vector<mapping_type> const & mappings) +{ + for (std::vector<mapping_type>::const_iterator mit = mappings.begin(); mit != mappings.end(); ++mit) + for (mapping_type::const_iterator mmit = mit->begin(); mmit != mit->end(); ++mmit) + if (mmit->second->type_key()==type_key) + stream << mmit->second->process(str) << std::endl; +} + + +inline void process_all_at(std::string const & type_key, std::string const & str, + utils::kernel_generation_stream & stream, std::vector<mapping_type> const & mappings, + vcl_size_t root_idx, leaf_t leaf) +{ + for (std::vector<mapping_type>::const_iterator mit = mappings.begin(); mit != mappings.end(); ++mit) + { + mapped_object * obj = at(*mit, mapping_key(root_idx, leaf)).get(); + if (obj->type_key()==type_key) + stream << obj->process(str) << std::endl; + } +} + +inline std::string neutral_element(scheduler::op_element const & op) +{ + switch (op.type) + { + case scheduler::OPERATION_BINARY_ADD_TYPE : return "0"; + case scheduler::OPERATION_BINARY_MULT_TYPE : return "1"; + case scheduler::OPERATION_BINARY_DIV_TYPE : return "1"; + case scheduler::OPERATION_BINARY_ELEMENT_FMAX_TYPE : return "-INFINITY"; + case scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE : return "-INFINITY"; + case scheduler::OPERATION_BINARY_ELEMENT_MAX_TYPE : return "-INFINITY"; + case scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE : return "-INFINITY"; + case scheduler::OPERATION_BINARY_ELEMENT_FMIN_TYPE : return "INFINITY"; + case scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE : return "INFINITY"; + case scheduler::OPERATION_BINARY_ELEMENT_MIN_TYPE : return "INFINITY"; + case scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE : return "INFINITY"; + + default: throw generator_not_supported_exception("Unsupported reduction operator : no neutral element known"); + } +} + +} +} + +#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/tree_parsing.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/tree_parsing.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/tree_parsing.hpp new file mode 100644 index 0000000..f9cc8a8 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/tree_parsing.hpp @@ -0,0 +1,512 @@ +#ifndef VIENNACL_DEVICE_SPECIFIC_TREE_PARSING_HPP +#define VIENNACL_DEVICE_SPECIFIC_TREE_PARSING_HPP + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + + +/** @file viennacl/device_specific/tree_parsing.hpp + @brief Code for parsing the expression trees. +*/ + +#include <set> + +#include "viennacl/forwards.h" + +#include "viennacl/scheduler/forwards.h" + +#include "viennacl/device_specific/forwards.h" +#include "viennacl/device_specific/utils.hpp" +#include "viennacl/device_specific/mapped_objects.hpp" + +namespace viennacl +{ +namespace device_specific +{ +namespace tree_parsing +{ + +/** @brief base functor class for traversing a statement */ +class traversal_functor +{ +public: + void call_before_expansion(scheduler::statement const &, vcl_size_t) const { } + void call_after_expansion(scheduler::statement const &, vcl_size_t) const { } +}; + +/** @brief Recursively execute a functor on a statement */ +template<class Fun> +inline void traverse(scheduler::statement const & statement, vcl_size_t root_idx, Fun const & fun, bool inspect) +{ + scheduler::statement_node const & root_node = statement.array()[root_idx]; + bool recurse = utils::node_leaf(root_node.op)?inspect:true; + + fun.call_before_expansion(statement, root_idx); + + //Lhs: + if (recurse) + { + if (root_node.lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) + traverse(statement, root_node.lhs.node_index, fun, inspect); + if (root_node.lhs.type_family != scheduler::INVALID_TYPE_FAMILY) + fun(statement, root_idx, LHS_NODE_TYPE); + } + + //Self: + fun(statement, root_idx, PARENT_NODE_TYPE); + + //Rhs: + if (recurse && root_node.rhs.type_family!=scheduler::INVALID_TYPE_FAMILY) + { + if (root_node.rhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) + traverse(statement, root_node.rhs.node_index, fun, inspect); + if (root_node.rhs.type_family != scheduler::INVALID_TYPE_FAMILY) + fun(statement, root_idx, RHS_NODE_TYPE); + } + + fun.call_after_expansion(statement, root_idx); +} + +class filter : public traversal_functor +{ +public: + typedef bool (*pred_t)(scheduler::statement_node const & node); + + filter(pred_t pred, std::vector<vcl_size_t> & out) : pred_(pred), out_(out){ } + + void operator()(scheduler::statement const & statement, vcl_size_t root_idx, leaf_t) const + { + scheduler::statement_node const * root_node = &statement.array()[root_idx]; + if (pred_(*root_node)) + out_.push_back(root_idx); + } +private: + pred_t pred_; + std::vector<vcl_size_t> & out_; +}; + +class filter_elements : public traversal_functor +{ +public: + filter_elements(scheduler::statement_node_subtype subtype, std::vector<scheduler::lhs_rhs_element> & out) : subtype_(subtype), out_(out) { } + + void operator()(scheduler::statement const & statement, vcl_size_t root_idx, leaf_t) const + { + scheduler::statement_node const * root_node = &statement.array()[root_idx]; + if (root_node->lhs.subtype==subtype_) + out_.push_back(root_node->lhs); + if (root_node->rhs.subtype==subtype_) + out_.push_back(root_node->rhs); + } +private: + scheduler::statement_node_subtype subtype_; + std::vector<scheduler::lhs_rhs_element> & out_; +}; + +/** @brief generate a string from an operation_node_type */ +inline const char * evaluate(scheduler::operation_node_type type) +{ + using namespace scheduler; + // unary expression + switch (type) + { + //Function + case OPERATION_UNARY_ABS_TYPE : return "abs"; + case OPERATION_UNARY_ACOS_TYPE : return "acos"; + case OPERATION_UNARY_ASIN_TYPE : return "asin"; + case OPERATION_UNARY_ATAN_TYPE : return "atan"; + case OPERATION_UNARY_CEIL_TYPE : return "ceil"; + case OPERATION_UNARY_COS_TYPE : return "cos"; + case OPERATION_UNARY_COSH_TYPE : return "cosh"; + case OPERATION_UNARY_EXP_TYPE : return "exp"; + case OPERATION_UNARY_FABS_TYPE : return "fabs"; + case OPERATION_UNARY_FLOOR_TYPE : return "floor"; + case OPERATION_UNARY_LOG_TYPE : return "log"; + case OPERATION_UNARY_LOG10_TYPE : return "log10"; + case OPERATION_UNARY_SIN_TYPE : return "sin"; + case OPERATION_UNARY_SINH_TYPE : return "sinh"; + case OPERATION_UNARY_SQRT_TYPE : return "sqrt"; + case OPERATION_UNARY_TAN_TYPE : return "tan"; + case OPERATION_UNARY_TANH_TYPE : return "tanh"; + + case OPERATION_UNARY_CAST_CHAR_TYPE : return "(char)"; + case OPERATION_UNARY_CAST_UCHAR_TYPE : return "(uchar)"; + case OPERATION_UNARY_CAST_SHORT_TYPE : return "(short)"; + case OPERATION_UNARY_CAST_USHORT_TYPE : return "(ushort)"; + case OPERATION_UNARY_CAST_INT_TYPE : return "(int)"; + case OPERATION_UNARY_CAST_UINT_TYPE : return "(uint)"; + case OPERATION_UNARY_CAST_LONG_TYPE : return "(long)"; + case OPERATION_UNARY_CAST_ULONG_TYPE : return "(ulong)"; + case OPERATION_UNARY_CAST_HALF_TYPE : return "(half)"; + case OPERATION_UNARY_CAST_FLOAT_TYPE : return "(float)"; + case OPERATION_UNARY_CAST_DOUBLE_TYPE : return "(double)"; + + case OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE : return "argfmax"; + case OPERATION_BINARY_ELEMENT_ARGMAX_TYPE : return "argmax"; + case OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE : return "argfmin"; + case OPERATION_BINARY_ELEMENT_ARGMIN_TYPE : return "argmin"; + case OPERATION_BINARY_ELEMENT_POW_TYPE : return "pow"; + + //Arithmetic + case OPERATION_UNARY_MINUS_TYPE : return "-"; + case OPERATION_BINARY_ASSIGN_TYPE : return "="; + case OPERATION_BINARY_INPLACE_ADD_TYPE : return "+="; + case OPERATION_BINARY_INPLACE_SUB_TYPE : return "-="; + case OPERATION_BINARY_ADD_TYPE : return "+"; + case OPERATION_BINARY_SUB_TYPE : return "-"; + case OPERATION_BINARY_MULT_TYPE : return "*"; + case OPERATION_BINARY_ELEMENT_PROD_TYPE : return "*"; + case OPERATION_BINARY_DIV_TYPE : return "/"; + case OPERATION_BINARY_ELEMENT_DIV_TYPE : return "/"; + case OPERATION_BINARY_ACCESS_TYPE : return "[]"; + + //Relational + case OPERATION_BINARY_ELEMENT_EQ_TYPE : return "isequal"; + case OPERATION_BINARY_ELEMENT_NEQ_TYPE : return "isnotequal"; + case OPERATION_BINARY_ELEMENT_GREATER_TYPE : return "isgreater"; + case OPERATION_BINARY_ELEMENT_GEQ_TYPE : return "isgreaterequal"; + case OPERATION_BINARY_ELEMENT_LESS_TYPE : return "isless"; + case OPERATION_BINARY_ELEMENT_LEQ_TYPE : return "islessequal"; + + case OPERATION_BINARY_ELEMENT_FMAX_TYPE : return "fmax"; + case OPERATION_BINARY_ELEMENT_FMIN_TYPE : return "fmin"; + case OPERATION_BINARY_ELEMENT_MAX_TYPE : return "max"; + case OPERATION_BINARY_ELEMENT_MIN_TYPE : return "min"; + //Unary + case OPERATION_UNARY_TRANS_TYPE : return "trans"; + + //Binary + case OPERATION_BINARY_INNER_PROD_TYPE : return "iprod"; + case OPERATION_BINARY_MAT_MAT_PROD_TYPE : return "mmprod"; + case OPERATION_BINARY_MAT_VEC_PROD_TYPE : return "mvprod"; + case OPERATION_BINARY_VECTOR_DIAG_TYPE : return "vdiag"; + case OPERATION_BINARY_MATRIX_DIAG_TYPE : return "mdiag"; + case OPERATION_BINARY_MATRIX_ROW_TYPE : return "row"; + case OPERATION_BINARY_MATRIX_COLUMN_TYPE : return "col"; + + default : throw generator_not_supported_exception("Unsupported operator"); + } +} + +inline const char * operator_string(scheduler::operation_node_type type) +{ + using namespace scheduler; + switch (type) + { + case OPERATION_UNARY_CAST_CHAR_TYPE : return "char"; + case OPERATION_UNARY_CAST_UCHAR_TYPE : return "uchar"; + case OPERATION_UNARY_CAST_SHORT_TYPE : return "short"; + case OPERATION_UNARY_CAST_USHORT_TYPE : return "ushort"; + case OPERATION_UNARY_CAST_INT_TYPE : return "int"; + case OPERATION_UNARY_CAST_UINT_TYPE : return "uint"; + case OPERATION_UNARY_CAST_LONG_TYPE : return "long"; + case OPERATION_UNARY_CAST_ULONG_TYPE : return "ulong"; + case OPERATION_UNARY_CAST_HALF_TYPE : return "half"; + case OPERATION_UNARY_CAST_FLOAT_TYPE : return "float"; + case OPERATION_UNARY_CAST_DOUBLE_TYPE : return "double"; + + case OPERATION_UNARY_MINUS_TYPE : return "umin"; + case OPERATION_BINARY_ASSIGN_TYPE : return "assign"; + case OPERATION_BINARY_INPLACE_ADD_TYPE : return "ip_add"; + case OPERATION_BINARY_INPLACE_SUB_TYPE : return "ip_sub"; + case OPERATION_BINARY_ADD_TYPE : return "add"; + case OPERATION_BINARY_SUB_TYPE : return "sub"; + case OPERATION_BINARY_MULT_TYPE : return "mult"; + case OPERATION_BINARY_ELEMENT_PROD_TYPE : return "eprod"; + case OPERATION_BINARY_DIV_TYPE : return "div"; + case OPERATION_BINARY_ELEMENT_DIV_TYPE : return "ediv"; + case OPERATION_BINARY_ACCESS_TYPE : return "acc"; + default : return evaluate(type); + } +} + +/** @brief functor for generating the expression string from a statement */ +class evaluate_expression_traversal: public tree_parsing::traversal_functor +{ +private: + std::map<std::string, std::string> const & accessors_; + std::string & str_; + mapping_type const & mapping_; + +public: + evaluate_expression_traversal(std::map<std::string, std::string> const & accessors, std::string & str, mapping_type const & mapping) : accessors_(accessors), str_(str), mapping_(mapping){ } + + void call_before_expansion(scheduler::statement const & statement, vcl_size_t root_idx) const + { + scheduler::statement_node const & root_node = statement.array()[root_idx]; + if ((root_node.op.type_family==scheduler::OPERATION_UNARY_TYPE_FAMILY || utils::elementwise_function(root_node.op)) + && !utils::node_leaf(root_node.op)) + str_+=tree_parsing::evaluate(root_node.op.type); + str_+="("; + + } + + void call_after_expansion(scheduler::statement const & /*statement*/, vcl_size_t /*root_idx*/) const + { + str_+=")"; + } + + void operator()(scheduler::statement const & statement, vcl_size_t root_idx, leaf_t leaf) const + { + scheduler::statement_node const & root_node = statement.array()[root_idx]; + mapping_type::key_type key = std::make_pair(root_idx, leaf); + if (leaf==PARENT_NODE_TYPE) + { + if (utils::node_leaf(root_node.op)) + str_ += at(mapping_, key)->evaluate(accessors_); + else if (utils::elementwise_operator(root_node.op)) + str_ += tree_parsing::evaluate(root_node.op.type); + else if (root_node.op.type_family!=scheduler::OPERATION_UNARY_TYPE_FAMILY && utils::elementwise_function(root_node.op)) + str_ += ","; + } + else + { + if (leaf==LHS_NODE_TYPE) + { + if (root_node.lhs.type_family!=scheduler::COMPOSITE_OPERATION_FAMILY) + str_ += at(mapping_, key)->evaluate(accessors_); + } + + if (leaf==RHS_NODE_TYPE) + { + if (root_node.rhs.type_family!=scheduler::COMPOSITE_OPERATION_FAMILY) + str_ += at(mapping_, key)->evaluate(accessors_); + } + } + } +}; + +inline std::string evaluate(leaf_t leaf, std::map<std::string, std::string> const & accessors, + scheduler::statement const & statement, vcl_size_t root_idx, mapping_type const & mapping) +{ + std::string res; + evaluate_expression_traversal traversal_functor(accessors, res, mapping); + scheduler::statement_node const & root_node = statement.array()[root_idx]; + + if (leaf==RHS_NODE_TYPE) + { + if (root_node.rhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) + tree_parsing::traverse(statement, root_node.rhs.node_index, traversal_functor, false); + else + traversal_functor(statement, root_idx, leaf); + } + else if (leaf==LHS_NODE_TYPE) + { + if (root_node.lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) + tree_parsing::traverse(statement, root_node.lhs.node_index, traversal_functor, false); + else + traversal_functor(statement, root_idx, leaf); + } + else + tree_parsing::traverse(statement, root_idx, traversal_functor, false); + + return res; +} + +inline void evaluate(utils::kernel_generation_stream & stream, leaf_t leaf, std::map<std::string, std::string> const & accessors, + statements_container const & statements, std::vector<mapping_type> const & mappings) +{ + statements_container::data_type::const_iterator sit; + std::vector<mapping_type>::const_iterator mit; + + for (mit = mappings.begin(), sit = statements.data().begin(); sit != statements.data().end(); ++mit, ++sit) + stream << evaluate(leaf, accessors, *sit, sit->root(), *mit) << ";" << std::endl; +} + + +/** @brief functor for fetching or writing-back the elements in a statement */ +class process_traversal : public tree_parsing::traversal_functor +{ +public: + process_traversal(std::string const & type_key, std::string const & to_process, utils::kernel_generation_stream & stream, + mapping_type const & mapping, std::set<std::string> & already_processed) : type_key_(type_key), to_process_(to_process), stream_(stream), mapping_(mapping), already_processed_(already_processed){ } + + void operator()(scheduler::statement const & /*statement*/, vcl_size_t root_idx, leaf_t leaf) const + { + mapping_type::const_iterator it = mapping_.find(std::make_pair(root_idx, leaf)); + if (it!=mapping_.end()) + { + mapped_object * obj = it->second.get(); + if (obj->type_key()==type_key_) + { + if (already_processed_.insert(obj->process("#name")).second) + stream_ << obj->process(to_process_) << std::endl; + } + } + } + +private: + std::string const & type_key_; + std::string const & to_process_; + utils::kernel_generation_stream & stream_; + mapping_type const & mapping_; + std::set<std::string> & already_processed_; +}; + +inline void process(utils::kernel_generation_stream & stream, leaf_t leaf, std::string const & type_key, std::string const & to_process, + scheduler::statement const & statement, vcl_size_t root_idx, mapping_type const & mapping, std::set<std::string> & already_processed) +{ + process_traversal traversal_functor(type_key, to_process, stream, mapping, already_processed); + scheduler::statement_node const & root_node = statement.array()[root_idx]; + + if (leaf==RHS_NODE_TYPE) + { + if (root_node.rhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) + tree_parsing::traverse(statement, root_node.rhs.node_index, traversal_functor, true); + else + traversal_functor(statement, root_idx, leaf); + } + else if (leaf==LHS_NODE_TYPE) + { + if (root_node.lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) + tree_parsing::traverse(statement, root_node.lhs.node_index, traversal_functor, true); + else + traversal_functor(statement, root_idx, leaf); + } + else + { + tree_parsing::traverse(statement, root_idx, traversal_functor, true); + } +} + +inline void process(utils::kernel_generation_stream & stream, leaf_t leaf, std::string const & type_key, std::string const & to_process, + statements_container const & statements, std::vector<mapping_type> const & mappings) +{ + statements_container::data_type::const_iterator sit; + std::vector<mapping_type>::const_iterator mit; + std::set<std::string> already_processed; + + for (mit = mappings.begin(), sit = statements.data().begin(); sit != statements.data().end(); ++mit, ++sit) + process(stream, leaf, type_key, to_process, *sit, sit->root(), *mit, already_processed); +} + + +class statement_representation_functor : public traversal_functor{ +private: + static void append_id(char * & ptr, unsigned int val) + { + if (val==0) + *ptr++='0'; + else + while (val>0) + { + *ptr++= (char)('0' + (val % 10)); + val /= 10; + } + } + +public: + typedef void result_type; + + statement_representation_functor(symbolic_binder & binder, char *& ptr) : binder_(binder), ptr_(ptr){ } + + template<class NumericT> + inline result_type operator()(NumericT const & /*scal*/) const + { + *ptr_++='h'; //host + *ptr_++='s'; //scalar + *ptr_++=utils::first_letter_of_type<NumericT>::value(); + } + + /** @brief Scalar mapping */ + template<class NumericT> + inline result_type operator()(scalar<NumericT> const & scal) const + { + *ptr_++='s'; //scalar + *ptr_++=utils::first_letter_of_type<NumericT>::value(); + append_id(ptr_, binder_.get(&traits::handle(scal))); + } + + /** @brief Vector mapping */ + template<class NumericT> + inline result_type operator()(vector_base<NumericT> const & vec) const + { + *ptr_++='v'; //vector + *ptr_++=utils::first_letter_of_type<NumericT>::value(); + append_id(ptr_, binder_.get(&traits::handle(vec))); + } + + /** @brief Implicit vector mapping */ + template<class NumericT> + inline result_type operator()(implicit_vector_base<NumericT> const & /*vec*/) const + { + *ptr_++='i'; //implicit + *ptr_++='v'; //vector + *ptr_++=utils::first_letter_of_type<NumericT>::value(); + } + + /** @brief Matrix mapping */ + template<class NumericT> + inline result_type operator()(matrix_base<NumericT> const & mat) const + { + *ptr_++='m'; //Matrix + *ptr_++=mat.row_major()?'r':'c'; + *ptr_++=utils::first_letter_of_type<NumericT>::value(); + append_id(ptr_, binder_.get(&traits::handle(mat))); + } + + /** @brief Implicit matrix mapping */ + template<class NumericT> + inline result_type operator()(implicit_matrix_base<NumericT> const & /*mat*/) const + { + *ptr_++='i'; //implicit + *ptr_++='m'; //matrix + *ptr_++=utils::first_letter_of_type<NumericT>::value(); + } + + static inline void append(char*& p, const char * str) + { + vcl_size_t n = std::strlen(str); + std::memcpy(p, str, n); + p+=n; + } + + inline void operator()(scheduler::statement const & statement, vcl_size_t root_idx, leaf_t leaf_t) const + { + scheduler::statement_node const & root_node = statement.array()[root_idx]; + if (leaf_t==LHS_NODE_TYPE && root_node.lhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY) + utils::call_on_element(root_node.lhs, *this); + else if (root_node.op.type_family==scheduler::OPERATION_BINARY_TYPE_FAMILY && leaf_t==RHS_NODE_TYPE && root_node.rhs.type_family != scheduler::COMPOSITE_OPERATION_FAMILY) + utils::call_on_element(root_node.rhs, *this); + else if (leaf_t==PARENT_NODE_TYPE) + append_id(ptr_,root_node.op.type); + } + +private: + symbolic_binder & binder_; + char *& ptr_; +}; + +inline std::string statements_representation(statements_container const & statements, binding_policy_t binding_policy) +{ + std::vector<char> program_name_vector(256); + char* program_name = &(program_name_vector[0]); + if (statements.order()==statements_container::INDEPENDENT) + *program_name++='i'; + else + *program_name++='s'; + tools::shared_ptr<symbolic_binder> binder = make_binder(binding_policy); + for (statements_container::data_type::const_iterator it = statements.data().begin(); it != statements.data().end(); ++it) + tree_parsing::traverse(*it, it->root(), tree_parsing::statement_representation_functor(*binder, program_name),true); + *program_name='\0'; + return std::string(&(program_name_vector[0])); +} + +} +} +} +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/utils.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/utils.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/utils.hpp new file mode 100644 index 0000000..1f1fc60 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/utils.hpp @@ -0,0 +1,568 @@ +#ifndef VIENNACL_DEVICE_SPECIFIC_UTILS_HPP +#define VIENNACL_DEVICE_SPECIFIC_UTILS_HPP + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + + +/** @file viennacl/device_specific/utils.hpp + @brief Internal utils +*/ + +#include <sstream> + +#include "viennacl/detail/matrix_def.hpp" +#include "viennacl/detail/vector_def.hpp" + +#include "viennacl/device_specific/forwards.h" +#include "viennacl/ocl/forwards.h" + +#include "viennacl/scheduler/forwards.h" + +#include "viennacl/traits/size.hpp" +#include "viennacl/traits/handle.hpp" +#include "viennacl/traits/row_major.hpp" + +#include "viennacl/tools/tools.hpp" + +namespace viennacl +{ +namespace device_specific +{ +namespace utils +{ + +//CUDA Conversion +inline std::string opencl_source_to_cuda_source(std::string const & opencl_src) +{ + std::string res = opencl_src; + + viennacl::tools::find_and_replace(res,"__attribute__","//__attribute__"); + + //Pointer + viennacl::tools::find_and_replace(res, "__global float*", "float*"); + viennacl::tools::find_and_replace(res, "__local float*", "float*"); + + viennacl::tools::find_and_replace(res, "__global double*", "double*"); + viennacl::tools::find_and_replace(res, "__local double*", "double*"); + + //Qualifiers + viennacl::tools::find_and_replace(res,"__global","__device__"); + viennacl::tools::find_and_replace(res,"__kernel","__global__"); + viennacl::tools::find_and_replace(res,"__constant","__constant__"); + viennacl::tools::find_and_replace(res,"__local","__shared__"); + + //Indexing + viennacl::tools::find_and_replace(res,"get_num_groups(0)","gridDim.x"); + viennacl::tools::find_and_replace(res,"get_num_groups(1)","gridDim.y"); + + viennacl::tools::find_and_replace(res,"get_local_size(0)","blockDim.x"); + viennacl::tools::find_and_replace(res,"get_local_size(1)","blockDim.y"); + + viennacl::tools::find_and_replace(res,"get_group_id(0)","blockIdx.x"); + viennacl::tools::find_and_replace(res,"get_group_id(1)","blockIdx.y"); + + viennacl::tools::find_and_replace(res,"get_local_id(0)","threadIdx.x"); + viennacl::tools::find_and_replace(res,"get_local_id(1)","threadIdx.y"); + + viennacl::tools::find_and_replace(res,"get_global_id(0)","(blockIdx.x*blockDim.x + threadIdx.x)"); + viennacl::tools::find_and_replace(res,"get_global_id(1)","(blockIdx.y*blockDim.y + threadIdx.y)"); + + //Synchronization + viennacl::tools::find_and_replace(res,"barrier(CLK_LOCAL_MEM_FENCE)","__syncthreads()"); + viennacl::tools::find_and_replace(res,"barrier(CLK_GLOBAL_MEM_FENCE)","__syncthreads()"); + + + return res; +} + +static std::string numeric_type_to_string(scheduler::statement_node_numeric_type const & type){ + switch (type) + { + //case scheduler::CHAR_TYPE: return "char"; + //case scheduler::UCHAR_TYPE: return "unsigned char"; + //case scheduler::SHORT_TYPE: return "short"; + //case scheduler::USHORT_TYPE: return "unsigned short"; + case scheduler::INT_TYPE: return "int"; + case scheduler::UINT_TYPE: return "unsigned int"; + case scheduler::LONG_TYPE: return "long"; + case scheduler::ULONG_TYPE: return "unsigned long"; + case scheduler::FLOAT_TYPE : return "float"; + case scheduler::DOUBLE_TYPE : return "double"; + default : throw generator_not_supported_exception("Unsupported Scalartype"); + } +} + + +template<class Fun> +static typename Fun::result_type call_on_host_scalar(scheduler::lhs_rhs_element element, Fun const & fun){ + assert(element.type_family == scheduler::SCALAR_TYPE_FAMILY && bool("Must be called on a host scalar")); + switch (element.numeric_type) + { + //case scheduler::CHAR_TYPE: return fun(element.host_char); + //case scheduler::UCHAR_TYPE: return fun(element.host_uchar); + //case scheduler::SHORT_TYPE: return fun(element.host_short); + //case scheduler::USHORT_TYPE: return fun(element.host_ushort); + case scheduler::INT_TYPE: return fun(element.host_int); + case scheduler::UINT_TYPE: return fun(element.host_uint); + case scheduler::LONG_TYPE: return fun(element.host_long); + case scheduler::ULONG_TYPE: return fun(element.host_ulong); + case scheduler::FLOAT_TYPE : return fun(element.host_float); + case scheduler::DOUBLE_TYPE : return fun(element.host_double); + default : throw generator_not_supported_exception("Unsupported Scalartype"); + } +} + +template<class Fun> +static typename Fun::result_type call_on_scalar(scheduler::lhs_rhs_element element, Fun const & fun){ + assert(element.type_family == scheduler::SCALAR_TYPE_FAMILY && bool("Must be called on a scalar")); + switch (element.numeric_type) + { + //case scheduler::CHAR_TYPE: return fun(*element.scalar_char); + //case scheduler::UCHAR_TYPE: return fun(*element.scalar_uchar); + //case scheduler::SHORT_TYPE: return fun(*element.scalar_short); + //case scheduler::USHORT_TYPE: return fun(*element.scalar_ushort); + case scheduler::INT_TYPE: return fun(*element.scalar_int); + case scheduler::UINT_TYPE: return fun(*element.scalar_uint); + case scheduler::LONG_TYPE: return fun(*element.scalar_long); + case scheduler::ULONG_TYPE: return fun(*element.scalar_ulong); + case scheduler::FLOAT_TYPE : return fun(*element.scalar_float); + case scheduler::DOUBLE_TYPE : return fun(*element.scalar_double); + default : throw generator_not_supported_exception("Unsupported Scalartype"); + } +} + +template<class Fun> +static typename Fun::result_type call_on_vector(scheduler::lhs_rhs_element element, Fun const & fun){ + assert(element.type_family == scheduler::VECTOR_TYPE_FAMILY && bool("Must be called on a vector")); + switch (element.numeric_type) + { + //case scheduler::CHAR_TYPE: return fun(*element.vector_char); + //case scheduler::UCHAR_TYPE: return fun(*element.vector_uchar); + //case scheduler::SHORT_TYPE: return fun(*element.vector_short); + //case scheduler::USHORT_TYPE: return fun(*element.vector_ushort); + case scheduler::INT_TYPE: return fun(*element.vector_int); + case scheduler::UINT_TYPE: return fun(*element.vector_uint); + case scheduler::LONG_TYPE: return fun(*element.vector_long); + case scheduler::ULONG_TYPE: return fun(*element.vector_ulong); + case scheduler::FLOAT_TYPE : return fun(*element.vector_float); + case scheduler::DOUBLE_TYPE : return fun(*element.vector_double); + default : throw generator_not_supported_exception("Unsupported Scalartype"); + } +} + +template<class Fun> +static typename Fun::result_type call_on_implicit_vector(scheduler::lhs_rhs_element element, Fun const & fun){ + assert(element.type_family == scheduler::VECTOR_TYPE_FAMILY && bool("Must be called on a implicit_vector")); + assert(element.subtype == scheduler::IMPLICIT_VECTOR_TYPE && bool("Must be called on a implicit_vector")); + switch (element.numeric_type) + { + //case scheduler::CHAR_TYPE: return fun(*element.implicit_vector_char); + //case scheduler::UCHAR_TYPE: return fun(*element.implicit_vector_uchar); + //case scheduler::SHORT_TYPE: return fun(*element.implicit_vector_short); + //case scheduler::USHORT_TYPE: return fun(*element.implicit_vector_ushort); + case scheduler::INT_TYPE: return fun(*element.implicit_vector_int); + case scheduler::UINT_TYPE: return fun(*element.implicit_vector_uint); + case scheduler::LONG_TYPE: return fun(*element.implicit_vector_long); + case scheduler::ULONG_TYPE: return fun(*element.implicit_vector_ulong); + case scheduler::FLOAT_TYPE : return fun(*element.implicit_vector_float); + case scheduler::DOUBLE_TYPE : return fun(*element.implicit_vector_double); + default : throw generator_not_supported_exception("Unsupported Scalartype"); + } +} + +template<class Fun> +static typename Fun::result_type call_on_matrix(scheduler::lhs_rhs_element element, Fun const & fun){ + assert(element.type_family == scheduler::MATRIX_TYPE_FAMILY && bool("Must be called on a matrix")); + switch (element.numeric_type) + { + //case scheduler::CHAR_TYPE: return fun(*element.matrix_char); + //case scheduler::UCHAR_TYPE: return fun(*element.matrix_uchar); + //case scheduler::SHORT_TYPE: return fun(*element.matrix_short); + //case scheduler::USHORT_TYPE: return fun(*element.matrix_ushort); + case scheduler::INT_TYPE: return fun(*element.matrix_int); + case scheduler::UINT_TYPE: return fun(*element.matrix_uint); + case scheduler::LONG_TYPE: return fun(*element.matrix_long); + case scheduler::ULONG_TYPE: return fun(*element.matrix_ulong); + case scheduler::FLOAT_TYPE : return fun(*element.matrix_float); + case scheduler::DOUBLE_TYPE : return fun(*element.matrix_double); + default : throw generator_not_supported_exception("Unsupported Scalartype"); + } +} + + +template<class Fun> +static typename Fun::result_type call_on_implicit_matrix(scheduler::lhs_rhs_element element, Fun const & fun){ + assert(element.subtype == scheduler::IMPLICIT_MATRIX_TYPE && bool("Must be called on a implicit matrix")); + switch (element.numeric_type) + { + //case scheduler::CHAR_TYPE: return fun(*element.implicit_matrix_char); + //case scheduler::UCHAR_TYPE: return fun(*element.implicit_matrix_uchar); + //case scheduler::SHORT_TYPE: return fun(*element.implicit_matrix_short); + //case scheduler::USHORT_TYPE: return fun(*element.implicit_matrix_ushort); + case scheduler::INT_TYPE: return fun(*element.implicit_matrix_int); + case scheduler::UINT_TYPE: return fun(*element.implicit_matrix_uint); + case scheduler::LONG_TYPE: return fun(*element.implicit_matrix_long); + case scheduler::ULONG_TYPE: return fun(*element.implicit_matrix_ulong); + case scheduler::FLOAT_TYPE : return fun(*element.implicit_matrix_float); + case scheduler::DOUBLE_TYPE : return fun(*element.implicit_matrix_double); + default : throw generator_not_supported_exception("Unsupported Scalartype"); + } +} + +template<class Fun> +static typename Fun::result_type call_on_element(scheduler::lhs_rhs_element const & element, Fun const & fun){ + switch (element.type_family) + { + case scheduler::SCALAR_TYPE_FAMILY: + if (element.subtype == scheduler::HOST_SCALAR_TYPE) + return call_on_host_scalar(element, fun); + else + return call_on_scalar(element, fun); + case scheduler::VECTOR_TYPE_FAMILY : + if (element.subtype == scheduler::IMPLICIT_VECTOR_TYPE) + return call_on_implicit_vector(element, fun); + else + return call_on_vector(element, fun); + case scheduler::MATRIX_TYPE_FAMILY: + if (element.subtype == scheduler::IMPLICIT_MATRIX_TYPE) + return call_on_implicit_matrix(element, fun); + else + return call_on_matrix(element,fun); + default: + throw generator_not_supported_exception("Unsupported datastructure type : Not among {Scalar, Vector, Matrix}"); + } +} + +struct scalartype_size_fun +{ + typedef vcl_size_t result_type; + result_type operator()(float const &) const { return sizeof(float); } + result_type operator()(double const &) const { return sizeof(double); } + template<class T> result_type operator()(T const &) const { return sizeof(typename viennacl::result_of::cpu_value_type<T>::type); } +}; + +struct internal_size_fun +{ + typedef vcl_size_t result_type; + template<class T> result_type operator()(T const &t) const { return viennacl::traits::internal_size(t); } +}; + +struct size_fun +{ + typedef vcl_size_t result_type; + template<class T> result_type operator()(T const &t) const { return viennacl::traits::size(t); } +}; + +struct stride_fun +{ + typedef vcl_size_t result_type; + template<class T> result_type operator()(T const &t) const { return viennacl::traits::stride(t); } +}; + +struct start1_fun +{ + typedef vcl_size_t result_type; + template<class T> result_type operator()(T const &t) const { return viennacl::traits::start1(t); } +}; + +struct start2_fun +{ + typedef vcl_size_t result_type; + template<class T> result_type operator()(T const &t) const { return viennacl::traits::start2(t); } +}; + +struct leading_stride +{ + typedef vcl_size_t result_type; + template<class T> result_type operator()(T const &t) const { return viennacl::traits::row_major(t)?viennacl::traits::stride2(t):viennacl::traits::stride1(t); } +}; + +struct leading_start +{ + typedef vcl_size_t result_type; + template<class T> result_type operator()(T const &t) const { return viennacl::traits::row_major(t)?viennacl::traits::start2(t):viennacl::traits::start1(t); } +}; + +struct stride1_fun +{ + typedef vcl_size_t result_type; + template<class T> result_type operator()(T const &t) const { return viennacl::traits::stride1(t); } +}; + +struct stride2_fun +{ + typedef vcl_size_t result_type; + template<class T> result_type operator()(T const &t) const { return viennacl::traits::stride2(t); } +}; + +struct handle_fun +{ + typedef cl_mem result_type; + template<class T> + result_type operator()(T const &t) const { return viennacl::traits::opencl_handle(t); } +}; + +struct internal_size1_fun +{ + typedef vcl_size_t result_type; + template<class T> + result_type operator()(T const &t) const { return viennacl::traits::internal_size1(t); } +}; + +struct row_major_fun +{ + typedef bool result_type; + template<class T> + result_type operator()(T const &t) const { return viennacl::traits::row_major(t); } +}; + +struct internal_size2_fun +{ + typedef vcl_size_t result_type; + template<class T> + result_type operator()(T const &t) const { return viennacl::traits::internal_size2(t); } +}; + +struct size1_fun +{ + typedef vcl_size_t result_type; + template<class T> + result_type operator()(T const &t) const { return viennacl::traits::size1(t); } +}; + +struct size2_fun +{ + typedef vcl_size_t result_type; + template<class T> + result_type operator()(T const &t) const { return viennacl::traits::size2(t); } +}; + +template<class T, class U> +struct is_same_type { enum { value = 0 }; }; + +template<class T> +struct is_same_type<T,T> { enum { value = 1 }; }; + +inline bool is_reduction(scheduler::statement_node const & node) +{ + return node.op.type_family==scheduler::OPERATION_VECTOR_REDUCTION_TYPE_FAMILY + || node.op.type_family==scheduler::OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY + || node.op.type_family==scheduler::OPERATION_ROWS_REDUCTION_TYPE_FAMILY + || node.op.type==scheduler::OPERATION_BINARY_INNER_PROD_TYPE + || node.op.type==scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE; +} + +inline bool is_index_reduction(scheduler::op_element const & op) +{ + return op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMAX_TYPE + || op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMAX_TYPE + || op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGFMIN_TYPE + || op.type==scheduler::OPERATION_BINARY_ELEMENT_ARGMIN_TYPE; +} +template<class T> +struct type_to_string; +template<> struct type_to_string<unsigned char> { static const char * value() { return "uchar"; } }; +template<> struct type_to_string<char> { static const char * value() { return "char"; } }; +template<> struct type_to_string<unsigned short> { static const char * value() { return "ushort"; } }; +template<> struct type_to_string<short> { static const char * value() { return "short"; } }; +template<> struct type_to_string<unsigned int> { static const char * value() { return "uint"; } }; +template<> struct type_to_string<int> { static const char * value() { return "int"; } }; +template<> struct type_to_string<unsigned long> { static const char * value() { return "ulong"; } }; +template<> struct type_to_string<long> { static const char * value() { return "long"; } }; +template<> struct type_to_string<float> { static const char * value() { return "float"; } }; +template<> struct type_to_string<double> { static const char * value() { return "double"; } }; + + +template<class T> +struct first_letter_of_type; +template<> struct first_letter_of_type<char> { static char value() { return 'c'; } }; +template<> struct first_letter_of_type<unsigned char> { static char value() { return 'd'; } }; +template<> struct first_letter_of_type<short> { static char value() { return 's'; } }; +template<> struct first_letter_of_type<unsigned short> { static char value() { return 't'; } }; +template<> struct first_letter_of_type<int> { static char value() { return 'i'; } }; +template<> struct first_letter_of_type<unsigned int> { static char value() { return 'j'; } }; +template<> struct first_letter_of_type<long> { static char value() { return 'l'; } }; +template<> struct first_letter_of_type<unsigned long> { static char value() { return 'm'; } }; +template<> struct first_letter_of_type<float> { static char value() { return 'f'; } }; +template<> struct first_letter_of_type<double> { static char value() { return 'd'; } }; + +class kernel_generation_stream : public std::ostream +{ + class kgenstream : public std::stringbuf + { + public: + kgenstream(std::ostringstream& osstream,unsigned int const & tab_count) : oss_(osstream), tab_count_(tab_count){ } + int sync() { + for (unsigned int i=0; i<tab_count_;++i) + oss_ << " "; + oss_ << str(); + str(""); + return !oss_; + } +#if defined(_MSC_VER) + ~kgenstream() throw() { pubsync(); } +#else + ~kgenstream() { pubsync(); } +#endif + private: + std::ostream& oss_; + unsigned int const & tab_count_; + }; + +public: + kernel_generation_stream() : std::ostream(new kgenstream(oss,tab_count_)), tab_count_(0){ } +#if defined(_MSC_VER) + ~kernel_generation_stream() throw() { delete rdbuf(); } +#else + ~kernel_generation_stream(){ delete rdbuf(); } +#endif + + std::string str(){ return oss.str(); } + void inc_tab(){ ++tab_count_; } + void dec_tab(){ --tab_count_; } +private: + unsigned int tab_count_; + std::ostringstream oss; +}; + +inline bool node_leaf(scheduler::op_element const & op) +{ + using namespace scheduler; + return op.type==OPERATION_UNARY_NORM_1_TYPE + || op.type==OPERATION_UNARY_NORM_2_TYPE + || op.type==OPERATION_UNARY_NORM_INF_TYPE + || op.type==OPERATION_UNARY_TRANS_TYPE + || op.type==OPERATION_BINARY_MAT_VEC_PROD_TYPE + || op.type==OPERATION_BINARY_MAT_MAT_PROD_TYPE + || op.type==OPERATION_BINARY_INNER_PROD_TYPE + || op.type==OPERATION_BINARY_MATRIX_DIAG_TYPE + || op.type==OPERATION_BINARY_VECTOR_DIAG_TYPE + || op.type==OPERATION_BINARY_MATRIX_ROW_TYPE + || op.type==OPERATION_BINARY_MATRIX_COLUMN_TYPE + || op.type_family==OPERATION_VECTOR_REDUCTION_TYPE_FAMILY + || op.type_family==OPERATION_ROWS_REDUCTION_TYPE_FAMILY + || op.type_family==OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY; +} + +inline bool elementwise_operator(scheduler::op_element const & op) +{ + using namespace scheduler; + return op.type== OPERATION_BINARY_ASSIGN_TYPE + || op.type== OPERATION_BINARY_INPLACE_ADD_TYPE + || op.type== OPERATION_BINARY_INPLACE_SUB_TYPE + || op.type== OPERATION_BINARY_ADD_TYPE + || op.type== OPERATION_BINARY_SUB_TYPE + || op.type== OPERATION_BINARY_ELEMENT_PROD_TYPE + || op.type== OPERATION_BINARY_ELEMENT_DIV_TYPE + || op.type== OPERATION_BINARY_MULT_TYPE + || op.type== OPERATION_BINARY_DIV_TYPE; +} + +inline bool elementwise_function(scheduler::op_element const & op) +{ + using namespace scheduler; + return + + op.type == OPERATION_UNARY_CAST_CHAR_TYPE + || op.type == OPERATION_UNARY_CAST_UCHAR_TYPE + || op.type == OPERATION_UNARY_CAST_SHORT_TYPE + || op.type == OPERATION_UNARY_CAST_USHORT_TYPE + || op.type == OPERATION_UNARY_CAST_INT_TYPE + || op.type == OPERATION_UNARY_CAST_UINT_TYPE + || op.type == OPERATION_UNARY_CAST_LONG_TYPE + || op.type == OPERATION_UNARY_CAST_ULONG_TYPE + || op.type == OPERATION_UNARY_CAST_HALF_TYPE + || op.type == OPERATION_UNARY_CAST_FLOAT_TYPE + || op.type == OPERATION_UNARY_CAST_DOUBLE_TYPE + + || op.type== OPERATION_UNARY_ABS_TYPE + || op.type== OPERATION_UNARY_ACOS_TYPE + || op.type== OPERATION_UNARY_ASIN_TYPE + || op.type== OPERATION_UNARY_ATAN_TYPE + || op.type== OPERATION_UNARY_CEIL_TYPE + || op.type== OPERATION_UNARY_COS_TYPE + || op.type== OPERATION_UNARY_COSH_TYPE + || op.type== OPERATION_UNARY_EXP_TYPE + || op.type== OPERATION_UNARY_FABS_TYPE + || op.type== OPERATION_UNARY_FLOOR_TYPE + || op.type== OPERATION_UNARY_LOG_TYPE + || op.type== OPERATION_UNARY_LOG10_TYPE + || op.type== OPERATION_UNARY_SIN_TYPE + || op.type== OPERATION_UNARY_SINH_TYPE + || op.type== OPERATION_UNARY_SQRT_TYPE + || op.type== OPERATION_UNARY_TAN_TYPE + || op.type== OPERATION_UNARY_TANH_TYPE + + || op.type== OPERATION_BINARY_ELEMENT_POW_TYPE + || op.type== OPERATION_BINARY_ELEMENT_EQ_TYPE + || op.type== OPERATION_BINARY_ELEMENT_NEQ_TYPE + || op.type== OPERATION_BINARY_ELEMENT_GREATER_TYPE + || op.type== OPERATION_BINARY_ELEMENT_LESS_TYPE + || op.type== OPERATION_BINARY_ELEMENT_GEQ_TYPE + || op.type== OPERATION_BINARY_ELEMENT_LEQ_TYPE + || op.type== OPERATION_BINARY_ELEMENT_FMAX_TYPE + || op.type== OPERATION_BINARY_ELEMENT_FMIN_TYPE + || op.type== OPERATION_BINARY_ELEMENT_MAX_TYPE + || op.type== OPERATION_BINARY_ELEMENT_MIN_TYPE; + +} + +inline scheduler::lhs_rhs_element & lhs_rhs_element(scheduler::statement const & st, vcl_size_t idx, leaf_t leaf) +{ + using namespace tree_parsing; + assert(leaf==LHS_NODE_TYPE || leaf==RHS_NODE_TYPE); + if (leaf==LHS_NODE_TYPE) + return const_cast<scheduler::lhs_rhs_element &>(st.array()[idx].lhs); + return const_cast<scheduler::lhs_rhs_element &>(st.array()[idx].rhs); +} + +inline unsigned int size_of(scheduler::statement_node_numeric_type type) +{ + using namespace scheduler; + switch (type) + { + case UCHAR_TYPE: + case CHAR_TYPE: return 1; + + case USHORT_TYPE: + case SHORT_TYPE: + case HALF_TYPE: return 2; + + case UINT_TYPE: + case INT_TYPE: + case FLOAT_TYPE: return 4; + + case ULONG_TYPE: + case LONG_TYPE: + case DOUBLE_TYPE: return 8; + + default: throw generator_not_supported_exception("Unsupported scalartype"); + } +} + +inline std::string append_width(std::string const & str, unsigned int width) +{ + if (width==1) + return str; + return str + tools::to_string(width); +} + +} +} +} +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/ell_matrix.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/ell_matrix.hpp b/native-viennaCL/src/main/cpp/viennacl/ell_matrix.hpp new file mode 100644 index 0000000..3c3a428 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/ell_matrix.hpp @@ -0,0 +1,362 @@ +#ifndef VIENNACL_ELL_MATRIX_HPP_ +#define VIENNACL_ELL_MATRIX_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/ell_matrix.hpp + @brief Implementation of the ell_matrix class + + Contributed by Volodymyr Kysenko. +*/ + + +#include "viennacl/forwards.h" +#include "viennacl/vector.hpp" + +#include "viennacl/tools/tools.hpp" + +#include "viennacl/linalg/sparse_matrix_operations.hpp" + +namespace viennacl +{ +/** @brief Sparse matrix class using the ELLPACK format for storing the nonzeros. + * + * This format works best for matrices where the number of nonzeros per row is mostly the same. + * Finite element and finite difference methods on nicely shaped domains often result in such a nonzero pattern. + * For a matrix + * + * (1 2 0 0 0) + * (2 3 4 0 0) + * (0 5 6 0 7) + * (0 0 8 9 0) + * + * the entries are layed out in chunks of size 3 as + * (1 2 5 8; 2 3 6 9; 0 4 7 0) + * Note that this is a 'transposed' representation in order to maximize coalesced memory access. + */ +template<typename NumericT, unsigned int AlignmentV /* see forwards.h for default argument */> +class ell_matrix +{ +public: + typedef viennacl::backend::mem_handle handle_type; + typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<NumericT>::ResultType> value_type; + typedef vcl_size_t size_type; + + ell_matrix() : rows_(0), cols_(0), maxnnz_(0) {} + + ell_matrix(viennacl::context ctx) : rows_(0), cols_(0), maxnnz_(0) + { + coords_.switch_active_handle_id(ctx.memory_type()); + elements_.switch_active_handle_id(ctx.memory_type()); + +#ifdef VIENNACL_WITH_OPENCL + if (ctx.memory_type() == OPENCL_MEMORY) + { + coords_.opencl_handle().context(ctx.opencl_context()); + elements_.opencl_handle().context(ctx.opencl_context()); + } +#endif + } + + /** @brief Resets all entries in the matrix back to zero without changing the matrix size. Resets the sparsity pattern. */ + void clear() + { + maxnnz_ = 0; + + viennacl::backend::typesafe_host_array<unsigned int> host_coords_buffer(coords_, internal_size1()); + std::vector<NumericT> host_elements(internal_size1()); + + viennacl::backend::memory_create(coords_, host_coords_buffer.element_size() * internal_size1(), viennacl::traits::context(coords_), host_coords_buffer.get()); + viennacl::backend::memory_create(elements_, sizeof(NumericT) * internal_size1(), viennacl::traits::context(elements_), &(host_elements[0])); + } + + vcl_size_t internal_size1() const { return viennacl::tools::align_to_multiple<vcl_size_t>(rows_, AlignmentV); } + vcl_size_t internal_size2() const { return viennacl::tools::align_to_multiple<vcl_size_t>(cols_, AlignmentV); } + + vcl_size_t size1() const { return rows_; } + vcl_size_t size2() const { return cols_; } + + vcl_size_t internal_maxnnz() const {return viennacl::tools::align_to_multiple<vcl_size_t>(maxnnz_, AlignmentV); } + vcl_size_t maxnnz() const { return maxnnz_; } + + vcl_size_t nnz() const { return rows_ * maxnnz_; } + vcl_size_t internal_nnz() const { return internal_size1() * internal_maxnnz(); } + + handle_type & handle() { return elements_; } + const handle_type & handle() const { return elements_; } + + handle_type & handle2() { return coords_; } + const handle_type & handle2() const { return coords_; } + +#if defined(_MSC_VER) && _MSC_VER < 1500 //Visual Studio 2005 needs special treatment + template<typename CPUMatrixT> + friend void copy(const CPUMatrixT & cpu_matrix, ell_matrix & gpu_matrix ); +#else + template<typename CPUMatrixT, typename T, unsigned int ALIGN> + friend void copy(const CPUMatrixT & cpu_matrix, ell_matrix<T, ALIGN> & gpu_matrix ); +#endif + +private: + vcl_size_t rows_; + vcl_size_t cols_; + vcl_size_t maxnnz_; + + handle_type coords_; + handle_type elements_; +}; + +template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV> +void copy(const CPUMatrixT& cpu_matrix, ell_matrix<NumericT, AlignmentV>& gpu_matrix ) +{ + assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); + assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); + + if (cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0) + { + //determine max capacity for row + vcl_size_t max_entries_per_row = 0; + for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) + { + vcl_size_t num_entries = 0; + for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) + ++num_entries; + + max_entries_per_row = std::max(max_entries_per_row, num_entries); + } + + //setup GPU matrix + gpu_matrix.maxnnz_ = max_entries_per_row; + gpu_matrix.rows_ = cpu_matrix.size1(); + gpu_matrix.cols_ = cpu_matrix.size2(); + + vcl_size_t nnz = gpu_matrix.internal_nnz(); + + viennacl::backend::typesafe_host_array<unsigned int> coords(gpu_matrix.handle2(), nnz); + std::vector<NumericT> elements(nnz, 0); + + // std::cout << "ELL_MATRIX copy " << gpu_matrix.maxnnz_ << " " << gpu_matrix.rows_ << " " << gpu_matrix.cols_ << " " + // << gpu_matrix.internal_maxnnz() << "\n"; + + for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) + { + vcl_size_t data_index = 0; + + for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) + { + coords.set(gpu_matrix.internal_size1() * data_index + col_it.index1(), col_it.index2()); + elements[gpu_matrix.internal_size1() * data_index + col_it.index1()] = *col_it; + //std::cout << *col_it << "\n"; + data_index++; + } + } + + viennacl::backend::memory_create(gpu_matrix.handle2(), coords.raw_size(), traits::context(gpu_matrix.handle2()), coords.get()); + viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(NumericT) * elements.size(), traits::context(gpu_matrix.handle()), &(elements[0])); + } +} + + + +/** @brief Copies a sparse matrix from the host to the compute device. The host type is the std::vector< std::map < > > format . + * + * @param cpu_matrix A sparse matrix on the host composed of an STL vector and an STL map. + * @param gpu_matrix The sparse ell_matrix from ViennaCL + */ +template<typename IndexT, typename NumericT, unsigned int AlignmentV> +void copy(std::vector< std::map<IndexT, NumericT> > const & cpu_matrix, + ell_matrix<NumericT, AlignmentV> & gpu_matrix) +{ + vcl_size_t max_col = 0; + for (vcl_size_t i=0; i<cpu_matrix.size(); ++i) + { + if (cpu_matrix[i].size() > 0) + max_col = std::max<vcl_size_t>(max_col, (cpu_matrix[i].rbegin())->first); + } + + viennacl::copy(tools::const_sparse_matrix_adapter<NumericT, IndexT>(cpu_matrix, cpu_matrix.size(), max_col + 1), gpu_matrix); +} + + + + + + +template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV> +void copy(const ell_matrix<NumericT, AlignmentV>& gpu_matrix, CPUMatrixT& cpu_matrix) +{ + assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); + assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); + + if (gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0) + { + std::vector<NumericT> elements(gpu_matrix.internal_nnz()); + viennacl::backend::typesafe_host_array<unsigned int> coords(gpu_matrix.handle2(), gpu_matrix.internal_nnz()); + + viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT) * elements.size(), &(elements[0])); + viennacl::backend::memory_read(gpu_matrix.handle2(), 0, coords.raw_size(), coords.get()); + + for (vcl_size_t row = 0; row < gpu_matrix.size1(); row++) + { + for (vcl_size_t ind = 0; ind < gpu_matrix.internal_maxnnz(); ind++) + { + vcl_size_t offset = gpu_matrix.internal_size1() * ind + row; + + NumericT val = elements[offset]; + if (val <= 0 && val >= 0) // val == 0 without compiler warnings + continue; + + if (coords[offset] >= gpu_matrix.size2()) + { + std::cerr << "ViennaCL encountered invalid data " << offset << " " << ind << " " << row << " " << coords[offset] << " " << gpu_matrix.size2() << std::endl; + return; + } + + cpu_matrix(row, coords[offset]) = val; + } + } + } +} + + +/** @brief Copies a sparse matrix from the compute device to the host. The host type is the std::vector< std::map < > > format . + * + * @param gpu_matrix The sparse ell_matrix from ViennaCL + * @param cpu_matrix A sparse matrix on the host composed of an STL vector and an STL map. + */ +template<typename NumericT, unsigned int AlignmentV, typename IndexT> +void copy(const ell_matrix<NumericT, AlignmentV> & gpu_matrix, + std::vector< std::map<IndexT, NumericT> > & cpu_matrix) +{ + if (cpu_matrix.size() == 0) + cpu_matrix.resize(gpu_matrix.size1()); + + assert(cpu_matrix.size() == gpu_matrix.size1() && bool("Matrix dimension mismatch!")); + + tools::sparse_matrix_adapter<NumericT, IndexT> temp(cpu_matrix, gpu_matrix.size1(), gpu_matrix.size2()); + viennacl::copy(gpu_matrix, temp); +} + +// +// Specify available operations: +// + +/** \cond */ + +namespace linalg +{ +namespace detail +{ + // x = A * y + template<typename T, unsigned int A> + struct op_executor<vector_base<T>, op_assign, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) + { + // check for the special case x = A * x + if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) + { + viennacl::vector<T> temp(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); + lhs = temp; + } + else + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(0)); + } + }; + + template<typename T, unsigned int A> + struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) + { + // check for the special case x += A * x + if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) + { + viennacl::vector<T> temp(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); + lhs += temp; + } + else + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(1)); + } + }; + + template<typename T, unsigned int A> + struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) + { + // check for the special case x -= A * x + if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) + { + viennacl::vector<T> temp(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); + lhs -= temp; + } + else + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(-1), lhs, T(1)); + } + }; + + + // x = A * vec_op + template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> + struct op_executor<vector_base<T>, op_assign, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) + { + viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); + viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs); + } + }; + + // x = A * vec_op + template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> + struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) + { + viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); + viennacl::vector<T> temp_result(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); + lhs += temp_result; + } + }; + + // x = A * vec_op + template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> + struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) + { + viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); + viennacl::vector<T> temp_result(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); + lhs -= temp_result; + } + }; + +} // namespace detail +} // namespace linalg + +/** \endcond */ +} + +#endif + + http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/fft.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/fft.hpp b/native-viennaCL/src/main/cpp/viennacl/fft.hpp new file mode 100644 index 0000000..bacd911 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/fft.hpp @@ -0,0 +1,282 @@ +#ifndef VIENNACL_FFT_HPP +#define VIENNACL_FFT_HPP + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory + ============================================================================= */ + +/** @file viennacl/fft.hpp + @brief All routines related to the Fast Fourier Transform. Experimental. + */ + +#include <viennacl/vector.hpp> +#include <viennacl/matrix.hpp> + +#include "viennacl/linalg/fft_operations.hpp" +#include "viennacl/traits/handle.hpp" + +#include <cmath> + +#include <stdexcept> +/// @cond +namespace viennacl +{ +namespace detail +{ +namespace fft +{ + inline bool is_radix2(vcl_size_t data_size) + { + return !((data_size > 2) && (data_size & (data_size - 1))); + } +} //namespace fft +} //namespace detail + +/** + * @brief Generic inplace version of 1-D Fourier transformation. + * + * @param input Input vector, result will be stored here. + * @param batch_num Number of items in batch + * @param sign Sign of exponent, default is -1.0 + */ +template<class NumericT, unsigned int AlignmentV> +void inplace_fft(viennacl::vector<NumericT, AlignmentV>& input, vcl_size_t batch_num = 1, + NumericT sign = -1.0) +{ + vcl_size_t size = (input.size() >> 1) / batch_num; + + if (!viennacl::detail::fft::is_radix2(size)) + { + viennacl::vector<NumericT, AlignmentV> output(input.size()); + viennacl::linalg::direct(input, output, size, size, batch_num, sign); + viennacl::copy(output, input); + } + else + viennacl::linalg::radix2(input, size, size, batch_num, sign); +} + +/** + * @brief Generic version of 1-D Fourier transformation. + * + * @param input Input vector. + * @param output Output vector. + * @param batch_num Number of items in batch. + * @param sign Sign of exponent, default is -1.0 + */ +template<class NumericT, unsigned int AlignmentV> +void fft(viennacl::vector<NumericT, AlignmentV>& input, + viennacl::vector<NumericT, AlignmentV>& output, vcl_size_t batch_num = 1, NumericT sign = -1.0) +{ + vcl_size_t size = (input.size() >> 1) / batch_num; + if (viennacl::detail::fft::is_radix2(size)) + { + viennacl::copy(input, output); + viennacl::linalg::radix2(output, size, size, batch_num, sign); + } + else + viennacl::linalg::direct(input, output, size, size, batch_num, sign); +} + +/** + * @brief Generic inplace version of 2-D Fourier transformation. + * + * @param input Input matrix, result will be stored here. + * @param sign Sign of exponent, default is -1.0 + */ +template<class NumericT, unsigned int AlignmentV> +void inplace_fft(viennacl::matrix<NumericT, viennacl::row_major, AlignmentV>& input, + NumericT sign = -1.0) +{ + vcl_size_t rows_num = input.size1(); + vcl_size_t cols_num = input.size2() >> 1; + + vcl_size_t cols_int = input.internal_size2() >> 1; + + // batch with rows + if (viennacl::detail::fft::is_radix2(cols_num)) + viennacl::linalg::radix2(input, cols_num, cols_int, rows_num, sign, + viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR); + else + { + viennacl::matrix<NumericT, viennacl::row_major, AlignmentV> output(input.size1(), + input.size2()); + + viennacl::linalg::direct(input, output, cols_num, cols_int, rows_num, sign, + viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR); + + input = output; + } + + // batch with cols + if (viennacl::detail::fft::is_radix2(rows_num)) + viennacl::linalg::radix2(input, rows_num, cols_int, cols_num, sign, + viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::COL_MAJOR); + else + { + viennacl::matrix<NumericT, viennacl::row_major, AlignmentV> output(input.size1(), + input.size2()); + + viennacl::linalg::direct(input, output, rows_num, cols_int, cols_num, sign, + viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::COL_MAJOR); + + input = output; + } + +} + +/** + * @brief Generic version of 2-D Fourier transformation. + * + * @param input Input vector. + * @param output Output vector. + * @param sign Sign of exponent, default is -1.0 + */ +template<class NumericT, unsigned int AlignmentV> +void fft(viennacl::matrix<NumericT, viennacl::row_major, AlignmentV>& input, //TODO + viennacl::matrix<NumericT, viennacl::row_major, AlignmentV>& output, NumericT sign = -1.0) +{ + + vcl_size_t rows_num = input.size1(); + vcl_size_t cols_num = input.size2() >> 1; + vcl_size_t cols_int = input.internal_size2() >> 1; + + // batch with rows + if (viennacl::detail::fft::is_radix2(cols_num)) + { + output = input; + viennacl::linalg::radix2(output, cols_num, cols_int, rows_num, sign, + viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR); + } + else + viennacl::linalg::direct(input, output, cols_num, cols_int, rows_num, sign, + viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR); + + // batch with cols + if (viennacl::detail::fft::is_radix2(rows_num)) + { + //std::cout<<"output"<<output<<std::endl; + + viennacl::linalg::radix2(output, rows_num, cols_int, cols_num, sign, + viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::COL_MAJOR); + } + else + { + viennacl::matrix<NumericT, viennacl::row_major, AlignmentV> tmp(output.size1(), + output.size2()); + tmp = output; + //std::cout<<"tmp"<<tmp<<std::endl; + viennacl::linalg::direct(tmp, output, rows_num, cols_int, cols_num, sign, + viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::COL_MAJOR); + } +} + +/** + * @brief Generic inplace version of inverse 1-D Fourier transformation. + * + * Shorthand function for fft(sign = 1.0) + * + * @param input Input vector. + * @param batch_num Number of items in batch. + * @param sign Sign of exponent, default is -1.0 + */ +template<class NumericT, unsigned int AlignmentV> +void inplace_ifft(viennacl::vector<NumericT, AlignmentV>& input, vcl_size_t batch_num = 1) +{ + viennacl::inplace_fft(input, batch_num, NumericT(1.0)); + viennacl::linalg::normalize(input); +} + +/** + * @brief Generic version of inverse 1-D Fourier transformation. + * + * Shorthand function for fft(sign = 1.0) + * + * @param input Input vector. + * @param output Output vector. + * @param batch_num Number of items in batch. + * @param sign Sign of exponent, default is -1.0 + */ +template<class NumericT, unsigned int AlignmentV> +void ifft(viennacl::vector<NumericT, AlignmentV>& input, + viennacl::vector<NumericT, AlignmentV>& output, vcl_size_t batch_num = 1) +{ + viennacl::fft(input, output, batch_num, NumericT(1.0)); + viennacl::linalg::normalize(output); +} + +namespace linalg +{ + /** + * @brief 1-D convolution of two vectors. + * + * This function does not make any changes to input vectors + * + * @param input1 Input vector #1. + * @param input2 Input vector #2. + * @param output Output vector. + */ + template<class NumericT, unsigned int AlignmentV> + void convolve(viennacl::vector<NumericT, AlignmentV>& input1, + viennacl::vector<NumericT, AlignmentV>& input2, + viennacl::vector<NumericT, AlignmentV>& output) + { + assert(input1.size() == input2.size()); + assert(input1.size() == output.size()); + //temporal arrays + viennacl::vector<NumericT, AlignmentV> tmp1(input1.size()); + viennacl::vector<NumericT, AlignmentV> tmp2(input2.size()); + viennacl::vector<NumericT, AlignmentV> tmp3(output.size()); + + // align input arrays to equal size + // FFT of input data + viennacl::fft(input1, tmp1); + viennacl::fft(input2, tmp2); + + // multiplication of input data + viennacl::linalg::multiply_complex(tmp1, tmp2, tmp3); + // inverse FFT of input data + viennacl::ifft(tmp3, output); + } + + /** + * @brief 1-D convolution of two vectors. + * + * This function can make changes to input vectors to avoid additional memory allocations. + * + * @param input1 Input vector #1. + * @param input2 Input vector #2. + * @param output Output vector. + */ + template<class NumericT, unsigned int AlignmentV> + void convolve_i(viennacl::vector<NumericT, AlignmentV>& input1, + viennacl::vector<NumericT, AlignmentV>& input2, + viennacl::vector<NumericT, AlignmentV>& output) + { + assert(input1.size() == input2.size()); + assert(input1.size() == output.size()); + + viennacl::inplace_fft(input1); + viennacl::inplace_fft(input2); + + viennacl::linalg::multiply_complex(input1, input2, output); + + viennacl::inplace_ifft(output); + } +} //namespace linalg +} //namespace viennacl + +/// @endcond +#endif
