http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/amd/southern_islands/tahiti.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/amd/southern_islands/tahiti.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/amd/southern_islands/tahiti.hpp new file mode 100644 index 0000000..aec9043 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/amd/southern_islands/tahiti.hpp @@ -0,0 +1,84 @@ +#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_AMD_SOUTHERN_ISLANDS_HPP_ +#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_AMD_SOUTHERN_ISLANDS_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. 
+ + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +#include "viennacl/device_specific/templates/matrix_product_template.hpp" + +#include "viennacl/device_specific/forwards.h" +#include "viennacl/device_specific/builtin_database/common.hpp" + +namespace viennacl{ +namespace device_specific{ +namespace builtin_database{ +namespace devices{ +namespace gpu{ +namespace amd{ +namespace southern_islands{ +namespace tahiti{ +/* Matrix-product parameter entries for the device named "Tahiti". Each overload below makes a single call that passes amd_id, CL_DEVICE_TYPE_GPU, ocl::southern_islands, "Tahiti" and one matrix_product_template::parameters_type to db: add_8B overloads call db.add_8B, add_4B overloads call db.add_4B. The two unnamed char_to_type tag arguments ('T' or 'N') only select the overload. NOTE(review): 8B/4B presumably mean the scalar size in bytes and 'T'/'N' transposed/non-transposed operands - confirm against the callers; the meaning of the parameters_type arguments is not visible in this header. NOTE(review): the include guard above ends in SOUTHERN_ISLANDS_HPP_ without the file name, so another header of this family using the same pattern would collide. */ +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) +{ + db.add_8B(amd_id, CL_DEVICE_TYPE_GPU, ocl::southern_islands, "Tahiti", matrix_product_template::parameters_type(1,32,16,8,1,1,16,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) +{ + db.add_8B(amd_id, CL_DEVICE_TYPE_GPU, ocl::southern_islands, "Tahiti", matrix_product_template::parameters_type(1,16,16,16,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,4,64)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) +{ + db.add_8B(amd_id, CL_DEVICE_TYPE_GPU, ocl::southern_islands, "Tahiti", matrix_product_template::parameters_type(2,8,2,16,4,2,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) +{ + db.add_8B(amd_id, CL_DEVICE_TYPE_GPU, ocl::southern_islands, "Tahiti", matrix_product_template::parameters_type(1,16,4,4,4,2,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_LOCAL,4,16)); +} + +inline void 
add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) +{ + db.add_4B(amd_id, CL_DEVICE_TYPE_GPU, ocl::southern_islands, "Tahiti", matrix_product_template::parameters_type(1,8,32,32,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,8)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) +{ + db.add_4B(amd_id, CL_DEVICE_TYPE_GPU, ocl::southern_islands, "Tahiti", matrix_product_template::parameters_type(1,8,32,32,4,1,2,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,8)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) +{ + db.add_4B(amd_id, CL_DEVICE_TYPE_GPU, ocl::southern_islands, "Tahiti", matrix_product_template::parameters_type(1,64,32,4,4,2,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_LOCAL,16,16)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) +{ + db.add_4B(amd_id, CL_DEVICE_TYPE_GPU, ocl::southern_islands, "Tahiti", matrix_product_template::parameters_type(1,128,32,2,2,1,8,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_LOCAL,32,8)); +} + + +} +} +} +} +} +} +} +} +#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/amd/volcanic_islands/hawaii.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/amd/volcanic_islands/hawaii.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/amd/volcanic_islands/hawaii.hpp new file mode 100644 index 0000000..c2674f0 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/amd/volcanic_islands/hawaii.hpp @@ -0,0 +1,84 @@ +#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_AMD_VOLCANIC_ISLANDS_HPP_ +#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_AMD_VOLCANIC_ISLANDS_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. 
+ + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +#include "viennacl/device_specific/templates/matrix_product_template.hpp" + +#include "viennacl/device_specific/forwards.h" +#include "viennacl/device_specific/builtin_database/common.hpp" + +namespace viennacl{ +namespace device_specific{ +namespace builtin_database{ +namespace devices{ +namespace gpu{ +namespace amd{ +namespace volcanic_islands{ +namespace hawaii{ +/* Matrix-product parameter entries for the device named "Hawaii". Each overload below makes a single call that passes amd_id, CL_DEVICE_TYPE_GPU, ocl::volcanic_islands, "Hawaii" and one matrix_product_template::parameters_type to db: add_8B overloads call db.add_8B, add_4B overloads call db.add_4B. The two unnamed char_to_type tag arguments ('T' or 'N') only select the overload. The add_8B overloads for the tag pairs ('T','T') and ('T','N') pass identical parameter values. NOTE(review): 8B/4B presumably mean the scalar size in bytes and 'T'/'N' transposed/non-transposed operands - confirm against the callers; the meaning of the parameters_type arguments is not visible in this header. NOTE(review): the include guard above ends in VOLCANIC_ISLANDS_HPP_ without the file name, so another header of this family using the same pattern would collide. */ +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) +{ + db.add_8B(amd_id, CL_DEVICE_TYPE_GPU, ocl::volcanic_islands, "Hawaii", matrix_product_template::parameters_type(1,8,16,32,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) +{ + db.add_8B(amd_id, CL_DEVICE_TYPE_GPU, ocl::volcanic_islands, "Hawaii", matrix_product_template::parameters_type(1,8,16,32,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) +{ + db.add_8B(amd_id, CL_DEVICE_TYPE_GPU, ocl::volcanic_islands, "Hawaii", matrix_product_template::parameters_type(2,8,8,8,6,1,6,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) +{ + db.add_8B(amd_id, CL_DEVICE_TYPE_GPU, ocl::volcanic_islands, "Hawaii", matrix_product_template::parameters_type(1,16,16,16,2,1,8,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, 
char_to_type<'T'>, char_to_type<'T'>) +{ + db.add_4B(amd_id, CL_DEVICE_TYPE_GPU, ocl::volcanic_islands, "Hawaii", matrix_product_template::parameters_type(1,1,2,64,8,2,4,FETCH_FROM_GLOBAL_CONTIGUOUS,FETCH_FROM_GLOBAL_STRIDED,0,0)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) +{ + db.add_4B(amd_id, CL_DEVICE_TYPE_GPU, ocl::volcanic_islands, "Hawaii", matrix_product_template::parameters_type(1,16,16,16,2,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) +{ + db.add_4B(amd_id, CL_DEVICE_TYPE_GPU, ocl::volcanic_islands, "Hawaii", matrix_product_template::parameters_type(2,16,16,16,6,1,6,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) +{ + db.add_4B(amd_id, CL_DEVICE_TYPE_GPU, ocl::volcanic_islands, "Hawaii", matrix_product_template::parameters_type(1,64,64,4,2,4,8,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_LOCAL,64,4)); +} + + +} +} +} +} +} +} +} +} +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/fallback.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/fallback.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/fallback.hpp new file mode 100644 index 0000000..ff307f3 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/fallback.hpp @@ -0,0 +1,84 @@ +#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_FALLBACK_HPP_ +#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_FALLBACK_HPP_ + +/* 
========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +#include "viennacl/device_specific/forwards.h" +#include "viennacl/device_specific/builtin_database/common.hpp" + +#include "viennacl/device_specific/templates/matrix_product_template.hpp" + +namespace viennacl{ +namespace device_specific{ +namespace builtin_database{ +namespace devices{ +namespace gpu{ +namespace fallback{ +/* Fallback matrix-product entries. All eight overloads below (add_4B and add_8B, one per 'N'/'T' tag pair) pass unknown_id, CL_DEVICE_TYPE_GPU, unknown, an empty device name and the same parameters_type(1,8,8,8,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8) to db.add_4B or db.add_8B respectively; the unnamed char_to_type tag arguments only select the overload. NOTE(review): presumably these are the defaults used for GPUs that have no dedicated entry - confirm in the database lookup code, which is not visible here. */ + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) +{ + db.add_4B(unknown_id, CL_DEVICE_TYPE_GPU, unknown, "", matrix_product_template::parameters_type(1,8,8,8,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) +{ + db.add_4B(unknown_id, CL_DEVICE_TYPE_GPU, unknown, "", matrix_product_template::parameters_type(1,8,8,8,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) +{ + db.add_4B(unknown_id, CL_DEVICE_TYPE_GPU, unknown, "", matrix_product_template::parameters_type(1,8,8,8,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) +{ + db.add_4B(unknown_id, CL_DEVICE_TYPE_GPU, unknown, "", 
matrix_product_template::parameters_type(1,8,8,8,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8)); +} + + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) +{ + db.add_8B(unknown_id, CL_DEVICE_TYPE_GPU, unknown, "", matrix_product_template::parameters_type(1,8,8,8,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) +{ + db.add_8B(unknown_id, CL_DEVICE_TYPE_GPU, unknown, "", matrix_product_template::parameters_type(1,8,8,8,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) +{ + db.add_8B(unknown_id, CL_DEVICE_TYPE_GPU, unknown, "", matrix_product_template::parameters_type(1,8,8,8,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) +{ + db.add_8B(unknown_id, CL_DEVICE_TYPE_GPU, unknown, "", matrix_product_template::parameters_type(1,8,8,8,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,8,8)); +} + + +} +} +} +} +} +} + + +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gt_540m.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gt_540m.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gt_540m.hpp new file mode 100644 index 0000000..24c02b7 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gt_540m.hpp @@ -0,0 +1,59 @@ +#ifndef 
VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_FERMI_GT540M_HPP_ +#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_FERMI_GT540M_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +#include "viennacl/device_specific/templates/matrix_product_template.hpp" + +#include "viennacl/device_specific/forwards.h" +#include "viennacl/device_specific/builtin_database/common.hpp" + +namespace viennacl{ +namespace device_specific{ +namespace builtin_database{ +namespace devices{ +namespace gpu{ +namespace nvidia{ +namespace fermi{ +namespace geforce_gt_540m{ +/* Matrix-product parameter entries for the device named "GeForce GT 540M". Each overload below makes a single call that passes nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GT 540M" and one matrix_product_template::parameters_type to db.add_4B. Only three add_4B overloads are defined in this namespace (tag pairs 'T','N'; 'N','T'; 'N','N'); there is no add_4B for 'T','T' and no add_8B overload at all. The unnamed char_to_type tag arguments only select the overload. NOTE(review): confirm that no caller expects the missing overloads in this namespace. NOTE(review): 4B presumably means the scalar size in bytes and 'T'/'N' transposed/non-transposed operands - confirm against the callers; the meaning of the parameters_type arguments is not visible in this header. */ +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GT 540M", matrix_product_template::parameters_type(1, 16, 16, 8, 4, 1, 8, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 16, 8)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GT 540M", matrix_product_template::parameters_type(1, 16, 16, 16, 8, 1, 4, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 32, 8)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GT 540M", 
matrix_product_template::parameters_type(1, 8, 16, 16, 8, 1, 4, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 16, 8)); +} + + +} +} +} +} +} +} +} +} +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_470.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_470.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_470.hpp new file mode 100644 index 0000000..31a329b --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_470.hpp @@ -0,0 +1,83 @@ +#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_FERMI_GEFORCE_GTX_470_HPP_ +#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_FERMI_GEFORCE_GTX_470_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. 
+ + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +#include "viennacl/device_specific/templates/matrix_product_template.hpp" + +#include "viennacl/device_specific/forwards.h" +#include "viennacl/device_specific/builtin_database/common.hpp" + +namespace viennacl{ +namespace device_specific{ +namespace builtin_database{ +namespace devices{ +namespace gpu{ +namespace nvidia{ +namespace fermi{ +namespace geforce_gtx_470{ +/* Matrix-product parameter entries for the device named "GeForce GTX 470". Each overload below makes a single call that passes nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 470" and one matrix_product_template::parameters_type to db: add_8B overloads call db.add_8B, add_4B overloads call db.add_4B. The two unnamed char_to_type tag arguments ('T' or 'N') only select the overload. NOTE(review): 8B/4B presumably mean the scalar size in bytes and 'T'/'N' transposed/non-transposed operands - confirm against the callers; the meaning of the parameters_type arguments is not visible in this header. */ +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 470", matrix_product_template::parameters_type(1,2,32,32,4,1,2,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_STRIDED,32,2)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 470", matrix_product_template::parameters_type(1,8,16,8,2,2,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,4)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 470", matrix_product_template::parameters_type(1,128,32,1,2,1,8,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 470", matrix_product_template::parameters_type(1,16,32,4,4,1,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0)); +} + +inline void 
add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 470", matrix_product_template::parameters_type(1,2,16,64,8,1,2,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_CONTIGUOUS,16,8)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 470", matrix_product_template::parameters_type(1,32,32,16,2,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,16)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 470", matrix_product_template::parameters_type(1,8,16,32,8,2,2,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,8)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 470", matrix_product_template::parameters_type(1,16,32,16,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,8)); +} + +} +} +} +} +} +} +} +} +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_580.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_580.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_580.hpp new file mode 100644 index 0000000..7015ea5 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_580.hpp @@ -0,0 +1,84 @@ +#ifndef 
VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_FERMI_GTX580_HPP_ +#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_FERMI_GTX580_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +#include "viennacl/device_specific/templates/matrix_product_template.hpp" + +#include "viennacl/device_specific/forwards.h" +#include "viennacl/device_specific/builtin_database/common.hpp" + +namespace viennacl{ +namespace device_specific{ +namespace builtin_database{ +namespace devices{ +namespace gpu{ +namespace nvidia{ +namespace fermi{ +namespace geforce_gtx_580{ +/* Matrix-product parameter entries for the device named "GeForce GTX 580". Each overload below makes a single call that passes nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 580" and one matrix_product_template::parameters_type to db: add_8B overloads call db.add_8B, add_4B overloads call db.add_4B. The two unnamed char_to_type tag arguments ('T' or 'N') only select the overload. NOTE(review): 8B/4B presumably mean the scalar size in bytes and 'T'/'N' transposed/non-transposed operands - confirm against the callers; the meaning of the parameters_type arguments is not visible in this header. */ +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 580", matrix_product_template::parameters_type(1,2,1,128,4,1,4,FETCH_FROM_GLOBAL_CONTIGUOUS,FETCH_FROM_GLOBAL_STRIDED,0,0)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 580", matrix_product_template::parameters_type(1,16,128,32,2,4,2,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_STRIDED,32,16)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 580", 
matrix_product_template::parameters_type(1,4,64,128,4,1,2,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_STRIDED,0,0)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 580", matrix_product_template::parameters_type(1,128,32,1,1,1,8,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_LOCAL,32,4)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 580", matrix_product_template::parameters_type(1,2,32,32,8,1,2,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_STRIDED,32,2)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 580", matrix_product_template::parameters_type(1,16,32,16,4,4,2,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,8)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 580", matrix_product_template::parameters_type(2,16,16,16,4,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "GeForce GTX 580", matrix_product_template::parameters_type(1,128,16,2,4,1,8,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0)); +} + + +} +} +} +} +} +} +} +} +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/tesla_c2050.hpp ---------------------------------------------------------------------- diff --git 
a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/tesla_c2050.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/tesla_c2050.hpp new file mode 100644 index 0000000..f430d6c --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/tesla_c2050.hpp @@ -0,0 +1,84 @@ +#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_FERMI_TESLA_C2050_HPP_ +#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_FERMI_TESLA_C2050_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +#include "viennacl/device_specific/templates/matrix_product_template.hpp" + +#include "viennacl/device_specific/forwards.h" +#include "viennacl/device_specific/builtin_database/common.hpp" + +namespace viennacl{ +namespace device_specific{ +namespace builtin_database{ +namespace devices{ +namespace gpu{ +namespace nvidia{ +namespace fermi{ +namespace tesla_c2050{ +/* Matrix-product parameter entries for the device named "Tesla C2050". Each overload below makes a single call that passes nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "Tesla C2050" and one matrix_product_template::parameters_type to db: add_8B overloads call db.add_8B, add_4B overloads call db.add_4B. The two unnamed char_to_type tag arguments ('T' or 'N') only select the overload. The add_4B overloads for the tag pairs ('T','T'), ('T','N') and ('N','N') pass identical parameter values. NOTE(review): 8B/4B presumably mean the scalar size in bytes and 'T'/'N' transposed/non-transposed operands - confirm against the callers; the meaning of the parameters_type arguments is not visible in this header. */ +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "Tesla C2050", matrix_product_template::parameters_type(1,4,32,32,8,1,2,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_STRIDED,16,8)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, 
char_to_type<'N'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "Tesla C2050", matrix_product_template::parameters_type(1,16,32,32,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_STRIDED,32,16)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "Tesla C2050", matrix_product_template::parameters_type(1,4,128,64,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_CONTIGUOUS,16,16)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "Tesla C2050", matrix_product_template::parameters_type(1,16,16,16,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "Tesla C2050", matrix_product_template::parameters_type(1,16,32,16,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,8)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "Tesla C2050", matrix_product_template::parameters_type(1,16,32,16,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,8)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "Tesla C2050", matrix_product_template::parameters_type(1,16,16,16,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,8)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::fermi, "Tesla C2050", 
matrix_product_template::parameters_type(1,16,32,16,4,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,8)); +} + + +} +} +} +} +} +} +} +} +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/kepler/tesla_k20m.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/kepler/tesla_k20m.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/kepler/tesla_k20m.hpp new file mode 100644 index 0000000..73a62fc --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/kepler/tesla_k20m.hpp @@ -0,0 +1,84 @@ +#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_KEPLER_K20M_HPP_ +#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_KEPLER_K20M_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. 
+ + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +#include "viennacl/device_specific/templates/matrix_product_template.hpp" + +#include "viennacl/device_specific/forwards.h" +#include "viennacl/device_specific/builtin_database/common.hpp" + +namespace viennacl{ +namespace device_specific{ +namespace builtin_database{ +namespace devices{ +namespace gpu{ +namespace nvidia{ +namespace kepler{ +namespace tesla_k20m{ +/* Matrix-product parameter entries for the device named "Tesla K20m". Each overload below makes a single call that passes nvidia_id, CL_DEVICE_TYPE_GPU, ocl::kepler, "Tesla K20m" and one matrix_product_template::parameters_type to db: add_8B overloads call db.add_8B, add_4B overloads call db.add_4B. The two unnamed char_to_type tag arguments ('T' or 'N') only select the overload. NOTE(review): 8B/4B presumably mean the scalar size in bytes and 'T'/'N' transposed/non-transposed operands - confirm against the callers; the meaning of the parameters_type arguments is not visible in this header. */ +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::kepler, "Tesla K20m", matrix_product_template::parameters_type(1,2,8,32,8,2,4,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_STRIDED,4,16)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::kepler, "Tesla K20m", matrix_product_template::parameters_type(1,16,16,32,2,1,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,32)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::kepler, "Tesla K20m", matrix_product_template::parameters_type(1,2,8,64,16,1,2,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_STRIDED,32,4)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::kepler, "Tesla K20m", matrix_product_template::parameters_type(1,128,32,1,1,1,16,FETCH_FROM_GLOBAL_CONTIGUOUS,FETCH_FROM_LOCAL,16,8)); +} + +inline void 
add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::kepler, "Tesla K20m", matrix_product_template::parameters_type(1,8,32,16,4,8,4,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_STRIDED,8,16)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::kepler, "Tesla K20m", matrix_product_template::parameters_type(1,32,16,32,8,2,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,64)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::kepler, "Tesla K20m", matrix_product_template::parameters_type(4,8,2,4,8,2,8,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::kepler, "Tesla K20m", matrix_product_template::parameters_type(1,128,64,1,4,2,16,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_LOCAL,16,8)); +} + + +} +} +} +} +} +} +} +} +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/maxwell/geforce_gtx_750_ti.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/maxwell/geforce_gtx_750_ti.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/maxwell/geforce_gtx_750_ti.hpp new file mode 100644 index 0000000..2c3f080 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/maxwell/geforce_gtx_750_ti.hpp @@ -0,0 +1,85 @@ +#ifndef 
VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_3_GEFORCE_GTX_750_TI_HPP_ +#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_3_GEFORCE_GTX_750_TI_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +#include "viennacl/device_specific/templates/matrix_product_template.hpp" + + +#include "viennacl/device_specific/forwards.h" +#include "viennacl/device_specific/builtin_database/common.hpp" + +namespace viennacl{ +namespace device_specific{ +namespace builtin_database{ +namespace devices{ +namespace gpu{ +namespace nvidia{ +namespace maxwell{ +namespace geforce_gtx_750_ti{ +/* Matrix-product parameter entries for the device named "GeForce GTX 750 Ti". Each overload below makes a single call that passes nvidia_id, CL_DEVICE_TYPE_GPU, ocl::maxwell, "GeForce GTX 750 Ti" and one matrix_product_template::parameters_type to db: add_8B overloads call db.add_8B, add_4B overloads call db.add_4B. The two unnamed char_to_type tag arguments ('T' or 'N') only select the overload. NOTE(review): 8B/4B presumably mean the scalar size in bytes and 'T'/'N' transposed/non-transposed operands - confirm against the callers; the meaning of the parameters_type arguments is not visible in this header. NOTE(review): the include guard above spells the family as NVIDIA_3 while the directory and namespace say maxwell; harmless as long as the macro stays unique, but inconsistent with the sibling headers. */ +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::maxwell, "GeForce GTX 750 Ti", matrix_product_template::parameters_type(1,2,8,128,1,1,2,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_STRIDED,0,0)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::maxwell, "GeForce GTX 750 Ti", matrix_product_template::parameters_type(1,8,32,32,2,1,2,FETCH_FROM_GLOBAL_CONTIGUOUS,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, 
ocl::maxwell, "GeForce GTX 750 Ti", matrix_product_template::parameters_type(1,16,8,32,1,8,2,FETCH_FROM_GLOBAL_CONTIGUOUS,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::maxwell, "GeForce GTX 750 Ti", matrix_product_template::parameters_type(1,16,8,32,1,2,2,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_STRIDED,0,0)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::maxwell, "GeForce GTX 750 Ti", matrix_product_template::parameters_type(1,4,32,16,8,2,4,FETCH_FROM_LOCAL,FETCH_FROM_GLOBAL_STRIDED,16,4)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::maxwell, "GeForce GTX 750 Ti", matrix_product_template::parameters_type(1,16,16,16,4,2,8,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::maxwell, "GeForce GTX 750 Ti", matrix_product_template::parameters_type(1,16,16,32,4,8,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,32,16)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::maxwell, "GeForce GTX 750 Ti", matrix_product_template::parameters_type(1,16,16,16,8,4,4,FETCH_FROM_LOCAL,FETCH_FROM_LOCAL,16,16)); +} + + +} +} +} +} +} +} +} +} +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/tesla/geforce_gtx_260.hpp 
---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/tesla/geforce_gtx_260.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/tesla/geforce_gtx_260.hpp new file mode 100644 index 0000000..88dd596 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/devices/gpu/nvidia/tesla/geforce_gtx_260.hpp @@ -0,0 +1,84 @@ +#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_TESLA_HPP_ +#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_DEVICES_GPU_NVIDIA_TESLA_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +#include "viennacl/device_specific/templates/matrix_product_template.hpp" + +#include "viennacl/device_specific/forwards.h" +#include "viennacl/device_specific/builtin_database/common.hpp" + +namespace viennacl{ +namespace device_specific{ +namespace builtin_database{ +namespace devices{ +namespace gpu{ +namespace nvidia{ +namespace tesla{ +namespace geforce_gtx_260{ + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::tesla, "GeForce GTX 260", matrix_product_template::parameters_type(1,32,2,16,1,1,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0)); +} + +inline void 
add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::tesla, "GeForce GTX 260", matrix_product_template::parameters_type(1,32,2,16,1,1,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::tesla, "GeForce GTX 260", matrix_product_template::parameters_type(1,32,2,16,1,1,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0)); +} + +inline void add_8B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'N'>) +{ + db.add_8B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::tesla, "GeForce GTX 260", matrix_product_template::parameters_type(1,32,2,16,1,1,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'T'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::tesla, "GeForce GTX 260", matrix_product_template::parameters_type(1,16,2,16,1,1,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'T'>, char_to_type<'N'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::tesla, "GeForce GTX 260", matrix_product_template::parameters_type(1,16,2,16,1,1,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, char_to_type<'N'>, char_to_type<'T'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::tesla, "GeForce GTX 260", matrix_product_template::parameters_type(1,16,2,16,1,1,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0)); +} + +inline void add_4B(database_type<matrix_product_template::parameters_type> & db, 
char_to_type<'N'>, char_to_type<'N'>) +{ + db.add_4B(nvidia_id, CL_DEVICE_TYPE_GPU, ocl::tesla, "GeForce GTX 260", matrix_product_template::parameters_type(1,16,2,16,1,1,4,FETCH_FROM_GLOBAL_STRIDED,FETCH_FROM_GLOBAL_CONTIGUOUS,0,0)); +} + + +} +} +} +} +} +} +} +} +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/matrix_product.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/matrix_product.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/matrix_product.hpp new file mode 100644 index 0000000..4437956 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/builtin_database/matrix_product.hpp @@ -0,0 +1,244 @@ +#ifndef VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_MATRIX_PRODUCT_HPP_ +#define VIENNACL_DEVICE_SPECIFIC_BUILTIN_DATABASE_MATRIX_PRODUCT_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. 
+ + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +#include "viennacl/device_specific/builtin_database/devices/gpu/amd/northern_islands/barts.hpp" + +#include "viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/tesla_c2050.hpp" + +#include "viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_470.hpp" + +#include "viennacl/device_specific/builtin_database/devices/gpu/nvidia/maxwell/geforce_gtx_750_ti.hpp" + +#include "viennacl/device_specific/builtin_database/devices/gpu/amd/northern_islands/scrapper.hpp" + +#include "viennacl/device_specific/builtin_database/devices/gpu/nvidia/tesla/geforce_gtx_260.hpp" + +#include "viennacl/device_specific/builtin_database/devices/gpu/amd/southern_islands/tahiti.hpp" +#include "viennacl/device_specific/builtin_database/devices/gpu/amd/northern_islands/devastator.hpp" + +#include "viennacl/device_specific/builtin_database/devices/gpu/nvidia/kepler/tesla_k20m.hpp" +#include "viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gtx_580.hpp" + +#include "viennacl/device_specific/builtin_database/devices/gpu/amd/volcanic_islands/hawaii.hpp" + +#include "viennacl/device_specific/builtin_database/devices/gpu/amd/evergreen/cypress.hpp" +#include "viennacl/device_specific/builtin_database/devices/gpu/amd/evergreen/cedar.hpp" + + +#include "viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gt_540m.hpp" + + +#include "viennacl/ocl/device_utils.hpp" +#include "viennacl/scheduler/forwards.h" + +#include "viennacl/device_specific/builtin_database/devices/accelerator/fallback.hpp" +#include "viennacl/device_specific/builtin_database/devices/cpu/fallback.hpp" +#include 
"viennacl/device_specific/builtin_database/devices/gpu/fallback.hpp" +#include "viennacl/device_specific/builtin_database/devices/gpu/nvidia/fermi/geforce_gt_540m.hpp" + +/** @file viennacl/device_specific/builtin_database/matrix_product.hpp +* +* Initializes the device database with the provided profiles. Updated semi-automatically. +*/ + +namespace viennacl +{ +namespace device_specific +{ +namespace builtin_database +{ + +inline database_type<matrix_product_template::parameters_type> init_matrix_product_N_N() +{ + database_type<matrix_product_template::parameters_type> result; + + devices::accelerator::fallback::add_4B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::accelerator::fallback::add_8B(result, char_to_type<'N'>(), char_to_type<'N'>()); + + devices::cpu::fallback::add_4B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::cpu::fallback::add_8B(result, char_to_type<'N'>(), char_to_type<'N'>()); + + devices::gpu::fallback::add_4B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::fallback::add_8B(result, char_to_type<'N'>(), char_to_type<'N'>()); + + devices::gpu::amd::evergreen::cedar::add_4B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::amd::evergreen::cypress::add_4B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::amd::volcanic_islands::hawaii::add_4B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::amd::volcanic_islands::hawaii::add_8B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::nvidia::fermi::geforce_gtx_580::add_4B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::nvidia::fermi::geforce_gtx_580::add_8B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::nvidia::kepler::tesla_k20m::add_4B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::nvidia::kepler::tesla_k20m::add_8B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::amd::southern_islands::tahiti::add_4B(result, 
char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::amd::southern_islands::tahiti::add_8B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::amd::northern_islands::devastator::add_4B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::nvidia::tesla::geforce_gtx_260::add_4B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::amd::northern_islands::scrapper::add_4B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::nvidia::maxwell::geforce_gtx_750_ti::add_4B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::nvidia::maxwell::geforce_gtx_750_ti::add_8B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::nvidia::fermi::geforce_gtx_470::add_4B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::nvidia::fermi::geforce_gtx_470::add_8B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::nvidia::fermi::tesla_c2050::add_4B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::nvidia::fermi::tesla_c2050::add_8B(result, char_to_type<'N'>(), char_to_type<'N'>()); + devices::gpu::amd::northern_islands::barts::add_4B(result, char_to_type<'N'>(), char_to_type<'N'>()); + + return result; +} + +inline database_type<matrix_product_template::parameters_type> init_matrix_product_T_N() +{ + database_type<matrix_product_template::parameters_type> result; + + devices::accelerator::fallback::add_4B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::accelerator::fallback::add_8B(result, char_to_type<'T'>(), char_to_type<'N'>()); + + devices::cpu::fallback::add_4B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::cpu::fallback::add_8B(result, char_to_type<'T'>(), char_to_type<'N'>()); + + devices::gpu::fallback::add_4B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::fallback::add_8B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::nvidia::fermi::geforce_gt_540m::add_4B(result, 
char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::amd::evergreen::cedar::add_4B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::amd::evergreen::cypress::add_4B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::amd::volcanic_islands::hawaii::add_4B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::amd::volcanic_islands::hawaii::add_8B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::nvidia::fermi::geforce_gtx_580::add_4B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::nvidia::fermi::geforce_gtx_580::add_8B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::nvidia::kepler::tesla_k20m::add_4B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::nvidia::kepler::tesla_k20m::add_8B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::amd::southern_islands::tahiti::add_4B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::amd::southern_islands::tahiti::add_8B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::amd::northern_islands::devastator::add_4B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::nvidia::tesla::geforce_gtx_260::add_4B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::amd::northern_islands::scrapper::add_4B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::nvidia::maxwell::geforce_gtx_750_ti::add_4B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::nvidia::maxwell::geforce_gtx_750_ti::add_8B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::nvidia::fermi::geforce_gtx_470::add_4B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::nvidia::fermi::geforce_gtx_470::add_8B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::nvidia::fermi::tesla_c2050::add_4B(result, char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::nvidia::fermi::tesla_c2050::add_8B(result, 
char_to_type<'T'>(), char_to_type<'N'>()); + devices::gpu::amd::northern_islands::barts::add_4B(result, char_to_type<'T'>(), char_to_type<'N'>()); + + return result; +} + +inline database_type<matrix_product_template::parameters_type> init_matrix_product_N_T() +{ + database_type<matrix_product_template::parameters_type> result; + + devices::accelerator::fallback::add_4B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::accelerator::fallback::add_8B(result, char_to_type<'N'>(), char_to_type<'T'>()); + + devices::cpu::fallback::add_4B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::cpu::fallback::add_8B(result, char_to_type<'N'>(), char_to_type<'T'>()); + + devices::gpu::fallback::add_4B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::gpu::fallback::add_8B(result, char_to_type<'N'>(), char_to_type<'T'>()); + + devices::gpu::nvidia::fermi::geforce_gt_540m::add_4B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::gpu::amd::evergreen::cedar::add_4B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::gpu::amd::evergreen::cypress::add_4B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::gpu::amd::volcanic_islands::hawaii::add_4B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::gpu::amd::volcanic_islands::hawaii::add_8B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::gpu::nvidia::fermi::geforce_gtx_580::add_4B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::gpu::nvidia::fermi::geforce_gtx_580::add_8B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::gpu::nvidia::kepler::tesla_k20m::add_4B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::gpu::nvidia::kepler::tesla_k20m::add_8B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::gpu::amd::southern_islands::tahiti::add_4B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::gpu::amd::southern_islands::tahiti::add_8B(result, char_to_type<'N'>(), char_to_type<'T'>()); + 
devices::gpu::amd::northern_islands::devastator::add_4B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::gpu::nvidia::tesla::geforce_gtx_260::add_4B(result, char_to_type<'N'>(), char_to_type<'T'>()); + + + devices::gpu::amd::northern_islands::scrapper::add_4B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::gpu::nvidia::maxwell::geforce_gtx_750_ti::add_4B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::gpu::nvidia::maxwell::geforce_gtx_750_ti::add_8B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::gpu::nvidia::fermi::geforce_gtx_470::add_4B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::gpu::nvidia::fermi::geforce_gtx_470::add_8B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::gpu::nvidia::fermi::tesla_c2050::add_4B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::gpu::nvidia::fermi::tesla_c2050::add_8B(result, char_to_type<'N'>(), char_to_type<'T'>()); + devices::gpu::amd::northern_islands::barts::add_4B(result, char_to_type<'N'>(), char_to_type<'T'>()); + + return result; +} + +inline database_type<matrix_product_template::parameters_type> init_matrix_product_T_T() +{ + database_type<matrix_product_template::parameters_type> result; + + devices::accelerator::fallback::add_4B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::accelerator::fallback::add_8B(result, char_to_type<'T'>(), char_to_type<'T'>()); + + devices::cpu::fallback::add_4B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::cpu::fallback::add_8B(result, char_to_type<'T'>(), char_to_type<'T'>()); + + devices::gpu::fallback::add_4B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::fallback::add_8B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::amd::evergreen::cedar::add_4B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::amd::evergreen::cypress::add_4B(result, char_to_type<'T'>(), char_to_type<'T'>()); + 
devices::gpu::amd::volcanic_islands::hawaii::add_4B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::amd::volcanic_islands::hawaii::add_8B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::nvidia::fermi::geforce_gtx_580::add_4B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::nvidia::fermi::geforce_gtx_580::add_8B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::nvidia::kepler::tesla_k20m::add_4B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::nvidia::kepler::tesla_k20m::add_8B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::amd::southern_islands::tahiti::add_4B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::amd::southern_islands::tahiti::add_8B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::amd::northern_islands::devastator::add_4B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::nvidia::tesla::geforce_gtx_260::add_4B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::amd::northern_islands::scrapper::add_4B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::nvidia::maxwell::geforce_gtx_750_ti::add_4B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::nvidia::maxwell::geforce_gtx_750_ti::add_8B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::nvidia::fermi::geforce_gtx_470::add_4B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::nvidia::fermi::geforce_gtx_470::add_8B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::nvidia::fermi::tesla_c2050::add_4B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::nvidia::fermi::tesla_c2050::add_8B(result, char_to_type<'T'>(), char_to_type<'T'>()); + devices::gpu::amd::northern_islands::barts::add_4B(result, char_to_type<'T'>(), char_to_type<'T'>()); + + return result; +} + +static database_type<matrix_product_template::parameters_type> 
matrix_product_N_N = init_matrix_product_N_N(); +static database_type<matrix_product_template::parameters_type> matrix_product_T_N = init_matrix_product_T_N(); +static database_type<matrix_product_template::parameters_type> matrix_product_N_T = init_matrix_product_N_T(); +static database_type<matrix_product_template::parameters_type> matrix_product_T_T = init_matrix_product_T_T(); + +template<class NumericT> +matrix_product_template::parameters_type const & matrix_product_params(ocl::device const & device, char A_trans, char B_trans) +{ + assert(A_trans=='N' || A_trans=='T'); + assert(B_trans=='N' || B_trans=='T'); + database_type<matrix_product_template::parameters_type> * db; + if (A_trans=='N' && B_trans=='N') + db = &matrix_product_N_N; + else if (A_trans=='T' && B_trans=='N') + db = &matrix_product_T_N; + else if (A_trans=='N' && B_trans=='T') + db = &matrix_product_N_T; + else + db = &matrix_product_T_T; + return get_parameters<NumericT>(*db, device); +} + + +} +} +} +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/execute.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/execute.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/execute.hpp new file mode 100644 index 0000000..2f4960a --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/execute.hpp @@ -0,0 +1,55 @@ +#ifndef VIENNACL_DEVICE_SPECIFIC_EXECUTE_HPP +#define VIENNACL_DEVICE_SPECIFIC_EXECUTE_HPP + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. 
+ + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + + +/** @file viennacl/device_specific/execute.hpp + @brief the user interface for the code generator +*/ + +#include <cstring> +#include <vector> +#include <typeinfo> + +#include "viennacl/scheduler/forwards.h" +#include "viennacl/device_specific/forwards.h" +#include "viennacl/device_specific/templates/template_base.hpp" +#include "viennacl/device_specific/tree_parsing.hpp" +#include "viennacl/device_specific/execution_handler.hpp" + +#include "viennacl/tools/tools.hpp" +#include "viennacl/tools/timer.hpp" + +namespace viennacl +{ +namespace device_specific +{ + +inline void execute(template_base const & T, statements_container const & statements, viennacl::ocl::context & ctx = viennacl::ocl::current_context(), bool force_compilation = false) +{ + //Generate program name + std::string program_name = tree_parsing::statements_representation(statements, BIND_TO_HANDLE); + execution_handler handler(program_name, ctx, ctx.current_device(), force_compilation); + handler.add(program_name, T, statements); + handler.execute(program_name, statements); +} + +} +} +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/execution_handler.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/execution_handler.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/execution_handler.hpp new file mode 100644 index 0000000..8f725fd --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/execution_handler.hpp @@ -0,0 +1,102 @@ +#ifndef 
VIENNACL_DEVICE_SPECIFIC_EXECUTION_HANDLER_HPP +#define VIENNACL_DEVICE_SPECIFIC_EXECUTION_HANDLER_HPP + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + + +/** @file viennacl/device_specific/execution_handler.hpp + @brief Helper for handling fallbacks, lazy compilation, input-dependent kernels, etc +*/ + +#include <map> + +#include "viennacl/tools/shared_ptr.hpp" + +#include "viennacl/device_specific/lazy_program_compiler.hpp" +#include "viennacl/device_specific/templates/template_base.hpp" +#include "viennacl/device_specific/utils.hpp" + +namespace viennacl +{ +namespace device_specific +{ + +class execution_handler +{ +public: + typedef std::map< std::string, tools::shared_ptr<template_base> > container_type; + +private: + std::string append_prefix(std::string const & str) + { + return "_" + str; + } + + std::string define_extension(std::string const & ext) + { + // Note: On devices without double precision support, 'ext' is an empty string. + return (ext.length() > 1) ? 
std::string("#pragma OPENCL EXTENSION " + ext + " : enable\n") : std::string("\n"); + } + + void init_program_compiler(std::string const & name, bool force_recompilation) + { + lazy_programs_.push_back(lazy_program_compiler(&ctx_, name, force_recompilation)); + lazy_programs_.back().add(define_extension(device_.double_support_extension())); + } + +public: + execution_handler(std::string const & program_name_base, viennacl::ocl::context & ctx, viennacl::ocl::device const & device, bool force_recompilation = false) : ctx_(ctx), device_(device), program_names_(2) + { + lazy_programs_.reserve(2); + init_program_compiler(program_name_base + "_0", force_recompilation); + init_program_compiler(program_name_base + "_1", force_recompilation); + } + + void add(std::string const & key, template_base const & T, statements_container const & statements) + { + if (kernels_.insert(container_type::value_type(key, T.clone())).second) + { + std::vector<std::string> sources = at(kernels_, key)->generate(append_prefix(key), statements, device_); + assert(sources.size()<=2); + for (unsigned int i = 0; i < sources.size(); ++i) + lazy_programs_[i].add(sources[i]); + } + } + + template_base * template_of(std::string const & key) + { + return at(kernels_, key).get(); + } + + void execute(container_type::key_type const & key, statements_container const & statements) + { + tools::shared_ptr<template_base> & template_pointer = at(kernels_, key); + template_pointer->enqueue(append_prefix(key), lazy_programs_, statements); + } + +private: + viennacl::ocl::context & ctx_; + viennacl::ocl::device const & device_; + container_type kernels_; + std::vector<std::string> program_names_; + std::vector<lazy_program_compiler> lazy_programs_; +}; + +} +} +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/forwards.h ---------------------------------------------------------------------- diff --git 
a/native-viennaCL/src/main/cpp/viennacl/device_specific/forwards.h b/native-viennaCL/src/main/cpp/viennacl/device_specific/forwards.h new file mode 100644 index 0000000..590ed1f --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/forwards.h @@ -0,0 +1,294 @@ +#ifndef VIENNACL_DEVICE_SPECIFIC_FORWARDS_H +#define VIENNACL_DEVICE_SPECIFIC_FORWARDS_H + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + + +/** @file viennacl/device_specific/forwards.h + @brief Forwards declaration +*/ + +#include <list> +#include <map> +#include <set> +#include <stdexcept> + +#include "viennacl/scheduler/io.hpp" + +#include "viennacl/ocl/forwards.h" +#include "viennacl/tools/shared_ptr.hpp" +#include "viennacl/scheduler/forwards.h" + +#include "viennacl/backend/mem_handle.hpp" + +namespace viennacl +{ +namespace device_specific +{ + +//Error codes +static const int TEMPLATE_VALID = 0; +static const int TEMPLATE_LOCAL_MEMORY_OVERFLOW = -1; +static const int TEMPLATE_WORK_GROUP_SIZE_OVERFLOW = -2; +static const int TEMPLATE_LOCAL_SIZE_0_OVERFLOW = -3; +static const int TEMPLATE_LOCAL_SIZE_1_OVERFLOW = -4; +static const int TEMPLATE_LOCAL_SIZE_2_OVERFLOW = -5; +static const int TEMPLATE_LOCAL_SIZE_NOT_WARP_MULTIPLE = -6; +static const int TEMPLATE_INVALID_SIMD_WIDTH = -7; +static const int TEMPLATE_INVALID_FETCHING_POLICY_TYPE= -9; + +static const int TEMPLATE_GLOBAL_MEMORY_REQUIRES_ZERO_LOCAL_FETCH = -10; +static 
const int TEMPLATE_MS_NS_MUST_BE_SIMD_WIDTH_MULTIPLE = -11; +static const int TEMPLATE_KS_MUST_BE_SMALLER_THAN_KL = -12; +static const int TEMPLATE_SIMD_WIDTH_MUST_BE_ONE = -13; +static const int TEMPLATE_LOCAL_FETCH_PRODUCT_MUST_MATCH_LOCAL_SIZE_PRODUCT = -14; +static const int TEMPLATE_LOCAL_FETCH_0_MUST_BE_KL_MULTIPLE = -15; +static const int TEMPLATE_LOCAL_FETCH_0_MUST_BE_NL_MULTIPLE = -16; +static const int TEMPLATE_LOCAL_FETCH_1_MUST_BE_KL_MULTIPLE = -17; +static const int TEMPLATE_LOCAL_FETCH_1_MUST_BE_ML_MULTIPLE = -18; + +struct index_tuple +{ + index_tuple(std::string const & _i, std::string const & _bound0) : i(_i), bound0(_bound0), j(""), bound1(""){ } + index_tuple(std::string const & _i, std::string const & _bound0, std::string const & _j, std::string const & _bound1) : i(_i), bound0(_bound0), j(_j), bound1(_bound1){ } + std::string i; + std::string bound0; + std::string j; + std::string bound1; +}; + +inline bool is_scalar_reduction(scheduler::statement_node const & node) +{ + return node.op.type==scheduler::OPERATION_BINARY_INNER_PROD_TYPE || node.op.type_family==scheduler::OPERATION_VECTOR_REDUCTION_TYPE_FAMILY; +} + +inline bool is_vector_reduction(scheduler::statement_node const & node) +{ + return node.op.type==scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE + || node.op.type_family==scheduler::OPERATION_ROWS_REDUCTION_TYPE_FAMILY + || node.op.type_family==scheduler::OPERATION_COLUMNS_REDUCTION_TYPE_FAMILY; +} + +inline scheduler::statement_node const & lhs_most(scheduler::statement::container_type const & array, vcl_size_t root) +{ + scheduler::statement_node const * current = &array[root]; + while (current->lhs.type_family==scheduler::COMPOSITE_OPERATION_FAMILY) + current = &array[current->lhs.node_index]; + return *current; +} + +enum expression_type +{ + SCALAR_AXPY_TYPE, + VECTOR_AXPY_TYPE, + MATRIX_AXPY_TYPE, + REDUCTION_TYPE, + ROW_WISE_REDUCTION_Nx_TYPE, + ROW_WISE_REDUCTION_Tx_TYPE, + MATRIX_PRODUCT_NN_TYPE, + MATRIX_PRODUCT_TN_TYPE, + 
MATRIX_PRODUCT_NT_TYPE, + MATRIX_PRODUCT_TT_TYPE, + INVALID_EXPRESSION_TYPE +}; + +inline const char * expression_type_to_string(expression_type type) +{ + switch (type) + { + case SCALAR_AXPY_TYPE : return "Scalar AXPY"; + case VECTOR_AXPY_TYPE : return "Vector AXPY"; + case MATRIX_AXPY_TYPE : return "Matrix AXPY"; + case REDUCTION_TYPE : return "Reduction"; + case ROW_WISE_REDUCTION_Nx_TYPE : return "Row-wise reduction: Ax"; + case ROW_WISE_REDUCTION_Tx_TYPE : return "Row-wise reduction : Tx"; + case MATRIX_PRODUCT_NN_TYPE : return "Matrix-Matrix Product : AA"; + case MATRIX_PRODUCT_TN_TYPE : return "Matrix-Matrix Product : TA"; + case MATRIX_PRODUCT_NT_TYPE : return "Matrix-Matrix Product : AT"; + case MATRIX_PRODUCT_TT_TYPE : return "Matrix-Matrix Product : TT"; + default : return "INVALID EXPRESSION"; + } +} + +/** @brief generate the string for a pointer kernel argument */ +static std::string generate_value_kernel_argument(std::string const & scalartype, std::string const & name) +{ + return scalartype + ' ' + name + ","; +} + +/** @brief generate the string for a pointer kernel argument */ +static std::string generate_pointer_kernel_argument(std::string const & address_space, std::string const & scalartype, std::string const & name) +{ + return address_space + " " + scalartype + "* " + name + ","; +} + +/** @brief Emulation of C++11's .at() member for std::map<>, const-version */ +template<typename KeyT, typename ValueT> +ValueT const & at(std::map<KeyT, ValueT> const & map, KeyT const & key) +{ + typename std::map<KeyT, ValueT>::const_iterator it = map.find(key); + if (it != map.end()) + return it->second; + + throw std::out_of_range("Generator: Key not found in map"); +} + +/** @brief Emulation of C++11's .at() member for std::map<>, non-const version */ +template<typename KeyT, typename ValueT> +ValueT & at(std::map<KeyT, ValueT> & map, KeyT const & key) +{ + typename std::map<KeyT, ValueT>::iterator it = map.find(key); + if (it != map.end()) + return 
it->second; + + throw std::out_of_range("Generator: Key not found in map"); +} + +/** @brief Exception for the case the generator is unable to deal with the operation */ +class generator_not_supported_exception : public std::exception +{ +public: + generator_not_supported_exception() : message_() {} + generator_not_supported_exception(std::string message) : message_("ViennaCL: Internal error: The generator cannot handle the statement provided: " + message) {} + virtual const char* what() const throw() { return message_.c_str(); } + virtual ~generator_not_supported_exception() throw() {} +private: + std::string message_; +}; + +namespace utils +{ + class kernel_generation_stream; +} + + +enum leaf_t +{ + LHS_NODE_TYPE, + PARENT_NODE_TYPE, + RHS_NODE_TYPE +}; + +class mapped_object; +class template_base; + +typedef std::pair<vcl_size_t, leaf_t> mapping_key; +typedef std::map<mapping_key, tools::shared_ptr<mapped_object> > mapping_type; + + +namespace tree_parsing +{ + + template<class Fun> + inline void traverse(scheduler::statement const & statement, vcl_size_t root_idx, Fun const & fun, bool inspect); + + inline void process(utils::kernel_generation_stream & stream, leaf_t leaf, std::string const & type_key, std::string const & to_process, + scheduler::statement const & statement, vcl_size_t root_idx, mapping_type const & mapping, std::set<std::string> & already_processed); + inline std::string evaluate(leaf_t leaf, std::map<std::string, std::string> const & accessors, scheduler::statement const & statement, vcl_size_t root_idx,mapping_type const & mapping); +} + +using scheduler::INT_TYPE; +using scheduler::UINT_TYPE; +using scheduler::ULONG_TYPE; +using scheduler::LONG_TYPE; +using scheduler::FLOAT_TYPE; +using scheduler::DOUBLE_TYPE; + +typedef cl_uint vendor_id_type; +typedef cl_device_type device_type; +typedef std::string device_name_type; + +class symbolic_binder +{ +public: + virtual ~symbolic_binder(){ } + virtual bool bind(viennacl::backend::mem_handle 
const * ph) = 0; + virtual unsigned int get(viennacl::backend::mem_handle const * ph) = 0; +}; + +class bind_to_handle : public symbolic_binder +{ +public: + bind_to_handle() : current_arg_(0){ } + bool bind(viennacl::backend::mem_handle const * ph) {return (ph==NULL)?true:memory.insert(std::make_pair((void*)ph, current_arg_)).second; } + unsigned int get(viennacl::backend::mem_handle const * ph){ return bind(ph) ? current_arg_++ : at(memory, (void*)ph); } +private: + unsigned int current_arg_; + std::map<void*,unsigned int> memory; +}; + +class bind_all_unique : public symbolic_binder +{ +public: + bind_all_unique() : current_arg_(0){ } + bool bind(viennacl::backend::mem_handle const *) {return true; } + unsigned int get(viennacl::backend::mem_handle const *){ return current_arg_++; } +private: + unsigned int current_arg_; + std::map<void*,unsigned int> memory; +}; + +enum binding_policy_t{ + BIND_ALL_UNIQUE, + BIND_TO_HANDLE +}; + +inline tools::shared_ptr<symbolic_binder> make_binder(binding_policy_t policy) +{ + if (policy==BIND_TO_HANDLE) + return tools::shared_ptr<symbolic_binder>(new bind_to_handle()); + else + return tools::shared_ptr<symbolic_binder>(new bind_all_unique()); +} + +template<char C> +struct char_to_type{ }; + +class statements_container +{ +public: + typedef std::list<scheduler::statement> data_type; + enum order_type { SEQUENTIAL, INDEPENDENT }; + + statements_container(data_type const & data, order_type order) : data_(data), order_(order) + { } + + statements_container(scheduler::statement const & s0) : order_(INDEPENDENT) + { + data_.push_back(s0); + } + + statements_container(scheduler::statement const & s0, scheduler::statement const & s1, order_type order) : order_(order) + { + data_.push_back(s0); + data_.push_back(s1); + } + + std::list<scheduler::statement> const & data() const { return data_; } + + order_type order() const { return order_; } + +private: + std::list<scheduler::statement> data_; + order_type order_; +}; + +} + +} 
+#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/device_specific/lazy_program_compiler.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/device_specific/lazy_program_compiler.hpp b/native-viennaCL/src/main/cpp/viennacl/device_specific/lazy_program_compiler.hpp new file mode 100644 index 0000000..3e75b9b --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/device_specific/lazy_program_compiler.hpp @@ -0,0 +1,74 @@ +#ifndef VIENNACL_DEVICE_SPECIFIC_LAZY_PROGRAM_COMPILER_HPP +#define VIENNACL_DEVICE_SPECIFIC_LAZY_PROGRAM_COMPILER_HPP + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + + +/** @file viennacl/device_specific/lazy_program_compiler.hpp + @brief Helper for compiling a program lazily +*/ + +#include <map> + +#include "viennacl/ocl/context.hpp" + +namespace viennacl +{ + +namespace device_specific +{ + + class lazy_program_compiler + { + public: + + lazy_program_compiler(viennacl::ocl::context * ctx, std::string const & name, std::string const & src, bool force_recompilation) : ctx_(ctx), name_(name), src_(src), force_recompilation_(force_recompilation){ } + lazy_program_compiler(viennacl::ocl::context * ctx, std::string const & name, bool force_recompilation) : ctx_(ctx), name_(name), force_recompilation_(force_recompilation){ } + + void add(std::string const & src) { 
src_+=src; } + + std::string const & src() const { return src_; } + + viennacl::ocl::program & program() + { + if (force_recompilation_ && ctx_->has_program(name_)) + ctx_->delete_program(name_); + if (!ctx_->has_program(name_)) + { +#ifdef VIENNACL_BUILD_INFO + std::cerr << "Creating program " << program_name << std::endl; +#endif + ctx_->add_program(src_, name_); +#ifdef VIENNACL_BUILD_INFO + std::cerr << "Done creating program " << program_name << std::endl; +#endif + } + return ctx_->get_program(name_); + } + + private: + viennacl::ocl::context * ctx_; + std::string name_; + std::string src_; + bool force_recompilation_; + }; + +} + +} +#endif
