Hi Forum,

I would like to thank Carlos for sharing the code from his attempt to bring 
OSG and OpenCL together. I tried the code, but I am getting a segmentation fault. 
So I am posting the code here for your review; please share your ideas with 
the forum:


Code:

#include <iostream>
#include <osg/GL>
#include <osgGA/TrackballManipulator>
#include <osgViewer/Viewer>
#include <osgViewer/ViewerEventHandlers>
#include <osgViewer/api/X11/GraphicsWindowX11>
#include <cassert>

#define __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hpp>

const int nElements = 4;

/**
 * Builds a Geode holding a single indexed triangle whose vertices live in a
 * vertex buffer object (display lists are disabled so the VBO path is used).
 *
 * The vertex array holds nElements float4 entries; only the first three are
 * assigned and referenced by the triangle, the remaining entry keeps its
 * default-constructed value.
 *
 * @return a ref_ptr owning the newly created Geode.
 */
osg::ref_ptr<osg::Geode> CreateGeometry()
{
        // ref_ptr keeps the objects alive until the scene graph takes ownership.
        osg::ref_ptr<osg::Geometry> geometry = new osg::Geometry;
        geometry->setUseDisplayList(false);
        geometry->setUseVertexBufferObjects(true);

        // One triangle made of vertices 0, 1 and 2.
        osg::ref_ptr<osg::DrawElementsUInt> indices = new osg::DrawElementsUInt(GL_TRIANGLES);
        indices->addElement(0);
        indices->addElement(1);
        indices->addElement(2);
        geometry->addPrimitiveSet(indices.get());

        osg::ref_ptr<osg::Vec4Array> vertices = new osg::Vec4Array(nElements);
        (*vertices)[0] = osg::Vec4f(0,0,0,1);
        (*vertices)[1] = osg::Vec4f(0,0,1,1);
        (*vertices)[2] = osg::Vec4f(1,0,0,1);
        geometry->setVertexArray(vertices.get());

        // Set the usage hint only AFTER the vertex array is attached, so that it
        // is applied to the VBO that actually backs the array.
        // NOTE(review): as far as I can tell from the OSG sources,
        // getOrCreateVertexBufferObject() returns a fresh, unattached VBO when no
        // array of the geometry has one yet, so the original call order set the
        // hint on a throwaway object -- confirm against your OSG version.
        geometry->getOrCreateVertexBufferObject()->setUsage(GL_STATIC_DRAW);

        osg::ref_ptr<osg::Geode> ans = new osg::Geode;
        ans->addDrawable(geometry.get());
        return ans;
}

// OpenCL C source for the "ROT" kernel. Each work-item picks the float4 at its
// global id and updates the x and z components using the step dt.
const std::string rotateprog(
        "__kernel void ROT(global float4 * v,float dt) {\n"
        "       int i = get_global_id(0);\n"
        // x is written first; the z update on the next line reads that new x.
        "       v[i].x = v[i].x - v[i].z*dt;\n"
        "       v[i].z = v[i].z + v[i].x*dt;\n"
        "}\n");

int main(int argc, char **argv) {

        try {
                osg::ref_ptr<osg::Group> root = new osg::Group;
                osgViewer::Viewer theViewer;

                //This is crucial
                theViewer.setThreadingModel( 
osgViewer::ViewerBase::SingleThreaded);

                theViewer.setUpViewInWindow(0,0,800,600);

                theViewer.setSceneData(root.get());
                theViewer.setCameraManipulator(new 
osgGA::TrackballManipulator());
                theViewer.addEventHandler(new osgViewer::StatsHandler);
                theViewer.realize();

                osg::ref_ptr<osg::Geode> g = CreateGeometry();
                root->addChild(g);
                theViewer.frame();

                std::vector<cl::Platform> platforms;
                cl::Platform::get(&platforms);

                std::vector<cl::Device> devices;
                platforms[0].getDevices(CL_DEVICE_TYPE_GPU,&devices);

                cl::Device dev = devices[0];

                osgViewer::GraphicsWindowX11 * X11Wnd = 
dynamic_cast<osgViewer::GraphicsWindowX11*>(theViewer.getCamera()->getGraphicsContext());


                if(!X11Wnd)
                  {
                    exit(EXIT_FAILURE);
                  }


                cl_context_properties prop[] = {
                        CL_GL_CONTEXT_KHR,(cl_context_properties) 
X11Wnd->getContext(),
                        
CL_GLX_DISPLAY_KHR,(cl_context_properties)X11Wnd->getDisplay(),
                        0
                };

                cl::Context cntx (devices,prop);
                cl::CommandQueue mainq (cntx,dev);

                int cntxtid = 
theViewer.getCamera()->getGraphicsContext()->getState()->getContextID();

                osg::Geometry * geo = 
dynamic_cast<osg::Geometry*>(g->getDrawable(0));
                osg::BufferObject * buff = geo->getOrCreateVertexBufferObject();
                osg::GLBufferObject * glbuff = buff->getGLBufferObject(cntxtid);

                int reqsize = buff->computeRequiredBufferSize();
                int needsize = nElements*sizeof(cl_float4);
                assert (reqsize == needsize);

//              cl::Program rProg (cntx,cl::Program::Sources ({ 
std::make_pair(rotateprog.c_str(),0)}));
//              rProg.build(devices,"-g");

                cl::Program::Sources srcs;
                srcs.push_back(std::make_pair(rotateprog.c_str(),0));
                cl::Program rProg(cntx,srcs);
                rProg.build(devices,"-g");

                cl::Kernel k (rProg,"ROT");
                assert(glbuff->getGLObjectID());

                //This may be a bug of my opencl implementation
                //Opencl documentation doesn't say if the user is required to 
make the GL context current, but i need this or it doesn't work.
                X11Wnd->makeCurrent();

                std::cout << "OBJID= " << glbuff->getGLObjectID() << std::endl;
                cl::BufferGL 
buffgl(cntx,CL_MEM_READ_WRITE,glbuff->getGLObjectID());

                while(!theViewer.done()) {
                        glFinish();
                        std::vector<cl::Memory> globjects;
                        globjects.push_back(buffgl);
                        mainq.enqueueAcquireGLObjects(&globjects);
                        k.setArg(0, buffgl());
                        k.setArg<cl_float>(1,0.1);
                        
mainq.enqueueNDRangeKernel(k,cl::NDRange(0),cl::NDRange(nElements),cl::NDRange());
                        mainq.enqueueReleaseGLObjects(&globjects);
                        mainq.finish();
                        theViewer.frame();
                }
                cl_float4 readback[4];
                
mainq.enqueueReadBuffer(buffgl,true,0,sizeof(readback),readback);

                for (int i=0; i < 4;++i) {
                        for (int j=0;j < 4;++j)
                                std::cout << readback[i].s[j] << " ";
                        std::cout << std::endl;
                }
        }
        catch(cl::Error e) {
                std::cout << " ERROR = " << e.err() << " in: " << e.what() << 
std::endl;
        }
    return 0;
}





I am getting a segmentation fault during OpenCL context creation. I then 
re-compiled OSG with C++11 support by adding the following to its 
CMakeLists.txt:


Code:

IF(UNIX)
  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=gnu++0x")
ENDIF()




The author of this small application told me that re-compiling 
OSG with this support helped him get rid of the problem that 
I am having now.

Unfortunately, it did not help in my case. I tried to find the cause with GDB, 
and here is the GDB backtrace output:


Code:

Program received signal SIGSEGV, Segmentation fault.
0xb5d13544 in ?? () from /usr/lib/libnvidia-glcore.so.319.60
(gdb) backtrace
#0  0xb5d13544 in ?? () from /usr/lib/libnvidia-glcore.so.319.60
#1  0xb5d0a81a in ?? () from /usr/lib/libnvidia-glcore.so.319.60
#2  0xb5d0aa2d in ?? () from /usr/lib/libnvidia-glcore.so.319.60
#3  0xb5cc5dc0 in ?? () from /usr/lib/libnvidia-glcore.so.319.60
#4  0xb77aaa0b in glcuR0d4nX () from /usr/lib/libGL.so.1
#5  0xb3389c38 in ?? () from /usr/lib/libnvidia-opencl.so.1
#6  0xb32c1018 in ?? () from /usr/lib/libnvidia-opencl.so.1
#7  0xb32c22df in ?? () from /usr/lib/libnvidia-opencl.so.1
#8  0xb32c296f in ?? () from /usr/lib/libnvidia-opencl.so.1
#9  0xb33a545a in ?? () from /usr/lib/libnvidia-opencl.so.1
#10 0xb33a4fac in ?? () from /usr/lib/libnvidia-opencl.so.1
#11 0xb7850056 in clCreateContext () from /usr/lib/libOpenCL.so.1
#12 0x0805226c in cl::Context::Context (this=0xbffff1a4, devices=..., 
properties=0xbffff13c, notifyFptr=0, data=0x0, err=0x0) at 
/usr/local/cuda/include/CL/cl.hpp:1435
#13 0x08050ce9 in main (argc=1, argv=0xbffff2a4) at main.cpp:94





I believe there are others interested in bringing OSG and OpenCL together. I 
earnestly request you to try the code on your system and provide your 
feedback. That might give some insight into what might have gone wrong on 
my side.

Here are more details about my system:


Code:

OpenCL SW Info:

 CL_PLATFORM_NAME:      NVIDIA CUDA
 CL_PLATFORM_VERSION:   OpenCL 1.1 CUDA 4.2.1
 OpenCL SDK Revision:   7027912


OpenCL Device Info:

 1 devices found supporting OpenCL:

 ---------------------------------
 Device GeForce GTX 560M
 ---------------------------------
  CL_DEVICE_NAME:                       GeForce GTX 560M
  CL_DEVICE_VENDOR:                     NVIDIA Corporation
  CL_DRIVER_VERSION:                    319.60
  CL_DEVICE_VERSION:                    OpenCL 1.1 CUDA
  CL_DEVICE_OPENCL_C_VERSION:           OpenCL C 1.1 
  CL_DEVICE_TYPE:                       CL_DEVICE_TYPE_GPU
  CL_DEVICE_MAX_COMPUTE_UNITS:          4
  CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:   3
  CL_DEVICE_MAX_WORK_ITEM_SIZES:        1024 / 1024 / 64 
  CL_DEVICE_MAX_WORK_GROUP_SIZE:        1024
  CL_DEVICE_MAX_CLOCK_FREQUENCY:        1550 MHz
  CL_DEVICE_ADDRESS_BITS:               32
  CL_DEVICE_MAX_MEM_ALLOC_SIZE:         767 MByte
  CL_DEVICE_GLOBAL_MEM_SIZE:            3071 MByte
  CL_DEVICE_ERROR_CORRECTION_SUPPORT:   no
  CL_DEVICE_LOCAL_MEM_TYPE:             local
  CL_DEVICE_LOCAL_MEM_SIZE:             48 KByte
  CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:   64 KByte
  CL_DEVICE_QUEUE_PROPERTIES:           CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
  CL_DEVICE_QUEUE_PROPERTIES:           CL_QUEUE_PROFILING_ENABLE
  CL_DEVICE_IMAGE_SUPPORT:              1
  CL_DEVICE_MAX_READ_IMAGE_ARGS:        128
  CL_DEVICE_MAX_WRITE_IMAGE_ARGS:       8
  CL_DEVICE_SINGLE_FP_CONFIG:           denorms INF-quietNaNs round-to-nearest 
round-to-zero round-to-inf fma 

  CL_DEVICE_IMAGE <dim>                 2D_MAX_WIDTH     32768
                                        2D_MAX_HEIGHT    32768
                                        3D_MAX_WIDTH     2048
                                        3D_MAX_HEIGHT    2048
                                        3D_MAX_DEPTH     2048

  CL_DEVICE_EXTENSIONS:                 cl_khr_byte_addressable_store
                                        cl_khr_icd
                                        cl_khr_gl_sharing
                                        cl_nv_compiler_options
                                        cl_nv_device_attribute_query
                                        cl_nv_pragma_unroll
                                        cl_khr_global_int32_base_atomics
                                        cl_khr_global_int32_extended_atomics
                                        cl_khr_local_int32_base_atomics
                                        cl_khr_local_int32_extended_atomics
                                        cl_khr_fp64


  CL_DEVICE_COMPUTE_CAPABILITY_NV:      2.1
  NUMBER OF MULTIPROCESSORS:            4
  NUMBER OF CUDA CORES:                 192
  CL_DEVICE_REGISTERS_PER_BLOCK_NV:     32768
  CL_DEVICE_WARP_SIZE_NV:               32
  CL_DEVICE_GPU_OVERLAP_NV:             CL_TRUE
  CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV:     CL_TRUE
  CL_DEVICE_INTEGRATED_MEMORY_NV:       CL_FALSE
  CL_DEVICE_PREFERRED_VECTOR_WIDTH_<t>  CHAR 1, SHORT 1, INT 1, LONG 1, FLOAT 
1, DOUBLE 1





I eagerly look forward to any hints on this issue, which I have been stuck 
on for a while.

Thanks,
Sajjadul

------------------
Read this topic online here:
http://forum.openscenegraph.org/viewtopic.php?p=56889#56889





_______________________________________________
osg-users mailing list
[email protected]
http://lists.openscenegraph.org/listinfo.cgi/osg-users-openscenegraph.org

Reply via email to