Dear all,
 
I’m interested to see whether I can use osgCompute for my project’s goal, which 
is real-time height-map deformation system. For starters, I’m trying to let an 
osgCompute node modify a 2D OSG float texture (the height-map) each frame, 
using a CUDA kernel. 
My problem now is that I can’t get osgCompute to make persistent changes to the 
source texture, it seems that either the CUDA kernel changes the texture only 
once or that it performs the same changes each frame to the original texture 
data. Any advice or hints on how to set up my buffers, textures and arrays in 
osgCompute to achieve persistent changes would be greatly appreciated.
I’m using osgCompute 0.4 (with CUDA 2.3, OSG 2.8.2, VS 9.0 / WinXP) and I 
started off with modifying the provided osgTexDemo example:
 
With regard to the original source code I changed the SrceBuffer and TempBuffer 
to single dimension (linear) osg::Cuda::Arrays with the intent to alternatingly 
read from, and write to, these respectively.
The extended (overloaded) osgCompute::Module::launch() alternatingly maps the 
SrceBuffer and TempBuffer as the source and target for the primary Cuda kernel 
respectively.
The setImage() call in the else is a hack to ensure that osgCompute cannot 
overwrite the buffer with the original image again when mapping the buffer to 
the device. This code so far has been tailored to simply try get it to work 
based on the original example; I can image this not being to best approach 
however. 
 
Sincerely,
     Asmar B. Arsala
 
(for code snippets see attachment)                                        
_________________________________________________________________
Express yourself instantly with MSN Messenger! Download today it's FREE!
http://messenger.msn.click-url.com/go/onm00200471ave/direct/01/
// Setup
{
        osg::Image *pImage = osgDB::readImageFile( "Data//Image.bmp" );

        osgCuda::Texture2D *pTexture = new osgCuda::Texture2D;
        pTexture->setInternalFormat( GL_RGBA );
        pTexture->setSourceType( GL_UNSIGNED_BYTE );
        pTexture->setElementSize( sizeof( osg::Vec4ub ) );
        pTexture->setDimension( 0, pImage0->s() );
        pTexture->setDimension( 1, pImage0->t() );
        pTexture->setName( "DestBuffer" );
        pTexture->addHandle( "DEST_BUFFER" );
        pTexture->setFilter( osg::Texture2D::MIN_FILTER, 
osg::Texture2D::NEAREST );
        pTexture->setFilter( osg::Texture2D::MAG_FILTER, 
osg::Texture2D::NEAREST );

        osgCuda::Array *pSrceBuffer = new osgCuda::Array;
        pSrceBuffer->setElementSize( sizeof( osg::Vec4ub ) );
        pSrceBuffer->setChannelFormatDesc( CudaChannelFormatDesc );
        pSrceBuffer->setDimension( 0, pImage->s() * pImage->t() );
        pSrceBuffer->setImage( pImage );
        pSrceBuffer->setName( "SrceBuffer");
        pSrceBuffer->addHandle( "SRCE_BUFFER" );

        osgCuda::Array *pTempBuffer = new osgCuda::Array;
        pTempBuffer->setElementSize( sizeof( osg::Vec4ub ) );
        pTempBuffer->setChannelFormatDesc( CudaChannelFormatDesc );
        pTempBuffer->setDimension( 0, pImage->s() * pImage->t() );
        pTempBuffer->setName( "TempBuffer" );
        pTempBuffer->addHandle( "TEMP_BUFFER" );
}

void CCudaModule::launch( const osgCompute::Context &rContext ) const
{
        static int iToggle = false;
        static int iSwitch = true;

        if( !isClear() )
        {
                void *pvSrceBuffer;
                void *pvTempBuffer;
                void *pvDestBuffer;

                if( iToggle = !iToggle )
                {
                        pvSrceBuffer = pSrceBuffer->map( rContext, 
osgCompute::MAP_DEVICE_SOURCE );
                        pvTempBuffer = pTempBuffer->map( rContext, 
osgCompute::MAP_DEVICE );
                        pvDestBuffer = pDestBuffer->map( rContext, 
osgCompute::MAP_DEVICE_TARGET );
                }
                else
                {
                        osgCuda::Array *pArray = static_cast< osgCuda::Array* 
>( const_cast< osgCompute::Buffer* >( pSrceArray ) );
                        if( pArray->getImage() ) pArray->setImage( NULL );
                        
                        pvSrceBuffer = pTempBuffer->map( rContext, 
osgCompute::MAP_DEVICE_SOURCE );
                        pvTempBuffer = pSrceBuffer->map( rContext, 
osgCompute::MAP_DEVICE );
                        pvDestBuffer = pDestBuffer->map( rContext, 
osgCompute::MAP_DEVICE_TARGET );
                }
                // KERNEL CALL 0 
                cuda_filter( NumBlocks, NumThreads, pvSrceBuffer, pvTempBuffer 
);

                // KERNEL CALL 1 
                cuda_copy( NumBlocks, NumThreads, pvTempBuffer, pvDestBuffer );
        }
}

extern "C"
void cuda_filter( osg::Vec2s &rNumBlocks, osg::Vec2s &rNumThreads, void 
*pvSrceArray, void *pvDestBuffer )
{
        …
        SrceTexture.normalized = true;
        SrceTexture.filterMode = cudaFilterModeLinear;
        SrceTexture.addressMode[0] = cudaAddressModeClamp;
        SrceTexture.addressMode[1] = cudaAddressModeClamp;

        switch( cudaBindTextureToArray( SrceTexture, reinterpret_cast< 
cudaArray* >( pvSrceArray ) ) )
        {
                case( cudaSuccess ):
                {
                        k_filter<<< Blocks, Threads >>>( reinterpret_cast< 
uchar4* >( pvDestBuffer ) );
                }
                break;
                case( cudaErrorInvalidValue ): …
                break;
                case( cudaErrorInvalidDevicePointer ): …
                break;
                case( cudaErrorInvalidTexture ): …
                break;
        }
}
__global__ 
void k_filter( uchar4 *pOutput ) 
{
        …
        float4 Color = tex1Dfetch( SrceTexture, iIndex );
        
        // Modify Color here…

        pOutput[ iIndex ] = 
                make_uchar4
                ( 
                        (unsigned char)( Color.x * 255.0f ),
                        (unsigned char)( Color.y * 255.0f ),
                        (unsigned char)( Color.z * 255.0f ),
                        (unsigned char)( Color.w * 255.0f )
                );
}

extern "C"
void cuda_copy( osg::Vec2s &rNumBlocks, osg::Vec2s &rNumThreads, void * 
pvSrceBuffer, void *pvDestBuffer )
{
        …
        k_copy<<< Blocks, Threads >>>
        ( 
                reinterpret_cast< uchar4* >( pvSrceBuffer ),
                reinterpret_cast< uchar4* >( pvDestBuffer )
        );
}

__global__ 
void k_copy( uchar4 *pInput, uchar4 *pOutput ) 
{
pOutput[ iIndex ] =
                make_uchar4
                ( 
                        pInput[ iIndex ].x, 
                        pInput[ iIndex ].y, 
                        pInput[ iIndex ].z,  
                        pInput[ iIndex ].w
                );
}
_______________________________________________
osg-users mailing list
[email protected]
http://lists.openscenegraph.org/listinfo.cgi/osg-users-openscenegraph.org

Reply via email to