DCompute is my effort to target CUDA and SPIR to enable hassle-free native execution on the GPU.

It is a D library plus a modification of LDC, and is available at https://github.com/thewilsonator/ldc/tree/dcompute

The compiler is nearing minimal functionality and is able to generate code for CUDA's PTX format.

Required to build are Khronos's LLVM (https://github.com/KhronosGroup/SPIRV-LLVM) and the usual build requirements of LDC, i.e. a DMD-compatible D compiler and CMake.

Post cmake configuration and generation you will possibly need to add -L-lLLVMSPIRVLib to the (l|g)dmd invocation.

The code at the moment is very hacky and very hardcoded, with a lot of magic, and currently requires a manual invocation of llc, but hopefully this will improve.
$cat pointer.d
@compute module dcompute.types.pointer;
import ldc.attributes;


// Numeric tags accepted as the first template argument of Pointer
// (its constraint below is `p <= Generic`, so 0 through 4 are valid).
// NOTE(review): the names suggest GPU address spaces — confirm the numbering
// against what the modified compiler expects.
enum Private = 0;
enum Global = 1;
enum Shared = 2;
enum Constant = 3;
enum Generic = 4;

// Example kernel: reads one float through `b`, adds the scalar `p`, and stores
// the result through `a`. Both pointer parameters are instantiated with 1 as
// Pointer's first template argument, which is the value of the `Global` enum.
// NOTE(review): `@kernel` is presumably provided by ldc.attributes in this
// compiler fork and marks the function as a device entry point — confirm.
@kernel //magic attribute
void xdfgcmain(Pointer!(1,float) a,Pointer!(1,float) b , float p)
{
    // The dereferences work because Pointer forwards to its `ptr` member
    // through `alias ptr this`.
    *a = *b + p;
}

// Empty marker struct; presumably what the `@compute` attribute on the module
// declaration at the top of this file refers to — TODO confirm.
struct compute {} //really hacky and somewhat magic
// Attribute labels: every declaration after this line in the module is
// pure, @trusted, nothrow and @nogc.
pure: @trusted: nothrow: @nogc:

// Thin wrapper around a raw `T*` that carries a compile-time tag `p`.
// The template constraint rejects any `p` greater than Generic (4).
// NOTE(review): the modified compiler presumably recognises this type specially
// and uses `p` to choose the pointer's address space — confirm.
struct Pointer(uint p, T) if(p <= Generic) //magic type
{
    // The wrapped raw pointer.
    T* ptr;
    // Forward to `ptr`, so a Pointer can be used like a plain T* (e.g. `*a`).
    alias ptr this;
}

$../ldcbuild/bin/ldc2 pointer.d -output-ll -O1
$llc -mcpu=sm_20 gpusuff_ptx.ll -o kernel.ptx
$cat kernel.ptx
//
// Generated by LLVM NVPTX Back-End
//

.version 3.2
.target sm_20
.address_size 64

// .globl _D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv // @_D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv
.visible .entry _D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv(
        .param .align 8 .b8 _D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv_param_0[8],
        .param .align 8 .b8 _D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv_param_1[8],
        .param .f32 _D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv_param_2
)
{
        .reg .f32       %f<4>;
        .reg .s64       %rd<3>;

// BB#0:
        ld.param.u64 %rd1, [_D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv_param_0];
        ld.param.u64 %rd2, [_D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv_param_1];
        ld.param.f32 %f1, [_D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv_param_2];
        ld.global.f32   %f2, [%rd2];
        add.rn.f32      %f3, %f2, %f1;
        st.global.f32   [%rd1], %f3;
        ret;
}

Reply via email to