dcompute is nearing minimal functionality

Nicholas Wilson via Digitalmars-d Wed, 13 Jul 2016 20:36:06 -0700

DCompute is my effort to target CUDA and SPIR to enable hassle-free native execution on the GPU.

It is a D library plus a modification of LDC, and is available at https://github.com/thewilsonator/ldc/tree/dcompute

The compiler is nearing minimal functionality and is able to generate code for CUDA's PTX format.

Required to build is Khronos's LLVM (https://github.com/KhronosGroup/SPIRV-LLVM) and the usual build requirements of LDC, i.e. a DMD-compatible D compiler and CMake.

Post cmake configuration and generation you will possibly need to add -L-lLLVMSPIRVLib to the (l|g)dmd invocation.

The code at the moment is very hacky and very hardcoded, with a lot of magic, and currently requires a manual invocation of llc, but hopefully this will improve.

$cat pointer.d
@compute module dcompute.types.pointer;
import ldc.attributes;


// Integer tags used as the first template argument of Pointer, whose
// constraint is `p <= Generic`, so 0..4 are the only accepted values.
// NOTE(review): the names suggest GPU address spaces; the numbering is
// presumably what the modified compiler expects - confirm before changing.
enum Private = 0;
// The example kernel in this module instantiates Pointer!(1, float),
// i.e. this value.
enum Global = 1;
enum Shared = 2;
enum Constant = 3;
// Largest tag accepted by Pointer's template constraint.
enum Generic = 4;

// Example kernel: reads the float that `b` points to, adds `p`, and stores
// the sum through `a`.
// NOTE(review): @kernel is presumably provided by ldc.attributes and given
// special treatment by the modified compiler - confirm.
@kernel //magic attribute
void xdfgcmain(Pointer!(1,float) a,Pointer!(1,float) b , float p)
{
    // Pointer declares `alias ptr this`, so `*a` and `*b` dereference the
    // wrapped float* directly.
    *a = *b + p;
}

// Empty struct whose only use in this file is as the `@compute` attribute on
// the module declaration at the top.
// NOTE(review): presumably the compiler fork looks for this attribute by
// name - confirm.
struct compute {} //really hacky and somewhat magic
// Attribute labels: every declaration that follows in this module is
// pure, @trusted, nothrow and @nogc.
pure: @trusted: nothrow: @nogc:

// Thin wrapper around a raw T* that carries a compile-time tag `p`.
// The template constraint rejects any `p` greater than Generic (4).
// NOTE(review): described as a "magic type" - the modified compiler
// presumably recognises it specially; confirm before renaming or
// restructuring.
struct Pointer(uint p, T) if(p <= Generic) //magic type
{
    // The wrapped raw pointer.
    T* ptr;
    // Lets a Pointer be used wherever a T* is expected (dereference,
    // implicit conversion) by forwarding to `ptr`.
    alias ptr this;
}

$../ldcbuild/bin/ldc2 pointer.d -output-ll -O1
$llc -mcpu=sm_20 gpusuff_ptx.ll -o kernel.ptx
$cat kernel.ptx
//
// Generated by LLVM NVPTX Back-End
//

.version 3.2
.target sm_20
.address_size 64

        // .globl       _D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv
                                        // @_D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv
.visible .entry _D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv(
        .param .align 8 .b8 _D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv_param_0[8],
        .param .align 8 .b8 _D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv_param_1[8],
        .param .f32 _D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv_param_2

)
{
        .reg .f32       %f<4>;
        .reg .s64       %rd<3>;

// BB#0:

        ld.param.u64    %rd1, [_D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv_param_0];
        ld.param.u64    %rd2, [_D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv_param_1];
        ld.param.f32    %f1, [_D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv_param_2];

        ld.global.f32   %f2, [%rd2];
        add.rn.f32      %f3, %f2, %f1;
        st.global.f32   [%rd1], %f3;
        ret;
}

dcompute is nearing minimal functionality

Reply via email to