On 12/29/2011 12:19 PM, Vladimir Panteleev wrote:
Before you disagree with any of the above, first
(for starters) I'd like to invite you to translate Daniel Vik's C memcpy
implementation to D:
http://www.danielvik.com/2010/02/fast-memcpy-in-c.html . It doesn't even
use inline assembler or compiler intrinsics.

Ok, I have performed a direct translation (with all the preprocessor stuff replaced by string mixins). However, I think I could do a lot better starting from scratch in D. I have performed some basic testing with all the configuration options, and it seems to work correctly.

// File: memcpy.d direct translation of memcpy.c

/********************************************************************
 ** File:     memcpy.c
 **
 ** Copyright (C) 1999-2010 Daniel Vik
 **
 ** This software is provided 'as-is', without any express or implied
 ** warranty. In no event will the authors be held liable for any
 ** damages arising from the use of this software.
 ** Permission is granted to anyone to use this software for any
 ** purpose, including commercial applications, and to alter it and
 ** redistribute it freely, subject to the following restrictions:
 **
 ** 1. The origin of this software must not be misrepresented; you
 **    must not claim that you wrote the original software. If you
 **    use this software in a product, an acknowledgment in the
 **    product documentation would be appreciated but is not
 **    required.
 **
 ** 2. Altered source versions must be plainly marked as such, and
 **    must not be misrepresented as being the original software.
 **
 ** 3. This notice may not be removed or altered from any source
 **    distribution.
 **
 **
 ** Description: Implementation of the standard library function memcpy.
 **             This implementation of memcpy() is ANSI-C89 compatible.
 **
 **             The following configuration options can be set:
 **
 **           LITTLE_ENDIAN   - Uses processor with little endian
 **                             addressing. Default is big endian.
 **
 **           PRE_INC_PTRS    - Use pre increment of pointers.
 **                             Default is post increment of
 **                             pointers.
 **
 **           INDEXED_COPY    - Copying data using array indexing.
 **                             Using this option, disables the
 **                             PRE_INC_PTRS option.
 **
 **           MEMCPY_64BIT    - Compiles memcpy for 64 bit
 **                             architectures
 **
 **
 ** Best Settings:
 **
 ** Intel x86:  LITTLE_ENDIAN and INDEXED_COPY
 **
 *******************************************************************/


/********************************************************************
 ** Configuration definitions.
 *******************************************************************/

// Build-time switches, expressed as D version identifiers. The
// version(...) blocks in this file select code based on them.
version = LITTLE_ENDIAN;   // picks the little endian SHL/SHR shift directions
version = INDEXED_COPY;    // picks the array-indexing CP/CP_SH variants


/********************************************************************
 ** Includes for size_t definition (none needed: size_t is built into D)
 *******************************************************************/

/********************************************************************
 ** Typedefs
 *******************************************************************/

// MEMCPY_64BIT requests the 64 bit variant; refuse to build it for a target
// that is not 64 bit. D predefines D_LP64 only (there is no D_LP32
// identifier), so the check has to be written as "not D_LP64".
version(MEMCPY_64BIT){
    version(D_LP64){}
    else static assert(0, "not a 64 bit compile");
}

// UIntN is the machine word used for the bulk copy loops and TYPE_WIDTH is
// its size in bytes.
version(D_LP64){
    alias ulong              UIntN;
    enum TYPE_WIDTH =        8;
}else{
    alias uint               UIntN;
    enum TYPE_WIDTH =        4;
}


/********************************************************************
 ** Reject PRE_INC_PTRS when INDEXED_COPY is defined.
 *******************************************************************/

// INDEXED_COPY and PRE_INC_PTRS are mutually exclusive: fail the build if
// both version identifiers are set.
version(INDEXED_COPY) version(PRE_INC_PTRS){
    static assert(0, "cannot use INDEXED_COPY together with PRE_INC_PTRS!");
}

/********************************************************************
 ** The X template
 *******************************************************************/

/**
 * Translates a template text into the source of a D string expression.
 *
 * Plain text is emitted as a double quoted string literal, with `"` and
 * `\` escaped. Every `@(expr)` is cut out of the literal and spliced in
 * as `~(expr)~`, so that mixing in the result evaluates `expr` and
 * concatenates it with the surrounding text. Parentheses inside `expr`
 * may nest.
 *
 * Params:
 *     x = template text, optionally containing `@(expr)` splices
 *
 * Returns: D source code of a string concatenation expression.
 */
string Ximpl(string x){
    import utf = std.utf;
    string r = `"`;
    size_t i = 0;
    while(i < x.length){
        // A splice needs both characters of "@("; a trailing lone '@' is
        // ordinary text (and must not index past the end of x).
        if(x[i] == '@' && i + 1 < x.length && x[i+1] == '('){
            immutable open = i + 1;          // index of the opening '('
            size_t j = open + 1;
            int nest = 1;
            while(nest && j < x.length){
                if(x[j] == '(') nest++;
                else if(x[j] == ')') nest--;
                j += utf.stride(x, j);
            }
            assert(nest == 0, "unterminated @( in X template text");
            r ~= `"~` ~ x[open..j] ~ `~"`;
            i = j;
            // Restart the scan here so that a splice directly following
            // this one is recognised as a splice, too.
            continue;
        }
        if(x[i] == '"' || x[i] == '\\') r ~= "\\";
        immutable n = utf.stride(x, i);      // copy whole UTF-8 sequences
        r ~= x[i..i+n];
        i += n;
    }
    return r ~ `"`;
}

// Compile-time front end for Ximpl: X!q{...} is the D source of a string
// expression and is meant to be used as mixin(X!q{...}).
template X(string text){
    enum string X = Ximpl(text);
}


/********************************************************************
 ** Definitions for pre and post increment of pointers.
 *******************************************************************/

// uses *(*&x)++ and similar to work around a bug in the parser

// Code-generation helpers for the two pointer stepping schemes. Every
// function returns D source text that is mixed in elsewhere in this file;
// the string parameters are spliced into that text verbatim.
//
//   START_VAL(x)      - statement executed before the first INC_VAL on x
//   INC_VAL(x)        - expression that steps pointer x and dereferences it
//   CAST_TO_U8(p, o)  - expression converting word pointer p plus offset o
//                       to a ubyte pointer
//   WHILE_DEST_BREAK  - value of the low address bits of dst8 at which
//                       memcpy leaves its byte-wise alignment loop
//   PRE_LOOP_ADJUST   - text appended to the byte pointer expression before
//                       it is turned into a word pointer
//   PRE_SWITCH_ADJUST - text appended to the src8 address before memcpy
//                       switches on its low bits
version(PRE_INC_PTRS){
    // Pre increment: the pointer is first moved back by one element, then
    // every access increments before dereferencing.
    string START_VAL(string x)           {return mixin(X!q{(*&@(x))--;});}
    string INC_VAL(string x)             {return mixin(X!q{*++(*&@(x))});}
    string CAST_TO_U8(string p, string o){
        return mixin(X!q{(cast(ubyte*)@(p) + @(o) + TYPE_WIDTH)});
    }
    enum WHILE_DEST_BREAK  =                     (TYPE_WIDTH - 1);
    enum PRE_LOOP_ADJUST   =                     q{- (TYPE_WIDTH - 1)};
    enum PRE_SWITCH_ADJUST =                     q{+ 1};
}else{
    // Post increment: no start adjustment; every access dereferences and
    // then increments.
    string START_VAL(string x)           {return q{};}
    string INC_VAL(string x)             {return mixin(X!q{*(*&@(x))++});}
    string CAST_TO_U8(string p, string o){
        return mixin(X!q{(cast(ubyte*)@(p) + @(o))});
    }
    enum WHILE_DEST_BREAK  =                     0;
    enum PRE_LOOP_ADJUST   =                     q{};
    enum PRE_SWITCH_ADJUST =                     q{};
}




/********************************************************************
 ** Definitions for endians
 *******************************************************************/

// SHL and SHR hold the operator text spliced into the shifted copy code.
// With LITTLE_ENDIAN set the two operators are swapped.
version(LITTLE_ENDIAN){
    enum SHL = ">>";
    enum SHR = "<<";
}else{
    enum SHL = "<<";
    enum SHR = ">>";
}

/********************************************************************
 ** Macros for copying words of different alignment.
 ** Uses incrementing pointers.
 *******************************************************************/

// Generates one statement that copies a single word from srcN to dstN,
// stepping both pointers through INC_VAL.
string CP_INCR() {
    return mixin(X!q{
        @(INC_VAL(q{dstN})) = @(INC_VAL(q{srcN}));
    });
}

// Generates the statements that produce one destination word out of two
// consecutive source words, stepping srcN and dstN through INC_VAL.
//
//   shl - shift amount (source text) applied with SHL to the source word
//         loaded previously (srcWord)
//   shr - shift amount (source text) applied with SHR to the newly loaded
//         source word
//
// The generated code uses the variables srcN, dstN, srcWord and dstWord of
// the scope it is mixed into, and leaves the new source word in srcWord.
string CP_INCR_SH(string shl, string shr) {
    return mixin(X!q{
        dstWord   = srcWord @(SHL) @(shl);
        srcWord   = @(INC_VAL(q{srcN}));
        dstWord  |= srcWord @(SHR) @(shr);
        @(INC_VAL(q{dstN})) = dstWord;
    });
}



/********************************************************************
 ** Macros for copying words of different alignment.
 ** Uses array indexes.
 *******************************************************************/

// Generates one statement that copies the word at index idx from srcN to
// dstN. Neither pointer is modified.
string CP_INDEX(string idx) {
    return mixin(X!q{
        dstN[@(idx)] = srcN[@(idx)];
    });
}

// Indexed counterpart of CP_INCR_SH: generates the statements that build
// dstN[x] from the previously loaded source word (srcWord, shifted with SHL
// by shl) and srcN[x] (shifted with SHR by shr). srcN[x] is left in srcWord
// for the next step. Neither pointer is modified.
string CP_INDEX_SH(string x, string shl, string shr) {
    return mixin(X!q{
        dstWord   = srcWord @(SHL) @(shl);
        srcWord   = srcN[@(x)];
        dstWord  |= srcWord @(SHR) @(shr);
        dstN[@(x)]= dstWord;
    });
}



/********************************************************************
 ** Macros for copying words of different alignment.
 ** Uses incrementing pointers or array indexes depending on
 ** configuration.
 *******************************************************************/

// Selects the copy primitives used inside the unrolled loops.
//
//   CP(idx)              - copy one word
//   CP_SH(idx, shl, shr) - copy one word assembled from two source words
//   INC_INDEX(p, o)      - advance pointer p by o elements after an
//                          unrolled group
version(INDEXED_COPY){
    // Indexed variant: the copies address dstN[idx]/srcN[idx], so the
    // pointers have to be advanced explicitly by INC_INDEX.
    alias CP_INDEX CP;
    alias CP_INDEX_SH CP_SH;
    string INC_INDEX(string p, string o){
        return mixin(X!q{
            ((@(p)) += (@(o)));
        });
    }
}else{
    // Pointer stepping variant: idx is ignored because CP_INCR and
    // CP_INCR_SH advance the pointers themselves; INC_INDEX generates
    // nothing.
    string CP(string idx) {return mixin(X!q{@(CP_INCR())});}
    string CP_SH(string idx, string shl, string shr){
        return mixin(X!q{
            @(CP_INCR_SH(mixin(X!q{@(shl)}), mixin(X!q{@(shr)})));
        });
    }
    string INC_INDEX(string p, string o){return q{};}
}


// Generates the code that copies the last few bytes one at a time.
//
//   count - source text of an expression giving the number of bytes left
//           to copy; values 1 through 7 copy that many bytes, any other
//           value copies nothing
//
// The generated code uses the variables dst8 and src8 of the scope it is
// mixed into and steps them through START_VAL/INC_VAL. The switch enters
// at the case matching the byte count and runs through all lower cases.
// Each transfer to the next case is spelled out with `goto case;` because
// D does not allow a non-empty case to fall through implicitly.
string COPY_REMAINING(string count) {
    return mixin(X!q{
        @(START_VAL(q{dst8}));
        @(START_VAL(q{src8}));

        switch (@(count)) {
        case 7: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8})); goto case;
        case 6: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8})); goto case;
        case 5: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8})); goto case;
        case 4: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8})); goto case;
        case 3: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8})); goto case;
        case 2: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8})); goto case;
        case 1: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8})); goto case;
        case 0:
        default: break;
        }
    });
}

// Generates the copy code for the case where the source needs no shifting,
// i.e. both src8 and dst8 can be accessed as UIntN words directly.
//
// The generated code
//   1. turns dst8/src8 (adjusted by PRE_LOOP_ADJUST) into word pointers,
//   2. copies (count / TYPE_WIDTH) & 7 single words with CP_INCR,
//   3. copies the remaining words in unrolled groups of eight with CP,
//   4. converts the word pointers back to byte pointers and copies the
//      last count & (TYPE_WIDTH - 1) bytes with COPY_REMAINING,
//   5. ends with `return dest;`.
//
// It uses the variables dst8, src8, count and dest of the scope it is mixed
// into, and declares dstN, srcN and length there.
string COPY_NO_SHIFT() {
    return mixin(X!q{
        UIntN* dstN = cast(UIntN*)(dst8 @(PRE_LOOP_ADJUST));
        UIntN* srcN = cast(UIntN*)(src8 @(PRE_LOOP_ADJUST));
        size_t length = count / TYPE_WIDTH;

        while (length & 7) {
            @(CP_INCR());
            length--;
        }

        length /= 8;

        while (length--) {
            @(CP(q{0}));
            @(CP(q{1}));
            @(CP(q{2}));
            @(CP(q{3}));
            @(CP(q{4}));
            @(CP(q{5}));
            @(CP(q{6}));
            @(CP(q{7}));

            @(INC_INDEX(q{dstN}, q{8}));
            @(INC_INDEX(q{srcN}, q{8}));
        }

        src8 = @(CAST_TO_U8(q{srcN}, q{0}));
        dst8 = @(CAST_TO_U8(q{dstN}, q{0}));

        @(COPY_REMAINING(q{count & (TYPE_WIDTH - 1)}));

        return dest;
    });
}



// Generates the copy code for a source that is `shift` bytes off a word
// boundary.
//
//   shift - source text of the byte offset; it is spliced into the shift
//           amounts 8 * shift and 8 * (TYPE_WIDTH - shift)
//
// The generated code
//   1. rounds the addresses of dst8 and src8 (adjusted by PRE_LOOP_ADJUST)
//      down to a multiple of TYPE_WIDTH and uses them as word pointers,
//   2. loads the first source word into srcWord,
//   3. copies (count / TYPE_WIDTH) & 7 words with CP_INCR_SH, each
//      destination word being assembled from two consecutive source words,
//   4. copies the remaining words in unrolled groups of eight with CP_SH,
//   5. converts the word pointers back to byte pointers (the source one
//      offset by shift - TYPE_WIDTH bytes) and copies the last
//      count & (TYPE_WIDTH - 1) bytes with COPY_REMAINING,
//   6. ends with `return dest;`.
//
// It uses the variables dst8, src8, count and dest of the scope it is mixed
// into, and declares dstN, srcN, length, srcWord and dstWord there.
string COPY_SHIFT(string shift) {
    return mixin(X!q{
UIntN* dstN = cast(UIntN*)(((cast(UIntN)dst8) @(PRE_LOOP_ADJUST)) &
                                    ~(TYPE_WIDTH - 1));
UIntN* srcN = cast(UIntN*)(((cast(UIntN)src8) @(PRE_LOOP_ADJUST)) &
                                    ~(TYPE_WIDTH - 1));
        size_t length  = count / TYPE_WIDTH;
        UIntN srcWord = @(INC_VAL(q{srcN}));
        UIntN dstWord;

        while (length & 7) {
@(CP_INCR_SH(mixin(X!q{8 * @(shift)}), mixin(X!q{8 * (TYPE_WIDTH - @(shift))})));
            length--;
        }

        length /= 8;

        while (length--) {
@(CP_SH(q{0}, mixin(X!q{8 * @(shift)}), mixin(X!q{8 * (TYPE_WIDTH - @(shift))}))); @(CP_SH(q{1}, mixin(X!q{8 * @(shift)}), mixin(X!q{8 * (TYPE_WIDTH - @(shift))}))); @(CP_SH(q{2}, mixin(X!q{8 * @(shift)}), mixin(X!q{8 * (TYPE_WIDTH - @(shift))}))); @(CP_SH(q{3}, mixin(X!q{8 * @(shift)}), mixin(X!q{8 * (TYPE_WIDTH - @(shift))}))); @(CP_SH(q{4}, mixin(X!q{8 * @(shift)}), mixin(X!q{8 * (TYPE_WIDTH - @(shift))}))); @(CP_SH(q{5}, mixin(X!q{8 * @(shift)}), mixin(X!q{8 * (TYPE_WIDTH - @(shift))}))); @(CP_SH(q{6}, mixin(X!q{8 * @(shift)}), mixin(X!q{8 * (TYPE_WIDTH - @(shift))}))); @(CP_SH(q{7}, mixin(X!q{8 * @(shift)}), mixin(X!q{8 * (TYPE_WIDTH - @(shift))})));

            @(INC_INDEX(q{dstN}, q{8}));
            @(INC_INDEX(q{srcN}, q{8}));
        }

src8 = @(CAST_TO_U8(q{srcN}, mixin(X!q{(@(shift) - TYPE_WIDTH)})));
        dst8 = @(CAST_TO_U8(q{dstN}, q{0}));

        @(COPY_REMAINING(q{count & (TYPE_WIDTH - 1)}));

        return dest;
    });
}


/********************************************************************
 **
 ** void *memcpy(void *dest, const void *src, size_t count)
 **
 ** Args:     dest        - pointer to destination buffer
 **           src         - pointer to source buffer
 **           count       - number of bytes to copy
 **
 ** Return:   A pointer to destination buffer
 **
 ** Purpose:  Copies count bytes from src to dest.
 **           No overlap check is performed.
 **
 *******************************************************************/

void *memcpy(void *dest, const void *src, size_t count)
{
    ubyte* dst8 = cast(ubyte*)dest;
    ubyte* src8 = cast(ubyte*)src;
    // Short copies are done byte by byte only.
    if (count < 8) {
        mixin(COPY_REMAINING(q{count}));
        return dest;
    }

    mixin(START_VAL(q{dst8}));
    mixin(START_VAL(q{src8}));

    // Copy single bytes until the low address bits of dst8 reach
    // WHILE_DEST_BREAK, i.e. until the destination is word aligned.
    while ((cast(UIntN)dst8 & (TYPE_WIDTH - 1)) != WHILE_DEST_BREAK) {
        mixin(INC_VAL(q{dst8})) = mixin(INC_VAL(q{src8}));
        count--;
    }
    // Dispatch on the low address bits of src8 (after PRE_SWITCH_ADJUST).
    // Every mixed-in COPY_* body ends with `return dest;`, so the `break`
    // after each case is never reached.
switch ((mixin(`(cast(UIntN)src8)`~ PRE_SWITCH_ADJUST)) & (TYPE_WIDTH - 1)) {
    // { } required to work around DMD bug
    case 0: {mixin(COPY_NO_SHIFT());} break;
    case 1: {mixin(COPY_SHIFT(q{1}));}   break;
    case 2: {mixin(COPY_SHIFT(q{2}));}   break;
    case 3: {mixin(COPY_SHIFT(q{3}));}   break;
    // Offsets 4 to 7 can only occur when the word is wider than 4 bytes.
static if(TYPE_WIDTH > 4){ // was TYPE_WIDTH >= 4. bug in original code.
    case 4: {mixin(COPY_SHIFT(q{4}));}   break;
    case 5: {mixin(COPY_SHIFT(q{5}));}   break;
    case 6: {mixin(COPY_SHIFT(q{6}));}   break;
    case 7: {mixin(COPY_SHIFT(q{7}));}   break;
}
    // The switch value is masked with TYPE_WIDTH - 1, so no other value
    // can occur.
    default: assert(0);
    }
}


// Small demo: copies a fixed int array with memcpy and prints the copy.
void main(){
    import std.stdio : writeln;

    int[13] source = [1,2,3,4,5,6,7,8,9,0,1,2,3];
    int[13] target;

    memcpy(target.ptr, source.ptr, source.sizeof);
    writeln(target);
}

Reply via email to