Hi,
03.10.2012 5:29, luiz americo pereira camara:
[...]
The complete procedure:

{$ASMMODE INTEL}

// AlphaBlendLineConstant
//
// Blends Count pixels read from Source into the pixels at Destination using
// one constant alpha value for all of them, and writes the result back to
// Destination. Each pixel is handled as 4 bytes; every byte is treated as an
// independent component:
//
//   target = (ConstantAlpha * (source - target) + 256 * target) / 256
//
// Afterwards Bias is applied to each component with a signed saturated add
// (the component is shifted by 128 around the add) and the result is packed
// back to bytes with unsigned saturation.
//
// Only a 64-bit implementation ({$ifdef CPU64}) is visible here.
//
// NOTE(review): the declaration names no calling convention and uses the
// generic "Integer" type, while the assembler body hard-codes the registers
// listed below and accesses Count/ConstantAlpha/Bias as 32-bit values. The
// RCX/RDX/R8/R9 assignment matches the Microsoft x64 convention; confirm that
// this is what the compiler actually uses on every intended target.
procedure AlphaBlendLineConstant(Source, Destination: Pointer; Count:
Integer; ConstantAlpha, Bias: Integer);

asm

{$ifdef CPU64}
// Register/stack locations the assembler body below relies on:
// RCX contains Source
// RDX contains Destination
// R8D contains Count
// R9D contains ConstantAlpha
// Bias is on the stack

The procedure declaration above is not specific enough for the assembler block listed below, because:
- the size of "integer" may vary;
- the calling convention may vary.
I'd suggest that you:
- replace "integer" with "longint" or "word", according to what the assembler code expects;
- specify a calling convention explicitly, according to what the assembler code expects (IIRC in this case it is cdecl, but I'm not completely sure).

HTH
Nikolai


         //.NOFRAME

         // Register roles used throughout this block:
         //   RCX  = Source pointer, advanced by 4 bytes per pixel
         //   RDX  = Destination pointer, advanced by 4 bytes per pixel
         //   R8D  = number of pixels still to process
         //   R9D  = ConstantAlpha
         //   XMM3 = alpha, XMM4 = 128, XMM5 = bias (each replicated into the
         //          four low words)
         //   XMM0, XMM1, XMM2 and R10D are scratch.

         // Nothing to do for Count <= 0. The loop below is bottom-tested with
         // DEC/JNZ, so without this guard a Count of 0 would wrap around and
         // keep reading and writing far beyond the supplied buffers.
         TEST        R8D, R8D
         JLE         @Done

         // Load XMM3 with the constant alpha value (replicate it for every component).
         // Expand it to word size.
         MOVD        XMM3, R9D     // ConstantAlpha
         PUNPCKLWD   XMM3, XMM3
         PUNPCKLDQ   XMM3, XMM3

         // Load XMM5 with the bias value.
         MOVD        XMM5, [Bias]
         PUNPCKLWD   XMM5, XMM5
         PUNPCKLDQ   XMM5, XMM5

         // Load XMM4 with 128 to allow for saturated biasing.
         MOV         R10D, 128
         MOVD        XMM4, R10D
         PUNPCKLWD   XMM4, XMM4
         PUNPCKLDQ   XMM4, XMM4

@1:      // The pixel loop calculates an entire pixel in one run.
         // Note: The pixel byte values are expanded into the higher bytes of a word due
         //       to the way unpacking works. We compensate for this with an extra shift.
         MOVD        XMM1, DWORD PTR [RCX]   // data is unaligned
         MOVD        XMM2, DWORD PTR [RDX]   // data is unaligned
         PXOR        XMM0, XMM0    // clear source pixel register for unpacking
         PUNPCKLBW   XMM0, XMM1    // unpack source pixel byte values into words
         PSRLW       XMM0, 8       // move higher bytes to lower bytes
         PXOR        XMM1, XMM1    // clear target pixel register for unpacking
         PUNPCKLBW   XMM1, XMM2    // unpack target pixel byte values into words
         MOVQ        XMM2, XMM1    // make a copy of the shifted values, we need them again
         PSRLW       XMM1, 8       // move higher bytes to lower bytes

         // calculation is: target = (alpha * (source - target) + 256 * target) / 256
         PSUBW       XMM0, XMM1    // source - target
         PMULLW      XMM0, XMM3    // alpha * (source - target)
         PADDW       XMM0, XMM2    // add target (in shifted form, i.e. 256 * target)
         PSRLW       XMM0, 8       // divide by 256

         // Bias is accounted for by conversion of range 0..255 to -128..127,
         // doing a saturated add and convert back to 0..255.
         PSUBW       XMM0, XMM4
         PADDSW      XMM0, XMM5
         PADDW       XMM0, XMM4
         PACKUSWB    XMM0, XMM0    // convert words to bytes with saturation
         MOVD        DWORD PTR [RDX], XMM0   // store the result
@3:      // Advance to the next pixel. (This label is not referenced in this block.)
         ADD         RCX, 4
         ADD         RDX, 4
         DEC         R8D
         JNZ         @1

@Done:   // Reached directly when Count <= 0, or by falling out of the loop.

{$endif}

end;




_______________________________________________
fpc-devel maillist  -  fpc-devel@lists.freepascal.org
http://lists.freepascal.org/mailman/listinfo/fpc-devel

_______________________________________________
fpc-devel maillist  -  fpc-devel@lists.freepascal.org
http://lists.freepascal.org/mailman/listinfo/fpc-devel

Reply via email to