Author: radek
Date: Tue May 8 21:35:45 2007 GMT
Module: SOURCES
Tag: HEAD
---- Log message:
- TEXTREL fix from gentoo: http://bugs.gentoo.org/show_bug.cgi?id=135326
---- Files affected: SOURCES: xvid-1.1.2-textrel.patch (NONE -> 1.1) (NEW) ---- Diffs: ================================================================ Index: SOURCES/xvid-1.1.2-textrel.patch diff -u /dev/null SOURCES/xvid-1.1.2-textrel.patch:1.1 --- /dev/null Tue May 8 23:35:45 2007 +++ SOURCES/xvid-1.1.2-textrel.patch Tue May 8 23:35:40 2007 @@ -0,0 +1,5757 @@ +diff -urp xvidcore-1.1.2-old/src/bitstream/x86_asm/cbp_mmx.asm xvidcore-1.1.2/src/bitstream/x86_asm/cbp_mmx.asm +--- xvidcore-1.1.2-old/src/bitstream/x86_asm/cbp_mmx.asm 2007-01-27 19:43:48.000000000 +0100 ++++ xvidcore-1.1.2/src/bitstream/x86_asm/cbp_mmx.asm 2007-01-27 13:33:30.000000000 +0100 +@@ -50,23 +50,6 @@ BITS 32 + %endmacro + + ;============================================================================= +-; Local data +-;============================================================================= +- +-%ifdef FORMAT_COFF +-SECTION .rodata +-%else +-SECTION .rodata align=16 +-%endif +- +-ALIGN 16 +- +-mult_mask: +- db 0x10,0x20,0x04,0x08,0x01,0x02,0x00,0x00 +-ignore_dc: +- dw 0, -1, -1, -1 +- +-;============================================================================= + ; Code + ;============================================================================= + +@@ -91,7 +74,12 @@ ALIGN 16 + calc_cbp_mmx: + mov eax, [esp + 4] ; coeff + +- movq mm7, [ignore_dc] ++ push byte 0 ; align esp to 8 bytes ++ push byte -1 ++ push dword 0xFFFF0000 ++ movq mm7, [esp] ++ add esp, byte 8 ++ + pxor mm6, mm6 ; used only for comparing + movq mm0, [eax+128*0] + movq mm1, [eax+128*1] +@@ -123,7 +111,11 @@ calc_cbp_mmx: + MAKE_LOAD 13 + MAKE_LOAD 14 + +- movq mm7, [mult_mask] ++ push dword 0x00000201 ++ push dword 0x08042010 ++ movq mm7, [esp] ++ add esp, byte 12 ++ + packssdw mm0, mm1 + packssdw mm2, mm3 + packssdw mm4, mm5 +diff -urp xvidcore-1.1.2-old/src/bitstream/x86_asm/cbp_sse2.asm xvidcore-1.1.2/src/bitstream/x86_asm/cbp_sse2.asm +--- xvidcore-1.1.2-old/src/bitstream/x86_asm/cbp_sse2.asm 2007-01-27 
19:43:48.000000000 +0100 ++++ xvidcore-1.1.2/src/bitstream/x86_asm/cbp_sse2.asm 2007-01-27 13:33:30.000000000 +0100 +@@ -69,20 +69,6 @@ BITS 32 + %endmacro + + ;============================================================================= +-; Data (Read Only) +-;============================================================================= +- +-%ifdef FORMAT_COFF +-SECTION .rodata +-%else +-SECTION .rodata align=16 +-%endif +- +-ALIGN 16 +-ignore_dc: +- dw 0, -1, -1, -1, -1, -1, -1, -1 +- +-;============================================================================= + ; Code + ;============================================================================= + +@@ -98,7 +84,13 @@ calc_cbp_sse2: + mov edx, [esp+4] ; coeff[] + xor eax, eax ; cbp = 0 + +- movdqu xmm7, [ignore_dc] ; mask to ignore dc value ++ sub esp,byte 12 ; align esp to 16 bytes ++ push byte -1 ++ push byte -1 ++ push byte -1 ++ push dword 0xFFFF0000 ++ movdqu xmm7, [esp] ; mask to ignore dc value ++ add esp, byte 28 + pxor xmm6, xmm6 ; zero + + LOOP_SSE2 0 +diff -urp xvidcore-1.1.2-old/src/dct/x86_asm/fdct_mmx_ffmpeg.asm xvidcore-1.1.2/src/dct/x86_asm/fdct_mmx_ffmpeg.asm +--- xvidcore-1.1.2-old/src/dct/x86_asm/fdct_mmx_ffmpeg.asm 2007-01-27 19:43:48.000000000 +0100 ++++ xvidcore-1.1.2/src/dct/x86_asm/fdct_mmx_ffmpeg.asm 2007-01-27 13:33:30.000000000 +0100 +@@ -204,7 +204,7 @@ fdct_r_row: + psllw mm4, SHIFT_FRW_COL + movq mm6, mm0 + psubsw mm2, mm1 +- movq mm1, [fdct_tg_all_16 + 4*2] ++ movq mm1, [ebx + fdct_tg_all_16 + 4*2 wrt ..gotoff] + psubsw mm0, mm4 + movq mm7, [%2 + %3*2 + 3*16] + pmulhw mm1, mm0 +@@ -216,9 +216,9 @@ fdct_r_row: + psubsw mm5, mm7 + paddsw mm1, mm5 + paddsw mm4, mm7 +- por mm1, [fdct_one_corr] ++ por mm1, [ebx + fdct_one_corr wrt ..gotoff] + psllw mm2, SHIFT_FRW_COL + 1 +- pmulhw mm5, [fdct_tg_all_16 + 4*2] ++ pmulhw mm5, [ebx + fdct_tg_all_16 + 4*2 wrt ..gotoff] + movq mm7, mm4 + psubsw mm3, [%2 + %3*2 + 5*16] + psubsw mm4, mm6 +@@ -230,34 +230,34 @@ fdct_r_row: + movq mm6, mm2 
+ movq [%1 + %3*2 + 4*16], mm4 + paddsw mm2, mm3 +- pmulhw mm2, [ocos_4_16] ++ pmulhw mm2, [ebx + ocos_4_16 wrt ..gotoff] + psubsw mm6, mm3 +- pmulhw mm6, [ocos_4_16] ++ pmulhw mm6, [ebx + ocos_4_16 wrt ..gotoff] + psubsw mm5, mm0 +- por mm5, [fdct_one_corr] ++ por mm5, [ebx + fdct_one_corr wrt ..gotoff] + psllw mm1, SHIFT_FRW_COL +- por mm2, [fdct_one_corr] ++ por mm2, [ebx + fdct_one_corr wrt ..gotoff] + movq mm4, mm1 + movq mm3, [%2 + %3*2 + 0*16] + paddsw mm1, mm6 + psubsw mm3, [%2 + %3*2 + 7*16] + psubsw mm4, mm6 +- movq mm0, [fdct_tg_all_16 + 0*2] ++ movq mm0, [ebx + fdct_tg_all_16 + 0*2 wrt ..gotoff] + psllw mm3, SHIFT_FRW_COL +- movq mm6, [fdct_tg_all_16 + 8*2] ++ movq mm6, [ebx + fdct_tg_all_16 + 8*2 wrt ..gotoff] + pmulhw mm0, mm1 + movq [%1 + %3*2 + 0*16], mm7 + pmulhw mm6, mm4 + movq [%1 + %3*2 + 6*16], mm5 + movq mm7, mm3 +- movq mm5, [fdct_tg_all_16 + 8*2] ++ movq mm5, [ebx + fdct_tg_all_16 + 8*2 wrt ..gotoff] + psubsw mm7, mm2 + paddsw mm3, mm2 + pmulhw mm5, mm7 + paddsw mm0, mm3 + paddsw mm6, mm4 +- pmulhw mm3, [fdct_tg_all_16 + 0*2] +- por mm0, [fdct_one_corr] ++ pmulhw mm3, [ebx + fdct_tg_all_16 + 0*2 wrt ..gotoff] ++ por mm0, [ebx + fdct_one_corr wrt ..gotoff] + paddsw mm5, mm7 + psubsw mm7, mm6 + movq [%1 + %3*2 + 1*16], mm0 +@@ -287,28 +287,28 @@ fdct_r_row: + movq mm6, mm5 + punpckldq mm3, mm5 + punpckhdq mm6, mm3 +- movq mm3, [%3 + 0*2] +- movq mm4, [%3 + 4*2] ++ movq mm3, [0*2 + %3] ++ movq mm4, [4*2 + %3] + punpckldq mm2, mm0 + pmaddwd mm3, mm0 + punpckhdq mm1, mm2 +- movq mm2, [%3 + 16*2] ++ movq mm2, [16*2 + %3] + pmaddwd mm4, mm1 +- pmaddwd mm0, [%3 + 8*2] +- movq mm7, [%3 + 20*2] ++ pmaddwd mm0, [8*2 + %3] ++ movq mm7, [20*2 + %3] + pmaddwd mm2, mm5 +- paddd mm3, [fdct_r_row] ++ paddd mm3, [ebx + fdct_r_row wrt ..gotoff] + pmaddwd mm7, mm6 +- pmaddwd mm1, [%3 + 12*2] ++ pmaddwd mm1, [12*2 + %3] + paddd mm3, mm4 +- pmaddwd mm5, [%3 + 24*2] +- pmaddwd mm6, [%3 + 28*2] ++ pmaddwd mm5, [24*2 + %3] ++ pmaddwd mm6, [28*2 + %3] + paddd mm2, 
mm7 +- paddd mm0, [fdct_r_row] ++ paddd mm0, [ebx + fdct_r_row wrt ..gotoff] + psrad mm3, SHIFT_FRW_ROW +- paddd mm2, [fdct_r_row] ++ paddd mm2, [ebx + fdct_r_row wrt ..gotoff] + paddd mm0, mm1 +- paddd mm5, [fdct_r_row] ++ paddd mm5, [ebx + fdct_r_row wrt ..gotoff] + psrad mm2, SHIFT_FRW_ROW + paddd mm5, mm6 + psrad mm0, SHIFT_FRW_ROW +@@ -336,23 +336,23 @@ fdct_r_row: + psubsw mm1, mm5 + pshufw mm2, mm0, 0x4E + pshufw mm3, mm1, 0x4E +- movq mm4, [%3 + 0*2] +- movq mm6, [%3 + 4*2] +- movq mm5, [%3 + 16*2] +- movq mm7, [%3 + 20*2] ++ movq mm4, [ 0*2 + %3] ++ movq mm6, [ 4*2 + %3] ++ movq mm5, [16*2 + %3] ++ movq mm7, [20*2 + %3] + pmaddwd mm4, mm0 + pmaddwd mm5, mm1 + pmaddwd mm6, mm2 + pmaddwd mm7, mm3 +- pmaddwd mm0, [%3 + 8*2] +- pmaddwd mm2, [%3 + 12*2] +- pmaddwd mm1, [%3 + 24*2] +- pmaddwd mm3, [%3 + 28*2] ++ pmaddwd mm0, [ 8*2 + %3] ++ pmaddwd mm2, [12*2 + %3] ++ pmaddwd mm1, [24*2 + %3] ++ pmaddwd mm3, [28*2 + %3] + paddd mm4, mm6 + paddd mm5, mm7 + paddd mm0, mm2 + paddd mm1, mm3 +- movq mm7, [fdct_r_row] ++ movq mm7, [ebx + fdct_r_row wrt ..gotoff] + paddd mm4, mm7 + paddd mm5, mm7 + paddd mm0, mm7 +@@ -377,6 +377,10 @@ cglobal %1 + ;; Move the destination/source address to the eax register + mov eax, [esp + 4] + ++ push ebx ++ call get_pc.bx ++ add ebx, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc ++ + ;; Process the columns (4 at a time) + FDCT_COLUMN_COMMON eax, eax, 0 ; columns 0..3 + FDCT_COLUMN_COMMON eax, eax, 4 ; columns 4..7 +@@ -386,12 +390,12 @@ cglobal %1 + %assign i 0 + %rep 8 + ;; Process the 'i'th row +- %2 eax+2*i*8, eax+2*i*8, tab_frw_01234567+2*32*i ++ %2 eax+2*i*8, eax+2*i*8, ebx + tab_frw_01234567+2*32*i wrt ..gotoff + %assign i i+1 + %endrep + %else + mov ecx, 8 +- mov edx, tab_frw_01234567 ++ mov edx, [ebx + tab_frw_01234567 wrt ..gotoff] + ALIGN 8 + .loop + %2 eax, eax, edx +@@ -401,6 +405,7 @@ ALIGN 8 + jne .loop + %endif + ++ pop ebx + ret + .endfunc + %endmacro +@@ -411,6 +416,11 @@ ALIGN 8 + + SECTION .text + ++extern 
_GLOBAL_OFFSET_TABLE_ ++get_pc.bx: ++ mov ebx, [esp] ++ retn ++ + ;----------------------------------------------------------------------------- + ; void fdct_mmx_ffmpeg(int16_t block[64]); + ;----------------------------------------------------------------------------- +diff -urp xvidcore-1.1.2-old/src/dct/x86_asm/fdct_mmx_skal.asm xvidcore-1.1.2/src/dct/x86_asm/fdct_mmx_skal.asm +--- xvidcore-1.1.2-old/src/dct/x86_asm/fdct_mmx_skal.asm 2007-01-27 19:43:48.000000000 +0100 ++++ xvidcore-1.1.2/src/dct/x86_asm/fdct_mmx_skal.asm 2007-01-27 13:33:30.000000000 +0100 +@@ -294,15 +294,15 @@ MMX_One: + paddsw mm2, mm1 ; mm2: t6+t5 + movq [%1+0*16], mm5 ; => out0 + +- movq mm4, [tan2] ; mm4 <= tan2 ++ movq mm4, [ebx + tan2 wrt ..gotoff] ; mm4 <= tan2 + pmulhw mm4, mm7 ; tm03*tan2 +- movq mm5, [tan2] ; mm5 <= tan2 ++ movq mm5, [ebx + tan2 wrt ..gotoff] ; mm5 <= tan2 + psubsw mm4, mm6 ; out6 = tm03*tan2 - tm12 + pmulhw mm5, mm6 ; tm12*tan2 + paddsw mm5, mm7 ; out2 = tm12*tan2 + tm03 + +- movq mm6, [sqrt2] +- movq mm7, [MMX_One] ++ movq mm6, [ebx + sqrt2 wrt ..gotoff] ++ movq mm7, [ebx + MMX_One wrt ..gotoff] + + pmulhw mm2, mm6 ; mm2: tp65 = (t6 + t5)*cos4 + por mm5, mm7 ; correct out2 +@@ -320,8 +320,8 @@ MMX_One: + paddsw mm2, mm4 ; mm2: tp765 = t7 + tp65 + paddsw mm1, mm5 ; mm1: tp465 = t4 + tm65 + +- movq mm4, [tan3] ; tan3 - 1 +- movq mm5, [tan1] ; tan1 ++ movq mm4, [ebx + tan3 wrt ..gotoff] ; tan3 - 1 ++ movq mm5, [ebx + tan1 wrt ..gotoff] ; tan1 + + movq mm7, mm3 ; save tm465 + pmulhw mm3, mm4 ; tm465*(tan3-1) +@@ -364,23 +364,23 @@ MMX_One: + punpckldq mm0, mm7 ; mm0 = [a0 a1 b0 b1] + punpckhdq mm1, mm7 ; mm1 = [b2 b3 a2 a3] + +- movq mm2, qword [%3 + 0] ; [ M00 M01 M16 M17] +- movq mm3, qword [%3 + 8] ; [ M02 M03 M18 M19] ++ movq mm2, qword [0 + %3] ; [ M00 M01 M16 M17] ++ movq mm3, qword [8 + %3] ; [ M02 M03 M18 M19] + pmaddwd mm2, mm0 ; [a0.M00+a1.M01 | b0.M16+b1.M17] +- movq mm4, qword [%3 + 16] ; [ M04 M05 M20 M21] ++ movq mm4, qword [16 + %3] ; [ M04 M05 M20 
M21] + pmaddwd mm3, mm1 ; [a2.M02+a3.M03 | b2.M18+b3.M19] +- movq mm5, qword [%3 + 24] ; [ M06 M07 M22 M23] ++ movq mm5, qword [24 + %3] ; [ M06 M07 M22 M23] + pmaddwd mm4, mm0 ; [a0.M04+a1.M05 | b0.M20+b1.M21] +- movq mm6, qword [%3 + 32] ; [ M08 M09 M24 M25] ++ movq mm6, qword [32 + %3] ; [ M08 M09 M24 M25] + pmaddwd mm5, mm1 ; [a2.M06+a3.M07 | b2.M22+b3.M23] +- movq mm7, qword [%3 + 40] ; [ M10 M11 M26 M27] ++ movq mm7, qword [40 + %3] ; [ M10 M11 M26 M27] + pmaddwd mm6, mm0 ; [a0.M08+a1.M09 | b0.M24+b1.M25] + paddd mm2, mm3 ; [ out0 | out1 ] + pmaddwd mm7, mm1 ; [a0.M10+a1.M11 | b0.M26+b1.M27] + psrad mm2, 16 +- pmaddwd mm0, qword [%3 + 48] ; [a0.M12+a1.M13 | b0.M28+b1.M29] ++ pmaddwd mm0, qword [48 + %3] ; [a0.M12+a1.M13 | b0.M28+b1.M29] + paddd mm4, mm5 ; [ out2 | out3 ] +- pmaddwd mm1, qword [%3 + 56] ; [a0.M14+a1.M15 | b0.M30+b1.M31] ++ pmaddwd mm1, qword [56 + %3] ; [a0.M14+a1.M15 | b0.M30+b1.M31] + psrad mm4, 16 + + paddd mm6, mm7 ; [ out4 | out5 ] +@@ -422,23 +422,23 @@ MMX_One: + punpckldq mm0, mm7 ; mm0 = [a0 a1 b0 b1] + punpckhdq mm1, mm7 ; mm1 = [b2 b3 a2 a3] + +- movq mm2, qword [%3 + 0] ; [ M00 M01 M16 M17] +- movq mm3, qword [%3 + 8] ; [ M02 M03 M18 M19] ++ movq mm2, qword [0 + %3] ; [ M00 M01 M16 M17] ++ movq mm3, qword [8 + %3] ; [ M02 M03 M18 M19] + pmaddwd mm2, mm0 ; [a0.M00+a1.M01 | b0.M16+b1.M17] +- movq mm4, qword [%3 + 16] ; [ M04 M05 M20 M21] ++ movq mm4, qword [16 + %3] ; [ M04 M05 M20 M21] + pmaddwd mm3, mm1 ; [a2.M02+a3.M03 | b2.M18+b3.M19] +- movq mm5, qword [%3 + 24] ; [ M06 M07 M22 M23] ++ movq mm5, qword [24 + %3] ; [ M06 M07 M22 M23] + pmaddwd mm4, mm0 ; [a0.M04+a1.M05 | b0.M20+b1.M21] +- movq mm6, qword [%3 + 32] ; [ M08 M09 M24 M25] ++ movq mm6, qword [32 + %3] ; [ M08 M09 M24 M25] + pmaddwd mm5, mm1 ; [a2.M06+a3.M07 | b2.M22+b3.M23] +- movq mm7, qword [%3 + 40] ; [ M10 M11 M26 M27] ++ movq mm7, qword [40 + %3] ; [ M10 M11 M26 M27] + pmaddwd mm6, mm0 ; [a0.M08+a1.M09 | b0.M24+b1.M25] + paddd mm2, mm3 ; [ out0 | out1 ] + pmaddwd 
mm7, mm1 ; [a0.M10+a1.M11 | b0.M26+b1.M27] + psrad mm2, 16 +- pmaddwd mm0, qword [%3 + 48] ; [a0.M12+a1.M13 | b0.M28+b1.M29] ++ pmaddwd mm0, qword [48 + %3] ; [a0.M12+a1.M13 | b0.M28+b1.M29] + paddd mm4, mm5 ; [ out2 | out3 ] +- pmaddwd mm1, qword [%3 + 56] ; [a0.M14+a1.M15 | b0.M30+b1.M31] ++ pmaddwd mm1, qword [56 + %3] ; [a0.M14+a1.M15 | b0.M30+b1.M31] + psrad mm4, 16 + + paddd mm6, mm7 ; [ out4 | out5 ] +@@ -467,12 +467,16 @@ MMX_One: + ALIGN 16 + cglobal %1 + %1: ++ push ebx ++ call get_pc.bx ++ add ebx, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc ++ + %ifdef UNROLLED_LOOP +- mov ecx, [esp + 4] ++ mov ecx, [esp + 4 + 4] + %else +- push ebx ++ push esi + push edi +- mov ecx, [esp + 8 + 4] ++ mov ecx, [esp + 12 + 4] + %endif + + fLLM_PASS ecx+0, ecx+0, 3 +@@ -481,27 +485,28 @@ cglobal %1 + %ifdef UNROLLED_LOOP + %assign i 0 + %rep 8 +- %2 ecx+i*16, ecx+i*16, fdct_table+i*64, fdct_rounding_1+i*8, fdct_rounding_2+i*8 ++ %2 ecx+i*16, ecx+i*16, ebx + fdct_table+i*64 wrt ..gotoff, ebx + fdct_rounding_1+i*8 wrt ..gotoff, ebx + fdct_rounding_2+i*8 wrt ..gotoff + %assign i i+1 + %endrep + %else + mov eax, 8 +- mov edx, fdct_table +- mov ebx, fdct_rounding_1 +- mov edi, fdct_rounding_2 ++ lea edx, [ebx + fdct_table wrt ..gotoff] ++ lea esi, [ebx + fdct_rounding_1 wrt ..gotoff] ++ lea edi, [ebx + fdct_rounding_2 wrt ..gotoff] + .loop +- %2 ecx, ecx, edx, ebx, edi ++ %2 ecx, ecx, edx, esi, edi + add ecx, 2*8 + add edx, 2*32 +- add ebx, 2*4 ++ add esi, 2*4 + add edi, 2*4 + dec eax + jne .loop + + pop edi +- pop ebx ++ pop esi + %endif + ++ pop ebx + ret + .endfunc + %endmacro +@@ -512,6 +517,11 @@ cglobal %1 + + SECTION .text + ++extern _GLOBAL_OFFSET_TABLE_ ++get_pc.bx: ++ mov ebx, [esp] ++ retn ++ + ;----------------------------------------------------------------------------- + ; void fdct_mmx_skal(int16_t block[64]]; + ;----------------------------------------------------------------------------- +diff -urp xvidcore-1.1.2-old/src/dct/x86_asm/fdct_sse2_skal.asm 
xvidcore-1.1.2/src/dct/x86_asm/fdct_sse2_skal.asm +--- xvidcore-1.1.2-old/src/dct/x86_asm/fdct_sse2_skal.asm 2007-01-27 19:43:48.000000000 +0100 ++++ xvidcore-1.1.2/src/dct/x86_asm/fdct_sse2_skal.asm 2007-01-27 13:33:30.000000000 +0100 +@@ -238,10 +238,10 @@ cglobal fdct_sse2_skal + pshufd xmm6, xmm0, 01010101b ; [13131313] + pshufd xmm7, xmm0, 11111111b ; [57575757] + +- pmaddwd xmm4, [%2+ 0] ; dot [M00,M01][M04,M05][M08,M09][M12,M13] +- pmaddwd xmm5, [%2+16] ; dot [M02,M03][M06,M07][M10,M11][M14,M15] +- pmaddwd xmm6, [%2+32] ; dot [M16,M17][M20,M21][M24,M25][M28,M29] +- pmaddwd xmm7, [%2+48] ; dot [M18,M19][M22,M23][M26,M27][M30,M31] ++ pmaddwd xmm4, [ 0 + %2] ; dot [M00,M01][M04,M05][M08,M09][M12,M13] ++ pmaddwd xmm5, [16 + %2] ; dot [M02,M03][M06,M07][M10,M11][M14,M15] ++ pmaddwd xmm6, [32 + %2] ; dot [M16,M17][M20,M21][M24,M25][M28,M29] ++ pmaddwd xmm7, [48 + %2] ; dot [M18,M19][M22,M23][M26,M27][M30,M31] + paddd xmm4, [%3] ; Round + + paddd xmm6, xmm7 ; [b0|b1|b2|b3] +@@ -267,12 +267,12 @@ cglobal fdct_sse2_skal + + %macro iLLM_PASS 1 ; %1: src/dst + +- movdqa xmm0, [tan3] ; t3-1 ++ movdqa xmm0, [ebx + tan3 wrt ..gotoff] ; t3-1 + movdqa xmm3, [%1+16*3] ; x3 + movdqa xmm1, xmm0 ; t3-1 + movdqa xmm5, [%1+16*5] ; x5 + +- movdqa xmm4, [tan1] ; t1 ++ movdqa xmm4, [ebx + tan1 wrt ..gotoff] ; t1 + movdqa xmm6, [%1+16*1] ; x1 + movdqa xmm7, [%1+16*7] ; x7 + movdqa xmm2, xmm4 ; t1 +@@ -290,7 +290,7 @@ cglobal fdct_sse2_skal + psubsw xmm2, xmm7 ; x1*t1-x7 = tm17 + + +- movdqa xmm3, [sqrt2] ++ movdqa xmm3, [ebx + sqrt2 wrt ..gotoff] + movdqa xmm7, xmm4 + movdqa xmm6, xmm2 + psubsw xmm4, xmm1 ; tp17-tp35 = t1 +@@ -310,7 +310,7 @@ cglobal fdct_sse2_skal + paddsw xmm0, xmm0 ; 2.(t1+t2) = b1 + paddsw xmm4, xmm4 ; 2.(t1-t2) = b2 + +- movdqa xmm7, [tan2] ; t2 ++ movdqa xmm7, [ebx + tan2 wrt ..gotoff] ; t2 + movdqa xmm3, [%1+2*16] ; x2 + movdqa xmm6, [%1+6*16] ; x6 + movdqa xmm5, xmm7 ; t2 +@@ -402,55 +402,58 @@ cglobal fdct_sse2_skal + + ALIGN 16 + idct_sse2_skal: ++ push ebx 
++ call get_pc.bx ++ add ebx, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc + +- mov ecx, [esp+ 4] ; Src ++ mov ecx, [esp+ 4 +4] ; Src + + TEST_ROW ecx, .Row0_Round +- iMTX_MULT 0, iTab1, Walken_Idct_Rounders + 16*0, 11 ++ iMTX_MULT 0, ebx + iTab1 wrt ..gotoff, ebx + 16*0 + Walken_Idct_Rounders wrt ..gotoff, 11 + jmp .Row1 + .Row0_Round +- movdqa xmm0, [Walken_Idct_Rounders + 16*8 + 8*0] ++ movdqa xmm0, [ebx + 16*8 + 8*0 + Walken_Idct_Rounders wrt ..gotoff] + movdqa [ecx ], xmm0 + + .Row1 + TEST_ROW ecx+16, .Row1_Round +- iMTX_MULT 1, iTab2, Walken_Idct_Rounders + 16*1, 11 ++ iMTX_MULT 1, ebx + iTab2 wrt ..gotoff, ebx + 16*1 + Walken_Idct_Rounders wrt ..gotoff, 11 + jmp .Row2 + .Row1_Round +- movdqa xmm0, [Walken_Idct_Rounders + 16*8 + 16*1] ++ movdqa xmm0, [ebx + 16*8 + 16*1 + Walken_Idct_Rounders wrt ..gotoff] + movdqa [ecx+16 ], xmm0 + + .Row2 + TEST_ROW ecx+32, .Row2_Round +- iMTX_MULT 2, iTab3, Walken_Idct_Rounders + 16*2, 11 ++ iMTX_MULT 2, ebx + iTab3 wrt ..gotoff, ebx + 16*2 + Walken_Idct_Rounders wrt ..gotoff, 11 + jmp .Row3 + .Row2_Round +- movdqa xmm0, [Walken_Idct_Rounders + 16*8 + 16*2] ++ movdqa xmm0, [ebx + 16*8 + 16*2 + Walken_Idct_Rounders wrt ..gotoff] + movdqa [ecx+32 ], xmm0 + + .Row3 + TEST_ROW ecx+48, .Row4 +- iMTX_MULT 3, iTab4, Walken_Idct_Rounders + 16*3, 11 ++ iMTX_MULT 3, ebx + iTab4 wrt ..gotoff, ebx + 16*3 + Walken_Idct_Rounders wrt ..gotoff, 11 + + .Row4 + TEST_ROW ecx+64, .Row5 +- iMTX_MULT 4, iTab1, Walken_Idct_Rounders + 16*4, 11 ++ iMTX_MULT 4, ebx + iTab1 wrt ..gotoff, ebx + 16*4 + Walken_Idct_Rounders wrt ..gotoff, 11 + + .Row5 + TEST_ROW ecx+80, .Row6 +- iMTX_MULT 5, iTab4, Walken_Idct_Rounders + 16*5, 11 ++ iMTX_MULT 5, ebx + iTab4 wrt ..gotoff, ebx + 16*5 + Walken_Idct_Rounders wrt ..gotoff, 11 + + .Row6 + TEST_ROW ecx+96, .Row7 +- iMTX_MULT 6, iTab3, Walken_Idct_Rounders + 16*6, 11 ++ iMTX_MULT 6, ebx + iTab3 wrt ..gotoff, ebx + 16*6 + Walken_Idct_Rounders wrt ..gotoff, 11 + + .Row7 + TEST_ROW ecx+112, .End +- iMTX_MULT 7, 
iTab2, Walken_Idct_Rounders + 16*7, 11 ++ iMTX_MULT 7, ebx + iTab2 wrt ..gotoff, ebx + 16*7 + Walken_Idct_Rounders wrt ..gotoff, 11 + .End + + iLLM_PASS ecx +- ++ pop ebx + ret + .endfunc + +@@ -507,15 +510,15 @@ idct_sse2_skal: + paddsw xmm2, xmm1 ; xmm2: t6+t5 + movdqa [%1+0*16], xmm5 ; => out0 + +- movdqa xmm4, [tan2] ; xmm4 <= tan2 ++ movdqa xmm4, [ebx + tan2 wrt ..gotoff] ; xmm4 <= tan2 + pmulhw xmm4, xmm7 ; tm03*tan2 +- movdqa xmm5, [tan2] ; xmm5 <= tan2 ++ movdqa xmm5, [ebx + tan2 wrt ..gotoff] ; xmm5 <= tan2 + psubsw xmm4, xmm6 ; out6 = tm03*tan2 - tm12 + pmulhw xmm5, xmm6 ; tm12*tan2 + paddsw xmm5, xmm7 ; out2 = tm12*tan2 + tm03 + +- movdqa xmm6, [sqrt2] +- movdqa xmm7, [Rounder1] ++ movdqa xmm6, [ebx + sqrt2 wrt ..gotoff] ++ movdqa xmm7, [ebx + Rounder1 wrt ..gotoff] + + pmulhw xmm2, xmm6 ; xmm2: tp65 = (t6 + t5)*cos4 + por xmm5, xmm7 ; correct out2 +@@ -533,8 +536,8 @@ idct_sse2_skal: <<Diff was trimmed, longer than 597 lines>> _______________________________________________ pld-cvs-commit mailing list [email protected] http://lists.pld-linux.org/mailman/listinfo/pld-cvs-commit
