summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'media-sound/lame/files/lame-3.98-pic-fix.patch')
-rw-r--r--media-sound/lame/files/lame-3.98-pic-fix.patch585
1 files changed, 585 insertions, 0 deletions
diff --git a/media-sound/lame/files/lame-3.98-pic-fix.patch b/media-sound/lame/files/lame-3.98-pic-fix.patch
new file mode 100644
index 000000000..8874611cf
--- /dev/null
+++ b/media-sound/lame/files/lame-3.98-pic-fix.patch
@@ -0,0 +1,585 @@
+diff -urp lame-398-orig/libmp3lame/i386/choose_table.nas lame-398/libmp3lame/i386/choose_table.nas
+--- lame-398-orig/libmp3lame/i386/choose_table.nas 2008-07-16 21:47:19.000000000 +0200
++++ lame-398/libmp3lame/i386/choose_table.nas 2008-07-16 21:47:30.000000000 +0200
+@@ -111,33 +111,42 @@ choose_table_H
+ dw 0x1d16, 0x1e16, 0x1e17, 0x1f17, 0x1f17
+
+ choose_jump_table_L:
+- dd table_MMX.L_case_0
+- dd table_MMX.L_case_1
+- dd table_MMX.L_case_2
+- dd table_MMX.L_case_3
+- dd table_MMX.L_case_45
+- dd table_MMX.L_case_45
+- dd table_MMX.L_case_67
+- dd table_MMX.L_case_67
+- dd table_MMX.L_case_8_15
+- dd table_MMX.L_case_8_15
+- dd table_MMX.L_case_8_15
+- dd table_MMX.L_case_8_15
+- dd table_MMX.L_case_8_15
+- dd table_MMX.L_case_8_15
+- dd table_MMX.L_case_8_15
+- dd table_MMX.L_case_8_15
++ dd table_MMX.L_case_0 - choose_table_MMX
++ dd table_MMX.L_case_1 - choose_table_MMX
++ dd table_MMX.L_case_2 - choose_table_MMX
++ dd table_MMX.L_case_3 - choose_table_MMX
++ dd table_MMX.L_case_45 - choose_table_MMX
++ dd table_MMX.L_case_45 - choose_table_MMX
++ dd table_MMX.L_case_67 - choose_table_MMX
++ dd table_MMX.L_case_67 - choose_table_MMX
++ dd table_MMX.L_case_8_15 - choose_table_MMX
++ dd table_MMX.L_case_8_15 - choose_table_MMX
++ dd table_MMX.L_case_8_15 - choose_table_MMX
++ dd table_MMX.L_case_8_15 - choose_table_MMX
++ dd table_MMX.L_case_8_15 - choose_table_MMX
++ dd table_MMX.L_case_8_15 - choose_table_MMX
++ dd table_MMX.L_case_8_15 - choose_table_MMX
++ dd table_MMX.L_case_8_15 - choose_table_MMX
+
+ segment_code
+ ;
+ ; use MMX
+ ;
+
++extern _GLOBAL_OFFSET_TABLE_
++get_pc.bp:
++ mov ebp, [esp]
++ retn
++
+ align 16
+ ; int choose_table(int *ix, int *end, int *s)
+ choose_table_MMX:
+- mov ecx,[esp+4] ;ecx = begin
+- mov edx,[esp+8] ;edx = end
++ push ebp
++ call get_pc.bp
++ add ebp, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc
++
++ mov ecx,[esp+8] ;ecx = begin
++ mov edx,[esp+12] ;edx = end
+ sub ecx,edx ;ecx = begin-end(should be minus)
+ test ecx,8
+ pxor mm0,mm0 ;mm0=[0:0]
+@@ -169,13 +178,16 @@ choose_table_MMX:
+
+ cmp eax,15
+ ja .with_ESC
+- jmp [choose_jump_table_L+eax*4]
++ lea ecx,[ebp + choose_table_MMX wrt ..gotoff]
++ add ecx,[ebp + choose_jump_table_L+eax*4 wrt ..gotoff]
++ jmp ecx
+
+ .with_ESC1:
+ emms
+- mov ecx, [esp+12] ; *s
++ mov ecx, [esp+16] ; *s
+ mov [ecx], eax
+ or eax,-1
++ pop ebp
+ ret
+
+ .with_ESC:
+@@ -187,12 +199,12 @@ choose_table_MMX:
+ push esi
+ bsr eax, eax
+ %assign _P 4*2
+- movq mm5, [D15_15_15_15]
+- movq mm6, [D14_14_14_14]
+- movq mm3, [mul_add]
++ movq mm5, [ebp + D15_15_15_15 wrt ..gotoff]
++ movq mm6, [ebp + D14_14_14_14 wrt ..gotoff]
++ movq mm3, [ebp + mul_add wrt ..gotoff]
+
+- mov ecx, [esp+_P+4] ; = ix
+-; mov edx, [esp+_P+8] ; = end
++ mov ecx, [esp+_P+8] ; = ix
++; mov edx, [esp+_P+12] ; = end
+ sub ecx, edx
+
+ xor esi, esi ; sum = 0
+@@ -209,7 +221,7 @@ choose_table_MMX:
+ psubw mm7, mm2 ; 14$B$h$jBg$-$$$H$-(B linbits_sum++;
+ pmaddwd mm0, mm3 ; {0, 0, y, x}*{1, 16, 1, 16}
+ movd ebx, mm0
+- mov esi, [largetbl+ebx*4+(16*16+16)*4]
++ mov esi, [ebp + largetbl+ebx*4+(16*16+16)*4 wrt ..gotoff]
+
+ jz .H_dual_exit
+
+@@ -224,9 +236,9 @@ choose_table_MMX:
+ pmaddwd mm0, mm3 ; {y, x, y, x}*{1, 16, 1, 16}
+ movd ebx, mm0
+ punpckhdq mm0,mm0
+- add esi, [largetbl+ebx*4+(16*16+16)*4]
++ add esi, [ebp + largetbl+ebx*4+(16*16+16)*4 wrt ..gotoff]
+ movd ebx, mm0
+- add esi, [largetbl+ebx*4+(16*16+16)*4]
++ add esi, [ebp + largetbl+ebx*4+(16*16+16)*4 wrt ..gotoff]
+ add ecx, 16
+ psubw mm7, mm2 ; 14$B$h$jBg$-$$$H$-(B linbits_sum++;
+ jnz .H_dual_lp1
+@@ -237,8 +249,8 @@ choose_table_MMX:
+ paddd mm7,mm1
+ punpckldq mm7,mm7
+
+- pmaddwd mm7, [linbits32+eax*8] ; linbits
+- mov ax, [choose_table_H+eax*2]
++ pmaddwd mm7, [ebp + linbits32+eax*8 wrt ..gotoff] ; linbits
++ mov ax, [ebp + choose_table_H+eax*2 wrt ..gotoff]
+
+ movd ecx, mm7
+ punpckhdq mm7,mm7
+@@ -261,54 +273,57 @@ choose_table_MMX:
+ mov edx, ecx
+ shr eax, 8
+ .chooseE_s1:
+- mov ecx, [esp+12] ; *s
++ mov ecx, [esp+16] ; *s
+ and eax, 0xff
+ add [ecx], edx
++ pop ebp
+ ret
+
+ table_MMX.L_case_0:
+ emms
++ pop ebp
+ ret
+
+ table_MMX.L_case_1:
+ emms
+- mov eax, [esp+12] ; *s
+- mov ecx, [esp+4] ; *ix
++ mov eax, [esp+16] ; *s
++ mov ecx, [esp+8] ; *ix
+ sub ecx, edx
+ push ebx
+ .lp:
+ mov ebx, [edx+ecx]
+ add ebx, ebx
+ add ebx, [edx+ecx+4]
+- movzx ebx, byte [ebx+t1l]
++ movzx ebx, byte [ebp + ebx+t1l wrt ..gotoff]
+ add [eax], ebx
+ add ecx, 8
+ jnz .lp
+ pop ebx
+ mov eax, 1
++ pop ebp
+ ret
+
+ table_MMX.L_case_45:
+ push dword 7
+- mov ecx, tableABC+9*8
++ lea ecx, [ebp + tableABC+9*8 wrt ..gotoff]
+ jmp from3
+
+ table_MMX.L_case_67:
+ push dword 10
+- mov ecx, tableABC
++ lea ecx, [ebp + tableABC wrt ..gotoff]
+ jmp from3
+
+ table_MMX.L_case_8_15:
+ push dword 13
+- mov ecx, tableDEF
++ lea ecx, [ebp + tableDEF wrt ..gotoff]
+ from3:
+- mov eax,[esp+8] ;eax = *begin
+-; mov edx,[esp+12] ;edx = *end
++ mov eax,[esp+12] ;eax = *begin
++; mov edx,[esp+16] ;edx = *end
+
+ push ebx
+ sub eax, edx
+
+- movq mm5,[mul_add]
++ movq mm5,[ebp + mul_add wrt ..gotoff]
+ pxor mm2,mm2 ;mm2 = sum
+
+ test eax, 8
+@@ -361,22 +376,23 @@ from3:
+ .choose3_s2:
+ pop ecx
+ add eax, ecx
+- mov ecx, [esp+12] ; *s
++ mov ecx, [esp+16] ; *s
+ add [ecx], edx
++ pop ebp
+ ret
+
+ table_MMX.L_case_2:
+ push dword 2
+- mov ecx,table23
+- pmov mm5,[mul_add23]
++ lea ecx,[ebp + table23 wrt ..gotoff]
++ pmov mm5,[ebp + mul_add23 wrt ..gotoff]
+ jmp from2
+ table_MMX.L_case_3:
+ push dword 5
+- mov ecx,table56
+- pmov mm5,[mul_add56]
++ lea ecx,[ebp + table56 wrt ..gotoff]
++ pmov mm5,[ebp + mul_add56 wrt ..gotoff]
+ from2:
+- mov eax,[esp+8] ;eax = *begin
+-; mov edx,[esp+12] ;edx = *end
++ mov eax,[esp+12] ;eax = *begin
++; mov edx,[esp+16] ;edx = *end
+ push ebx
+ push edi
+
+@@ -426,8 +442,9 @@ from2:
+ mov edx, ecx
+ inc eax
+ .choose2_s1:
+- mov ecx, [esp+12] ; *s
++ mov ecx, [esp+16] ; *s
+ add [ecx], edx
++ pop ebp
+ ret
+
+ end
+diff -urp lame-398-orig/libmp3lame/i386/fft3dn.nas lame-398/libmp3lame/i386/fft3dn.nas
+--- lame-398-orig/libmp3lame/i386/fft3dn.nas 2008-07-16 21:47:19.000000000 +0200
++++ lame-398/libmp3lame/i386/fft3dn.nas 2008-07-16 21:47:30.000000000 +0200
+@@ -24,26 +24,35 @@ D_1_0_0_0 dd 0.0 , 1.0
+
+ segment_code
+
++extern _GLOBAL_OFFSET_TABLE_
++get_pc.bp:
++ mov ebp, [esp]
++ retn
++
+ ;void fht_3DN(float *fz, int nn);
+
+ proc fht_3DN
+
+ pushd ebp, ebx, esi, edi
+
+- mov r0, [esp+20] ;fi
+- mov r1, [esp+24] ;r1 = nn
+- sub esp, 16
++ sub esp, 20
++
++ call get_pc.bp
++ add ebp, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc
+
++ mov r0, [esp+40] ;fi
++ mov r1, [esp+44] ;r1 = nn
++ lea r3, [ebp + costab wrt ..gotoff] ;tri = costab
++ lea r4, [r0+r1*8] ;r4 = fn = &fz[n]
++ mov [esp+16], r4
+ mov r4, 8 ;kx = k1/2
+- mov r3, costab ;tri = costab
+- lea r6, [r0+r1*8] ;r6 = fn = &fz[n]
+
+ pmov mm7, [r3]
+
+ loopalign 16
+ .do1
+ lea r3, [r3+16] ;tri += 2;
+- pmov mm6, [costab+8]
++ pmov mm6, [ebp + costab+8 wrt ..gotoff]
+ lea r2, [r4+r4*2] ;k3*fsize/2
+ mov r5, 4 ;i = 1*fsize
+
+@@ -104,7 +113,7 @@ proc fht_3DN
+ pmovd [r1+r4*4], mm4 ;gi[k2]
+ puphdq mm4, mm4
+
+- cmp r0, r6
++ cmp r0, [esp + 16]
+ pmovd [r1+r4*2], mm0 ;gi[k1]
+ pmovd [r1+r2*2], mm4 ;gi[k3]
+
+@@ -119,12 +128,12 @@ proc fht_3DN
+ ; mm7 = 0x800000000 | 0
+ ;
+ pmov mm1, mm6
+- mov r0, [esp+36] ; fz
++ mov r0, [esp+40] ; fz
+ puphdq mm1, mm1 ; c1 | c1
+ lea r1, [r0+r4*2]
+ pfadd mm1, mm1 ; c1+c1 | c1+c1
+ pfmul mm1, mm6 ; 2*c1*c1 | 2*c1*s1
+- pfsub mm1, [D_1_0_0_0] ; 2*c1*c1-1.0 | 2*c1*s1 = -c2 | s2
++ pfsub mm1, [ebp + D_1_0_0_0 wrt ..gotoff] ; 2*c1*c1-1.0 | 2*c1*s1 = -c2 | s2
+
+ pmov mm0, mm1
+ pxor mm7, mm6 ; c1 | -s1
+@@ -134,7 +143,7 @@ proc fht_3DN
+ puphdq mm0, mm2 ; s2 | c2
+ puphdq mm6, mm3 ;-s1 | c1
+
+- pxor mm0, [costab] ; c2 | -s2
++ pxor mm0, [ebp + costab wrt ..gotoff] ; c2 | -s2
+
+ ; mm0 = s2| c2
+ ; mm1 = -c2| s2
+@@ -233,7 +242,7 @@ proc fht_3DN
+
+ lea r0, [r0+r4*8]
+ lea r1, [r1+r4*8]
+- cmp r0, r6
++ cmp r0, [esp + 16]
+ pmov mm0, [esp]
+ pmov mm1, [esp+8]
+
+@@ -249,17 +258,17 @@ proc fht_3DN
+ pfsub mm6, mm7 ; c1*a-s1*b | s1*a+c1*b
+ pupldq mm7,mm6
+ puphdq mm6,mm7
+- pmov mm7, [costab]
++ pmov mm7, [ebp + costab wrt ..gotoff]
+ jb near .for
+
+- mov r0, [esp+36] ;fi
+- cmp r4, [esp+36+4]
++ mov r0, [esp+40] ;fi
++ cmp r4, [esp+40+4]
+ lea r4, [r4*4] ;kx *= 4
+
+ jb near .do1
+ .exitttt
+ femms
+- add esp,16
++ add esp,20
+ popd ebp, ebx, esi, edi
+ endproc
+
+@@ -270,20 +279,24 @@ proc fht_E3DN
+
+ pushd ebp, ebx, esi, edi
+
+- mov r0, [esp+20] ;fi
+- mov r1, [esp+24] ;r1 = nn
+- sub esp, 16
++ sub esp, 20
++
++ call get_pc.bp
++ add ebp, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc
+
++ mov r0, [esp+40] ;fi
++ mov r1, [esp+44] ;r1 = nn
++ lea r3, [ebp + costab wrt ..gotoff] ;tri = costab
++ lea r4, [r0+r1*8] ;r4 = fn = &fz[n]
++ mov [esp+16], r4
+ mov r4, 8 ;kx = k1/2
+- mov r3, costab ;tri = costab
+- lea r6, [r0+r1*8] ;r6 = fn = &fz[n]
+
+ pmov mm7, [r3]
+
+ loopalign 16
+ .do1
+ lea r3, [r3+16] ;tri += 2;
+- pmov mm6, [costab+8]
++ pmov mm6, [ebp + costab+8 wrt ..gotoff]
+ lea r2, [r4+r4*2] ;k3*fsize/2
+ mov r5, 4 ;i = 1*fsize
+
+@@ -324,7 +337,7 @@ proc fht_E3DN
+ pfadd mm3, mm4 ;f0+f2|f1+f3
+ pfsub mm5, mm4 ;f0-f2|f1-f3
+
+- cmp r0, r6
++ cmp r0, [esp + 16]
+ pmovd [r1+r4*2], mm3 ;gi[k1]
+ pmovd [r1+r2*2], mm5 ;gi[k3]
+ puphdq mm3, mm3
+@@ -343,12 +356,12 @@ proc fht_E3DN
+ ; mm7 = 0x800000000 | 0
+ ;
+ pmov mm5, mm6
+- mov r0, [esp+36] ; fz
++ mov r0, [esp+40] ; fz
+ puphdq mm5, mm5 ; c1 | c1
+ lea r1, [r0+r4*2]
+ pfadd mm5, mm5 ; c1+c1 | c1+c1
+ pfmul mm5, mm6 ; 2*c1*c1 | 2*c1*s1
+- pfsub mm5, [D_1_0_0_0] ; 2*c1*c1-1.0 | 2*c1*s1 = -c2 | s2
++ pfsub mm5, [ebp + D_1_0_0_0 wrt ..gotoff] ; 2*c1*c1-1.0 | 2*c1*s1 = -c2 | s2
+
+ pswapd mm4, mm5 ; s2 |-c2
+ pxor mm4, mm7 ; s2 | c2
+@@ -447,7 +460,7 @@ proc fht_E3DN
+
+ lea r0, [r0+r4*8]
+ lea r1, [r1+r4*8]
+- cmp r0, r6
++ cmp r0, [esp + 16]
+ pmov mm4, [esp]
+ pmov mm5, [esp+8]
+
+@@ -462,16 +475,16 @@ proc fht_E3DN
+
+ pfsub mm6, mm7 ; c1*a-s1*b | s1*a+c1*b
+ pswapd mm6, mm6 ; ??? ; s1*a+c1*b | c1*a-s1*b
+- pmov mm7, [costab]
++ pmov mm7, [ebp + costab wrt ..gotoff]
+ jb near .for
+
+- mov r0, [esp+36] ;fi
+- cmp r4, [esp+36+4]
++ mov r0, [esp+40] ;fi
++ cmp r4, [esp+40+4]
+ lea r4, [r4*4] ;kx *= 4
+
+ jb near .do1
+ .exitttt
+ femms
+- add esp,16
++ add esp,20
+ popd ebp, ebx, esi, edi
+ endproc
+diff -urp lame-398-orig/libmp3lame/i386/fftsse.nas lame-398/libmp3lame/i386/fftsse.nas
+--- lame-398-orig/libmp3lame/i386/fftsse.nas 2008-07-16 21:47:19.000000000 +0200
++++ lame-398/libmp3lame/i386/fftsse.nas 2008-07-16 21:48:10.000000000 +0200
+@@ -25,6 +25,12 @@ costab_fft:
+ S_SQRT2 dd 1.414213562
+
+ segment_code
++
++extern _GLOBAL_OFFSET_TABLE_
++get_pc.bp:
++ mov ebp, [esp]
++ retn
++
+ ;------------------------------------------------------------------------
+ ; by K. SAKAI
+ ; 99/08/18 PIII 23k[clk]
+@@ -40,15 +46,20 @@ fht_SSE:
+ push esi
+ push edi
+ push ebp
+-%assign _P 4*4
++
++%assign _P 4*5
+
+ ;2つ目のループ
+- mov eax,[esp+_P+4] ;eax=fz
+- mov ebp,[esp+_P+8] ;=n
++ mov eax,[esp+_P+0] ;eax=fz
++ mov ebp,[esp+_P+4] ;=n
+ shl ebp,3
+ add ebp,eax ; fn = fz + n, この関数終了まで不変
++ push ebp
++
++ call get_pc.bp
++ add ebp, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc
+
+- lea ecx,[costab_fft]
++ lea ecx,[ebp + costab_fft wrt ..gotoff]
+ xor eax,eax
+ mov al,8 ; =k1=1*(sizeof float) // 4, 16, 64, 256,...
+ .lp2: ; do{
+@@ -101,12 +112,12 @@ fht_SSE:
+ ; gi[k3] = g1 - g3;
+ fld dword [edi]
+ fadd dword [edi+eax*2]
+- fld dword [S_SQRT2]
++ fld dword [ebp + S_SQRT2 wrt ..gotoff]
+ fmul dword [edi+eax*4]
+
+ fld dword [edi]
+ fsub dword [edi+eax*2]
+- fld dword [S_SQRT2]
++ fld dword [ebp + S_SQRT2 wrt ..gotoff]
+ fmul dword [edi+edx*2]
+
+ fld st1
+@@ -121,7 +132,7 @@ fht_SSE:
+ fsubp st1,st0
+ fstp dword [edi+eax*4]
+
+- cmp ebx,ebp
++ cmp ebx,[esp]
+ jl near .lp20 ; while (fi<fn);
+
+
+@@ -136,17 +147,17 @@ fht_SSE:
+ ; s2 = c1*s1 + s1*c1 = 2*s1*c1;
+ shufps xmm7,xmm7,R4(1,0,0,1)
+ movss xmm5,xmm7 ; = { --, --, --, s1}
+- xorps xmm7,[Q_MMPP] ; = {-s1, -c1, +c1, +s1} -> 必要
++ xorps xmm7,[ebp + Q_MMPP wrt ..gotoff] ; = {-s1, -c1, +c1, +s1} -> 必要
+
+ addss xmm5,xmm5 ; = (--, --, --, 2*s1)
+ add esi,4 ; esi = fi = fz + i
+ shufps xmm5,xmm5,R4(0,0,0,0) ; = (2*s1, 2*s1, 2*s1, 2*s1)
+ mulps xmm5,xmm6 ; = (2*s1*c1, 2*s1*s1, 2*s1*s1, 2*s1*c1)
+- subps xmm5,[D_1100] ; = (--, 2*s1*s1-1, --, 2*s1*c1) = {-- -c2 -- s2}
++ subps xmm5,[ebp + D_1100 wrt ..gotoff] ; = (--, 2*s1*s1-1, --, 2*s1*c1) = {-- -c2 -- s2}
+ movaps xmm4,xmm5
+ shufps xmm5,xmm5,R4(2,0,2,0) ; = {-c2, s2, -c2, s2} -> 必要
+
+- xorps xmm4,[Q_MMPP] ; = {--, c2, --, s2}
++ xorps xmm4,[ebp + Q_MMPP wrt ..gotoff] ; = {--, c2, --, s2}
+ shufps xmm4,xmm4,R4(0,2,0,2) ; = {s2, c2, s2, c2} -> 必要
+
+ loopalign 16
+@@ -222,7 +233,7 @@ fht_SSE:
+ movss [edi+eax*4],xmm2
+ movss [esi+edx*2],xmm0
+ lea esi,[esi + eax*8] ; fi += (k1 * 4);
+- cmp esi,ebp
++ cmp esi,[esp]
+ jl near .lp21 ; while (fi<fn);
+
+
+@@ -247,7 +258,7 @@ fht_SSE:
+ shufps xmm0,xmm0,R4(1,1,0,0) ; = {t_s, t_s, t_c, t_c}
+ mulps xmm6,xmm0 ; = {c3*ts, s3*ts, s3*tc, c3*tc}
+ movhlps xmm4,xmm6 ; = {--, --, c3*ts, s3*ts}
+- xorps xmm4,[Q_MPMP] ; = {--, --, -c3*ts, s3*ts}
++ xorps xmm4,[ebp + Q_MPMP wrt ..gotoff] ; = {--, --, -c3*ts, s3*ts}
+ subps xmm6,xmm4 ; = {-,-, c3*ts+s3*tc, c3*tc-s3*ts}={-,-,s1,c1}
+
+ ; c3 = c1*t_c - s1*t_s;
+@@ -255,7 +266,7 @@ fht_SSE:
+ shufps xmm6,xmm6,0x14 ; = {c1, s1, s1, c1}
+ mulps xmm0,xmm6 ; = {ts*c1 ts*s1 tc*s1 tc*c1}
+ movhlps xmm3,xmm0
+- xorps xmm3,[Q_MPMP]
++ xorps xmm3,[ebp + Q_MPMP wrt ..gotoff]
+ subps xmm0,xmm3 ; = {--, --, s3, c3}
+
+ ; {s2 s4 c4 c2} = {2*s1*c1 2*s3*c3 1-2*s3*s3 1-2*s1*s1}
+@@ -268,7 +279,7 @@ fht_SSE:
+ sub edi,ebx ; edi = fz - i/2
+ mulps xmm7, xmm6 ; {s1*s1*2, s3*s3*2, s3*c3*2, s1*c1*2}
+ lea esi,[edi + ebx*2] ; esi = fi = fz +i/2
+- subps xmm7, [D_1100] ; {-c2, -c4, s4, s2}
++ subps xmm7, [ebp + D_1100 wrt ..gotoff] ; {-c2, -c4, s4, s2}
+ lea edi,[edi + eax*2-4] ; edi = gi = fz +k1-i/2
+
+ ; fi = fz +i;
+@@ -286,7 +297,7 @@ fht_SSE:
+ ; d = s2*fi[k3 ] - c2*gi[k3 ];
+
+ movaps xmm4,xmm7 ; = {-c2 -c4 s4 s2}
+- xorps xmm4,[Q_MMPP] ; = { c2 c4 s4 s2}
++ xorps xmm4,[ebp + Q_MMPP wrt ..gotoff] ; = { c2 c4 s4 s2}
+ shufps xmm4,xmm4,0x1B ; = { s2 s4 c4 c2}
+ movlps xmm0,[esi+eax*2]
+ movlps xmm1,[edi+eax*2]
+@@ -390,7 +401,7 @@ fht_SSE:
+ ; fi += k4;
+ lea edi,[edi + eax*8] ; gi += (k1 * 4);
+ lea esi,[esi + eax*8] ; fi += (k1 * 4);
+- cmp esi,ebp
++ cmp esi,[esp]
+ jl near .lp220 ; while (fi<fn);
+ ; } while (fi<fn);
+
+@@ -405,6 +416,7 @@ fht_SSE:
+ cmp eax,[esp+_P+8] ; while ((k1 * 4)<n);
+ jle near .lp2
+ pop ebp
++ pop ebp
+ pop edi
+ pop esi
+ pop ebx