1 files changed, 585 insertions, 0 deletions
diff --git a/media-sound/lame/files/lame-3.98-pic-fix.patch b/media-sound/lame/files/lame-3.98-pic-fix.patch
new file mode 100644
index 000000000..8874611cf
--- /dev/null
+++ b/media-sound/lame/files/lame-3.98-pic-fix.patch
@@ -0,0 +1,585 @@
+diff -urp lame-398-orig/libmp3lame/i386/choose_table.nas lame-398/libmp3lame/i386/choose_table.nas
+--- lame-398-orig/libmp3lame/i386/choose_table.nas	2008-07-16 21:47:19.000000000 +0200
++++ lame-398/libmp3lame/i386/choose_table.nas	2008-07-16 21:47:30.000000000 +0200
+@@ -111,33 +111,42 @@ choose_table_H
+ 	dw	0x1d16, 0x1e16, 0x1e17, 0x1f17, 0x1f17
+ 
+ choose_jump_table_L:
+-	dd	table_MMX.L_case_0
+-	dd	table_MMX.L_case_1
+-	dd	table_MMX.L_case_2
+-	dd	table_MMX.L_case_3
+-	dd	table_MMX.L_case_45
+-	dd	table_MMX.L_case_45
+-	dd	table_MMX.L_case_67
+-	dd	table_MMX.L_case_67
+-	dd	table_MMX.L_case_8_15
+-	dd	table_MMX.L_case_8_15
+-	dd	table_MMX.L_case_8_15
+-	dd	table_MMX.L_case_8_15
+-	dd	table_MMX.L_case_8_15
+-	dd	table_MMX.L_case_8_15
+-	dd	table_MMX.L_case_8_15
+-	dd	table_MMX.L_case_8_15
++	dd	table_MMX.L_case_0    - choose_table_MMX
++	dd	table_MMX.L_case_1    - choose_table_MMX
++	dd	table_MMX.L_case_2    - choose_table_MMX
++	dd	table_MMX.L_case_3    - choose_table_MMX
++	dd	table_MMX.L_case_45   - choose_table_MMX
++	dd	table_MMX.L_case_45   - choose_table_MMX
++	dd	table_MMX.L_case_67   - choose_table_MMX
++	dd	table_MMX.L_case_67   - choose_table_MMX
++	dd	table_MMX.L_case_8_15 - choose_table_MMX
++	dd	table_MMX.L_case_8_15 - choose_table_MMX
++	dd	table_MMX.L_case_8_15 - choose_table_MMX
++	dd	table_MMX.L_case_8_15 - choose_table_MMX
++	dd	table_MMX.L_case_8_15 - choose_table_MMX
++	dd	table_MMX.L_case_8_15 - choose_table_MMX
++	dd	table_MMX.L_case_8_15 - choose_table_MMX
++	dd	table_MMX.L_case_8_15 - choose_table_MMX
+ 
+ 	segment_code
+ ;
+ ; use MMX
+ ;
+ 
++extern  _GLOBAL_OFFSET_TABLE_
++get_pc.bp:
++	mov ebp, [esp]
++	retn
++
+ 	align	16
+ ; int choose_table(int *ix, int *end, int *s)
+ choose_table_MMX:
+-	mov	ecx,[esp+4]	;ecx = begin
+-	mov	edx,[esp+8]	;edx = end
++	push	ebp
++	call	get_pc.bp
++	add	ebp, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc
++
++	mov	ecx,[esp+8]	;ecx = begin
++	mov	edx,[esp+12]	;edx = end
+ 	sub	ecx,edx		;ecx = begin-end(should be minus)
+ 	test	ecx,8
+  	pxor	mm0,mm0		;mm0=[0:0]
+@@ -169,13 +178,16 @@ choose_table_MMX:
+ 
+ 	cmp	eax,15
+ 	ja	.with_ESC
+-	jmp	[choose_jump_table_L+eax*4]
++	lea	ecx,[ebp + choose_table_MMX wrt ..gotoff]
++	add	ecx,[ebp + choose_jump_table_L+eax*4 wrt ..gotoff]
++	jmp 	ecx
+ 
+ .with_ESC1:
+ 	emms
+-	mov	ecx, [esp+12]	; *s
++	mov	ecx, [esp+16]	; *s
+ 	mov	[ecx], eax
+ 	or	eax,-1
++	pop	ebp
+ 	ret
+ 
+ .with_ESC:
+@@ -187,12 +199,12 @@ choose_table_MMX:
+ 	push	esi
+ 	bsr	eax, eax
+ %assign _P 4*2
+-	movq    mm5, [D15_15_15_15]
+-	movq	mm6, [D14_14_14_14]
+-	movq	mm3, [mul_add]
++	movq    mm5, [ebp + D15_15_15_15 wrt ..gotoff]
++	movq	mm6, [ebp + D14_14_14_14 wrt ..gotoff]
++	movq	mm3, [ebp + mul_add wrt ..gotoff]
+ 
+-	mov	ecx, [esp+_P+4]		; = ix
+-;	mov	edx, [esp+_P+8]		; = end
++	mov	ecx, [esp+_P+8]		; = ix
++;	mov	edx, [esp+_P+12]	; = end
+ 	sub	ecx, edx
+ 
+ 	xor	esi, esi	; sum = 0
+@@ -209,7 +221,7 @@ choose_table_MMX:
+ 	psubw	mm7, mm2	; 14より大きいとき linbits_sum++;
+ 	pmaddwd	mm0, mm3	; {0, 0, y, x}*{1, 16, 1, 16}
+ 	movd	ebx, mm0
+-	mov	esi, [largetbl+ebx*4+(16*16+16)*4]
++	mov	esi, [ebp + largetbl+ebx*4+(16*16+16)*4 wrt ..gotoff]
+ 
+ 	jz	.H_dual_exit
+ 
+@@ -224,9 +236,9 @@ choose_table_MMX:
+ 	pmaddwd	mm0, mm3	; {y, x, y, x}*{1, 16, 1, 16}
+ 	movd	ebx, mm0
+ 	punpckhdq	mm0,mm0
+-	add	esi, [largetbl+ebx*4+(16*16+16)*4]
++	add	esi, [ebp + largetbl+ebx*4+(16*16+16)*4 wrt ..gotoff]
+ 	movd	ebx, mm0
+-	add	esi, [largetbl+ebx*4+(16*16+16)*4]
++	add	esi, [ebp + largetbl+ebx*4+(16*16+16)*4 wrt ..gotoff]
+ 	add	ecx, 16
+ 	psubw	mm7, mm2	; 14より大きいとき linbits_sum++;
+ 	jnz	.H_dual_lp1
+@@ -237,8 +249,8 @@ choose_table_MMX:
+ 	paddd	mm7,mm1
+ 	punpckldq	mm7,mm7
+ 
+-	pmaddwd	mm7, [linbits32+eax*8]	; linbits
+-	mov	ax, [choose_table_H+eax*2]
++	pmaddwd	mm7, [ebp + linbits32+eax*8 wrt ..gotoff]	; linbits
++	mov	ax, [ebp + choose_table_H+eax*2 wrt ..gotoff]
+ 
+ 	movd	ecx, mm7
+ 	punpckhdq	mm7,mm7
+@@ -261,54 +273,57 @@ choose_table_MMX:
+ 	mov	edx, ecx
+ 	shr	eax, 8
+ .chooseE_s1:
+-	mov	ecx, [esp+12] ; *s
++	mov	ecx, [esp+16] ; *s
+ 	and	eax, 0xff
+ 	add	[ecx], edx
++	pop	ebp
+ 	ret
+ 
+ table_MMX.L_case_0:
+ 	emms
++	pop	ebp
+ 	ret
+ 
+ table_MMX.L_case_1:
+ 	emms
+-	mov	eax, [esp+12] ; *s
+-	mov	ecx, [esp+4] ; *ix
++	mov	eax, [esp+16] ; *s
++	mov	ecx, [esp+8] ; *ix
+ 	sub	ecx, edx
+ 	push	ebx
+ .lp:
+ 	mov	ebx, [edx+ecx]
+ 	add	ebx, ebx
+ 	add	ebx, [edx+ecx+4]
+-	movzx	ebx, byte [ebx+t1l]
++	movzx	ebx, byte [ebp + ebx+t1l wrt ..gotoff]
+ 	add	[eax], ebx
+ 	add	ecx, 8
+ 	jnz	.lp
+ 	pop	ebx
+ 	mov	eax, 1
++	pop	ebp
+ 	ret
+ 
+ table_MMX.L_case_45:
+ 	push	dword 7
+-	mov	ecx, tableABC+9*8
++	lea	ecx, [ebp + tableABC+9*8 wrt ..gotoff]
+ 	jmp	from3
+ 
+ table_MMX.L_case_67:
+ 	push	dword 10
+-	mov	ecx, tableABC
++	lea	ecx, [ebp + tableABC wrt ..gotoff]
+ 	jmp	from3
+ 
+ table_MMX.L_case_8_15:
+ 	push	dword 13
+-	mov	ecx, tableDEF
++	lea	ecx, [ebp + tableDEF wrt ..gotoff]
+ from3:
+-	mov	eax,[esp+8]	;eax = *begin
+-;	mov	edx,[esp+12]	;edx = *end
++	mov	eax,[esp+12]	;eax = *begin
++;	mov	edx,[esp+16]	;edx = *end
+ 
+ 	push	ebx
+ 	sub	eax, edx
+ 
+-	movq	mm5,[mul_add]
++	movq	mm5,[ebp + mul_add wrt ..gotoff]
+ 	pxor	mm2,mm2	;mm2 = sum
+ 
+ 	test	eax, 8
+@@ -361,22 +376,23 @@ from3:
+ .choose3_s2:
+ 	pop	ecx
+ 	add	eax, ecx
+-	mov	ecx, [esp+12] ; *s
++	mov	ecx, [esp+16] ; *s
+ 	add	[ecx], edx
++	pop	ebp
+ 	ret
+ 
+ table_MMX.L_case_2:
+ 	push	dword 2
+-	mov	ecx,table23
+-	pmov	mm5,[mul_add23]
++	lea	ecx,[ebp + table23 wrt ..gotoff]
++	pmov	mm5,[ebp + mul_add23 wrt ..gotoff]
+ 	jmp	from2
+ table_MMX.L_case_3:
+ 	push	dword 5
+-	mov	ecx,table56
+-	pmov	mm5,[mul_add56]
++	lea	ecx,[ebp + table56 wrt ..gotoff]
++	pmov	mm5,[ebp + mul_add56 wrt ..gotoff]
+ from2:
+-	mov	eax,[esp+8]	;eax = *begin
+-;	mov	edx,[esp+12]	;edx = *end
++	mov	eax,[esp+12]	;eax = *begin
++;	mov	edx,[esp+16]	;edx = *end
+ 	push	ebx
+ 	push	edi
+ 
+@@ -426,8 +442,9 @@ from2:
+ 	mov	edx, ecx
+ 	inc	eax
+ .choose2_s1:
+-	mov	ecx, [esp+12] ; *s
++	mov	ecx, [esp+16] ; *s
+ 	add	[ecx], edx
++	pop	ebp
+ 	ret
+ 
+ 	end
+diff -urp lame-398-orig/libmp3lame/i386/fft3dn.nas lame-398/libmp3lame/i386/fft3dn.nas
+--- lame-398-orig/libmp3lame/i386/fft3dn.nas	2008-07-16 21:47:19.000000000 +0200
++++ lame-398/libmp3lame/i386/fft3dn.nas	2008-07-16 21:47:30.000000000 +0200
+@@ -24,26 +24,35 @@ D_1_0_0_0	dd	0.0		, 1.0
+ 
+ 	segment_code
+ 
++extern  _GLOBAL_OFFSET_TABLE_
++get_pc.bp:
++	mov ebp, [esp]
++	retn
++
+ ;void fht_3DN(float *fz, int nn);
+ 
+ proc	fht_3DN
+ 
+ 	pushd	ebp, ebx, esi, edi
+ 
+-	mov	r0, [esp+20]		;fi
+-	mov	r1, [esp+24]		;r1 = nn
+-	sub	esp, 16
++	sub	esp, 20
++
++	call	get_pc.bp
++	add	ebp, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc
+ 
++	mov	r0, [esp+40]		;fi
++	mov	r1, [esp+44]		;r1 = nn
++	lea	r3, [ebp + costab wrt ..gotoff]		;tri = costab
++	lea	r4, [r0+r1*8]		;r4 = fn = &fz[n]
++	mov	[esp+16], r4
+ 	mov	r4, 8			;kx = k1/2
+-	mov	r3, costab		;tri = costab
+-	lea	r6, [r0+r1*8]		;r6 = fn = &fz[n]
+ 
+ 	pmov	mm7, [r3]
+ 
+ 	loopalign 16
+ .do1
+ 	lea	r3, [r3+16]	;tri += 2;
+-	pmov	mm6, [costab+8]
++	pmov	mm6, [ebp + costab+8 wrt ..gotoff]
+ 	lea	r2, [r4+r4*2]		;k3*fsize/2
+ 	mov	r5, 4		;i = 1*fsize
+ 
+@@ -104,7 +113,7 @@ proc	fht_3DN
+ 	pmovd	[r1+r4*4], mm4	;gi[k2]
+ 	puphdq	mm4, mm4
+ 
+-	cmp	r0, r6
++	cmp	r0, [esp + 16]
+ 	pmovd	[r1+r4*2], mm0	;gi[k1]
+ 	pmovd	[r1+r2*2], mm4	;gi[k3]
+ 
+@@ -119,12 +128,12 @@ proc	fht_3DN
+ ; mm7 = 0x800000000 | 0
+ ;
+ 	pmov	mm1, mm6
+-	mov	r0, [esp+36]	; fz
++	mov	r0, [esp+40]	; fz
+ 	puphdq	mm1, mm1	; c1 | c1
+ 	lea	r1, [r0+r4*2]
+ 	pfadd	mm1, mm1	; c1+c1 | c1+c1
+ 	pfmul	mm1, mm6	; 2*c1*c1 | 2*c1*s1
+-	pfsub	mm1, [D_1_0_0_0] ; 2*c1*c1-1.0 | 2*c1*s1 = -c2 | s2
++	pfsub	mm1, [ebp + D_1_0_0_0 wrt ..gotoff] ; 2*c1*c1-1.0 | 2*c1*s1 = -c2 | s2
+ 
+ 	pmov	mm0, mm1
+ 	pxor	mm7, mm6	; c1 | -s1
+@@ -134,7 +143,7 @@ proc	fht_3DN
+ 	puphdq	mm0, mm2	; s2 | c2
+ 	puphdq	mm6, mm3	;-s1 | c1
+ 
+-	pxor	mm0, [costab]	; c2 | -s2
++	pxor	mm0, [ebp + costab wrt ..gotoff]	; c2 | -s2
+ 
+ ; mm0 =  s2| c2
+ ; mm1 = -c2| s2
+@@ -233,7 +242,7 @@ proc	fht_3DN
+ 
+ 	lea	r0, [r0+r4*8]
+ 	lea	r1, [r1+r4*8]
+-	cmp	r0, r6
++	cmp	r0, [esp + 16]
+ 	pmov	mm0, [esp]
+ 	pmov	mm1, [esp+8]
+ 
+@@ -249,17 +258,17 @@ proc	fht_3DN
+ 	pfsub	mm6, mm7	; c1*a-s1*b | s1*a+c1*b
+ 	pupldq	mm7,mm6
+ 	puphdq	mm6,mm7
+-	pmov	mm7, [costab]
++	pmov	mm7, [ebp + costab wrt ..gotoff]
+ 	jb near	.for
+ 
+-	mov	r0, [esp+36]	;fi
+-	cmp	r4, [esp+36+4]
++	mov	r0, [esp+40]	;fi
++	cmp	r4, [esp+40+4]
+ 	lea	r4, [r4*4]	;kx *= 4
+ 
+ 	jb near	.do1
+ .exitttt
+ 	femms
+-	add	esp,16
++	add	esp,20
+ 	popd	ebp, ebx, esi, edi
+ endproc
+ 
+@@ -270,20 +279,24 @@ proc	fht_E3DN
+ 
+ 	pushd	ebp, ebx, esi, edi
+ 
+-	mov	r0, [esp+20]		;fi
+-	mov	r1, [esp+24]		;r1 = nn
+-	sub	esp, 16
++	sub	esp, 20
++
++	call	get_pc.bp
++	add	ebp, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc
+ 
++	mov	r0, [esp+40]		;fi
++	mov	r1, [esp+44]		;r1 = nn
++	lea	r3, [ebp + costab wrt ..gotoff]		;tri = costab
++	lea	r4, [r0+r1*8]		;r4 = fn = &fz[n]
++	mov	[esp+16], r4
+ 	mov	r4, 8			;kx = k1/2
+-	mov	r3, costab		;tri = costab
+-	lea	r6, [r0+r1*8]		;r6 = fn = &fz[n]
+ 
+ 	pmov	mm7, [r3]
+ 
+ 	loopalign 16
+ .do1
+ 	lea	r3, [r3+16]	;tri += 2;
+-	pmov	mm6, [costab+8]
++	pmov	mm6, [ebp + costab+8 wrt ..gotoff]
+ 	lea	r2, [r4+r4*2]		;k3*fsize/2
+ 	mov	r5, 4		;i = 1*fsize
+ 
+@@ -324,7 +337,7 @@ proc	fht_E3DN
+ 	pfadd	mm3, mm4	;f0+f2|f1+f3
+ 	pfsub	mm5, mm4	;f0-f2|f1-f3
+ 
+-	cmp	r0, r6
++	cmp	r0, [esp + 16]
+ 	pmovd	[r1+r4*2], mm3	;gi[k1]
+ 	pmovd	[r1+r2*2], mm5	;gi[k3]
+ 	puphdq	mm3, mm3
+@@ -343,12 +356,12 @@ proc	fht_E3DN
+ ; mm7 = 0x800000000 | 0
+ ;
+ 	pmov	mm5, mm6
+-	mov	r0, [esp+36]	; fz
++	mov	r0, [esp+40]	; fz
+ 	puphdq	mm5, mm5	; c1 | c1
+ 	lea	r1, [r0+r4*2]
+ 	pfadd	mm5, mm5	; c1+c1 | c1+c1
+ 	pfmul	mm5, mm6	; 2*c1*c1 | 2*c1*s1
+-	pfsub	mm5, [D_1_0_0_0] ; 2*c1*c1-1.0 | 2*c1*s1 = -c2 | s2
++	pfsub	mm5, [ebp + D_1_0_0_0 wrt ..gotoff] ; 2*c1*c1-1.0 | 2*c1*s1 = -c2 | s2
+ 
+ 	pswapd	mm4, mm5	; s2 |-c2
+ 	pxor	mm4, mm7	; s2 | c2
+@@ -447,7 +460,7 @@ proc	fht_E3DN
+ 
+ 	lea	r0, [r0+r4*8]
+ 	lea	r1, [r1+r4*8]
+-	cmp	r0, r6
++	cmp	r0, [esp + 16]
+ 	pmov	mm4, [esp]
+ 	pmov	mm5, [esp+8]
+ 
+@@ -462,16 +475,16 @@ proc	fht_E3DN
+ 
+ 	pfsub	mm6, mm7	; c1*a-s1*b | s1*a+c1*b
+ 	pswapd	mm6, mm6 ; ???	; s1*a+c1*b | c1*a-s1*b
+-	pmov	mm7, [costab]
++	pmov	mm7, [ebp + costab wrt ..gotoff]
+ 	jb near	.for
+ 
+-	mov	r0, [esp+36]	;fi
+-	cmp	r4, [esp+36+4]
++	mov	r0, [esp+40]	;fi
++	cmp	r4, [esp+40+4]
+ 	lea	r4, [r4*4]	;kx *= 4
+ 
+ 	jb near	.do1
+ .exitttt
+ 	femms
+-	add	esp,16
++	add	esp,20
+ 	popd	ebp, ebx, esi, edi
+ endproc
+diff -urp lame-398-orig/libmp3lame/i386/fftsse.nas lame-398/libmp3lame/i386/fftsse.nas
+--- lame-398-orig/libmp3lame/i386/fftsse.nas	2008-07-16 21:47:19.000000000 +0200
++++ lame-398/libmp3lame/i386/fftsse.nas	2008-07-16 21:48:10.000000000 +0200
+@@ -25,6 +25,12 @@ costab_fft:
+ S_SQRT2	dd	1.414213562
+ 
+ 	segment_code
++
++extern  _GLOBAL_OFFSET_TABLE_
++get_pc.bp:
++	mov ebp, [esp]
++	retn
++
+ ;------------------------------------------------------------------------
+ ;	by K. SAKAI
+ ;	99/08/18	PIII 23k[clk]
+@@ -40,15 +46,20 @@ fht_SSE:
+ 	push	esi
+ 	push	edi
+ 	push	ebp
+-%assign _P 4*4
++
++%assign _P 4*5
+ 
+ 	;2������������
+-	mov	eax,[esp+_P+4]	;eax=fz
+-	mov	ebp,[esp+_P+8]	;=n
++	mov	eax,[esp+_P+0]	;eax=fz
++	mov	ebp,[esp+_P+4]	;=n
+ 	shl	ebp,3
+ 	add	ebp,eax		; fn  = fz + n, ��������������������
++	push	ebp
++
++	call	get_pc.bp
++	add	ebp, _GLOBAL_OFFSET_TABLE_ + $$ - $ wrt ..gotpc
+ 
+-	lea	ecx,[costab_fft]
++	lea	ecx,[ebp + costab_fft wrt ..gotoff]
+ 	xor	eax,eax
+ 	mov	al,8		; =k1=1*(sizeof float)	// 4, 16, 64, 256,...
+ .lp2:				; do{
+@@ -101,12 +112,12 @@ fht_SSE:
+ ;                       gi[k3] = g1     - g3;
+ 	fld	dword [edi]
+ 	fadd	dword [edi+eax*2]
+-	fld	dword [S_SQRT2]
++	fld	dword [ebp + S_SQRT2 wrt ..gotoff]
+ 	fmul	dword [edi+eax*4]
+ 
+ 	fld	dword [edi]
+ 	fsub	dword [edi+eax*2]
+-	fld	dword [S_SQRT2]
++	fld	dword [ebp + S_SQRT2 wrt ..gotoff]
+ 	fmul	dword [edi+edx*2]
+ 
+ 	fld	st1
+@@ -121,7 +132,7 @@ fht_SSE:
+ 	fsubp	st1,st0
+ 	fstp	dword [edi+eax*4]
+ 
+-	cmp	ebx,ebp
++	cmp	ebx,[esp]
+ 	jl	near .lp20		; while (fi<fn);
+ 
+ 
+@@ -136,17 +147,17 @@ fht_SSE:
+ ;                       s2 = c1*s1 + s1*c1 = 2*s1*c1;
+ 	shufps	xmm7,xmm7,R4(1,0,0,1)
+ 	movss	xmm5,xmm7		; = { --,  --,  --, s1}
+-	xorps	xmm7,[Q_MMPP]	; = {-s1, -c1, +c1, +s1} -> ����
++	xorps	xmm7,[ebp + Q_MMPP wrt ..gotoff]	; = {-s1, -c1, +c1, +s1} -> ����
+ 
+ 	addss	xmm5,xmm5		; = (--, --,  --, 2*s1)
+ 	add	esi,4		; esi = fi = fz + i
+ 	shufps	xmm5,xmm5,R4(0,0,0,0)	; = (2*s1, 2*s1, 2*s1, 2*s1)
+ 	mulps	xmm5,xmm6		; = (2*s1*c1, 2*s1*s1, 2*s1*s1, 2*s1*c1)
+-	subps	xmm5,[D_1100]		; = (--, 2*s1*s1-1, --, 2*s1*c1) = {-- -c2 -- s2}
++	subps	xmm5,[ebp + D_1100 wrt ..gotoff]		; = (--, 2*s1*s1-1, --, 2*s1*c1) = {-- -c2 -- s2}
+ 	movaps	xmm4,xmm5
+ 	shufps	xmm5,xmm5,R4(2,0,2,0)	; = {-c2, s2, -c2, s2} -> ����
+ 
+-	xorps	xmm4,[Q_MMPP]		; = {--, c2, --, s2}
++	xorps	xmm4,[ebp + Q_MMPP wrt ..gotoff]		; = {--, c2, --, s2}
+ 	shufps	xmm4,xmm4,R4(0,2,0,2)	; = {s2, c2, s2, c2} -> ����
+ 
+ 	loopalign	16
+@@ -222,7 +233,7 @@ fht_SSE:
+ 	movss	[edi+eax*4],xmm2
+ 	movss	[esi+edx*2],xmm0
+ 	lea	esi,[esi + eax*8] ; fi += (k1 * 4);
+-	cmp	esi,ebp
++	cmp	esi,[esp]
+ 	jl	near .lp21		; while (fi<fn);
+ 
+ 
+@@ -247,7 +258,7 @@ fht_SSE:
+ 	shufps	xmm0,xmm0,R4(1,1,0,0)	; = {t_s, t_s, t_c, t_c}
+ 	mulps	xmm6,xmm0	; = {c3*ts, s3*ts, s3*tc, c3*tc}
+ 	movhlps	xmm4,xmm6	; = {--,    --,    c3*ts, s3*ts}
+-	xorps	xmm4,[Q_MPMP]	; = {--,    --,   -c3*ts, s3*ts}
++	xorps	xmm4,[ebp + Q_MPMP wrt ..gotoff]	; = {--,    --,   -c3*ts, s3*ts}
+ 	subps	xmm6,xmm4	; = {-,-, c3*ts+s3*tc, c3*tc-s3*ts}={-,-,s1,c1}
+ 
+ ;                       c3 = c1*t_c - s1*t_s;
+@@ -255,7 +266,7 @@ fht_SSE:
+ 	shufps	xmm6,xmm6,0x14	; = {c1, s1, s1, c1}
+ 	mulps	xmm0,xmm6	; = {ts*c1 ts*s1 tc*s1 tc*c1}
+ 	movhlps	xmm3,xmm0
+-	xorps	xmm3,[Q_MPMP]
++	xorps	xmm3,[ebp + Q_MPMP wrt ..gotoff]
+ 	subps	xmm0,xmm3	; = {--, --, s3, c3}
+ 
+ ; {s2 s4 c4 c2} = {2*s1*c1 2*s3*c3 1-2*s3*s3 1-2*s1*s1}
+@@ -268,7 +279,7 @@ fht_SSE:
+ 	sub	edi,ebx			; edi = fz - i/2
+ 	mulps	xmm7, xmm6		; {s1*s1*2, s3*s3*2, s3*c3*2, s1*c1*2}
+ 	lea	esi,[edi + ebx*2]	; esi = fi = fz +i/2
+-	subps	xmm7, [D_1100]		; {-c2, -c4, s4, s2}
++	subps	xmm7, [ebp + D_1100 wrt ..gotoff]		; {-c2, -c4, s4, s2}
+ 	lea	edi,[edi + eax*2-4]	; edi = gi = fz +k1-i/2
+ 
+ ;                       fi = fz +i;
+@@ -286,7 +297,7 @@ fht_SSE:
+ ;                               d       = s2*fi[k3  ] - c2*gi[k3  ];
+ 
+ 	movaps	xmm4,xmm7	; = {-c2 -c4  s4  s2}
+-	xorps	xmm4,[Q_MMPP]	; = { c2  c4  s4  s2}
++	xorps	xmm4,[ebp + Q_MMPP wrt ..gotoff]	; = { c2  c4  s4  s2}
+ 	shufps	xmm4,xmm4,0x1B	; = { s2  s4  c4  c2}
+ 	movlps	xmm0,[esi+eax*2]
+ 	movlps	xmm1,[edi+eax*2]
+@@ -390,7 +401,7 @@ fht_SSE:
+ ;                               fi     += k4;
+ 	lea	edi,[edi + eax*8] ; gi += (k1 * 4);
+ 	lea	esi,[esi + eax*8] ; fi += (k1 * 4);
+-	cmp	esi,ebp
++	cmp	esi,[esp]
+ 	jl	near .lp220		; while (fi<fn);
+ ;                       } while (fi<fn);
+ 
+@@ -405,6 +416,7 @@ fht_SSE:
+ 	cmp	eax,[esp+_P+8]	; while ((k1 * 4)<n);
+ 	jle	near .lp2
+ 	pop	ebp
++	pop	ebp
+ 	pop	edi
+ 	pop	esi
+ 	pop	ebx