summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZhang Le <r0bertz@gentoo.org>2010-12-25 21:43:13 +0800
committerZhang Le <r0bertz@gentoo.org>2010-12-25 21:43:13 +0800
commitc520f4ed4a833ff86c8fe184e789973adb68ceab (patch)
tree353aa6893590e0dff85d3d8074d0e62c66ba8c65
parentadded xulrunner-1.9.2.13, firefox-3.6.13 and xorg-server-1.9.2.902 (diff)
downloadloongson-c520f4ed4a833ff86c8fe184e789973adb68ceab.tar.gz
loongson-c520f4ed4a833ff86c8fe184e789973adb68ceab.tar.bz2
loongson-c520f4ed4a833ff86c8fe184e789973adb68ceab.zip
added loongson2f patch for pixman
Signed-off-by: Zhang Le <r0bertz@gentoo.org>
-rw-r--r--net-libs/xulrunner/Manifest7
-rw-r--r--x11-libs/pixman/Manifest2
-rw-r--r--x11-libs/pixman/files/pixman-loongson2f.patch2745
-rw-r--r--x11-libs/pixman/pixman-9999.ebuild50
4 files changed, 2804 insertions, 0 deletions
diff --git a/net-libs/xulrunner/Manifest b/net-libs/xulrunner/Manifest
index 68ddaa8..a1c6a4d 100644
--- a/net-libs/xulrunner/Manifest
+++ b/net-libs/xulrunner/Manifest
@@ -20,6 +20,10 @@ AUX xulrunner-2.0_beta5_pre-chromium-mips.patch 7240 RMD160 33307a04b330e01d4779
AUX xulrunner-2.0_beta5_pre-mips-bus-error.patch 957 RMD160 fab47698b8e22205d5b43ee3a5268b5776b32663 SHA1 d4bc49bc1d4871ec2001e72825cdf1e0035fd2fa SHA256 609c10f9f35fe039a472e802af8628c89c796f8d257687f05f43fde846d1a109
AUX xulrunner-2.0_beta5_pre-mips-n32.patch 24738 RMD160 63995685e4343420469fbb056e051afd28cc56db SHA1 e6435e8e9b15db8103b6d6d0c9213ce36ec88f94 SHA256 24ac461bffbd7c2978a2cd30dc4cbed95370a77a2db054ade2794e902620d3de
AUX xulrunner-2.0_beta5_pre.patch 96688 RMD160 e8190a6c67210a08f06a64c02d1c8d017f5462dc SHA1 d8e0e29b597076945aa1303101df884c8880b6e6 SHA256 0325a78e4b22fa55a38c242001f75afdb92670a629fab6b99a1d5d013b56288b
+AUX xulrunner-2.0_beta8_pre-chromium-mips.patch 7240 RMD160 33307a04b330e01d477929dd7327bdf567226fad SHA1 f0f8d5792ee7b9d4d7c3e4f3cc2d9a327ecee9c6 SHA256 25a776115233784d998b7e9fe76d991fc410938e7793c815ed17509736c0604a
+AUX xulrunner-2.0_beta8_pre-mips-bus-error.patch 957 RMD160 fab47698b8e22205d5b43ee3a5268b5776b32663 SHA1 d4bc49bc1d4871ec2001e72825cdf1e0035fd2fa SHA256 609c10f9f35fe039a472e802af8628c89c796f8d257687f05f43fde846d1a109
+AUX xulrunner-2.0_beta8_pre-mips-n32.patch 24738 RMD160 63995685e4343420469fbb056e051afd28cc56db SHA1 e6435e8e9b15db8103b6d6d0c9213ce36ec88f94 SHA256 24ac461bffbd7c2978a2cd30dc4cbed95370a77a2db054ade2794e902620d3de
+AUX xulrunner-2.0_beta8_pre.patch 96688 RMD160 e8190a6c67210a08f06a64c02d1c8d017f5462dc SHA1 d8e0e29b597076945aa1303101df884c8880b6e6 SHA256 0325a78e4b22fa55a38c242001f75afdb92670a629fab6b99a1d5d013b56288b
AUX xulrunner-chromium-mips.patch 7752 RMD160 7fd4aab6cd2a4c0d22613812909ed45fd23f5f47 SHA1 f4c0352a809433cfff366833156c7de420f2b46c SHA256 08f5935cc72cc05f4dc9e8500a7f351e0161b5398630a00f44d9a17cd8268d15
AUX xulrunner-default-prefs.js 677 RMD160 73ec130da332c1d6517a6f838d5fab0cc134289a SHA1 b9efc3ef5e95ad3081f6b68d771e32e848b4b9ac SHA256 53493080276f221f1619153c114caf268206a6fa107972b33a4eb7f1c1cf6fe8
AUX xulrunner-mips-bus-error.patch 1997 RMD160 22b41e26d5ac86b53fc35c5a9220fd80520a99a6 SHA1 384f1e3c507ababbebd8bbc4c08b04490b837436 SHA256 1b3553e751711cfda9e91295fb9bdaf892eee6b0d9f0ac9fc44a532105a784c0
@@ -30,13 +34,16 @@ DIST firefox-3.6.13.source.tar.bz2 51478675 RMD160 658409db60a8f412ac74ff15a8c1d
DIST firefox-3.6.8.source.tar.bz2 51238976 RMD160 14e245c643d41a1da25fe4dbc15d6ae466a19e5b SHA1 4936e543f6c7492c5954cbd5b30ddda6b20e3797 SHA256 fc609cc6a0ddaa2a9ebd8511ec39ae4a404e1107a12e07b233e2afca51d9a10e
DIST firefox-3.7a5.source.tar.bz2 50425212 RMD160 aa8874224fc080897d5de3c61adff83b9e91bb7c SHA1 6e71e59898baa5d8a9c51ad354accc02fa0ca46a SHA256 38644edc35d922a6cfb1c7ebc27dfb75d5a9bf870fa19d06821e1a9131b33b2d
DIST firefox-4.0b5_pre_414ff9016349.source.tar.bz2 62665981 RMD160 2a1ee9e1b8596f6ec84e3658cb671bec4ebed969 SHA1 bead1813f2b78885aa83556c2be458b3dbae8d1d SHA256 30eac26556c1d39b960afba86bf842aa76580510a57b99431e9120dbdc04a3e1
+DIST firefox-4.0b8_pre_25d77f95d92c.source.tar.bz2 63362878 RMD160 5658b1e0f66f8bff0d66ec983e585b988019a036 SHA1 791fbeb50d6ed81c41dfbfd18dbf245e3d8d5e9b SHA256 4fb9089ee665fc2ffd8bab4e4eac362b95d219330f86dab12e477e32c0e8790d
DIST xulrunner-1.9.2-patches-0.6.tar.bz2 16308 RMD160 6066ea0b01f8b6c9eae1a0692ad9a07f04ab7327 SHA1 ebba861e23ef36256dc29d7d04820222be8e4241 SHA256 6a9d709f9ee15d6c139a35f8a6826cd7b473188290bd5e81e56297d8f36e2235
DIST xulrunner-1.9.2-patches-0.9.tar.bz2 15320 RMD160 2dfb86650c0a9a1f0c5bf86df7434c7c286658d6 SHA1 a2276c708eec2e92611ac1e7a2eaf25c39263d30 SHA256 5cca685442f1ab920eb1ba86719a38a437ae05bd5cefc42ababc2556425ebf5a
DIST xulrunner-1.9.3-patches-0.1.tar.bz2 4761 RMD160 d13fa5e53235fbe4eba50f210107b352530d95ab SHA1 78904c4543eb27717131d159e8ef02762e94c6f7 SHA256 fab399d174e6959afb998446e152f1da41575058afb40e9c442cfa804de326fc
DIST xulrunner-2.0-patches-0.4.tar.bz2 6121 RMD160 4bb2a879632894a4185c7c778fbf80a6c80f6847 SHA1 139af0b2fabe4f2c46bda27c9e29f7dd2731c6a3 SHA256 606e9f9b44e6ce61035047e58f4c0dc8e4dc6c67b1d159a0623212dedc50c4b9
+DIST xulrunner-2.0-patches-1.1.tar.bz2 50702 RMD160 bd918e4709696ab85c9eb611cd6e09a622487ab8 SHA1 bd7278b7d35503ea561191140d7409c2b4d64903 SHA256 ab171b0e3f36567b0cca859e8d356f34c779d8aca5f9a4bd4449076a311a0ae3
EBUILD xulrunner-1.9.2.13.ebuild 8110 RMD160 a19351d61fa537b0b90c36dbb048915d8a8f6ebd SHA1 7b586c0456b5e6ac11089634b3bf9a9282a14a27 SHA256 f3c6882464edf0902b34d172ef8f014f0242b317a2e9299c300c2b188a5f3a49
EBUILD xulrunner-1.9.2.8-r1.ebuild 8213 RMD160 5c8d817c91155e4bbd8763bbd0d99b8ca78cdc93 SHA1 5292e24359386e503b4b3a1e3ecb7cbac3754bbe SHA256 716743ab7bf42421333843adf5a75b23109ae142519a0fb85fda87a4a94a6923
EBUILD xulrunner-1.9.3_alpha5.ebuild 6811 RMD160 3ac0a6d11b664b0a77b80164e640469c770ed089 SHA1 35bac75d31b1927848235e4d60d993edf487d7cb SHA256 a3a229d9d24b51641b5d2e8e9819766cc1b3ebc19b0827210e87857a9a609dc7
EBUILD xulrunner-2.0_beta5_pre-r2.ebuild 8139 RMD160 bd837f2c4696444398b0a7aae93d5d8172a33c5e SHA1 f03c08dbdf05005dbd5011121dd68f97238cbe92 SHA256 bc7d175790269a8699bb7cd3c3eb9947f9e63e033a1076b3af7c0a7940fc1f6c
+EBUILD xulrunner-2.0_beta8_pre-r9.ebuild 8164 RMD160 7b9c6e4758988d326d1e9f4ab3b9bc296011248c SHA1 71dd9982488e9d593b003148668476f226fdffc1 SHA256 60301a2a4fdde19fd29e0cee1c3c993d57a6f6f525fec207fa4f81f02da64bf9
MISC ChangeLog 20231 RMD160 53d013acdc632eee5a57c68cc91590a9d97d322b SHA1 50a8a72f61cdf952923a6c40d3ddbdafa10682fc SHA256 4dc8981ad9cda5b06ff6f5e7b826a01d32082c6a306879ee1e3923a8aa93a449
MISC metadata.xml 263 RMD160 9ed9d79d17d0a991e88c31ae6342996519ffef5b SHA1 0de183203298ce0f1c2ecc3fc8149842c4a0ec60 SHA256 64bfc93c4ab02ef2fe9b61b8157eb722d6cca5d552eecc18eaaf8e95980d2550
diff --git a/x11-libs/pixman/Manifest b/x11-libs/pixman/Manifest
new file mode 100644
index 0000000..95605c7
--- /dev/null
+++ b/x11-libs/pixman/Manifest
@@ -0,0 +1,2 @@
+AUX pixman-loongson2f.patch 76952 RMD160 633c72e7c75d1c2cf32cfe28f8253b09a96ba2cc SHA1 ce4d69ea341f21fdc30f6d401ee479cd3571dab3 SHA256 48c8db615b44285f9123905798a8c17e00273def3f75aae5adfb4de2c3cf6134
+EBUILD pixman-9999.ebuild 1482 RMD160 7ffc7058020249dabf2d900893c59e090bfacd63 SHA1 961da2a0754d51e9ab8acc48ef6a2b8d281dddcd SHA256 5f88192d2ba405fb01c0e860bde53e1f6ecb751542c845ad6e4e1d60a301a00a
diff --git a/x11-libs/pixman/files/pixman-loongson2f.patch b/x11-libs/pixman/files/pixman-loongson2f.patch
new file mode 100644
index 0000000..15e01cb
--- /dev/null
+++ b/x11-libs/pixman/files/pixman-loongson2f.patch
@@ -0,0 +1,2745 @@
+diff -urN pixman//configure.ac Pixman.Loongson//configure.ac
+--- pixman//configure.ac 2010-12-25 18:46:00.018699000 +0800
++++ Pixman.Loongson//configure.ac 2010-12-25 18:39:15.298778000 +0800
+@@ -264,6 +264,43 @@
+ ])
+
+ dnl ===========================================================================
++dnl Check for Loongson SIMD
++
++have_loongson_intrinsics=no
++AC_MSG_CHECKING(whether to use Loongson SIMD intrinsics)
++
++AC_COMPILE_IFELSE([
++#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 4))
++error "Need GCC >= 4.4 for Loongson SIMD compilation"
++#endif
++int main () {
++ /* Test with a loongson SIMD instruction. */
++ asm volatile ( ".set arch = loongson2f \n\t" "and \$f0, \$f0, \$f0 \n\t" : : : "cc", "memory" );
++ return 0;
++}], have_loongson_intrinsics=yes)
++
++
++AC_ARG_ENABLE(loongson,
++ [AC_HELP_STRING([--disable-loongson],
++ [disable Loongson fast paths])],
++ [enable_loongson=$enableval], [enable_loongson=auto])
++
++if test $enable_loongson = no ; then
++ have_loongson_intrinsics=disabled
++fi
++
++if test $have_loongson_intrinsics = yes ; then
++ AC_DEFINE(USE_LS, 1, [use Loongson compiler intrinsics])
++fi
++
++AC_MSG_RESULT($have_loongson_intrinsics)
++if test $enable_loongson = yes && test $have_loongson_intrinsics = no ; then
++ AC_MSG_ERROR([Loongson intrinsics not detected])
++fi
++
++AM_CONDITIONAL(USE_LS, test $have_loongson_intrinsics = yes)
++
++dnl ===========================================================================
+ dnl Check for MMX
+
+ if test "x$MMX_CFLAGS" = "x" ; then
+diff -urN pixman//pixman/Makefile.am Pixman.Loongson//pixman/Makefile.am
+--- pixman//pixman/Makefile.am 2010-12-25 18:46:00.025027000 +0800
++++ Pixman.Loongson//pixman/Makefile.am 2010-12-25 18:39:15.303599000 +0800
+@@ -55,6 +55,19 @@
+ pixman-combine.h.template solaris-hwcap.mapfile pixman-x64-mmx-emulation.h
+ CLEANFILES = pixman-combine32.c pixman-combine64.c pixman-combine32.h pixman-combine64.h
+
++# loongson code
++if USE_LS
++noinst_LTLIBRARIES += libpixman-ls.la
++libpixman_ls_la_SOURCES = \
++ pixman-ls.c
++libpixman_ls_la_CFLAGS = $(DEP_CFLAGS) $(LS_CFLAGS)
++libpixman_ls_la_LIBADD = $(DEP_LIBS)
++libpixman_1_la_LDFLAGS += $(LS_LDFLAGS)
++libpixman_1_la_LIBADD += libpixman-ls.la
++
++ASM_CFLAGS_ls=$(LS_CFLAGS)
++endif
++
+ # mmx code
+ if USE_MMX
+ noinst_LTLIBRARIES += libpixman-mmx.la
+diff -urN pixman//pixman/pixman-combine-ls.c Pixman.Loongson//pixman/pixman-combine-ls.c
+--- pixman//pixman/pixman-combine-ls.c 1970-01-01 08:00:00.000000000 +0800
++++ Pixman.Loongson//pixman/pixman-combine-ls.c 2010-12-25 18:39:15.344171000 +0800
+@@ -0,0 +1,911 @@
++static force_inline uint32_t
++combine (const uint32_t *src, const uint32_t *mask)
++{
++ uint32_t ssrc = *src;
++
++ if (mask)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%1,$f22)
++ load8888r(%0,$f20)
++ expand_alpha($f22,$f22)
++ pix_multiply($f20,$f22)
++ store8888r($f8,%0)
++ :"+r"(ssrc):"r"(*mask):clobber
++ );
++ }
++ return ssrc;
++}
++
++static void
++ls_combine_saturate_u (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = dest + width;
++
++ while (dest < end)
++ {
++ uint32_t s = combine (src, mask);
++ uint32_t d = *dest;
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%1,$f22)
++ load8888r(%0,$f20)
++ :"+r"(d):"r"(s):clobber
++ );
++
++ uint32_t sa = s >> 24;
++ uint32_t da = ~d >> 24;
++
++ if (sa > da)
++ {
++ uint32_t dds = DIV_UN8 (da, sa) << 24;
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%0,$f24)
++ expand_alpha($f24,$f24)
++ pix_multiply($f22,$f24)
++ save_to($f22)
++ ::"r"(dds):clobber
++ );
++ }
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ pix_add($f20,$f22)
++ store8888r($f8,%0)
++ :"=r"(*dest)::clobber
++ );
++
++ ++src;
++ ++dest;
++ if (mask)
++ mask++;
++ }
++}
++static void
++ls_combine_out_u (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = dest + width;
++
++ while (dest < end)
++ {
++ if (mask)
++ {
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++
++ load8888r(%2,$f22)
++ load8888r(%1,$f20)
++ expand_alpha($f22,$f22)
++ pix_multiply($f20,$f22)
++ save_to ($f20)
++
++ load8888r(%0,$f24)
++ expand_alpha($f24,$f24)
++ negate($f24,$f24)
++ pix_multiply($f20,$f24)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++
++ mask++;
++ }else {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++
++
++ load8888r(%1,$f20)
++
++ load8888r(%0,$f24)
++ expand_alpha($f24,$f24)
++ negate($f24,$f24)
++ pix_multiply($f20,$f24)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src):clobber
++ );
++
++ }
++ ++dest;
++ ++src;
++ }
++}
++
++static void
++ls_combine_out_reverse_u (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = dest + width;
++
++ while (dest < end)
++ {
++ if (mask)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++
++ load8888r(%2,$f22)
++ load8888r(%1,$f20)
++ expand_alpha($f22,$f22)
++ pix_multiply($f20,$f22)
++ save_to ($f20)
++
++ load8888r(%0,$f24)
++ expand_alpha($f20,$f20)
++ negate($f20,$f20)
++ pix_multiply($f20,$f24)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++
++ mask++;
++ }else{
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++
++ load8888r(%1,$f20)
++
++ load8888r(%0,$f24)
++ expand_alpha($f20,$f20)
++ negate($f20,$f20)
++ pix_multiply($f20,$f24)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src):clobber
++ );
++ }
++ ++dest;
++ ++src;
++
++ }
++}
++
++static void
++ls_combine_out_ca (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = src + width;
++
++ while (src < end)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%2,$f22)
++ load8888r(%1,$f20)
++ load8888r(%0,$f24)
++ expand_alpha($f24,$f26)
++ negate($f26,$f26)
++ pix_multiply($f20,$f22)
++ save_to($f20)
++ pix_multiply($f20,$f26)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++ ++src;
++ ++dest;
++ ++mask;
++ }
++}
++
++static void
++ls_combine_out_reverse_ca (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = src + width;
++
++ while (src < end)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%2,$f22)
++ load8888r(%1,$f20)
++ load8888r(%0,$f24)
++ expand_alpha($f20,$f28)
++ pix_multiply($f22,$f28)
++ save_to($f22)
++ negate($f22,$f22)
++ pix_multiply($f24,$f22)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++ ++src;
++ ++dest;
++ ++mask;
++ }
++}
++
++
++static void
++ls_combine_atop_u (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = dest + width;
++
++ while (dest < end)
++ {
++ if (mask)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++
++ load8888r(%2,$f22)
++ load8888r(%1,$f20)
++ expand_alpha($f22,$f22)
++ pix_multiply($f20,$f22)
++ save_to ($f20)
++
++ load8888r(%0,$f24)
++ expand_alpha($f20,$f26)
++ expand_alpha($f24,$f28)
++ negate($f26,$f26)
++ pix_add_mul($f20,$f28,$f24,$f26)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++
++ mask++;
++ }else {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%1,$f20)
++
++ load8888r(%0,$f24)
++ expand_alpha($f20,$f26)
++ expand_alpha($f24,$f28)
++ negate($f26,$f26)
++ pix_add_mul($f20,$f28,$f24,$f26)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src):clobber
++ );
++ }
++ ++dest;
++ ++src;
++
++ }
++}
++
++static void
++ls_combine_atop_reverse_u (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end;
++
++ end = dest + width;
++
++ while (dest < end)
++ {
++ if (mask){
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++
++ load8888r(%2,$f22)
++ load8888r(%1,$f20)
++ expand_alpha($f22,$f22)
++ pix_multiply($f20,$f22)
++ save_to ($f20)
++
++ load8888r(%0,$f24)
++ expand_alpha($f20,$f26)
++ expand_alpha($f24,$f28)
++ negate($f28,$f28)
++ pix_add_mul($f20,$f28,$f24,$f26)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++ mask++;
++ }else{
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++
++ load8888r(%1,$f20)
++
++ load8888r(%0,$f24)
++ expand_alpha($f20,$f26)
++ expand_alpha($f24,$f28)
++ negate($f28,$f28)
++ pix_add_mul($f20,$f28,$f24,$f26)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src):clobber
++ );
++ }
++ ++dest;
++ ++src;
++ }
++}
++
++
++static void
++ls_combine_atop_ca (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = src + width;
++
++ while (src < end)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%2,$f22)
++ load8888r(%1,$f20)
++ load8888r(%0,$f24)
++ expand_alpha($f24,$f26)
++ expand_alpha($f20,$f28)
++ pix_multiply($f20,$f22)
++ save_to($f20)
++ pix_multiply($f22,$f28)
++ save_to($f22)
++ negate($f22,$f22)
++ pix_add_mul($f24,$f22,$f20,$f26)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++ ++src;
++ ++dest;
++ ++mask;
++ }
++}
++
++static void
++ls_combine_atop_reverse_ca (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = src + width;
++
++ while (src < end)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%2,$f22)
++ load8888r(%1,$f20)
++ load8888r(%0,$f24)
++ expand_alpha($f24,$f26)
++ expand_alpha($f20,$f28)
++ pix_multiply($f20,$f22)
++ save_to($f20)
++ pix_multiply($f22,$f28)
++ save_to($f22)
++ negate($f26,$f26)
++ pix_add_mul($f24,$f22,$f20,$f26)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++ ++src;
++ ++dest;
++ ++mask;
++ }
++}
++
++static void
++ls_combine_xor_u (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = dest + width;
++
++ while (dest < end)
++ {
++ if (mask)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%2,$f22)
++ load8888r(%1,$f20)
++ expand_alpha($f22,$f22)
++ pix_multiply($f20,$f22)
++ save_to ($f20)
++
++
++ load8888r(%0,$f24)
++ expand_alpha($f20,$f26)
++ expand_alpha($f24,$f28)
++ negate($f26,$f26)
++ negate($f28,$f28)
++ pix_add_mul($f20,$f28,$f24,$f26)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++ mask++;
++ }else{
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%1,$f20)
++
++ load8888r(%0,$f24)
++ expand_alpha($f20,$f26)
++ expand_alpha($f24,$f28)
++ negate($f26,$f26)
++ negate($f28,$f28)
++ pix_add_mul($f20,$f28,$f24,$f26)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src):clobber
++ );
++ }
++ ++dest;
++ ++src;
++
++ }
++}
++
++static void
++ls_combine_xor_ca (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = src + width;
++
++ while (src < end)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%2,$f22)
++ load8888r(%1,$f20)
++ load8888r(%0,$f24)
++ expand_alpha($f24,$f26)
++ expand_alpha($f20,$f28)
++ pix_multiply($f20,$f22)
++ save_to($f20)
++ pix_multiply($f22,$f28)
++ save_to($f22)
++ negate($f26,$f26)
++ negate($f22,$f22)
++ pix_add_mul($f24,$f22,$f20,$f26)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++ ++src;
++ ++dest;
++ ++mask;
++ }
++}
++
++
++static void
++ls_combine_in_reverse_u (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = dest + width;
++
++ while (dest < end)
++ {
++
++ if (mask)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%2,$f22)
++ load8888r(%1,$f20)
++ expand_alpha($f22,$f22)
++ pix_multiply($f20,$f22)
++ save_to ($f20)
++
++ load8888r(%0,$f24)
++ expand_alpha($f20,$f26)
++ pix_multiply($f24,$f26)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++ mask++;
++ } else {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%1,$f20)
++
++ load8888r(%0,$f24)
++ expand_alpha($f20,$f26)
++ pix_multiply($f24,$f26)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src):clobber
++ );
++ }
++ ++dest;
++ ++src;
++ }
++}
++
++static void
++ls_combine_in_reverse_ca (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = src + width;
++
++ while (src < end)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%2,$f22)
++ load8888r(%1,$f20)
++ load8888r(%0,$f24)
++ expand_alpha($f20,$f20)
++ pix_multiply($f22,$f20)
++ save_to($f26)
++ pix_multiply($f24,$f26)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++ ++src;
++ ++dest;
++ ++mask;
++ }
++}
++
++static void
++ls_combine_in_u (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = dest + width;
++
++ while (dest < end)
++ {
++ if (mask)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%2,$f22)
++ load8888r(%1,$f20)
++ expand_alpha($f22,$f22)
++ pix_multiply($f20,$f22)
++ save_to ($f20)
++
++ load8888r(%0,$f24)
++ expand_alpha($f24,$f24)
++ pix_multiply($f20,$f24)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++ mask++;
++ } else {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%1,$f20)
++
++ load8888r(%0,$f24)
++ expand_alpha($f24,$f24)
++ pix_multiply($f20,$f24)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src):clobber
++ );
++
++ }
++ ++dest;
++ ++src;
++ }
++}
++
++static void
++ls_combine_in_ca (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = src + width;
++
++ while (src < end)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%2,$f22)
++ load8888r(%1,$f20)
++ load8888r(%0,$f24)
++ expand_alpha($f24,$f24)
++ pix_multiply($f20,$f22)
++ save_to($f26)
++ pix_multiply($f26,$f24)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++ ++src;
++ ++dest;
++ ++mask;
++ }
++ }
++static void
++ls_combine_src_ca (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = src + width;
++
++ while (src < end)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%2,$f22)
++ load8888r(%1,$f20)
++ pix_multiply($f20,$f22)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++ ++src;
++ ++mask;
++ ++dest;
++ }
++
++}
++
++
++static void
++ls_combine_over_u (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = dest + width;
++
++ while (dest < end)
++ {
++
++ uint32_t ssrc = combine (src, mask);
++ uint32_t a = ssrc >> 24;
++
++ if (a == 0xff)
++ {
++ *dest = ssrc;
++ }
++ else if (ssrc)
++ {
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%1,$f20)
++
++ expand_alpha($f20,$f24)
++ load8888r(%0,$f26)
++ over($f20,$f24,$f26)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(ssrc):clobber
++ );
++ }
++
++ ++dest;
++ ++src;
++ if (mask)
++ ++mask;
++ }
++}
++
++static void
++ls_combine_over_reverse_u (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = dest + width;
++
++ while (dest < end)
++ {
++ if (mask)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++
++ load8888r(%2,$f22)
++ load8888r(%1,$f20)
++ expand_alpha($f22,$f22)
++ pix_multiply($f20,$f22)
++ save_to ($f20)
++
++ load8888r(%0,$f26)
++ expand_alpha($f26,$f28)
++ over($f26,$f28,$f20)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++ mask++;
++ }else{
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%1,$f20)
++
++ load8888r(%0,$f26)
++ expand_alpha($f26,$f28)
++ over($f26,$f28,$f20)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src):clobber
++ );
++
++ }
++ ++dest;
++ ++src;
++ }
++}
++
++
++static void
++ls_combine_over_ca (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = src + width;
++
++ while (src < end)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%0,$f20)
++ load8888r(%1,$f22)
++ load8888r(%2,$f24)
++ expand_alpha($f22,$f26)
++ in_over($f22,$f26,$f24,$f20)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++ ++src;
++ ++dest;
++ ++mask;
++ }
++
++}
++
++static void
++ls_combine_over_reverse_ca (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = src + width;
++
++ while (src < end)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%0,$f20)
++ load8888r(%1,$f22)
++ load8888r(%2,$f24)
++ in($f22,$f24)
++ save_to($f22)
++ expand_alpha($f20,$f28)
++ over($f20,$f28,$f22)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++ ++src;
++ ++dest;
++ ++mask;
++ }
++
++}
++
++static void
++ls_combine_add_u (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = dest + width;
++
++ while (dest < end)
++ {
++
++ if (mask)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%2,$f22)
++ load8888r(%1,$f20)
++ expand_alpha($f22,$f22)
++ pix_multiply($f20,$f22)
++ save_to ($f20)
++
++ load8888r(%0,$f22)
++ pix_add($f20,$f22)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++ mask++;
++ }else{
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%1,$f20)
++
++ load8888r(%0,$f22)
++ pix_add($f20,$f22)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src):clobber
++ );
++
++ }
++ ++dest;
++ ++src;
++ }
++}
++
++static void
++ls_combine_add_ca (pixman_implementation_t *imp,
++ pixman_op_t op,
++ uint32_t * dest,
++ const uint32_t * src,
++ const uint32_t * mask,
++ int width)
++{
++ const uint32_t *end = src + width;
++
++ while (src < end)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%0,$f20)
++ load8888r(%1,$f22)
++ load8888r(%2,$f24)
++ pix_multiply($f22,$f24)
++ save_to($f22)
++ pix_add($f22,$f20)
++ store8888r($f8,%0)
++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber
++ );
++ ++src;
++ ++dest;
++ ++mask;
++ }
++}
+diff -urN pixman//pixman/pixman-composite-ls.c Pixman.Loongson//pixman/pixman-composite-ls.c
+--- pixman//pixman/pixman-composite-ls.c 1970-01-01 08:00:00.000000000 +0800
++++ Pixman.Loongson//pixman/pixman-composite-ls.c 2010-12-25 18:39:15.356667000 +0800
+@@ -0,0 +1,967 @@
++static void
++ls_composite_over_x888_8_8888 (pixman_implementation_t *imp,
++ pixman_op_t op,
++ pixman_image_t * src_image,
++ pixman_image_t * mask_image,
++ pixman_image_t * dst_image,
++ int32_t src_x,
++ int32_t src_y,
++ int32_t mask_x,
++ int32_t mask_y,
++ int32_t dest_x,
++ int32_t dest_y,
++ int32_t width,
++ int32_t height)
++{
++
++ uint32_t *src, *src_line;
++ uint32_t *dst, *dst_line;
++ uint8_t *mask, *mask_line;
++ int src_stride, mask_stride, dst_stride;
++ uint32_t m;
++ uint32_t s, d;
++ int32_t w;
++
++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
++
++ while (height--)
++ {
++ src = src_line;
++ src_line += src_stride;
++ dst = dst_line;
++ dst_line += dst_stride;
++ mask = mask_line;
++ mask_line += mask_stride;
++
++ w = width;
++ while (w--)
++ {
++ m = *mask++;
++ if (m)
++ {
++ s = *src | 0xff000000;
++
++ if (m == 0xff)
++ {
++ *dst = s;
++ }
++ else
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%0,$f20)
++ load8888r(%1,$f22)
++ load8888r(%2,$f24)
++ expand_alpha($f22,$f26)
++ expand_alpha_rev($f24,$f28)
++ in_over($f22,$f26,$f28,$f20)
++ store8888r($f8,%0)
++ :"+r"(*dst):"r"(s),"r"(m):clobber
++ );
++
++// __m64 sa = expand_alpha (s);
++// __m64 vm = expand_alpha_rev (to_m64 (m));
++// __m64 vdest = in_over (s, sa, vm, load8888 (*dst));
++// *dst = store8888 (vdest);
++
++ }
++ }
++ src++;
++ dst++;
++ }
++ }
++}
++
++
++
++
++
++static void
++ls_composite_over_8888_8888 (pixman_implementation_t *imp,
++ pixman_op_t op,
++ pixman_image_t * src_image,
++ pixman_image_t * mask_image,
++ pixman_image_t * dst_image,
++ int32_t src_x,
++ int32_t src_y,
++ int32_t mask_x,
++ int32_t mask_y,
++ int32_t dest_x,
++ int32_t dest_y,
++ int32_t width,
++ int32_t height)
++{
++ uint32_t *dst_line, *dst;
++ uint32_t *src_line, *src;
++ uint32_t s;
++ int dst_stride, src_stride;
++ uint8_t a;
++ int32_t w;
++
++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
++
++ while (height--)
++ {
++ dst = dst_line;
++ dst_line += dst_stride;
++ src = src_line;
++ src_line += src_stride;
++ w = width;
++
++ while (w--)
++ {
++ s = *src;
++ a = s >> 24;
++
++ if (a == 0xff)
++ {
++ *dst = s;
++ }
++ else if (s)
++ {
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%1,$f24)
++ load8888r(%0,$f20)
++ expand_alpha($f24,$f26)
++ over($f24,$f26,$f20)
++ store8888r($f8,%0)
++ :"+r"(*dst):"r"(*src):clobber
++ );
++ }
++ dst++;
++ src++;
++
++ }
++ }
++}
++
++
++static void
++ls_composite_over_8888_n_8888 (pixman_implementation_t *imp,
++ pixman_op_t op,
++ pixman_image_t * src_image,
++ pixman_image_t * mask_image,
++ pixman_image_t * dst_image,
++ int32_t src_x,
++ int32_t src_y,
++ int32_t mask_x,
++ int32_t mask_y,
++ int32_t dest_x,
++ int32_t dest_y,
++ int32_t width,
++ int32_t height)
++{
++ uint32_t *dst_line, *dst;
++ uint32_t *src_line, *src;
++ uint32_t mask;
++ __m64 vmask;
++ int dst_stride, src_stride;
++ int32_t w;
++ __m64 srca;
++
++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
++
++ mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);
++ mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888(%1,$f24)
++ store64a($f24,%0)
++ :"=m"(vmask):"m"(mask):clobber
++ );
++
++ srca = ls_4x00ff;
++
++ while (height--)
++ {
++ dst = dst_line;
++ dst_line += dst_stride;
++ src = src_line;
++ src_line += src_stride;
++ w = width;
++
++ while (w)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%1,$f20)
++ load8888r(%0,$f22)
++ expand_alpha($f20,$f28)
++ in_over($f20,$f28,$f24,$f22)
++ store8888r($f8,%0)
++ :"+r"(*dst):"r"(*src):clobber
++ );
++
++ w--;
++ dst++;
++ src++;
++ }
++ }
++}
++
++static void
++ls_composite_over_n_8888 (pixman_implementation_t *imp,
++ pixman_op_t op,
++ pixman_image_t * src_image,
++ pixman_image_t * mask_image,
++ pixman_image_t * dst_image,
++ int32_t src_x,
++ int32_t src_y,
++ int32_t mask_x,
++ int32_t mask_y,
++ int32_t dest_x,
++ int32_t dest_y,
++ int32_t width,
++ int32_t height)
++{
++ uint32_t src;
++ uint32_t *dst_line, *dst;
++ int32_t w;
++ int dst_stride;
++ __m64 vsrc, vsrca;
++
++ src = _pixman_image_get_solid (src_image, dst_image->bits.format);
++
++ if (src == 0)
++ return;
++
++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%2,$f24)
++ store64($f24,%0)
++ expand_alpha($f24,$f26)
++ store64($f26,%1)
++ :"=m"(vsrc), "=m"(vsrca):"r"(src):clobber
++ );
++
++ while (height--)
++ {
++ dst = dst_line;
++ dst_line += dst_stride;
++ w = width;
++
++ while (w)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%0,$f28)
++ over($f24,$f26,$f28)
++ store8888r($f8,%0)
++ :"+r"(*dst)::clobber
++ );
++
++ w--;
++ dst++;
++ }
++ }
++}
++
++static void
++ls_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
++ pixman_op_t op,
++ pixman_image_t * src_image,
++ pixman_image_t * mask_image,
++ pixman_image_t * dst_image,
++ int32_t src_x,
++ int32_t src_y,
++ int32_t mask_x,
++ int32_t mask_y,
++ int32_t dest_x,
++ int32_t dest_y,
++ int32_t width,
++ int32_t height)
++{
++ uint32_t src, srca;
++ uint32_t *dst_line;
++ uint32_t *mask_line;
++ int dst_stride, mask_stride;
++ __m64 vsrc, vsrca;
++
++ src = _pixman_image_get_solid (src_image, dst_image->bits.format);
++
++ srca = src >> 24;
++ if (src == 0)
++ return;
++
++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%2,$f24)
++ store64($f24,%0)
++ expand_alpha($f24,$f26)
++ store64($f26,%1)
++ :"=m"(vsrc), "=m"(vsrca):"r"(src):clobber
++ );
++
++ while (height--)
++ {
++ int twidth = width;
++ uint32_t *p = (uint32_t *)mask_line;
++ uint32_t *q = (uint32_t *)dst_line;
++
++ while (twidth)
++ {
++
++ if (*p)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%0,$f28)
++ load8888r(%1,$f20)
++ in_over($f24,$f26,$f20,$f28)
++ store8888r($f8,%0)
++ :"+r"(*q):"r"(*p):clobber
++ );
++ }
++ twidth--;
++ p++;
++ q++;
++ }
++
++ dst_line += dst_stride;
++ mask_line += mask_stride;
++ }
++}
++
++
++static void
++ls_composite_over_n_8_8888 (pixman_implementation_t *imp,
++ pixman_op_t op,
++ pixman_image_t * src_image,
++ pixman_image_t * mask_image,
++ pixman_image_t * dst_image,
++ int32_t src_x,
++ int32_t src_y,
++ int32_t mask_x,
++ int32_t mask_y,
++ int32_t dest_x,
++ int32_t dest_y,
++ int32_t width,
++ int32_t height)
++{
++ uint32_t src, srca;
++ uint32_t *dst_line, *dst;
++ uint8_t *mask_line, *mask;
++ int dst_stride, mask_stride;
++ int32_t w;
++ __m64 vsrc, vsrca;
++ uint64_t srcsrc;
++
++ src = _pixman_image_get_solid (src_image, dst_image->bits.format);
++
++ srca = src >> 24;
++ if (src == 0)
++ return;
++
++ srcsrc = (uint64_t)src << 32 | src;
++
++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%2,$f24)
++ store64a($f24,%0)
++ expand_alpha($f24,$f26)
++ store64a($f26,%1)
++ :"=m"(vsrc), "=m"(vsrca):"r"(src):clobber
++ );
++
++ while (height--)
++ {
++ dst = dst_line;
++ dst_line += dst_stride;
++ mask = mask_line;
++ mask_line += mask_stride;
++ w = width;
++
++ while (w)
++ {
++ uint32_t m = *mask;
++
++ if (m)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%0,$f20)
++ load32r(%1,$f22)
++ expand_alpha_rev($f22,$f28)
++ in_over($f24,$f26,$f28,$f20)
++ store8888r($f8,%0)
++ :"+r"(*dst):"r"(m):clobber
++ );
++ }
++
++ w--;
++ mask++;
++ dst++;
++ }
++ }
++
++}
++
++static void
++ls_composite_over_x888_n_8888 (pixman_implementation_t *imp,
++ pixman_op_t op,
++ pixman_image_t * src_image,
++ pixman_image_t * mask_image,
++ pixman_image_t * dst_image,
++ int32_t src_x,
++ int32_t src_y,
++ int32_t mask_x,
++ int32_t mask_y,
++ int32_t dest_x,
++ int32_t dest_y,
++ int32_t width,
++ int32_t height)
++{
++ uint32_t *dst_line, *dst;
++ uint32_t *src_line, *src;
++ uint32_t mask;
++ __m64 vmask;
++ int dst_stride, src_stride;
++ int32_t w;
++ __m64 srca;
++
++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
++ mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);
++
++ mask &= 0xff000000;
++ mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%1,$f24)
++ store64a($f24,%0)
++ :"=m"(vmask):"r"(mask):clobber
++ );
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load64a(%1,$f26)
++ store64a($f26,%0)
++ :"=m"(srca):"m"(ls_4x00ff):clobber
++ );
++
++ while (height--)
++ {
++ dst = dst_line;
++ dst_line += dst_stride;
++ src = src_line;
++ src_line += src_stride;
++ w = width;
++
++ while (w)
++ {
++ uint32_t src_tmp = *src | 0xff000000;
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%1,$f20)
++ load8888r(%0,$f22)
++ in_over($f20,$f26,$f24,$f22)
++ store8888r($f8,%0)
++ :"+r"(*dst):"r"(src_tmp):clobber
++ );
++
++ w--;
++ dst++;
++ src++;
++ }
++ }
++}
++
++
++static void
++ls_composite_over_8888_0565 (pixman_implementation_t *imp,
++ pixman_op_t op,
++ pixman_image_t * src_image,
++ pixman_image_t * mask_image,
++ pixman_image_t * dst_image,
++ int32_t src_x,
++ int32_t src_y,
++ int32_t mask_x,
++ int32_t mask_y,
++ int32_t dest_x,
++ int32_t dest_y,
++ int32_t width,
++ int32_t height)
++{
++ uint16_t *dst_line, *dst;
++ uint32_t d;
++ uint32_t *src_line, *src, s;
++ uint8_t a;
++ int dst_stride, src_stride;
++ int32_t w;
++
++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
++
++ while (height--)
++ {
++ dst = dst_line;
++ dst_line += dst_stride;
++ src = src_line;
++ src_line += src_stride;
++ w = width;
++
++ while (w--)
++ {
++ s = *src++;
++ a = s >> 24;
++ if (s)
++ {
++ if (a == 0xff)
++ {
++ d = s;
++ }
++ else
++ {
++ d = *dst;
++ d = CONVERT_0565_TO_0888 (d);
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%1,$f24)
++ load8888r(%0,$f20)
++ expand_alpha($f24,$f26)
++ over($f24,$f26,$f20)
++ store8888r($f8,%0)
++ :"+r"(d):"r"(s):clobber
++ );
++
++
++ }
++ *dst = CONVERT_8888_TO_0565 (d);
++ }
++ dst++;
++ }
++ }
++}
++
++static void
++ls_composite_over_n_0565 (pixman_implementation_t *imp,
++ pixman_op_t op,
++ pixman_image_t * src_image,
++ pixman_image_t * mask_image,
++ pixman_image_t * dst_image,
++ int32_t src_x,
++ int32_t src_y,
++ int32_t mask_x,
++ int32_t mask_y,
++ int32_t dest_x,
++ int32_t dest_y,
++ int32_t width,
++ int32_t height)
++{
++ uint32_t src;
++ uint32_t d;
++ uint16_t *dst_line, *dst;
++ int32_t w;
++ int dst_stride;
++ __m64 vsrc, vsrca;
++
++ src = _pixman_image_get_solid (src_image, dst_image->bits.format);
++
++ if (src == 0)
++ return;
++
++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%2,$f24)
++ store64a($f24,%0)
++ expand_alpha($f24,$f26)
++ store64a($f26,%1)
++ :"=m"(vsrc), "=m"(vsrca):"r"(src):clobber
++ );
++
++ while (height--)
++ {
++ dst = dst_line;
++ dst_line += dst_stride;
++ w = width;
++
++ while (w)
++ {
++
++ d = *dst;
++ d = CONVERT_0565_TO_0888 (d);
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%0,$f20)
++
++ over($f24,$f26,$f20)
++ store8888r($f8,%0)
++ :"+r"(d)::clobber
++ );
++
++ *dst = CONVERT_8888_TO_0565 (d);
++
++ w--;
++ dst++;
++ }
++ }
++}
++
++static void
++ls_composite_over_n_8_0565 (pixman_implementation_t *imp,
++ pixman_op_t op,
++ pixman_image_t * src_image,
++ pixman_image_t * mask_image,
++ pixman_image_t * dst_image,
++ int32_t src_x,
++ int32_t src_y,
++ int32_t mask_x,
++ int32_t mask_y,
++ int32_t dest_x,
++ int32_t dest_y,
++ int32_t width,
++ int32_t height)
++{
++ uint32_t src, srca, m, d;
++ uint16_t *dst_line, *dst;
++ uint8_t *mask_line, *mask;
++ int dst_stride, mask_stride;
++ int32_t w;
++ __m64 vsrc, vsrca;
++
++ src = _pixman_image_get_solid (src_image, dst_image->bits.format);
++
++ srca = src >> 24;
++ if (src == 0)
++ return;
++
++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%2,$f24)
++ store64a($f24,%0)
++ expand_alpha($f24,$f26)
++ store64a($f26,%1)
++ :"=m"(vsrc), "=m"(vsrca):"r"(src):clobber
++ );
++
++ while (height--)
++ {
++ dst = dst_line;
++ dst_line += dst_stride;
++ mask = mask_line;
++ mask_line += mask_stride;
++ w = width;
++
++ while (w)
++ {
++ m = *mask;
++ d = *dst;
++
++ if (m)
++ {
++
++ d = CONVERT_0565_TO_0888 (d);
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%0,$f20)
++ load32r(%1,$f22)
++ expand_alpha_rev($f22,$f28)
++ in_over($f24,$f26,$f28,$f20)
++ store8888r($f8,%0)
++ :"+r"(d):"r"(m):clobber
++ );
++
++ *dst = CONVERT_8888_TO_0565 (d);
++
++ }
++
++ w--;
++ mask++;
++ dst++;
++ }
++ }
++}
++
++static void
++ls_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
++ pixman_op_t op,
++ pixman_image_t * src_image,
++ pixman_image_t * mask_image,
++ pixman_image_t * dst_image,
++ int32_t src_x,
++ int32_t src_y,
++ int32_t mask_x,
++ int32_t mask_y,
++ int32_t dest_x,
++ int32_t dest_y,
++ int32_t width,
++ int32_t height)
++{
++ uint32_t src, srca, m, d;
++ uint16_t *dst_line;
++ uint32_t *mask_line;
++ int dst_stride, mask_stride;
++ __m64 vsrc, vsrca;
++
++ src = _pixman_image_get_solid (src_image, dst_image->bits.format);
++
++ srca = src >> 24;
++ if (src == 0)
++ return;
++
++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%2,$f24)
++ store64a($f24,%0)
++ expand_alpha($f24,$f26)
++ store64a($f26,%1)
++ :"=m"(vsrc), "=m"(vsrca):"r"(src):clobber
++ );
++
++ while (height--)
++ {
++ int twidth = width;
++ uint32_t *p = (uint32_t *)mask_line;
++ uint16_t *q = (uint16_t *)dst_line;
++
++ while (twidth)
++ {
++
++ m = *(uint32_t *)p;
++ d = *q;
++
++ if (m)
++ {
++
++ d = CONVERT_0565_TO_0888 (d);
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%0,$f20)
++ load8888r(%1,$f22)
++ in_over($f24,$f26,$f22,$f20)
++ store8888r($f8,%0)
++ :"+r"(d):"r"(m):clobber
++ );
++
++ *q = CONVERT_8888_TO_0565 (d);
++
++ }
++
++ twidth--;
++ p++;
++ q++;
++ }
++
++ mask_line += mask_stride;
++ dst_line += dst_stride;
++ }
++}
++static void
++ls_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
++ pixman_op_t op,
++ pixman_image_t * src_image,
++ pixman_image_t * mask_image,
++ pixman_image_t * dst_image,
++ int32_t src_x,
++ int32_t src_y,
++ int32_t mask_x,
++ int32_t mask_y,
++ int32_t dest_x,
++ int32_t dest_y,
++ int32_t width,
++ int32_t height)
++{
++ uint32_t *dst_line, *dst;
++ uint32_t *src_line, *src;
++ int dst_stride, src_stride;
++ int32_t w;
++
++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
++
++#if 0
++ /* FIXME */
++ assert (src_image->drawable == mask_image->drawable);
++#endif
++
++ while (height--)
++ {
++ dst = dst_line;
++ dst_line += dst_stride;
++ src = src_line;
++ src_line += src_stride;
++ w = width;
++
++ while (w)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%1,$f22)
++ load8888r(%0,$f20)
++ over_rev_non_pre($f22,$f20)
++ store8888r($f8,%0)
++ :"+r"(*dst):"r"(*src):clobber
++ );
++
++ w--;
++ dst++;
++ src++;
++ }
++ }
++}
++static void
++ls_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
++ pixman_op_t op,
++ pixman_image_t * src_image,
++ pixman_image_t * mask_image,
++ pixman_image_t * dst_image,
++ int32_t src_x,
++ int32_t src_y,
++ int32_t mask_x,
++ int32_t mask_y,
++ int32_t dest_x,
++ int32_t dest_y,
++ int32_t width,
++ int32_t height)
++{
++ uint16_t *dst_line, *dst;
++ uint32_t *src_line, *src, d;
++ int dst_stride, src_stride;
++ int32_t w;
++
++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
++
++#if 0
++ /* FIXME */
++ assert (src_image->drawable == mask_image->drawable);
++#endif
++
++ while (height--)
++ {
++ dst = dst_line;
++ dst_line += dst_stride;
++ src = src_line;
++ src_line += src_stride;
++ w = width;
++
++ while (w)
++ {
++
++ d = *dst;
++ d = CONVERT_0565_TO_0888 (d);
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%1,$f20)
++ load8888r(%0,$f24)
++ over_rev_non_pre($f20,$f24)
++ store8888r($f8,%0)
++ :"+r"(d):"r"(*src):clobber
++ );
++
++ *dst = CONVERT_8888_TO_0565 (d);
++
++ w--;
++ dst++;
++ src++;
++ }
++ }
++}
++
++static void
++ls_composite_src_n_8_8888 (pixman_implementation_t *imp,
++ pixman_op_t op,
++ pixman_image_t * src_image,
++ pixman_image_t * mask_image,
++ pixman_image_t * dst_image,
++ int32_t src_x,
++ int32_t src_y,
++ int32_t mask_x,
++ int32_t mask_y,
++ int32_t dest_x,
++ int32_t dest_y,
++ int32_t width,
++ int32_t height)
++{
++ uint32_t src, srca;
++ uint32_t *dst_line, *dst, m;
++ uint8_t *mask_line, *mask;
++ int dst_stride, mask_stride;
++ int32_t w;
++ __m64 vsrc, vsrca;
++ uint64_t srcsrc;
++
++ src = _pixman_image_get_solid (src_image, dst_image->bits.format);
++
++ srca = src >> 24;
++ if (src == 0)
++ {
++ pixman_fill_ls (dst_image->bits.bits, dst_image->bits.rowstride,
++ PIXMAN_FORMAT_BPP (dst_image->bits.format),
++ dest_x, dest_y, width, height, 0);
++ return;
++ }
++
++ srcsrc = (uint64_t)src << 32 | src;
++
++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load8888r(%2,$f24)
++ store64a($f24,%0)
++ expand_alpha($f24,$f26)
++ store64a($f26,%1)
++ :"=m"(vsrc), "=m"(vsrca):"r"(src):clobber
++ );
++ while (height--)
++ {
++ dst = dst_line;
++ dst_line += dst_stride;
++ mask = mask_line;
++ mask_line += mask_stride;
++ w = width;
++
++ while (w)
++ {
++ m = *mask;
++
++ if (m)
++ {
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ load32r(%1,$f20)
++ expand_alpha_rev($f20,$f28)
++ in($f24,$f28)
++ store8888r($f8,%0)
++ :"=r"(*dst):"r"(m):clobber
++ );
++
++ }
++ else
++ {
++ *dst = 0;
++ }
++
++ w--;
++ mask++;
++ dst++;
++ }
++ }
++}
+diff -urN pixman//pixman/pixman-cpu.c Pixman.Loongson//pixman/pixman-cpu.c
+--- pixman//pixman/pixman-cpu.c 2010-12-25 18:46:00.073234000 +0800
++++ Pixman.Loongson//pixman/pixman-cpu.c 2010-12-25 18:39:15.360337000 +0800
+@@ -579,7 +579,9 @@
+ if (pixman_have_mmx ())
+ return _pixman_implementation_create_mmx ();
+ #endif
+-
++#ifdef USE_LS
++ return _pixman_implementation_create_ls ();
++#endif
+ #ifdef USE_ARM_NEON
+ if (pixman_have_arm_neon ())
+ return _pixman_implementation_create_arm_neon ();
+diff -urN pixman//pixman/pixman-ls.c Pixman.Loongson//pixman/pixman-ls.c
+--- pixman//pixman/pixman-ls.c 1970-01-01 08:00:00.000000000 +0800
++++ Pixman.Loongson//pixman/pixman-ls.c 2010-12-25 18:39:15.386759000 +0800
+@@ -0,0 +1,538 @@
++/*
++* Based on pixman-mmx.c
++* Implemented for loongson 2F only.
++* Free software based on GPL licence.
++* Copyright 2010 WG Ge.
++*/
++
++#ifdef HAVE_CONFIG_H
++#include <config.h>
++#endif
++#include <stdlib.h>
++#include <string.h>
++#include <math.h>
++#include <limits.h>
++#include <stdio.h>
++#include "pixman-private.h"
++#include "pixman-combine32.h"
++#include "primitive.h"
++
++#define __m64 __attribute__ ((aligned (8))) uint64_t
++#define DECLARE_ALIGNED(n,t,v) t __attribute__ ((aligned (n))) v
++#define DECLARE_ALIGNED_8(t, v, ...) DECLARE_ALIGNED(8, t, v)
++
++DECLARE_ALIGNED_8 (const uint64_t, ls_4x00ff ) = 0x00ff00ff00ff00ffULL;
++DECLARE_ALIGNED_8 (const uint64_t, ls_4x0080 ) = 0x0080008000800080ULL;
++DECLARE_ALIGNED_8 (const uint64_t, ls_565_rgb ) = 0x000001f0003f001fULL;
++DECLARE_ALIGNED_8 (const uint64_t, ls_565_unpack_multiplier ) = 0x0000008404100840ULL;
++DECLARE_ALIGNED_8 (const uint64_t, ls_565_r ) = 0x000000f800000000ULL;
++DECLARE_ALIGNED_8 (const uint64_t, ls_565_g ) = 0x0000000000fc0000ULL;
++DECLARE_ALIGNED_8 (const uint64_t, ls_565_b ) = 0x00000000000000f8ULL;
++DECLARE_ALIGNED_8 (const uint64_t, ls_mask_0 ) = 0xffffffffffff0000ULL;
++DECLARE_ALIGNED_8 (const uint64_t, ls_mask_1 ) = 0xffffffff0000ffffULL;
++DECLARE_ALIGNED_8 (const uint64_t, ls_mask_2 ) = 0xffff0000ffffffffULL;
++DECLARE_ALIGNED_8 (const uint64_t, ls_mask_3 ) = 0x0000ffffffffffffULL;
++DECLARE_ALIGNED_8 (const uint64_t, ls_full_alpha ) = 0x00ff000000000000ULL;
++DECLARE_ALIGNED_8 (const uint64_t, ls_ffff0000ffff0000 ) = 0xffff0000ffff0000ULL;
++DECLARE_ALIGNED_8 (const uint64_t, ls_0000ffff00000000 ) = 0x0000ffff00000000ULL;
++DECLARE_ALIGNED_8 (const uint64_t, ls_000000000000ffff ) = 0x000000000000ffffULL;
++
++
++pixman_bool_t
++pixman_fill_ls (uint32_t *bits,
++ int stride,
++ int bpp,
++ int x,
++ int y,
++ int width,
++ int height,
++ uint32_t xor)
++{
++ uint64_t fill;
++ uint32_t byte_width;
++ uint8_t *byte_line;
++
++
++
++ if (bpp != 16 && bpp != 32 && bpp != 8)
++ return FALSE;
++
++ if (bpp == 8)
++ {
++ stride = stride * (int) sizeof (uint32_t) / 1;
++ byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
++ byte_width = width;
++ stride *= 1;
++ xor = (xor & 0xff) * 0x01010101;
++ }
++ else if (bpp == 16)
++ {
++ stride = stride * (int) sizeof (uint32_t) / 2;
++ byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
++ byte_width = 2 * width;
++ stride *= 2;
++ xor = (xor & 0xffff) * 0x00010001;
++ }
++ else
++ {
++ stride = stride * (int) sizeof (uint32_t) / 4;
++ byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
++ byte_width = 4 * width;
++ stride *= 4;
++ }
++
++ fill = ((uint64_t)xor << 32) | xor;
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ "ldc1 $f24, %0 \n\t"
++ ::"m"(fill):"$f24"
++ );
++ while (height--)
++ {
++ int w;
++ uint8_t *d = byte_line;
++
++ byte_line += stride;
++ w = byte_width;
++
++ while (w >= 1 && ((unsigned long)d & 1))
++ {
++ *(uint8_t *)d = (xor & 0xff);
++ w--;
++ d++;
++ }
++
++ while (w >= 2 && ((unsigned long)d & 3))
++ {
++ *(uint16_t *)d = xor;
++ w -= 2;
++ d += 2;
++ }
++
++ while (w >= 4 && ((unsigned long)d & 7))
++ {
++ *(uint32_t *)d = xor;
++
++ w -= 4;
++ d += 4;
++ }
++
++ while (w >= 64)
++ {
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ "dmfc1 $8, $f24 \n\t"
++ "sd $8 , (%0) \n\t"
++ "sd $8 , 8(%0) \n\t"
++ "sd $8 , 16(%0) \n\t"
++ "sd $8 , 24(%0) \n\t"
++ "sd $8 , 32(%0) \n\t"
++ "sd $8 , 40(%0) \n\t"
++ "sd $8 , 48(%0) \n\t"
++ "sd $8 , 56(%0) \n\t"
++ ::"r"(d):"$8","memory","$f24"
++ );
++ w -= 64;
++ d += 64;
++ }
++
++ while (w >= 4)
++ {
++ *(uint32_t *)d = xor;
++
++ w -= 4;
++ d += 4;
++ }
++ while (w >= 2)
++ {
++ *(uint16_t *)d = xor;
++ w -= 2;
++ d += 2;
++ }
++ while (w >= 1)
++ {
++ *(uint8_t *)d = (xor & 0xff);
++ w--;
++ d++;
++ }
++
++ }
++ return TRUE;
++}
++
++static pixman_bool_t
++pixman_blt_ls (uint32_t *src_bits,
++ uint32_t *dst_bits,
++ int src_stride,
++ int dst_stride,
++ int src_bpp,
++ int dst_bpp,
++ int src_x,
++ int src_y,
++ int dst_x,
++ int dst_y,
++ int width,
++ int height)
++{
++ uint8_t * src_bytes;
++ uint8_t * dst_bytes;
++ int byte_width;
++
++ if (src_bpp != dst_bpp)
++ return FALSE;
++
++ if (src_bpp == 16)
++ {
++ src_stride = src_stride * (int) sizeof (uint32_t) / 2;
++ dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
++ src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
++ dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
++ byte_width = 2 * width;
++ src_stride *= 2;
++ dst_stride *= 2;
++ }
++ else if (src_bpp == 32)
++ {
++ src_stride = src_stride * (int) sizeof (uint32_t) / 4;
++ dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
++ src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
++ dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
++ byte_width = 4 * width;
++ src_stride *= 4;
++ dst_stride *= 4;
++ }
++ else
++ {
++ return FALSE;
++ }
++
++ while (height--)
++ {
++ int w;
++ uint8_t *s = src_bytes;
++ uint8_t *d = dst_bytes;
++ src_bytes += src_stride;
++ dst_bytes += dst_stride;
++ w = byte_width;
++
++ while (w >= 2 && ((unsigned long)d & 3))
++ {
++ *(uint16_t *)d = *(uint16_t *)s;
++ w -= 2;
++ s += 2;
++ d += 2;
++ }
++
++ while (w >= 4 && ((unsigned long)d & 7))
++ {
++ *(uint32_t *)d = *(uint32_t *)s;
++
++ w -= 4;
++ s += 4;
++ d += 4;
++ }
++ if ((unsigned long)s & 7)
++{
++ while (w >= 64)
++ {
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ "uld $8 , (%1) \n\t"
++ "uld $9 , 8(%1) \n\t"
++ "uld $10, 16(%1) \n\t"
++ "uld $11, 24(%1) \n\t"
++ "sd $8 , (%0) \n\t"
++ "sd $9 , 8(%0) \n\t"
++ "sd $10, 16(%0) \n\t"
++ "sd $11, 24(%0) \n\t"
++
++ "uld $8 , 32(%1) \n\t"
++ "uld $9 , 40(%1) \n\t"
++ "uld $10, 48(%1) \n\t"
++ "uld $11, 56(%1) \n\t"
++ "sd $8 , 32(%0) \n\t"
++ "sd $9 , 40(%0) \n\t"
++ "sd $10, 48(%0) \n\t"
++ "sd $11, 56(%0) \n\t"
++ ::"r"(d),"r"(s):"$8","$9","$10","$11","memory"
++ );
++ w -= 64;
++ s += 64;
++ d += 64;
++ }
++}
++else
++{
++ while (w >= 64)
++ {
++
++ __asm__ volatile (
++ ".set arch=loongson2f \n\t"
++ "ld $8 , (%1) \n\t"
++ "ld $9 , 8(%1) \n\t"
++ "ld $10, 16(%1) \n\t"
++ "ld $11, 24(%1) \n\t"
++ "sd $8 , (%0) \n\t"
++ "sd $9 , 8(%0) \n\t"
++ "sd $10, 16(%0) \n\t"
++ "sd $11, 24(%0) \n\t"
++
++ "ld $8 , 32(%1) \n\t"
++ "ld $9 , 40(%1) \n\t"
++ "ld $10, 48(%1) \n\t"
++ "ld $11, 56(%1) \n\t"
++ "sd $8 , 32(%0) \n\t"
++ "sd $9 , 40(%0) \n\t"
++ "sd $10, 48(%0) \n\t"
++ "sd $11, 56(%0) \n\t"
++ ::"r"(d),"r"(s):"$8","$9","$10","$11","memory"
++ );
++ w -= 64;
++ s += 64;
++ d += 64;
++ }
++}
++
++ while (w >= 4)
++ {
++ *(uint32_t *)d = *(uint32_t *)s;
++
++ w -= 4;
++ s += 4;
++ d += 4;
++ }
++ if (w >= 2)
++ {
++ *(uint16_t *)d = *(uint16_t *)s;
++ w -= 2;
++ s += 2;
++ d += 2;
++ }
++ }
++ return TRUE;
++}
++
++
++#include "pixman-composite-ls.c"
++#include "pixman-combine-ls.c"
++
++static pixman_bool_t
++ls_blt (pixman_implementation_t *imp,
++ uint32_t * src_bits,
++ uint32_t * dst_bits,
++ int src_stride,
++ int dst_stride,
++ int src_bpp,
++ int dst_bpp,
++ int src_x,
++ int src_y,
++ int dst_x,
++ int dst_y,
++ int width,
++ int height)
++{
++ if (!pixman_blt_ls (
++ src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
++ src_x, src_y, dst_x, dst_y, width, height))
++ {
++ return _pixman_implementation_blt (
++ imp->delegate,
++ src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
++ src_x, src_y, dst_x, dst_y, width, height);
++ }
++
++ return TRUE;
++}
++
++static pixman_bool_t
++ls_fill (pixman_implementation_t *imp,
++ uint32_t * bits,
++ int stride,
++ int bpp,
++ int x,
++ int y,
++ int width,
++ int height,
++ uint32_t xor)
++{
++ if (!pixman_fill_ls (bits, stride, bpp, x, y, width, height, xor))
++ {
++ return _pixman_implementation_fill (
++ imp->delegate, bits, stride, bpp, x, y, width, height, xor);
++ }
++
++ return TRUE;
++}
++
++static void
++ls_composite_copy_area (pixman_implementation_t *imp,
++ pixman_op_t op,
++ pixman_image_t * src_image,
++ pixman_image_t * mask_image,
++ pixman_image_t * dst_image,
++ int32_t src_x,
++ int32_t src_y,
++ int32_t mask_x,
++ int32_t mask_y,
++ int32_t dest_x,
++ int32_t dest_y,
++ int32_t width,
++ int32_t height)
++{
++ pixman_blt_ls (src_image->bits.bits,
++ dst_image->bits.bits,
++ src_image->bits.rowstride,
++ dst_image->bits.rowstride,
++ PIXMAN_FORMAT_BPP (src_image->bits.format),
++ PIXMAN_FORMAT_BPP (dst_image->bits.format),
++ src_x, src_y, dest_x, dest_y, width, height);
++}
++
++
++static const pixman_fast_path_t ls_fast_paths[] =
++{
++
++//these are implemented so far
++#if 1
++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, ls_composite_over_x888_8_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, ls_composite_over_x888_8_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, ls_composite_over_x888_8_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, ls_composite_over_x888_8_8888 ),
++#endif
++
++#if 1
++//over_8888_0565 significant perf improvement, slight better L1, L2, 30% better RT
++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, ls_composite_over_8888_0565 ),
++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, ls_composite_over_8888_0565 ),
++ PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, ls_composite_over_pixbuf_0565 ),
++ PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, ls_composite_over_pixbuf_0565 ),
++
++//big improvement some closing 100%
++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, ls_composite_over_n_8888_0565_ca ),
++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, ls_composite_over_n_8888_0565_ca ),
++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, ls_composite_over_n_8_0565 ),
++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, ls_composite_over_n_8_0565 ),
++ PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, ls_composite_over_n_0565 ),
++
++//ubalbe to bench with lowlevel bench, believe it is a gain in perf
++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, ls_composite_over_x888_n_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, ls_composite_over_x888_n_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, ls_composite_over_x888_n_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, ls_composite_over_x888_n_8888 ),
++
++//performance regress 30% in L1,L2, but significant improvement in RT
++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, ls_composite_over_8888_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, ls_composite_over_8888_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, ls_composite_over_8888_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, ls_composite_over_8888_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, ls_composite_over_pixbuf_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, ls_composite_over_pixbuf_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, ls_composite_over_pixbuf_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, ls_composite_over_pixbuf_8888 ),
++
++//same performance in L1,L2, but significant improvement in RT 30-40%
++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, ls_composite_over_8888_n_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, ls_composite_over_8888_n_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, ls_composite_over_8888_n_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, ls_composite_over_8888_n_8888 ),
++
++//significant perf improvement 20%
++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, ls_composite_over_n_8_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, ls_composite_over_n_8_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, ls_composite_over_n_8_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, ls_composite_over_n_8_8888 ),
++
++//3 times perf improvements
++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, ls_composite_over_n_8888_8888_ca ),
++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, ls_composite_over_n_8888_8888_ca ),
++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, ls_composite_over_n_8888_8888_ca ),
++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, ls_composite_over_n_8888_8888_ca ),
++
++//significant performance boost
++ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, ls_composite_over_n_8888 ),
++ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, ls_composite_over_n_8888 ),
++//simple add, expect better perf in generic code
++// PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, ls_composite_add_8888_8888 ),
++// PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, ls_composite_add_8888_8888 ),
++
++// FIXME: Copy memory are not better than geneic code
++#if 0
++ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, ls_composite_copy_area ),
++ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, ls_composite_copy_area ),
++ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, ls_composite_copy_area ),
++ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, ls_composite_copy_area ),
++ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, ls_composite_copy_area ),
++ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, ls_composite_copy_area ),
++ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, ls_composite_copy_area ),
++ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, ls_composite_copy_area ),
++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, ls_composite_copy_area ),
++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, ls_composite_copy_area ),
++#endif
++
++//significant improvement
++ PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, ls_composite_src_n_8_8888 ),
++ PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, ls_composite_src_n_8_8888 ),
++ PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, ls_composite_src_n_8_8888 ),
++ PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, ls_composite_src_n_8_8888 ),
++
++#endif
++
++//these are not yet implemented
++
++#if 0
++
++ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, ls_composite_add_8000_8000 ),
++ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, ls_composite_add_n_8_8 ),
++ PIXMAN_STD_FAST_PATH (IN, a8, null, a8, ls_composite_in_8_8 ),
++ PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, ls_composite_in_n_8_8 ),
++#endif
++
++
++ { PIXMAN_OP_NONE },
++};
++
++pixman_implementation_t *
++_pixman_implementation_create_ls (void)
++{
++ pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
++ pixman_implementation_t *imp = _pixman_implementation_create (general, ls_fast_paths);
++
++//Turned on but unable to benchmark.
++#if 1
++ imp->combine_32[PIXMAN_OP_OVER] = ls_combine_over_u;
++ imp->combine_32[PIXMAN_OP_OVER_REVERSE] = ls_combine_over_reverse_u;
++ imp->combine_32[PIXMAN_OP_IN] = ls_combine_in_u;
++ imp->combine_32[PIXMAN_OP_IN_REVERSE] = ls_combine_in_reverse_u;
++ imp->combine_32[PIXMAN_OP_OUT] = ls_combine_out_u;
++ imp->combine_32[PIXMAN_OP_OUT_REVERSE] = ls_combine_out_reverse_u;
++ imp->combine_32[PIXMAN_OP_ATOP] = ls_combine_atop_u;
++ imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = ls_combine_atop_reverse_u;
++ imp->combine_32[PIXMAN_OP_XOR] = ls_combine_xor_u;
++ imp->combine_32[PIXMAN_OP_ADD] = ls_combine_add_u;
++ imp->combine_32[PIXMAN_OP_SATURATE] = ls_combine_saturate_u;
++
++ imp->combine_32_ca[PIXMAN_OP_SRC] = ls_combine_src_ca;
++ imp->combine_32_ca[PIXMAN_OP_OVER] = ls_combine_over_ca;
++ imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = ls_combine_over_reverse_ca;
++ imp->combine_32_ca[PIXMAN_OP_IN] = ls_combine_in_ca;
++ imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = ls_combine_in_reverse_ca;
++ imp->combine_32_ca[PIXMAN_OP_OUT] = ls_combine_out_ca;
++ imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = ls_combine_out_reverse_ca;
++ imp->combine_32_ca[PIXMAN_OP_ATOP] = ls_combine_atop_ca;
++ imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = ls_combine_atop_reverse_ca;
++ imp->combine_32_ca[PIXMAN_OP_XOR] = ls_combine_xor_ca;
++ imp->combine_32_ca[PIXMAN_OP_ADD] = ls_combine_add_ca;
++#endif
++
++//FIXME blt and fill not shown better perf than geneic code
++#if 0
++ imp->blt = ls_blt;
++ imp->fill = ls_fill;
++#endif
++
++ return imp;
++}
++
+diff -urN pixman//pixman/pixman-private.h Pixman.Loongson//pixman/pixman-private.h
+--- pixman//pixman/pixman-private.h 2010-12-25 18:46:00.102841000 +0800
++++ Pixman.Loongson//pixman/pixman-private.h 2010-12-25 18:39:15.401808000 +0800
+@@ -493,6 +493,11 @@
+ pixman_implementation_t *
+ _pixman_implementation_create_fast_path (void);
+
++#ifdef USE_LS
++pixman_implementation_t *
++_pixman_implementation_create_ls (void);
++#endif
++
+ #ifdef USE_MMX
+ pixman_implementation_t *
+ _pixman_implementation_create_mmx (void);
+diff -urN pixman//pixman/primitive.h Pixman.Loongson//pixman/primitive.h
+--- pixman//pixman/primitive.h 1970-01-01 08:00:00.000000000 +0800
++++ Pixman.Loongson//pixman/primitive.h 2010-12-25 18:39:15.457084000 +0800
+@@ -0,0 +1,214 @@
++/*
++* MMX register usage protocal
++* return result: f8
++* tmp immediate f12
++* tmp register in primtive f14 f16 f18
++* tmp register in pixman f0,f4,f6,f10,f20,f22,
++* globals in function f24, f26, f28,f30
++* Exceptions for load and store:
++* load will specify dest FPR register
++* store will specify src FPR register
++* expand_alpha(_rev) implemented with GPR, dest FPR as the 2nd parameter
++*
++* Special alert: don't use return result $f8 as input, it might be overwritten
++*/
++
++
++/*primitive macros */
++
++#define clobber "$8","$9","$f0","$f2","$f8",\
++ "$f12","$f14","$f16","$f18","$f20",\
++ "$f22","$f24","$f26","$f28","$f30"
++
++#define DMTC1_IMM(regc1,imm) \
++ "dli $8, "#imm" \n\t" \
++ "dmtc1 $8, "#regc1" \n\t"
++
++#define MTC1_IMM(regc1,imm) \
++ "li $8, "#imm" \n\t" \
++ "dmtc1 $8, "#regc1" \n\t"
++
++
++#define save_to(reg1) "mov.d "#reg1", $f8 \n\t"
++#define zero(reg1) "xor "#reg1","#reg1","#reg1" \n\t"
++
++#define load32(sp,reg1) \
++ "ulw $8, "#sp" \n\t" \
++ "dmtc1 $8, "#reg1" \n\t"
++
++#define load32a(sp,reg1) \
++ "lw $8, "#sp" \n\t" \
++ "dmtc1 $8, "#reg1" \n\t"
++
++#define load32r(sp,reg1) \
++ "dmtc1 "#sp", "#reg1" \n\t"
++
++#define load64(sp,reg1) \
++ "uld $8, "#sp" \n\t" \
++ "dmtc1 $8, "#reg1" \n\t"
++
++#define load64a(sp,reg1) \
++ "ld $8, "#sp" \n\t" \
++ "dmtc1 $8, "#reg1" \n\t"
++
++
++#define store32(reg1,sp) \
++ "dmfc1 $8, "#reg1" \n\t" \
++ "usw $8, "#sp" \n\t"
++
++#define store32r(reg1,sp) \
++ "dmfc1 "#sp", "#reg1" \n\t"
++
++#define store32a(reg1,sp) \
++ "swc1 "#reg1", "#sp" \n\t"
++
++#define store64(reg1,sp) \
++ "dmfc1 $8, "#reg1" \n\t" \
++ "usd $8, "#sp" \n\t"
++
++#define store64a(reg1,sp) \
++ "sdc1 "#reg1", "#sp" \n\t"
++
++#define load8888(sp,reg1) \
++ load64(sp,reg1) \
++ "xor $f12, $f12, $f12 \n\t" \
++ "punpcklbh "#reg1", "#reg1", $f12 \n\t"
++
++#define load8888r(sp,reg1) \
++ load32r(sp,reg1) \
++ "xor $f12, $f12, $f12 \n\t" \
++ "punpcklbh "#reg1", "#reg1", $f12 \n\t"
++
++#define load8888a(sp,reg1) \
++ load64a(sp,reg1) \
++ "xor $f12, $f12, $f12 \n\t" \
++ "punpcklbh "#reg1", "#reg1", $f12 \n\t"
++
++#define load8888ah(sp,reg1) \
++ load64a(sp,reg1) \
++ "xor $f12, $f12, $f12 \n\t" \
++ "punpckhbh "#reg1", "#reg1", $f12 \n\t"
++
++#define store8888(reg1,sp) \
++ "xor $f12, $f12, $f12 \n\t" \
++ "packushb "#reg1", "#reg1", $f12 \n\t" \
++ store64(reg1,sp)
++
++#define store8888r(reg1,sp) \
++ "xor $f12, $f12, $f12 \n\t" \
++ "packushb "#reg1", "#reg1", $f12 \n\t" \
++ store32r(reg1,sp)
++
++#define store8888a(reg1,sp) \
++ "xor $f12, $f12, $f12 \n\t" \
++ "packushb "#reg1", "#reg1", $f12 \n\t" \
++ store64a(reg1,sp)
++
++#define pack8888(reg1,reg2) \
++ "packushb $f8, "#reg1","#reg2" \n\t"
++
++#define unpack8888(reg1,reg2) \
++ "punpcklbh $f8, "#reg1","#reg2" \n\t"
++
++
++#define negate(sreg,dreg) \
++ DMTC1_IMM($f12, 0x00ff00ff00ff00ff)\
++ "xor "#dreg", "#sreg", $f12 \n\t"
++
++#define pix_add(reg1,reg2) \
++ "paddusb $f8, "#reg1", "#reg2" \n\t"
++
++#define pix_multiply(reg1,reg2) \
++ "pmullh $f14, "#reg1", "#reg2" \n\t " \
++ DMTC1_IMM($f12, 0x0080008000800080) \
++ "paddush $f14, $f14, $f12 \n\t "\
++ MTC1_IMM($f12, 8) \
++ "psrlh $f16, $f14, $f12 \n\t" \
++ "paddush $f14, $f14, $f16 \n\t" \
++ "psrlh $f8, $f14, $f12 \n\t"
++
++#define pix_add_mul(reg1,reg2,reg3,reg4) \
++ pix_multiply(reg1,reg2) \
++ "mov.d $f18, $f8 \n\t" \
++ pix_multiply(reg3,reg4) \
++ pix_add($f18,$f8)
++
++#define expand_alpha(sreg,dreg) \
++ "dmfc1 $8, "#sreg" \n\t" \
++ "dsrl32 $8, $8, 16 \n\t" \
++ "dsll $9, $8, 16 \n\t" \
++ "or $8, $8, $9 \n\t" \
++ "dsll32 $9, $8, 0 \n\t" \
++ "or $8, $8, $9 \n\t" \
++ "dmtc1 $8, "#dreg" \n\t"
++
++#define expand_alpha_rev(sreg,dreg)\
++ "dmfc1 $8, "#sreg" \n\t" \
++ "dsll32 $8, $8, 16 \n\t" \
++ "dsrl32 $8, $8, 16 \n\t" \
++ "dsll $9, $8, 16 \n\t" \
++ "or $8, $8, $9 \n\t" \
++ "dsll32 $9, $8, 0 \n\t" \
++ "or $8, $8, $9 \n\t" \
++ "dmtc1 $8, "#dreg" \n\t"
++
++#define expand8888(reg1,pos) expand8888_##pos(reg1)
++
++#define expand8888_0(reg1) \
++ "xor $f12, $f12, $f12 \n\t" \
++ "punpcklbh $f8, "#reg1", $f12 \n\t"
++
++#define expand8888_1(reg1) \
++ "xor $f12, $f12, $f12 \n\t" \
++ "punpckhbh $f8, "#reg1", $f12 \n\t"
++
++#define expandx888(reg1,pos) \
++ expand8888(reg1,pos) \
++ DMTC1_IMM($f12, 0x00ff000000000000) \
++ "or $f8, $f8, $f12 \n\t"
++
++#define invert_colors(reg1) \
++ DMTC1_IMM($f12, 0xffff0000ffff0000) \
++ "and $f14, "#reg1", $f12 \n\t" \
++ DMTC1_IMM($f12, 0x000000000000ffff) \
++ "and $f16, "#reg1", $f12 \n\t" \
++ DMTC1_IMM($f12, 0x0000ffff00000000) \
++ "and $f18, "#reg1", $f12 \n\t" \
++ MTC1_IMM($f12, 32) \
++ "dsll $f16, $f16, $f12 \n\t" \
++ "dsrl $f18, $f18, $f12 \n\t" \
++ "or $f14, $f14, $f16 \n\t" \
++ "or $f8, $f14, $f18 \n\t"
++
++#define over(reg1,reg2,reg3) \
++ negate(reg2,$f8) \
++ pix_multiply(reg3, $f8)\
++ pix_add(reg1, $f8)
++
++
++#define over_rev_non_pre(reg1,reg2) \
++ expand_alpha(reg1,$f0) \
++ DMTC1_IMM($f12,0x00ff000000000000) \
++ "or $f2, $f0, $f12 \n\t" \
++ invert_colors(reg1) \
++ pix_multiply($f8,$f2) \
++ save_to($f2) \
++ over($f2, $f0, reg2)
++
++#define in(reg1,reg2) pix_multiply(reg1,reg2)
++
++#define in_over_full_src_alpha(reg1,reg2,reg3) \
++ DMTC1_IMM($f12,0x00ff000000000000) \
++ "or $f0, "#reg1", $f12 \n\t" \
++ in($f0,reg2) \
++ save_to($f0) \
++ over($f0,reg2,reg3)
++
++#define in_over(reg1,reg2,reg3,reg4) \
++ in(reg1,reg3) \
++ "mov.d $f0, $f8 \n\t" \
++ pix_multiply(reg2,reg3) \
++ "mov.d $f2, $f8 \n\t" \
++ over($f0,$f2,reg4)
++
++
diff --git a/x11-libs/pixman/pixman-9999.ebuild b/x11-libs/pixman/pixman-9999.ebuild
new file mode 100644
index 0000000..d1c4cc1
--- /dev/null
+++ b/x11-libs/pixman/pixman-9999.ebuild
@@ -0,0 +1,50 @@
+# Copyright 1999-2009 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Header: $
+
+EAPI=3
+inherit xorg-2 toolchain-funcs versionator
+
+EGIT_REPO_URI="git://anongit.freedesktop.org/git/pixman"
+DESCRIPTION="Low-level pixel manipulation routines"
+
+KEYWORDS="~alpha ~amd64 ~arm ~hppa ~ia64 ~mips ~ppc ~ppc64 ~s390 ~sh ~sparc ~x86 ~x86-fbsd"
+IUSE="altivec mmx sse2"
+
+pkg_setup() {
+ xorg-2_pkg_setup
+ CONFIGURE_OPTIONS="
+ $(use_enable altivec vmx)
+ --disable-gtk"
+
+ local enable_mmx="$(use mmx && echo 1 || echo 0)"
+ local enable_sse2="$(use sse2 && echo 1 || echo 0)"
+
+ # this block fixes bug #260287
+ if use x86; then
+ if use sse2 && ! $(version_is_at_least "4.2" "$(gcc-version)"); then
+ ewarn "SSE2 instructions require GCC 4.2 or higher."
+ ewarn "pixman will be built *without* SSE2 support"
+ enable_sse2="0"
+ fi
+ fi
+
+ # this block fixes bug #236558
+ case "$enable_mmx,$enable_sse2" in
+ '1,1')
+ CONFIGURE_OPTIONS="${CONFIGURE_OPTIONS} --enable-mmx --enable-sse2" ;;
+ '1,0')
+ CONFIGURE_OPTIONS="${CONFIGURE_OPTIONS} --enable-mmx --disable-sse2" ;;
+ '0,1')
+ ewarn "You enabled SSE2 but have MMX disabled. This is an invalid."
+ ewarn "pixman will be built *without* MMX/SSE2 support."
+ CONFIGURE_OPTIONS="${CONFIGURE_OPTIONS} --disable-mmx --disable-sse2" ;;
+ '0,0')
+ CONFIGURE_OPTIONS="${CONFIGURE_OPTIONS} --disable-mmx --disable-sse2" ;;
+ esac
+}
+
+src_prepare() {
+ epatch "${FILESDIR}/${PN}-loongson2f.patch"
+ xorg-2_src_prepare
+}