aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'sci-libs/superlu_mt')
-rw-r--r--sci-libs/superlu_mt/ChangeLog11
-rw-r--r--sci-libs/superlu_mt/Manifest1
-rw-r--r--sci-libs/superlu_mt/files/superlu_mt-2.1-duplicate-symbols.patch900
-rw-r--r--sci-libs/superlu_mt/files/superlu_mt-2.1-missing-includes.patch44
-rw-r--r--sci-libs/superlu_mt/metadata.xml23
-rw-r--r--sci-libs/superlu_mt/superlu_mt-2.1.ebuild105
6 files changed, 1084 insertions, 0 deletions
diff --git a/sci-libs/superlu_mt/ChangeLog b/sci-libs/superlu_mt/ChangeLog
new file mode 100644
index 000000000..f6d1810dd
--- /dev/null
+++ b/sci-libs/superlu_mt/ChangeLog
@@ -0,0 +1,11 @@
+# ChangeLog for sci-libs/superlu_mt
+# Copyright 1999-2013 Gentoo Foundation; Distributed under the GPL v2
+# $Header: $
+
+*superlu_mt-2.1 (15 Jul 2013)
+
+ 15 Jul 2013; Sébastien Fabbro <bicatali@gentoo.org>
+ +files/superlu_mt-2.1-duplicate-symbols.patch,
+ +files/superlu_mt-2.1-missing-includes.patch, +metadata.xml,
+ +superlu_mt-2.1.ebuild:
+ sci-libs/superlu_mt: Initial import
diff --git a/sci-libs/superlu_mt/Manifest b/sci-libs/superlu_mt/Manifest
new file mode 100644
index 000000000..929cbbd3a
--- /dev/null
+++ b/sci-libs/superlu_mt/Manifest
@@ -0,0 +1 @@
+DIST superlu_mt_2.1.tar.gz 2718660 SHA256 77fd2a67a789704b566681dc614fa8f759b2925d3ff49cda9e11376b6dc38ed9 SHA512 1abd94c086404a12b82dcf39238a2aef584ba9d11ca24942faad1dbd8a283f257acbc594325ba3a64ec7323b2d738b2dcb8e2551953d01d017ca91f3a2d05890 WHIRLPOOL e7482c9c29e50af0a23bd943a9b80f2cb8bdcb7169f435994e7680b28e9fd9ec0876713ff1ac5b58a325cd000f7ca63fc1caad4eb52599e63922288dcf9e5505
diff --git a/sci-libs/superlu_mt/files/superlu_mt-2.1-duplicate-symbols.patch b/sci-libs/superlu_mt/files/superlu_mt-2.1-duplicate-symbols.patch
new file mode 100644
index 000000000..6b2c5d57b
--- /dev/null
+++ b/sci-libs/superlu_mt/files/superlu_mt-2.1-duplicate-symbols.patch
@@ -0,0 +1,900 @@
+diff -Nur SRC.orig/cmatgen.c SRC/cmatgen.c
+--- SRC.orig/cmatgen.c 2013-07-15 11:47:52.512735420 -0700
++++ SRC/cmatgen.c 2013-07-15 11:49:05.149137948 -0700
+@@ -93,76 +93,4 @@
+ xa[n] = lasta;
+ }
+
+-double dlaran_(int *iseed)
+-{
+-/* -- LAPACK auxiliary routine (version 2.0) --
+- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+- Courant Institute, Argonne National Lab, and Rice University
+- February 29, 1992
+-
+- Purpose
+- =======
+-
+- DLARAN returns a random real number from a uniform (0,1)
+- distribution.
+-
+- Arguments
+- =========
+-
+- ISEED (input/output) INT array, dimension (4)
+- On entry, the seed of the random number generator; the array
+-
+- elements must be between 0 and 4095, and ISEED(4) must be
+- odd.
+- On exit, the seed is updated.
+-
+- Further Details
+- ===============
+-
+- This routine uses a multiplicative congruential method with modulus
+- 2**48 and multiplier 33952834046453 (see G.S.Fishman,
+- 'Multiplicative congruential random number generators with modulus
+- 2**b: an exhaustive analysis for b = 32 and a partial analysis for
+- b = 48', Math. Comp. 189, pp 331-344, 1990).
+-
+- 48-bit integers are stored in 4 integer array elements with 12 bits
+- per element. Hence the routine is portable across machines with
+- integers of 32 bits or more.
+-
+- =====================================================================
+-*/
+-
+- /* Local variables */
+- int it1, it2, it3, it4;
+-
+- --iseed;
+-
+- /* multiply the seed by the multiplier modulo 2**48 */
+- it4 = iseed[4] * 2549;
+- it3 = it4 / 4096;
+- it4 -= it3 << 12;
+- it3 = it3 + iseed[3] * 2549 + iseed[4] * 2508;
+- it2 = it3 / 4096;
+- it3 -= it2 << 12;
+- it2 = it2 + iseed[2] * 2549 + iseed[3] * 2508 + iseed[4] * 322;
+- it1 = it2 / 4096;
+- it2 -= it1 << 12;
+- it1 = it1 + iseed[1] * 2549 + iseed[2] * 2508 + iseed[3] * 322 + iseed[4]
+- * 494;
+- it1 %= 4096;
+-
+- /* return updated seed */
+-
+- iseed[1] = it1;
+- iseed[2] = it2;
+- iseed[3] = it3;
+- iseed[4] = it4;
+-
+- /* convert 48-bit integer to a real number in the interval (0,1) */
+-
+- return ((double) it1 +
+- ((double) it2 + ((double) it3 + (double) it4 * 2.44140625e-4) *
+- 2.44140625e-4) * 2.44140625e-4) * 2.44140625e-4;
+-
+-} /* dlaran_ */
+
+diff -Nur SRC.orig/cmyblas2.c SRC/cmyblas2.c
+--- SRC.orig/cmyblas2.c 2013-07-15 11:47:52.509735400 -0700
++++ SRC/cmyblas2.c 2013-07-15 11:49:05.149137948 -0700
+@@ -183,3 +183,127 @@
+
+ }
+
++/*
++ * Performs dense matrix-vector multiply with 2 vectors in a single pass over A:
++ * y0 = y0 + A * x0
++ * y1 = y1 + A * x1
++ */
++void cmatvec2 (
++ int lda, /* leading dimension of A */
++ int m, /* number of rows of A; y0 and y1 are indexed 0..m-1 */
++ int n, /* number of columns of A; x0 and x1 are indexed 0..n-1 */
++ complex *A, /* in - size m-by-n, column j starts at A + j*lda */
++ complex *x0, /* in - size n-by-1 */
++ complex *x1, /* in - size n-by-1 */
++ complex *y0, /* in/out - size m-by-1, accumulated into */
++ complex *y1 /* in/out - size m-by-1, accumulated into */
++ )
++
++{
++ complex v00, v10, v20, v30, v40, v50, v60, v70,
++ v01, v11, v21, v31, v41, v51, v61, v71;
++ complex t0, t1, t2, t3, t4, t5, t6, t7;
++ complex f0, f1;
++ complex *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
++ register int firstcol = 0;
++ complex *M0, temp;
++ int k;
++
++ M0 = &A[0];
++
++ while ( firstcol < n - 7 ) { /* Do 8 columns */
++
++ Mki0 = M0;
++ Mki1 = Mki0 + lda;
++ Mki2 = Mki1 + lda;
++ Mki3 = Mki2 + lda;
++ Mki4 = Mki3 + lda;
++ Mki5 = Mki4 + lda;
++ Mki6 = Mki5 + lda;
++ Mki7 = Mki6 + lda;
++
++ v00 = x0[firstcol]; v01 = x1[firstcol++];
++ v10 = x0[firstcol]; v11 = x1[firstcol++];
++ v20 = x0[firstcol]; v21 = x1[firstcol++];
++ v30 = x0[firstcol]; v31 = x1[firstcol++];
++ v40 = x0[firstcol]; v41 = x1[firstcol++];
++ v50 = x0[firstcol]; v51 = x1[firstcol++];
++ v60 = x0[firstcol]; v61 = x1[firstcol++];
++ v70 = x0[firstcol]; v71 = x1[firstcol++];
++
++ for (k = 0; k < m; k++) { /* one row of A per iteration */
++ f0 = y0[k];
++ f1 = y1[k];
++ t0 = Mki0[k]; cc_mult(&temp, &v00, &t0);c_add(&f0,&f0,&temp);
++ cc_mult(&temp,&v01,&t0);c_add(&f1,&f1,&temp);
++ t1 = Mki1[k]; cc_mult(&temp,&v10,&t1);c_add(&f0,&f0,&temp);
++ cc_mult(&temp,&v11,&t1);c_add(&f1,&f1,&temp);
++ t2 = Mki2[k]; cc_mult(&temp,&v20,&t2);c_add(&f0,&f0,&temp);
++ cc_mult(&temp,&v21,&t2);c_add(&f1,&f1,&temp);
++ t3 = Mki3[k]; cc_mult(&temp,&v30,&t3);c_add(&f0,&f0,&temp);
++ cc_mult(&temp,&v31,&t3);c_add(&f1,&f1,&temp);
++ t4 = Mki4[k]; cc_mult(&temp,&v40,&t4);c_add(&f0,&f0,&temp);
++ cc_mult(&temp,&v41,&t4);c_add(&f1,&f1,&temp);
++ t5 = Mki5[k]; cc_mult(&temp,&v50,&t5);c_add(&f0,&f0,&temp);
++ cc_mult(&temp,&v51,&t5);c_add(&f1,&f1,&temp);
++ t6 = Mki6[k]; cc_mult(&temp,&v60,&t6);c_add(&f0,&f0,&temp);
++ cc_mult(&temp,&v61,&t6);c_add(&f1,&f1,&temp);
++ t7 = Mki7[k]; cc_mult(&temp,&v70,&t7);c_add(&f0,&f0,&temp);
++ cc_mult(&temp,&v71,&t7);c_add(&f1,&f1,&temp);
++ y0[k] = f0;
++ y1[k] = f1;
++ }
++
++ M0 += 8 * lda; /* step past the 8 columns just consumed */
++ }
++
++ while ( firstcol < n - 3 ) { /* Do 4 columns */
++ Mki0 = M0;
++ Mki1 = Mki0 + lda;
++ Mki2 = Mki1 + lda;
++ Mki3 = Mki2 + lda;
++
++ v00 = x0[firstcol]; v01 = x1[firstcol++];
++ v10 = x0[firstcol]; v11 = x1[firstcol++];
++ v20 = x0[firstcol]; v21 = x1[firstcol++];
++ v30 = x0[firstcol]; v31 = x1[firstcol++];
++
++ for (k = 0; k < m; k++) { /* one row of A per iteration */
++ f0 = y0[k];
++ f1 = y1[k];
++ t0 = Mki0[k]; cc_mult(&temp,&v00,&t0);c_add(&f0,&f0,&temp);
++ cc_mult(&temp,&v01,&t0);c_add(&f1,&f1,&temp);
++ t1 = Mki1[k]; cc_mult(&temp,&v10,&t1);c_add(&f0,&f0,&temp);
++ cc_mult(&temp,&v11,&t1);c_add(&f1,&f1,&temp);
++ t2 = Mki2[k]; cc_mult(&temp,&v20,&t2);c_add(&f0,&f0,&temp);
++ cc_mult(&temp,&v21,&t2);c_add(&f1,&f1,&temp);
++ t3 = Mki3[k]; cc_mult(&temp,&v30,&t3);c_add(&f0,&f0,&temp);
++ cc_mult(&temp,&v31,&t3);c_add(&f1,&f1,&temp);
++ y0[k] = f0;
++ y1[k] = f1;
++ }
++
++ M0 += 4 * lda; /* step past the 4 columns just consumed */
++
++ }
++
++ while ( firstcol < n ) { /* Do 1 column */
++ Mki0 = M0;
++ v00 = x0[firstcol]; v01 = x1[firstcol++];
++
++ for (k = 0; k < m; k++) { /* one row of A per iteration */
++ f0 = y0[k];
++ f1 = y1[k];
++ t0 = Mki0[k];
++ cc_mult(&temp,&v00,&t0);c_add(&f0,&f0,&temp);
++ cc_mult(&temp,&v01,&t0);c_add(&f1,&f1,&temp);
++ y0[k] = f0;
++ y1[k] = f1;
++ }
++
++ M0 += lda; /* next column */
++ }
++
++}
++
++
+diff -Nur SRC.orig/cmyblas2.c.orig SRC/cmyblas2.c.orig
+--- SRC.orig/cmyblas2.c.orig 1969-12-31 16:00:00.000000000 -0800
++++ SRC/cmyblas2.c.orig 2013-07-15 11:49:05.149137948 -0700
+@@ -0,0 +1,185 @@
++
++/*
++ * -- SuperLU routine (version 2.0) --
++ * Lawrence Berkeley National Lab, Univ. of California Berkeley,
++ * and Xerox Palo Alto Research Center.
++ * September 10, 2007
++ *
++ */
++/*
++ * File name: cmyblas2.c
++ * Purpose:
++ * Level 2 BLAS operations: solves and matvec, written in C.
++ * Note:
++ * This is only used when the system lacks an efficient BLAS library.
++ */
++#include "slu_scomplex.h"
++
++
++/*
++ * Solves a dense UNIT lower triangular system. The unit lower
++ * triangular matrix is stored in a 2D array M(1:nrow,1:ncol).
++ * The solution will be returned in the rhs vector.
++ */
++void clsolve ( int ldm, int ncol, complex *M, complex *rhs )
++{
++ int k;
++ complex x0, x1, x2, x3, temp;
++ complex *M0;
++ complex *Mki0, *Mki1, *Mki2, *Mki3;
++ register int firstcol = 0;
++
++ M0 = &M[0];
++
++
++ while ( firstcol < ncol - 3 ) { /* Do 4 columns */
++ Mki0 = M0 + 1;
++ Mki1 = Mki0 + ldm + 1;
++ Mki2 = Mki1 + ldm + 1;
++ Mki3 = Mki2 + ldm + 1;
++
++ x0 = rhs[firstcol];
++ cc_mult(&temp, &x0, Mki0); Mki0++;
++ c_sub(&x1, &rhs[firstcol+1], &temp);
++ cc_mult(&temp, &x0, Mki0); Mki0++;
++ c_sub(&x2, &rhs[firstcol+2], &temp);
++ cc_mult(&temp, &x1, Mki1); Mki1++;
++ c_sub(&x2, &x2, &temp);
++ cc_mult(&temp, &x0, Mki0); Mki0++;
++ c_sub(&x3, &rhs[firstcol+3], &temp);
++ cc_mult(&temp, &x1, Mki1); Mki1++;
++ c_sub(&x3, &x3, &temp);
++ cc_mult(&temp, &x2, Mki2); Mki2++;
++ c_sub(&x3, &x3, &temp);
++
++ rhs[++firstcol] = x1;
++ rhs[++firstcol] = x2;
++ rhs[++firstcol] = x3;
++ ++firstcol;
++
++ for (k = firstcol; k < ncol; k++) {
++ cc_mult(&temp, &x0, Mki0); Mki0++;
++ c_sub(&rhs[k], &rhs[k], &temp);
++ cc_mult(&temp, &x1, Mki1); Mki1++;
++ c_sub(&rhs[k], &rhs[k], &temp);
++ cc_mult(&temp, &x2, Mki2); Mki2++;
++ c_sub(&rhs[k], &rhs[k], &temp);
++ cc_mult(&temp, &x3, Mki3); Mki3++;
++ c_sub(&rhs[k], &rhs[k], &temp);
++ }
++
++ M0 += 4 * ldm + 4;
++ }
++
++ if ( firstcol < ncol - 1 ) { /* Do 2 columns */
++ Mki0 = M0 + 1;
++ Mki1 = Mki0 + ldm + 1;
++
++ x0 = rhs[firstcol];
++ cc_mult(&temp, &x0, Mki0); Mki0++;
++ c_sub(&x1, &rhs[firstcol+1], &temp);
++
++ rhs[++firstcol] = x1;
++ ++firstcol;
++
++ for (k = firstcol; k < ncol; k++) {
++ cc_mult(&temp, &x0, Mki0); Mki0++;
++ c_sub(&rhs[k], &rhs[k], &temp);
++ cc_mult(&temp, &x1, Mki1); Mki1++;
++ c_sub(&rhs[k], &rhs[k], &temp);
++ }
++ }
++
++}
++
++/*
++ * Solves a dense upper triangular system. The upper triangular matrix is
++ * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
++ * in the rhs vector.
++ */
++void
++cusolve (
++int ldm, /* in */
++int ncol, /* in */
++complex *M, /* in */
++complex *rhs /* modified */
++)
++{
++ complex xj, temp;
++ int jcol, j, irow;
++
++ jcol = ncol - 1;
++
++ for (j = 0; j < ncol; j++) {
++
++ c_div(&xj, &rhs[jcol], &M[jcol + jcol*ldm]); /* M(jcol, jcol) */
++ rhs[jcol] = xj;
++
++ for (irow = 0; irow < jcol; irow++) {
++ cc_mult(&temp, &xj, &M[irow+jcol*ldm]); /* M(irow, jcol) */
++ c_sub(&rhs[irow], &rhs[irow], &temp);
++ }
++
++ jcol--;
++
++ }
++}
++
++
++/*
++ * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
++ * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
++ */
++void cmatvec (
++int ldm, /* in -- leading dimension of M */
++int nrow, /* in */
++int ncol, /* in */
++complex *M, /* in */
++complex *vec, /* in */
++complex *Mxvec /* in/out */
++)
++{
++ complex vi0, vi1, vi2, vi3;
++ complex *M0, temp;
++ complex *Mki0, *Mki1, *Mki2, *Mki3;
++ register int firstcol = 0;
++ int k;
++
++ M0 = &M[0];
++
++ while ( firstcol < ncol - 3 ) { /* Do 4 columns */
++ Mki0 = M0;
++ Mki1 = Mki0 + ldm;
++ Mki2 = Mki1 + ldm;
++ Mki3 = Mki2 + ldm;
++
++ vi0 = vec[firstcol++];
++ vi1 = vec[firstcol++];
++ vi2 = vec[firstcol++];
++ vi3 = vec[firstcol++];
++ for (k = 0; k < nrow; k++) {
++ cc_mult(&temp, &vi0, Mki0); Mki0++;
++ c_add(&Mxvec[k], &Mxvec[k], &temp);
++ cc_mult(&temp, &vi1, Mki1); Mki1++;
++ c_add(&Mxvec[k], &Mxvec[k], &temp);
++ cc_mult(&temp, &vi2, Mki2); Mki2++;
++ c_add(&Mxvec[k], &Mxvec[k], &temp);
++ cc_mult(&temp, &vi3, Mki3); Mki3++;
++ c_add(&Mxvec[k], &Mxvec[k], &temp);
++ }
++
++ M0 += 4 * ldm;
++ }
++
++ while ( firstcol < ncol ) { /* Do 1 column */
++ Mki0 = M0;
++ vi0 = vec[firstcol++];
++ for (k = 0; k < nrow; k++) {
++ cc_mult(&temp, &vi0, Mki0); Mki0++;
++ c_add(&Mxvec[k], &Mxvec[k], &temp);
++ }
++ M0 += ldm;
++ }
++
++}
++
+diff -Nur SRC.orig/Makefile SRC/Makefile
+--- SRC.orig/Makefile 2013-07-15 11:47:52.511735412 -0700
++++ SRC/Makefile 2013-07-15 11:53:15.393528085 -0700
+@@ -31,7 +31,7 @@
+ #
+ #######################################################################
+
+-ALLAUX = superlu_timer.o dclock.o sp_ienv.o lsame.o xerbla.o \
++ALLAUX = superlu_timer.o sp_ienv.o lsame.o xerbla.o \
+ util.o pmemory.o qrnzcnt.o await.o \
+ get_perm_c.o mmd.o colamd.o sp_coletree.o \
+ pxgstrf_scheduler.o sp_colorder.o \
+diff -Nur SRC.orig/smatgen.c SRC/smatgen.c
+--- SRC.orig/smatgen.c 2013-07-15 11:47:52.512735420 -0700
++++ SRC/smatgen.c 2013-07-15 11:49:05.149137948 -0700
+@@ -93,76 +93,3 @@
+ xa[n] = lasta;
+ }
+
+-double dlaran_(int *iseed)
+-{
+-/* -- LAPACK auxiliary routine (version 2.0) --
+- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+- Courant Institute, Argonne National Lab, and Rice University
+- February 29, 1992
+-
+- Purpose
+- =======
+-
+- DLARAN returns a random real number from a uniform (0,1)
+- distribution.
+-
+- Arguments
+- =========
+-
+- ISEED (input/output) INT array, dimension (4)
+- On entry, the seed of the random number generator; the array
+-
+- elements must be between 0 and 4095, and ISEED(4) must be
+- odd.
+- On exit, the seed is updated.
+-
+- Further Details
+- ===============
+-
+- This routine uses a multiplicative congruential method with modulus
+- 2**48 and multiplier 33952834046453 (see G.S.Fishman,
+- 'Multiplicative congruential random number generators with modulus
+- 2**b: an exhaustive analysis for b = 32 and a partial analysis for
+- b = 48', Math. Comp. 189, pp 331-344, 1990).
+-
+- 48-bit integers are stored in 4 integer array elements with 12 bits
+- per element. Hence the routine is portable across machines with
+- integers of 32 bits or more.
+-
+- =====================================================================
+-*/
+-
+- /* Local variables */
+- int it1, it2, it3, it4;
+-
+- --iseed;
+-
+- /* multiply the seed by the multiplier modulo 2**48 */
+- it4 = iseed[4] * 2549;
+- it3 = it4 / 4096;
+- it4 -= it3 << 12;
+- it3 = it3 + iseed[3] * 2549 + iseed[4] * 2508;
+- it2 = it3 / 4096;
+- it3 -= it2 << 12;
+- it2 = it2 + iseed[2] * 2549 + iseed[3] * 2508 + iseed[4] * 322;
+- it1 = it2 / 4096;
+- it2 -= it1 << 12;
+- it1 = it1 + iseed[1] * 2549 + iseed[2] * 2508 + iseed[3] * 322 + iseed[4]
+- * 494;
+- it1 %= 4096;
+-
+- /* return updated seed */
+-
+- iseed[1] = it1;
+- iseed[2] = it2;
+- iseed[3] = it3;
+- iseed[4] = it4;
+-
+- /* convert 48-bit integer to a real number in the interval (0,1) */
+-
+- return ((double) it1 +
+- ((double) it2 + ((double) it3 + (double) it4 * 2.44140625e-4) *
+- 2.44140625e-4) * 2.44140625e-4) * 2.44140625e-4;
+-
+-} /* dlaran_ */
+-
+diff -Nur SRC.orig/xerbla.c SRC/xerbla.c
+--- SRC.orig/xerbla.c 2013-07-15 11:47:52.513735427 -0700
++++ SRC/xerbla.c 2013-07-15 11:49:05.150137959 -0700
+@@ -1,3 +1,4 @@
++#include <stdio.h>
+ /* Subroutine */ int xerbla_(char *srname, int *info)
+ {
+ /* -- LAPACK auxiliary routine (version 2.0) --
+diff -Nur SRC.orig/zmatgen.c SRC/zmatgen.c
+--- SRC.orig/zmatgen.c 2013-07-15 11:47:52.513735427 -0700
++++ SRC/zmatgen.c 2013-07-15 11:49:05.150137959 -0700
+@@ -93,76 +93,3 @@
+ xa[n] = lasta;
+ }
+
+-double dlaran_(int *iseed)
+-{
+-/* -- LAPACK auxiliary routine (version 2.0) --
+- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+- Courant Institute, Argonne National Lab, and Rice University
+- February 29, 1992
+-
+- Purpose
+- =======
+-
+- DLARAN returns a random real number from a uniform (0,1)
+- distribution.
+-
+- Arguments
+- =========
+-
+- ISEED (input/output) INT array, dimension (4)
+- On entry, the seed of the random number generator; the array
+-
+- elements must be between 0 and 4095, and ISEED(4) must be
+- odd.
+- On exit, the seed is updated.
+-
+- Further Details
+- ===============
+-
+- This routine uses a multiplicative congruential method with modulus
+- 2**48 and multiplier 33952834046453 (see G.S.Fishman,
+- 'Multiplicative congruential random number generators with modulus
+- 2**b: an exhaustive analysis for b = 32 and a partial analysis for
+- b = 48', Math. Comp. 189, pp 331-344, 1990).
+-
+- 48-bit integers are stored in 4 integer array elements with 12 bits
+- per element. Hence the routine is portable across machines with
+- integers of 32 bits or more.
+-
+- =====================================================================
+-*/
+-
+- /* Local variables */
+- int it1, it2, it3, it4;
+-
+- --iseed;
+-
+- /* multiply the seed by the multiplier modulo 2**48 */
+- it4 = iseed[4] * 2549;
+- it3 = it4 / 4096;
+- it4 -= it3 << 12;
+- it3 = it3 + iseed[3] * 2549 + iseed[4] * 2508;
+- it2 = it3 / 4096;
+- it3 -= it2 << 12;
+- it2 = it2 + iseed[2] * 2549 + iseed[3] * 2508 + iseed[4] * 322;
+- it1 = it2 / 4096;
+- it2 -= it1 << 12;
+- it1 = it1 + iseed[1] * 2549 + iseed[2] * 2508 + iseed[3] * 322 + iseed[4]
+- * 494;
+- it1 %= 4096;
+-
+- /* return updated seed */
+-
+- iseed[1] = it1;
+- iseed[2] = it2;
+- iseed[3] = it3;
+- iseed[4] = it4;
+-
+- /* convert 48-bit integer to a real number in the interval (0,1) */
+-
+- return ((double) it1 +
+- ((double) it2 + ((double) it3 + (double) it4 * 2.44140625e-4) *
+- 2.44140625e-4) * 2.44140625e-4) * 2.44140625e-4;
+-
+-} /* dlaran_ */
+-
+diff -Nur SRC.orig/zmyblas2.c SRC/zmyblas2.c
+--- SRC.orig/zmyblas2.c 2013-07-15 11:47:52.511735412 -0700
++++ SRC/zmyblas2.c 2013-07-15 11:49:05.150137959 -0700
+@@ -183,3 +183,127 @@
+
+ }
+
++/*
++ * Performs dense matrix-vector multiply with 2 vectors in a single pass over A:
++ * y0 = y0 + A * x0
++ * y1 = y1 + A * x1
++ */
++void zmatvec2 (
++ int lda, /* leading dimension of A */
++ int m, /* number of rows of A; y0 and y1 are indexed 0..m-1 */
++ int n, /* number of columns of A; x0 and x1 are indexed 0..n-1 */
++ doublecomplex *A, /* in - size m-by-n, column j starts at A + j*lda */
++ doublecomplex *x0, /* in - size n-by-1 */
++ doublecomplex *x1, /* in - size n-by-1 */
++ doublecomplex *y0, /* in/out - size m-by-1, accumulated into */
++ doublecomplex *y1 /* in/out - size m-by-1, accumulated into */
++ )
++
++{
++ doublecomplex v00, v10, v20, v30, v40, v50, v60, v70,
++ v01, v11, v21, v31, v41, v51, v61, v71;
++ doublecomplex t0, t1, t2, t3, t4, t5, t6, t7;
++ doublecomplex f0, f1;
++ doublecomplex *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
++ register int firstcol = 0;
++ doublecomplex *M0, temp;
++ int k;
++
++ M0 = &A[0];
++
++ while ( firstcol < n - 7 ) { /* Do 8 columns */
++
++ Mki0 = M0;
++ Mki1 = Mki0 + lda;
++ Mki2 = Mki1 + lda;
++ Mki3 = Mki2 + lda;
++ Mki4 = Mki3 + lda;
++ Mki5 = Mki4 + lda;
++ Mki6 = Mki5 + lda;
++ Mki7 = Mki6 + lda;
++
++ v00 = x0[firstcol]; v01 = x1[firstcol++];
++ v10 = x0[firstcol]; v11 = x1[firstcol++];
++ v20 = x0[firstcol]; v21 = x1[firstcol++];
++ v30 = x0[firstcol]; v31 = x1[firstcol++];
++ v40 = x0[firstcol]; v41 = x1[firstcol++];
++ v50 = x0[firstcol]; v51 = x1[firstcol++];
++ v60 = x0[firstcol]; v61 = x1[firstcol++];
++ v70 = x0[firstcol]; v71 = x1[firstcol++];
++
++ for (k = 0; k < m; k++) { /* one row of A per iteration */
++ f0 = y0[k];
++ f1 = y1[k];
++ t0 = Mki0[k]; zz_mult(&temp,&v00,&t0);z_add(&f0,&f0,&temp);
++ zz_mult(&temp,&v01,&t0);z_add(&f1,&f1,&temp);
++ t1 = Mki1[k]; zz_mult(&temp,&v10,&t1);z_add(&f0,&f0,&temp);
++ zz_mult(&temp,&v11,&t1);z_add(&f1,&f1,&temp);
++ t2 = Mki2[k]; zz_mult(&temp,&v20,&t2);z_add(&f0,&f0,&temp);
++ zz_mult(&temp,&v21,&t2);z_add(&f1,&f1,&temp);
++ t3 = Mki3[k]; zz_mult(&temp,&v30,&t3);z_add(&f0,&f0,&temp);
++ zz_mult(&temp,&v31,&t3);z_add(&f1,&f1,&temp);
++ t4 = Mki4[k]; zz_mult(&temp,&v40,&t4);z_add(&f0,&f0,&temp);
++ zz_mult(&temp,&v41,&t4);z_add(&f1,&f1,&temp);
++ t5 = Mki5[k]; zz_mult(&temp,&v50,&t5);z_add(&f0,&f0,&temp);
++ zz_mult(&temp,&v51,&t5);z_add(&f1,&f1,&temp);
++ t6 = Mki6[k]; zz_mult(&temp,&v60,&t6);z_add(&f0,&f0,&temp);
++ zz_mult(&temp,&v61,&t6);z_add(&f1,&f1,&temp);
++ t7 = Mki7[k]; zz_mult(&temp,&v70,&t7);z_add(&f0,&f0,&temp);
++ zz_mult(&temp,&v71,&t7);z_add(&f1,&f1,&temp);
++ y0[k] = f0;
++ y1[k] = f1;
++ }
++
++ M0 += 8 * lda; /* step past the 8 columns just consumed */
++ }
++
++ while ( firstcol < n - 3 ) { /* Do 4 columns */
++ Mki0 = M0;
++ Mki1 = Mki0 + lda;
++ Mki2 = Mki1 + lda;
++ Mki3 = Mki2 + lda;
++
++ v00 = x0[firstcol]; v01 = x1[firstcol++];
++ v10 = x0[firstcol]; v11 = x1[firstcol++];
++ v20 = x0[firstcol]; v21 = x1[firstcol++];
++ v30 = x0[firstcol]; v31 = x1[firstcol++];
++
++ for (k = 0; k < m; k++) { /* one row of A per iteration */
++ f0 = y0[k];
++ f1 = y1[k];
++ t0 = Mki0[k]; zz_mult(&temp,&v00,&t0);z_add(&f0,&f0,&temp);
++ zz_mult(&temp,&v01,&t0);z_add(&f1,&f1,&temp);
++ t1 = Mki1[k]; zz_mult(&temp,&v10,&t1);z_add(&f0,&f0,&temp);
++ zz_mult(&temp,&v11,&t1);z_add(&f1,&f1,&temp);
++ t2 = Mki2[k]; zz_mult(&temp,&v20,&t2);z_add(&f0,&f0,&temp);
++ zz_mult(&temp,&v21,&t2);z_add(&f1,&f1,&temp);
++ t3 = Mki3[k]; zz_mult(&temp,&v30,&t3);z_add(&f0,&f0,&temp);
++ zz_mult(&temp,&v31,&t3);z_add(&f1,&f1,&temp);
++ y0[k] = f0;
++ y1[k] = f1;
++ }
++
++ M0 += 4 * lda; /* step past the 4 columns just consumed */
++
++ }
++
++ while ( firstcol < n ) { /* Do 1 column */
++ Mki0 = M0;
++ v00 = x0[firstcol]; v01 = x1[firstcol++];
++
++ for (k = 0; k < m; k++) { /* one row of A per iteration */
++ f0 = y0[k];
++ f1 = y1[k];
++ t0 = Mki0[k];
++ zz_mult(&temp,&v00,&t0);z_add(&f0,&f0,&temp);
++ zz_mult(&temp,&v01,&t0);z_add(&f1,&f1,&temp);
++ y0[k] = f0;
++ y1[k] = f1;
++ }
++
++ M0 += lda; /* next column */
++ }
++
++}
++
++
+diff -Nur SRC.orig/zmyblas2.c.orig SRC/zmyblas2.c.orig
+--- SRC.orig/zmyblas2.c.orig 1969-12-31 16:00:00.000000000 -0800
++++ SRC/zmyblas2.c.orig 2013-07-15 11:49:05.150137959 -0700
+@@ -0,0 +1,185 @@
++
++/*
++ * -- SuperLU routine (version 2.0) --
++ * Lawrence Berkeley National Lab, Univ. of California Berkeley,
++ * and Xerox Palo Alto Research Center.
++ * September 10, 2007
++ *
++ */
++/*
++ * File name: zmyblas2.c
++ * Purpose:
++ * Level 2 BLAS operations: solves and matvec, written in C.
++ * Note:
++ * This is only used when the system lacks an efficient BLAS library.
++ */
++#include "slu_dcomplex.h"
++
++
++/*
++ * Solves a dense UNIT lower triangular system. The unit lower
++ * triangular matrix is stored in a 2D array M(1:nrow,1:ncol).
++ * The solution will be returned in the rhs vector.
++ */
++void zlsolve ( int ldm, int ncol, doublecomplex *M, doublecomplex *rhs )
++{
++ int k;
++ doublecomplex x0, x1, x2, x3, temp;
++ doublecomplex *M0;
++ doublecomplex *Mki0, *Mki1, *Mki2, *Mki3;
++ register int firstcol = 0;
++
++ M0 = &M[0];
++
++
++ while ( firstcol < ncol - 3 ) { /* Do 4 columns */
++ Mki0 = M0 + 1;
++ Mki1 = Mki0 + ldm + 1;
++ Mki2 = Mki1 + ldm + 1;
++ Mki3 = Mki2 + ldm + 1;
++
++ x0 = rhs[firstcol];
++ zz_mult(&temp, &x0, Mki0); Mki0++;
++ z_sub(&x1, &rhs[firstcol+1], &temp);
++ zz_mult(&temp, &x0, Mki0); Mki0++;
++ z_sub(&x2, &rhs[firstcol+2], &temp);
++ zz_mult(&temp, &x1, Mki1); Mki1++;
++ z_sub(&x2, &x2, &temp);
++ zz_mult(&temp, &x0, Mki0); Mki0++;
++ z_sub(&x3, &rhs[firstcol+3], &temp);
++ zz_mult(&temp, &x1, Mki1); Mki1++;
++ z_sub(&x3, &x3, &temp);
++ zz_mult(&temp, &x2, Mki2); Mki2++;
++ z_sub(&x3, &x3, &temp);
++
++ rhs[++firstcol] = x1;
++ rhs[++firstcol] = x2;
++ rhs[++firstcol] = x3;
++ ++firstcol;
++
++ for (k = firstcol; k < ncol; k++) {
++ zz_mult(&temp, &x0, Mki0); Mki0++;
++ z_sub(&rhs[k], &rhs[k], &temp);
++ zz_mult(&temp, &x1, Mki1); Mki1++;
++ z_sub(&rhs[k], &rhs[k], &temp);
++ zz_mult(&temp, &x2, Mki2); Mki2++;
++ z_sub(&rhs[k], &rhs[k], &temp);
++ zz_mult(&temp, &x3, Mki3); Mki3++;
++ z_sub(&rhs[k], &rhs[k], &temp);
++ }
++
++ M0 += 4 * ldm + 4;
++ }
++
++ if ( firstcol < ncol - 1 ) { /* Do 2 columns */
++ Mki0 = M0 + 1;
++ Mki1 = Mki0 + ldm + 1;
++
++ x0 = rhs[firstcol];
++ zz_mult(&temp, &x0, Mki0); Mki0++;
++ z_sub(&x1, &rhs[firstcol+1], &temp);
++
++ rhs[++firstcol] = x1;
++ ++firstcol;
++
++ for (k = firstcol; k < ncol; k++) {
++ zz_mult(&temp, &x0, Mki0); Mki0++;
++ z_sub(&rhs[k], &rhs[k], &temp);
++ zz_mult(&temp, &x1, Mki1); Mki1++;
++ z_sub(&rhs[k], &rhs[k], &temp);
++ }
++ }
++
++}
++
++/*
++ * Solves a dense upper triangular system. The upper triangular matrix is
++ * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
++ * in the rhs vector.
++ */
++void
++zusolve (
++int ldm, /* in */
++int ncol, /* in */
++doublecomplex *M, /* in */
++doublecomplex *rhs /* modified */
++)
++{
++ doublecomplex xj, temp;
++ int jcol, j, irow;
++
++ jcol = ncol - 1;
++
++ for (j = 0; j < ncol; j++) {
++
++ z_div(&xj, &rhs[jcol], &M[jcol + jcol*ldm]); /* M(jcol, jcol) */
++ rhs[jcol] = xj;
++
++ for (irow = 0; irow < jcol; irow++) {
++ zz_mult(&temp, &xj, &M[irow+jcol*ldm]); /* M(irow, jcol) */
++ z_sub(&rhs[irow], &rhs[irow], &temp);
++ }
++
++ jcol--;
++
++ }
++}
++
++
++/*
++ * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
++ * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
++ */
++void zmatvec (
++int ldm, /* in -- leading dimension of M */
++int nrow, /* in */
++int ncol, /* in */
++doublecomplex *M, /* in */
++doublecomplex *vec, /* in */
++doublecomplex *Mxvec /* in/out */
++)
++{
++ doublecomplex vi0, vi1, vi2, vi3;
++ doublecomplex *M0, temp;
++ doublecomplex *Mki0, *Mki1, *Mki2, *Mki3;
++ register int firstcol = 0;
++ int k;
++
++ M0 = &M[0];
++
++ while ( firstcol < ncol - 3 ) { /* Do 4 columns */
++ Mki0 = M0;
++ Mki1 = Mki0 + ldm;
++ Mki2 = Mki1 + ldm;
++ Mki3 = Mki2 + ldm;
++
++ vi0 = vec[firstcol++];
++ vi1 = vec[firstcol++];
++ vi2 = vec[firstcol++];
++ vi3 = vec[firstcol++];
++ for (k = 0; k < nrow; k++) {
++ zz_mult(&temp, &vi0, Mki0); Mki0++;
++ z_add(&Mxvec[k], &Mxvec[k], &temp);
++ zz_mult(&temp, &vi1, Mki1); Mki1++;
++ z_add(&Mxvec[k], &Mxvec[k], &temp);
++ zz_mult(&temp, &vi2, Mki2); Mki2++;
++ z_add(&Mxvec[k], &Mxvec[k], &temp);
++ zz_mult(&temp, &vi3, Mki3); Mki3++;
++ z_add(&Mxvec[k], &Mxvec[k], &temp);
++ }
++
++ M0 += 4 * ldm;
++ }
++
++ while ( firstcol < ncol ) { /* Do 1 column */
++ Mki0 = M0;
++ vi0 = vec[firstcol++];
++ for (k = 0; k < nrow; k++) {
++ zz_mult(&temp, &vi0, Mki0); Mki0++;
++ z_add(&Mxvec[k], &Mxvec[k], &temp);
++ }
++ M0 += ldm;
++ }
++
++}
++
diff --git a/sci-libs/superlu_mt/files/superlu_mt-2.1-missing-includes.patch b/sci-libs/superlu_mt/files/superlu_mt-2.1-missing-includes.patch
new file mode 100644
index 000000000..afeca8fa7
--- /dev/null
+++ b/sci-libs/superlu_mt/files/superlu_mt-2.1-missing-includes.patch
@@ -0,0 +1,44 @@
+diff -Nur TESTING.orig/MATGEN/clatb4.c TESTING/MATGEN/clatb4.c
+--- TESTING.orig/MATGEN/clatb4.c 2013-07-15 11:48:34.285967038 -0700
++++ TESTING/MATGEN/clatb4.c 2013-07-15 11:49:05.150137959 -0700
+@@ -3,6 +3,7 @@
+ -lf2c -lm (in that order)
+ */
+
++#include <string.h>
+ #include "f2c.h"
+
+ /* Table of constant values */
+diff -Nur TESTING.orig/MATGEN/dlatb4.c TESTING/MATGEN/dlatb4.c
+--- TESTING.orig/MATGEN/dlatb4.c 2013-07-15 11:48:34.288966998 -0700
++++ TESTING/MATGEN/dlatb4.c 2013-07-15 11:49:05.150137959 -0700
+@@ -3,6 +3,7 @@
+ -lf2c -lm (in that order)
+ */
+
++#include <string.h>
+ #include "f2c.h"
+
+ /* Table of constant values */
+diff -Nur TESTING.orig/MATGEN/slatb4.c TESTING/MATGEN/slatb4.c
+--- TESTING.orig/MATGEN/slatb4.c 2013-07-15 11:48:34.288966998 -0700
++++ TESTING/MATGEN/slatb4.c 2013-07-15 11:49:05.150137959 -0700
+@@ -3,6 +3,7 @@
+ -lf2c -lm (in that order)
+ */
+
++#include <string.h>
+ #include "f2c.h"
+
+ /* Table of constant values */
+diff -Nur TESTING.orig/MATGEN/zlatb4.c TESTING/MATGEN/zlatb4.c
+--- TESTING.orig/MATGEN/zlatb4.c 2013-07-15 11:48:34.288966998 -0700
++++ TESTING/MATGEN/zlatb4.c 2013-07-15 11:49:05.150137959 -0700
+@@ -3,6 +3,7 @@
+ -lf2c -lm (in that order)
+ */
+
++#include <string.h>
+ #include "f2c.h"
+
+ /* Table of constant values */
diff --git a/sci-libs/superlu_mt/metadata.xml b/sci-libs/superlu_mt/metadata.xml
new file mode 100644
index 000000000..91de91fe6
--- /dev/null
+++ b/sci-libs/superlu_mt/metadata.xml
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd">
+<pkgmetadata>
+<herd>sci</herd>
+<longdescription lang="en">
+ SuperLU is a general purpose library for the direct solution of
+ large, sparse, nonsymmetric systems of linear equations on high
+ performance machines. The library is written in C and is callable
+ from either C or Fortran. The library routines will perform an LU
+ decomposition with partial pivoting and triangular system solves
+ through forward and back substitution. The LU factorization routines
+ can handle non-square matrices but the triangular solves are
+ performed only for square matrices. The matrix columns may be
+ preordered (before factorization) either through library or user
+ supplied routines. This preordering for sparsity is completely
+ separate from the factorization. Working precision iterative
+ refinement subroutines are provided for improved backward
+ stability. Routines are also provided to equilibrate the system,
+ estimate the condition number, calculate the relative backward
+ error, and estimate error bounds for the refined solutions.
+ This is the multi-threaded version (POSIX threads or OpenMP).
+</longdescription>
+</pkgmetadata>
diff --git a/sci-libs/superlu_mt/superlu_mt-2.1.ebuild b/sci-libs/superlu_mt/superlu_mt-2.1.ebuild
new file mode 100644
index 000000000..bfd2866d4
--- /dev/null
+++ b/sci-libs/superlu_mt/superlu_mt-2.1.ebuild
@@ -0,0 +1,105 @@
+# Copyright 1999-2013 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Header: $
+
+EAPI=5
+
+inherit eutils fortran-2 toolchain-funcs
+
+MYPN=SuperLU_MT
+
+DESCRIPTION="Sparse LU factorization library multithreading library"
+HOMEPAGE="http://crd.lbl.gov/~xiaoye/SuperLU/"
+SRC_URI="${HOMEPAGE}/${PN}_${PV}.tar.gz"
+
+LICENSE="BSD"
+SLOT="0"
+KEYWORDS="~amd64 ~ppc ~ppc64 ~x86 ~amd64-linux ~x86-linux"
+IUSE="doc openmp threads examples static-libs test"
+
+RDEPEND="
+ virtual/cblas"
+DEPEND="${RDEPEND}
+ virtual/pkgconfig
+ test? ( app-shells/tcsh )"
+
+S="${WORKDIR}/${MYPN}_${PV}"
+
+# Choose the threading model and publish it through the environment.
+#
+# src_configure writes $(CTHREADS) and $(LDTHREADS) into make.inc as make
+# references and no emake call passes them on its command line, so make can
+# only pick them up if they are exported here.
+#
+# USE=threads takes precedence over USE=openmp; with neither set, POSIX
+# threads are forced.
+pkg_setup() {
+	if use threads; then
+		export CTHREADS="-D__PTHREAD" LDTHREADS="-pthread"
+	elif use openmp; then
+		# Only gcc is probed for OpenMP support here.
+		if [[ $(tc-getCC) == *gcc ]] && ! tc-has-openmp; then
+			ewarn "OpenMP is not available in your current selected gcc"
+			die "need openmp capable gcc"
+		fi
+		# fortran-2 eclass knob, set before fortran-2_pkg_setup runs below.
+		FORTRAN_NEED_OPENMP=1
+		export CTHREADS="-D__OPENMP"
+		# Exported like the pthread branches; a plain assignment would be
+		# visible to the shell-level link in src_compile but not to make.
+		# NOTE(review): only the link flag is set for gcc; confirm whether
+		# -fopenmp is also wanted on the compile line.
+		[[ $(tc-getCC) == *gcc ]] && export LDTHREADS="-fopenmp"
+	else
+		ewarn "Neither threads or openmp selected. Forcing threads"
+		export CTHREADS="-D__PTHREAD" LDTHREADS="-pthread"
+	fi
+	fortran-2_pkg_setup
+}
+
+src_prepare() {
+ epatch \
+ "${FILESDIR}"/${P}-duplicate-symbols.patch \
+ "${FILESDIR}"/${P}-missing-includes.patch
+}
+
+# Adapt upstream's build files to the Gentoo toolchain and user flags.
+#
+# make.inc: blank PLAT, swap -O3 for ${CFLAGS}, take the C and Fortran
+# compilers, archiver, ranlib and loader from toolchain-funcs, drop the
+# MPLIB lines and ask pkg-config for the BLAS link line.  $(PIC),
+# $(CTHREADS) and $(LDTHREADS) are escaped so they land in make.inc as make
+# references that are resolved when make runs, not here.
+#
+# EXAMPLE/Makefile: include a make.inc sitting next to it, take headers from
+# ${EPREFIX}/usr/include/${PN} and link with -lsuperlu_mt instead of the
+# in-tree ../lib file (src_install ships make.inc alongside the examples).
+src_configure() {
+	sed -i \
+		-e 's/^\(PLAT\s*=\).*/\1/' \
+		-e "s:^\(CC\s*=\).*:\1 $(tc-getCC):" \
+		-e "/CFLAGS/s:-O3:${CFLAGS} \$(PIC):" \
+		-e "s:^\(PREDEFS\s*=\).*:\1 ${CPPFLAGS} -DUSE_VENDOR_BLAS \$(CTHREADS):" \
+		-e "s:^\(NOOPTS\s*=.*\):\1 \$(PIC):" \
+		-e "s:^\(FORTRAN\s*=\).*:\1 $(tc-getFC):" \
+		-e "s:^\(FFLAGS\s*=\).*:\1 ${FFLAGS} \$(PIC):" \
+		-e "s:^\(ARCH\s*=\).*:\1 $(tc-getAR):" \
+		-e "s:^\(RANLIB\s*=\).*:\1 $(tc-getRANLIB):" \
+		-e "s:^\(LOADER\s*=\).*:\1 $(tc-getCC):" \
+		-e "s:^\(LOADOPTS\s*=\).*:\1 ${LDFLAGS} \$(LDTHREADS):" \
+		-e "/MPLIB/d" \
+		-e "s:^\(BLASLIB\s*=\).*:\1 $($(tc-getPKG_CONFIG) --libs blas):" \
+		make.inc || die
+	# Shared-object name; not used in this function, read by src_compile
+	# and src_test.
+	SONAME=libsuperlu_mt.so.0
+	sed -i \
+		-e 's|../make.inc|make.inc|' \
+		-e "s|../SRC|${EPREFIX}/usr/include/${PN}|" \
+		-e '/:.*$(SUPERLULIB)/s|../lib/$(SUPERLULIB)||g' \
+		-e 's|../lib/$(SUPERLULIB)|-lsuperlu_mt|g' \
+		EXAMPLE/Makefile || die
+}
+
+# Build the shared library and, with USE=static-libs, a static archive too.
+#
+# Shared pass: objects are compiled with -fPIC while the archiver and ranlib
+# are replaced by `echo`, so make only produces SRC/*.o; those are then
+# linked by hand into lib/${SONAME} and a libsuperlu_mt.so symlink is added.
+# Static pass: the objects are removed and rebuilt with an empty PIC, this
+# time with the real archiver and ranlib.
+src_compile() {
+	local cc=$(tc-getCC)
+	local blas_libs=$($(tc-getPKG_CONFIG) --libs blas)
+
+	emake superlulib PIC="-fPIC" ARCH="echo" ARCHFLAGS="" RANLIB="echo"
+
+	${cc} ${LDFLAGS} ${LDTHREADS} -shared -Wl,-soname=${SONAME} SRC/*.o \
+		${blas_libs} -lm -o lib/${SONAME} \
+		|| die
+	ln -s ${SONAME} lib/libsuperlu_mt.so || die
+
+	# A bare `return` hands back the status of the failed `use` test, which
+	# is what the single &&-chain used to leave behind.
+	use static-libs || return
+
+	rm -f SRC/*.o && emake superlulib \
+		PIC="" ARCH="$(tc-getAR)" ARCHFLAGS="cr" RANLIB="$(tc-getRANLIB)"
+}
+
+# Build the test matrix generator serially, then run upstream's `testing`
+# target against the freshly built shared library in ${S}/lib.
+src_test() {
+	emake -j1 tmglib
+
+	# Exported for this function only, so the second emake and its children
+	# find the just-built library first.
+	local -x LD_LIBRARY_PATH="${S}/lib:${LD_LIBRARY_PATH}"
+	emake SUPERLULIB="${SONAME}" testing
+}
+
+src_install() {
+ dolib.so lib/*so*
+ use static-libs && dolib.a lib/*.a
+ insinto /usr/include/${PN}
+ doins SRC/*h
+ dodoc README
+ use doc && dodoc DOC/ug.pdf
+ if use examples; then
+ insinto /usr/share/doc/${PF}/examples
+ doins -r EXAMPLE/* make.inc
+ fi
+}