diff options
Diffstat (limited to 'sci-libs/superlu_mt')
-rw-r--r-- | sci-libs/superlu_mt/ChangeLog | 11 | ||||
-rw-r--r-- | sci-libs/superlu_mt/Manifest | 1 | ||||
-rw-r--r-- | sci-libs/superlu_mt/files/superlu_mt-2.1-duplicate-symbols.patch | 900 | ||||
-rw-r--r-- | sci-libs/superlu_mt/files/superlu_mt-2.1-missing-includes.patch | 44 | ||||
-rw-r--r-- | sci-libs/superlu_mt/metadata.xml | 23 | ||||
-rw-r--r-- | sci-libs/superlu_mt/superlu_mt-2.1.ebuild | 105 |
6 files changed, 1084 insertions, 0 deletions
diff --git a/sci-libs/superlu_mt/ChangeLog b/sci-libs/superlu_mt/ChangeLog new file mode 100644 index 000000000..f6d1810dd --- /dev/null +++ b/sci-libs/superlu_mt/ChangeLog @@ -0,0 +1,11 @@ +# ChangeLog for sci-libs/superlu_mt +# Copyright 1999-2013 Gentoo Foundation; Distributed under the GPL v2 +# $Header: $ + +*superlu_mt-2.1 (15 Jul 2013) + + 15 Jul 2013; Sébastien Fabbro <bicatali@gentoo.org> + +files/superlu_mt-2.1-duplicate-symbols.patch, + +files/superlu_mt-2.1-missing-includes.patch, +metadata.xml, + +superlu_mt-2.1.ebuild: + sci-libs/superlu_mt: Initial import diff --git a/sci-libs/superlu_mt/Manifest b/sci-libs/superlu_mt/Manifest new file mode 100644 index 000000000..929cbbd3a --- /dev/null +++ b/sci-libs/superlu_mt/Manifest @@ -0,0 +1 @@ +DIST superlu_mt_2.1.tar.gz 2718660 SHA256 77fd2a67a789704b566681dc614fa8f759b2925d3ff49cda9e11376b6dc38ed9 SHA512 1abd94c086404a12b82dcf39238a2aef584ba9d11ca24942faad1dbd8a283f257acbc594325ba3a64ec7323b2d738b2dcb8e2551953d01d017ca91f3a2d05890 WHIRLPOOL e7482c9c29e50af0a23bd943a9b80f2cb8bdcb7169f435994e7680b28e9fd9ec0876713ff1ac5b58a325cd000f7ca63fc1caad4eb52599e63922288dcf9e5505 diff --git a/sci-libs/superlu_mt/files/superlu_mt-2.1-duplicate-symbols.patch b/sci-libs/superlu_mt/files/superlu_mt-2.1-duplicate-symbols.patch new file mode 100644 index 000000000..6b2c5d57b --- /dev/null +++ b/sci-libs/superlu_mt/files/superlu_mt-2.1-duplicate-symbols.patch @@ -0,0 +1,900 @@ +diff -Nur SRC.orig/cmatgen.c SRC/cmatgen.c +--- SRC.orig/cmatgen.c 2013-07-15 11:47:52.512735420 -0700 ++++ SRC/cmatgen.c 2013-07-15 11:49:05.149137948 -0700 +@@ -93,76 +93,4 @@ + xa[n] = lasta; + } + +-double dlaran_(int *iseed) +-{ +-/* -- LAPACK auxiliary routine (version 2.0) -- +- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +- Courant Institute, Argonne National Lab, and Rice University +- February 29, 1992 +- +- Purpose +- ======= +- +- DLARAN returns a random real number from a uniform (0,1) +- distribution. 
+- +- Arguments +- ========= +- +- ISEED (input/output) INT array, dimension (4) +- On entry, the seed of the random number generator; the array +- +- elements must be between 0 and 4095, and ISEED(4) must be +- odd. +- On exit, the seed is updated. +- +- Further Details +- =============== +- +- This routine uses a multiplicative congruential method with modulus +- 2**48 and multiplier 33952834046453 (see G.S.Fishman, +- 'Multiplicative congruential random number generators with modulus +- 2**b: an exhaustive analysis for b = 32 and a partial analysis for +- b = 48', Math. Comp. 189, pp 331-344, 1990). +- +- 48-bit integers are stored in 4 integer array elements with 12 bits +- per element. Hence the routine is portable across machines with +- integers of 32 bits or more. +- +- ===================================================================== +-*/ +- +- /* Local variables */ +- int it1, it2, it3, it4; +- +- --iseed; +- +- /* multiply the seed by the multiplier modulo 2**48 */ +- it4 = iseed[4] * 2549; +- it3 = it4 / 4096; +- it4 -= it3 << 12; +- it3 = it3 + iseed[3] * 2549 + iseed[4] * 2508; +- it2 = it3 / 4096; +- it3 -= it2 << 12; +- it2 = it2 + iseed[2] * 2549 + iseed[3] * 2508 + iseed[4] * 322; +- it1 = it2 / 4096; +- it2 -= it1 << 12; +- it1 = it1 + iseed[1] * 2549 + iseed[2] * 2508 + iseed[3] * 322 + iseed[4] +- * 494; +- it1 %= 4096; +- +- /* return updated seed */ +- +- iseed[1] = it1; +- iseed[2] = it2; +- iseed[3] = it3; +- iseed[4] = it4; +- +- /* convert 48-bit integer to a real number in the interval (0,1) */ +- +- return ((double) it1 + +- ((double) it2 + ((double) it3 + (double) it4 * 2.44140625e-4) * +- 2.44140625e-4) * 2.44140625e-4) * 2.44140625e-4; +- +-} /* dlaran_ */ + +diff -Nur SRC.orig/cmyblas2.c SRC/cmyblas2.c +--- SRC.orig/cmyblas2.c 2013-07-15 11:47:52.509735400 -0700 ++++ SRC/cmyblas2.c 2013-07-15 11:49:05.149137948 -0700 +@@ -183,3 +183,127 @@ + + } + ++/* ++ * Performs dense matrix-vector multiply with 2 vectors: ++ * y0 = y0 + A * 
x0 ++ * y1 = y1 + A * x1 ++ */ ++void cmatvec2 ( ++ int lda, /* leading dimension of A */ ++ int m, ++ int n, ++ complex *A, /* in - size m-by-n */ ++ complex *x0, /* in - size n-by-1 */ ++ complex *x1, /* in - size n-by-1 */ ++ complex *y0, /* in/out - size m-by-1 */ ++ complex *y1 /* in/out - size m-by-1 */ ++ ) ++ ++{ ++ complex v00, v10, v20, v30, v40, v50, v60, v70, ++ v01, v11, v21, v31, v41, v51, v61, v71; ++ complex t0, t1, t2, t3, t4, t5, t6, t7; ++ complex f0, f1; ++ complex *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7; ++ register int firstcol = 0; ++ complex *M0, temp; ++ int k; ++ ++ M0 = &A[0]; ++ ++ while ( firstcol < n - 7 ) { /* Do 8 columns */ ++ ++ Mki0 = M0; ++ Mki1 = Mki0 + lda; ++ Mki2 = Mki1 + lda; ++ Mki3 = Mki2 + lda; ++ Mki4 = Mki3 + lda; ++ Mki5 = Mki4 + lda; ++ Mki6 = Mki5 + lda; ++ Mki7 = Mki6 + lda; ++ ++ v00 = x0[firstcol]; v01 = x1[firstcol++]; ++ v10 = x0[firstcol]; v11 = x1[firstcol++]; ++ v20 = x0[firstcol]; v21 = x1[firstcol++]; ++ v30 = x0[firstcol]; v31 = x1[firstcol++]; ++ v40 = x0[firstcol]; v41 = x1[firstcol++]; ++ v50 = x0[firstcol]; v51 = x1[firstcol++]; ++ v60 = x0[firstcol]; v61 = x1[firstcol++]; ++ v70 = x0[firstcol]; v71 = x1[firstcol++]; ++ ++ for (k = 0; k < m; k++) { ++ f0 = y0[k]; ++ f1 = y1[k]; ++ t0 = Mki0[k]; cc_mult(&temp, &v00, &t0);c_add(&f0,&f0,&temp); ++ cc_mult(&temp,&v01,&t0);c_add(&f1,&f1,&temp); ++ t1 = Mki1[k]; cc_mult(&temp,&v10,&t1);c_add(&f0,&f0,&temp); ++ cc_mult(&temp,&v11,&t1);c_add(&f1,&f1,&temp); ++ t2 = Mki2[k]; cc_mult(&temp,&v20,&t2);c_add(&f0,&f0,&temp); ++ cc_mult(&temp,&v21,&t2);c_add(&f1,&f1,&temp); ++ t3 = Mki3[k]; cc_mult(&temp,&v30,&t3);c_add(&f0,&f0,&temp); ++ cc_mult(&temp,&v31,&t3);c_add(&f1,&f1,&temp); ++ t4 = Mki4[k]; cc_mult(&temp,&v40,&t4);c_add(&f0,&f0,&temp); ++ cc_mult(&temp,&v41,&t4);c_add(&f1,&f1,&temp); ++ t5 = Mki5[k]; cc_mult(&temp,&v50,&t5);c_add(&f0,&f0,&temp); ++ cc_mult(&temp,&v51,&t5);c_add(&f1,&f1,&temp); ++ t6 = Mki6[k]; 
cc_mult(&temp,&v60,&t6);c_add(&f0,&f0,&temp); ++ cc_mult(&temp,&v61,&t6);c_add(&f1,&f1,&temp); ++ t7 = Mki7[k]; cc_mult(&temp,&v70,&t7);c_add(&f0,&f0,&temp); ++ cc_mult(&temp,&v71,&t7);c_add(&f1,&f1,&temp); ++ y0[k] = f0; ++ y1[k] = f1; ++ } ++ ++ M0 += 8 * lda; ++ } ++ ++ while ( firstcol < n - 3 ) { /* Do 4 columns */ ++ Mki0 = M0; ++ Mki1 = Mki0 + lda; ++ Mki2 = Mki1 + lda; ++ Mki3 = Mki2 + lda; ++ ++ v00 = x0[firstcol]; v01 = x1[firstcol++]; ++ v10 = x0[firstcol]; v11 = x1[firstcol++]; ++ v20 = x0[firstcol]; v21 = x1[firstcol++]; ++ v30 = x0[firstcol]; v31 = x1[firstcol++]; ++ ++ for (k = 0; k < m; k++) { ++ f0 = y0[k]; ++ f1 = y1[k]; ++ t0 = Mki0[k]; cc_mult(&temp,&v00,&t0);c_add(&f0,&f0,&temp); ++ cc_mult(&temp,&v01,&t0);c_add(&f1,&f1,&temp); ++ t1 = Mki1[k]; cc_mult(&temp,&v10,&t1);c_add(&f0,&f0,&temp); ++ cc_mult(&temp,&v11,&t1);c_add(&f1,&f1,&temp); ++ t2 = Mki2[k]; cc_mult(&temp,&v20,&t2);c_add(&f0,&f0,&temp); ++ cc_mult(&temp,&v21,&t2);c_add(&f1,&f1,&temp); ++ t3 = Mki3[k]; cc_mult(&temp,&v30,&t3);c_add(&f0,&f0,&temp); ++ cc_mult(&temp,&v31,&t3);c_add(&f1,&f1,&temp); ++ y0[k] = f0; ++ y1[k] = f1; ++ } ++ ++ M0 += 4 * lda; ++ ++ } ++ ++ while ( firstcol < n ) { /* Do 1 column */ ++ Mki0 = M0; ++ v00 = x0[firstcol]; v01 = x1[firstcol++]; ++ ++ for (k = 0; k < m; k++) { ++ f0 = y0[k]; ++ f1 = y1[k]; ++ t0 = Mki0[k]; ++ cc_mult(&temp,&v00,&t0);c_add(&f0,&f0,&temp); ++ cc_mult(&temp,&v01,&t0);c_add(&f1,&f1,&temp); ++ y0[k] = f0; ++ y1[k] = f1; ++ } ++ ++ M0 += lda; ++ } ++ ++} ++ ++ +diff -Nur SRC.orig/cmyblas2.c.orig SRC/cmyblas2.c.orig +--- SRC.orig/cmyblas2.c.orig 1969-12-31 16:00:00.000000000 -0800 ++++ SRC/cmyblas2.c.orig 2013-07-15 11:49:05.149137948 -0700 +@@ -0,0 +1,185 @@ ++ ++/* ++ * -- SuperLU routine (version 2.0) -- ++ * Lawrence Berkeley National Lab, Univ. of California Berkeley, ++ * and Xerox Palo Alto Research Center. 
++ * September 10, 2007 ++ * ++ */ ++/* ++ * File name: cmyblas2.c ++ * Purpose: ++ * Level 2 BLAS operations: solves and matvec, written in C. ++ * Note: ++ * This is only used when the system lacks an efficient BLAS library. ++ */ ++#include "slu_scomplex.h" ++ ++ ++/* ++ * Solves a dense UNIT lower triangular system. The unit lower ++ * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). ++ * The solution will be returned in the rhs vector. ++ */ ++void clsolve ( int ldm, int ncol, complex *M, complex *rhs ) ++{ ++ int k; ++ complex x0, x1, x2, x3, temp; ++ complex *M0; ++ complex *Mki0, *Mki1, *Mki2, *Mki3; ++ register int firstcol = 0; ++ ++ M0 = &M[0]; ++ ++ ++ while ( firstcol < ncol - 3 ) { /* Do 4 columns */ ++ Mki0 = M0 + 1; ++ Mki1 = Mki0 + ldm + 1; ++ Mki2 = Mki1 + ldm + 1; ++ Mki3 = Mki2 + ldm + 1; ++ ++ x0 = rhs[firstcol]; ++ cc_mult(&temp, &x0, Mki0); Mki0++; ++ c_sub(&x1, &rhs[firstcol+1], &temp); ++ cc_mult(&temp, &x0, Mki0); Mki0++; ++ c_sub(&x2, &rhs[firstcol+2], &temp); ++ cc_mult(&temp, &x1, Mki1); Mki1++; ++ c_sub(&x2, &x2, &temp); ++ cc_mult(&temp, &x0, Mki0); Mki0++; ++ c_sub(&x3, &rhs[firstcol+3], &temp); ++ cc_mult(&temp, &x1, Mki1); Mki1++; ++ c_sub(&x3, &x3, &temp); ++ cc_mult(&temp, &x2, Mki2); Mki2++; ++ c_sub(&x3, &x3, &temp); ++ ++ rhs[++firstcol] = x1; ++ rhs[++firstcol] = x2; ++ rhs[++firstcol] = x3; ++ ++firstcol; ++ ++ for (k = firstcol; k < ncol; k++) { ++ cc_mult(&temp, &x0, Mki0); Mki0++; ++ c_sub(&rhs[k], &rhs[k], &temp); ++ cc_mult(&temp, &x1, Mki1); Mki1++; ++ c_sub(&rhs[k], &rhs[k], &temp); ++ cc_mult(&temp, &x2, Mki2); Mki2++; ++ c_sub(&rhs[k], &rhs[k], &temp); ++ cc_mult(&temp, &x3, Mki3); Mki3++; ++ c_sub(&rhs[k], &rhs[k], &temp); ++ } ++ ++ M0 += 4 * ldm + 4; ++ } ++ ++ if ( firstcol < ncol - 1 ) { /* Do 2 columns */ ++ Mki0 = M0 + 1; ++ Mki1 = Mki0 + ldm + 1; ++ ++ x0 = rhs[firstcol]; ++ cc_mult(&temp, &x0, Mki0); Mki0++; ++ c_sub(&x1, &rhs[firstcol+1], &temp); ++ ++ rhs[++firstcol] = x1; ++ ++firstcol; ++ ++ 
for (k = firstcol; k < ncol; k++) { ++ cc_mult(&temp, &x0, Mki0); Mki0++; ++ c_sub(&rhs[k], &rhs[k], &temp); ++ cc_mult(&temp, &x1, Mki1); Mki1++; ++ c_sub(&rhs[k], &rhs[k], &temp); ++ } ++ } ++ ++} ++ ++/* ++ * Solves a dense upper triangular system. The upper triangular matrix is ++ * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned ++ * in the rhs vector. ++ */ ++void ++cusolve ( ++int ldm, /* in */ ++int ncol, /* in */ ++complex *M, /* in */ ++complex *rhs /* modified */ ++) ++{ ++ complex xj, temp; ++ int jcol, j, irow; ++ ++ jcol = ncol - 1; ++ ++ for (j = 0; j < ncol; j++) { ++ ++ c_div(&xj, &rhs[jcol], &M[jcol + jcol*ldm]); /* M(jcol, jcol) */ ++ rhs[jcol] = xj; ++ ++ for (irow = 0; irow < jcol; irow++) { ++ cc_mult(&temp, &xj, &M[irow+jcol*ldm]); /* M(irow, jcol) */ ++ c_sub(&rhs[irow], &rhs[irow], &temp); ++ } ++ ++ jcol--; ++ ++ } ++} ++ ++ ++/* ++ * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec. ++ * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[]. 
++ */ ++void cmatvec ( ++int ldm, /* in -- leading dimension of M */ ++int nrow, /* in */ ++int ncol, /* in */ ++complex *M, /* in */ ++complex *vec, /* in */ ++complex *Mxvec /* in/out */ ++) ++{ ++ complex vi0, vi1, vi2, vi3; ++ complex *M0, temp; ++ complex *Mki0, *Mki1, *Mki2, *Mki3; ++ register int firstcol = 0; ++ int k; ++ ++ M0 = &M[0]; ++ ++ while ( firstcol < ncol - 3 ) { /* Do 4 columns */ ++ Mki0 = M0; ++ Mki1 = Mki0 + ldm; ++ Mki2 = Mki1 + ldm; ++ Mki3 = Mki2 + ldm; ++ ++ vi0 = vec[firstcol++]; ++ vi1 = vec[firstcol++]; ++ vi2 = vec[firstcol++]; ++ vi3 = vec[firstcol++]; ++ for (k = 0; k < nrow; k++) { ++ cc_mult(&temp, &vi0, Mki0); Mki0++; ++ c_add(&Mxvec[k], &Mxvec[k], &temp); ++ cc_mult(&temp, &vi1, Mki1); Mki1++; ++ c_add(&Mxvec[k], &Mxvec[k], &temp); ++ cc_mult(&temp, &vi2, Mki2); Mki2++; ++ c_add(&Mxvec[k], &Mxvec[k], &temp); ++ cc_mult(&temp, &vi3, Mki3); Mki3++; ++ c_add(&Mxvec[k], &Mxvec[k], &temp); ++ } ++ ++ M0 += 4 * ldm; ++ } ++ ++ while ( firstcol < ncol ) { /* Do 1 column */ ++ Mki0 = M0; ++ vi0 = vec[firstcol++]; ++ for (k = 0; k < nrow; k++) { ++ cc_mult(&temp, &vi0, Mki0); Mki0++; ++ c_add(&Mxvec[k], &Mxvec[k], &temp); ++ } ++ M0 += ldm; ++ } ++ ++} ++ +diff -Nur SRC.orig/Makefile SRC/Makefile +--- SRC.orig/Makefile 2013-07-15 11:47:52.511735412 -0700 ++++ SRC/Makefile 2013-07-15 11:53:15.393528085 -0700 +@@ -31,7 +31,7 @@ + # + ####################################################################### + +-ALLAUX = superlu_timer.o dclock.o sp_ienv.o lsame.o xerbla.o \ ++ALLAUX = superlu_timer.o sp_ienv.o lsame.o xerbla.o \ + util.o pmemory.o qrnzcnt.o await.o \ + get_perm_c.o mmd.o colamd.o sp_coletree.o \ + pxgstrf_scheduler.o sp_colorder.o \ +diff -Nur SRC.orig/smatgen.c SRC/smatgen.c +--- SRC.orig/smatgen.c 2013-07-15 11:47:52.512735420 -0700 ++++ SRC/smatgen.c 2013-07-15 11:49:05.149137948 -0700 +@@ -93,76 +93,3 @@ + xa[n] = lasta; + } + +-double dlaran_(int *iseed) +-{ +-/* -- LAPACK auxiliary routine (version 2.0) -- +- Univ. 
of Tennessee, Univ. of California Berkeley, NAG Ltd., +- Courant Institute, Argonne National Lab, and Rice University +- February 29, 1992 +- +- Purpose +- ======= +- +- DLARAN returns a random real number from a uniform (0,1) +- distribution. +- +- Arguments +- ========= +- +- ISEED (input/output) INT array, dimension (4) +- On entry, the seed of the random number generator; the array +- +- elements must be between 0 and 4095, and ISEED(4) must be +- odd. +- On exit, the seed is updated. +- +- Further Details +- =============== +- +- This routine uses a multiplicative congruential method with modulus +- 2**48 and multiplier 33952834046453 (see G.S.Fishman, +- 'Multiplicative congruential random number generators with modulus +- 2**b: an exhaustive analysis for b = 32 and a partial analysis for +- b = 48', Math. Comp. 189, pp 331-344, 1990). +- +- 48-bit integers are stored in 4 integer array elements with 12 bits +- per element. Hence the routine is portable across machines with +- integers of 32 bits or more. 
+- +- ===================================================================== +-*/ +- +- /* Local variables */ +- int it1, it2, it3, it4; +- +- --iseed; +- +- /* multiply the seed by the multiplier modulo 2**48 */ +- it4 = iseed[4] * 2549; +- it3 = it4 / 4096; +- it4 -= it3 << 12; +- it3 = it3 + iseed[3] * 2549 + iseed[4] * 2508; +- it2 = it3 / 4096; +- it3 -= it2 << 12; +- it2 = it2 + iseed[2] * 2549 + iseed[3] * 2508 + iseed[4] * 322; +- it1 = it2 / 4096; +- it2 -= it1 << 12; +- it1 = it1 + iseed[1] * 2549 + iseed[2] * 2508 + iseed[3] * 322 + iseed[4] +- * 494; +- it1 %= 4096; +- +- /* return updated seed */ +- +- iseed[1] = it1; +- iseed[2] = it2; +- iseed[3] = it3; +- iseed[4] = it4; +- +- /* convert 48-bit integer to a real number in the interval (0,1) */ +- +- return ((double) it1 + +- ((double) it2 + ((double) it3 + (double) it4 * 2.44140625e-4) * +- 2.44140625e-4) * 2.44140625e-4) * 2.44140625e-4; +- +-} /* dlaran_ */ +- +diff -Nur SRC.orig/xerbla.c SRC/xerbla.c +--- SRC.orig/xerbla.c 2013-07-15 11:47:52.513735427 -0700 ++++ SRC/xerbla.c 2013-07-15 11:49:05.150137959 -0700 +@@ -1,3 +1,4 @@ ++#include <stdio.h> + /* Subroutine */ int xerbla_(char *srname, int *info) + { + /* -- LAPACK auxiliary routine (version 2.0) -- +diff -Nur SRC.orig/zmatgen.c SRC/zmatgen.c +--- SRC.orig/zmatgen.c 2013-07-15 11:47:52.513735427 -0700 ++++ SRC/zmatgen.c 2013-07-15 11:49:05.150137959 -0700 +@@ -93,76 +93,3 @@ + xa[n] = lasta; + } + +-double dlaran_(int *iseed) +-{ +-/* -- LAPACK auxiliary routine (version 2.0) -- +- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +- Courant Institute, Argonne National Lab, and Rice University +- February 29, 1992 +- +- Purpose +- ======= +- +- DLARAN returns a random real number from a uniform (0,1) +- distribution. +- +- Arguments +- ========= +- +- ISEED (input/output) INT array, dimension (4) +- On entry, the seed of the random number generator; the array +- +- elements must be between 0 and 4095, and ISEED(4) must be +- odd. 
+- On exit, the seed is updated. +- +- Further Details +- =============== +- +- This routine uses a multiplicative congruential method with modulus +- 2**48 and multiplier 33952834046453 (see G.S.Fishman, +- 'Multiplicative congruential random number generators with modulus +- 2**b: an exhaustive analysis for b = 32 and a partial analysis for +- b = 48', Math. Comp. 189, pp 331-344, 1990). +- +- 48-bit integers are stored in 4 integer array elements with 12 bits +- per element. Hence the routine is portable across machines with +- integers of 32 bits or more. +- +- ===================================================================== +-*/ +- +- /* Local variables */ +- int it1, it2, it3, it4; +- +- --iseed; +- +- /* multiply the seed by the multiplier modulo 2**48 */ +- it4 = iseed[4] * 2549; +- it3 = it4 / 4096; +- it4 -= it3 << 12; +- it3 = it3 + iseed[3] * 2549 + iseed[4] * 2508; +- it2 = it3 / 4096; +- it3 -= it2 << 12; +- it2 = it2 + iseed[2] * 2549 + iseed[3] * 2508 + iseed[4] * 322; +- it1 = it2 / 4096; +- it2 -= it1 << 12; +- it1 = it1 + iseed[1] * 2549 + iseed[2] * 2508 + iseed[3] * 322 + iseed[4] +- * 494; +- it1 %= 4096; +- +- /* return updated seed */ +- +- iseed[1] = it1; +- iseed[2] = it2; +- iseed[3] = it3; +- iseed[4] = it4; +- +- /* convert 48-bit integer to a real number in the interval (0,1) */ +- +- return ((double) it1 + +- ((double) it2 + ((double) it3 + (double) it4 * 2.44140625e-4) * +- 2.44140625e-4) * 2.44140625e-4) * 2.44140625e-4; +- +-} /* dlaran_ */ +- +diff -Nur SRC.orig/zmyblas2.c SRC/zmyblas2.c +--- SRC.orig/zmyblas2.c 2013-07-15 11:47:52.511735412 -0700 ++++ SRC/zmyblas2.c 2013-07-15 11:49:05.150137959 -0700 +@@ -183,3 +183,127 @@ + + } + ++/* ++ * Performs dense matrix-vector multiply with 2 vectors: ++ * y0 = y0 + A * x0 ++ * y1 = y1 + A * x1 ++ */ ++void zmatvec2 ( ++ int lda, /* leading dimension of A */ ++ int m, ++ int n, ++ doublecomplex *A, /* in - size m-by-n */ ++ doublecomplex *x0, /* in - size n-by-1 */ ++ doublecomplex 
*x1, /* in - size n-by-1 */ ++ doublecomplex *y0, /* in/out - size m-by-1 */ ++ doublecomplex *y1 /* in/out - size m-by-1 */ ++ ) ++ ++{ ++ doublecomplex v00, v10, v20, v30, v40, v50, v60, v70, ++ v01, v11, v21, v31, v41, v51, v61, v71; ++ doublecomplex t0, t1, t2, t3, t4, t5, t6, t7; ++ doublecomplex f0, f1; ++ doublecomplex *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7; ++ register int firstcol = 0; ++ doublecomplex *M0, temp; ++ int k; ++ ++ M0 = &A[0]; ++ ++ while ( firstcol < n - 7 ) { /* Do 8 columns */ ++ ++ Mki0 = M0; ++ Mki1 = Mki0 + lda; ++ Mki2 = Mki1 + lda; ++ Mki3 = Mki2 + lda; ++ Mki4 = Mki3 + lda; ++ Mki5 = Mki4 + lda; ++ Mki6 = Mki5 + lda; ++ Mki7 = Mki6 + lda; ++ ++ v00 = x0[firstcol]; v01 = x1[firstcol++]; ++ v10 = x0[firstcol]; v11 = x1[firstcol++]; ++ v20 = x0[firstcol]; v21 = x1[firstcol++]; ++ v30 = x0[firstcol]; v31 = x1[firstcol++]; ++ v40 = x0[firstcol]; v41 = x1[firstcol++]; ++ v50 = x0[firstcol]; v51 = x1[firstcol++]; ++ v60 = x0[firstcol]; v61 = x1[firstcol++]; ++ v70 = x0[firstcol]; v71 = x1[firstcol++]; ++ ++ for (k = 0; k < m; k++) { ++ f0 = y0[k]; ++ f1 = y1[k]; ++ t0 = Mki0[k]; zz_mult(&temp,&v00,&t0);z_add(&f0,&f0,&temp); ++ zz_mult(&temp,&v01,&t0);z_add(&f1,&f1,&temp); ++ t1 = Mki1[k]; zz_mult(&temp,&v10,&t1);z_add(&f0,&f0,&temp); ++ zz_mult(&temp,&v11,&t1);z_add(&f1,&f1,&temp); ++ t2 = Mki2[k]; zz_mult(&temp,&v20,&t2);z_add(&f0,&f0,&temp); ++ zz_mult(&temp,&v21,&t2);z_add(&f1,&f1,&temp); ++ t3 = Mki3[k]; zz_mult(&temp,&v30,&t3);z_add(&f0,&f0,&temp); ++ zz_mult(&temp,&v31,&t3);z_add(&f1,&f1,&temp); ++ t4 = Mki4[k]; zz_mult(&temp,&v40,&t4);z_add(&f0,&f0,&temp); ++ zz_mult(&temp,&v41,&t4);z_add(&f1,&f1,&temp); ++ t5 = Mki5[k]; zz_mult(&temp,&v50,&t5);z_add(&f0,&f0,&temp); ++ zz_mult(&temp,&v51,&t5);z_add(&f1,&f1,&temp); ++ t6 = Mki6[k]; zz_mult(&temp,&v60,&t6);z_add(&f0,&f0,&temp); ++ zz_mult(&temp,&v61,&t6);z_add(&f1,&f1,&temp); ++ t7 = Mki7[k]; zz_mult(&temp,&v70,&t7);z_add(&f0,&f0,&temp); ++ 
zz_mult(&temp,&v71,&t7);z_add(&f1,&f1,&temp); ++ y0[k] = f0; ++ y1[k] = f1; ++ } ++ ++ M0 += 8 * lda; ++ } ++ ++ while ( firstcol < n - 3 ) { /* Do 4 columns */ ++ Mki0 = M0; ++ Mki1 = Mki0 + lda; ++ Mki2 = Mki1 + lda; ++ Mki3 = Mki2 + lda; ++ ++ v00 = x0[firstcol]; v01 = x1[firstcol++]; ++ v10 = x0[firstcol]; v11 = x1[firstcol++]; ++ v20 = x0[firstcol]; v21 = x1[firstcol++]; ++ v30 = x0[firstcol]; v31 = x1[firstcol++]; ++ ++ for (k = 0; k < m; k++) { ++ f0 = y0[k]; ++ f1 = y1[k]; ++ t0 = Mki0[k]; zz_mult(&temp,&v00,&t0);z_add(&f0,&f0,&temp); ++ zz_mult(&temp,&v01,&t0);z_add(&f1,&f1,&temp); ++ t1 = Mki1[k]; zz_mult(&temp,&v10,&t1);z_add(&f0,&f0,&temp); ++ zz_mult(&temp,&v11,&t1);z_add(&f1,&f1,&temp); ++ t2 = Mki2[k]; zz_mult(&temp,&v20,&t2);z_add(&f0,&f0,&temp); ++ zz_mult(&temp,&v21,&t2);z_add(&f1,&f1,&temp); ++ t3 = Mki3[k]; zz_mult(&temp,&v30,&t3);z_add(&f0,&f0,&temp); ++ zz_mult(&temp,&v31,&t3);z_add(&f1,&f1,&temp); ++ y0[k] = f0; ++ y1[k] = f1; ++ } ++ ++ M0 += 4 * lda; ++ ++ } ++ ++ while ( firstcol < n ) { /* Do 1 column */ ++ Mki0 = M0; ++ v00 = x0[firstcol]; v01 = x1[firstcol++]; ++ ++ for (k = 0; k < m; k++) { ++ f0 = y0[k]; ++ f1 = y1[k]; ++ t0 = Mki0[k]; ++ zz_mult(&temp,&v00,&t0);z_add(&f0,&f0,&temp); ++ zz_mult(&temp,&v01,&t0);z_add(&f1,&f1,&temp); ++ y0[k] = f0; ++ y1[k] = f1; ++ } ++ ++ M0 += lda; ++ } ++ ++} ++ ++ +diff -Nur SRC.orig/zmyblas2.c.orig SRC/zmyblas2.c.orig +--- SRC.orig/zmyblas2.c.orig 1969-12-31 16:00:00.000000000 -0800 ++++ SRC/zmyblas2.c.orig 2013-07-15 11:49:05.150137959 -0700 +@@ -0,0 +1,185 @@ ++ ++/* ++ * -- SuperLU routine (version 2.0) -- ++ * Lawrence Berkeley National Lab, Univ. of California Berkeley, ++ * and Xerox Palo Alto Research Center. ++ * September 10, 2007 ++ * ++ */ ++/* ++ * File name: zmyblas2.c ++ * Purpose: ++ * Level 2 BLAS operations: solves and matvec, written in C. ++ * Note: ++ * This is only used when the system lacks an efficient BLAS library. 
++ */ ++#include "slu_dcomplex.h" ++ ++ ++/* ++ * Solves a dense UNIT lower triangular system. The unit lower ++ * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). ++ * The solution will be returned in the rhs vector. ++ */ ++void zlsolve ( int ldm, int ncol, doublecomplex *M, doublecomplex *rhs ) ++{ ++ int k; ++ doublecomplex x0, x1, x2, x3, temp; ++ doublecomplex *M0; ++ doublecomplex *Mki0, *Mki1, *Mki2, *Mki3; ++ register int firstcol = 0; ++ ++ M0 = &M[0]; ++ ++ ++ while ( firstcol < ncol - 3 ) { /* Do 4 columns */ ++ Mki0 = M0 + 1; ++ Mki1 = Mki0 + ldm + 1; ++ Mki2 = Mki1 + ldm + 1; ++ Mki3 = Mki2 + ldm + 1; ++ ++ x0 = rhs[firstcol]; ++ zz_mult(&temp, &x0, Mki0); Mki0++; ++ z_sub(&x1, &rhs[firstcol+1], &temp); ++ zz_mult(&temp, &x0, Mki0); Mki0++; ++ z_sub(&x2, &rhs[firstcol+2], &temp); ++ zz_mult(&temp, &x1, Mki1); Mki1++; ++ z_sub(&x2, &x2, &temp); ++ zz_mult(&temp, &x0, Mki0); Mki0++; ++ z_sub(&x3, &rhs[firstcol+3], &temp); ++ zz_mult(&temp, &x1, Mki1); Mki1++; ++ z_sub(&x3, &x3, &temp); ++ zz_mult(&temp, &x2, Mki2); Mki2++; ++ z_sub(&x3, &x3, &temp); ++ ++ rhs[++firstcol] = x1; ++ rhs[++firstcol] = x2; ++ rhs[++firstcol] = x3; ++ ++firstcol; ++ ++ for (k = firstcol; k < ncol; k++) { ++ zz_mult(&temp, &x0, Mki0); Mki0++; ++ z_sub(&rhs[k], &rhs[k], &temp); ++ zz_mult(&temp, &x1, Mki1); Mki1++; ++ z_sub(&rhs[k], &rhs[k], &temp); ++ zz_mult(&temp, &x2, Mki2); Mki2++; ++ z_sub(&rhs[k], &rhs[k], &temp); ++ zz_mult(&temp, &x3, Mki3); Mki3++; ++ z_sub(&rhs[k], &rhs[k], &temp); ++ } ++ ++ M0 += 4 * ldm + 4; ++ } ++ ++ if ( firstcol < ncol - 1 ) { /* Do 2 columns */ ++ Mki0 = M0 + 1; ++ Mki1 = Mki0 + ldm + 1; ++ ++ x0 = rhs[firstcol]; ++ zz_mult(&temp, &x0, Mki0); Mki0++; ++ z_sub(&x1, &rhs[firstcol+1], &temp); ++ ++ rhs[++firstcol] = x1; ++ ++firstcol; ++ ++ for (k = firstcol; k < ncol; k++) { ++ zz_mult(&temp, &x0, Mki0); Mki0++; ++ z_sub(&rhs[k], &rhs[k], &temp); ++ zz_mult(&temp, &x1, Mki1); Mki1++; ++ z_sub(&rhs[k], &rhs[k], &temp); ++ } ++ } ++ 
++} ++ ++/* ++ * Solves a dense upper triangular system. The upper triangular matrix is ++ * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned ++ * in the rhs vector. ++ */ ++void ++zusolve ( ++int ldm, /* in */ ++int ncol, /* in */ ++doublecomplex *M, /* in */ ++doublecomplex *rhs /* modified */ ++) ++{ ++ doublecomplex xj, temp; ++ int jcol, j, irow; ++ ++ jcol = ncol - 1; ++ ++ for (j = 0; j < ncol; j++) { ++ ++ z_div(&xj, &rhs[jcol], &M[jcol + jcol*ldm]); /* M(jcol, jcol) */ ++ rhs[jcol] = xj; ++ ++ for (irow = 0; irow < jcol; irow++) { ++ zz_mult(&temp, &xj, &M[irow+jcol*ldm]); /* M(irow, jcol) */ ++ z_sub(&rhs[irow], &rhs[irow], &temp); ++ } ++ ++ jcol--; ++ ++ } ++} ++ ++ ++/* ++ * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec. ++ * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[]. ++ */ ++void zmatvec ( ++int ldm, /* in -- leading dimension of M */ ++int nrow, /* in */ ++int ncol, /* in */ ++doublecomplex *M, /* in */ ++doublecomplex *vec, /* in */ ++doublecomplex *Mxvec /* in/out */ ++) ++{ ++ doublecomplex vi0, vi1, vi2, vi3; ++ doublecomplex *M0, temp; ++ doublecomplex *Mki0, *Mki1, *Mki2, *Mki3; ++ register int firstcol = 0; ++ int k; ++ ++ M0 = &M[0]; ++ ++ while ( firstcol < ncol - 3 ) { /* Do 4 columns */ ++ Mki0 = M0; ++ Mki1 = Mki0 + ldm; ++ Mki2 = Mki1 + ldm; ++ Mki3 = Mki2 + ldm; ++ ++ vi0 = vec[firstcol++]; ++ vi1 = vec[firstcol++]; ++ vi2 = vec[firstcol++]; ++ vi3 = vec[firstcol++]; ++ for (k = 0; k < nrow; k++) { ++ zz_mult(&temp, &vi0, Mki0); Mki0++; ++ z_add(&Mxvec[k], &Mxvec[k], &temp); ++ zz_mult(&temp, &vi1, Mki1); Mki1++; ++ z_add(&Mxvec[k], &Mxvec[k], &temp); ++ zz_mult(&temp, &vi2, Mki2); Mki2++; ++ z_add(&Mxvec[k], &Mxvec[k], &temp); ++ zz_mult(&temp, &vi3, Mki3); Mki3++; ++ z_add(&Mxvec[k], &Mxvec[k], &temp); ++ } ++ ++ M0 += 4 * ldm; ++ } ++ ++ while ( firstcol < ncol ) { /* Do 1 column */ ++ Mki0 = M0; ++ vi0 = vec[firstcol++]; ++ for (k = 0; k < nrow; k++) { ++ 
zz_mult(&temp, &vi0, Mki0); Mki0++; ++ z_add(&Mxvec[k], &Mxvec[k], &temp); ++ } ++ M0 += ldm; ++ } ++ ++} ++ diff --git a/sci-libs/superlu_mt/files/superlu_mt-2.1-missing-includes.patch b/sci-libs/superlu_mt/files/superlu_mt-2.1-missing-includes.patch new file mode 100644 index 000000000..afeca8fa7 --- /dev/null +++ b/sci-libs/superlu_mt/files/superlu_mt-2.1-missing-includes.patch @@ -0,0 +1,44 @@ +diff -Nur TESTING.orig/MATGEN/clatb4.c TESTING/MATGEN/clatb4.c +--- TESTING.orig/MATGEN/clatb4.c 2013-07-15 11:48:34.285967038 -0700 ++++ TESTING/MATGEN/clatb4.c 2013-07-15 11:49:05.150137959 -0700 +@@ -3,6 +3,7 @@ + -lf2c -lm (in that order) + */ + ++#include <string.h> + #include "f2c.h" + + /* Table of constant values */ +diff -Nur TESTING.orig/MATGEN/dlatb4.c TESTING/MATGEN/dlatb4.c +--- TESTING.orig/MATGEN/dlatb4.c 2013-07-15 11:48:34.288966998 -0700 ++++ TESTING/MATGEN/dlatb4.c 2013-07-15 11:49:05.150137959 -0700 +@@ -3,6 +3,7 @@ + -lf2c -lm (in that order) + */ + ++#include <string.h> + #include "f2c.h" + + /* Table of constant values */ +diff -Nur TESTING.orig/MATGEN/slatb4.c TESTING/MATGEN/slatb4.c +--- TESTING.orig/MATGEN/slatb4.c 2013-07-15 11:48:34.288966998 -0700 ++++ TESTING/MATGEN/slatb4.c 2013-07-15 11:49:05.150137959 -0700 +@@ -3,6 +3,7 @@ + -lf2c -lm (in that order) + */ + ++#include <string.h> + #include "f2c.h" + + /* Table of constant values */ +diff -Nur TESTING.orig/MATGEN/zlatb4.c TESTING/MATGEN/zlatb4.c +--- TESTING.orig/MATGEN/zlatb4.c 2013-07-15 11:48:34.288966998 -0700 ++++ TESTING/MATGEN/zlatb4.c 2013-07-15 11:49:05.150137959 -0700 +@@ -3,6 +3,7 @@ + -lf2c -lm (in that order) + */ + ++#include <string.h> + #include "f2c.h" + + /* Table of constant values */ diff --git a/sci-libs/superlu_mt/metadata.xml b/sci-libs/superlu_mt/metadata.xml new file mode 100644 index 000000000..91de91fe6 --- /dev/null +++ b/sci-libs/superlu_mt/metadata.xml @@ -0,0 +1,23 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE pkgmetadata SYSTEM 
"http://www.gentoo.org/dtd/metadata.dtd"> +<pkgmetadata> +<herd>sci</herd> +<longdescription lang="en"> + SuperLU is a general purpose library for the direct solution of + large, sparse, nonsymmetric systems of linear equations on high + performance machines. The library is written in C and is callable + from either C or Fortran. The library routines will perform an LU + decomposition with partial pivoting and triangular system solves + through forward and back substitution. The LU factorization routines + can handle non-square matrices but the triangular solves are + performed only for square matrices. The matrix columns may be + preordered (before factorization) either through library or user + supplied routines. This preordering for sparsity is completely + separate from the factorization. Working precision iterative + refinement subroutines are provided for improved backward + stability. Routines are also provided to equilibrate the system, + estimate the condition number, calculate the relative backward + error, and estimate error bounds for the refined solutions. + This is the multi-threaded version (POSIX threads or OpenMP). 
+</longdescription> +</pkgmetadata> diff --git a/sci-libs/superlu_mt/superlu_mt-2.1.ebuild b/sci-libs/superlu_mt/superlu_mt-2.1.ebuild new file mode 100644 index 000000000..bfd2866d4 --- /dev/null +++ b/sci-libs/superlu_mt/superlu_mt-2.1.ebuild @@ -0,0 +1,105 @@ +# Copyright 1999-2013 Gentoo Foundation +# Distributed under the terms of the GNU General Public License v2 +# $Header: /var/cvsroot/gentoo-x86/sci-libs/superlu/superlu-4.3.ebuild,v 1.7 2012/12/12 20:31:31 jlec Exp $ + +EAPI=5 + +inherit eutils fortran-2 toolchain-funcs + +MYPN=SuperLU_MT + +DESCRIPTION="Sparse LU factorization library multithreading library" +HOMEPAGE="http://crd.lbl.gov/~xiaoye/SuperLU/" +SRC_URI="${HOMEPAGE}/${PN}_${PV}.tar.gz" + +LICENSE="BSD" +SLOT="0" +KEYWORDS="~amd64 ~ppc ~ppc64 ~x86 ~amd64-linux ~x86-linux" +IUSE="doc openmp threads examples static-libs test" + +RDEPEND=" + virtual/cblas" +DEPEND="${RDEPEND} + virtual/pkgconfig + test? ( app-shells/tcsh )" + +S="${WORKDIR}/${MYPN}_${PV}" + +pkg_setup() { + if use threads; then + export CTHREADS="-D__PTHREAD" LDTHREADS="-pthread" + elif use openmp; then + if [[ $(tc-getCC) == *gcc ]] && ! tc-has-openmp; then + ewarn "OpenMP is not available in your current selected gcc" + die "need openmp capable gcc" + fi + FORTRAN_NEED_OPENMP=1 + export CTHREADS="-D__OPENMP" + [[ $(tc-getCC) == *gcc ]] && LDTHREADS="-fopenmp" + else + ewarn "Neither threads or openmp selected. 
Forcing threads" + export CTHREADS="-D__PTHREAD" LDTHREADS="-pthread" + fi + fortran-2_pkg_setup +} + +src_prepare() { + epatch \ + "${FILESDIR}"/${P}-duplicate-symbols.patch \ + "${FILESDIR}"/${P}-missing-includes.patch +} + +src_configure() { + sed -i \ + -e 's/^\(PLAT\s*=\).*/\1/' \ + -e "s:^\(CC\s*=\).*:\1 $(tc-getCC):" \ + -e "/CFLAGS/s:-O3:${CFLAGS} \$(PIC):" \ + -e "s:^\(PREDEFS\s*=\).*:\1 ${CPPFLAGS} -DUSE_VENDOR_BLAS \$(CTHREADS)$:" \ + -e "s:^\(NOOPTS\s*=.*\):\1 \$(PIC):" \ + -e "s:^\(FORTRAN\s*=\).*:\1 $(tc-getFC):" \ + -e "s:^\(FFLAGS\s*=\).*:\1 ${FFLAGS} \$(PIC):" \ + -e "s:^\(ARCH\s*=\).*:\1 $(tc-getAR):" \ + -e "s:^\(RANLIB\s*=\).*:\1 $(tc-getRANLIB):" \ + -e "s:^\(LOADER\s*=\).*:\1 $(tc-getCC):" \ + -e "s:^\(LOADOPTS\s*=\).*:\1 ${LDFLAGS} \$(LDTHREADS):" \ + -e "/MPLIB/d" \ + -e "s:^\(BLASLIB\s*=\).*:\1 $($(tc-getPKG_CONFIG) --libs blas):" \ + make.inc || die + SONAME=libsuperlu_mt.so.0 + sed -i \ + -e 's|../make.inc|make.inc|' \ + -e "s|../SRC|${EPREFIX}/usr/include/${PN}|" \ + -e '/:.*$(SUPERLULIB)/s|../lib/$(SUPERLULIB)||g' \ + -e 's|../lib/$(SUPERLULIB)|-lsuperlu_mt|g' \ + EXAMPLE/Makefile || die +} + +src_compile() { + emake superlulib \ + PIC="-fPIC" ARCH="echo" ARCHFLAGS="" RANLIB="echo" + $(tc-getCC) ${LDFLAGS} ${LDTHREADS} -shared -Wl,-soname=${SONAME} SRC/*.o \ + $($(tc-getPKG_CONFIG) --libs blas) -lm -o lib/${SONAME} || die + ln -s ${SONAME} lib/libsuperlu_mt.so || die + + use static-libs && rm -f SRC/*.o && emake superlulib \ + PIC="" ARCH="$(tc-getAR)" ARCHFLAGS="cr" RANLIB="$(tc-getRANLIB)" +} + +src_test() { + emake -j1 tmglib + LD_LIBRARY_PATH="${S}/lib:${LD_LIBRARY_PATH}" \ + emake SUPERLULIB="${SONAME}" testing +} + +src_install() { + dolib.so lib/*so* + use static-libs && dolib.a lib/*.a + insinto /usr/include/${PN} + doins SRC/*h + dodoc README + use doc && dodoc DOC/ug.pdf + if use examples; then + insinto /usr/share/doc/${PF}/examples + doins -r EXAMPLE/* make.inc + fi +} |