aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin Mokrejš <mmokrejs@fold.natur.cuni.cz>2017-11-21 17:11:28 +0100
committerMartin Mokrejš <mmokrejs@fold.natur.cuni.cz>2017-11-21 17:11:28 +0100
commitaab87747b89b64107677056a3d4874d8f5ee7bbf (patch)
treefc6fa5d0ca67d5fb0a376e53af19221dfbad3c6f /sci-biology/SEECER
parentsci-biology/jellyfish: rename jellyfish to jellyfish1 (diff)
downloadsci-aab87747b89b64107677056a3d4874d8f5ee7bbf.tar.gz
sci-aab87747b89b64107677056a3d4874d8f5ee7bbf.tar.bz2
sci-aab87747b89b64107677056a3d4874d8f5ee7bbf.zip
sci-biology/SEECER: execute jellyfish1 instead of jellyfish
I also wrote a few cleanup patches to expose the THREADS variable and clean up the code. Package-Manager: Portage-2.3.14, Repoman-2.3.6
Diffstat (limited to 'sci-biology/SEECER')
-rw-r--r--sci-biology/SEECER/SEECER-0.1.3-r2.ebuild7
-rw-r--r--sci-biology/SEECER/files/rename_jellyfish_binary.patch11
-rw-r--r--sci-biology/SEECER/files/run_jellyfish.sh.patch72
-rw-r--r--sci-biology/SEECER/files/run_seecer.sh.patch42
4 files changed, 130 insertions, 2 deletions
diff --git a/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild b/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild
index 60862d8c8..0b7ec3bfa 100644
--- a/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild
+++ b/sci-biology/SEECER/SEECER-0.1.3-r2.ebuild
@@ -5,7 +5,7 @@ EAPI=6
inherit eutils
-DESCRIPTION="SEquencing Error Corrector for RNA-Seq reads"
+DESCRIPTION="SEquence Error Corrector for RNA-Seq reads"
HOMEPAGE="http://sb.cs.cmu.edu/seecer/"
SRC_URI="
http://sb.cs.cmu.edu/seecer/downloads/"${P}".tar.gz
@@ -22,12 +22,15 @@ DEPEND="
sci-libs/gsl:0=
sci-biology/seqan:0="
RDEPEND="${DEPEND}
- =sci-biology/jellyfish-1.1.11"
+ =sci-biology/jellyfish-1.1.11-r1"
S="${S}"/SEECER
PATCHES=(
"${FILESDIR}"/remove-hardcoded-paths.patch
+ "${FILESDIR}"/run_seecer.sh.patch
+ "${FILESDIR}"/run_jellyfish.sh.patch
+ "${FILESDIR}"/rename_jellyfish_binary.patch
)
src_prepare(){
diff --git a/sci-biology/SEECER/files/rename_jellyfish_binary.patch b/sci-biology/SEECER/files/rename_jellyfish_binary.patch
new file mode 100644
index 000000000..c6548cee1
--- /dev/null
+++ b/sci-biology/SEECER/files/rename_jellyfish_binary.patch
@@ -0,0 +1,11 @@
+--- SEECER/bin/run_seecer.sh.ori 2017-11-21 16:56:28.808767468 +0100
++++ SEECER/bin/run_seecer.sh 2017-11-21 16:57:07.469835728 +0100
+@@ -26,7 +26,7 @@
+
+
+ BINDIR='' #this can be hardcoded to /absolute/path/to/SEECER/bin/
+-JF="jellyfish" #this may be hardcoded to /absolute/path/to/jellyfish/bin/
++JF="jellyfish1" #this may be hardcoded to /absolute/path/to/jellyfish/bin/jellyfish1
+
+ K=17
+ SEECER_PARAMS=""
diff --git a/sci-biology/SEECER/files/run_jellyfish.sh.patch b/sci-biology/SEECER/files/run_jellyfish.sh.patch
new file mode 100644
index 000000000..7631f5a4c
--- /dev/null
+++ b/sci-biology/SEECER/files/run_jellyfish.sh.patch
@@ -0,0 +1,72 @@
+--- SEECER-0.1.3/bin/run_jellyfish.sh.ori 2017-11-21 16:41:54.164599838 +0100
++++ SEECER-0.1.3/bin/run_jellyfish.sh 2017-11-21 16:46:28.022166903 +0100
+@@ -1,18 +1,45 @@
+ #!/bin/bash
++
++# Usage: run_jellyfish.sh jellyfish_binpath counts_outfile kmersize mincount tmpdir infile1 [infile2] [threads] (threads is read from $8, default 32)
+ JF=$1
+ LCOUNT=$4
+ TMPDIR=$5
++THREADS=${8:-32}
+
+ if [ -z "$JF" ]; then
+ echo "No path to jellyfish binary provided, exiting.";
+ exit 255;
+ fi
+
++# Usage: jellyfish count [options] file:path+
++#
++# Count k-mers or qmers in fasta or fastq files
++#
++# Options (default value in (), *required):
++# -m, --mer-len=uint32 *Length of mer
++# -s, --size=uint64 *Hash size
++# -t, --threads=uint32 Number of threads (1)
++# -o, --output=string Output prefix (mer_counts)
++# -c, --counter-len=Length in bits Length of counting field (7)
++# --out-counter-len=Length in bytes Length of counter field in output (4)
++# -C, --both-strands Count both strand, canonical representation (false)
++# -p, --reprobes=uint32 Maximum number of reprobes (62)
++# -r, --raw Write raw database (false)
++# -q, --quake Quake compatibility mode (false)
++# --quality-start=uint32 Starting ASCII for quality values (64)
++# --min-quality=uint32 Minimum quality. A base with lesser quality becomes an N (0)
++# -L, --lower-count=uint64 Don't output k-mer with count < lower-count
++# -U, --upper-count=uint64 Don't output k-mer with count > upper-count
++# --invalid-char=warn|ignore|error How to treat invalid characters. The char is changed to a N. (warn)
++# --matrix=Matrix file Hash function binary matrix
++# --timing=Timing file Print timing information
++# --stats=Stats file Print stats
++#
+ if [ "$#" -eq "4" ];
+ then
+-$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t 32 --both-strands $6 || exit 255
++$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t $THREADS --both-strands $6 || exit 255
+ else
+-$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t 32 --both-strands $6 $7 || exit 255
++$JF count -m $3 -o $TMPDIR/jf_tmp -c 3 -s 10000000 -t $THREADS --both-strands $6 $7 || exit 255
+ fi;
+
+ # merge
+@@ -25,5 +52,21 @@
+ rm $TMPDIR/jf_tmp_*
+ fi
+
++#
++# Usage: jellyfish dump [options] db:path
++#
++# Dump k-mer counts
++#
++# By default, dump in a fasta format where the header is the count and
++# the sequence is the sequence of the k-mer. The column format is a 2
++# column output: k-mer count.
++#
++# Options (default value in (), *required):
++# -c, --column Column format (false)
++# -t, --tab Tab separator (false)
++# -L, --lower-count=uint64 Don't output k-mer with count < lower-count
++# -U, --upper-count=uint64 Don't output k-mer with count > upper-count
++# -o, --output=string Output file
++#
+ $JF dump --lower-count=$LCOUNT -o $2 -c $TMPDIR/jf_merged_$3 || exit 255
+ rm $TMPDIR/jf_merged_$3
diff --git a/sci-biology/SEECER/files/run_seecer.sh.patch b/sci-biology/SEECER/files/run_seecer.sh.patch
new file mode 100644
index 000000000..a20c7917f
--- /dev/null
+++ b/sci-biology/SEECER/files/run_seecer.sh.patch
@@ -0,0 +1,46 @@
+Add a -c <threads> option to run_seecer.sh and pass the value on to
+run_jellyfish.sh as its 8th argument. Read2_N is quoted so the thread
+count stays in position 8 even if Read2_N is empty.
+--- SEECER/bin/run_seecer.sh.old 2013-10-02 18:55:24.000000000 +0200
++++ SEECER/bin/run_seecer.sh 2017-11-21 16:24:24.065584149 +0100
+@@ -33,6 +33,7 @@
+ SeecerStep=1
+ LCOUNT=3
+ TMPDIR=''
++THREADS=32
+
+ usage=$(cat << EOF
+ # This script runs the SEECER pipeline of 4 steps:
+@@ -54,11 +55,12 @@
+ -j <v> : specify the location of JELLYFISH binary (default = $JF).
+ -p <v> : specify extra SEECER parameters (default = '').
+ -s <v> : specify the starting step ( default = 1). Values = 1,2,3,4.
++ -c <v> : number of threads (default = 32).
+ -h : help message
+ EOF
+ );
+
+-while getopts ":j:p:k:s:t:h" opt; do
++while getopts ":j:p:k:s:t:c:h" opt; do
+ case $opt in
+ t)
+ TMPDIR=$OPTARG
+@@ -75,6 +77,9 @@
+ s)
+ SeecerStep=$OPTARG
+ ;;
++ c)
++ THREADS=$OPTARG
++ ;;
+ \?)
+ echo "Invalid option: -$OPTARG" >&2
+ echo "$usage"
+@@ -170,7 +177,7 @@
+ then
+ echo "++ Step 2: Running JELLYFISH to count kmers ..."
+ echo
+- bash "${BINDIR}"run_jellyfish.sh $JF $TMPDIR/counts_${K}_${LCOUNT} $K $LCOUNT $TMPDIR $Read1_N $Read2_N || exit 255
++ bash "${BINDIR}"run_jellyfish.sh $JF $TMPDIR/counts_${K}_${LCOUNT} $K $LCOUNT $TMPDIR $Read1_N "$Read2_N" $THREADS || exit 255
+ fi;
+
+ if [ ! -r $TMPDIR/counts_${K}_${LCOUNT} ];