--- est_clustering.pm.ori 2011-02-21 13:12:39.000000000 +0100 +++ est_clustering.pm 2011-02-22 20:23:16.000000000 +0100 @@ -76,7 +76,7 @@ } sub contig_image ($) { - print localtime().": Creating contig images\n"; + print localtime().": Creating contig images using contigimage.pl\n"; my $config = shift; my $bin = read_config($config,'contigimage_bin','contigimage.pl'); my $ace = read_config($config,'ace_file',''); @@ -94,6 +94,8 @@ my $qual_file=read_config($config,'seqclean_masked_qual',''); #output file for the qualities my $db=read_config($config,'db_path',''); #path to the database in the format dbi:$dbi_driver:$db_name:$host my @libraries=split(/,/,read_config($config,'libraries','')); #libraries to be retrieved + # if populate_clone_table=0 then the clone table is empty, therefore the SELECT would return empty results + my $populate_clone_table=read_config($config,'populate_clone_table',''); my $ok=1; print localtime().": Fetching processed ESTs from the database\n"; @@ -108,20 +110,36 @@ #we create the handler for the database my $dbh; my $sth; #database and statements handlers if ($ok){$dbh=open_db($db) or $ok=0}; #we open the database - my $st="SELECT sequence.name,sequence.sequence,sequence.quality FROM sequence,est,clone WHERE ( "; + + my $st=""; + if ($populate_clone_table) { + $st="SELECT sequence.name,sequence.sequence,sequence.quality FROM sequence,est,clone WHERE ( "; foreach (@libraries) { $st.="clone.library = '$_' OR " } $_=$st; s/ OR $//;$st=$_; #this removes the trailing OR $st.=" ) "; + } else { + $st="SELECT sequence.name,sequence.sequence,sequence.quality FROM sequence,est WHERE "; + } + + # if populate_clone_table=0 then the clone table is empty, therefore the SELECT would return empty results + # mysql> SELECT sequence.name,sequence.sequence,sequence.quality FROM sequence,est,clone WHERE + # ( clone.library = 'sgn' OR clone.library = 'fake' OR clone.library = 'tigr' ) + # AND clone.name=est.clone AND est.processed_seq=sequence.name; + # Empty set (0.00 sec) + if ($populate_clone_table) { $st.= "AND clone.name=est.clone AND est.processed_seq=sequence.name"; + } else { + $st.= "est.processed_seq=sequence.name"; + } if ($ok) {$sth=$dbh->prepare($st) or $ok=0} #we execute the statement if ($ok) { $sth->execute() or $ok=0; unless ($ok) { - print localtime().": There was an error executing the SELECT statement\n"; + print localtime().": There was an error executing the SELECT statement: $DBI::errstr\n"; } } #now all the results are written in the output files @@ -138,6 +156,7 @@ $seq_stream->write_seq($seq); $qual_stream->write_seq($seq->qual_obj()); } + print localtime().": Fetched ".$sth->rows()." from the database.\n"; close_st($sth); close_db($dbh); return $ok; @@ -159,11 +178,11 @@ ); my $st="DELETE FROM $table WHERE object_type='contig' OR object_type='singleton';"; my $sth=prepare_st($dbh,$st) or ( - print localtime().": Unable to prepare the statement $st\n" and + print localtime().": Unable to prepare the statement $st: $DBI::errstr\n" and return 0 ); $sth->execute or ( - print localtime().": Unable to execute the statement $st\n" and + print localtime().": Unable to execute the statement $st: $DBI::errstr\n" and return 0 ); close_st($sth); @@ -172,11 +191,11 @@ $st="UPDATE $table SET unigene=NULL, location_begin=NULL, location_end=NULL, orientation_fwd=NULL, r_begin_in=NULL, r_end_in=NULL, inserts=NULL;"; $sth=prepare_st($dbh,$st) or ( - print localtime().": Unable to prepare the statement $st\n" and + print localtime().": Unable to prepare the statement $st: $DBI::errstr\n" and return 0 ); $sth->execute or ( - print localtime().": Unable to execute the statement $st\n" and + print localtime().": Unable to execute the statement $st: $DBI::errstr\n" and return 0 ); close_st($sth); @@ -227,7 +246,7 @@ # First we run tgicl chdir($temp_dir); - $command="$bin_tgicl ".$in_seq." -q ".$in_qual; + $command="$bin_tgicl -F ".$in_seq." -q ".$in_qual; if ($tgicl_opt) { $command .= " $tgicl_opt"; } @@ -247,19 +266,25 @@ unless ($tgicl_success) { print localtime().": There was a problem running tgicl. The command was:\n"; - print "$command\n"; + print "$command: $!\n"; return $tgicl_success; } # Now we save the results on the results dir # file with the reads that form every contig print localtime().": Saving the tgicl contigs results\n"; + if (-r $in_seq.$contigs_ext) { + # files from older tgicl are *_clusters $command="mv ".$in_seq.$contigs_ext." ".$contigs_file; + } elsif (-r $in_seq."_cl".$contigs_ext) { + # files from newer tgicl are *_cl_clusters + $command="mv ".$in_seq."_cl".$contigs_ext." ".$contigs_file; + } $tgicl_success= not(system($command)); unless ($tgicl_success) { print localtime().": There was a problem moving the tgicl result. The command was:\n"; - print "$command\n"; + print "$command: $!\n"; return $tgicl_success; } @@ -270,7 +295,7 @@ unless ($tgicl_success) { print localtime().": There was a problem running tgicl. The command was:\n"; - print "$command\n"; + print "$command: $!\n"; return $tgicl_success; } @@ -286,7 +311,7 @@ unless ($tgicl_success) { print localtime().": There was a problem running tgicl. The command was:\n"; - print "$command\n"; + print "$command: $!\n"; return $tgicl_success; } @@ -302,7 +327,7 @@ unless ($tgicl_success) { print localtime().": There was a problem running tgicl. The command was:\n"; - print "$command\n"; + print "$command: $!\n"; return $tgicl_success; } @@ -313,7 +338,7 @@ $tgicl_success=not(system($command)); unless ($tgicl_success) { - print localtime().": There was a problem running cdbyank\n"; + print localtime().": There was a problem running $command: $!\n"; return $tgicl_success; } @@ -353,7 +378,7 @@ $ok=not(system($command)); unless ($ok) { print localtime().": There was a problem running cap3. The command was:\n"; - print "$command\n"; + print "$command: $!\n"; return $ok; } my $qual_file=$seq_file.".qual"; @@ -361,7 +386,7 @@ system($command); unless ($ok) { print localtime().": There was a problem running cap3. The command was:\n"; - print "$command\n"; + print "$command: $!\n"; return $ok; } @@ -390,7 +415,7 @@ unless ($ok) { print localtime().": There was a problem running cap3. The command was:\n"; - print "$command\n"; + print "$command: $!\n"; return $ok; } @@ -402,7 +427,7 @@ unless ($ok) { print localtime().": There was a problem running cap3. The command was:\n"; - print "$command\n"; + print "$command: $!\n"; return $ok; } @@ -412,7 +437,7 @@ unless ($ok) { print localtime().": There was a problem running tgicl. The command was:\n"; - print "$command\n"; + print "$command: $!\n"; return $ok; } @@ -423,7 +448,7 @@ unless ($ok) { print localtime().": There was a problem running tgicl. The command was:\n"; - print "$command\n"; + print "$command: $!\n"; return $ok; } @@ -442,17 +467,19 @@ #we should keep the original singletons names my $single_file=read_config($config,'singletons_list',''); my $ori_single_file=read_config($config,'original_singletons_list',''); + + my $prefix=read_config($config,'prefix',''); + my $postfix=read_config($config,'postfix',''); + # doh, but the routine update_est_table_with_singletons() anyways looks for the unchanged original file my $cmd="cp $single_file $ori_single_file"; my $not_ok=system ($cmd); if ($not_ok){ - print localtime()."; There was a problem with the command $cmd\n"; + print localtime()."; There was a problem with the command $cmd: $!\n"; return 0; } - my $prefix=read_config($config,'prefix',''); - my $postfix=read_config($config,'postfix',''); unless ($prefix or $postfix){ - print localtime().": WARNING, there are no prefix or postfix to change unigene name\n"; - print localtime().": WARNING, it won't be any name change\n"; + print localtime().": WARNING, there are no prefix or postfix defined to change unigene name\n"; + print localtime().": WARNING, there won't be any name change\n"; return 1; } change_seq_names_in_file($contigs_file,$config,'') or return 0; @@ -470,8 +497,8 @@ my $postfix=read_config($config,'singleton_seq_tag',''); my $temp_file=select_temp_file($config); - open IN, "<$file"; - open OUT, ">$temp_file\n"; + open IN, "<$file" or die "Error: failed to open $file: $!\n"; + open OUT, ">$temp_file" or die "Error: failed to write $temp_file: $!\n"; while (my $line = ){ #for fasta files if ($line =~ /^>/){ @@ -586,7 +613,7 @@ my $ok=1; open OUT,">$outf" or $ok= 0; unless ($ok){ - print localtime().": There was a problem opening the file: $outf\n"; + print localtime().": There was a problem opening the file: $outf: $!\n"; return 0; } @@ -629,7 +656,7 @@ my $ok=1; open IN, "<$dnaf" or $ok=0; unless ($ok){ - print localtime().": ESTScan result file $dnaf couldn't be opened\n"; + print localtime().": ESTScan result file $dnaf couldn't be opened: $!\n"; return 0; } while (){ @@ -653,18 +680,18 @@ my $backf=$revf.".bak"; my $not_ok=system ("cp $revf $backf"); if ($not_ok){ - print localtime().": There was a problem when trying to do a copy of $revf to $backf\n"; + print localtime().": There was a problem when trying to do a copy of $revf to $backf: $!\n"; return 0; } open OUT, ">$revf" or $ok=0; unless ($ok){ - print localtime().": Reverse file $revf couldn't be opened for output\n"; + print localtime().": Reverse file $revf couldn't be opened for output: $!\n"; return 0; } open IN, "<$backf" or $ok=0; unless ($ok){ - print localtime().": Reverse file $backf couldn't be opened for input\n"; + print localtime().": Reverse file $backf couldn't be opened for input: $!\n"; return 0; } while (){ @@ -711,20 +738,20 @@ open ACE, "<$ace_file" or $ok=0; unless ($ok) { - print localtime().": There has been a problem opening the file $ace_file\n"; + print localtime().": There has been a problem opening the file $ace_file: $!\n"; print localtime().": Maybe is not in the correct tgicl_ace format.\n"; return 0; } my $dbh=open_db($db); unless ($dbh) { - print localtime().": Database $dbh is not reachable\n"; + print localtime().": Database $dbh is not reachable: $DBI::errstr\n"; return 0; } my $st= write_insert_st ($table,@fields); my $sth=prepare_st($dbh,$st); unless ($sth) { - print localtime().": There has been a problem preparing the statement: $st\n"; + print localtime().": There has been a problem preparing the statement: $st: $DBI::errstr\n"; close_db($dbh); return 0; } @@ -839,6 +866,10 @@ push(@$orientation,$1); } $contig =~ s/^\n+//; + while ($contig =~ s/^BS\s+?\d+\s+?\d+\s+?([^\s]+)\s*\n//){ + print localtime().": Skipping BS line in the ACE output due to newer cap3: https://listas.upv.es/pipermail/est2uni/2008-October/000191.html\n"; + } + $contig =~ s/^\n+//; my $read_len=[]; my $seq_names=[]; my $r_begin_in_read=[]; my $r_end_in_read=[]; @@ -955,7 +986,7 @@ $table=read_config($config,'est_table',''); $st="UPDATE $table SET unigene=?,orientation_fwd=?,location_begin=?,location_end=?,r_begin_in=?,r_end_in=?,inserts=? WHERE processed_seq=?"; $sth=prepare_st($dbh,$st) or ( - print localtime().": There was a problem preparing the statement $st\n" and + print localtime().": There was a problem preparing the statement: $st: $DBI::errstr\n" and return 0 ); foreach my $contig_info (@contig_est_info){ @@ -981,7 +1012,7 @@ $sth->bind_param(7,$insertion); $sth->bind_param(8,$seq_name); $sth->execute or ( - print localtime().": There was a problem executing the statement $st\n" and + print localtime().": There was a problem executing the statement: $st: $DBI::errstr\n" and return 0 ); unless (@{${$contig_info}[1]} and @{${$contig_info}[2]} and @{${$contig_info}[3]}){ @@ -1042,19 +1073,19 @@ my $db=read_config($config,'db_path',''); my $dbh=open_db($db); unless ($dbh) { - print localtime().": Database $dbh is not reachable\n"; + print localtime().": Database $dbh is not reachable: $DBI::errstr\n"; return 0; } my $st="UPDATE $est_table SET unigene=?,orientation_fwd=1,location_begin=1,location_end=?,r_begin_in=1,r_end_in=? WHERE processed_seq=?"; my $sth=prepare_st($dbh,$st) or ( - print localtime().": There was a problem preparing the statement $st\n" and + print localtime().": There was a problem preparing the statement: $st: $DBI::errstr\n" and return 0 ); my $singletons_file=read_config($config,'seq_singletons_file',''); #singletons fasta file my $ori_singletons_file=read_config($config,'original_singletons_list',''); #singletons list file without the name change this is in fact the name of the est sequences that correspond to every singleton open ORI, "<$ori_singletons_file" or ( - print localtime().": There was a problem opening file: $ori_singletons_file\n" and + print localtime().": There was a problem opening file: $ori_singletons_file: $!\n" and return 0 ); @@ -1070,7 +1101,7 @@ $sth->bind_param(3,$len); $sth->bind_param(4,$ori_line); $sth->execute or ( - print localtime().": There was a problem executing the statement $st\n" and + print localtime().": There was a problem executing the statement: $st: $DBI::errstr\n" and return 0 ); }