summaryrefslogtreecommitdiff
blob: 37ee59c0fdb18dc1fa3af42dae3d5924f081b6ff (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/perl -w 

use strict;

# Filter a FASTA-style sequence library.
#
# Usage: <script> INFILE OUTFILE
#
# INFILE is read line by line. A line starting with '>' is a record header;
# every following line up to the next header is upper-cased, stripped of line
# terminators and concatenated to form that record's sequence.
#
# For each record:
#   * an empty sequence is reported on STDOUT and skipped;
#   * a sequence containing any character outside ACGTNWSYRMHKXDVB is
#     reported on STDOUT (together with the offending characters) and skipped;
#   * otherwise every W S Y R M H K X D V B is replaced by N (these are
#     presumably nucleotide ambiguity codes); if the result contains a run of
#     four or more N's the record is reported and skipped, else the header
#     and the cleaned sequence (on one line) are written to OUTFILE.
#
# Dies with a message if the arguments are missing or a file cannot be
# opened, or if closing OUTFILE fails.

my $infile  = $ARGV[0];
my $outfile = $ARGV[1];

die "Usage: $0 <infile> <outfile>\n"
    unless (defined($infile) and defined($outfile));

# Three-argument open with lexical handles: the file name can never be
# misread as an open mode, and a failure is reported instead of silently
# producing an empty output file.
open(my $lib, "<", $infile)
    or die "Cannot open '$infile' for reading: $!\n";
open(my $clib, ">", $outfile)
    or die "Cannot open '$outfile' for writing: $!\n";

my $name     = "";   # header line of the current record, terminator included
my $sequence = "";   # sequence accumulated for the current record

while (my $line = <$lib>)
{
    if ($line =~ m/^>/)
    {
        # A new header ends the previous record (if there was one).
        process_record($clib, $name, $sequence) if ($name ne "");

        $name = $line;
        # Always start the new record from scratch. This also discards any
        # stray lines that appeared before the first header instead of gluing
        # them onto the first record.
        $sequence = "";
    }
    else
    {
        # Strip CR as well as LF so files with Windows line endings are not
        # rejected as invalid. Only the new chunk is cleaned, so the whole
        # accumulated sequence is not re-scanned for every input line.
        (my $chunk = uc($line)) =~ tr/\r\n//d;
        $sequence .= $chunk;
    }
}

# The last record has no following header to trigger it, so handle it here.
process_record($clib, $name, $sequence) if ($name ne "");

close($lib);
# Buffered write errors only surface at close time, so this must be checked.
close($clib)
    or die "Error while closing '$outfile': $!\n";

# Validate one record and either write it to $out or report on STDOUT why it
# was rejected.
#   $out      - output filehandle for accepted records
#   $name     - header line exactly as read (normally ends with a newline)
#   $sequence - upper-cased sequence without line terminators
sub process_record
{
    my ($out, $name, $sequence) = @_;
    my $rule = "---------------------------------------------------------------------------------------\n";

    if ($sequence eq "")
    {
        print $rule;
        print "Empty sequence: " . $name . "\n";
        return;
    }

    if ($sequence !~ m/^[ACGTNWSYRMHKXDVB]*$/)
    {
        print $rule;
        print "Invalid entry: " . $name;
        # Drop the plain bases so only the suspicious characters are shown.
        (my $reduced = $sequence) =~ tr/ACGTN//d;
        print "Reduced sequence:\n";
        print $reduced . "\n";
        return;
    }

    # Collapse every other accepted letter to N.
    $sequence =~ tr/WSYRMHKXDVB/N/;

    if ($sequence =~ m/NNNN/)
    {
        print $rule;
        print "Too many N's:  " . $name;
        return;
    }

    print {$out} $name;
    print {$out} $sequence . "\n";
    return;
}