Btab for blast2

Nathan O. Siemers nathan at dna.bms.com
Thu Sep 4 20:37:35 EST 1997


ynakamu at kazusa.or.jp (Yasukazu Nakamura) writes:
> Hi,
> 
> Have anyone written Btab (like) program for BLAST2 output?
> 
> Thanks in advance.
> 
> Yaskaz
> 

Just another Perl Hacker. Baby Perl, at that.

nathan

| Nathan Siemers - Department of Applied Genomics - Bristol-Myers |
| Squibb Pharmaceutical Research Institute - K23-05, PO Box 4000, |
| Princeton, NJ 08543-4000 - (609) 252-6568 - siemers at bms.com     |


blast22table.pl:

#!/usr/local/bin/perl5
# blast22table.pl
# make tab delimited table of blast2 results
# Nathan Siemers

# just to be nice:

$intorecord = 0;
$count = 0;
$defcount = 0;
$line = 0;

while (<>) {

    # lines we wish were not there...

    if (/^SCORE_MISMATCH/) {
	next;
    }

    @words = split(" ");
    $line++;

    if ($line == 1) {
	$search = $words[0];
    }

    if (/^Query=/){
	$query = $words[1];
    }

    if (/^ +\(.* letters\)/) {
	$words[0] =~ s/\(//;
	$querylength = $words[0];
	$querylength =~ s/\,//g;
    }

    if (/^Database\:/) {
	$db = $words[1];
    }
 

    if (/^ *Length =/) {
	if ($search eq  "TBLASTN" || $search eq "BLASTX" || $search eq "BLASTN") {
	    $lengthline = $intorecord;
	    $scoreline = $lengthline + 4;
	    $idline = $scoreline + 1;
	    $qalignline = $idline + 2;
	    $salignline = $qalignline + 2;
	}
	if ($search eq  "BLASTP") {
	    $lengthline = $intorecord;
	    $scoreline = $lengthline + 2;
	    $idline = $scoreline + 1;
	    $qalignline = $idline + 2;
	    $salignline = $qalignline + 2;
	}
	
	    $hitgenelength[$count] = $words[2];
	    $hitgenelength[$count] =~ s/\,//g;
    }
    
	

    if ( $intorecord == $scoreline) {
	$score[$count] = $words[2];
	$expect[$count] = $words[7];
	$expect[$count] =~ s/\,//;
    }

    if ($intorecord == $idline) {
	$t = $words[2];
	$t =~ s/.*\///g;
	$hitlength[$count] = $t;
	$t = $words[3];
	$t =~ s/[\(\)\%\,]//g;
	$id[$count] = $t;
	$frame[$count] = $words[10];
    }

    if ($intorecord == $qalignline) {
	$qstart[$count] = $words[1];
    }

    if ($intorecord == $salignline) {
	$sstart[$count] = $words[1];
    }

#scan for first line of annotation

    if (/^\>/) {
	$intorecord = 1;
	$count++;
	$accession[$count] = $words[0];
	$accession[$count] =~ s/^>//;
	shift @words;
	$description[$count] = join " ", @words;
    }	
    $intorecord++ if ($intorecord);
}

# print the suckers out
#ignore identical entries! 

for ($i = 1; $i <= $count; $i++) {
    if ($query ne $accession[$i]) {
	print "$query\t$querylength\t$search\t$db\t$accession[$i]\t",
	"$description[$i]\t$hitgenelength[$i]",
	"\t$frame[$i]\t$score[$i]\t$expect[$i]\t$id[$i]\t$hitlength[$i]\t$qstart[$i]\t$sstart[$i]\n";
    }
}




More information about the Bio-soft mailing list