Richard Friedman <friedman at cuccfa.ccc.columbia.edu> writes:
> Everybody,
>> Does anyone have a program to convert an alignment in *msf,
> Clustal, Molphy, or Phylip format and the corresponding tree
> in Newick format into Nexus format? It turns out that the
> GCG Program Paupdisplay can root a tree using the mean of the longest
> Path method, but requires that the input file be in Nexus format.
>> Thanks and best wishes,
> Rich
here is a quick and dirty tclsh script for clustal files ...
(tested with clustalw1.7 alignments)
usage: aln2.tcl <file.aln> <format(nexus)>
optional: -paup ... add a string containing paup commands
e.g.
aln2.tcl test.aln nexus -paup "set criterion=distance;bootstrap;describetrees all /brlens=yes tcompress=yes plot=phylogram" > test.nexus
mail me if there are problems ...
-thomas
CUT HERE =========== CUT HERE =========== CUT HERE =========== CUT HERE
#!/bin/sh
# The next line is executed by /bin/sh, but not tcl \
exec tclsh $0 ${1+"$@"}
# Created: Fri Jun 19 15:00:49 1998
# Last changed: Time-stamp: <98/11/17 15:47:11 thomas>
# Thomas.Sicheritz at molbio.uu.se, http://evolution.bmc.uu.se/~thomas
# File: aln2.tcl
package require opt
::tcl::OptProc checkopts {
{file "clustal alignment"}
{format -choice {nexus mase fasta} "alignment format"}
{-min 0 "minimal length"}
{-v "verbose"}
{-short "(nexus) shorter version"}
{-paup {} "paup commands"}
{-pauplog {} "log paup session to file"}
} {
foreach i [info locals] {
if {$i=="args" || $i=="Args"} continue
set ::data($i) [ set $i]
}
}
if {[catch {eval checkopts $argv} msg]} { puts stderr $msg; exit}
proc prettySeq {seq} {
regsub -all "............................................................" $seq \&\n newseq
return $newseq
}
proc read_clustal {_file} {
global entry store
set tmpfile /tmp/tmp[pid]nex.aln
if { [ regexp {\.gz$} $_file]} {
exec gzip -d -c $_file > $tmpfile
set file $tmpfile
} else {
set file $_file
}
set fid [ open $file r]
gets $fid line
gets $fid line
gets $fid line
while { [gets $fid line]>=0 } {
if { [ regexp {^[a-zA-Z0-9]} $line]} {
set name [ string trim [ string range $line 0 15]]
set seq [ string range $line 16 end]
append entry($name) $seq
}
}
foreach i [ array names entry] {
regsub -all -- "-" $entry($i) {} tmp
set store(length,$i) [ string length $tmp]
set store(seq,$i) $tmp
}
if { [ regexp {\.gz$} $_file]} {
file delete $tmpfile
}
}
proc to_nexus {} {
global entry data
if { !$data(short)} { append res "\#NEXUS\nBEGIN taxa;\nDIMENSIONS ntax=[ llength [ array names entry]];\nTAXLABELS\n"} else {
append res "\#NEXUS\nBEGIN data;\nDIMENSIONS ntax=[ llength [ array names entry]] "
}
set name_list [ lsort [ array names entry]]
if { !$data(short)} { foreach i $name_list { append res "$i\n" }}
set first [ lindex $name_list 0]
if { !$data(short)} {
append res ";\nEND;\n"
append res "begin characters;\ndimensions "
}
append res " nchar=[ string length $entry($first)];\n"
append res "format datatype=protein missing=-;\nMATRIX\n"
foreach i $name_list {
append res "$i "
if { ! $data(short) } { append res "\n"}
regsub -all {[xX]} $entry($i) "-" seq
append res "$seq\n"
}
append res "; \nend;\n"
if {[ string length $data(paup)]!=0} {
append res "begin paup;\nset nowarnreset autoclose;\n"
if {[ string length $data(pauplog)]!=0} {
append res "log file=$data(pauplog);\n"
}
foreach i [ split $data(paup) \;] {
append res "$i;\n"
}
append res "endblock;"
}
return $res
}
proc to_mase {} {
global entry
set res ";; saved by aln2 on [clock format [clock seconds] -format {%b %d %H:%M}]"
foreach i [ array names entry] {
append res "$i\n"
append res ";no comment\n$i\n"
set seq [ prettySeq $entry($i)]
append res "$seq\n"
}
return $res
}
proc to_fasta {} {
global entry
foreach i [ array names entry] {
set seq [ prettySeq $entry($i)]
append res ">$i\n$seq\n"
}
return $res
}
proc delete_less_than {min} {
global entry store
foreach i [ array names entry] {
if { $store(length,$i) <$min} {
unset entry($i)
unset store(length,$i)
unset store(seq,$i)
}
}
}
read_clustal $data(file)
if {$data(min)} { delete_less_than $data(min)}
puts [ to_$data(format)]
if {$data(v)} {
foreach i [ array names entry] {
puts stderr "$i $store(length,$i)"
}
}
CUT HERE =========== CUT HERE =========== CUT HERE =========== CUT HERE