Changing to Nexus Format

Thomas Sicheritz Thomas.Sicheritz at molbio.uu.se
Thu Dec 3 11:12:24 EST 1998


Richard Friedman <friedman at cuccfa.ccc.columbia.edu> writes:

> Everybody,
> 
> 	Does anyone have a program to convert an alignment in *msf,
> Clustal, Molphy, or Phylip format and the corresponding tree
> in Newick format into Nexus format? It turns out that the
> GCG Program Paupdisplay can root a tree using the mean of the longest
> Path method, but requires that the input file be in Nexus format.
> 
> Thanks and best wishes,
> Rich

here is a quick and dirty tclsh script for clustal files ...
(tested with clustalw1.7 alignments)

usage: aln2.tcl <file.aln> <format(nexus)>
optional: -paup ... add a string containing paup commands

e.g.
aln2.tcl test.aln nexus -paup "set criterion=distance;bootstrap;describetrees all /brlens=yes tcompress=yes plot=phylogram"  > test.nexus

mail me if there are problems ...

-thomas

CUT HERE =========== CUT HERE =========== CUT HERE =========== CUT HERE

#!/bin/sh
# The next line is executed by /bin/sh, but not tcl \
exec tclsh $0 ${1+"$@"}

# Created: Fri Jun 19 15:00:49 1998
# Last changed: Time-stamp: <98/11/17 15:47:11 thomas>
# Thomas.Sicheritz at molbio.uu.se, http://evolution.bmc.uu.se/~thomas
# File: aln2.tcl

package require opt
::tcl::OptProc checkopts {
    {file      "clustal alignment"}
    {format  -choice {nexus mase fasta} "alignment format"}
    {-min 0 "minimal length"}
    {-v "verbose"}
    {-short "(nexus) shorter version"}
    {-paup {} "paup commands"}
    {-pauplog {} "log paup session to file"}
} {
    foreach i [info locals] {
	if {$i=="args" || $i=="Args"} continue
	set ::data($i) [ set $i]
    }
}
if {[catch {eval checkopts $argv} msg]} { puts stderr $msg; exit}

proc prettySeq {seq} {
    regsub -all "............................................................" $seq \&\n newseq
    return $newseq
}
proc read_clustal {_file} {
    global entry store
    set tmpfile /tmp/tmp[pid]nex.aln
    if { [ regexp {\.gz$} $_file]} {
	exec gzip -d -c $_file > $tmpfile
	set file $tmpfile
    } else {
	set file $_file
    }
    set fid [ open $file r]
    gets $fid line
    gets $fid line
    gets $fid line
    while { [gets $fid line]>=0 } {
	if { [ regexp {^[a-zA-Z0-9]} $line]} {
	    set name [ string trim [ string range $line 0 15]]
	    set seq [ string range $line 16 end]
	    append entry($name) $seq
	}
    }
    foreach i [ array names entry] {
	regsub -all -- "-" $entry($i) {} tmp
	set store(length,$i) [ string length $tmp]
	set store(seq,$i) $tmp
    }
    if { [ regexp {\.gz$} $_file]} {
	file  delete $tmpfile
    }
}
proc to_nexus {} {
    global entry data
    if { !$data(short)} { append res  "\#NEXUS\nBEGIN taxa;\nDIMENSIONS ntax=[ llength [ array names entry]];\nTAXLABELS\n"} else {
	append res  "\#NEXUS\nBEGIN data;\nDIMENSIONS ntax=[ llength [ array names entry]] "
    }
    set name_list [ lsort [ array names entry]]
    if { !$data(short)} { foreach i $name_list  { append res  "$i\n" }}
    set first [ lindex $name_list 0]
    if { !$data(short)} { 
	append res  ";\nEND;\n"
	append res  "begin characters;\ndimensions "
    }
    append res " nchar=[ string length $entry($first)];\n"
    append res  "format datatype=protein  missing=-;\nMATRIX\n"
    foreach i $name_list {
	append res  "$i "
	if { ! $data(short) } { append res "\n"}
	regsub -all {[xX]} $entry($i) "-" seq
	append res  "$seq\n"
    }
    append res  "; \nend;\n"

    if {[ string length $data(paup)]!=0} {
	append res "begin paup;\nset nowarnreset autoclose;\n"
	if {[ string length $data(pauplog)]!=0} {
	    append res "log file=$data(pauplog);\n"
	}
 	foreach i [ split $data(paup) \;] {
 	    append res "$i;\n"
 	}
	append res "endblock;"
    }
    return $res
}
proc to_mase {} {
    global entry
    set res ";; saved by aln2 on [clock format [clock seconds] -format {%b %d %H:%M}]"
    foreach i  [ array names entry] {
	append res  "$i\n"
	append res ";no comment\n$i\n"
	set seq [ prettySeq $entry($i)]
	append res "$seq\n"
    }
    return $res
}
proc to_fasta {} {
    global entry
    foreach i  [ array names entry] {
	set seq [ prettySeq $entry($i)]
	append res  ">$i\n$seq\n"
    }
    return $res
}
proc delete_less_than {min} {
    global entry store
    foreach i [ array names entry] {
	if { $store(length,$i) <$min} {
	    unset entry($i)
	    unset store(length,$i)
	    unset store(seq,$i)
	}
    }
}
read_clustal $data(file)
if {$data(min)} { delete_less_than $data(min)}
puts [ to_$data(format)]


if {$data(v)} {
    foreach i [ array names entry] {
	puts stderr "$i $store(length,$i)"
    }
}

CUT HERE =========== CUT HERE =========== CUT HERE =========== CUT HERE








More information about the Mol-evol mailing list