translation source code

francis at NCBI.NLM.NIH.GOV francis at NCBI.NLM.NIH.GOV
Mon Aug 12 07:57:29 EST 1996


A problem with this code is that it does not take into account the
various genetic codes which exist.


      The Standard Code 
      The Vertebrate Mitochondrial Code 
      The Yeast Mitochondrial Code 
      The Mold, Protozoan, and Coelenterate Mitochondrial Code and the
          Mycoplasma/Spiroplasma Code 
      The Invertebrate Mitochondrial Code 
      The Ciliate, Dasycladacean and Hexamita Nuclear Code 
      The Echinoderm Mitochondrial Code 
      The Euplotid Nuclear Code 
      The Bacterial "Code" 
      The Alternative Yeast Nuclear Code 
      The Ascidian Mitochondrial Code 
      The Flatworm Mitochondrial Code 
      The Blepharisma Nuclear Code 


To see a description of these codes, have a look at:

http://www3.ncbi.nlm.nih.gov/htbin-post/Taxonomy/wprintgc?mode=c

We have something in our toolkit for translations (using the various
genetic codes as an argument), but I will need to look into it a
little, and tease out what you may want.  On the other hand, the code
below may be all you need as far as translations are concerned, if you
don't care or worry about alternative codes.

cheers,

f.

--
| B.F. Francis Ouellette  
| GenBank
|
| francis at ncbi.nlm.nih.gov   



> From roach at u.washington.edu Mon Aug 12 05:04:21 1996
> To: bio-soft at net.bio.net
> From: roach at u.washington.edu (Jared Roach)
> Subject: Re: translation source code
> Date: 11 Aug 1996 23:20:04 GMT
> NNTP-Posting-Host: saul3.u.washington.edu
> NNTP-Posting-User: roach
> 
> 	Well, here's C translation source code I wrote as an XFCN for HyperCard
> for the Macintosh, so you would be able to dispense with most of it for 
> whatever you wanted it for, most likely.  I include the whole code just to 
> put it into context.  Please don't hesitate to ask for clarification.  
> Note that my elegance and experience in programming C probably falls 
> somewhere in the middle of the lowest percentile, so be warned!
> 	The XFCN parts of the code are shareware by Mark Hanrek.
> 
> //  DNA2Prot XFCN by Jared Roach  © August 1996
> //  This program translates DNA sequences
> //  The XFCN shell was downloaded from the Web and is © 1992 Mark Hanrek
> 
> 
> //***************************************************************************************
> //	 Hanrek XCMD Shell 1.2
> //
> //   ©1992 Mark Hanrek & The Information Workshop.  All Rights Reserved.
> //
> //	 Note: 	Do all your programming between the bold black lines below. 
> //			Put additional functions you create into the "Support Functions" 
> //			section below that.  Put function prototypes into ExampleXFCN.h.
> //
> 
> /******************************************************************* Includes ********/
> 
> #include "SetUpA4.h"
> #include "HyperXCmd.h"
> #include "SuperCard.h"
> #include "StandardFunctions.h"
> #include "string.h"
> 
> /******************************************************************* Main Entry ******/
> 
> pascal void main( XCmdPtr paramPtr )	// Code-resource entry point of the XCMD shell. No need to ever change any of this...
> {
> 	RememberA0();						// NOTE(review): presumably records A0 so SetUpA4() can locate globals -- confirm in SetUpA4.h
> 	SetUpA4();							// must stay paired with RestoreA4() below
> 	InitializeReturnInfo( paramPtr );	// NOTE(review): presumably resets the result that AppendReturnInfo() later builds -- confirm
> 	ExternalHandler( paramPtr );		// the XFCN's real work (the DNA translation) happens in ExternalHandler()
> 	RestoreA4();						// undo SetUpA4() before returning to the caller
> }	
> 
> 
> 
> 
> //••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••
> //
> //	DNA2ProtXFCN HyperTalk Syntax:
> //  put 1 into frame (should be 1,2, or 3)
> //  put 1 into flag
> //  
> //  put cd fld "Sequence" into seq
> //  
> //  put DNA2ProtXFCN( seq, frame, flag ) into cd fld "output"
> //  
> //
> 
> #include "DNA2ProtXFCN.h"
> 
> //  Residue table for the standard genetic code.
> //  Row    = 4*(first base) + (second base), bases ordered A, C, G, T.
> //  Column = third base in the same order; column 4 is the residue reported
> //           when the third base is anything other than A, C, G or T:
> //             - fourfold-degenerate families (AC, CC, CG, CT, GC, GG, GT, TC)
> //               do not depend on the third base, so the residue is still known;
> //             - TA? is reported as 'Y' (uncertainty between tyrosine and stop
> //               is called tyrosine);
> //             - every other family yields 'X'.
> //  Stop codons (TAA, TAG, TGA) are written as a blank.
> static const char kCodonTable[16][6] = {
> 	"KNKNX",	// AA
> 	"TTTTT",	// AC  threonine
> 	"RSRSX",	// AG
> 	"IIMIX",	// AT
> 	"QHQHX",	// CA
> 	"PPPPP",	// CC  proline
> 	"RRRRR",	// CG  arginine
> 	"LLLLL",	// CT  leucine
> 	"EDEDX",	// GA
> 	"AAAAA",	// GC  alanine
> 	"GGGGG",	// GG  glycine
> 	"VVVVV",	// GT  valine
> 	" Y YY",	// TA
> 	"SSSSS",	// TC  serine
> 	" CWCX",	// TG
> 	"LFLFX"		// TT
> };
> 
> //  Map an upper-case base letter to its table index (A=0, C=1, G=2, T=3).
> //  Returns -1 for any other character (N, lower case, gaps, ...).
> static short BaseIndex( char cBase )
> {
> 	switch ( cBase ) {
> 		case 'A':	return 0;
> 		case 'C':	return 1;
> 		case 'G':	return 2;
> 		case 'T':	return 3;
> 		default:	return -1;
> 	}
> }
> 
> //  Translate the three bases starting at pCodon into a one-letter residue.
> //  The caller must guarantee that pCodon[0..2] are readable.
> static char TranslateCodon( const char *pCodon )
> {
> 	short	nFirst  = BaseIndex( pCodon[0] );
> 	short	nSecond = BaseIndex( pCodon[1] );
> 	short	nThird;
> 	
> 	if ( nFirst < 0 || nSecond < 0 )		// N or other character in 1st or 2nd base
> 		return 'X';
> 	
> 	nThird = BaseIndex( pCodon[2] );
> 	if ( nThird < 0 )						// N or other character in 3rd base
> 		nThird = 4;
> 	
> 	return kCodonTable[nFirst*4 + nSecond][nThird];
> }
> 
> //  DNA2ProtXFCN( seq, frame, flag )
> //
> //  Translates the DNA sequence in parameter 0 with the standard genetic code,
> //  starting at base 'frame' (parameter 1, 1-based; documented values are 1, 2
> //  or 3), and appends one residue per complete codon to the result, followed
> //  by a carriage return.  When 'flag' (parameter 2) is 1 a copyright line is
> //  appended as well.
> //
> //  Nothing is returned for an empty sequence or for a frame below 1.  A
> //  sequence too short to hold a complete codon in the requested frame yields
> //  just the carriage return.
> //
> //  Only upper-case A, C, G, T are recognised; see kCodonTable for how other
> //  characters are reported.
> void ExternalHandler( XCmdPtr paramPtr )
> {
> 	short	nFrame = 0;						// stays 0 (rejected below) if the fetch leaves it untouched
> 	short	nFlagVariable = 0;
> 	
> 	char 	pSeq[3000];
> 	char 	pProtSeq[1000];
> 	
> 	size_t 	nSeqLength;
> 	size_t 	nOffset;						// 0-based index of the first translated base
> 	size_t 	nProtLength = 0;				// number of complete codons in this frame
> 	size_t 	i;
> 	
> 	
> 	// NOTE(review): ParamToCString() is given no buffer size here, so a sequence
> 	// of 3000 or more characters presumably overruns pSeq -- confirm against
> 	// StandardFunctions.h and bound the copy there if possible.
> 	ParamToCString( 0, pSeq );			// the first parameter is the sequence
> 	ParamToShort( 1, &nFrame ); 			
> 	ParamToShort( 2, &nFlagVariable ); 			
> 	
> 	nSeqLength = strlen(pSeq);
> 	
> 	if ( nSeqLength < 1 )
> 		return;
> 	
> 	if ( nFrame < 1 )						// frame 0 or negative would index before pSeq[0]
> 		return;
> 	
> 	nOffset = (size_t)(nFrame - 1);
> 	
> 	// Only subtract once we know the result cannot wrap around: size_t is
> 	// unsigned, so an offset past the end of the sequence would otherwise
> 	// turn into a huge codon count.
> 	if ( nSeqLength > nOffset )
> 		nProtLength = (nSeqLength - nOffset) / 3;
> 	
> 	if ( nProtLength > sizeof pProtSeq )	// never write past the residue buffer
> 		nProtLength = sizeof pProtSeq;
> 	
> 	for (i = 0; i < nProtLength; ++i)
> 		pProtSeq[i] = TranslateCodon( pSeq + nOffset + 3*i );
> 	
> 	
> 	//Return Result
> 	for (i = 0; i < nProtLength; ++i)
> 		AppendReturnInfo( kResult, "|b", pProtSeq[i] );
> 	AppendReturnInfo( kResult, "\r");
> 	
> 	
> 	
> 	if (nFlagVariable == 1)
> 		AppendReturnInfo( kResult, "\r DNA2Prot XFCN copyright Jared Roach 8/8/96 v0.01");
> }	
> 
> 
> 
> 
> 
> 
> 




More information about the Bio-soft mailing list