Read PDB ---> find Phi, Psi, etc.

Richard Burd Macdonald rbm8p at darwin.clas.Virginia.EDU
Tue Sep 21 15:38:43 EST 1993



I've now sent corrected sifi.c code to about twenty people.  I'm
putting it here in the hope that someone who sees it will play with
it to make it into yet another one of those too-general-to-use
packages.  Obviously there are plenty of folks out there who could
use a fairly generic set of routines to manipulate pdb files.  This
program should provide enough of an example for any who thusfar have
been afraid to dip their fingers into C programming.

My code is not robust, but I'd be thrilled if anyone who adds to 
it or improves it would pass the code along. 

Thanks!

Rick

---> Since sending this out the first time, I've found that
it fails under some compilers.  There is a  
commented line in the read_atom_record() subroutine which
should help anyone who wants to fix it.

--------------------------------------------------------------------
The short C program that follows calculates phi-psi and alpha-carbon
bend and torsion angles for Brookhaven protein data files.  The
people who manage the data bank also have FORTRAN routines to do
this.

I have run it on SGI 4D and IBM RS6000's, but you may have to fiddle
with your local math library routines.

To Build: cc sifi.c -o sifi -lm  

Usage: sifi <pdbfile>

This routine is not particularly robust, and will fail miserably
if fed anything remotely unfamiliar.

/*********************   CUT HERE      ********************************/

/***********************************************************************
This program was written by 

Rick MacDonald
was: University of Virginia Biophysics
now: MIT Health Sciences and Technology
rbm8p at virginia.edu

This program will print for each amino acid residue in a pdb file: 
    psi
    phi
    alpha carbon bend
    alpha carbon dihedral

To Build: cc sifi.c -o sifi -lm  

Usage: sifi <pdbfile>
***********************************************************************/

#include <stdio.h>
#include <math.h>

#define TRUE	1
#define FALSE	0

#define LINELEN 100

#define MAXATOMS 10000
#define MAXRES   1000

typedef struct {
	int	num;
	char	a_type[6];
	char	r_type[4];
	char	subunit;
	int	resnum;
	float	p[3];	    /* position x,y,z */
} ATOM;

typedef struct {
	int	nitrog;
	int	alphac;
	int	carbon;
	int	oxygen;
	char	*r_type;    /* 3 letter code */
	char	res_type;   /* 1 letter code */
	char	subunit;
	float	phi;
	float	psi;
	float	cab;
	float	cad;
	int 	resnum;
} AMINO;

ATOM	atom[MAXATOMS];
AMINO	res[MAXRES];

int 	err;


main(int argc, char **argv)
{
char	*strstr(), *strncpy(), *strcat();
char	*next_str(char *string);
float	angle(float *v1, float *v2);
float	dot(float *v1, float *v2);
float	dihedral(float *p1, float *p2, float *p3, float *p4);
void	cross(float *v1, float *v2, float *p);
float	len(float *v1);
int 	cmpstr(char *s1, char *s2);
void	read_atom_record(char *rec, ATOM *atom1);
char	single_letter_code(char *string); 

    register i;

    FILE    *Fpdb;

    char    tname[80], fname[80], line[LINELEN], *temp, WAS, WUZ, WLB;

    float  v1[3], v2[3];
    int	    ndx, n_atoms, n_res, rno, is, was;
    
/*-----------------------------------------------------------------------
Open file specified by command line, if it exists 
-----------------------------------------------------------------------*/
    if(argc<2) err = printf("Usage: sifi <pdbfile>\n");

    if(argc!=2) exit(1);

    temp = strncpy(fname,argv[argc-1],sizeof(tname));
    if ((Fpdb = fopen(fname,"r"))==NULL)
    	err = printf("Could not open file %s\n",fname);

/*-----------------------------------------------------------------------
Read through the PDB format file to extract ATOM records
-----------------------------------------------------------------------*/

    ndx = 0;
    while (fgets(line,LINELEN,Fpdb)!=0) {
    	if (strstr(line,"ATOM  ")!=NULL) {
    	    read_atom_record(line,&atom[ndx]);
    	    ndx++;
    	}
    }
    n_atoms = ndx;

/*-----------------------------------------------------------------------
Now we've got the ATOMs. Re-order them into residues. 
-----------------------------------------------------------------------*/
    rno = 0;
    was = atom[0].resnum;
    for (ndx = 0; ndx < n_atoms; ndx++) {
	is = atom[ndx].resnum;
    	if (is != was) rno++;
	if (cmpstr(atom[ndx].a_type,"CA")!=NULL) {
		res[rno].resnum = atom[ndx].resnum;
		res[rno].alphac = ndx;
		res[rno].r_type = atom[ndx].r_type;
		res[rno].subunit= atom[ndx].subunit;
	}
	else if (cmpstr(atom[ndx].a_type,"N")!=NULL)
		res[rno].nitrog = ndx;
	else if (cmpstr(atom[ndx].a_type,"C")!=NULL)
		res[rno].carbon = ndx;
	else if (cmpstr(atom[ndx].a_type,"O")!=NULL)
		res[rno].oxygen = ndx;
	was = is;
    }
    
    n_res = rno; 

/*-----------------------------------------------------------------------
Find for each residue:
    	single letter residue code
	phi angle
	psi angle
	cab angle = bend angle between adjacent alpha carbons 
	cad angle = is the dihedral() or torsion angle
-----------------------------------------------------------------------*/

    res[0].res_type = single_letter_code(res[0].r_type);
    res[0].psi	    = -(dihedral(atom[res[1].nitrog].p,
				    	 atom[res[1].alphac].p,
				    	 atom[res[1].carbon].p,
				    	 atom[res[2].nitrog].p));
    res[1].res_type = single_letter_code(res[1].r_type);
    res[1].phi	    = -(dihedral(atom[res[0].carbon].p,
				    	 atom[res[1].nitrog].p,
				    	 atom[res[1].alphac].p,
				    	 atom[res[1].carbon].p));
    res[1].psi	    = -(dihedral(atom[res[1].nitrog].p,
				    	 atom[res[1].alphac].p,
				    	 atom[res[1].carbon].p,
				    	 atom[res[2].nitrog].p));

    for (rno = 2; rno <= n_res; rno++) {
	res[rno].res_type   = single_letter_code(res[rno].r_type);
	
	res[rno].phi	    = -(dihedral(atom[res[rno-1].carbon].p,
				    	 atom[res[rno  ].nitrog].p,
				    	 atom[res[rno  ].alphac].p,
				    	 atom[res[rno  ].carbon].p));

	res[rno].psi	    = -(dihedral(atom[res[rno  ].nitrog].p,
				    	 atom[res[rno  ].alphac].p,
				    	 atom[res[rno  ].carbon].p,
				    	 atom[res[rno+1].nitrog].p));
					
    	res[rno].cad	    = dihedral( atom[res[rno-2].alphac].p,
    	    	    	    	    	atom[res[rno-1].alphac].p,
				    	atom[res[rno  ].alphac].p,
				    	atom[res[rno+1].alphac].p);
    	for (i=0;i<=2;i++) {
    	    v1[i] = atom[res[rno-1].alphac].p[i] - atom[res[rno].alphac].p[i];
    	    v2[i] = atom[res[rno+1].alphac].p[i] - atom[res[rno].alphac].p[i];
    	}    
    	res[rno].cab	    = angle(v1, v2);
    }
			

/*-----------------------------------------------------------------------
Print this stuff out as a table.
-----------------------------------------------------------------------*/
    	printf("%s\n", fname);
    	printf("   #     resid   phi    psi     Cab      Cat\n");

    	WAS = 'x';
	WUZ = 'x';
	WLB = res[0].subunit;
	res[n_res+1].subunit = 'x';
    	for (rno = 0; rno <= n_res; rno++) {
	
	    if (res[rno].subunit != WAS) {  	/* This gibberish flags the meaningless      */
		res[rno].phi = 999.;	    	/* values at the ends of each subunit as 999 */
	    }
	    if (res[rno].subunit != WUZ) {
		res[rno].cab = 999.;
		res[rno].cad = 999.;
	    }
	    WUZ = WAS;
	    WAS = res[rno].subunit;
	    WLB = res[rno+1].subunit;
	    if (res[rno].subunit != WLB) {
		res[rno].psi = 999.;
		res[rno].cab = 999.;
		res[rno].cad = 999.;
	    }
	    
	
	    printf("%4d %c   %3s %c ", 
	    	res[rno].resnum, res[rno].subunit, res[rno].r_type, res[rno].res_type);
	    printf("%5.0f  ",  res[rno].phi);
	    printf("%5.0f   ", res[rno].psi);
	    printf("%5.0f  ",  res[rno].cab);
	    printf("%5.0f",    res[rno].cad);

	    printf("\n");
    	}

} /* end main */

/***********************************************************************

***********************************************************************/
void read_atom_record(char *rec, ATOM *atom1) 
{
	char	*next_str(char *string);
	char	*ptr;

	float	fl;

	ptr = rec;
	
	err = sscanf( (ptr = next_str(ptr)), "%d",  &atom1->num    );
	err = sscanf( (ptr = next_str(ptr)), "%3s",  atom1->a_type );
	err = sscanf( (ptr = next_str(ptr)), "%3s",   atom1->r_type );
	ptr = ptr + 4;
	atom1->subunit = *ptr;
	err = sscanf( (ptr = next_str(ptr)), "%d",  &atom1->resnum );


/*  Remove this comment to print atom records as read from pdb 
printf("\n%5d %4s %s %c %d ", atom1->num, atom1->a_type, atom1->r_type, atom1->subunit, atom1->resnum);
*/


/* This is a silly patch --- but I can't make %Lf read a float! */
	err = sscanf( (ptr = next_str(ptr)), "%f", &fl   );
		atom1->p[0] = fl;
	err = sscanf( (ptr = next_str(ptr)), "%f", &fl   );
		atom1->p[1] = fl;
	err = sscanf( (ptr = next_str(ptr)), "%f", &fl   );
		atom1->p[2] = fl;

    	return;
}

/***********************************************************************

***********************************************************************/
char single_letter_code(char *string) 
{
    	     if (cmpstr(string,"CYS")!=NULL) return 'C';
	else if (cmpstr(string,"HIS")!=NULL) return 'H';
	else if (cmpstr(string,"ILE")!=NULL) return 'I';
	else if (cmpstr(string,"MET")!=NULL) return 'M';
	else if (cmpstr(string,"SER")!=NULL) return 'S';
	else if (cmpstr(string,"VAL")!=NULL) return 'V';
	else if (cmpstr(string,"ALA")!=NULL) return 'A';
	else if (cmpstr(string,"GLY")!=NULL) return 'G';
	else if (cmpstr(string,"LEU")!=NULL) return 'L';
	else if (cmpstr(string,"PRO")!=NULL) return 'P';
	else if (cmpstr(string,"THR")!=NULL) return 'T';
	else if (cmpstr(string,"PHE")!=NULL) return 'F';
	else if (cmpstr(string,"ARG")!=NULL) return 'R';
	else if (cmpstr(string,"TYR")!=NULL) return 'Y';
	else if (cmpstr(string,"TRP")!=NULL) return 'W';
	else if (cmpstr(string,"ASN")!=NULL) return 'N';
	else if (cmpstr(string,"GLU")!=NULL) return 'E';
	else if (cmpstr(string,"GLN")!=NULL) return 'Q';
	else if (cmpstr(string,"LYS")!=NULL) return 'K';
	else if (cmpstr(string,"ASP")!=NULL) return 'D';
    return '*';
}

/***********************************************************************
 cmpstr does a simple compare of a string s1 with string s2 and returns a  
 pointer to the beginning of s1 if same or NULL if not same
***********************************************************************/
int cmpstr(char *s1, char *s2)
{
	loop:  if(*s1 != *s2) return(0);
	if(*s1 == '\0') return(1);
	s1++;   s2++;
	goto loop;
}

/***********************************************************************

***********************************************************************/
char *next_str(char *string)
{
	char	c;
	while (((c = *string) != ' ') && (c != '\t')) string++;
	while (((c = *string) == ' ') || (c == '\t')) string++;
	return string;
}

/***********************************************************************
Calculate the dihedral angle between two vectors given endpoints.
***********************************************************************/
float  dihedral(float *p1, float *p2, float *p3, float *p4)
{
void	cross(float *v1, float *v2, float *p);
float	dot(float *v1, float *v2);
float	angle(float *v1, float *v2);

	float	v1[3], v2[3], v3[3], p[3], q[3], r[3], theta;
	register i;

    	for (i = 0; (i < 3); i++) {
		v1[i] = (p2[i] - p1[i]);
		v2[i] = (p2[i] - p3[i]);
		v3[i] = (p4[i] - p3[i]);
	}
	cross(v1,v2,p);
	cross(v2,v3,q);
	theta = angle(p,q);
	cross(p,q,r);
	if(dot(v2,r) < 0) theta = -theta;
	return theta;
}

/***********************************************************************
The vector cross product is returned as the third parameter
***********************************************************************/
void  cross(float *v1, float *v2, float *p)
{
    	p[0] = (v1[1] * v2[2]) - (v1[2] * v2[1]);
    	p[1] = (v1[2] * v2[0]) - (v1[0] * v2[2]);
    	p[2] = (v1[0] * v2[1]) - (v1[1] * v2[0]);
}

/***********************************************************************
Return angle (degrees) between two vectors
***********************************************************************/
float  angle(float *v1, float *v2)
{
	
float	len(float *v1);
float	dot(float *v1, float *v2);
	float cos_val, sin_val, sinsq;

	cos_val = dot(v1,v2)/(len(v1)*len(v2));
	sinsq = 1.0 - cos_val*cos_val;
	if( sinsq < 0.0 ) sinsq = 0.0;
	sin_val = sqrt(sinsq);
	return(atan2(sin_val,cos_val) * 57.29578); /* pi radians */

}

/***********************************************************************
Return vector dot product
***********************************************************************/
float  dot(float *v1, float *v2)
{
	int	i;
	float	sum;

	sum = 0;
	for (i = 0; (i <= 2); i++)  {
		sum = sum + (v1[i] * v2[i]);
	}

	return sum;
}

/***********************************************************************
Return the length of a vector
***********************************************************************/
float len (float *v1)
{
/*  This can be faster sometimes.
	return ( fexp(0.5*flog ( (v1[0]*v1[0]) + (v1[1]*v1[1]) + (v1[2]*v1[2]) )) );
*/
	return ( sqrt ( (v1[0]*v1[0]) + (v1[1]*v1[1]) + (v1[2]*v1[2]) ) );

}



More information about the Proteins mailing list