a program to find signal sequences, version II

Robert C. Colgrove robin at cco.caltech.edu
Wed Jul 28 21:02:25 EST 1993


Here's the filtered version of the signal sequence finder,
which gives a nicer-looking plot.
See the previous post for details.
	-robin

********************************cut here********************************

/* A program to generate a signal sequence profile that makes output */
/* that can be piped to unix plot routines. Compile with the -lm flag. */
/* Algorithm by von Heijne (see below), code by R. C. Colgrove. */

#include <stdio.h>
#include <math.h>
/* One-letter residue codes. Position j in this string selects row j of */
/* freq[][] and entry j of expect[]; main() prints the letter beside each row. */
#define aminotype "ACDEFGHIKLMNPQRSTVWY"
/* Divisor applied to expect[j] to form the replacement value for a zero */
/* count in columns 10 and 12 of freq[][] before the log-ratio is taken. */
/* NOTE(review): presumably a reference sample size -- confirm against the paper. */
#define SAMPLE 450
/* The first command-line argument; main() opens it as the sequence file. */
#define seqfile argv[1]
/* Per-residue reference values, one per letter of aminotype (same order). */
/* main() prints them under the EXPECT column and uses them as the */
/* denominator of the log-ratio that forms the weight matrix. */
float expect[] = {
        14.5,   /* A */
        4.5,    /* C */
        8.9,    /* D */
        10.0,   /* E */
        5.6,    /* F */
        12.1,   /* G */
        3.4,    /* H */
        7.4,    /* I */
        11.3,   /* K */
        12.1,   /* L */
        2.7,    /* M */
        7.1,    /* N */
        7.4,    /* P */
        6.3,    /* Q */
        7.6,    /* R */
        11.4,   /* S */
        9.7,    /* T */
        11.1,   /* V */
        1.8,    /* W */
        5.6     /* Y */
};
/* Residue counts per window column. Rows follow the order of aminotype; */
/* the 15 columns are labelled -13 .. -1, +1, +2 by main() when it prints */
/* the table. main() overwrites zero entries before taking logarithms, so */
/* the table must stay writable. */
float freq[20][15] = {
        /* A */ { 16, 13, 14, 15, 20, 18, 18, 17, 25, 15, 47,  6, 80, 18,  6 },
        /* C */ {  3,  6,  9,  7,  9, 14,  6,  8,  5,  6, 19,  3,  9,  8,  3 },
        /* D */ {  0,  0,  0,  0,  0,  0,  0,  0,  5,  3,  0,  5,  0, 10, 11 },
        /* E */ {  0,  0,  0,  1,  0,  0,  0,  0,  3,  7,  0,  7,  0, 13, 14 },
        /* F */ { 13,  9, 11, 11,  6,  7, 18, 13,  4,  5,  0, 13,  0,  6,  4 },
        /* G */ {  4,  4,  3,  6,  3, 13,  3,  2, 19, 34,  5,  7, 39, 10,  7 },
        /* H */ {  0,  0,  0,  0,  0,  1,  1,  0,  5,  0,  0,  6,  0,  4,  2 },
        /* I */ { 15, 15,  8,  6, 11,  5,  4,  8,  5,  1, 10,  5,  0,  8,  7 },
        /* K */ {  0,  0,  0,  1,  0,  0,  1,  0,  0,  4,  0,  2,  0, 11,  9 },
        /* L */ { 71, 68, 72, 79, 78, 45, 64, 49, 10, 23,  8, 20,  1,  8,  4 },
        /* M */ {  0,  3,  7,  4,  1,  6,  2,  2,  0,  0,  0,  1,  0,  1,  2 },
        /* N */ {  0,  1,  0,  1,  1,  0,  0,  0,  3,  3,  0, 10,  0,  4,  7 },
        /* P */ {  2,  0,  2,  0,  0,  4,  1,  8, 20, 14,  0,  1,  3,  0, 22 },
        /* Q */ {  0,  0,  0,  1,  0,  6,  1,  0, 10,  8,  0, 18,  3, 19, 10 },
        /* R */ {  2,  0,  0,  0,  0,  1,  0,  0,  7,  4,  0, 15,  0, 12,  9 },
        /* S */ {  9,  3,  8,  6, 13, 10, 15, 16, 26, 11, 23, 17, 20, 15, 10 },
        /* T */ {  2, 10,  5,  4,  5, 13,  7,  7, 12,  6, 17,  8,  6,  3, 10 },
        /* V */ { 20, 25, 15, 18, 13, 15, 11, 27,  0, 12, 32,  3,  0,  8, 17 },
        /* W */ {  4,  3,  3,  1,  1,  2,  5,  3,  1,  3,  0,  9,  0,  2,  0 },
        /* Y */ {  0,  1,  4,  0,  0,  1,  3,  1,  1,  2,  0,  5,  0,  1,  7 }
};
/* Table dimensions shared by the helpers below. They must match the */
/* declared shape of freq[][] and the length of aminotype. */
enum {
        SIG_RESIDUES = 20,      /* rows: one per letter of aminotype */
        SIG_WINDOW = 15,        /* columns: window positions -13..-1, +1, +2 */
        SIG_LAST_BEFORE_CUT = 12 /* window index labelled -1 in the headers */
};

/* Print the 15 column labels (-13 .. -1, 1, 2) with the original spacing. */
/* `lead` and `trail` are emitted before and after each number. */
static void sig_print_labels(const char *lead, const char *trail)
{
        int col;

        for (col = 0; col < SIG_WINDOW; col++) {
                if (col > 3) printf(" ");
                if (col > 12) printf(" ");
                /* there is no position 0: index 13 is +1, index 14 is +2 */
                printf("%s%d%s", lead, col < 13 ? col - 13 : col - 12, trail);
        }
}

/* Print the banner and the raw frequency table with the EXPECT column. */
static void sig_print_frequencies(void)
{
        int row, col;

        printf("\n\t\tSIGNAL SEQUENCE FINDER\n");
        printf("\n\nFREQUENCY MATRIX\n\n");
        printf("\t");
        sig_print_labels("", " ");
        printf("\tEXPECT\n\n");
        for (row = 0; row < SIG_RESIDUES; row++) {
                printf("%c\t", aminotype[row]);
                for (col = 0; col < SIG_WINDOW; col++)
                        printf(" %2.0f ", freq[row][col]);
                printf("\t%4.1f\n", expect[row]);
        }
}

/* Fill weight[][] with log(freq/expect). Zero counts are first replaced */
/* in freq[][] itself (as the original code did): by expect/SAMPLE in */
/* columns 10 and 12, and by 1 everywhere else, so the log is defined. */
static void sig_build_weights(float weight[SIG_RESIDUES][SIG_WINDOW])
{
        int row, col;

        for (row = 0; row < SIG_RESIDUES; row++) {
                for (col = 0; col < SIG_WINDOW; col++) {
                        if (freq[row][col] == 0) {
                                if (col == 10 || col == 12)
                                        freq[row][col] = expect[row] / SAMPLE;
                                else
                                        freq[row][col] = 1;
                        }
                        weight[row][col] =
                                (float)log(freq[row][col] / expect[row]);
                }
        }
}

/* Print the weight table scaled by 100, followed by the literature citation. */
static void sig_print_weights(float weight[SIG_RESIDUES][SIG_WINDOW])
{
        int row, col;

        printf("\n\nWEIGHT MATRIX (x 100)\n\n");
        printf("  ");
        sig_print_labels("  ", "");
        printf("\n\n");
        for (row = 0; row < SIG_RESIDUES; row++) {
                printf("%c  ", aminotype[row]);
                for (col = 0; col < SIG_WINDOW; col++)
                        printf("%4.0f ", 100 * (weight[row][col]));
                printf("\n");
        }
        printf("\nmethod from: Gunnar von Heijne, NAR 14, p.4683, 1986\n");
}

/* Slide the 15-residue window over the sequence in `path`, print one row */
/* per residue and then the summary. `plot_mode` selects the plot-oriented */
/* output (more than one command-line argument) instead of the table. */
/* Returns 0 on success, 1 if the file cannot be opened or nothing could */
/* be scored. */
static int sig_scan_file(const char *path, int plot_mode,
                         float weight[SIG_RESIDUES][SIG_WINDOW])
{
        FILE *fp;
        char window[SIG_WINDOW];
        int c;                  /* int, so that EOF is distinguishable */
        int i, j;
        int k = 0;              /* residues consumed so far */
        int best_k = 0;
        int have_best = 0;
        float best = 0;
        float score;

        fp = fopen(path, "r");
        if (fp == NULL) {
                fprintf(stderr, "no such file\n");
                return 1;
        }

        /* Blanks match no residue letter, so unfilled slots score nothing. */
        for (i = 0; i < SIG_WINDOW; i++)
                window[i] = ' ';

        if (!plot_mode)
                printf("\n\nSCORE PROFILE of %s\n\ncleave\tscore\tresidue\n\n",
                       path);

        /* Reading stops at end of file or at a NUL byte. */
        while ((c = getc(fp)) != EOF && c != '\0') {
                if (c == ' ' || c == '\t' || c == '\n')
                        continue;
                window[SIG_WINDOW - 1] = (char)c;

                /* Only the current score is needed, so no per-position */
                /* array (and no length limit on the sequence). */
                score = 0;
                for (i = 0; i < SIG_WINDOW; i++)
                        for (j = 0; j < SIG_RESIDUES; j++)
                                if (window[i] == aminotype[j])
                                        score += weight[j][i];

                /* Rows are reported once the -1 slot holds residue 1 */
                /* (k > 1); the maximum is taken over those rows only. */
                if (k > 1) {
                        if (!have_best || score > best) {
                                best = score;
                                best_k = k;
                                have_best = 1;
                        }
                        if (plot_mode && score < 0) {
                                printf("%d\t0", k - 1);
                        } else {
                                if (plot_mode)
                                        printf("%4.2f\t0\n", k - 1.25);
                                printf("%d\t%4.2f", k - 1, score);
                                if (plot_mode)
                                        printf("\n%4.2f\t0", k - 0.75);
                        }
                }
                if (!plot_mode)
                        printf("\t%c", window[SIG_LAST_BEFORE_CUT]);
                printf("\n");

                for (i = 0; i < SIG_WINDOW - 1; i++)
                        window[i] = window[i + 1];
                k++;
        }
        fclose(fp);

        if (!have_best) {
                fprintf(stderr, "sequence too short to score\n");
                return 1;
        }

        if (!plot_mode) {
                printf("\nmax score is %4.2f, cleaved after res %d\n\n",
                       best, best_k - 1);
                if (best < 0.0)
                        printf("improbable signal sequence\n");
                else if (best < 5.0)
                        printf("possible but ambiguous signal\n");
                else if (best < 10.0)
                        printf("probable signal sequence\n");
                else
                        printf("highly probable signal sequence\n");
        } else {
                printf("%d\t0\t\" \"\n", k - 1);
                printf("%d\t%4.2f\t<res%d,score%4.2f>\n",
                       best_k - 1, best, best_k - 1, best);
                printf("%d\t2.5\tpossible\n", k);
                printf("%d\t7.5\tprobable\n", k);
                printf("%d\t12.5\tdefinite\n", k);
        }
        return 0;
}

/* Entry point. */
/*   no arguments   : print the frequency and weight matrices */
/*   one argument   : print a score table for the named sequence file */
/*   more arguments : print the same profile in the plot-oriented format */
/* Returns 0 on success and 1 on failure. */
int main(int argc, char *argv[])
{
        float weight[SIG_RESIDUES][SIG_WINDOW];

        if (argc < 2) {
                /* The raw counts are printed before sig_build_weights() */
                /* replaces the zero entries. */
                sig_print_frequencies();
                sig_build_weights(weight);
                sig_print_weights(weight);
                return 0;
        }

        sig_build_weights(weight);
        return sig_scan_file(argv[1], argc > 2, weight);
}



More information about the Bioforum mailing list