swiss2ace

Danielle et jean Thierry-Mieg mieg at kaa.crbm.cnrs-mop.fr
Sun Jun 28 05:47:35 EST 1998


The following awk scripts produce ace files from SwissProt records.
They may not be complete.

They can be found in the acedb distribution as wscripts/swiss.awk and wscripts/swissbig.awk.

===========

# File wscripts/swiss.awk, postprocessing commands used by blast_search script
# @(#)swiss.awk	1.1 3/28/97
#
# Converts SwissProt flat-file records read on input into ace-format text:
# each AC line opens a Protein paragraph, DE/RX/RL/GN lines add tags to it,
# and the lines following SQ are copied into a separate Peptide paragraph.
#
# state: 0 = outside the sequence section
#        2 = SQ line seen, Peptide paragraph not opened yet
#        3 = inside the sequence section, copying residue lines
#
# NOTE(review): every quoted value is emitted with an opening double quote
# and no closing one; presumably the ace reader accepts end-of-line as the
# terminator -- confirm before changing any of the format strings.

BEGIN { state = 0 ; }

# AC: the second field, with ';' removed, names the protein and is also
# recorded as its SWISSPROT database identifier.
/^AC/ {
  seq = "SP:" $2 ;
  gsub(/;/, "", seq) ;
  printf("\nProtein %s\nDatabase SWISSPROT %s\n",seq,seq) ;
  next ;
}

# DE, RX, RL: the text after the 5-column line prefix is copied verbatim.
/^DE/ { xx = substr($0, 6) ; printf("Description \"%s\n", xx) ; next ; }
/^RX/ { xx = substr($0, 6) ; printf("Reference \"%s\n", xx) ; next ; }
/^RL/ { xx = substr($0, 6) ; printf("Reference \"%s\n", xx) ; next ; }

# GN: one Locus line per gene name, names being separated by " OR ".
# Only the period that terminates the line is removed; periods inside a
# name (e.g. ZK617.1) are part of the name and must be kept.
/^GN/ {
  xx = substr($0, 6) ;
  sub(/\.[ \t]*$/, "", xx) ;
  gsub(/ OR /, "\nLocus \"", xx) ;
  printf("Locus \"%s\n", xx) ;
  next ;
}

# "//" terminates the record: leave the sequence section and emit the
# blank line that closes the current paragraph.
/^\/\// { state = 0 ; printf("\n") ; next ; }

# SQ: add the Peptide tag to the Protein paragraph and close that paragraph
# with a blank line; the Peptide paragraph itself is opened by the next line.
/^SQ/ { printf ("Peptide %s\n\n",seq) ; state = 2 ; next ; }

# Any line not consumed above.  The first one after SQ opens the Peptide
# paragraph; every line in the sequence section is then printed with its
# digits and blanks removed.
{
  if (state == 2)
    {
      printf("Peptide %s\n", seq) ;
      state = 3 ;
    }
  if (state == 3)
    {
      pp = $0 ;
      gsub(/[0-9]/, "", pp) ;
      gsub(/ /, "", pp) ;
      printf("%s\n", pp) ;
    }
}

===========

# File wscripts/swissbig.awk, postprocessing commands used by blast_search script
# @(#)swiss.awk	1.1 3/28/97
#
# Scans SwissProt flat-file records and, for those whose OS line starts
# with CAENORHABDITIS ELEGANS, prints an ace Protein paragraph (Database,
# Title, Author, Medline_acc) followed by a Peptide paragraph.  Nothing is
# printed until the "//" record terminator, because the organism is only
# known after the ID/AC/DE lines have already been read.
#
# Per-record variables:
#   seq   "SW:" + entry name from the ID line
#   ac    accession from the AC line
#   de    text of the DE line      (each DE line overwrites the previous one)
#   ra    author list from RA      (each RA line overwrites the previous one)
#   rx    Medline number from RX
#   aa    accumulated Peptide paragraph text
#   ce    1 once the C. elegans OS line has been seen
#   state 0 = outside the sequence section, 2 = SQ seen, 3 = copying residues
#
# NOTE(review): quoted values are emitted with an opening double quote and
# no closing one; presumably the ace reader accepts end-of-line as the
# terminator -- confirm before changing any of the format strings.

BEGIN { state = 0 ; ce = 0 ; seq = "" ; ac = "" ; de = "" ; ra = "" ; rx = "" ; aa = "" ; }

/^ID/ { seq = "SW:" $2 ; gsub(/;/, "", seq) ; next ; }
/^AC/ { ac = $2 ; gsub(/;/, "", ac) ; next ; }
/^DE/ { de = substr($0, 6) ; next ; }

# RA: drop the periods, turn every ", " separator into the start of a new
# Author line, then drop the ';' characters.
/^RA/ {
  ra = substr($0, 6) ;
  gsub(/\./, "", ra) ;
  gsub(/, /, "\nAuthor \"", ra) ;
  gsub(/;/, "", ra) ;
  next ;
}

# RX: keep what follows the "RX   MEDLINE; " prefix, without periods.
/^RX   MEDLINE/ { rx = substr($0, 15) ; gsub(/\./, "", rx) ; next ; }

# "//" terminates the record: print it if it was flagged as C. elegans,
# then clear every per-record variable so that nothing (in particular the
# title and the peptide text) leaks into the next record.
/^\/\// {
  if (ce)
    {
      printf("\nProtein %s\n",seq);
      printf("Database SWISSPROT %s\n",ac);
      if (de != "") printf("Title \"%s\n",de);
      if (ra != "") printf("Author \"%s\n",ra);
      if (rx != "") printf("Medline_acc %s\n",rx);
      if (state>1)  printf("\n%s\n", aa) ;
    }
  state = 0 ; ce = 0 ;
  seq = "" ; ac = "" ; de = "" ; ra = "" ; rx = "" ; aa = "" ;
  next ;
}

/^SQ/ { state = 2 ; next ; }

# Any line not consumed above.  The first one after SQ starts the Peptide
# paragraph text; every line in the sequence section is then appended with
# its digits and blanks removed.
{
  if (state == 2)
    {
      aa = "Peptide " seq "\n" ;
      state = 3 ;
    }
  if (state == 3)
    {
      pp = $0 ;
      gsub(/[0-9]/, "", pp) ;
      gsub(/ /, "", pp) ;
      aa = aa pp "\n" ;
    }
}

# Flag the record for output at its "//" terminator.
/^OS   CAENORHABDITIS ELEGANS/ { ce = 1 ; }





More information about the Acedb mailing list