TFDtoGCG - fix for TFD 7.0

David Mathog mathog at seqvax.caltech.edu
Thu Jun 24 16:31:00 EST 1993


David Ghosh recently sent out a warning that TFD 7.0's sites.dat file would
have a slightly different format.  This change will break the previous
version of TFDtoGCG that was posted to this newsgroup a few months ago.

The new version of TFDtoGCG appended to the end of this message should work
with TFD 7.0 (but not with TFD releases earlier than 7.0).  I say "should"
because the data set hasn't been released yet, so the modified program has
not been tested.  No problems are expected, since the only changes were to
the record structure.

As usual, no warranties, etc.

David Mathog
mathog at seqvax.bio.caltech.edu
Manager, sequence analysis facility, biology division, Caltech


--8<----8<----8<----8<----8<- CUT HERE  ---8<----8<----8<----8<----8<--
C	TFDTOGCG.FOR
C	9-APR-1993 David Mathog, Division of Biology, Caltech
C	mathog at seqvax.bio.caltech.edu
C
C	This little program takes one of David Ghosh's site.dat
C	files and reformats it for GCG usage.
C
C	Put the output file into the GCG system as: GENMOREDATA:TFSITES.DAT
C
C	FTP to NCBI.NLM.NIH.GOV and look in repository/TFD/tfd.ascii
C	for the site.dat file.
C
C	This works with GCG 7.2 and VMS 5.5-2 and *may* work on other 
C	systems (not tested).
C
C	Instructions for building an executable:
C
C	   $ for/nolis  tfdtogcg
C	   $ link/nomap tfdtogcg
C
C	Example session (program's prompts not shown):
C
C	   $ run tfdtogcg
C          site.dat
C	   temporary.out
C	   9-APR-1993, Converted TFD X.Y to GCG format
C
C	   $ copy temporary.out genmoredata:tfsites.dat
C	   $ set file/prot=w:re genmoredata:tfsites.dat
C	   $ delete temporary.out.
C
C	There is practically NO error checking, so watch out!
C
C	Revision 1.0 24-JUN 1993, David Mathog
C	  TFD 7.0 changes in sites file are:
C	    GENOME*1       removed
C	    TRN_UNIT*20    resized to 30
C	  Modified ghosh_record to reflect these changes.
C
	implicit none
c
c	Main program.  Prompts (units 5/6) for an input file name, an
c	output file name and any number of comment lines, then reads
c	the input one ghosh_record at a time and writes each one to
c	the output as a gcg_record.
c
c	inline   buffer for one comment line typed by the user
c	infile   name of the TFD sites file to read    (unit 10)
c	outfile  name of the GCG format file to create (unit 11)
c	inlen    character count obtained with the Q edit descriptor
c	istat    IOSTAT of the record read: zero = a record was read,
c	         negative = end of file, positive = read error
c	ok       .true. while comment lines are still being entered
c
	character*2048 inline,infile,outfile
	integer*4 inlen,istat
	logical   ok
c
c  Ghosh lays out site records like this in TFD 7.0
c
	structure /ghosh_record/
	  character SITE_ID*6
	  character FAC_NAME*25
	  character SEQ_NAME*30
	  character NA_SEQ*45
	  character SEQ_TYPE*1
	  character SYSTEM*10
	  character TRN_UNIT*30
	  character COMMENTS*80
	  character MAIN_REF*60
	  character FAC_SOURCE*16
	  character LOCAT_REF*20
	  character LOCATION*20
	  character METHOD*11
	  character N_PROB*8
	  character REF_N*8
	  character STRAND*1
	  character BINDING*1
	end structure
c
c  GCG lays out TFSITES.DAT records like this.  SEQ_NAME is one
c  character longer than Ghosh's so that fixseqname can put a ";"
c  in front of the name without losing its last character.
c
	structure /GCG_RECORD/
	  character SEQ_NAME*31
	  character SPACER1*2
	  character NA_SEQ*45
	  character SPACER2*5
	  character FAC_NAME*25
	  character SPACER3*1
	  character MAIN_REF*60
	end structure
c
	record /ghosh_record/ ghosh
	record /gcg_record/   gcg
c
c	Init the spacers for the GCG record.  They are set once and
c	go out unchanged with every record written below.
c
	gcg.spacer1 = '0 '
	gcg.spacer2 = ' 0 ! '
	gcg.spacer3 = ' '
c
	write(6,*)'TFDtoGCG'
	write(6,*)'This program converts one of David Ghosh''s site'
	write(6,*)'  files to GCG''s format'
c
c	Input file: existing, unformatted, variable length records,
c	opened read-only.
c
	write(6,*)'Input the name of the file to process'
	read(5,'(q,a)')inlen,infile(1:inlen)
c
	open(unit=10,file=infile(1:inlen)
	1 ,form='UNFORMATTED',organization='SEQUENTIAL',status='OLD'
	1 ,recordtype='VARIABLE', READONLY)
c
c	Output file: must not exist yet (status='NEW').
c
	write(6,*)'Input the name of the output file'
	read(5,'(q,a)')inlen,outfile(1:inlen)
c
	open(unit=11,file=outfile(1:inlen)
	1 ,form='UNFORMATTED',status='NEW',organization='SEQUENTIAL'
	2 ,recordtype='STREAM_LF',recl = 255)
c
c	Get the comments.  Each line is copied to the output with one
c	leading blank; an empty line (length zero) ends the comments.
c
	write(6,*)'Enter as many lines of comments as you would like'
	write(6,*)'  End each line with a <return>'
	write(6,*)'  End the last line with <return><return>'
	ok = .true.
	do while (ok)
	   read(5,'(q,a)')inlen,inline(1:inlen)
	   if(inlen.eq.0)then
	       ok = .false.
	   else
	       write(11)' '//inline(1:inlen)
	   end if
	end do
	write(6,*)'Working ...'
c
c	Write a title line: the column headings in a GCG record
c
	GCG.SEQ_NAME = 'NAME'
	GCG.FAC_NAME = 'FACTOR'
	GCG.NA_SEQ   = 'SEQUENCE'
	GCG.MAIN_REF = 'REFERENCE'
	write(11)gcg
c
c	Now write the divider
c
	write(11)'..'
c
c	Convert the records.  Only IOSTAT = 0 means that a record was
c	actually read.  A negative value is end of file; a positive
c	value is a read error, and in that case GHOSH does not hold
c	a valid record, so nothing may be written for it and the
c	loop must stop.
c
	istat=0
	do while(istat.eq.0)
	    read(10,iostat=istat)ghosh
	    if(istat.eq.0)then
c	       the appended blank pads the 30 character name to 31
	       GCG.SEQ_NAME = GHOSH.SEQ_NAME//' '
	       GCG.FAC_NAME = GHOSH.FAC_NAME
	       GCG.NA_SEQ   = GHOSH.NA_SEQ
	       GCG.MAIN_REF = GHOSH.MAIN_REF
	       call fixseqname(GCG.SEQ_NAME,GHOSH.N_PROB)
	       write(11)gcg
	    end if
	end do
	close(unit=10)
	close(unit=11)
c
c	Report a read error instead of claiming normal completion.
c
	if(istat.gt.0)then
	   write(6,*)'TFDtoGCG: error reading input, IOSTAT =',istat
	   stop 'TFDtoGCG: aborted, output is incomplete'
	end if
	stop 'TFDtoGCG: normal completion'
	end

	subroutine fixseqname(NAME,N_PROB)
c
c	Rewrite a site name, in place, into the form used in the
c	GCG file.
c
c	NAME    (in/out) the site name.  On return:
c	          - it starts with ";" if the value read from N_PROB
c	            is greater than LIMIT (the old last character of
c	            NAME is dropped to make room),
c	          - every blank up to the last non-blank character
c	            has become "_",
c	          - trailing blanks/underscores are blanks,
c	          - it is "UNKNOWN" if nothing else was left.
c	N_PROB  (in) character field holding a number.  It is read
c	        with an F edit descriptor as wide as the field and
c	        with 2 implied decimal places, so a field without a
c	        decimal point is scaled by 0.01.
c
	implicit none
	character name*(*)
	character N_prob*(*)
	character fmt*16
	integer i,last,inlen,nlen
	real limit,value
	parameter (limit = 5.0e-4)
c
c	Do the length this way so that it will still work if the
c	length of NAME, N_PROB change.
c
	inlen=len(NAME)
	nlen =len(N_PROB)
c
c	first put in a ";", if needed to indicate a frequent motif.
c	The format "(Fw.2)", w = width of N_PROB, is built at run
c	time with an internal write; this replaces the variable
c	format expression F<NLEN>.2, which is a DEC extension that
c	not all compilers accept.  Blanks left in front of the width
c	by the I4 field are not significant in a format.
c
	write(fmt,1000)nlen
1000	format('(F',i4,'.2)')
	read(N_prob,fmt)value
	if(value.gt.limit)NAME = ';'//NAME(1:inlen-1)
c
c	now convert any internal spaces to underscores
c	if it doesn't find *any* nonspaces, the name becomes "UNKNOWN"
c	This is done in two passes.
c
c	pass 1: every blank becomes an underscore
c
	do i = 1, inlen
	  if(name(i:i).eq.' ')name(i:i)='_'
	end do
c
c	pass 2: walk back from the end turning underscores into
c	blanks again until a character that is not an underscore is
c	found.  LAST keeps the value INLEN+1 for as long as no such
c	character has been seen.
c
	last=inlen+1
	i = inlen
	do while(last.eq.inlen+1 .and. i.gt.0)
	  if(name(i:i).ne.'_')then
	     last = i
	  else
	     name(i:i) = ' '
	     i = i-1
	  end if
	end do
c
c	nothing but underscores: there was no usable name
c
	if(last.eq.inlen+1)name='UNKNOWN'
c
	return
	end




More information about the Bio-soft mailing list