ENT2BIB.AWK

POSTMAST at GUNBRF.bitnet POSTMAST at GUNBRF.bitnet
Thu Feb 27 17:05:00 EST 1992


Distribution-File:
        "BIO-SOFT at genbank.bio.net"
        Schneider
        "marvin at molbio.cbs.umn.edu"
        "moberg at cis.ohio-state.edu"
        "prm at aber.ac.uk"

# ENT2BIB.AWK
# AWK script for conversion of MEDLIB table of contents files to BibTeX
# Version 2.0
#  1-MAY-1991 written by John S. Garavelli
# 27-FEB-1992 revised by John S. Garavelli to remove gsub function,
#             improve parsing of SO line, and add tags
#
# This script should work for these journals
# CABIOS, EMBO J., J. Biol. Chem., Mol. Microbiol.
#
# Here are model entries
#AU Nakamura-H.  Katayanagi-K.  Morikawa-K.  Ikehara-M.
#TI Structural models of ribonuclease H domains in reverse
#   transcriptases from retroviruses.
#SO Nucleic-Acids-Res.  1991 Apr 25.  19(8).  P 1817-1824.
#
#AU Nussinov-R.
#TI Compositional variations in DNA sequences.
#SO Comput-Appl-Biosci.  1991 July.  7.  P 287-293.
#
# Report problems to POSTMASTER at GUNBRF.BITNET

# Continuation line (begins with two blanks): append the text from column 3
# onward to whichever field -- author list or title -- is currently open,
# as indicated by authorflag / titleflag.
# substr() positions are 1-based.  With a start of 0, POSIX-conforming awks
# return only one character, so a comparison against two blanks can never
# succeed there; starting at 1 behaves the same in every awk.
{ if (substr($0, 1, 2) == "  ") {
    if (authorflag) theauthor = theauthor substr($0, 3, 80)
    else if (titleflag) thetitle = thetitle substr($0, 3, 80)
  }
}

# AU line: begin a fresh author list.  The text after the "AU " prefix
# (up to 80 characters starting at column 4) becomes the author string, and
# the flags are set so that following continuation lines extend the authors
# rather than the title.
$1 == "AU" {
  theauthor = substr($0, 4, 80)
  titleflag = 0
  authorflag = 1
}

# TI line: begin a fresh title.  The text after the "TI " prefix (up to 80
# characters starting at column 4) becomes the title string, and the flags
# are set so that following continuation lines extend the title rather than
# the author list.
$1 == "TI" {
  thetitle = substr($0, 4, 80)
  titleflag = 1
  authorflag = 0
}

# SO line: the source line closes a record.  Parse journal, year, month,
# volume, issue number and pages from it, reformat the accumulated author
# list, and print one BibTeX @article entry.
# Reads:  theauthor, thetitle (collected by the AU/TI/continuation rules).
# Writes: authorflag, titleflag (cleared); theauthor, thetitle (cleared after
#         the entry is printed so they cannot leak into the next record).
{ if ($1 == "SO") {
    authorflag = 0
    titleflag = 0

# Journal: hyphens separate abbreviated words, so each becomes ". "
    thejournal = $2
    while ((i = index(thejournal, "-"))) {
# Words ending in "eic" or "ids" (e.g. Nucleic, Acids) are not abbreviations
      if ((test = substr(thejournal, i-3, 3)) == "eic" || test == "ids") {
        thejournal = substr(thejournal, 1, i-1) " " substr(thejournal, i+1)
      }
      else thejournal = substr(thejournal, 1, i-1) ". " substr(thejournal, i+1)
    }

    theyear = $3
    themonth = $4
    if ((i = index(theyear, "."))) {
# The year itself closes the date field ("1991."): there is no month, and
# $4 belongs to the next field
      theyear = substr(theyear, 1, i-1)
      themonth = ""
    }
    else if ((i = index(themonth, "."))) themonth = substr(themonth, 1, i-1)

    nso = split($0, SO, ".")
# Correction for Mol. Microbiol.: the volume sits inside the date field
    if (index(SO[2], "(")) thevolume = substr($5, 1, length($5)-1)
    else thevolume = substr(SO[3], 3)
    while (substr(thevolume, 1, 1) == " ") thevolume = substr(thevolume, 2)
# "19(8)" means volume 19, issue number 8
    thenumber = ""
    if ((i = index(thevolume, "("))) {
      thenumber = substr(thevolume, i+1, length(thevolume)-i-1)
      thevolume = substr(thevolume, 1, i-1)
    }

# Pages are the last non-blank period-delimited field; scanning backwards
# works whether or not the line ends with a period or trailing blanks
    k = nso
    while (k > 1 && SO[k] ~ /^[ \t]*$/) k--
    thepages = SO[k]
# Strip the "P " marker only when it is actually present
    if ((i = index(thepages, "P "))) thepages = substr(thepages, i+2)
    while (substr(thepages, 1, 1) == " ") thepages = substr(thepages, 2)

# Authors: period-delimited entries of the form Lastname-I-I
    tag = ""
    n = split(theauthor, authors, ".")
    theauthor = ""
    count = 0
    for (i = 1; i <= n; i++) {
# Trim spaces left by split
      while (substr(authors[i], 1, 1) == " ") authors[i] = substr(authors[i], 2)
# Skip empty entries (text after the final period, doubled periods)
      if (authors[i] != "") {
        count++
# The last name normally ends at the first hyphen.  Spaces aren't supposed
# to separate it from the initials, but they do; with neither separator
# the whole entry is the last name.
        j = index(authors[i], "-")
        if (j == 0) j = index(authors[i], " ")
        if (j == 0) j = length(authors[i]) + 1
        lastname = substr(authors[i], 1, j-1)
        authors[i] = substr(authors[i], j+1)
        while ((j = index(authors[i], "-"))) {
          authors[i] = substr(authors[i], 1, j-1) " " substr(authors[i], j+1)
        }
        p = split(authors[i], parts)
        authors[i] = ""
        for (j = 1; j <= p; j++) {
# Here is the normal case of an initial
          if (length(parts[j]) == 1) authors[i] = authors[i] parts[j] ". "
# Here is the case of Jr, etc. (not always immediately after lastname!)
          else if (parts[j] == "JR" || parts[j] == "Jr" )
            lastname = lastname " " parts[j] "."
          else if (parts[j] == "II" || parts[j] == "III" || parts[j] == "IV")
            lastname = lastname " " parts[j]
# Here is the case of a correctly hyphenated last name
          else lastname = lastname "-" parts[j]
        }
        if (count == 1) theauthor = authors[i] lastname
        else theauthor = theauthor " and " authors[i] lastname
# The citation tag is built from the first two last names
        if (count < 3) tag = tag lastname "."
      }
    }
# Remove blanks from the tag (e.g. from "Smith Jr.") and append the year
    while ((i=index(tag, " "))) tag = substr(tag, 1, i-1) substr(tag, i+1)
    tag = tag theyear

    printf("@article{%s,\n", tag)
    printf("  author = \"%s\",\n", theauthor)
    printf("   title = \"%s\",\n", thetitle)
    printf(" journal = \"%s\",\n", thejournal)
    printf("    year = %s,\n", theyear)
    if (themonth != "") printf("   month = \"%s\",\n", themonth)
    printf("  volume = \"%s\",\n", thevolume)
    if (thenumber != "") printf("  number = \"%s\",\n", thenumber)
    printf("   pages = \"%s\"\n}\n\n", thepages)

# Start the next record clean: a record without AU or TI must not inherit
# (and re-parse) this record's already formatted values
    theauthor = ""
    thetitle = ""
  }
}




More information about the Bio-soft mailing list