Personal tools
You are here: Home / Example files / modified residues / ARIA files / Nomenclature.py

Nomenclature.py

src/py/legacy/Nomenclature.py

Python Source icon Nomenclature.py — Python Source, 7 KB (7454 bytes)

File contents

"""
A module to convert atom names between XPLOR/CNS and IUPAC nomenclature

authors: Jens Linge, EMBL & Institut Pasteur
         Jurgen Doreleijers, BMRB
"""
## Version-control metadata for this module. The $...$ values look like
## CVS/RCS keyword expansions (TODO confirm they are still maintained).
__author__   = "$Author: habeck $"
__revision__ = "$Revision: 1.1 $"
__date__     = "$Date: 2003/02/26 19:08:28 $"

import re, sys, os, string
import AminoAcid

## Module-level configuration.
## All paths below are written in unix notation.

## Root directory of the ARIA distribution.
## TODO (open question from the original authors): can this be derived
## from the location of this file instead of being hard-coded?
BaseDir = '/usr/home/jurgen/aria/aria1.0/Aria'

## Full path of the atom name library that holds both the XPLOR and the
## IUPAC names.
AtomLib_Xplor_FileName = ''.join([BaseDir, '/Nomenclature/AtomLIB-xplor'])

## Table of proton name swaps between XPLOR/CNS and IUPAC nomenclature.
## Each entry is (residues, nameA, nameB): for any residue in `residues`,
## nameA is renamed to nameB and nameB to nameA.  `residues` set to None
## means the swap applies to every residue.  Storing each swap once keeps
## both directions in sync (the old if/elif chain listed them separately
## and the HB1 -> HB3 direction had lost 'NLE').
## No (residue, atom name) pair occurs in more than one entry, so the
## order of the entries does not influence the result.
_ProtonNameSwaps = (
    #II. methylenes
    #1. GLY HA:
    (('GLY', 'GLI'), 'HA1', 'HA2'),
    #2. HB%:
    (('ARG', 'ASN', 'ASP', 'CYS', 'CYC', 'GLN', 'GLX', 'GLU', 'HIS',
      'LEU', 'NLE', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'TRP', 'TYR'),
     'HB1', 'HB3'),
    #3. HG%:
    (('ARG', 'GLN', 'GLX', 'CYC', 'GLU', 'NLE', 'LYS', 'MET', 'PRO'),
     'HG1', 'HG3'),
    #4. ILE HG1%:
    (('ILE',), 'HG11', 'HG13'),
    #5. HD:
    (('ARG', 'ASN', 'NLE', 'LYS', 'PRO'), 'HD1', 'HD3'),
    #6. LYS HE:
    (('LYS',), 'HE1', 'HE3'),

    #III. methyls
    #1. ALA beta:
    (('ALA',), 'HB1', 'HB2'),
    #2. VAL gamma1:
    (('VAL',), 'HG11', 'HG12'),
    #3. ILE, VAL gamma2:
    (('ILE', 'VAL'), 'HG21', 'HG22'),
    #4. ILE, LEU delta1:
    (('ILE', 'LEU'), 'HD11', 'HD12'),
    #5. LEU delta2:
    (('LEU',), 'HD21', 'HD22'),
    #6. MET, NLE epsilon:
    (('MET', 'NLE'), 'HE1', 'HE2'),
    #7. zeta, for every residue.
    # NOTE(review): no residue check here, so residues where HZ2 might be
    # a distinct proton (e.g. TRP) are swapped as well -- confirm intended.
    (None, 'HZ1', 'HZ2'),

    #IV. ARG NHs:
    (('ARG',), 'HH11', 'HH12'),
    (('ARG',), 'HH21', 'HH22'),
)


def ConvertCnsProtonNames(residueName, atomName):
    """
    convert an atomname from XPLOR/CNS to IUPAC nomenclature (or vice versa)
    residueName: a string which contains 1- or 3-letter code, e.g. 'A' or 'ALA'
                 it is passed to AminoAcid.AminoAcid() to get the
                 three-letter code
    atomName:    a string containing the atomnames, e.g. 'HG12'

    returns a string with the new atomname (all characters are uppercase)
    If the atom name doesn't have to be changed, it will return the input
    atom name (stripped and uppercase)

    If no three-letter code can be obtained for residueName, a warning is
    printed and atomName is returned exactly as it was passed in (neither
    stripped nor uppercased).
    """
    #I. get a clean three-letter code and strip & uppercase the atomName
    threeLetter = AminoAcid.AminoAcid(residueName)[1]
    # The former test `threeLetter[2] == ''` could never be true for a
    # string; it either raised IndexError or compared one character to ''.
    # Presumably AminoAcid.AminoAcid() yields an empty code for an unknown
    # residue -- TODO confirm against AminoAcid.py.
    if not threeLetter or len(threeLetter) < 3:
        print('WARNING: residue name %s not understood' % (residueName,))
        return atomName
    atomName = atomName.strip().upper()

    #II. look for a swap that applies to this residue and atom name
    for residues, nameA, nameB in _ProtonNameSwaps:
        if residues is not None and threeLetter not in residues:
            continue
        if atomName == nameA:
            return nameB
        if atomName == nameB:
            return nameA

    # no rule applies: hand back the cleaned-up input name
    return atomName


## Regular expression pattern matching just the definitions from the
## AQUA atom name library
_PseudoAtomDefPattern = re.compile(r"""
        ^def      \s+      # Start with def
         (\w+)    \s+      # Residue name
         \*       \s+      # Actual '*'
         (\w+\#)  \s+      # CNS pseudo atom name with trailing '#'
         (\w+)    \s*$     # IUPAC pseudo atom name
             """, re.IGNORECASE | re.MULTILINE | re.VERBOSE )

## Parsed mapping tables, keyed by library file name, so that each library
## file is read and parsed only once per process.
_PseudoAtomTables = {}


def _LoadPseudoAtomTable(fileName):
    """
    read the pseudo atom definitions from the atom name library fileName

    returns a dictionary of dictionaries:
        residue name -> { CNS pseudo atom name: IUPAC pseudo atom name }
    The residue and atom names are stored exactly as written in the file.

    The result is cached per file name; the 'Read ... definitions' message
    is therefore printed only the first time a file is loaded.
    If the file contains no definitions, an error is printed and the
    program is terminated with sys.exit(1).
    """
    if fileName in _PseudoAtomTables:
        return _PseudoAtomTables[fileName]

    # try/finally makes sure the file handle is closed even if read() fails
    handle = open(fileName, 'r')
    try:
        file_content = handle.read()
    finally:
        handle.close()

    match_list = _PseudoAtomDefPattern.findall(file_content)
    if not match_list:
        print("ERROR: No definitions read from file: %s" % (fileName,))
        sys.exit(1)
    print("Read %d definitions from file: %s" % (len(match_list), fileName))

    table = {}
    for residue, cnsName, iupacName in match_list:
        # setdefault keeps the definitions already collected for this
        # residue; a plain assignment would keep only the last one.
        table.setdefault(residue, {})[cnsName] = iupacName

    _PseudoAtomTables[fileName] = table
    return table


def ConvertCnsPseudoAtomNames(residueName, atomName):
    """
    convert a pseudo atomname from XPLOR/CNS to IUPAC nomenclature
    residueName: a string which contains full code, e.g. 'A' is not 'ALA'
                 but is allowed for DADE and RADE (this needs work)
                 it is passed to AminoAcid.AminoAcid() to get the code
                 used for the lookup
    atomName:    a string containing the atomnames, e.g. 'HG1#'

    returns a string with the new atomname
    If the atom name doesn't have to be changed (no definition for this
    residue or this atom name in the library file), it will return the
    input atom name (stripped and uppercase)

    e.g.    ALA HB#  -> MB
            LEU HD1# -> MD1
            VAL HG#  -> QG

    This last example might be more than expected since HG# doesn't expand
    to HG11 and the 5 other protons.

    The definitions are read from AtomLib_Xplor_FileName on first use.
    """
    #I. get a clean residue code and strip & uppercase the atomName
    residueCode = AminoAcid.AminoAcid(residueName)[1]
    atomName = atomName.strip().upper()

    #II. get the mapping table (parsed once, then served from the cache)
    iupac_name = _LoadPseudoAtomTable(AtomLib_Xplor_FileName)

    #III. look up the new name; an unknown residue or atom name is not an
    ## error, the input name is simply returned unchanged.
    # NOTE(review): atomName is uppercased but the table keys keep the
    # case used in the library file -- confirm the file is uppercase.
    newName = iupac_name.get(residueCode, {}).get(atomName)
    if newName is not None:
        print("Changed atom name: %s to: %s" % (atomName, newName))
        atomName = newName

    return atomName

if __name__ == "__main__":
    # Small manual check when the module is run as a script: convert the
    # ALA methyl pseudo atom and show the resulting name.
    converted = ConvertCnsPseudoAtomNames("ALA", "HB#")
    print(converted)
September 2019 »
September
MonTueWedThuFriSatSun
1
2345678
9101112131415
16171819202122
23242526272829
30