PseudoAtom.py
PseudoAtom.py
—
Python Source,
20 KB (21183 bytes)
File contents
"""
This module provides methods for the conversion of CNS atom names into
pseudoatoms and vice versa.
The nomenclature of the pseudoatoms is in the same spirit as in:
Markley et al. JMB 280, 933-952 (1998)
Methods:
Pseudo2Atom(atom) e.g. 'QA' -> 'ha#'
'QR' -> 'hd# or name he# or name hz'
Pseudo2Tuple(atom) e.g. 'QA' -> ('ha#',)
'QR' -> ('hd#', 'he#', 'hz')
Atom2Pseudo(atomname, aminoacid)
e.g. 'HB1', 'TYR' -> ('QB', )
'HG11', 'VAL' -> ('QG1', 'QQG')
Pseudo2IupacTuple(threelettercode, atomname)
"""
__author__ = "$Author: habeck $"
__revision__ = "$Revision: 1.1 $"
__date__ = "$Date: 2003/02/26 19:08:28 $"
import re, string
###############################################################################
# Pseudoatom name -> CNS atom selection string.  Built once at import time
# instead of on every call.
_PSEUDO_TO_CNS_SELECTION = {
    'QA': 'ha#',
    'QB': 'hb#',
    'QG': 'hg#',
    'QG1': 'hg1#',
    'QG2': 'hg2#',
    'QQG': 'hg#',
    'QD': 'hd#',
    'QD1': 'hd1#',
    'QD2': 'hd2#',
    'QQD': 'hd#',
    'QE': 'he#',
    'QE2': 'he2#',
    'QR': 'hd# or name he# or name hz',
    'QZ': 'hz#',
    'QH1': 'hh1#',
    'QH2': 'hh2#',
}


def Pseudo2Atom(atom):
    """
    converts pseudoatoms into a CNS readable format

    input:   atom name as a string (can be a pseudoatom, e.g. 'QB');
             case and surrounding whitespace are ignored
    output:  CNS readable atom name, e.g.
                 'QA' -> 'ha#'
                 'QR' -> 'hd# or name he# or name hz'
             a name that is not a known pseudoatom is returned
             upper-cased and stripped, otherwise unchanged
    """
    # str methods and dict.get() replace string.upper()/string.strip() and
    # dict.has_key(), which no longer exist in Python 3; the result is the
    # same under Python 2.
    atom = atom.upper().strip()
    return _PSEUDO_TO_CNS_SELECTION.get(atom, atom)
# Hydrogen groups shared by many residues.
# Every group is a triple: (pseudoatom name, wildcard stem, IUPAC atom names).
_IUPAC_BETA_CH2 = ('QB', 'HB', ('HB2', 'HB3'))
_IUPAC_GAMMA_CH2 = ('QG', 'HG', ('HG2', 'HG3'))
_IUPAC_DELTA_CH2 = ('QD', 'HD', ('HD2', 'HD3'))
_IUPAC_GAMMA2_CH3 = ('QG2', 'HG2', ('HG21', 'HG22', 'HG23'))
_IUPAC_DELTA1_CH3 = ('QD1', 'HD1', ('HD11', 'HD12', 'HD13'))
_IUPAC_EPSILON_CH3 = ('QE', 'HE', ('HE1', 'HE2', 'HE3'))
_IUPAC_GLYCINE_CH2 = ('QA', 'HA', ('HA2', 'HA3'))
_IUPAC_RING_DELTA = ('QD', 'HD', ('HD1', 'HD2'))
_IUPAC_RING_EPSILON = ('QE', 'HE', ('HE1', 'HE2'))

# Side-chain amide protons of GLN/GLX; both QE2 and QE address them.
_IUPAC_GLN_GROUPS = [
    _IUPAC_BETA_CH2,
    _IUPAC_GAMMA_CH2,
    ('QE2', 'HE2', ('HE21', 'HE22')),
    ('QE', 'HE', ('HE21', 'HE22')),
]

# Compact residue table.  Some notes:
#   HZ in TYR is not included in pseudoatom QR
#   QD and QD2 are used in ASN
#   QE and QE2 are used in GLN
#   QD and QD1 are used in ILE
#   TRP only has the beta methylene group: tryptophan has no gamma
#   hydrogens, so it must not offer a QG2 / HG2 group.
_IUPAC_GROUPS = {
    'ALA': [('QB', 'HB', ('HB1', 'HB2', 'HB3'))],
    'ARG': [_IUPAC_BETA_CH2,
            _IUPAC_GAMMA_CH2,
            _IUPAC_DELTA_CH2,
            ('QH1', 'HH1', ('HH11', 'HH12')),
            ('QH2', 'HH2', ('HH21', 'HH22'))],
    'ASN': [_IUPAC_BETA_CH2,
            ('QD2', 'HD2', ('HD21', 'HD22')),
            ('QD', 'HD', ('HD21', 'HD22'))],
    'ASP': [_IUPAC_BETA_CH2],
    'CYS': [_IUPAC_BETA_CH2],
    'CYC': [_IUPAC_BETA_CH2, _IUPAC_GAMMA_CH2],
    'GLN': _IUPAC_GLN_GROUPS,
    'GLX': _IUPAC_GLN_GROUPS,
    'GLU': [_IUPAC_BETA_CH2, _IUPAC_GAMMA_CH2],
    'GLY': [_IUPAC_GLYCINE_CH2],
    'GLI': [_IUPAC_GLYCINE_CH2],
    'HIS': [_IUPAC_BETA_CH2],
    'ILE': [('QG1', 'HG1', ('HG12', 'HG13')),
            _IUPAC_GAMMA2_CH3,
            ('QQG', 'HG', ('HG12', 'HG13', 'HG21', 'HG22', 'HG23')),
            _IUPAC_DELTA1_CH3,
            ('QD', 'HD', ('HD11', 'HD12', 'HD13'))],
    'LEU': [_IUPAC_BETA_CH2,
            _IUPAC_DELTA1_CH3,
            ('QD2', 'HD2', ('HD21', 'HD22', 'HD23')),
            ('QQD', 'HD', ('HD11', 'HD12', 'HD13',
                           'HD21', 'HD22', 'HD23'))],
    'LYS': [_IUPAC_BETA_CH2,
            _IUPAC_GAMMA_CH2,
            _IUPAC_DELTA_CH2,
            ('QE', 'HE', ('HE2', 'HE3')),
            ('QZ', 'HZ', ('HZ1', 'HZ2', 'HZ3'))],
    'NLE': [_IUPAC_BETA_CH2,
            _IUPAC_GAMMA_CH2,
            _IUPAC_DELTA_CH2,
            _IUPAC_EPSILON_CH3],
    'MET': [_IUPAC_BETA_CH2, _IUPAC_GAMMA_CH2, _IUPAC_EPSILON_CH3],
    'PHE': [_IUPAC_BETA_CH2,
            _IUPAC_RING_DELTA,
            _IUPAC_RING_EPSILON,
            ('QR', 'HR', ('HD1', 'HD2', 'HE1', 'HE2', 'HZ'))],
    'PRO': [_IUPAC_BETA_CH2, _IUPAC_GAMMA_CH2, _IUPAC_DELTA_CH2],
    'SER': [_IUPAC_BETA_CH2],
    'THR': [('QB', 'HB', ('HB', )),
            _IUPAC_GAMMA2_CH3],
    'TRP': [_IUPAC_BETA_CH2],
    'TYR': [_IUPAC_BETA_CH2,
            _IUPAC_RING_DELTA,
            _IUPAC_RING_EPSILON,
            ('QR', 'HR', ('HD1', 'HD2', 'HE1', 'HE2'))],
    'VAL': [('QG1', 'HG1', ('HG11', 'HG12', 'HG13')),
            _IUPAC_GAMMA2_CH3,
            ('QQG', 'HG', ('HG11', 'HG12', 'HG13',
                           'HG21', 'HG22', 'HG23'))],
}


def _expand_iupac_groups(groups_by_residue):
    """
    Expand the compact residue table into the nested lookup dictionary
    {residue: {name: tuple of IUPAC atom names}}.
    """
    table = {}
    for residue, groups in groups_by_residue.items():
        names = {}
        for pseudo, stem, atoms in groups:
            # A group can be addressed by its pseudoatom name ('QB'), by
            # the wildcard form ('HB%') or by the bare stem ('HB').
            for key in (pseudo, stem + '%', stem):
                names[key] = atoms
        table[residue] = names
    return table


# Built once at import time instead of on every call.
_IUPAC_PSEUDOATOMS = _expand_iupac_groups(_IUPAC_GROUPS)


def Pseudo2IupacTuple(threelettercode, atomname):
    """
    converting pseudoatoms into tuples of atomnames in IUPAC nomenclature

    input:   threelettercode must be a string, e.g. 'TRP'
             atomname (in IUPAC nomenclature, but may contain pseudoatoms
             as specified in Markley et al. JMB 280, 933-952 (1998), may
             contain the wildcards #?%*, may be without the final number
             in the atomname, e.g. 'HB' instead of 'HB%')
             atomname must be a string! No tuples or lists!

    output:  tuple of IUPAC atomnames, without wildcards
             the output is a tuple of strings!
             if the residue or the atomname is not in the table, a
             one-element tuple with the normalised atomname is returned

    example: Pseudo2IupacTuple('ALA', 'HB%')
             returns the tuple of strings:
             ('HB1', 'HB2', 'HB3')

    Note:    the strings threelettercode and atomname may
             be upper or lower case
             The returned atomname is ALWAYS upper case!
    """
    # Convert the wildcards "#", "?", "*" to "%".  Plain str.replace is
    # used because the former patterns '\#', '\?' and '\*' were non-raw
    # string literals containing invalid escape sequences.
    for wildcard in ('#', '?', '*'):
        atomname = atomname.replace(wildcard, '%')
    atomname = atomname.upper().strip()
    threelettercode = threelettercode.upper().strip()

    # Unknown residues and unknown atom names both fall back to the
    # normalised name wrapped in a tuple.
    residue_table = _IUPAC_PSEUDOATOMS.get(threelettercode, {})
    return residue_table.get(atomname, (atomname, ))
###############################################################################
# Pseudoatom name -> tuple of CNS atom names.  Built once at import time
# instead of on every call.
_PSEUDO_TO_CNS_TUPLE = {
    'QA': ('ha#', ),
    'QB': ('hb#', ),
    'QG': ('hg#', ),
    'QG1': ('hg1#', ),
    'QG2': ('hg2#', ),
    'QQG': ('hg#', ),
    'QD': ('hd#', ),
    'QD1': ('hd1#', ),
    'QD2': ('hd2#', ),
    'QQD': ('hd#', ),
    'QE': ('he#', ),
    'QE2': ('he2#', ),
    'QR': ('hd#', 'he#', 'hz'),
    'QZ': ('hz#', ),
    'QH1': ('hh1#', ),
    'QH2': ('hh2#', ),
}


def Pseudo2Tuple(atom):
    """
    converts pseudoatoms into a tuple of atomnames in a CNS readable format

    input:   atom name as a string (can be a pseudoatom, e.g. 'QR');
             case and surrounding whitespace are ignored
    output:  tuple of CNS readable atoms, e.g.
                 'QA' -> ('ha#', )
                 'QR' -> ('hd#', 'he#', 'hz')
             a name that is not a known pseudoatom is returned
             upper-cased and stripped as a one-element tuple
    """
    # str methods and dict.get() replace string.upper()/string.strip() and
    # dict.has_key(), which no longer exist in Python 3; the result is the
    # same under Python 2.
    atom = atom.upper().strip()
    return _PSEUDO_TO_CNS_TUPLE.get(atom, (atom, ))
###############################################################################
def _atom2pseudo_names(stem):
    """
    Return the atom names accepted for the hydrogen group *stem*:
    the stem followed by 1, 2 or 3 and by the wildcards '%' and '#'.
    """
    return tuple([stem + suffix for suffix in ('1', '2', '3', '%', '#')])


# Rules shared by several residues: (accepted atom names, pseudoatoms).
_ATOM2PSEUDO_QB = (_atom2pseudo_names('HB'), ('QB', ))
_ATOM2PSEUDO_QG = (_atom2pseudo_names('HG'), ('QG', ))
_ATOM2PSEUDO_QD = (_atom2pseudo_names('HD'), ('QD', ))
_ATOM2PSEUDO_QE = (_atom2pseudo_names('HE'), ('QE', ))
_ATOM2PSEUDO_QG1_QQG = (_atom2pseudo_names('HG1'), ('QG1', 'QQG'))
_ATOM2PSEUDO_QG2_QQG = (_atom2pseudo_names('HG2'), ('QG2', 'QQG'))
_ATOM2PSEUDO_RING = [
    _ATOM2PSEUDO_QB,
    (_atom2pseudo_names('HD'), ('QD', 'QR')),
    (_atom2pseudo_names('HE'), ('QE', 'QR')),
]

# Per residue: rules tried in order, the first one containing the atom
# name wins.
_ATOM2PSEUDO_RULES = {
    'ALA': [_ATOM2PSEUDO_QB],
    'ARG': [_ATOM2PSEUDO_QB,
            _ATOM2PSEUDO_QG,
            _ATOM2PSEUDO_QD,
            (_atom2pseudo_names('HH1'), ('QH1', )),
            (_atom2pseudo_names('HH2'), ('QH2', ))],
    # The amide group is accepted with the indexed wildcard (HD2%, HD2#)
    # as well as with the short one (HD%, HD#).
    'ASN': [_ATOM2PSEUDO_QB,
            (_atom2pseudo_names('HD2') + ('HD%', 'HD#'), ('QD2', ))],
    'ASP': [_ATOM2PSEUDO_QB],
    'CYS': [_ATOM2PSEUDO_QB],
    'GLU': [_ATOM2PSEUDO_QB, _ATOM2PSEUDO_QG],
    # Same treatment of the amide group as for ASN.
    'GLN': [_ATOM2PSEUDO_QB,
            _ATOM2PSEUDO_QG,
            (('HE21', 'HE22', 'HE2%', 'HE2#', 'HE%', 'HE#'), ('QE2', ))],
    'GLY': [(_atom2pseudo_names('HA'), ('QA', ))],
    'HIS': [_ATOM2PSEUDO_QB],
    'ILE': [(_atom2pseudo_names('HD1'), ('QD1', )),
            _ATOM2PSEUDO_QG1_QQG,
            _ATOM2PSEUDO_QG2_QQG],
    'LEU': [_ATOM2PSEUDO_QB,
            (_atom2pseudo_names('HD1'), ('QD1', 'QQD')),
            (_atom2pseudo_names('HD2'), ('QD2', 'QQD'))],
    'LYS': [_ATOM2PSEUDO_QB,
            _ATOM2PSEUDO_QG,
            _ATOM2PSEUDO_QD,
            _ATOM2PSEUDO_QE,
            (_atom2pseudo_names('HZ'), ('QZ', ))],
    'MET': [_ATOM2PSEUDO_QB, _ATOM2PSEUDO_QG, _ATOM2PSEUDO_QE],
    'PHE': _ATOM2PSEUDO_RING,
    'PRO': [_ATOM2PSEUDO_QB, _ATOM2PSEUDO_QG, _ATOM2PSEUDO_QD],
    'SER': [_ATOM2PSEUDO_QB],
    'THR': [(_atom2pseudo_names('HG2'), ('QG2', ))],
    'TRP': [_ATOM2PSEUDO_QB],
    'TYR': _ATOM2PSEUDO_RING,
    'VAL': [_ATOM2PSEUDO_QG1_QQG, _ATOM2PSEUDO_QG2_QQG],
}

# Residues with two groups at the same position: any other atom name that
# starts with the given two letters is ambiguous between both groups.
_ATOM2PSEUDO_FALLBACKS = {
    'ILE': ('HG', ('QQG', 'QG1', 'QG2')),
    'LEU': ('HD', ('QQD', 'QD1', 'QD2')),
    'VAL': ('HG', ('QQG', 'QG1', 'QG2')),
}


def Atom2Pseudo(atomname, aminoacid):
    """
    INPUT:   cns atomname and aminoacid type in 3-letter code
             (both strings; case and surrounding whitespace are ignored)
    OUTPUT:  returns a tuple of possible pseudoatoms, best hit comes first
             within the tuple
             if it is not possible to find a pseudoatom, an empty tuple
             will be returned

    e.g.     'HB1', 'TYR'   ->  ('QB', )
             'HG11', 'VAL'  ->  ('QG1', 'QQG')
             'HG%', 'VAL'   ->  ('QQG', 'QG1', 'QG2')

    The nomenclature of the pseudoatoms is in the same spirit as in:
    Markley et al. JMB 280, 933-952 (1998)

    easy:      ALA, ASP, ASN, CYS, GLN, GLU, GLY, HIS, LYS, MET,
               PRO, SER, THR, TRP
    ambiguous: ARG: QH1 or QH2
               ILE: QG1, QG2 or QQG
               LEU: QD1, QD2 or QQD
               PHE: QR
               TYR: QR
               VAL: QG1, QG2 or QQG
    """
    aminoacid = aminoacid.upper().strip()
    atomname = atomname.upper().strip()

    for names, pseudoatom in _ATOM2PSEUDO_RULES.get(aminoacid, ()):
        if atomname in names:
            return pseudoatom

    # Ambiguous names such as 'HG%' in ILE/VAL or 'HD%' in LEU.  The ILE
    # branch used to compare ('==') instead of assigning and the VAL branch
    # sliced the (empty) result tuple instead of the atom name, so neither
    # could ever produce a result.
    fallback = _ATOM2PSEUDO_FALLBACKS.get(aminoacid)
    if fallback is not None and atomname[:2] == fallback[0]:
        return fallback[1]
    return ()
###############################################################################
#test code:
if __name__ == "__main__":
    # Smoke test, run only when the module is executed as a script.
    # Every print() receives one pre-formatted string, so the output is the
    # same whether print is the Python 2 statement or the Python 3 function.
    print('testing module:\n')
    print("  converting ('TYR', 'QR') to: %s"
          % (Pseudo2IupacTuple('tyr', 'QR'), ))
    print("  should be IUPAC nomenclature...")
    print('\nciao.')

ARIA is part of the ELIXIR infrastructure.