import numpy as np
import copy
# Отображение структур
import IPython.display
import ipywidgets
from IPython.display import display,display_svg,SVG,Image
import pybel
#from pybel import
import openbabel as ob
#from pybel import *
#import numpy as np
#from openbabel import OBAtomAtomIter, OBAtomBondIter, OBTypeTable
# Open Drug Discovery Toolkit (ODDT)
import oddt
import oddt.docking
import oddt.interactions # oddt.interactions module calculates interactions between two molecules
# (proein-protein, protein-ligand, small-small). Currently following interacions are
# implemented:
# • hydrogen bonds
# • halogen bonds
# • pi stacking (parallel and perpendicular)
# • salt bridges
# • hydrophobic contacts
# • pi-cation
# • metal coordination
# • pi-metal
# Cheminformatics
import rdkit
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import AllChem
from rdkit.Chem import MolFromSmiles
from rdkit.Chem.Draw import IPythonConsole
import pmx # Модуль для манипулирования pdb
# PMX (formerly pymacs) has started as a small bunch of classes to read structure files such as pdb or gro
# and trajectory data in gromacs xtc format. Over the years it has been extended towards a versatile (bio-)
# molecular structure manipulation package with some additional functionalities, e.g. gromacs file parsers
# and scripts for setup and analysis of free energy calculations.
import nglview
import ipywidgets
w = nglview.show_structure_file('1lmp_hom.pdb')
w
Image(filename='1lmp_hom.png')
# Подготовка белка
pdb = pmx.Model('1lmp_hom.pdb')
for r in pdb.residues[-5:]:
print r # Посмотрим последние 5 остатков
Создадим новые объекты: разделим белок и лиганд:
newpdb = pdb.copy()
for r in newpdb.residues[-3:]:
newpdb.remove_residue(r)
lig = pdb.copy()
for r in lig.residues[:-3]:
lig.remove_residue(r)
Найдем геометрический центр:
# Первый вариант
#
# x = []
# y = []
# z = []
#
# for a in lig.atoms:
# coords = a.x
# x.append(coords[0])
# y.append(coords[1])
# z.append(coords[2])
#
# geom_center = [np.mean(x), np.mean(y), np.mean(z)]
# geom_center
# Второй вариант
coords = np.zeros((len(lig.atoms),3))
for c,a in enumerate(lig.atoms):
coords[c,:] = a.x # Coordinates of the atom (atom a)
# The arithmetic mean
geom_center = np.mean(coords, axis = 0) # Axis along which we want to calculate the arithmetic mean
# axis = 0 means along the column and axis = 1 means
# working along the row.
print(geom_center)
newpdb.writePDB('prot.pdb')
lig.writePDB('lig.pdb')
Белок без лиганда:
w_p = nglview.show_structure_file('prot.pdb')
w_p
Image(filename='1lmp_prot.png')
Лиганд:
w_l = nglview.show_structure_file('lig.pdb')
w_l
Image(filename='1lmp_lig.png')
Белок
prot = oddt.toolkit.readfile('pdb','prot.pdb').next()
print prot.OBMol.AddPolarHydrogens()
print prot.OBMol.AutomaticPartialCharge()
print 'is it the first mol in 1lmp is protein?',prot.protein,':) and MW of this mol is:', prot.molwt
Лиганды
# Make two-dimensional molecules from SMILES
def chemdraw(smiles):
mol = Chem.MolFromSmiles(smiles)
AllChem.Compute2DCoords(mol)
display(mol)
# Make three-dimensional molecules from two-dimensional ones
def molTo3D(m2d):
m3d = Chem.AddHs(m2d)
Chem.AllChem.EmbedMolecule(m3d)
AllChem.MMFFOptimizeMolecule(m3d, maxIters=500, nonBondedThresh=200)
return m3d
# Создадим несколько структур для докинга низкомолекулярных соединений в белок:
# NAG (N-ацетил-D-глюкозамин) содержит в себе СH3C(=O)NH группу; проводем докинг с ним самим
# и с лигандами на его основе, где метильный радикал этой группы заменён на OH, NH3+, H, Ph, COO-
smiles = ['CC(=O)NC1C(C(C(OC1O)CO)O)O', 'OC(=O)NC1C(C(C(OC1O)CO)O)O', '[NH3+]C(=O)NC1C(C(C(OC1O)CO)O)O',
'C(=O)NC1C(C(C(OC1O)CO)O)O', 'C1=CC=CC=C1C(=O)NC1C(C(C(OC1O)CO)O)O', '[O-]C(=O)C(=O)NC1C(C(C(OC1O)CO)O)O']
mols = []
images = []
for s in enumerate(smiles):
print(s[1])
print("2D")
chemdraw(s[1])
m2d = Chem.MolFromSmiles(s[1])
m3d = molTo3D(m2d)
print("3D")
Chem.MolToPDBFile(m3d, str(s[0]) + ".pdb")
display(m3d)
mols = []
for i in range(len(smiles)):
m = oddt.toolkit.readfile('pdb', str(i) + ".pdb")
for j in m:
mols.append(j)
# Creates docking object
# Место докинга - приблизительное положение сайта связывания лиганда
# с белком (найденный ранее геометрический центр лиганда)
dock_obj= oddt.docking.AutodockVina.autodock_vina(
protein=prot,size=(20,20,20),center=geom_center,
executable='/usr/bin/vina',autocleanup=True, num_modes=5) # The number of docked poses must be between
# 1 and 9 (due to Autodock Vina limitation)
print dock_obj.tmp_dir
print " ".join(dock_obj.params)
Выдача:
# Docking
res = dock_obj.dock(mols,prot)
# Результаты докинга, отсортированные по афинности
import pandas as pd
res_sorted = sorted(res, key = lambda x : float(x.data['vina_affinity']))
hbs_total = []
hbs_strict = []
stack = []
phob = []
formulas = []
aff = []
rmsd = []
for i,r in enumerate(res):
hbs_total.append(len(oddt.interactions.hbonds(prot,r)[0]))
hbs_strict.append(oddt.interactions.hbonds(prot,r)[2].sum())
stack.append(len(oddt.interactions.pi_stacking(prot,r)[0]))
phob.append(len(oddt.interactions.hydrophobic_contacts(prot,r)[0]))
formulas.append(r.formula)
aff.append(r.data['vina_affinity'])
rmsd.append(r.data['vina_rmsd_ub'])
results = pd.DataFrame({"Chemical formula": [r.formula for r in res_sorted],
"Affinity": [r.data['vina_affinity'] for r in res_sorted],
"RMSD": [r.data['vina_rmsd_ub'] for r in res_sorted],
"H-bonds (total)": hbs_total, "H-bonds (strict)": hbs_strict,
"Stacking": stack, "Hydrophobic_contacts": phob})
results
for i,r in enumerate(res):
r.write(filename='r%s.pdb' % i, format='pdb', overwrite=True)
Покажем несколько структур:
Image(filename='d0.png')
Image(filename='d6.png')
Image(filename='d14.png')
Image(filename='d20.png')
Image(filename='d23.png')
Визуализация: PyMOLWiki, NGL Viewer
https://pymolwiki.org/index.php/Main_Page
https://github.com/arose/nglview
Докинг: ODDT, AutoDock Vina
http://vina.scripps.edu/tutorial.html
https://jcheminf.biomedcentral.com/articles/10.1186/s13321-015-0078-2
Хемоинформатика: RDKit, Open Babel
https://github.com/rdkit/rdkit
http://openbabel.org/wiki/Main_Page