Докинг низкомолекулярных лигандов в структуру белка¶

import numpy as np
import copy

# Отображение структур
import IPython.display
import ipywidgets
from IPython.display import display,display_svg,SVG,Image

import pybel
#from pybel import 
import openbabel as ob
#from pybel import *
#import numpy as np
#from openbabel import OBAtomAtomIter, OBAtomBondIter, OBTypeTable


# Open Drug Discovery Toolkit (ODDT)
import oddt
import oddt.docking
import oddt.interactions   # oddt.interactions module calculates interactions between two molecules 
                           # (proein-protein, protein-ligand, small-small). Currently following interacions are 
                           # implemented:
                           # • hydrogen bonds
                           # • halogen bonds
                           # • pi stacking (parallel and perpendicular)
                           # • salt bridges
                           # • hydrophobic contacts
                           # • pi-cation
                           # • metal coordination
                           # • pi-metal

# Cheminformatics
import rdkit
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import AllChem
from rdkit.Chem import MolFromSmiles
from rdkit.Chem.Draw import IPythonConsole

import pmx # Модуль для манипулирования pdb 

# PMX (formerly pymacs) has started as a small bunch of classes to read structure files such as pdb or gro 
# and trajectory data in gromacs xtc format. Over the years it has been extended towards a versatile (bio-)
# molecular structure manipulation package with some additional functionalities, e.g. gromacs file parsers 
# and scripts for setup and analysis of free energy calculations.

Подготовка белка¶

import nglview
import ipywidgets
w = nglview.show_structure_file('1lmp_hom.pdb')
w

Image(filename='1lmp_hom.png')

# Подготовка белка
pdb = pmx.Model('1lmp_hom.pdb')
for r in pdb.residues[-5:]:
    print r # Посмотрим последние 5 остатков

<Molecule: id = 121 name = VAL chain_id =    natoms = 7>
<Molecule: id = 122 name = GLN chain_id =    natoms = 10>
<Molecule: id = 123 name = NAG chain_id =    natoms = 14>
<Molecule: id = 124 name = NAG chain_id =    natoms = 14>
<Molecule: id = 125 name = NDG chain_id =    natoms = 15>

Создадим новые объекты: разделим белок и лиганд:

newpdb = pdb.copy()
for r in newpdb.residues[-3:]:
    newpdb.remove_residue(r)
lig = pdb.copy()
for r in lig.residues[:-3]:
    lig.remove_residue(r)

Найдем геометрический центр:

# Первый вариант
#
# x = []
# y = []
# z = []
# 
# for a in lig.atoms:
#     coords = a.x 
#     x.append(coords[0])
#     y.append(coords[1])
#     z.append(coords[2])
#     
# geom_center = [np.mean(x), np.mean(y), np.mean(z)]
# geom_center


# Второй вариант

coords = np.zeros((len(lig.atoms),3))
for c,a in enumerate(lig.atoms):
    coords[c,:] = a.x   # Coordinates of the atom (atom a)
    
# The arithmetic mean    
geom_center = np.mean(coords, axis = 0)   # Axis along which we want to calculate the arithmetic mean
                                          # axis = 0 means along the column and axis = 1 means 
                                          # working along the row.
print(geom_center)

[38.9964186  45.12913953 27.53430233]

newpdb.writePDB('prot.pdb')
lig.writePDB('lig.pdb')

Белок без лиганда:

w_p = nglview.show_structure_file('prot.pdb')
w_p

Image(filename='1lmp_prot.png')

Лиганд:

w_l = nglview.show_structure_file('lig.pdb')
w_l

Image(filename='1lmp_lig.png')

Подготовка моделей для докинга¶

Белок

prot = oddt.toolkit.readfile('pdb','prot.pdb').next()

print prot.OBMol.AddPolarHydrogens()
print prot.OBMol.AutomaticPartialCharge()
print 'is it the first mol in 1lmp is protein?',prot.protein,':) and MW of this mol is:', prot.molwt

True
True
is it the first mol in 1lmp is protein? False :) and MW of this mol is: 13375.39512

Лиганды

# Make two-dimensional molecules from SMILES

def chemdraw(smiles):
    mol = Chem.MolFromSmiles(smiles)
    AllChem.Compute2DCoords(mol)
    display(mol)
    
    
# Make three-dimensional molecules from two-dimensional ones

def molTo3D(m2d):
    m3d = Chem.AddHs(m2d)
    Chem.AllChem.EmbedMolecule(m3d)
    AllChem.MMFFOptimizeMolecule(m3d, maxIters=500, nonBondedThresh=200)
    return m3d

# Создадим несколько структур для докинга низкомолекулярных соединений в белок:
# NAG (N-ацетил-D-глюкозамин) содержит в себе СH3C(=O)NH группу; проводем докинг с ним самим 
# и с лигандами на его основе, где метильный радикал этой группы заменён на OH, NH3+, H, Ph, COO-

smiles = ['CC(=O)NC1C(C(C(OC1O)CO)O)O', 'OC(=O)NC1C(C(C(OC1O)CO)O)O', '[NH3+]C(=O)NC1C(C(C(OC1O)CO)O)O', 
          'C(=O)NC1C(C(C(OC1O)CO)O)O', 'C1=CC=CC=C1C(=O)NC1C(C(C(OC1O)CO)O)O', '[O-]C(=O)C(=O)NC1C(C(C(OC1O)CO)O)O']

mols = []
images = []

for s in enumerate(smiles):
    print(s[1])
    print("2D")
    chemdraw(s[1])
    m2d = Chem.MolFromSmiles(s[1])
    m3d = molTo3D(m2d)
    print("3D")
    Chem.MolToPDBFile(m3d, str(s[0]) + ".pdb")
    display(m3d)

CC(=O)NC1C(C(C(OC1O)CO)O)O
2D

3D

OC(=O)NC1C(C(C(OC1O)CO)O)O
2D

3D

[NH3+]C(=O)NC1C(C(C(OC1O)CO)O)O
2D

3D

C(=O)NC1C(C(C(OC1O)CO)O)O
2D

3D

C1=CC=CC=C1C(=O)NC1C(C(C(OC1O)CO)O)O
2D

3D

mols = []

for i in range(len(smiles)):
    m = oddt.toolkit.readfile('pdb', str(i) + ".pdb")
    for j in m:
        mols.append(j)

Докинг¶

# Creates docking object
# Место докинга - приблизительное положение сайта связывания лиганда 
# с белком (найденный ранее геометрический центр лиганда)

dock_obj= oddt.docking.AutodockVina.autodock_vina(
    protein=prot,size=(20,20,20),center=geom_center,
    executable='/usr/bin/vina',autocleanup=True, num_modes=5)   # The number of docked poses must be between 
                                                                 # 1 and 9 (due to Autodock Vina limitation)

print dock_obj.tmp_dir
print " ".join(dock_obj.params)

/tmp/autodock_vina_BpZy5A
--center_x 38.996418604651176 --center_y 45.12913953488373 --center_z 27.53430232558139 --size_x 20 --size_y 20 --size_z 20 --exhaustiveness 8 --num_modes 5 --energy_range 3

Выдача:

center_x (y,z): геометрический центр лиганда
size_x (y,z): размер области, в которой будет производиться поиск
cpu: сколько процессоров будет использоваться для выполнения нашей задачи
exhaustiveness: параметр, который говорит программе, насколько тщательно производить поиск
num_modes: количество конформаций лиганда в активном центре, которые будут найдены программой
energy_range: максимальная разница по энергии между лучшей и худшей конформацией из найденных

# Docking
res = dock_obj.dock(mols,prot)

# Результаты докинга, отсортированные по афинности

import pandas as pd
res_sorted = sorted(res, key = lambda x : float(x.data['vina_affinity']))

hbs_total = []
hbs_strict = []
stack = []
phob = []
formulas = []
aff = []
rmsd = []
for i,r in enumerate(res):
    hbs_total.append(len(oddt.interactions.hbonds(prot,r)[0]))
    hbs_strict.append(oddt.interactions.hbonds(prot,r)[2].sum())
    stack.append(len(oddt.interactions.pi_stacking(prot,r)[0]))
    phob.append(len(oddt.interactions.hydrophobic_contacts(prot,r)[0]))
    formulas.append(r.formula)
    aff.append(r.data['vina_affinity'])
    rmsd.append(r.data['vina_rmsd_ub'])
    
results = pd.DataFrame({"Chemical formula": [r.formula for r in res_sorted], 
                        "Affinity": [r.data['vina_affinity'] for r in res_sorted], 
                        "RMSD": [r.data['vina_rmsd_ub'] for r in res_sorted], 
                        "H-bonds (total)": hbs_total, "H-bonds (strict)": hbs_strict, 
                        "Stacking": stack, "Hydrophobic_contacts": phob})


results

Визуализация¶

for i,r in enumerate(res):
    r.write(filename='r%s.pdb' % i, format='pdb', overwrite=True)

Покажем несколько структур:

Image(filename='d0.png')

Image(filename='d6.png')

Image(filename='d14.png')

Image(filename='d20.png')

Image(filename='d23.png')

Ссылки¶

Визуализация: PyMOLWiki, NGL Viewer

https://pymolwiki.org/index.php/Main_Page
https://github.com/arose/nglview
Докинг: ODDT, AutoDock Vina

http://vina.scripps.edu/tutorial.html
https://jcheminf.biomedcentral.com/articles/10.1186/s13321-015-0078-2
Хемоинформатика: RDKit, Open Babel

https://github.com/rdkit/rdkit
http://openbabel.org/wiki/Main_Page

	Affinity	Chemical formula	H-bonds (strict)	H-bonds (total)	Hydrophobic_contacts	RMSD	Stacking
0	-7.2	C13H17NO6	6	6	2	0.000	0
1	-7.0	C13H17NO6	8	10	2	7.095	0
2	-6.9	C13H17NO6	8	10	0	7.077	0
3	-6.7	C8H15NO6	6	7	1	0.000	0
4	-6.7	C13H17NO6	3	4	1	2.345	0
5	-6.6	C7H15N2O6	6	9	0	0.000	0
6	-6.5	C8H15NO6	8	8	0	3.971	0
7	-6.4	C13H17NO6	9	12	0	7.029	0
8	-6.4	C8H13NO8	9	9	0	0.000	0
9	-6.3	C7H13NO7	7	11	0	0.000	0
10	-6.3	C7H13NO7	11	11	0	4.643	0
11	-6.3	C7H15N2O6	7	8	0	4.760	0
12	-6.3	C8H13NO8	4	4	0	5.797	0
13	-6.3	C8H13NO8	7	8	0	2.649	0
14	-6.2	C8H15NO6	5	5	0	4.177	0
15	-6.2	C8H15NO6	7	10	0	2.723	0
16	-6.2	C8H15NO6	7	10	0	4.592	0
17	-6.2	C7H13NO7	9	10	0	3.261	0
18	-6.2	C7H15N2O6	11	12	0	5.247	0
19	-6.2	C7H15N2O6	8	9	0	5.192	0
20	-6.2	C7H15N2O6	6	7	7	4.555	0
21	-6.1	C7H13NO7	6	7	7	4.490	1
22	-6.1	C7H13NO7	7	9	1	4.308	0
23	-6.1	C7H13NO6	6	7	4	0.000	0
24	-6.1	C8H13NO8	4	4	3	7.769	1
25	-6.0	C7H13NO6	6	8	0	5.157	0
26	-6.0	C7H13NO6	9	9	0	4.223	0
27	-6.0	C7H13NO6	5	7	0	3.979	0
28	-6.0	C8H13NO8	10	11	0	3.073	0
29	-5.9	C7H13NO6	6	6	0	4.846	0