Докинг низкомолекулярных лигандов в структуру белка

In [2]:
import numpy as np
import copy

# Отображение структур
import IPython.display
import ipywidgets
from IPython.display import display,display_svg,SVG,Image

# Open Drug Discovery Toolkit
import oddt
import oddt.docking
import oddt.interactions

# Органика
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole

import pmx # Модуль для манипулирования pdb 

Подготовка белка

In [3]:
pdb=pmx.Model('HYPCU.B99990002.pdb')

for r in pdb.residues[135:]:
    print r # остатки
<Molecule: id = 136 name = LEU chain_id =    natoms = 8>
<Molecule: id = 137 name = PRO chain_id =    natoms = 7>
<Molecule: id = 138 name = ASP chain_id =    natoms = 8>
<Molecule: id = 139 name = THR chain_id =    natoms = 7>
<Molecule: id = 140 name = SER chain_id =    natoms = 6>
<Molecule: id = 141 name = ASN chain_id =    natoms = 8>
<Molecule: id = 142 name = CYS chain_id =    natoms = 7>
<Molecule: id = 143 name = NAG chain_id =    natoms = 14>
<Molecule: id = 144 name = NAG chain_id =    natoms = 14>
<Molecule: id = 145 name = NDG chain_id =    natoms = 15>
In [5]:
# создание объектов белок и лиганда
newpdb = pdb.copy()
for r in newpdb.residues[-3:]:
    newpdb.remove_residue(r)
lig = pdb.copy()
del lig.residues[:-3]
In [6]:
# ищем геометрический центр лиганда
x = []
y = []
z = []
for a in lig.atoms:
    coord = a.x 
    x.append(coord[0])
    y.append(coord[1])
    z.append(coord[2])
    
geom_center = [np.mean(x), np.mean(y), np.mean(z)]
geom_center
Out[6]:
[33.1267133163698, 34.05911620016964, 22.480422391857505]
In [7]:
newpdb.writePDB("myprot.pdb")
lig.writePDB("lig.pdb")

Подготовка белка для докинга

In [8]:
prot = oddt.toolkit.readfile('pdb','myprot.pdb').next()

prot.OBMol.AddPolarHydrogens()
prot.OBMol.AutomaticPartialCharge()


print 'is it the first mol in 1lmp is protein?',prot.protein,':) and MW of this mol is:', prot.molwt 
is it the first mol in 1lmp is protein? False :) and MW of this mol is: 16234.3631

Лиганды для докинга

In [15]:
smiles = ['O=C(NC1C(O)C(O)C(OC1O)CO)O', 'O=C(NC1C(O)C(O)C(OC1O)CO)[NH3+]',
          'O=C(NC1C(O)C(O)C(OC1O)CO)', 'O=C(NC1C(O)C(O)C(OC1O)CO)(c1ccccc1)','O=C(NC1C(O)C(O)C(OC1O)CO)C(=O)O']
mols= []
images =[]

for s in smiles:
    m = oddt.toolkit.readstring('smi', s)
    if not m.OBMol.Has3D(): 
        m.make3D(forcefield='mmff94', steps=150)
        m.removeh()
        m.OBMol.AddPolarHydrogens()

    mols.append(m)
    ###with print m.OBMol.Has3D() was found that:
    ### deep copy needed to keep 3D , write svg make mols flat
    images.append((SVG(copy.deepcopy(m).write('svg'))))
    
display_svg(*images)
OBDepict ***** O N O O O O O O H H H H H H
OBDepict ***** O N O O O O O N H H H H H H H
OBDepict ***** O N O O O O O H H H H H
OBDepict ***** O N O O O O O H H H H H
OBDepict ***** O N O O O O O O O H H H H H H

Докинг

In [17]:
#create docking object
dock_obj= oddt.docking.AutodockVina.autodock_vina(
    protein=prot,size=(20,20,20),center=geom_center,
    executable='/usr/bin/vina',autocleanup=True, num_modes=9)

print dock_obj.tmp_dir
print " ".join(dock_obj.params)
/tmp/autodock_vina_sfBDDw
--center_x 33.1267133163698 --center_y 34.05911620016964 --center_z 22.480422391857505 --size_x 20 --size_y 20 --size_z 20 --exhaustiveness 8 --num_modes 9 --energy_range 3
In [18]:
# do it
res = dock_obj.dock(mols,prot)

Результаты докинга

In [19]:
for i,r in enumerate(res):
    print "%4d%10s%8s%8s" %(i, r.formula, r.data['vina_affinity'],  r.data['vina_rmsd_ub'])
   0  C7H13NO7    -6.2   0.000
   1  C7H13NO7    -6.1   4.913
   2  C7H13NO7    -5.6   4.209
   3  C7H13NO7    -5.6   1.462
   4  C7H13NO7    -5.4   1.992
   5  C7H13NO7    -5.4   2.510
   6  C7H13NO7    -5.3   5.012
   7  C7H13NO7    -5.3   3.816
   8  C7H13NO7    -4.7   2.931
   9 C7H14N2O6    -6.4   0.000
  10 C7H14N2O6    -5.3   3.653
  11 C7H14N2O6    -5.0   4.992
  12  C7H13NO6    -6.2   0.000
  13  C7H13NO6    -5.6   4.405
  14  C7H13NO6    -5.6   4.605
  15  C7H13NO6    -5.3   3.901
  16  C7H13NO6    -5.2   2.483
  17  C7H13NO6    -3.5   2.398
  18  C7H13NO6    -3.4   3.587
  19  C7H13NO6    -3.3   3.052
  20 C13H17NO6    -4.3   0.000
  21 C13H17NO6    -4.0   6.497
  22 C13H17NO6    -3.4   6.316
  23 C13H17NO6    -3.2   2.140
  24 C13H17NO6    -2.6   2.664
  25  C8H13NO8    -5.9   0.000
  26  C8H13NO8    -5.2   2.079
  27  C8H13NO8    -4.7   2.701
  28  C8H13NO8    -3.6   3.074

Анализ докинга

In [22]:
import pandas as pd

hbtotal = []
hbstrict = []
phob = []
stack = []
res_sorted = sorted(res, key = lambda x : float(x.data['vina_affinity']))

for i,r in enumerate(res_sorted):
    int1, int2, strict = oddt.interactions.hbonds(prot,r)
    hbtotal.append(len(int1))
    stack.append(len(oddt.interactions.pi_stacking(prot,r)[0]))
    hbstrict.append(strict.sum())
    ph1, ph2 = oddt.interactions.hydrophobic_contacts(prot,r)
    phob.append(len(ph1))
sort_table = pd.DataFrame({'Formula':[r.formula for r in res_sorted],
                   'Affinity, kcal/mol':[r.data['vina_affinity'] for r in res_sorted],
                   'Total number of h-bonds':hbtotal, 'Strict number of h-bonds':hbstrict, 'Stacking': stack,'Hydrophobic':phob, 
                    'RMSD':[r.data['vina_rmsd_ub'] for r in res_sorted]})

sort_table[:]
Out[22]:
Affinity, kcal/mol Formula Hydrophobic RMSD Stacking Strict number of h-bonds Total number of h-bonds
0 -6.4 C7H14N2O6 0 0.000 0 13 13
1 -6.2 C7H13NO7 0 0.000 0 10 11
2 -6.2 C7H13NO6 0 0.000 0 9 10
3 -6.1 C7H13NO7 0 4.913 0 9 11
4 -5.9 C8H13NO8 0 0.000 0 7 10
5 -5.6 C7H13NO7 0 4.209 0 8 11
6 -5.6 C7H13NO7 0 1.462 0 9 10
7 -5.6 C7H13NO6 0 4.405 0 6 8
8 -5.6 C7H13NO6 0 4.605 0 9 10
9 -5.4 C7H13NO7 0 1.992 0 8 9
10 -5.4 C7H13NO7 0 2.510 0 7 7
11 -5.3 C7H13NO7 0 5.012 0 9 10
12 -5.3 C7H13NO7 0 3.816 0 7 10
13 -5.3 C7H14N2O6 0 3.653 0 7 8
14 -5.3 C7H13NO6 0 3.901 0 6 8
15 -5.2 C7H13NO6 0 2.483 0 7 8
16 -5.2 C8H13NO8 0 2.079 0 5 9
17 -5.0 C7H14N2O6 0 4.992 0 6 8
18 -4.7 C7H13NO7 0 2.931 0 9 11
19 -4.7 C8H13NO8 0 2.701 0 7 7
20 -4.3 C13H17NO6 12 0.000 1 10 12
21 -4.0 C13H17NO6 7 6.497 0 5 6
22 -3.6 C8H13NO8 0 3.074 0 6 6
23 -3.5 C7H13NO6 0 2.398 0 3 3
24 -3.4 C7H13NO6 0 3.587 0 3 4
25 -3.4 C13H17NO6 4 6.316 0 1 1
26 -3.3 C7H13NO6 0 3.052 0 8 10
27 -3.2 C13H17NO6 17 2.140 1 9 10
28 -2.6 C13H17NO6 12 2.664 0 10 10

Лиганды с наивысшей аффинностью - C7H14N2O6 (NAG, где метильный радикал заменен на NH3+) и C7H13NO7 (NAG, где метильный радикал замещен на гидроксил). У обоих вариантов большое количество водородных связей.

In [23]:
for i in (9, 0):
    res[i].write(filename='%s.pdb' % str(i), format='pdb')

Рис.1. C7H13NO7 с наибольшим скором, видно 8 водородных связей

Рис.2. C7H14N2O6 с наибольшим скором, видно 10 водородных связей