В этом задании предлагалось познакомиться с докингом низкомолекулярных соединений с помощью пакетов Autodock Vina и oddt в структуру лизоцима.

In [2]:
import numpy as np
import copy

# Отображение структур
import IPython.display
import ipywidgets
from IPython.display import display,display_svg,SVG,Image

# Open Drug Discovery Toolkit
import oddt
import oddt.docking
import oddt.interactions

# Органика
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole

import pmx # Модуль для манипулирования pdb 

Подготовка белка

In [3]:
pdb=pmx.Model('LAMBD_L.B99990001.pdb')

for r in pdb.residues[135:]:
    print r #посмотрим остатки
<Molecule: id = 136 name = GLU chain_id =    natoms = 9>
<Molecule: id = 137 name = HIS chain_id =    natoms = 10>
<Molecule: id = 138 name = LYS chain_id =    natoms = 9>
<Molecule: id = 139 name = ALA chain_id =    natoms = 5>
<Molecule: id = 140 name = ASP chain_id =    natoms = 8>
<Molecule: id = 141 name = SER chain_id =    natoms = 6>
<Molecule: id = 142 name = LEU chain_id =    natoms = 8>
<Molecule: id = 143 name = ILE chain_id =    natoms = 8>
<Molecule: id = 144 name = ALA chain_id =    natoms = 5>
<Molecule: id = 145 name = LYS chain_id =    natoms = 9>
<Molecule: id = 146 name = PHE chain_id =    natoms = 11>
<Molecule: id = 147 name = LYS chain_id =    natoms = 9>
<Molecule: id = 148 name = GLU chain_id =    natoms = 9>
<Molecule: id = 149 name = ALA chain_id =    natoms = 5>
<Molecule: id = 150 name = GLY chain_id =    natoms = 4>
<Molecule: id = 151 name = GLY chain_id =    natoms = 4>
<Molecule: id = 152 name = THR chain_id =    natoms = 7>
<Molecule: id = 153 name = VAL chain_id =    natoms = 7>
<Molecule: id = 154 name = ARG chain_id =    natoms = 11>
<Molecule: id = 155 name = GLU chain_id =    natoms = 9>
<Molecule: id = 156 name = ILE chain_id =    natoms = 8>
<Molecule: id = 157 name = ASP chain_id =    natoms = 8>
<Molecule: id = 158 name = VAL chain_id =    natoms = 8>
<Molecule: id = 159 name = NAG chain_id =    natoms = 14>
<Molecule: id = 160 name = NAG chain_id =    natoms = 14>
<Molecule: id = 161 name = NDG chain_id =    natoms = 15>
In [5]:
# создание объектов белок и лиганда
newpdb = pdb.copy()
lig = pdb.copy()
del newpdb.residues[158:]
del lig.residues[:158]

Находим геометрический центр лиганда.

In [6]:
x_list = []
y_list = []
z_list = []
for a in lig.atoms:
    x_list.append(a.x[0])
    y_list.append(a.x[1])
    z_list.append(a.x[2])

x_ave = reduce(lambda k, l: k+l, x_list)/len(x_list)
y_ave = reduce(lambda k, l: k+l, y_list)/len(y_list)
z_ave = reduce(lambda k, l: k+l, z_list)/len(z_list)
In [7]:
newpdb.writePDB("LAMBD.pdb")

Подготовка белка к докингу.

In [8]:
prot = oddt.toolkit.readfile('pdb','LAMBD.pdb').next()

prot.OBMol.AddPolarHydrogens()
prot.OBMol.AutomaticPartialCharge()


print 'is it the first mol in 1lmp is protein?',prot.protein,':) and MW of this mol is:', prot.molwt 
is it the first mol in 1lmp is protein? False :) and MW of this mol is: 18452.56116

Загружаем лиганды для докинга.

In [9]:
smiles = ['c1cccc(O)c1', 'c1c(O)ccc(O)c1','c1(O)cc(c2ccccc2)cc(O)c1']
mols= []
images =[]

for s in smiles:
    m = oddt.toolkit.readstring('smi', s)
    if not m.OBMol.Has3D(): 
        m.make3D(forcefield='mmff94', steps=150)
        m.removeh()
        m.OBMol.AddPolarHydrogens()

    mols.append(m)
    ###with print m.OBMol.Has3D() was found that:
    ### deep copy needed to keep 3D , write svg make mols flat
    images.append((SVG(copy.deepcopy(m).write('svg'))))
    
display_svg(*images)
***** - Open Babel Depiction O H *****
***** - Open Babel Depiction O O H H *****
***** - Open Babel Depiction O O H H *****

Делаем докинг.

In [10]:
#create docking object
dock_obj= oddt.docking.AutodockVina.autodock_vina(
    protein=prot,size=(20,20,20),center=[x_ave,y_ave,z_ave],
    executable='/usr/bin/vina',autocleanup=True, num_modes=20)

print dock_obj.tmp_dir
print " ".join(dock_obj.params) # Опишите выдачу
/tmp/autodock_vina_Bw7cnO
--center_x 40.4721946154 --center_y 34.33903 --center_z 21.2514769231 --size_x 20 --size_y 20 --size_z 20 --cpu 1 --exhaustiveness 8 --num_modes 20 --energy_range 3
In [11]:
# do it
res = dock_obj.dock(mols,prot)

Смотрим результаты докинга.

In [12]:
for i,r in enumerate(res):
    print "%4d%10s%8s%8s%8s" %(i,r.formula, r.data['vina_affinity'],  r.data['vina_rmsd_ub'], r.residues[0].name)
   0     C6H6O    -3.6   0.000     UNL
   1     C6H6O    -2.9  25.494     UNL
   2     C6H6O    -2.9  25.553     UNL
   3     C6H6O    -2.9  19.561     UNL
   4     C6H6O    -2.8  24.801     UNL
   5     C6H6O    -2.4  19.915     UNL
   6     C6H6O    -2.4  19.606     UNL
   7     C6H6O    -2.3  25.132     UNL
   8     C6H6O    -2.0  19.560     UNL
   9    C6H6O2    -4.4   0.000     UNL
  10    C6H6O2    -3.4   3.656     UNL
  11    C6H6O2    -3.1   2.308     UNL
  12    C6H6O2    -3.0   3.355     UNL
  13    C6H6O2    -2.6  24.939     UNL
  14    C6H6O2    -2.6  24.733     UNL
  15    C6H6O2    -2.5  24.719     UNL
  16    C6H6O2    -2.4  19.294     UNL
  17    C6H6O2    -2.4  13.359     UNL
  18  C12H10O2     1.9   0.000     UNL
  19  C12H10O2     2.8  19.474     UNL
  20  C12H10O2     3.0  19.585     UNL

Анализ докинга.

In [13]:
for i,r in enumerate(res):
    hbs = oddt.interactions.hbonds(prot,r)
    stack= oddt.interactions.pi_stacking(prot,r)
    phob = oddt.interactions.hydrophobic_contacts(prot,r)
    print "%8s%8s%8s" % (len(hbs[1]), len(stack[1]), len(phob[1]))
       0       1      11
       2       0       6
       2       0       6
       2       1       4
       4       1       3
       3       1       7
       2       1       4
       4       0       2
       0       1       4
       5       1      10
       5       1      10
       2       1      10
       2       1      10
       6       0       4
       6       0       4
       7       0       4
       1       1       4
       2       0       5
       5       1      21
       5       0      15
       5       0      15
/home/preps/golovin/miniconda2/lib/python2.7/site-packages/oddt/interactions.py:103: RuntimeWarning: invalid value encountered in greater
  strict = (((angle1 > (base_angle / a_neighbors_num - tolerance)) |
/home/preps/golovin/miniconda2/lib/python2.7/site-packages/oddt/interactions.py:105: RuntimeWarning: invalid value encountered in greater
  ((angle2 > (base_angle / d_neighbors_num - tolerance)) | np.isnan(angle2))).all(axis=-1)

Визуализация.

In [14]:
for i,r in enumerate(res):
    r.write(filename='r%s.pdb' % i, format='pdb')

Еще раз результаты докинга, но отсортированные по 'vina_affinity'.

In [15]:
import collections
doc_dic = {}
for i,r in enumerate(res):
    doc_dic[float(r.data['vina_affinity'])] = "%4d%10s%8s%8s%8s" %(i,r.formula, r.data['vina_affinity'],  r.data['vina_rmsd_ub'], r.residues[0].name)
dd = collections.OrderedDict(sorted(doc_dic.items()))
for key in dd:
    print dd[key]
   9    C6H6O2    -4.4   0.000     UNL
   0     C6H6O    -3.6   0.000     UNL
  10    C6H6O2    -3.4   3.656     UNL
  11    C6H6O2    -3.1   2.308     UNL
  12    C6H6O2    -3.0   3.355     UNL
   3     C6H6O    -2.9  19.561     UNL
   4     C6H6O    -2.8  24.801     UNL
  14    C6H6O2    -2.6  24.733     UNL
  15    C6H6O2    -2.5  24.719     UNL
  17    C6H6O2    -2.4  13.359     UNL
   7     C6H6O    -2.3  25.132     UNL
   8     C6H6O    -2.0  19.560     UNL
  18  C12H10O2     1.9   0.000     UNL
  19  C12H10O2     2.8  19.474     UNL
  20  C12H10O2     3.0  19.585     UNL
In [ ]: