In [1]:
import numpy as np
import copy

# Отображение структур
import IPython.display
import ipywidgets
from IPython.display import display,display_svg,SVG,Image

# Open Drug Discovery Toolkit
import oddt
import oddt.docking
import oddt.interactions

# Органика
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole

import pmx # Модуль для манипулирования pdb 
In [3]:
best_model_pdb_path = "red2.BL00010001.pdb"
In [52]:
pdb=pmx.Model(best_model_pdb_path)

for r in pdb.residues[135:]:
    print r #посмотрим остатки
<Molecule: id = 136 name = PHE chain_id =    natoms = 11>
<Molecule: id = 137 name = ASN chain_id =    natoms = 8>
<Molecule: id = 138 name = LYS chain_id =    natoms = 9>
<Molecule: id = 139 name = PRO chain_id =    natoms = 7>
<Molecule: id = 140 name = VAL chain_id =    natoms = 7>
<Molecule: id = 141 name = SER chain_id =    natoms = 6>
<Molecule: id = 142 name = SER chain_id =    natoms = 6>
<Molecule: id = 143 name = ASN chain_id =    natoms = 8>
<Molecule: id = 144 name = SER chain_id =    natoms = 6>
<Molecule: id = 145 name = ASN chain_id =    natoms = 8>
<Molecule: id = 146 name = GLN chain_id =    natoms = 9>
<Molecule: id = 147 name = ASN chain_id =    natoms = 8>
<Molecule: id = 148 name = ASN chain_id =    natoms = 8>
<Molecule: id = 149 name = GLN chain_id =    natoms = 9>
<Molecule: id = 150 name = THR chain_id =    natoms = 7>
<Molecule: id = 151 name = GLY chain_id =    natoms = 5>
<Molecule: id = 152 name = NAG chain_id =    natoms = 14>
<Molecule: id = 153 name = NAG chain_id =    natoms = 14>
<Molecule: id = 154 name = NDG chain_id =    natoms = 15>
In [54]:
newpdb = pdb.copy()
for i in range(151, len(newpdb.residues)):
    newpdb.remove_residue(newpdb.residues[151])
In [56]:
lig = pdb.copy()
for i in range(0, 151):
    lig.remove_residue(lig.residues[0])
In [58]:
mean_center = np.mean(np.array([a.x for a in lig.atoms]), axis = 0)
median_center = np.median(np.array([a.x for a in lig.atoms]), axis = 0)
In [59]:
print mean_center
print median_center
[ 50.41276744  33.57113953  26.74576744]
[ 50.375  32.995  26.492]

Будем использовать mean center

In [60]:
out_pdb_file_name = "lys_whout_ligand.pdb"
newpdb.writePDB(out_pdb_file_name, title="Modelled Lysozyme with deleted ligand")
In [61]:
prot = oddt.toolkit.readfile('pdb',out_pdb_file_name).next()
In [62]:
prot.OBMol.AddPolarHydrogens()
prot.OBMol.AutomaticPartialCharge()
Out[62]:
True
In [63]:
print 'is it the first mol in 1lmp is protein?',prot.protein,':) and MW of this mol is:', prot.molwt 
is it the first mol in 1lmp is protein? False :) and MW of this mol is: 16776.70062
In [66]:
prot.protein = True
In [67]:
smiles = ['c1cccc(O)c1', 'c1c(O)ccc(O)c1','c1(O)cc(c2ccccc2)cc(O)c1']
mols= []
images =[]

for s in smiles:
    m = oddt.toolkit.readstring('smi', s)
    if not m.OBMol.Has3D(): 
        m.make3D(forcefield='mmff94', steps=150)
        m.removeh()
        m.OBMol.AddPolarHydrogens()

    mols.append(m)
    ###with print m.OBMol.Has3D() was found that:
    ### deep copy needed to keep 3D , write svg make mols flat
    images.append((SVG(copy.deepcopy(m).write('svg'))))
    
display_svg(*images)
***** - Open Babel Depiction O H *****
***** - Open Babel Depiction O O H H *****
***** - Open Babel Depiction O O H H *****
In [68]:
#create docking object
dock_obj= oddt.docking.AutodockVina.autodock_vina(
    protein=prot,size=(20,20,20),center=mean_center,
    executable='/usr/bin/vina',autocleanup=True, num_modes=20)

print dock_obj.tmp_dir
print " ".join(dock_obj.params) # Опишите выдачу
/tmp/autodock_vina_VOLZJx
--center_x 50.4127674419 --center_y 33.5711395349 --center_z 26.7457674419 --size_x 20 --size_y 20 --size_z 20 --cpu 1 --exhaustiveness 8 --num_modes 20 --energy_range 3
In [74]:
res = dock_obj.dock(mols, prot)
In [75]:
for i,r in enumerate(res):
    print "%4d%10s%8s%8s%8s" %(i,r.formula, r.data['vina_affinity'],  r.data['vina_rmsd_ub'], r.residues[0].name)
   0     C6H6O    -4.5   0.000     UNL
   1     C6H6O    -4.4   2.060     UNL
   2     C6H6O    -4.0   1.188     UNL
   3     C6H6O    -3.9   2.611     UNL
   4     C6H6O    -3.9   2.911     UNL
   5     C6H6O    -3.8   3.881     UNL
   6     C6H6O    -3.8   3.678     UNL
   7     C6H6O    -3.6   4.161     UNL
   8     C6H6O    -3.6   3.424     UNL
   9    C6H6O2    -4.9   0.000     UNL
  10    C6H6O2    -4.9   1.713     UNL
  11    C6H6O2    -4.9   3.656     UNL
  12    C6H6O2    -4.9   3.231     UNL
  13    C6H6O2    -4.6   1.575     UNL
  14    C6H6O2    -4.6   3.129     UNL
  15    C6H6O2    -4.5   3.141     UNL
  16    C6H6O2    -4.2   3.765     UNL
  17    C6H6O2    -4.2   3.290     UNL
  18  C12H10O2    -6.8   0.000     UNL
  19  C12H10O2    -6.7   2.534     UNL
  20  C12H10O2    -6.3   3.180     UNL
  21  C12H10O2    -6.3   2.480     UNL
  22  C12H10O2    -6.2   6.749     UNL
  23  C12H10O2    -6.1   3.908     UNL
  24  C12H10O2    -6.1   4.515     UNL
  25  C12H10O2    -6.0   3.343     UNL
  26  C12H10O2    -6.0   4.421     UNL
In [84]:
?oddt.interactions.hydrophobic_contacts
In [177]:
?oddt.interactions.hbonds
In [179]:
for i,r in enumerate(res):
    mol1, mol2, strict = oddt.interactions.hbonds(prot,r)
    mol1, mol2, strict_parallel, strict_perpendicular= oddt.interactions.pi_stacking(prot,r)
    mol1, mol2 = oddt.interactions.hydrophobic_contacts(prot,r)

Создадим лиганды где метильный радикал группы СH3C(=O)NH будет заменён на :

1.OH

2.NH3+

3.H

4.Ph

5.COO-

In [161]:
nag_mod_smiles = ["CC(=O)N[C@@H]1[C@H]([C@@H]([C@H](O[C@H]1O)CO)O)O", \
                  "C(O)(=O)N[C@@H]1[C@H]([C@@H]([C@H](O[C@H]1O)CO)O)O",
                 "C([NH3+])(=O)N[C@@H]1[C@H]([C@@H]([C@H](O[C@H]1O)CO)O)O",
                 "C([H])(=O)N[C@@H]1[C@H]([C@@H]([C@H](O[C@H]1O)CO)O)O",
                 "C(C(=O)[O-])(=O)N[C@@H]1[C@H]([C@@H]([C@H](O[C@H]1O)CO)O)O",
                 "C(c1ccccc1)(=O)N[C@@H]1[C@H]([C@@H]([C@H](O[C@H]1O)CO)O)O"]


names = ["NAG", "OH_mod", "NH3_mod", "H_mod", "COO_mod", "Ph_mod"]
In [235]:
nag_mols= []
nag_images =[]

for s, name in zip(nag_mod_smiles, names):
    m = oddt.toolkit.readstring('smi', s)
    if not m.OBMol.Has3D(): 
        m.make3D(forcefield='mmff94', steps=150)
        m.removeh()
        m.OBMol.AddPolarHydrogens()
    m.name = name
    nag_mols.append(m)
    ###with print m.OBMol.Has3D() was found that:
    ### deep copy needed to keep 3D , write svg make mols flat
    nag_images.append((SVG(copy.deepcopy(m).write('svg'))))
    
display_svg(*nag_images)
***** - Open Babel Depiction O H H H H H O O O CH 3 O N O *****
***** - Open Babel Depiction O H H H H H H O O O O N O O *****
***** - Open Babel Depiction H H H H H H H H O O O O O N O N *****
***** - Open Babel Depiction O H H H H H O O O O N O *****
***** - Open Babel Depiction H H H H H O O O O O N O OH O *****
***** - Open Babel Depiction H H H H H O O O O O N O *****
In [249]:
dock_obj= oddt.docking.AutodockVina.autodock_vina(
    protein=prot,size=(20,20,20),center=mean_center,
    executable='/usr/bin/vina',autocleanup=False, num_modes=20)

print dock_obj.tmp_dir
print " ".join(dock_obj.params) # Опишите выдачу
/tmp/autodock_vina_mm8sY8
--center_x 50.4127674419 --center_y 33.5711395349 --center_z 26.7457674419 --size_x 20 --size_y 20 --size_z 20 --cpu 1 --exhaustiveness 8 --num_modes 20 --energy_range 3
In [250]:
nag_res = dock_obj.dock(nag_mols, prot)
In [255]:
nag_res_sorted = sorted(nag_res, key = lambda x : float(x.data['vina_affinity']))
In [256]:
print "Place\t\tFormula\tAffinity\tRMSD\tName\tHb_tot\tHb_strict\tPi_tot\tPi_par\tPi_str_perp\tHydphob"
for i,r in enumerate(nag_res_sorted):
    mol1, mol2, strict = oddt.interactions.hbonds(prot,r)
    hbonds_num_total = len(mol1)
    hbonds_num_strict = strict.sum()
    mol1, mol2, strict_parallel, strict_perpendicular= oddt.interactions.pi_stacking(prot,r)
    pi_stack_num_total = len(mol1)
    pi_stack_num_strict_par = strict_parallel.sum()
    pi_stack_num_strict_perp = strict_perpendicular.sum()
    mol1, mol2 = oddt.interactions.hydrophobic_contacts(prot,r)
    hydrophob_num = len(mol1)
    print "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}".format(i,r.formula, r.data['vina_affinity'],\
                                                  r.data['vina_rmsd_ub'], r.residues[0].name,
                                                 hbonds_num_total, hbonds_num_strict,\
                                                  pi_stack_num_total, pi_stack_num_strict_par, pi_stack_num_strict_perp,\
                                                  hydrophob_num)
Place		Formula	Affinity	RMSD	Name	Hb_tot	Hb_strict	Pi_tot	Pi_par	Pi_str_perp	Hydphob
0	C7H16N2O6	-7.0	0.000	UNL	14	10	0	0	0	0
1	C7H13NO7	-6.9	0.000	UNL	17	15	0	0	0	0
2	C13H17NO6	-6.8	0.000	UNL	9	8	0	0	0	2
3	C8H13NO8	-6.6	0.000	UNL	12	9	0	0	0	0
4	C7H13NO6	-6.5	0.000	UNL	13	13	0	0	0	0
5	C13H17NO6	-6.5	6.038	UNL	8	8	0	0	0	2
6	C7H13NO7	-6.4	2.656	UNL	13	12	0	0	0	0
7	C8H13NO8	-6.4	5.845	UNL	9	9	0	0	0	0
8	C8H13NO8	-6.3	2.716	UNL	12	10	0	0	0	0
9	C8H13NO8	-6.3	5.516	UNL	9	7	0	0	0	0
10	C8H15NO6	-6.2	0.000	UNL	10	6	0	0	0	1
11	C7H16N2O6	-6.2	2.695	UNL	7	7	0	0	0	0
12	C13H17NO6	-6.2	4.481	UNL	8	7	0	0	0	1
13	C13H17NO6	-6.2	3.940	UNL	9	8	0	0	0	2
14	C7H13NO7	-6.1	4.021	UNL	10	9	0	0	0	0
15	C7H13NO7	-6.1	5.418	UNL	12	11	0	0	0	0
16	C8H13NO8	-6.1	4.051	UNL	8	7	0	0	0	0
17	C13H17NO6	-6.1	7.353	UNL	8	5	0	0	0	4
18	C13H17NO6	-6.1	2.341	UNL	5	3	0	0	0	7
19	C8H15NO6	-6.0	3.741	UNL	9	8	0	0	0	1
20	C7H16N2O6	-6.0	2.369	UNL	8	7	0	0	0	0
21	C7H16N2O6	-6.0	5.335	UNL	10	10	0	0	0	0
22	C7H13NO6	-6.0	5.307	UNL	8	7	0	0	0	0
23	C8H13NO8	-6.0	3.960	UNL	12	10	0	0	0	0
24	C13H17NO6	-6.0	2.163	UNL	5	4	0	0	0	3
25	C7H16N2O6	-5.9	4.027	UNL	7	7	0	0	0	0
26	C13H17NO6	-5.9	3.370	UNL	11	8	0	0	0	2
27	C8H15NO6	-5.8	5.515	UNL	7	6	0	0	0	1
28	C7H13NO7	-5.8	4.918	UNL	8	8	0	0	0	0
29	C8H13NO8	-5.8	2.105	UNL	14	13	0	0	0	0
30	C8H13NO8	-5.8	5.214	UNL	8	7	0	0	0	0
31	C13H17NO6	-5.8	3.270	UNL	5	4	0	0	0	4
32	C7H13NO6	-5.7	3.672	UNL	8	6	0	0	0	0
33	C7H13NO6	-5.7	2.702	UNL	7	7	0	0	0	0
34	C8H13NO8	-5.7	4.745	UNL	11	8	0	0	0	0
35	C7H13NO7	-5.6	3.158	UNL	8	7	0	0	0	0
36	C8H15NO6	-5.5	3.124	UNL	9	7	0	0	0	1
37	C7H13NO7	-5.5	4.599	UNL	7	7	0	0	0	0
38	C7H13NO7	-5.5	5.118	UNL	11	11	0	0	0	0
39	C7H13NO6	-5.5	4.042	UNL	9	7	0	0	0	0
40	C8H15NO6	-5.4	6.104	UNL	8	6	0	0	0	2
41	C8H15NO6	-5.4	5.050	UNL	6	6	0	0	0	1
42	C7H13NO7	-5.4	5.031	UNL	9	8	0	0	0	0
43	C7H16N2O6	-5.4	5.468	UNL	9	9	0	0	0	0
44	C7H16N2O6	-5.4	6.175	UNL	7	7	0	0	0	0
45	C7H13NO6	-5.4	4.669	UNL	6	6	0	0	0	0
46	C7H13NO6	-5.3	5.234	UNL	10	10	0	0	0	0
47	C8H15NO6	-5.2	1.796	UNL	4	4	0	0	0	1
48	C8H15NO6	-5.2	3.485	UNL	5	3	0	0	0	0
49	C7H13NO6	-5.2	4.401	UNL	9	8	0	0	0	0
50	C7H13NO6	-5.2	3.936	UNL	7	6	0	0	0	0
51	C8H15NO6	-5.1	3.388	UNL	9	8	0	0	0	2
52	C7H16N2O6	-5.1	4.140	UNL	6	5	0	0	0	0
53	C7H16N2O6	-5.0	5.618	UNL	11	11	0	0	0	0

Кажется странным целых 17 водородных связей. Посмотрим на их характер

In [257]:
mol1, mol2, strict = oddt.interactions.hbonds(prot,nag_res_sorted[1])
In [258]:
for x,y in zip(mol1, mol2):
    print "{}:{}-->{}:{}".format(x[9], x[5],y[9] or "LIG", y[5])
GLN:O.2-->LIG:O.3
ASP:O.3-->LIG:O.3
ASP:O.3-->LIG:O.3
ILE:O.2-->LIG:N.am
ILE:O.2-->LIG:O.3
ARG:O.2-->LIG:O.3
ARG:O.2-->LIG:O.3
GLN:N.am-->LIG:O.2
ARG:N.pl3-->LIG:O.3
ARG:N.pl3-->LIG:O.3
GLN:N.am-->LIG:O.3
ARG:N.pl3-->LIG:O.3
TRP:N.ar-->LIG:O.3
ASP:O.3-->LIG:O.3
ASP:O.3-->LIG:O.3
TRP:N.ar-->LIG:O.3
GLN:N.am-->LIG:O.3

Видим, что часть из них "дублируется", т.е у них одинаковый акцептор/донор

In [270]:
for ind, res in enumerate(nag_res_sorted):
    res.write(filename='r{}.pdb'.format(ind), format='pdb')

Посмотрим на этот лиганд в структуре белка

In [323]:
from IPython.display import Image
Image(filename='1.png')
Out[323]:

В принципе, расположен он удачно

Отсортируем более по-умному За каждую strict водородную связб будем давать 7.5 очков, strict pi-связь - 2, гидрофобную - 5, за не strict - половину соответствующей энергии.

In [289]:
def score_func(prot, r):

    mol1, mol2, strict = oddt.interactions.hbonds(prot,r)
    hbonds_num_total = len(mol1)
    hbonds_num_strict = strict.sum()
    mol1, mol2, strict_parallel, strict_perpendicular= oddt.interactions.pi_stacking(prot,r)
    pi_stack_num_total = len(mol1)
    pi_stack_num_strict_par = strict_parallel.sum()
    pi_stack_num_strict_perp = strict_perpendicular.sum()
    mol1, mol2 = oddt.interactions.hydrophobic_contacts(prot,r)
    hydrophob_num = len(mol1)
    
    score = hbonds_num_strict * 7.5 + (hbonds_num_total - hbonds_num_strict) * 0.5 +\
        pi_stack_num_strict_par * 2 + pi_stack_num_strict_perp * 2 +\
        (pi_stack_num_total - pi_stack_num_strict_par - pi_stack_num_strict_perp) * 1+\
        hydrophob_num * 5
    return score
In [292]:
nag_res_cl_sorted = sorted(nag_res, key = lambda x : -score_func(prot, x))
In [322]:
print "Place Formula    Score Hb_tot Hb_str Pi_tot Pi_par Pi_perp Hydphob"
for i,r in enumerate(nag_res_cl_sorted):
    mol1, mol2, strict = oddt.interactions.hbonds(prot,r)
    hbonds_num_total = len(mol1)
    hbonds_num_strict = strict.sum()
    mol1, mol2, strict_parallel, strict_perpendicular= oddt.interactions.pi_stacking(prot,r)
    pi_stack_num_total = len(mol1)
    pi_stack_num_strict_par = strict_parallel.sum()
    pi_stack_num_strict_perp = strict_perpendicular.sum()
    mol1, mol2 = oddt.interactions.hydrophobic_contacts(prot,r)
    hydrophob_num = len(mol1)
    print "{:5} {:10} {:5} {:5}{:5}{:7}{:7}{:7}{:7}".format(i,r.formula, \
                                                  score_func(prot, r),
                                                 hbonds_num_total, hbonds_num_strict,\
                                                  pi_stack_num_total, pi_stack_num_strict_par, pi_stack_num_strict_perp,\
                                                  hydrophob_num)
Place Formula    Score Hb_tot Hb_str Pi_tot Pi_par Pi_perp Hydphob
    0 C7H13NO7   113.5    17   15      0      0      0      0
    1 C8H13NO8    98.0    14   13      0      0      0      0
    2 C7H13NO6    97.5    13   13      0      0      0      0
    3 C7H13NO7    90.5    13   12      0      0      0      0
    4 C7H13NO7    83.0    12   11      0      0      0      0
    5 C7H13NO7    82.5    11   11      0      0      0      0
    6 C7H16N2O6   82.5    11   11      0      0      0      0
    7 C7H16N2O6   77.0    14   10      0      0      0      0
    8 C8H13NO8    76.0    12   10      0      0      0      0
    9 C8H13NO8    76.0    12   10      0      0      0      0
   10 C7H16N2O6   75.0    10   10      0      0      0      0
   11 C7H13NO6    75.0    10   10      0      0      0      0
   12 C13H17NO6   71.5    11    8      0      0      0      2
   13 C8H15NO6    70.5     9    8      0      0      0      2
   14 C13H17NO6   70.5     9    8      0      0      0      2
   15 C13H17NO6   70.5     9    8      0      0      0      2
   16 C13H17NO6   70.0     8    8      0      0      0      2
   17 C8H13NO8    69.0    12    9      0      0      0      0
   18 C7H13NO7    68.0    10    9      0      0      0      0
   19 C7H16N2O6   67.5     9    9      0      0      0      0
   20 C8H13NO8    67.5     9    9      0      0      0      0
   21 C8H15NO6    65.5     9    8      0      0      0      1
   22 C8H13NO8    61.5    11    8      0      0      0      0
   23 C7H13NO7    60.5     9    8      0      0      0      0
   24 C7H13NO6    60.5     9    8      0      0      0      0
   25 C7H13NO7    60.0     8    8      0      0      0      0
   26 C13H17NO6   59.0     8    5      0      0      0      4
   27 C8H15NO6    58.5     9    7      0      0      0      1
   28 C13H17NO6   58.5     5    3      0      0      0      7
   29 C13H17NO6   58.0     8    7      0      0      0      1
   30 C8H15NO6    56.0     8    6      0      0      0      2
   31 C7H13NO6    53.5     9    7      0      0      0      0
   32 C8H13NO8    53.5     9    7      0      0      0      0
   33 C7H13NO7    53.0     8    7      0      0      0      0
   34 C7H16N2O6   53.0     8    7      0      0      0      0
   35 C7H13NO6    53.0     8    7      0      0      0      0
   36 C8H13NO8    53.0     8    7      0      0      0      0
   37 C8H13NO8    53.0     8    7      0      0      0      0
   38 C7H13NO7    52.5     7    7      0      0      0      0
   39 C7H16N2O6   52.5     7    7      0      0      0      0
   40 C7H16N2O6   52.5     7    7      0      0      0      0
   41 C7H16N2O6   52.5     7    7      0      0      0      0
   42 C7H13NO6    52.5     7    7      0      0      0      0
   43 C8H15NO6    52.0    10    6      0      0      0      1
   44 C8H15NO6    50.5     7    6      0      0      0      1
   45 C13H17NO6   50.5     5    4      0      0      0      4
   46 C8H15NO6    50.0     6    6      0      0      0      1
   47 C7H13NO6    46.0     8    6      0      0      0      0
   48 C7H13NO6    45.5     7    6      0      0      0      0
   49 C13H17NO6   45.5     5    4      0      0      0      3
   50 C7H13NO6    45.0     6    6      0      0      0      0
   51 C7H16N2O6   38.0     6    5      0      0      0      0
   52 C8H15NO6    35.0     4    4      0      0      0      1
   53 C8H15NO6    23.5     5    3      0      0      0      0

Видим, что ситуация не особо изменилась в данном случае (для топ-5, к примеру)