Цель данного занятия — ознакомиться с возможностями докинга низкомолекулярного лиганда в структуру белка. Белок — модель, предсказанная в прошлом практикуме с помощью гомологичного моделирования. Лиганд — модифицированный NAG, в котором метильный радикал группы CH3C(=O)NH заменён на:

  • OH
  • NH3+
  • H
  • Ph
  • COO-

In [1]:
import numpy as np
import copy

# Отображение структур
import IPython.display
import ipywidgets
from IPython.display import display,display_svg,SVG,Image

# Open Drug Discovery Toolkit
import oddt
import oddt.docking
import oddt.interactions

# Органика
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole

import pmx # Модуль для манипулирования pdb 

Подготовим белок. Посмотрим на структуру предсказанного белка лизоцима тутового шелкопряда и выпишем его остатки

In [5]:
pdb=pmx.Model('LYZ_lig.B99990001.pdb')

for r in pdb.residues[:137]:
    print r #посмотрим остатки
<Molecule: id = 1 name = MET chain_id =    natoms = 8>
<Molecule: id = 2 name = GLN chain_id =    natoms = 9>
<Molecule: id = 3 name = LYS chain_id =    natoms = 9>
<Molecule: id = 4 name = LEU chain_id =    natoms = 8>
<Molecule: id = 5 name = ILE chain_id =    natoms = 8>
<Molecule: id = 6 name = ILE chain_id =    natoms = 8>
<Molecule: id = 7 name = PHE chain_id =    natoms = 11>
<Molecule: id = 8 name = ALA chain_id =    natoms = 5>
<Molecule: id = 9 name = LEU chain_id =    natoms = 8>
<Molecule: id = 10 name = VAL chain_id =    natoms = 7>
<Molecule: id = 11 name = VAL chain_id =    natoms = 7>
<Molecule: id = 12 name = LEU chain_id =    natoms = 8>
<Molecule: id = 13 name = CYS chain_id =    natoms = 6>
<Molecule: id = 14 name = VAL chain_id =    natoms = 7>
<Molecule: id = 15 name = GLY chain_id =    natoms = 4>
<Molecule: id = 16 name = SER chain_id =    natoms = 6>
<Molecule: id = 17 name = GLU chain_id =    natoms = 9>
<Molecule: id = 18 name = ALA chain_id =    natoms = 5>
<Molecule: id = 19 name = LYS chain_id =    natoms = 9>
<Molecule: id = 20 name = THR chain_id =    natoms = 7>
<Molecule: id = 21 name = PHE chain_id =    natoms = 11>
<Molecule: id = 22 name = THR chain_id =    natoms = 7>
<Molecule: id = 23 name = ARG chain_id =    natoms = 11>
<Molecule: id = 24 name = CYS chain_id =    natoms = 6>
<Molecule: id = 25 name = GLY chain_id =    natoms = 4>
<Molecule: id = 26 name = LEU chain_id =    natoms = 8>
<Molecule: id = 27 name = VAL chain_id =    natoms = 7>
<Molecule: id = 28 name = HIS chain_id =    natoms = 10>
<Molecule: id = 29 name = GLU chain_id =    natoms = 9>
<Molecule: id = 30 name = LEU chain_id =    natoms = 8>
<Molecule: id = 31 name = ARG chain_id =    natoms = 11>
<Molecule: id = 32 name = LYS chain_id =    natoms = 9>
<Molecule: id = 33 name = HIS chain_id =    natoms = 10>
<Molecule: id = 34 name = GLY chain_id =    natoms = 4>
<Molecule: id = 35 name = PHE chain_id =    natoms = 11>
<Molecule: id = 36 name = GLU chain_id =    natoms = 9>
<Molecule: id = 37 name = GLU chain_id =    natoms = 9>
<Molecule: id = 38 name = ASN chain_id =    natoms = 8>
<Molecule: id = 39 name = LEU chain_id =    natoms = 8>
<Molecule: id = 40 name = MET chain_id =    natoms = 8>
<Molecule: id = 41 name = ARG chain_id =    natoms = 11>
<Molecule: id = 42 name = ASN chain_id =    natoms = 8>
<Molecule: id = 43 name = TRP chain_id =    natoms = 14>
<Molecule: id = 44 name = VAL chain_id =    natoms = 7>
<Molecule: id = 45 name = CYS chain_id =    natoms = 6>
<Molecule: id = 46 name = LEU chain_id =    natoms = 8>
<Molecule: id = 47 name = VAL chain_id =    natoms = 7>
<Molecule: id = 48 name = GLU chain_id =    natoms = 9>
<Molecule: id = 49 name = HIS chain_id =    natoms = 10>
<Molecule: id = 50 name = GLU chain_id =    natoms = 9>
<Molecule: id = 51 name = SER chain_id =    natoms = 6>
<Molecule: id = 52 name = SER chain_id =    natoms = 6>
<Molecule: id = 53 name = ARG chain_id =    natoms = 11>
<Molecule: id = 54 name = ASP chain_id =    natoms = 8>
<Molecule: id = 55 name = THR chain_id =    natoms = 7>
<Molecule: id = 56 name = SER chain_id =    natoms = 6>
<Molecule: id = 57 name = LYS chain_id =    natoms = 9>
<Molecule: id = 58 name = THR chain_id =    natoms = 7>
<Molecule: id = 59 name = ASN chain_id =    natoms = 8>
<Molecule: id = 60 name = THR chain_id =    natoms = 7>
<Molecule: id = 61 name = ASN chain_id =    natoms = 8>
<Molecule: id = 62 name = ARG chain_id =    natoms = 11>
<Molecule: id = 63 name = ASN chain_id =    natoms = 8>
<Molecule: id = 64 name = GLY chain_id =    natoms = 4>
<Molecule: id = 65 name = SER chain_id =    natoms = 6>
<Molecule: id = 66 name = LYS chain_id =    natoms = 9>
<Molecule: id = 67 name = ASP chain_id =    natoms = 8>
<Molecule: id = 68 name = TYR chain_id =    natoms = 12>
<Molecule: id = 69 name = GLY chain_id =    natoms = 4>
<Molecule: id = 70 name = LEU chain_id =    natoms = 8>
<Molecule: id = 71 name = PHE chain_id =    natoms = 11>
<Molecule: id = 72 name = GLN chain_id =    natoms = 9>
<Molecule: id = 73 name = ILE chain_id =    natoms = 8>
<Molecule: id = 74 name = ASN chain_id =    natoms = 8>
<Molecule: id = 75 name = ASP chain_id =    natoms = 8>
<Molecule: id = 76 name = ARG chain_id =    natoms = 11>
<Molecule: id = 77 name = TYR chain_id =    natoms = 12>
<Molecule: id = 78 name = TRP chain_id =    natoms = 14>
<Molecule: id = 79 name = CYS chain_id =    natoms = 6>
<Molecule: id = 80 name = SER chain_id =    natoms = 6>
<Molecule: id = 81 name = LYS chain_id =    natoms = 9>
<Molecule: id = 82 name = GLY chain_id =    natoms = 4>
<Molecule: id = 83 name = ALA chain_id =    natoms = 5>
<Molecule: id = 84 name = SER chain_id =    natoms = 6>
<Molecule: id = 85 name = PRO chain_id =    natoms = 7>
<Molecule: id = 86 name = GLY chain_id =    natoms = 4>
<Molecule: id = 87 name = LYS chain_id =    natoms = 9>
<Molecule: id = 88 name = ASP chain_id =    natoms = 8>
<Molecule: id = 89 name = CYS chain_id =    natoms = 6>
<Molecule: id = 90 name = ASN chain_id =    natoms = 8>
<Molecule: id = 91 name = VAL chain_id =    natoms = 7>
<Molecule: id = 92 name = LYS chain_id =    natoms = 9>
<Molecule: id = 93 name = CYS chain_id =    natoms = 6>
<Molecule: id = 94 name = SER chain_id =    natoms = 6>
<Molecule: id = 95 name = ASP chain_id =    natoms = 8>
<Molecule: id = 96 name = LEU chain_id =    natoms = 8>
<Molecule: id = 97 name = LEU chain_id =    natoms = 8>
<Molecule: id = 98 name = THR chain_id =    natoms = 7>
<Molecule: id = 99 name = ASP chain_id =    natoms = 8>
<Molecule: id = 100 name = ASP chain_id =    natoms = 8>
<Molecule: id = 101 name = ILE chain_id =    natoms = 8>
<Molecule: id = 102 name = THR chain_id =    natoms = 7>
<Molecule: id = 103 name = LYS chain_id =    natoms = 9>
<Molecule: id = 104 name = ALA chain_id =    natoms = 5>
<Molecule: id = 105 name = ALA chain_id =    natoms = 5>
<Molecule: id = 106 name = LYS chain_id =    natoms = 9>
<Molecule: id = 107 name = CYS chain_id =    natoms = 6>
<Molecule: id = 108 name = ALA chain_id =    natoms = 5>
<Molecule: id = 109 name = LYS chain_id =    natoms = 9>
<Molecule: id = 110 name = LYS chain_id =    natoms = 9>
<Molecule: id = 111 name = ILE chain_id =    natoms = 8>
<Molecule: id = 112 name = TYR chain_id =    natoms = 12>
<Molecule: id = 113 name = LYS chain_id =    natoms = 9>
<Molecule: id = 114 name = ARG chain_id =    natoms = 11>
<Molecule: id = 115 name = HIS chain_id =    natoms = 10>
<Molecule: id = 116 name = ARG chain_id =    natoms = 11>
<Molecule: id = 117 name = PHE chain_id =    natoms = 11>
<Molecule: id = 118 name = ASP chain_id =    natoms = 8>
<Molecule: id = 119 name = ALA chain_id =    natoms = 5>
<Molecule: id = 120 name = TRP chain_id =    natoms = 14>
<Molecule: id = 121 name = TYR chain_id =    natoms = 12>
<Molecule: id = 122 name = GLY chain_id =    natoms = 4>
<Molecule: id = 123 name = TRP chain_id =    natoms = 14>
<Molecule: id = 124 name = LYS chain_id =    natoms = 9>
<Molecule: id = 125 name = ASN chain_id =    natoms = 8>
<Molecule: id = 126 name = HIS chain_id =    natoms = 10>
<Molecule: id = 127 name = CYS chain_id =    natoms = 6>
<Molecule: id = 128 name = GLN chain_id =    natoms = 9>
<Molecule: id = 129 name = GLY chain_id =    natoms = 4>
<Molecule: id = 130 name = SER chain_id =    natoms = 6>
<Molecule: id = 131 name = LEU chain_id =    natoms = 8>
<Molecule: id = 132 name = PRO chain_id =    natoms = 7>
<Molecule: id = 133 name = ASP chain_id =    natoms = 8>
<Molecule: id = 134 name = ILE chain_id =    natoms = 8>
<Molecule: id = 135 name = SER chain_id =    natoms = 6>
<Molecule: id = 136 name = SER chain_id =    natoms = 6>
<Molecule: id = 137 name = CYS chain_id =    natoms = 7>
In [6]:
# Build separate protein and ligand objects from the loaded model.

# Protein: a copy of the model with its last three residues removed one by one.
newpdb = pdb.copy()
for trailing_residue in newpdb.residues[-3:]:
    newpdb.remove_residue(trailing_residue)

# Ligand: a second copy in which only the last three residues are kept.
# NOTE(review): this trims the residues list only; whether lig.atoms is
# trimmed as well is not visible here - confirm against pmx.
lig = pdb.copy()
del lig.residues[:-3]
In [7]:
for r in newpdb.residues:
    print r #посмотрим остатки
print    
for r in lig.residues:
    print r #посмотрим остатки    
<Molecule: id = 1 name = MET chain_id =    natoms = 8>
<Molecule: id = 2 name = GLN chain_id =    natoms = 9>
<Molecule: id = 3 name = LYS chain_id =    natoms = 9>
<Molecule: id = 4 name = LEU chain_id =    natoms = 8>
<Molecule: id = 5 name = ILE chain_id =    natoms = 8>
<Molecule: id = 6 name = ILE chain_id =    natoms = 8>
<Molecule: id = 7 name = PHE chain_id =    natoms = 11>
<Molecule: id = 8 name = ALA chain_id =    natoms = 5>
<Molecule: id = 9 name = LEU chain_id =    natoms = 8>
<Molecule: id = 10 name = VAL chain_id =    natoms = 7>
<Molecule: id = 11 name = VAL chain_id =    natoms = 7>
<Molecule: id = 12 name = LEU chain_id =    natoms = 8>
<Molecule: id = 13 name = CYS chain_id =    natoms = 6>
<Molecule: id = 14 name = VAL chain_id =    natoms = 7>
<Molecule: id = 15 name = GLY chain_id =    natoms = 4>
<Molecule: id = 16 name = SER chain_id =    natoms = 6>
<Molecule: id = 17 name = GLU chain_id =    natoms = 9>
<Molecule: id = 18 name = ALA chain_id =    natoms = 5>
<Molecule: id = 19 name = LYS chain_id =    natoms = 9>
<Molecule: id = 20 name = THR chain_id =    natoms = 7>
<Molecule: id = 21 name = PHE chain_id =    natoms = 11>
<Molecule: id = 22 name = THR chain_id =    natoms = 7>
<Molecule: id = 23 name = ARG chain_id =    natoms = 11>
<Molecule: id = 24 name = CYS chain_id =    natoms = 6>
<Molecule: id = 25 name = GLY chain_id =    natoms = 4>
<Molecule: id = 26 name = LEU chain_id =    natoms = 8>
<Molecule: id = 27 name = VAL chain_id =    natoms = 7>
<Molecule: id = 28 name = HIS chain_id =    natoms = 10>
<Molecule: id = 29 name = GLU chain_id =    natoms = 9>
<Molecule: id = 30 name = LEU chain_id =    natoms = 8>
<Molecule: id = 31 name = ARG chain_id =    natoms = 11>
<Molecule: id = 32 name = LYS chain_id =    natoms = 9>
<Molecule: id = 33 name = HIS chain_id =    natoms = 10>
<Molecule: id = 34 name = GLY chain_id =    natoms = 4>
<Molecule: id = 35 name = PHE chain_id =    natoms = 11>
<Molecule: id = 36 name = GLU chain_id =    natoms = 9>
<Molecule: id = 37 name = GLU chain_id =    natoms = 9>
<Molecule: id = 38 name = ASN chain_id =    natoms = 8>
<Molecule: id = 39 name = LEU chain_id =    natoms = 8>
<Molecule: id = 40 name = MET chain_id =    natoms = 8>
<Molecule: id = 41 name = ARG chain_id =    natoms = 11>
<Molecule: id = 42 name = ASN chain_id =    natoms = 8>
<Molecule: id = 43 name = TRP chain_id =    natoms = 14>
<Molecule: id = 44 name = VAL chain_id =    natoms = 7>
<Molecule: id = 45 name = CYS chain_id =    natoms = 6>
<Molecule: id = 46 name = LEU chain_id =    natoms = 8>
<Molecule: id = 47 name = VAL chain_id =    natoms = 7>
<Molecule: id = 48 name = GLU chain_id =    natoms = 9>
<Molecule: id = 49 name = HIS chain_id =    natoms = 10>
<Molecule: id = 50 name = GLU chain_id =    natoms = 9>
<Molecule: id = 51 name = SER chain_id =    natoms = 6>
<Molecule: id = 52 name = SER chain_id =    natoms = 6>
<Molecule: id = 53 name = ARG chain_id =    natoms = 11>
<Molecule: id = 54 name = ASP chain_id =    natoms = 8>
<Molecule: id = 55 name = THR chain_id =    natoms = 7>
<Molecule: id = 56 name = SER chain_id =    natoms = 6>
<Molecule: id = 57 name = LYS chain_id =    natoms = 9>
<Molecule: id = 58 name = THR chain_id =    natoms = 7>
<Molecule: id = 59 name = ASN chain_id =    natoms = 8>
<Molecule: id = 60 name = THR chain_id =    natoms = 7>
<Molecule: id = 61 name = ASN chain_id =    natoms = 8>
<Molecule: id = 62 name = ARG chain_id =    natoms = 11>
<Molecule: id = 63 name = ASN chain_id =    natoms = 8>
<Molecule: id = 64 name = GLY chain_id =    natoms = 4>
<Molecule: id = 65 name = SER chain_id =    natoms = 6>
<Molecule: id = 66 name = LYS chain_id =    natoms = 9>
<Molecule: id = 67 name = ASP chain_id =    natoms = 8>
<Molecule: id = 68 name = TYR chain_id =    natoms = 12>
<Molecule: id = 69 name = GLY chain_id =    natoms = 4>
<Molecule: id = 70 name = LEU chain_id =    natoms = 8>
<Molecule: id = 71 name = PHE chain_id =    natoms = 11>
<Molecule: id = 72 name = GLN chain_id =    natoms = 9>
<Molecule: id = 73 name = ILE chain_id =    natoms = 8>
<Molecule: id = 74 name = ASN chain_id =    natoms = 8>
<Molecule: id = 75 name = ASP chain_id =    natoms = 8>
<Molecule: id = 76 name = ARG chain_id =    natoms = 11>
<Molecule: id = 77 name = TYR chain_id =    natoms = 12>
<Molecule: id = 78 name = TRP chain_id =    natoms = 14>
<Molecule: id = 79 name = CYS chain_id =    natoms = 6>
<Molecule: id = 80 name = SER chain_id =    natoms = 6>
<Molecule: id = 81 name = LYS chain_id =    natoms = 9>
<Molecule: id = 82 name = GLY chain_id =    natoms = 4>
<Molecule: id = 83 name = ALA chain_id =    natoms = 5>
<Molecule: id = 84 name = SER chain_id =    natoms = 6>
<Molecule: id = 85 name = PRO chain_id =    natoms = 7>
<Molecule: id = 86 name = GLY chain_id =    natoms = 4>
<Molecule: id = 87 name = LYS chain_id =    natoms = 9>
<Molecule: id = 88 name = ASP chain_id =    natoms = 8>
<Molecule: id = 89 name = CYS chain_id =    natoms = 6>
<Molecule: id = 90 name = ASN chain_id =    natoms = 8>
<Molecule: id = 91 name = VAL chain_id =    natoms = 7>
<Molecule: id = 92 name = LYS chain_id =    natoms = 9>
<Molecule: id = 93 name = CYS chain_id =    natoms = 6>
<Molecule: id = 94 name = SER chain_id =    natoms = 6>
<Molecule: id = 95 name = ASP chain_id =    natoms = 8>
<Molecule: id = 96 name = LEU chain_id =    natoms = 8>
<Molecule: id = 97 name = LEU chain_id =    natoms = 8>
<Molecule: id = 98 name = THR chain_id =    natoms = 7>
<Molecule: id = 99 name = ASP chain_id =    natoms = 8>
<Molecule: id = 100 name = ASP chain_id =    natoms = 8>
<Molecule: id = 101 name = ILE chain_id =    natoms = 8>
<Molecule: id = 102 name = THR chain_id =    natoms = 7>
<Molecule: id = 103 name = LYS chain_id =    natoms = 9>
<Molecule: id = 104 name = ALA chain_id =    natoms = 5>
<Molecule: id = 105 name = ALA chain_id =    natoms = 5>
<Molecule: id = 106 name = LYS chain_id =    natoms = 9>
<Molecule: id = 107 name = CYS chain_id =    natoms = 6>
<Molecule: id = 108 name = ALA chain_id =    natoms = 5>
<Molecule: id = 109 name = LYS chain_id =    natoms = 9>
<Molecule: id = 110 name = LYS chain_id =    natoms = 9>
<Molecule: id = 111 name = ILE chain_id =    natoms = 8>
<Molecule: id = 112 name = TYR chain_id =    natoms = 12>
<Molecule: id = 113 name = LYS chain_id =    natoms = 9>
<Molecule: id = 114 name = ARG chain_id =    natoms = 11>
<Molecule: id = 115 name = HIS chain_id =    natoms = 10>
<Molecule: id = 116 name = ARG chain_id =    natoms = 11>
<Molecule: id = 117 name = PHE chain_id =    natoms = 11>
<Molecule: id = 118 name = ASP chain_id =    natoms = 8>
<Molecule: id = 119 name = ALA chain_id =    natoms = 5>
<Molecule: id = 120 name = TRP chain_id =    natoms = 14>
<Molecule: id = 121 name = TYR chain_id =    natoms = 12>
<Molecule: id = 122 name = GLY chain_id =    natoms = 4>
<Molecule: id = 123 name = TRP chain_id =    natoms = 14>
<Molecule: id = 124 name = LYS chain_id =    natoms = 9>
<Molecule: id = 125 name = ASN chain_id =    natoms = 8>
<Molecule: id = 126 name = HIS chain_id =    natoms = 10>
<Molecule: id = 127 name = CYS chain_id =    natoms = 6>
<Molecule: id = 128 name = GLN chain_id =    natoms = 9>
<Molecule: id = 129 name = GLY chain_id =    natoms = 4>
<Molecule: id = 130 name = SER chain_id =    natoms = 6>
<Molecule: id = 131 name = LEU chain_id =    natoms = 8>
<Molecule: id = 132 name = PRO chain_id =    natoms = 7>
<Molecule: id = 133 name = ASP chain_id =    natoms = 8>
<Molecule: id = 134 name = ILE chain_id =    natoms = 8>
<Molecule: id = 135 name = SER chain_id =    natoms = 6>
<Molecule: id = 136 name = SER chain_id =    natoms = 6>
<Molecule: id = 137 name = CYS chain_id =    natoms = 7>

<Molecule: id = 138 name = NAG chain_id =    natoms = 14>
<Molecule: id = 139 name = NAG chain_id =    natoms = 14>
<Molecule: id = 140 name = NDG chain_id =    natoms = 15>
In [8]:
help(pmx.Atom)
x_list = []
x_coord = []
y_coord = []
z_coord = []

for a in lig.atoms:
    x_list.append(a.x)# найдите геометрический центр лиганда

for coord in x_list:
    x_coord.append(coord[0])
    y_coord.append(coord[1])
    z_coord.append(coord[2])
    

center = (np.mean(x_coord),np.mean(y_coord),np.mean(z_coord))
print center
Help on class Atom in module pmx.atom:

class Atom
 |  class for storage of atom properties and methods
 |  
 |  Methods defined here:
 |  
 |  __init__(self, line=None, mol2line=None, **kwargs)
 |  
 |  __str__(self)
 |      prints the atom in PDB format
 |  
 |  __sub__(self, other)
 |      Overloading of the '-' operator for using
 |      atom1-atom2 instead of atom1.dist(atom2)
 |  
 |  a2nm(self)
 |  
 |  angle(self, other1, other2, degree=None)
 |      Calcluates the angle between 3 atoms
 |      Usage: atom1.angle(atom2,atom3)
 |      The degree flag causes the function to return the angle
 |      in degrees.
 |      (Note: atom1 must be between 2 and 3)
 |  
 |  copy(self)
 |      copy atom
 |  
 |  dihedral(self, other1, other2, other3, degree=None)
 |      Calculates the dihedral between four atoms.
 |      Usage: atom1.dihedral(atom2,atom3,atom4)
 |      The degree flag causes the function to return the dihedral
 |      in degrees.
 |  
 |  dist(self, other)
 |      returns the distance between two atoms
 |      Usage: dist=atom1.dist(atom2)
 |      This function is also called by typing
 |      d=atom1-atom2
 |  
 |  dist2(self, other)
 |      returns the squared distance between two atoms
 |      Usage: dist=atom1.dist2(atom2)
 |  
 |  get_order(self)
 |      get the order (number of bonds to mainchain)
 |  
 |  get_symbol(self)
 |      get element
 |  
 |  make_long_name(self)
 |      make extended name to determine element
 |      and order
 |  
 |  nm2a(self)
 |  
 |  readPDBString(self, line)
 |      PDB String to Atom
 |  
 |  read_mol2_line(self, line)
 |  
 |  set_chain_id(self, chain_id)
 |      change chain identifier
 |  
 |  set_resname(self, resname)
 |  
 |  translate(self, v)

(38.993956979806853, 32.899071992976296, 23.871308165057069)
In [9]:
# Save the protein copy (model without its last three residues) as a PDB file
# in the current working directory.
newpdb.writePDB('protein_no_lig.pdb')

Подготовим белок для докинга. ODDT сообщает, что первая молекула в PDB-файле — не белок, хотя её масса равна 15274. Однако если посмотреть в сам файл, то там записаны координаты аминокислот. Выдаче не верим, но файл проверить стоит.

In [10]:
prot = oddt.toolkit.readfile('pdb','/tmp/prot.pdb').next()

prot.OBMol.AddPolarHydrogens()
prot.OBMol.AutomaticPartialCharge()


print 'is the first mol in 1lmp is protein?',prot.protein,':) and MW of this mol is:', prot.molwt
is the first mol in 1lmp is protein? False :) and MW of this mol is: 15274.26022

Подготовим лиганд

In [11]:
# Ligands to dock, as SMILES: the acetamide sugar itself (CH3 substituent),
# followed by the variants carrying OH, NH3+, H, phenyl and COO- instead.
smiles = [
    "CC(=O)NC1C(C(C(OC1O)CO)O)O",
    "OC(=O)NC1C(C(C(OC1O)CO)O)O",
    "[NH3+]C(=O)NC1C(C(C(OC1O)CO)O)O",
    "C(=O)NC1C(C(C(OC1O)CO)O)O",
    "C1=CC=CC=C1C(=O)NC1C(C(C(OC1O)CO)O)O",
    "[O-]C(=O)C(=O)NC1C(C(C(OC1O)CO)O)O",
]
mols = []
images = []

for smi in smiles:
    ligand = oddt.toolkit.readstring('smi', smi)

    # Generate 3D coordinates only when the molecule has none yet, then keep
    # polar hydrogens only.
    if not ligand.OBMol.Has3D():
        ligand.make3D(forcefield='mmff94', steps=150)
        ligand.removeh()
        ligand.OBMol.AddPolarHydrogens()

    mols.append(ligand)

    # Author's observation (checked with OBMol.Has3D()): writing SVG flattens
    # the molecule, so the picture is rendered from a deep copy and the
    # docking input keeps its 3D coordinates.
    flat_copy = copy.deepcopy(ligand)
    images.append(SVG(flat_copy.write('svg')))

display_svg(*images)
***** - Open Babel Depiction O H H H H H O O O CH 3 O N O *****
***** - Open Babel Depiction O H H H H H H O O O O O N O *****
***** - Open Babel Depiction H H H H H H H H O O O N O O N O *****
***** - Open Babel Depiction O H H H H H O O O O N O *****
***** - Open Babel Depiction H H H H H O O O O O N O *****
***** - Open Babel Depiction H H H H H O O O O O O N O HO *****

Докинг

In [12]:
#create docking object
dock_obj= oddt.docking.AutodockVina.autodock_vina(
    protein=prot,size=(20,20,20),center=center,
    executable='/usr/bin/vina',autocleanup=True, num_modes=20)

print dock_obj.tmp_dir  #это папка докинга
print " ".join(dock_obj.params)
/tmp/autodock_vina_Biyzj8
--center_x 38.9939569798 --center_y 32.899071993 --center_z 23.8713081651 --size_x 20 --size_y 20 --size_z 20 --cpu 1 --exhaustiveness 8 --num_modes 20 --energy_range 3

Вначале идут координаты центра лиганда, затем размеры grid-box, 1 ядро, точность расчетов, количество положений лиганда в выдаче, максимальная разница энергии с наилучшей находкой.

In [13]:
# Dock all prepared ligands (mols) against the protein; the cells below
# iterate over res one docked pose at a time.
res = dock_obj.dock(mols,prot)

Результаты докинга

Покажем заместители от лучшего по энергии к худшему:

In [16]:
#print '#\tformula\tenergy\trmsd\tprot_hbonds\tlig_hbonds\tprot_stack\tlig_stack\tprot_phob\tlig_phob'
res_energy = {}
res_dict = {}
print '#\tenergy\tformula\t\trmsd\thbonds\tstack\tphob'
for i,r in enumerate(res):    
    hbs = oddt.interactions.hbonds(prot,r) #это массив массивов взаимодействий. первый массив - белок, второй - лиганд
    stack= oddt.interactions.pi_stacking(prot,r)
    phob = oddt.interactions.hydrophobic_contacts(prot,r)
    res_energy[i] = r.data['vina_affinity']
    res_dict[i] = [r.formula, r.data['vina_rmsd_ub'],str(len(hbs[0])),str(len(stack[0])),str(len(phob[0]))]
#    print '\t'.join([str(i),r.formula, r.data['vina_affinity'],  r.data['vina_rmsd_ub'],str(len(hbs[0])),str(len(stack[0])),str(len(phob[0]))])
#    print '\t'.join([str(i),r.formula, r.data['vina_affinity'],  r.data['vina_rmsd_ub'],str(len(hbs[0])),str(len(hbs[1])),str(len(stack[0])),str(len(stack[1])),str(len(phob[0])),str(len(phob[1]))])

for k, v in sorted(res_energy.items(), key=lambda x:x[1],reverse=True):   
    print '\t'.join([str(k)]+[str(v)]+res_dict[k])
#	energy	formula		rmsd	hbonds	stack	phob
9	-5.3	C7H13NO7	0.000	13	0	0
36	-5.0	C13H17NO6	0.000	5	0	6
10	-4.9	C7H13NO7	4.954	11	0	0
37	-4.8	C13H17NO6	3.081	6	1	9
38	-4.8	C13H17NO6	2.987	4	1	6
18	-4.7	C7H15N2O6	0.000	9	0	0
39	-4.5	C13H17NO6	3.042	3	1	6
27	-4.4	C7H13NO6	0.000	7	0	0
40	-4.4	C13H17NO6	7.074	5	0	2
28	-4.3	C7H13NO6	4.838	5	0	0
41	-4.3	C13H17NO6	5.700	2	0	4
42	-4.3	C13H17NO6	3.631	4	0	0
45	-4.3	C8H13NO8	0.000	7	0	0
46	-4.3	C8H13NO8	16.454	9	0	0
0	-4.2	C8H15NO6	0.000	6	0	1
11	-4.2	C7H13NO7	14.603	7	0	0
29	-4.2	C7H13NO6	16.089	6	0	0
43	-4.2	C13H17NO6	7.710	3	0	5
44	-4.2	C13H17NO6	3.964	4	0	8
47	-4.2	C8H13NO8	6.423	8	0	0
48	-4.2	C8H13NO8	11.579	8	0	0
12	-4.1	C7H13NO7	14.660	4	0	0
13	-4.1	C7H13NO7	15.745	4	0	0
19	-4.1	C7H15N2O6	14.643	5	0	0
49	-4.1	C8H13NO8	12.793	6	0	0
14	-4.0	C7H13NO7	15.979	8	0	0
15	-4.0	C7H13NO7	17.007	7	0	0
20	-4.0	C7H15N2O6	15.169	7	0	0
21	-4.0	C7H15N2O6	15.775	3	0	0
30	-4.0	C7H13NO6	14.580	5	0	0
31	-4.0	C7H13NO6	4.127	8	0	0
32	-4.0	C7H13NO6	3.749	6	0	0
50	-4.0	C8H13NO8	5.928	6	0	0
51	-4.0	C8H13NO8	5.072	7	0	0
52	-4.0	C8H13NO8	3.089	3	0	0
1	-3.9	C8H15NO6	8.034	5	0	2
2	-3.9	C8H15NO6	4.650	5	0	1
3	-3.9	C8H15NO6	8.751	4	0	3
4	-3.9	C8H15NO6	3.950	7	0	1
16	-3.9	C7H13NO7	15.297	8	0	0
22	-3.9	C7H15N2O6	18.240	8	0	0
23	-3.9	C7H15N2O6	15.364	7	0	0
24	-3.9	C7H15N2O6	16.158	5	0	0
33	-3.9	C7H13NO6	15.478	5	0	0
53	-3.9	C8H13NO8	6.917	6	0	0
5	-3.8	C8H15NO6	8.297	6	0	0
6	-3.8	C8H15NO6	4.639	5	0	1
7	-3.8	C8H15NO6	5.640	4	0	1
17	-3.8	C7H13NO7	14.766	6	0	0
25	-3.8	C7H15N2O6	14.584	7	0	0
34	-3.8	C7H13NO6	14.225	5	0	0
8	-3.7	C8H15NO6	4.884	3	0	2
26	-3.7	C7H15N2O6	14.544	2	0	0
35	-3.7	C7H13NO6	15.326	6	0	0

Визуализация:

In [20]:
# Write each docked pose to its own PDB file named after its index
# (r0.pdb, r1.pdb, ...), overwriting files left from earlier runs.
for pose_index, pose in enumerate(res):
    out_name = 'r%s.pdb' % pose_index
    pose.write(filename=out_name, format='pdb', overwrite=True)

Лиганд №9 (жёлтый), с наименьшей энергией, располагается приблизительно в том же месте, что и NAG (синий). А вот №35 (зелёный), с наибольшей энергией, располагается совсем не там и слишком близко к остову белка.

In [21]:
# Display the image file result.png as this cell's output.
# Image is also imported in the first cell; the import is repeated here.
from IPython.display import Image
Image(filename='result.png')
Out[21]: