Практикум 3. Хемоинформатика

In [1]:
from IPython.display import Image, display
In [2]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import RDConfig
from rdkit.Chem.Draw import IPythonConsole 
from rdkit.Chem import Draw, Lipinski

import numpy as np
from IPython.display import display,Image

Правило 5 Лепински:

In [4]:
def lipinski_rule(smiles):
    don = Lipinski.NumHDonors(smiles) < 5
    acc = Lipinski.NumHAcceptors(smiles) < 10
    wgt = Lipinski.rdMolDescriptors.CalcExactMolWt(smiles) < 500
    des = Lipinski.rdMolDescriptors.CalcCrippenDescriptors(smiles)[0] < 5
    return all([don, acc, wgt, des])

Нарисуем ибупрофен:

In [5]:
ibu=Chem.MolFromSmiles('CC(C)CC1=CC=C(C=C1)C(C)C(=O)O')
AllChem.Compute2DCoords(ibu)
display(ibu)

Сделаем производное ибупрофена для клик-химии

In [15]:
nnn_ibu=Chem.MolFromSmiles('CC(C)Cc1ccc(cc1)[C@@](C)(C1=CN=NN1)C(=O)O')  # SMILES с википедии =)
AllChem.Compute2DCoords(nnn_ibu)
display(nnn_ibu)
# Хотя от механизма реакции зависит, какое стереопроизводное будет в результате получения.
# Дальше лучше буду рисовать без этой информации.

Перевернем его для удобного использвания дальше

In [20]:
templ = Chem.MolFromSmiles('N1N=NC(=C1)C(C(O)=O)(C)C1=CC=C(C=C1)CC(C)C')
AllChem.Compute2DCoords(templ)
display(templ)

Идем в PubChem https://pubchem.ncbi.nlm.nih.gov/search/search.cgi и ищем поиском Substructure search (33558).

Скачали данные в формате SMILES

In [35]:
import pandas as pd
In [52]:
smiles = pd.read_csv("./4432027415351750751.txt", sep="\t", header=None, names=["id", "SMILES"])
smiles.head(3)
Out[52]:
id SMILES
0 146027037 C1[C@H]2[C@@H]([C@@H](S1)CCCCC(=O)NCCN(CC3=CC=...
1 146026689 C1=CN2C(=CN=N2)C(=C1)Cl
2 146026636 CCOC1=NC2=CC=CC(=C2N1CC3=CC=C(C=C3)C4=CC=CC=C4...
In [53]:
len(smiles)
Out[53]:
500000
In [54]:
smiles = smiles["SMILES"]
In [56]:
condition = smiles.apply(lambda x: '.' not in x) &\
            smiles.apply(lambda x: len(x) < 30)
In [59]:
targets = smiles[condition].reset_index(drop=True)
targets
Out[59]:
0              C1=CN2C(=CN=N2)C(=C1)Cl
1                CN1C=C(N=N1)C2(CCC2)N
2         C1C(OC2=CC=CC=C2O1)C3=NNN=C3
3            CC(C1=NNN=C1)(C2=NC=CS2)O
4        C1C2=CC=CC=C2I(O1)N=[N+]=[N-]
                     ...              
37474            C1=CC=C2C(=C1)N=NN2SI
37475             CCCCCCCCC1=NNN=C1CCC
37476     C#CCCOCCOCCOCCOCCN=[N+]=[N-]
37477    COCCN1C=NC=C1CNC(=O)C2=NNN=C2
37478    CCCN1C=C(N=N1)NC(=O)NC(CC)COC
Name: SMILES, Length: 37479, dtype: object
In [65]:
azides = [Chem.MolFromSmiles('N=[N+]=[N-]'),
          Chem.MolFromSmiles('N=[N+]=[NH]')]
In [66]:
mods = []
for smile in smiles:
    if "N=[N+]=[N-]" in smile:  # cheers =)
        smile = smile.replace("N=[N+]=[N-]", 'N1N=NC(=C1)C(C(O)=O)(C)C1=CC=C(C=C1)CC(C)C')
    else:
        continue
    try:
        mol = Chem.MolFromSmiles(smile)
        if lipinski_rule(mol):
            mods.append(mol)
    except:
        pass
RDKit ERROR: [17:08:59] Explicit valence for atom # 7 Cl, 3, is greater than permitted
RDKit WARNING: [17:09:00] WARNING: not removing hydrogen atom without neighbors
RDKit ERROR: [17:09:01] Explicit valence for atom # 9 Cl, 3, is greater than permitted
RDKit ERROR: [17:09:01] Explicit valence for atom # 1 Cl, 3, is greater than permitted
RDKit ERROR: [17:09:01] Explicit valence for atom # 25 Cl, 3, is greater than permitted
RDKit ERROR: [17:09:03] Explicit valence for atom # 43 Cl, 3, is greater than permitted
RDKit ERROR: [17:09:06] Explicit valence for atom # 3 I, 7, is greater than permitted
RDKit ERROR: [17:09:06] Explicit valence for atom # 8 Cl, 3, is greater than permitted
RDKit ERROR: [17:09:07] Explicit valence for atom # 37 Cl, 3, is greater than permitted
RDKit ERROR: [17:09:07] Explicit valence for atom # 37 Cl, 3, is greater than permitted
RDKit ERROR: [17:09:12] Explicit valence for atom # 6 Si, 8, is greater than permitted
RDKit WARNING: [17:09:17] WARNING: not removing hydrogen atom without neighbors
RDKit WARNING: [17:09:17] Conflicting single bond directions around double bond at index 12.
RDKit WARNING: [17:09:17]   BondStereo set to STEREONONE and single bond directions set to NONE.
RDKit ERROR: [17:09:19] Explicit valence for atom # 0 H, 2, is greater than permitted
RDKit ERROR: [17:09:19] Explicit valence for atom # 0 H, 2, is greater than permitted
RDKit ERROR: [17:09:19] Explicit valence for atom # 1 H, 2, is greater than permitted
RDKit ERROR: [17:09:19] Explicit valence for atom # 1 H, 2, is greater than permitted
RDKit ERROR: [17:09:19] Explicit valence for atom # 1 H, 2, is greater than permitted
RDKit ERROR: [17:09:19] Explicit valence for atom # 0 H, 2, is greater than permitted
RDKit ERROR: [17:09:19] Explicit valence for atom # 1 H, 2, is greater than permitted
RDKit ERROR: [17:09:19] Explicit valence for atom # 0 H, 2, is greater than permitted
RDKit ERROR: [17:09:19] Explicit valence for atom # 0 H, 2, is greater than permitted
RDKit ERROR: [17:09:19] Explicit valence for atom # 0 H, 2, is greater than permitted
RDKit ERROR: [17:09:19] Explicit valence for atom # 0 H, 2, is greater than permitted
RDKit ERROR: [17:09:19] Explicit valence for atom # 0 H, 2, is greater than permitted
RDKit ERROR: [17:09:19] Explicit valence for atom # 0 H, 2, is greater than permitted
RDKit ERROR: [17:09:35] Explicit valence for atom # 3 Cl, 3, is greater than permitted
RDKit ERROR: [17:09:35] Explicit valence for atom # 4 Cl, 3, is greater than permitted
RDKit WARNING: [17:09:45] WARNING: not removing hydrogen atom without neighbors
In [67]:
len(mods)
Out[67]:
13042

Молекул набралось немало! Это отлично.

Отрисуем несколько (64):

In [68]:
Draw.MolsToGridImage(mods[:64], molsPerRow=8, subImgSize=(150, 150))
/home/arsen_l/anaconda3/envs/alphafold/lib/python3.7/site-packages/rdkit/Chem/Draw/IPythonConsole.py:188: UserWarning: Truncating the list of molecules to be displayed to 50. Change the maxMols value to display more.
  % (maxMols))
Out[68]:
In [70]:
mods[5]

Ну и теперь посчитаем сходство пятой молекулы из полученных с исходной молекулой

In [77]:
from rdkit.Chem.Draw import SimilarityMaps
In [90]:
similarity = SimilarityMaps.GetSimilarityMapForFingerprint(ibu, mods[4], SimilarityMaps.GetMorganFingerprint)
In [91]:
similarity[1]
Out[91]:
0.17956656346749222

Вес маловат, сходство небольшое

Попробуем понять, что это значит, сравнив исходники друг с другом

In [92]:
similarity = SimilarityMaps.GetSimilarityMapForFingerprint(ibu, templ, SimilarityMaps.GetMorganFingerprint)
In [93]:
similarity[1]
Out[93]:
0.20535714285714285
In [94]:
similarity = SimilarityMaps.GetSimilarityMapForFingerprint(templ, nnn_ibu, SimilarityMaps.GetMorganFingerprint)
similarity[1]
Out[94]:
0.16387959866220736

Тут вес почему-то даже меньше, чем в предыдущем случае, странно.


Теперь посмотрим в 3D

In [95]:
m3d = Chem.AddHs(mods[4])
Chem.AllChem.EmbedMolecule(m3d)
AllChem.MMFFOptimizeMolecule(m3d, maxIters=500, nonBondedThresh=200)
Out[95]:
1
In [97]:
display(m3d)

Странно выглядящая 2D-проекция одной из конформаций молекулы.

nglview установить не удалось =(

Сохраню результаты:

In [105]:
import pickle
In [106]:
with open('mod5.pkl', 'wb') as modfile:
    pickle.dump(m3d, modfile)

Поменяю ядро и загружусь

In [5]:
import pickle
In [6]:
with open('mod5.pkl', 'rb') as modfile:
    m3d = pickle.load(modfile)
In [8]:
import nglview as nv
In [10]:
nv.show_rdkit(m3d)
In [11]:
view = nv.show_rdkit(m3d)
In [12]:
view
In [23]:
view.render_image()
In [24]:
view._display_image()
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
~/anaconda3/envs/nglview/lib/python3.6/site-packages/IPython/core/display.py in _data_and_metadata(self, always_both)
   1292         try:
-> 1293             b64_data = b2a_base64(self.data).decode('ascii')
   1294         except TypeError:

TypeError: a bytes-like object is required, not 'str'

During handling of the above exception, another exception occurred:

FileNotFoundError                         Traceback (most recent call last)
~/anaconda3/envs/nglview/lib/python3.6/site-packages/IPython/core/formatters.py in __call__(self, obj, include, exclude)
    968 
    969             if method is not None:
--> 970                 return method(include=include, exclude=exclude)
    971             return None
    972         else:

~/anaconda3/envs/nglview/lib/python3.6/site-packages/IPython/core/display.py in _repr_mimebundle_(self, include, exclude)
   1281         if self.embed:
   1282             mimetype = self._mimetype
-> 1283             data, metadata = self._data_and_metadata(always_both=True)
   1284             if metadata:
   1285                 metadata = {mimetype: metadata}

~/anaconda3/envs/nglview/lib/python3.6/site-packages/IPython/core/display.py in _data_and_metadata(self, always_both)
   1294         except TypeError:
   1295             raise FileNotFoundError(
-> 1296                 "No such file or directory: '%s'" % (self.data))
   1297         md = {}
   1298         if self.metadata:

FileNotFoundError: No such file or directory: ''
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
~/anaconda3/envs/nglview/lib/python3.6/site-packages/IPython/core/display.py in _data_and_metadata(self, always_both)
   1292         try:
-> 1293             b64_data = b2a_base64(self.data).decode('ascii')
   1294         except TypeError:

TypeError: a bytes-like object is required, not 'str'

During handling of the above exception, another exception occurred:

FileNotFoundError                         Traceback (most recent call last)
~/anaconda3/envs/nglview/lib/python3.6/site-packages/IPython/core/formatters.py in __call__(self, obj)
    343             method = get_real_method(obj, self.print_method)
    344             if method is not None:
--> 345                 return method()
    346             return None
    347         else:

~/anaconda3/envs/nglview/lib/python3.6/site-packages/IPython/core/display.py in _repr_png_(self)
   1311     def _repr_png_(self):
   1312         if self.embed and self.format == self._FMT_PNG:
-> 1313             return self._data_and_metadata()
   1314 
   1315     def _repr_jpeg_(self):

~/anaconda3/envs/nglview/lib/python3.6/site-packages/IPython/core/display.py in _data_and_metadata(self, always_both)
   1294         except TypeError:
   1295             raise FileNotFoundError(
-> 1296                 "No such file or directory: '%s'" % (self.data))
   1297         md = {}
   1298         if self.metadata:

FileNotFoundError: No such file or directory: ''
Out[24]:
<IPython.core.display.Image object>

Все равно не работает =(((