In [1]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import RDConfig
from rdkit.Chem.Draw import IPythonConsole 
from rdkit.Chem import Draw
import numpy as np
from IPython.display import display,Image
RDKit WARNING: [02:55:45] Enabling RDKit 2019.09.3 jupyter extensions

Нарисуем ибупрофен

In [2]:
# Build the ibuprofen molecule from its SMILES string, generate 2D
# coordinates for it and render it in the notebook.
ibuprofen_smiles = 'CC(C)CC1=CC=C(C=C1)C(C)C(=O)O'
ibu = Chem.MolFromSmiles(ibuprofen_smiles)
AllChem.Compute2DCoords(ibu)
display(ibu)

Посчитаем параметры для правила Липински:

In [3]:
import rdkit.Chem.Lipinski as Lipinksy

# Rule-of-five inputs for ibuprofen, printed one value per line:
# H-bond donor count, H-bond acceptor count, exact molecular weight and
# the first Crippen descriptor (logP).
print(
    Lipinksy.NumHDonors(ibu),
    Lipinksy.NumHAcceptors(ibu),
    Lipinksy.rdMolDescriptors.CalcExactMolWt(ibu),
    Lipinksy.rdMolDescriptors.CalcCrippenDescriptors(ibu)[0],
    sep="\n",
)
1
1
206.130679816
3.073200000000001
In [4]:
import pubchempy as pcp

Скачаем список азидов

In [5]:
# Download azide-containing compounds from PubChem by substructure search.
# Each query is fetched in two pages of `per_page` records; every record is
# a dict that carries the compound's 'CanonicalSMILES'.
compounds = []
per_page = 200
# Alternative SMILES spellings of the azide group used as search queries.
for smiles in ["N=N=N", "NN#N", "N=[N+]=[N-]"]:
    for i in range(2):
        try:
            page = pcp.get_properties(
                properties="CanonicalSMILES",
                identifier=smiles, namespace="smiles",
                searchtype="substructure",
                RingsNotEmbedded=True,
                listkey_count=per_page, listkey_start=i*per_page
            )
        except Exception as exc:
            # Skip the failed page entirely: falling through would either
            # re-append the previously fetched page or hit an undefined
            # variable when the very first request fails.
            print("Failed to retrieve page {} of {} search: {}".format(i+1, smiles, exc))
            continue
        print("Retrieved page {} of {} search".format(i+1, smiles))
        compounds.extend(page)
Retrieved page 1 of N=N=N search
Retrieved page 2 of N=N=N search
Retrieved page 1 of NN#N search
Retrieved page 2 of NN#N search
Retrieved page 1 of N=[N+]=[N-] search
Retrieved page 2 of N=[N+]=[N-] search
In [13]:
# Peek at the SMILES string stored in the first downloaded record.
compounds[0]['CanonicalSMILES']
Out[13]:
'CC1=C(C=CC(=C1C)[Si])CN=[N+]=[N-]'

Сохраним результат поиска в файл на всякий случай (и будем загружать его оттуда, чтобы не повторять запрос к PubChem)

In [6]:
# Disabled one-off step: dump the downloaded records to 'compounds_.json'.
# Uncomment to (re)create the cache file from the current `compounds` list.
# import json
# with open('compounds_.json', 'w') as file:
#     json.dump(compounds, file)
In [5]:
import json

# Restore the list of compound records from the JSON cache on disk.
with open('compounds_.json', 'r') as cache_file:
    compounds = json.load(cache_file)
In [6]:
# SMILES text that is later substituted for the azide group of each compound:
# a triazole ring (N1N=NC(...)=C1) bearing an ibuprofen-derived side chain.
templ = 'N1N=NC(C(C)CC2=CC=C(C=C2)C(C)C(=O)O)=C1'
# Parse the template on its own and draw it to check the structure visually.
template = Chem.MolFromSmiles(templ)
AllChem.Compute2DCoords(template)
display(template)
In [7]:
# SMILES string of every downloaded record, in download order.
smm = [record['CanonicalSMILES'] for record in compounds]

# For each SMILES and each azide spelling it contains, produce a copy in which
# that spelling is replaced (as plain text) by the template fragment.
smiles_transform = [
    smi.replace(azide, templ)
    for smi in smm
    for azide in ["N=[N+]=[N-]", "N=N=N", "NN#N"]
    if azide in smi
]

Создадим функцию, проверяющую соответствие правилу Липински, и отфильтруем молекулы:

In [8]:
def lipinsky_rule(inp):
    """Check whether a molecule satisfies Lipinski's rule of five.

    The molecule passes when it has at most 5 H-bond donors, at most 10
    H-bond acceptors, an exact molecular weight below 500 and a Crippen
    logP of at most 5.

    Parameters
    ----------
    inp : rdkit.Chem.Mol or None
        Molecule to test. ``None`` (what ``Chem.MolFromSmiles`` returns for
        an unparsable SMILES) is treated as failing the rule.

    Returns
    -------
    bool
        ``True`` if all four criteria hold, ``False`` otherwise.
    """
    # An unparsable molecule cannot satisfy the rule; bail out before any
    # descriptor function is handed a None.
    if inp is None:
        return False
    descriptors = Lipinksy.rdMolDescriptors
    return bool(
        Lipinksy.NumHDonors(inp) <= 5
        and Lipinksy.NumHAcceptors(inp) <= 10
        and descriptors.CalcExactMolWt(inp) < 500
        and descriptors.CalcCrippenDescriptors(inp)[0] <= 5
    )
# Sanity check of the rule on ibuprofen.
lipinsky_rule(ibu)
Out[8]:
True
In [1]:
# Keep only the transformed molecules that parse and pass Lipinski's rule.
final = []
n_invalid = 0

for sm in smiles_transform:
    # MolFromSmiles reports an unparsable SMILES by returning None instead of
    # raising, so check for it explicitly rather than relying on a later error.
    candidate = Chem.MolFromSmiles(sm)
    if candidate is None:
        n_invalid += 1
        continue
    try:
        passes = lipinsky_rule(candidate)
    except Exception as exc:
        # Stay best-effort, but say which molecule was skipped and why.
        print("Skipping {}: {}".format(sm, exc))
        continue
    if passes:
        final.append(candidate)

print("Kept {} of {} molecules ({} SMILES could not be parsed)".format(
    len(final), len(smiles_transform), n_invalid))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-9dc3df55d7ca> in <module>
      1 final = []
      2 
----> 3 for sm in smiles_transform:
      4     try:
      5         new = Chem.MolFromSmiles(sm)

NameError: name 'smiles_transform' is not defined

По какой-то причине сессия Jupyter прерывается, когда я пытаюсь запустить ячейку выше, чтобы отфильтровать данные по правилу Липински. Это происходит как на больших массивах, так и на маленьких: kernel вылетает.

In [ ]:
from IPython.display import SVG

# Generate 2D coordinates for every accepted molecule.
for mol in final:
    AllChem.Compute2DCoords(mol)

# Draw the first six molecules as an SVG grid, three per row.
grid = Draw.MolsToGridImage(
    final[:6],
    useSVG=True,
    molsPerRow=3,
    subImgSize=(200, 200),
)
display(grid)
In [ ]:
from rdkit.Chem.Draw import SimilarityMaps

# Similarity map for the sixth accepted molecule, with ibuprofen passed as the
# first (reference) molecule. The separately computed fingerprint was never
# used by the map call, so that dead computation is dropped.
probe = final[5]
fig, maxweight = SimilarityMaps.GetSimilarityMapForFingerprint(
    ibu, probe, SimilarityMaps.GetMorganFingerprint
)
In [ ]:
# NOTE(review): `m2d` is not defined anywhere in the visible notebook; it must
# be bound to the molecule to embed before this cell can run. TODO confirm
# which molecule was intended.
# Add explicit hydrogens, generate a 3D conformer and optimize it with MMFF.
m3d=Chem.AddHs(m2d)
Chem.AllChem.EmbedMolecule(m3d)
AllChem.MMFFOptimizeMolecule(m3d,maxIters=500,nonBondedThresh=200 )
In [2]:
import nglview as nv
In [ ]:
# Display the 3D molecule with nglview.
nv.show_rdkit(m3d)