Установим RDkit conda install -c rdkit rdkit ; conda install -c jirinovo pubchempy

In [1]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import RDConfig
from rdkit.Chem.Draw import IPythonConsole 
from rdkit.Chem import Draw
import numpy as np
from IPython.display import display,Image
from rdkit.Chem.Draw import SimilarityMaps
import nglview as nv

Нарисуем ибупрофен

In [2]:
ibu=Chem.MolFromSmiles('CC(C)CC1=CC=C(C=C1)C(C)C(=O)O')
AllChem.Compute2DCoords(ibu)
display(ibu)

Посчитаем параметры для правила Лепински

In [3]:
import rdkit.Chem.Lipinski as Lipinksy

def lipinksy_rule(molecule):
    if Lipinksy.NumHDonors(molecule) <= 5 and Lipinksy.NumHAcceptors(molecule) <= 10 and Lipinksy.rdMolDescriptors.CalcExactMolWt(molecule) < 500 and Lipinksy.rdMolDescriptors.CalcCrippenDescriptors(molecule)[0] <= 5:
        return True 
    else:
        return False  

Загрузим скаченные данные и отфильтруем

In [4]:
# strings=np.genfromtxt('561336253252063998.txt',dtype=np.str)
file = open('561336253252063998.txt', 'r')
strings = file.read().split("\n")[:-1]
smiles = []
for line in strings:
    line = line.split()
    if len(line[1]) < 30 and not '.' in line[1]:
        smiles.append(line[1])
len(smiles)
Out[4]:
66821
In [5]:
template = 'N1N=NC(=C1)C(C)CC1=CC=C(C=C1)C(C)C(=O)O'
AllChem.Compute2DCoords(Chem.MolFromSmiles(template))
display(Chem.MolFromSmiles(template))

Пстроим новые молекулы и отфильтруем

In [6]:
new_molecules = []
for smi in smiles: 
    if 'N=[N+]=[N-]'in smi:
        try:
            newsmi = Chem.MolFromSmiles(smi.replace('N=[N+]=[N-]', template))
            if lipinksy_rule(newsmi):
                new_molecules.append(newsmi)
        except Exception:
            pass    
    else:
        continue
RDKit ERROR: [19:45:32] Explicit valence for atom # 4 Cl, 3, is greater than permitted
In [7]:
Chem.Draw.MolsToGridImage(new_molecules[:16], molsPerRow=4, subImgSize=(200, 200))
Out[7]:
In [8]:
fp = SimilarityMaps.GetMorganFingerprint(new_molecules[5], fpType='bv')
fig, maxweight = SimilarityMaps.GetSimilarityMapForFingerprint(ibu, new_molecules[4], SimilarityMaps.GetMorganFingerprint)
/home/nastya/miniconda3/envs/molsim/lib/python3.6/site-packages/rdkit/Chem/Draw/__init__.py:285: MatplotlibDeprecationWarning: The bivariate_normal function was deprecated in version 2.2.
  Z = mlab.bivariate_normal(X, Y, a, a, mol._atomPs[0][0], mol._atomPs[0][1]) * weights[0]
/home/nastya/miniconda3/envs/molsim/lib/python3.6/site-packages/rdkit/Chem/Draw/__init__.py:287: MatplotlibDeprecationWarning: The bivariate_normal function was deprecated in version 2.2.
  Zp = mlab.bivariate_normal(X, Y, a, a, mol._atomPs[i][0], mol._atomPs[i][1])
In [9]:
m3d=Chem.AddHs(new_molecules[4])
Chem.AllChem.EmbedMolecule(m3d)
AllChem.MMFFOptimizeMolecule(m3d,maxIters=500,nonBondedThresh=200)
Out[9]:
0
In [11]:
nv.show_rdkit(m3d)
The installed widget Javascript is the wrong version.
In [66]:
from IPython.display import Image
Image(filename='drug.png')
Out[66]: