Установим RDKit и PubChemPy: conda install -c rdkit rdkit ; conda install -c jirinovo pubchempy
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import RDConfig
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Draw
import numpy as np
from IPython.display import display,Image
from rdkit.Chem.Draw import SimilarityMaps
import nglview as nv
Нарисуем ибупрофен
# Build the ibuprofen molecule from its SMILES string, compute 2D
# coordinates for it and render it inline.
ibuprofen_smiles = 'CC(C)CC1=CC=C(C=C1)C(C)C(=O)O'
ibu = Chem.MolFromSmiles(ibuprofen_smiles)
AllChem.Compute2DCoords(ibu)
display(ibu)
Посчитаем параметры для правила Липински
import rdkit.Chem.Lipinski as Lipinksy
def lipinksy_rule(molecule):
    """Check whether a molecule satisfies all four Lipinski criteria.

    Args:
        molecule: RDKit molecule to evaluate, or ``None`` (the value
            ``Chem.MolFromSmiles`` yields for a SMILES it cannot parse).

    Returns:
        ``True`` when the molecule has at most 5 H-bond donors, at most 10
        H-bond acceptors, an exact molecular weight below 500 and a Crippen
        logP of at most 5. ``False`` otherwise, including when ``molecule``
        is ``None``.
    """
    # An unparsable structure cannot pass the filter; answer directly instead
    # of letting the descriptor calls raise on None.
    if molecule is None:
        return False

    descriptors = Lipinksy.rdMolDescriptors
    return (
        Lipinksy.NumHDonors(molecule) <= 5
        and Lipinksy.NumHAcceptors(molecule) <= 10
        and descriptors.CalcExactMolWt(molecule) < 500
        # First element of the Crippen descriptor pair is compared against
        # the logP threshold.
        and descriptors.CalcCrippenDescriptors(molecule)[0] <= 5
    )
Загрузим скачанные данные и отфильтруем их
# Read the downloaded compound list. The context manager closes the file
# even if reading fails part-way through.
with open('561336253252063998.txt', 'r') as file:
    # Strip only the line terminator, so a final record that lacks a trailing
    # newline is kept rather than dropped.
    strings = [row.rstrip('\n') for row in file]

# Keep the second whitespace-separated field of each record when it is
# shorter than 30 characters and contains no '.'.
# NOTE(review): the second column is presumably the SMILES string, with '.'
# marking multi-component entries — confirm against the file format.
smiles = []
for line in strings:
    fields = line.split()
    if len(fields) < 2:
        # Blank or malformed line: there is no second column to read.
        continue
    candidate = fields[1]
    if len(candidate) < 30 and '.' not in candidate:
        smiles.append(candidate)
len(smiles)
# SMILES fragment that is pasted into other SMILES strings by plain text
# substitution. Its two rings use the two-digit ring-closure labels %98 and
# %99 instead of the single digit 1: a single-digit label could pair up with
# a ring that is still open in the host string at the insertion point and
# produce a wrong or unparsable structure. The molecule described by the
# fragment itself is unchanged.
template = 'N%98N=NC(=C%98)C(C)CC%99=CC=C(C=C%99)C(C)C(=O)O'

# Parse once so that the 2D coordinates are computed on the same molecule
# object that is displayed.
template_mol = Chem.MolFromSmiles(template)
AllChem.Compute2DCoords(template_mol)
display(template_mol)
Построим новые молекулы и отфильтруем их
# Build candidate molecules by replacing the azide group in each input SMILES
# with the template fragment, keeping only those that pass lipinksy_rule.
azide_smiles = 'N=[N+]=[N-]'
new_molecules = []
for smi in smiles:
    if azide_smiles not in smi:
        continue

    # str.replace substitutes every occurrence of the azide pattern.
    candidate = Chem.MolFromSmiles(smi.replace(azide_smiles, template))
    if candidate is None:
        # The substituted string could not be parsed into a molecule.
        continue

    try:
        passes = lipinksy_rule(candidate)
    except Exception as err:
        # Stay best-effort (one bad compound must not stop the screen), but
        # report the failure instead of hiding it.
        print('Skipping {!r}: {}'.format(smi, err))
        continue

    if passes:
        new_molecules.append(candidate)
# Draw the first 16 generated molecules as a grid with 4 molecules per row
# and subImgSize=(200, 200).
Chem.Draw.MolsToGridImage(new_molecules[:16], molsPerRow=4, subImgSize=(200, 200))
# Morgan fingerprint of the molecule at index 5, requested with fpType='bv'.
# `fp` is not used again in the visible code.
fp = SimilarityMaps.GetMorganFingerprint(new_molecules[5], fpType='bv')
# Similarity map computed from ibuprofen and the molecule at index 4 using
# the Morgan fingerprint function.
# NOTE(review): presumably `ibu` is the reference and new_molecules[4] the
# probe molecule — confirm against the RDKit SimilarityMaps documentation.
# NOTE(review): indices 4 and 5 are hard-coded, so this raises IndexError
# when fewer than 6 molecules were generated.
fig, maxweight = SimilarityMaps.GetSimilarityMapForFingerprint(ibu, new_molecules[4], SimilarityMaps.GetMorganFingerprint)
# Prepare a 3D model of the molecule at index 4: add hydrogens, embed a
# conformer, run MMFF optimisation (maxIters=500, nonBondedThresh=200) and
# show the result in nglview.
m3d=Chem.AddHs(new_molecules[4])
# NOTE(review): the return value of EmbedMolecule is discarded here — verify
# that embedding succeeded before trusting the optimised geometry.
Chem.AllChem.EmbedMolecule(m3d)
AllChem.MMFFOptimizeMolecule(m3d,maxIters=500,nonBondedThresh=200)
nv.show_rdkit(m3d)
# Display the image stored in the file 'drug.png'.
# NOTE(review): Image is already imported from IPython.display in the import
# section at the top of the file, so this second import looks redundant.
from IPython.display import Image
Image(filename='drug.png')