import numpy as np
import pubchempy
import rdkit.Chem.Lipinski as Lipinksy
from IPython.display import Image, display
from rdkit import Chem, RDConfig
from rdkit.Chem import AllChem, Draw
from rdkit.Chem.Draw import IPythonConsole
# Ibuprofen, built from its SMILES string; used below as the reference molecule.
ibu = Chem.MolFromSmiles("CC(C)CC1=CC=C(C=C1)C(C)C(=O)O")
AllChem.Compute2DCoords(ibu)
display(ibu)

# Print, one value per line: H-bond donor count, H-bond acceptor count,
# exact molecular weight and the Crippen descriptor tuple.
for _descriptor in (
    Lipinksy.NumHDonors,
    Lipinksy.NumHAcceptors,
    Lipinksy.rdMolDescriptors.CalcExactMolWt,
    Lipinksy.rdMolDescriptors.CalcCrippenDescriptors,
):
    print(_descriptor(ibu))
1 1 206.130679816 (3.073200000000001, 61.03480000000004)
def is_lepinsky_valid(compound):
    """Check whether a molecule satisfies Lipinski's rule of five.

    Args:
        compound: RDKit molecule to test. ``None`` (the result of a failed
            SMILES parse) is treated as invalid.

    Returns:
        True when the molecule has at most 5 H-bond donors, at most 10
        H-bond acceptors, an exact molecular weight below 500 and a Crippen
        logP of at most 5. False otherwise, or when a descriptor cannot be
        computed for ``compound``.
    """
    if compound is None:
        return False
    try:
        descriptors = Lipinksy.rdMolDescriptors
        # Every check must look at ``compound``; the weight and logP checks
        # previously read the global ibuprofen molecule, so they never
        # rejected anything.
        return (
            Lipinksy.NumHDonors(compound) <= 5
            and Lipinksy.NumHAcceptors(compound) <= 10
            and descriptors.CalcExactMolWt(compound) < 500
            # CalcCrippenDescriptors returns a tuple; element 0 is logP.
            and descriptors.CalcCrippenDescriptors(compound)[0] <= 5
        )
    except Exception:
        # Best effort: a molecule whose descriptors cannot be computed is
        # reported as invalid. KeyboardInterrupt is deliberately not caught.
        return False
def _draw_from_smiles(smiles):
    """Parse *smiles*, compute 2D coordinates, display the molecule and return it."""
    mol = Chem.MolFromSmiles(smiles)
    AllChem.Compute2DCoords(mol)
    display(mol)
    return mol


# First sketch: ibuprofen with a terminal alkyne, and a triazole-containing product.
ibu_modified = _draw_from_smiles("C(C(C)CC1=CC=C(C=C1)C(C)C(=O)O)#C")
ibu_clicked = _draw_from_smiles("CC(C)CC1=CC=C(C=C1)C(C(C(N=N1)=CN1))C(=O)O")

# Second sketch: the same alkyne, with the triazole drawn on the isobutyl side chain.
ibu_modified = _draw_from_smiles("C(C(C)CC1=CC=C(C=C1)C(C)C(=O)O)#C")
ibu_clicked = _draw_from_smiles("C1(=C[NH]N=N1)C(C)CC2=CC=C(C=C2)C(C)C(=O)O")

# From here on everything is inserted into this structure (the triazole drawn
# just above). Its insertion template: N1N=N-C=C1C(C)CC2=CC=C(C=C2)C(C)C(=O)O

# Third sketch: alkyne and triazole attached directly to the aromatic ring.
ibu_modified = _draw_from_smiles("CC(C)CC1=C(C=C(C=C1)C(C)C(=O)O)C#C")
ibu_clicked = _draw_from_smiles("CC(C)CC1=C(C=C(C=C1)C(C)C(=O)O)C2=CN=N[NH]2")
# Collect PubChem records whose structure contains an azide-like substructure.
# Two SMILES spellings of the group are searched, page by page: each request
# asks for ``per_page`` records starting at offset ``i * per_page``, with a cap
# of 200 pages per query.
compounds = []
per_page = 10**5
for smiles in ["N=N=N", "NN#N"]:
    for i in range(200):
        try:
            a = pubchempy.get_properties(
                properties="CanonicalSMILES",
                identifier=smiles,
                namespace="smiles",
                searchtype="substructure",
                RingsNotEmbedded=True,
                listkey_count=per_page,
                listkey_start=i * per_page,
            )
        except Exception as exc:
            # A failed request ends this query (best effort). Catching
            # Exception rather than everything lets Ctrl-C abort the download,
            # and reporting the error distinguishes "ran out of pages" from a
            # network failure that truncated the results.
            print(
                "Stopped {} search after {} page(s): {!r}".format(smiles, i, exc)
            )
            break
        if not a:
            # An empty page means there is nothing left to fetch; without this
            # check the same empty request would repeat until the page cap.
            break
        print("Retrieved page {} of {} search".format(i + 1, smiles))
        compounds.extend(a)
Retrieved page 1 of N=N=N search Retrieved page 2 of N=N=N search Retrieved page 3 of N=N=N search Retrieved page 4 of N=N=N search Retrieved page 5 of N=N=N search Retrieved page 6 of N=N=N search Retrieved page 7 of N=N=N search Retrieved page 8 of N=N=N search Retrieved page 9 of N=N=N search Retrieved page 10 of N=N=N search Retrieved page 11 of N=N=N search Retrieved page 12 of N=N=N search Retrieved page 13 of N=N=N search Retrieved page 14 of N=N=N search Retrieved page 15 of N=N=N search Retrieved page 16 of N=N=N search Retrieved page 1 of NN#N search
# Total number of compound records collected
len(compounds)
1601375
Возьмём небольшие азиды (SMILES не длиннее 30 символов), содержащие группу N=[N+]=[N-].
# Keep the SMILES strings that spell out an azide group as N=[N+]=[N-] and are
# at most 30 characters long.
filtered_smiles = [
    smi
    for smi in (record["CanonicalSMILES"] for record in compounds)
    if "N=[N+]=[N-]" in smi and len(smi) <= 30
]
len(filtered_smiles)
251084
# Textual form of the azide group that gets replaced in each source SMILES.
_AZIDE_SMILES = "N=[N+]=[N-]"
# Triazole-linked ibuprofen fragment substituted for the azide group.
# The two rings use the ring-closure labels %98 and %99 instead of 1 and 2:
# a source SMILES that still has ring 1 or 2 open at the azide position would
# otherwise have that ring closed by the fragment's label, silently producing
# a different molecule. SMILES of at most 30 characters never reach label 98.
_CLICK_TEMPLATE = "N%98N=N-C=C%98C(C)CC%99=CC=C(C=C%99)C(C)C(=O)O"

# Preview the product built from the first filtered azide.
ibu_clicked = Chem.MolFromSmiles(
    filtered_smiles[0].replace(_AZIDE_SMILES, _CLICK_TEMPLATE)
)
AllChem.Compute2DCoords(ibu_clicked)
display(ibu_clicked)

# Build the product for every filtered azide and keep those that pass the
# rule-of-five check. Unparsable products come back from RDKit as None and
# are rejected by is_lepinsky_valid.
valid_structures = []
for fsmi in filtered_smiles:
    struct = Chem.MolFromSmiles(fsmi.replace(_AZIDE_SMILES, _CLICK_TEMPLATE))
    if is_lepinsky_valid(struct):
        valid_structures.append(struct)
RDKit ERROR: [15:48:14] Explicit valence for atom # 8 Cl, 3, is greater than permitted RDKit ERROR: [15:48:14] Explicit valence for atom # 1 Br, 3, is greater than permitted
Наиболее частая причина невалидности структур — наличие ионов хлора на конце SMILES.
# Number of candidate structures accepted by is_lepinsky_valid
len(valid_structures)
250852
# Draw the first 30 accepted structures as an SVG grid, three molecules per
# row, each in a 250x120 sub-image.
Draw.MolsToGridImage(
valid_structures[:30], useSVG=True, molsPerRow=3, subImgSize=(250, 120)
)
from rdkit.Chem.Draw import SimilarityMaps
# Bit-vector Morgan fingerprint of the candidate at index 5.
# NOTE(review): `fp` is not used by any of the visible code below.
fp = SimilarityMaps.GetMorganFingerprint(valid_structures[5], fpType="bv")
# Similarity map between ibuprofen and the same candidate; the fingerprint
# function itself (with its default settings) is passed, not `fp`.
fig, maxweight = SimilarityMaps.GetSimilarityMapForFingerprint(
ibu, valid_structures[5], SimilarityMaps.GetMorganFingerprint
)
# SMILES of the candidate, for reference.
Chem.MolToSmiles(valid_structures[5])
'CC(Cc1ccc(C(C)C(=O)O)cc1)c1cnnn1CCC1CCN(C)C(=O)O1'
# Re-create the candidate from the SMILES printed above.
struct = Chem.MolFromSmiles("CC(Cc1ccc(C(C)C(=O)O)cc1)c1cnnn1CCC1CCN(C)C(=O)O1")
# Add explicit hydrogens before generating 3D coordinates.
m3d = Chem.AddHs(struct)
# Generate a 3D conformer on m3d.
# NOTE(review): the return value is not checked and no random seed is set.
Chem.AllChem.EmbedMolecule(m3d)
# Optimise the conformer with the MMFF force field, at most 500 iterations.
# The return value is the last expression here, so the notebook shows it.
AllChem.MMFFOptimizeMolecule(m3d, maxIters=500, nonBondedThresh=200)
0
import nglview as nv
# Show the optimised 3D molecule with nglview.
nv.show_rdkit(m3d)