Хемоинформатика

В этом задании нужно было найти аналог ибупрофена, используя пакет RDkit и данные из PubChem в формате SMILES. Я скачала из PubChem все с структуры с 'N=[N+]=[N-]' в SMILES, с этой базой и буду работать.

In [1]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import RDConfig
from rdkit.Chem.Draw import IPythonConsole 
from rdkit.Chem import Draw
import numpy as np
from IPython.display import display,Image

Далее я рисую ибупрофен и создаю его производное для Click Chemistry (последний рисунок).

In [2]:
ibu=Chem.MolFromSmiles('CC(C)CC1=CC=C(C=C1)C(C)C(=O)O')
AllChem.Compute2DCoords(ibu)
display(ibu)
#let's replace isopropyl to -C#C
modif_ibu = Chem.MolFromSmiles('C#CC1=CC=C(C=C1)C(C)C(=O)O')
AllChem.Compute2DCoords(modif_ibu)
display(modif_ibu)
#and now adding the ring
template = Chem.MolFromSmiles('N1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O')
AllChem.Compute2DCoords(template)
display(template)

Мы используем параметры Лепински для оценки молекулы

In [3]:
import rdkit.Chem.Lipinski as Lipinksy
print Lipinksy.NumHDonors(ibu)
print Lipinksy.NumHAcceptors(ibu)
print Lipinksy.rdMolDescriptors.CalcExactMolWt(ibu)
print Lipinksy.rdMolDescriptors.CalcCrippenDescriptors(ibu)[0]
1
1
206.130679816
3.0732
In [4]:
help(Lipinksy.rdMolDescriptors.CalcCrippenDescriptors)
Help on built-in function CalcCrippenDescriptors in module rdkit.Chem.rdMolDescriptors:

CalcCrippenDescriptors(...)
    CalcCrippenDescriptors( (Mol)mol [, (bool)includeHs=True [, (bool)force=False]]) -> tuple :
        returns a 2-tuple with the Wildman-Crippen logp,mr values
    
        C++ signature :
            boost::python::tuple CalcCrippenDescriptors(RDKit::ROMol [,bool=True [,bool=False]])

Help помог разобраться с тем, что означает последний параметр. Это характеристика гидрофобности молекулы. По правилу Лепински он не должен привышать 5.

Далее загружаем нашу базу SMILES.

In [5]:
strings=np.genfromtxt('1011401151079521058.txt',dtype=np.str)
smiles = []
for line in strings:
    if len(line[1]) < 30 and not '.' in line[1]:
        smiles.append(line[1])

smiles
Out[5]:
['CN1C(=CN=N1)C2(CNC2)OC',
 'CCCCC(C)C(=O)NC1=NN(N=N1)C',
 'CC(=CC(=O)OC)CN1C=CN=N1',
 'CC1=C(C=C(C=C1)CN2C=CN=N2)OC',
 'CCC(CC)CN(CC)C(=O)CCC1=NNN=C1',
 'C1CCC(C1)NCC2=CN(N=N2)C3CCC3',
 'CN1C=C(N=N1)C2=NOC(=N2)C3CCC3',
 'CC(=C)COCC1=NNN=N1',
 'C1CC(CC1C',
 'CC1CCC(C1)N2C=C(N=N2)C3CCCCC3',
 'CCC(CO)SC1=NN=NN1CC2CCCO2',
 'C1CC(C1)(CC2=CN(N=N2)CC3CC3)O',
 'CC(C)(C)N1N=C(N=N1)CN2C=CC=N2',
 'CN1C=C(N=N1)CCNCC2=CC=C(S2)Cl',
 'CN1C=C(N=N1)CCNC2CCCCC2',
 'CC1=NNN=C1NC(=O)CC2CC3CCC2C3',
 'CCCCCOCC(=O)NC1=NNN=C1C',
 'C1CCC(C1)(C',
 'CC1=NN=NN1CCCSC2=CC=CC(=C2)O',
 'CCSCCSCCCN1C(=NN=N1)C',
 'C',
 'C',
 'CCN1C(=NN=N1)NC(=O)COCC2CCCO2',
 'CC1=NNN=C1NC(=O)C(C)CSC',
 'CC1=CC(=NS1)C(=O)NC2=NNN=C2C',
 'CC(C)(C)CCCN1N=C(N=N1)C(=O)N',
 'CC',
 'CC(C)(C)C1=CSC(=C1C',
 'CC1=NNN=C1NC(=O)CSCC2CCCCO2',
 'CC(C)(C)N1C=C(N=N1)C=C(C',
 'C1CCC2(C1)CC(CCO2)SC3=NNN=C3',
 'C[Si](C)(C)C',
 'C1=CC(=CC(=C1)SCCCC2=NNN=N2)O',
 'CN(C)CC',
 'C1COCCC1(C2=CC=CC(=C2)C',
 'C1CN(CCN1CC2=C(C(=CC=C2)F)C',
 'C1CN(CCN1CC2=C(N=CC(=C2)C',
 'CC(C)(C)N1C(=NN=N1)CN(CC',
 'C1CCC(CC1)(C',
 'C',
 'C',
 'C',
 'C',
 'C',
 'CC1CN(CC(N1CC',
 'C1=CC(=CC=C1CC',
 'CC1=CC(=CS1)CN2C=CN=N2',
 'CC1=C(C=CO1)CNCCC2=CN(N=N2)C',
 'CC1=C(C=CO1)CNC2=CN(N=N2)C',
 'COCC1(CCCC1)C2=NNN=N2',
 'CCCCNC(=O)CN1N=C(C(=N1)Br)Br',
 'CCN(C)C(=O)CN1N=C(C(=N1)Br)Br',
 'CCC1C(=O)NCCN1CC2=C(N=NN2CC)C',
 'CCSC1=NC=CN=C1C2=NNN=N2',
 'C1CCN2CC(CC2C1)C3=NNN=N3',
 'CC(C1CCOCC1)SC2=NNN=C2',
 'CC1CC(CN(C1)CCN2C(=NN=N2)C)C',
 'C1C(OC2=CC=CC=C2O1)CN3C=CN=N3',
 'CCN1C(=CN=N1)CNC2CCCCC2',
 'CN(C)C(=O)CN1N=C(C(=N1)Br)Br',
 'CNC(=O)CN1N=C(C(=N1)Br)Br',
 'CCC(C)OC1=CC=C(C=C1)C2=NNN=N2',
 'CC(C)(C1CCC1)C2=NNN=N2',
 'C=CC(=O)NCCCN1C=C(N=N1)CO',
 'CCC1=NC(=CS1)CC2=NNN=N2',
 'C1=CN=C(C=C1C2=NNN=N2)OC(F)F',
 'CCC1=CC(=NC=C1)C2=NNN=N2',
 'CN(C)CC1=CC=C(C=C1)C2=NNN=N2',
 'C1=CC(=NC2=C1C=CN2)C3=NNN=N3',
 'C1CCC(C1)OCC2=NNN=N2',
 'COCCOC(=O)C1=C2C=CC=NN2N=C1',
 'CN1C(=NN=N1)N(C)CC2CCCOC2',
 'CN1C=C(N=N1)CN(C)CC2CCCO2',
 'CC(C)C(C(=O)O)SCC1=NNN=N1',
 'CC1=CC(=NC(=C1)C)C2=NNN=N2',
 'CCC1=NOC=C1CN(C)CC2=CN(N=N2)C',
 'CN1C(=CN=N1)CNC2=C(C=CC(=C2)C',
 'CCCCCOCC(=O)NCCN1C=CN=N1',
 'C1CC2CC1CC2CC(=O)NCCN3C=CN=N3',
 'CN1C=C(N=N1)C2C(CCO2)N',
 'COCC(C1=NNN=N1)N',
 'C1C(CN1)(C2=NNN=C2)O',
 'CCC1=NC2=CC=CC=C2N1CC3=NNN=N3',
 'CCN1C(=CN=N1)CN2CCCC(C2)(C)F',
 'CC1=NN=NN1CC2CCC(CC2)(F)F',
 'C1CC1C2=NN(N=N2)CC3CC(C3)(F)F',
 'CC1=NN=NN1CC2=CC=C(C=C2)C(F)F',
 'C1CC(CCC1CN2C=CN=N2)(F)F',
 'C1CC(CCC1C(=O)CN2C=CN=N2)(F)F',
 'CC1=NN=NN1CC2CCCC2(F)F',
 'C1=CC(=NC=C1F)CSC2=NNN=C2',
 'CCOC1CC(C1)CCNC(=O)CN2C=CN=N2',
 'C1=CC(=C(C=C1CN2C=CN=N2)F)C',
 'C1=CC(=C(C=C1CN2C=CN=N2)F)F',
 'CC1=NN=NN1CC2=CC(=CC=C2)C(F)F',
 'CC1=NN=NN1CC2=CC(=CC(=C2)F)F',
 'CC1=NN=NN1CC',
 'CC1=NN=NN1CC2=CC=C(C=C2)F',
 'CN1C(=NN=N1)SCC(F)(F)F',
 'CC1=CC(=C(C=C1)F)OCC2=CN=NN2C',
 'CC1=C(C=C(C=C1)F)CN2C=CN=N2',
 'C1=C(C=NC=C1F)CSC2=NNN=C2',
 'CCN1C(=CN=N1)C(=O)NC2CCC(C2)F',
 'CC(C)(C',
 'C1C(CN1C2=NC=C(C=C2)C',
 'C1CC(C1)(C',
 'C1C(CN1C2=C(C(=CC=C2)F)C',
 'C1C(CN1C2=NC=CN=C2C',
 'C1CCC(C1)(C',
 'CC(C)(C)N1C=C(N=N1)N(C)C',
 'CC(C)N1C2=C(C=CC(=C2)I)N=N1',
 'CCCCCCC(C)(CCCC)C1=CN(N=N1)C',
 'CCCNCN=[N+]=[N-]',
 'CCC(C)(C)C1=CN(N=N1)CC2CC2',
 'CCCCCCC(C)(CCCC)C1=NN(N=C1)C',
 'CCCCCCC(C)(CCCC)C1=CN(N=N1)CC',
 'CC1=C(N(N=N1)C)C2CC2',
 'CC(C)(C)CC1=CN(N=N1)C(C)(C)SC',
 'CC1=C(N=NN1C(C)(C)SS)C(C)(C)C',
 'CC1=C(N(N=N1)C(C)(C)C)F',
 'C(=O)C1=NC(=NN=N1)Cl',
 'CC(CN)CN=[N+]=[N-]',
 'C(CN=N)CN=[N+]=[N-]',
 'CC1CN([C@@H]1N=[N+]=[N-])N=N',
 'CCN1C(=C(CN1)C(=O)O)C2=NNN=C2',
 'CCCCOC(=O)C1CC1C2=CN(N=N2)CC',
 'C1CC1N2C=C(N=N2)C(=O)N3CCNCC3',
 'C1COC[C@@H](C1N=[N+]=[N-])O',
 'C1=CC2=CC(=CN=C2C=C1C',
 'C1[C@@H](C(CO1)N=[N+]=[N-])O',
 'CCC(C',
 'C1CC1C(C(=O)C2=NNN=C2)C(=O)O',
 'CN1C=C(N=N1)C2CC2C(=O)O',
 'CCCCN1N=C(N=N1)C2=CN=CC(=C2)C',
 'C[C@@H](C(=O)N[C@H]1CCC2=C(C',
 'CCC(C(C)C)N1C=NN=N1',
 'CC1=CN=C(C=C1)CCCN2C=CN=N2',
 'CN1C2=C(C',
 'CC1=C(N=NN1C2=C(C=CC(=C2)Cl)C',
 'CCOC(=O)C(CN=[N+]=[N-])NI',
 'CC(C)C1=CC2=NNN=C2C=N1',
 'CC(C)C1=NC2=NNN=C2C=C1',
 'CC(C)C1CCC2=NNN=C2C1',
 'CCC(C)(C)C1=CC2=NNN=C2N=C1',
 'CCCCCCCC(C)(CCCC)C1=NNN=C1',
 'CCOC(=C)COCCOC(C)N=[N+]=[N-]',
 'CC1=CN(N=N1)CCNC',
 'C1=CC(=C(C=C1F)F)C2=NNN=C2CO',
 'CC1=NN=NN1C/C=C/C(=O)NC',
 'CC(CN1C=CN=N1)SC',
 'C[C@@H](C(=O)C1=NNN=N1)N',
 'C[C@@H](C(=O)C1=NNN=N1)NCl',
 'C1=NNN=C1C(=O)C(N)N',
 'CC(C)(C)CN1C=C(N=N1)C(=O)N',
 'CC1(CCC1)N=[N+]=[N-]',
 'CC(C)C(=O)C1=NN=NN1COC',
 'CC(C)C(C1=NN=NN1COC)O',
 'CC(C)CC1=CN(N=N1)C(C)(C)C',
 'CCCC(C)(C1=NN=NN1Cl)I',
 'CC(C)(C1=NN=NN1Cl)I',
 'C1=C(N=CN1CN=[N+]=[N-])N',
 'CC(CCC(C)N1N=CC=N1)N2C=NN=C2',
 'CC(=O)OCC1=CC2=NNN=C2C=C1',
 'CCCOCCOC(=O)C1=CC2=NNN=C2C=C1',
 'C1[C@H](CON1C(=O)O)N=[N+]=N',
 'CC1=C(C=CC2=NNN=C12)C=C',
 'C1COC(CN1C(=O)O)CCCN=[N+]=N',
 'CC1=NN(N=N1)CC2=C(C=CC(=C2)C',
 'CC1=NN(N=N1)CC2=C(C=CC(=C2)C',
 'CC1=NN(N=N1)CC2=C(C=CC(=C2)C',
 'C1=NN(N=C1)C2=NC=C(S2)CNCl',
 'C1=CN(N=N1)C2=NC=C(S2)CN',
 'CC1CCC2=C(C(CC1)(F)F)N(N=N2)C',
 'CC(=O)C1(CCCCCC2=C1N(N=N2)C)F',
 'CCC1CCCCC2=C(C1(F)F)N(N=N2)C',
 'CCC(=O)C1(CCCCCC2=C1N=NN2C)F',
 'C1=CC(=CC=C1/C=N/O)N2N=CC=N2',
 'C1=CC(=CC(=C1)N2N=CC=N2)CNCl',
 'C1=CC(=CC(=C1)CN)C2=NNN=C2',
 'C1=CC(=CC=C1C',
 'C1=CC(=CC=C1COC2=CC=C(C=C2)C',
 'C1=CC(=CC=C1COC2=CC=C(C=C2)C',
 'C(C(CO)(CO)O)N=[N+]=[N-]',
 'CSSN1C=C(N=N1)C2CC2',
 'CN1C=C(N=N1)CC(O)O',
 'CC(CO)C(=O)OCN=[N+]=[N-]',
 'C(C(C(=O)OCN=[N+]=[N-])O)O',
 'C(CN=[N+]=[N-])NCCO',
 'CN(CO)C(=O)CN1C=NN=N1',
 'CN(C(=O)CN1C=NN=N1)OC',
 'CN1C[C@@H]2C(C1)C2C3=NN=NN3C',
 'CCC1CCC2=C(C(CC1)(F)F)N=NN2CC',
 'CCC1CCC2=C(C(CC1)(F)F)N=NN2C',
 'CCC1CCCCC2=C(C1(F)F)N=NN2C',
 'CC(=O)C1(CCCCCC2=C1N=NN2C)F',
 'C1(=NN(N=N1)F)OC(=C(F)F)F',
 'C1CNC(=O)C(N1)CN=[N+]=[N-]',
 'C1CC2=NNN=C2CCC3=NNN=C31',
 'C1CCC2=NNN=C2CCCC3=NNN=C3C1',
 'CCCCCCC(CCCCCC)C1=NNN=N1',
 'CC(C)(CN1N=C(N=N1)I)O',
 'CCCCCCC(CCCCCC)C1=NNN=C1',
 'CCCCC1=CN(N=N1)CI',
 'CCCCC1=CN(N=N1)CCI',
 'CC(C)CCCC1=CN=NN1CCI',
 'CC(C)CCCC1=CN(N=N1)CCI',
 'CCN1C(=CN=N1)CCCCI',
 'CCCCCCN1C(=CN=N1)CCCS',
 'CCCCCCC1=CN=NN1CCCI',
 'C1=C(N(N=N1)CCS)CCCCN',
 'CCCCCCC1=CN=NN1CS',
 'CCCCCCN1C(=CN=N1)CI',
 'CCCCCN1C=C(N=N1)CCCCS',
 'CCCCCCCC1=CN=NN1CCCI',
 'CN1C(=NN=N1)P',
 'CCC1=NN(N=N1)CCN=C(N)N',
 'CCN1C=C(N=N1)CC(CN)O',
 'CCN1C=C(N=N1)C(=O)NCCCN',
 'C(CCN=[N+]=[N-])CCS(=O)(=O)N',
 'CC1=NN(N=C1I)C',
 'C=CCCCC(N)N=[N+]=[N-]',
 'CCCCCC(CC)NC1=NN=NC=C1',
 'C1=C(SC(=N1)C',
 'CCCCN(C(C)C(=O)O)N=[N+]=[N-]',
 'C/C=C(\\C)/CNCC1=CN(N=N1)CCCO',
 'C(CCC(=O)N)CCN=[N+]=[N-]',
 'CC1=CN(N=N1)C(C)(C)SS',
 'CCC(C)(C)N1C=C(N=N1)C2CC2',
 'CC(CN=[N+]=[N-])(C(=O)ON)N',
 'C(CSN=[N+]=[N-])N=[N+]=[N-]',
 'CC(CCN=[N+]=[N-])CN=[N+]=[N-]',
 'C1=CN=C(N=C1Cl)NCCN=[N+]=[N-]',
 'CC(=S=CO)N=[N+]=[N-]',
 'CN1N=C(N=N1)P',
 'C=C/C=C\\C1=NN=NC=C1N',
 'C1C2=CN=NN=C2C=CN1',
 'C1=CSC(=C1F)CN=[N+]=[N-]',
 'C1=C(SC(=C1F)CN=[N+]=[N-])Br',
 'CC(C)C1=NN=NN1C(=C)C=C',
 'CCOC(=O)[C@H](CCN1CC(C1)C',
 'C[C@H](C1=NN=NN1C)C(C)C',
 'CC(C)N1C=C(N=N1)I',
 'CCCN1C=C(N=N1)C2=CSC=C2',
 'CCCCCCC(C)CN1C=C(N=N1)CCCC',
 'CCCCCCC(C)CN1C=C(N=N1)CO',
 'CC(C)CN1C=C(N=N1)C2=CSC=C2',
 'CCCCCCC(C)CN1C=C(N=N1)CNC',
 'CCC(C)(C)CN1C=C(N=N1)CCCCCl',
 'CC(C)CN1C=C(N=N1)C2=CC=CC=N2',
 'CCCN1C=C(N=N1)CCCCCl',
 'CC(C)CN1C=C(N=N1)CCCCCl',
 'CC(C)CN1C=C(N=N1)C2=CN=CC=C2',
 'CCCCCCC(C)CN1C=C(N=N1)CC(C)C',
 'CC1=NN(N=C1Br)CC(=O)OC(C)(C)C',
 'CC1=NN(N=C1)CC(=O)OC(C)(C)C',
 'CC1=NN(N=C1)CC(=O)OC',
 'CC(C)(C)N1C=C(N=N1)COC',
 'CC(C)(C)N1C=C(N=N1)C(F)(F)F',
 'CC(C)(C)N1C=C(N=N1)C2CC2',
 'CCC1=C(N(N=N1)CC(=O)OCC)I',
 'CCC1=C(N(N=N1)CC(=O)OCC)C',
 'CCC1=C(N(N=N1)CC(=O)O)C',
 'CCOC(=O)CC1=NN=NN1CC2CCCCC2',
 'CCOC(=O)CC1=NN(N=N1)CC2CCCCC2',
 'CCCN1N=C(N=N1)CC(=O)OCC',
 'C(CO)COCCN=[N+]=[N-]',
 'C1=CC(=C(C=C1C2=CN=C(C=C2)C',
 'C1=CC(=C(C=C1C',
 'C1=CC(=C(C=C1N2C=C(C=N2)C',
 'C1=CC(=C(C=C1C2=CNC(=O)C=C2)C',
 'C1=CC(=C(C=C1C2=CN=C(C=C2)N)C',
 'C1=CC(=C(C=C1C2=CN=C(C=C2)F)C',
 'CC1=CC(=NC2=NNN=C12)N',
 'CCCCCCCN1C(=CN=N1)CCCS',
 'CCCCOC1=CC(=NC2=NNN=C12)N',
 'CCCC1=CN(N=N1)CCCCI',
 'CCCCCCN1C=C(N=N1)CCS',
 'C1=C(N(N=N1)CCS)CCCCCI',
 'C1=C(N=NN1CI)CCCCCI',
 'CC(C)CN1C=C(N=N1)CCCCI',
 'C1=C(N=NN1CI)CCCI',
 'C1=C(N=NN1CCCI)CCCN',
 'CCCC1=CN(N=N1)CC(C)C',
 'CC(C)CN1C(=CN=N1)CCCCI',
 'CCCCN1C=C(N=N1)CI',
 'C1=C(N(N=N1)CCS)CCCCS',
 'CCCCCCC1=CN=NN1CI',
 'CCC1=CC(=NC2=NNN=C12)N',
 'C1CC1(C',
 'C1CC1(C',
 'C1CC1(C',
 'C1CC1(C',
 'CC1CCC1CNC(=O)CCCN=[N+]=[N-]',
 'CC(C)C1=CN=NN1N',
 'CC(C)C1=NN=NN1N',
 'CCC1=C(C(=NN=N1)C(C)C)N',
 'C/C(=C\\C=C(/C=C)\\C',
 'CC1=CC2=C(C=C1)N=NN=N2',
 'CCN1N=CC(=N1)C2=CC=C(C=C2)C',
 'C1CC(C1)(CN=[N+]=[N-])N=O',
 'CN1C2=C(C=C(C=C2N=N1)C',
 'C1=CC(=C(C=C1O)O)CN=[N+]=[N-]',
 'CC1=NN(N=C1Br)CC(=O)O',
 'CC1=NN(N=C1)CC(=O)O',
 'CS(=O)(=O)OCC1=CN(N=N1)CI',
 'C(C1=C(N(N=N1)CC(=O)O)CCl)Cl',
 'CSCCC(C(=O)O)N1C=C(N=N1)CN',
 'CCOC(=O)CC1=NN(N=N1)CC',
 'C(CC1=C(N=NN1CC(=O)O)CN)CN',
 'CCOC(=O)C(=O)C1=NNN=N1',
 'CC(C)(C)OC(=O)NCC',
 'CCOC(=O)CC1=NN=NN1CC',
 'CCCCON=[N+]=[N-]',
 'CCN1N=CC(=N1)C2=CC=C(C=C2)N=O',
 'CCN1N=C(N=N1)C(I)I',
 'CCC(C)(C)C1=CC=CC=C1C2=NNN=N2',
 'C1CC(C1)C',
 'CCCCCCC(CCCCCC)CC1=NNN=N1',
 'CCCC(C)(C)C1=CN(N=N1)CC',
 'C1=C(N=NN1CCCC=O)CCC(=O)N',
 'CCC(=O)CC(CN=[N+]=[N-])O',
 'CCCC(=O)N=[N+]=N',
 'C1=CC(=CC=C1COCCN)C',
 'CC1CC2(CCCN2C1)CN=[N+]=[N-]',
 'C1=C(N=NN=N1)N=[N+]=[N-]',
 'C1CCC(CC1)OCCCCC2=NNN=C2',
 'C1=C(C=C(C=C1Br)C',
 'C1=CC(=C(C=C1Br)C',
 'COC1=C(C(=CC=C1)F)N=[N+]=[N-]',
 'C1CC1OC2=CC(=C(C=C2)C',
 'COC(=O)NC1=NC2=NNN=C2C(=C1)Cl',
 'C1=CC=C(C(=C1)C',
 'C1=CC(=C(C=C1C',
 'C1=CC(=C(C=C1C',
 'C1=CC(=C(C=C1C',
 'C/C=N\\N1C(=CN=N1)N=C',
 'CC(C)(C)OCC(CNC(=O)O)N=[N+]=N',
 'CC(C)(C1=NN=NN1C)I',
 'CC[C@H](CN1C2=CC=CC=C2N=N1)I',
 'C=C(CN=[N+]=[N-])N1CCC(CC1)CN',
 'C(CN)COCN=[N+]=[N-]',
 'CC(C)(C)CN1C=C(N=N1)C2CC2',
 'C1=CC=NC(=C1)C2=CN(N=N2)CCI',
 'C',
 'CC1=NC(=CS1)CN2C=NN=N2',
 'CPN1C2=CC=CC=C2N=N1',
 'C(CNC(=O)COCI)CN=[N+]=[N-]',
 'CCCONCCCN=[N+]=[N-]',
 'CC(C)CCONCCCN=[N+]=[N-]',
 'C1CC(N(C1)N2C=CN=N2)C(=O)N',
 'CC(C)(C)OC(=O)CN1C2=C(C(=N1)C',
 'CCCCCCCC(CCCCCC)C1=NNN=N1',
 'CCC(C)CC(C)N1C=CN=N1',
 'C1CCC(CC1)N2C=C(N=N2)I',
 'CCCCC/C=C(/C',
 'CC(C)C1=C(C=CC(=C1)C',
 'C(CCN=[N+]=[N-])CCSN',
 'C1=CC2=NNN=C2C=C1NN',
 'C1=CC=C(C=C1)CN2C(=CN=N2)OO',
 'C1=CC(=C(C=C1F)Br)N=[N+]=N',
 'CCNC(=O)C1=C(C(=CC(=C1)C',
 'CC(C)C1CCC(CC1)NCCCC2=NNN=C2',
 'CC1=C(C=CC(=C1C',
 'COC(=O)CC1=CC=CC(=C1)C',
 'C(CCSCC(=O)O)CN=[N+]=[N-]',
 'CCN1C2=NC=CC(=C2N=N1)N',
 'CC(C)N(C(C)C)P(OCCC',
 'CC(C)N1C2=C(C(=NC=C2)N)N=N1',
 'CC(C)N1C=NC2=C1N=NN=C2N',
 'CCS(=S)N=[N+]=[N-]',
 'CC(C(=O)OCN=[N+]=[N-])NC',
 'CC1=CC(=CC(=C1N)C)C2=NNN=N2',
 'CC(C)(CN1C(=NN=N1)N)O',
 'CCCCCNC(=O)CCC1=CN(N=N1)CCF',
 'CC(CCOCN=[N+]=[N-])F',
 'CCCCNC(=O)CCC1=CN(N=N1)CCF',
 'CN1N=C(C(=N1)C(F)(F)F)CS(=O)O',
 'C1CCCCC(CCCC1)N=[N+]=[N-]',
 'CC(C)(C)CN1N=CN=N1',
 'CC(C)N1C2=C(C(=O)CC=N2)N=N1',
 'CC(C)N1C2=C(C(=CC=C2)N)N=N1',
 'C(C(=O)NN)NC(=O)OCN=[N+]=[N-]',
 'CC(=O)CNC(=O)OCN=[N+]=[N-]',
 'CCC(C)C1=CC=C(C=C1)N2C=NN=N2',
 'CC(C1=CC=C(C=C1)C2=NNN=N2)S',
 'CC1=C(C=CC(=C1C',
 'CN(C)N1N=CC=N1',
 'C',
 'CC',
 'C(COCCOF)N=[N+]=[N-]',
 'CC1=CN=C(C(=N1)C2=NNN=C2C)N',
 'CC(C)(C)C1=NN(N=C1)CC(C)(F)F',
 'C1=CC=C(C(=C1)C(C',
 'CC(C)N1C=C(N=N1)CNC2=CC=CC=C2',
 'CC1=CC=C(C2=NNN=C12)I',
 'CCC(N=[N+]=[N-])F',
 'CCCC(N=[N+]=[N-])F',
 'CCCCCCCC1=CN=NN1CC(C)C',
 'CCCC1=CN=NN1CCCCCCC2CS2',
 'CC(C)CCCCCC1=CN(N=N1)CSC',
 'C[C@@H](C(=O)NC(C(C)OCC',
 'CCCCCC1=CN(N=N1)CCCCCC(C)C',
 'CC(C)CCCN1C(=CN=N1)CCCNN',
 'CCCCCCN1C=C(N=N1)CCCCC(C)C',
 'CCCCN1C=C(N=N1)CCCCCCC(C)C',
 'CCN1C=C(N=N1)CCCCC=C(C)C',
 'CNC1=CC=CC2=C1N=NN2C',
 'CC(C)CCCN1C=C(N=N1)C(C)C(C)C',
 'CCCC1=CN=NN1CCCCSS',
 'CC(C)CCCC1=CN=NN1CCCC=C(C)C',
 'CC(C)CCCC1=CN=NN1CCCCS',
 'COC1=CC(=C(C=C1)F)N=[N+]=[N-]',
 'CCCCCCC1=CN=NN1CCCCCC(C)C',
 'CC(C)CCCC1=CN=NN1CCCCCCSC',
 'CCCCCCC1=CN(N=N1)CCCCC(C)C',
 'CCCCCCC1=C(N=NN1CC)CN',
 'CCCCCCC1=C(N(N=N1)CC)CN',
 'CCCCN1C=C(N=N1)CCCCC(C)C',
 'CCCCCCCN1C(=CN=N1)CCC(C)SC',
 'CC(C)CCCC1=CN(N=N1)CCC2CN2C',
 'C=C(N)N1N=CN=N1',
 'CSC1=CN=C2C(=C1)CN3C2=NN=N3',
 'CC(C)SC1=NC(=CS1)C2=NNN=N2',
 'CC1=C2C3=C(C=C(C=C3)C',
 'CC(C)N(C(C)C)P(OCCC',
 'C1=CC=C(C(=C1)C',
 'CC(C)(C)SC1=NC(=CS1)C2=NNN=N2',
 'CC1=NN(N=C1)CC(F)(F)F',
 'CCC1=C(N=C(S1)C2=NNN=C2)C',
 'CC1=C(SC(=N1)C2=NNN=C2)C(C)C',
 'CC1=CC(=NN=C1C=O)N2C=NN=N2',
 'C1=CC(=CC=C1C(CO)O)N2C=NN=N2',
 'CCCC=CCCN=[N+]=[N-]',
 'CCN1C=C(N=N1)CCC(C)C',
 'C1=CC=C2C(=C1)N=NN2SCCCCP=O',
 'CC1=CN=C(C(=C1)C2=NNN=C2)N',
 'CC(C)(C)C1=NN(N=C1)CC(F)(F)F',
 'CC1=CC(=CC(=C1C)C)C2=NNN=N2',
 'CC1=NN=NN1CC(C)(C)O',
 'C',
 'CC(C)[C@H](N=[N+]=[N-])OC=O',
 'CCCCC1=CN(N=N1)CCCCN',
 'CC[C@H](C)C(N=[N+]=[N-])OC=O',
 'CC1=CN(N=N1)NI',
 'CC(=O)NC1=CC(=NC2=NNN=C12)Cl',
 'C',
 'CN1N=C(N=N1)C2=CC=C(C=C2)C',
 'CCN1C(=CN=N1)[C@H](C)O',
 'CCN1C(=CN=N1)[C@@H](C)O',
 'CCCCCOCN=[N+]=[N-]',
 'COCC1=CN(N=N1)CCOCCOCCO',
 'CC1=CC(=C(C=C1)N2C=C(N=N2)C',
 'CC1=C(C(=C(C=C1)N2C=C(N=N2)C',
 'CC(N=[N+]=[N-])P=P',
 'C1=CC(=CC=C1N=[N+]=[N-])OCCCO',
 'CC',
 'C1=CC(=CC=C1NCN=[N+]=[N-])N=O',
 'C(NC(=O)I)N=[N+]=[N-]',
 'C(C',
 'C1C2=CC(=CN=C2C3=NN=NN31)Br',
 'C1C2=C(C3=NN=NN31)N=CC=C2',
 'C=CC1=CC2=C(C3=NN=NN3C2)N=C1',
 'C1CCO[C@@H](C1)CCN=[N+]=[N-]',
 'C=CC1=CN=C(C=C1)N2C=CN=N2',
 'CC(CCCCO)N=[N+]=[N-]',
 'C',
 'CC(=S)N=[N+]=[N-]',
 'CCC1=CC2=C(N=NN2C=C1)C(=O)O',
 'CC1=C(C2=C(N=NN2C=C1)C(=O)O)C',
 'CC1=CC2=C(N=NN2C=C1)N',
 'C',
 'CCN(CCCCNCC1=NNN=C1)C(=O)O',
 'C1CC2=NNN=C2CC1C=O',
 'CCC(C)N1C(=C(N=N1)C(C)C)C',
 'C[C@@H](CCCN=[N+]=[N-])NC',
 'CCCC(C)C1=NNN=C1C(C)(C)C',
 'C',
 'COC1=CN2NC3=NC=CN=C3[N+]2=C1',
 'CCOC(=O)C(CC',
 'CCOC(=O)C(CC',
 'CN1C2=NC=CN=C2[N+]3=CC=NN13',
 'C1=CN2NC3=NC=NC=C3[N+]2=C1',
 'COC1=CC=[N+]2N1NC3=NC=CN=C32',
 'C1=CC2=C(N=C1)[N+]3=CC=CN3N2',
 'COC1=CC=C(C=C1)C',
 'CCOC(=O)C(CC',
 'CC(C)(C)OC(=O)NCC',
 'C',
 'C',
 'CC(C)C1=CC=C(C=C1)SC2=NN=NN2C',
 'CC1=C(C2=NNN=C2C=C1)C(C)(C)C',
 'CC1CNCC(N1)CN=[N+]=[N-]',
 'C1C=CC(N1)CN=[N+]=[N-]',
 'CCC1=NN=NN1CCN(C)C',
 'COC(=O)C1=CN(N=N1)CC',
 'CC(C)N1C=C(N=N1)CCOI',
 'CCC(C)(CC)N1C=C(N=N1)CCF',
 'CCCCCCN1C=C(N=N1)CCF',
 'C',
 'CCCCCCCC(CCCC)CCCN1C=CN=N1',
 'C',
 'C',
 'CCCCCC1=CC2=NNN=C2N=C1C(=O)N',
 'CCCC(C)ON1C2=C(C=CC=N2)N=N1',
 'CCN1CC(CC1C2=NNN=N2)(F)F',
 'CC(C)(CC1=NNN=C1)N',
 'CCN1C=C(N=N1)C(=O)N',
 'CC(=O)N1CCC[C@H]1CN=[N+]=[N-]',
 'C1CCC2=NN=NC=C2C1',
 'COC(=O)CCOCCOCCOCCN=[N+]=[N-]',
 'C(COCCOCCOCCOCCON)N=[N+]=[N-]',
 'C1C[C@@H](CNC1)N2C=C(N=N2)CN',
 'C1C[C@H](CNC1)N2C=C(N=N2)CN',
 'COCC1=CN(N=N1)[C@H]2CCNC2',
 'COCC1=CN(N=N1)[C@@H]2CCNC2',
 'CN1C2=C(C=C(C=C2)C(=O)NN)N=N1',
 'C',
 'C1=NN(N=N1)CC2=NN(N=N2)CC',
 'COC1=CC=C(C=C1)CN2N=C(N=N2)N',
 'C',
 'C1=CC(=CNNC2=NNN=N2)N=C1',
 'C',
 'C',
 'C1=CC=C(C(=C1)C',
 'C1=CC=C(C(=C1)C',
 'C1=CC=C(C(=C1)C',
 'C1=CC=C(C(=C1)C',
 'C1=CC=C(C(=C1)C',
 'C1=CC=C(C(=C1)C',
 'C1=CC(=C(C=C1Cl)N2C=NN=N2)Cl',
 'CC[C@H](C(OC)OC)SC1=NNN=C1',
 'CC[C@@H](C(OC)OC)SC1=NNN=C1',
 'C[C@@H](C',
 'C[C@@H](C',
 'C[C@H](C',
 'C[C@H](C',
 'C1=CC(=NC=C1C2=NNN=N2)OCC(F)F',
 'C1CSC[C@@H]1N=[N+]=[N-]',
 'C1CSC[C@H]1N=[N+]=[N-]',
 'CCOCN1C(=C(N=N1)C(=O)O)N',
 'C[C@@H](CN=[N+]=[N-])C1OCCO1',
 'C[C@H](CN=[N+]=[N-])C1OCCO1',
 'CC[C@H](CN=[N+]=[N-])C1OCCO1',
 'CC[C@@H](CN=[N+]=[N-])C1OCCO1',
 'CC[C@H](CC1OCCO1)N=[N+]=[N-]',
 'CC[C@@H](CC1OCCO1)N=[N+]=[N-]',
 'C[C@@H](CC1OCCO1)N=[N+]=[N-]',
 'C[C@H](CC1OCCO1)N=[N+]=[N-]',
 'CN(C)CC1=CN(N=N1)[C@H]2CCNC2',
 'CN(C)CC1=CN(N=N1)[C@@H]2CCNC2',
 'CNCC1=CN(N=N1)[C@H]2CCNC2',
 'CNCC1=CN(N=N1)[C@@H]2CCNC2',
 'CCNCC1=CN(N=N1)[C@H]2CCNC2',
 'CCNCC1=CN(N=N1)[C@@H]2CCNC2',
 'C(C',
 'CN(CCCC',
 'CCCCN(CCCC',
 'C1=CC=C(C=C1)CN(CCCC',
 'C=CCN(CCCC',
 'CCN(CC)N1N=C2C=CC=CC2=N1',
 'CC1=CC2=NN(N=C2C=C1)N(C)C',
 'CC1=NSC(=C1C(=O)O)N2N=CC=N2',
 'C1=NN(N=C1)C2=CSN=C2C(=O)O',
 'CN1C=C(C(=N1)N2N=CC=N2)C(=O)O',
 'COC1=C(C=CC(=C1)O)N2C=NN=N2',
 'C1CNCCC1C2=NN=NN2CC(F)(F)F',
 'CCCCCC1=CC=CC2=CN=NN21',
 'CCN(CC)N1N=C2C=CC(=CC2=N1)Cl',
 'CN(C)N1N=C2C=CC(=CC2=N1)Cl',
 'CCCCC1=CN=NN1C2=CC=C(C=C2)I',
 'C[Si](C)(C)CC1=NNN=C1',
 'C1CCN2C(=CN=N2)C3=CC=CC=C3C1',
 'CC(=O)C1=C(N(N=N1)CCN2CCCC2)I',
 'CCC[C@@](C)(C1=NNN=C1C=O)O',
 'CCC[C@](C)(C1=NNN=C1C=O)O',
 'CC[C@@](C)(C1=NNN=C1C=O)O',
 'CC[C@](C)(C1=NNN=C1C=O)O',
 'C1=CC(=CC=C1C',
 'C1COCCN1N2N=C3C=CC=CC3=N2',
 'CC1=CC=CC=C1C2=CN=NN2CCC=O',
 'CC(C)(C)[Si](C)(C)OCC1=NNN=N1',
 'C[Si](C)(C)CCOCN1N=CC(=N1)C=O',
 'CNCC1=NN(N=C1)CC2=CC=CC=C2',
 'CN(C)C(=O)CN1N=C2C=CC=CC2=N1',
 'CCCCC1=CN=NN1C2=CN=C(C=C2)Br',
 'C[Si](C)(C)C1=C[N+]2=CC=CN2N1',
 'C1CCN(C1)N2N=C3C=CC=CC3=N2',
 'C=CC1=C2C3=CC=CC=C3CCN2N=N1',
 'COC1=CC=CC=C1N2C(=CN=N2)C=C',
 'C[Si](C)(C)CCOCN1C=C(N=N1)CO',
 'C[Si](C)(C)CCOCN1C=C(N=N1)CCN',
 'CC(C)(C)N1C(=CN=N1)C=O',
 'C[Si](C)(C)CCOCN1C=C(N=N1)CC',
 'CC(C)(C)[Si](C)(C)OCC1=NNN=C1',
 'C1=CN(N=N1)CCCCCN2C=CN=N2',
 'C1=CSC=C1C2=CN(N=N2)C3=CSC=C3',
 'C',
 'COCC(=C)ON1C2=CC=CC=C2N=N1',
 'CCCCC(=O)CC1=CC=CC2=NNN=C21',
 'COCC(=O)CC1=CC=CC2=NNN=C21',
 'CCCCCCN1C=C(N=N1)C(=O)CC',
 'C1=CC=C(C=C1)N2C=C(N=N2)C=C(C',
 'COC(=O)C1=NNN=C1C2CC2',
 'C1CC(=CC[C@@H]1C',
 'C1CC(=CC[C@H]1C',
 'C1C[C@H](CNC1)N2C(=CN=N2)I',
 'C1C[C@@H](CNC1)N2C(=CN=N2)I',
 'C1CC1C2=C3CCS(=O)(=O)CCN3N=N2',
 'CCN1C(=C(N=N1)Br)S(=O)(=O)Cl',
 'CCN1C(=C(N=N1)Br)C=O',
 'CCN1C=C(N=N1)C[C@H](C)O',
 'CCN1C(=C(N=N1)Br)CN',
 'CCN1C=C(N=N1)C[C@H](C)N',
 'C1CC2=C(C[C@H]1N)N(N=N2)C3CC3',
 'C1=CSC2=C1C(=CN3C2=NN=N3)C',
 'CN=CC1=CN(N=N1)C[C@H]2CCOC2',
 'CN=CC1=CN(N=N1)C[C@@H]2CCOC2',
 'C1=NN(N=C1Br)CCO',
 'CCC1=C2CCS(=O)(=O)CCN2N=N1',
 'CCN1C(=C(N=N1)Br)C(=O)O',
 'CC(C)(C)N1N=CC(=N1)Br',
 'C1CC(C1)N2N=CC(=N2)Br',
 'C1=NN(N=C1Br)CCC(=O)O',
 'CCN1C(=C(N=N1)Br)CO',
 'CC(C)C1=C2CCS(=O)(=O)CCN2N=N1',
 'CCCCN1N=CC(=N1)Br',
 'CC(C)CN1N=CC(=N1)Br',
 'CC(=O)OCCN1N=CC(=N1)Br',
 'CC1=C2CCS(=O)(=O)CCN2N=N1',
 'CCCN=CC1=CN(N=N1)C[C@H]2CCOC2',
 'CCCCNCC1=CN(N=N1)C[C@H]2CCOC2',
 'C1CC1CN2N=CC(=N2)Br',
 'CCOCCN1N=CC(=N1)Br',
 'COCCN1N=CC(=N1)Br',
 'C1=CC(=CC=C1C',
 'CCN=CC1=CN(N=N1)C[C@H]2CCOC2',
 'CCN=CC1=CN(N=N1)C[C@@H]2CCOC2',
 'C1=NN(N=C1C(=O)O)CC(=O)O',
 'C[C@@H](CC1=CN(N=N1)C(C)C)O',
 'C[C@H](CC1=CN(N=N1)C(C)C)O',
 'CC[C@H](C(=O)O)N1N=CC(=N1)Br',
 'CC[C@@H](C(=O)O)N1N=CC(=N1)Br',
 'CC1(C[C@H](OC1)CN=[N+]=[N-])C',
 'CC(C)CCN1N=CC(=N1)Br',
 'CCC(CC)N1N=CC(=N1)Br',
 'C1=NN(N=C1S(=O)(=O)Cl)CC(=O)O',
 'C1CC12C[C@H](OC2)CN=[N+]=[N-]',
 'CS(=O)(=O)CC1=NNN=C1',
 'COC[C@@H](CC1=NNN=C1)O',
 'COC[C@H](CC1=NNN=C1)O',
 'CC1=CC(=CC(=C1OC)N2C=NN=N2)Cl',
 'C1=C(ON=C1CN=[N+]=[N-])C(F)F',
 'CC1=NN(N=C1C(=O)O)C2=CN=CC=C2',
 'C(C(=O)O)N1N=C(C(=N1)Br)Br',
 'C1COC[C@H]1CCNC(=O)CN2N=CC=N2',
 'C1=CN(C=C1)CCCC(=O)NC2=NNN=N2',
 'C1=NNC(=N1)NC(=O)CCCN2C=NN=N2',
 'C1=C(N=NN1CCCCCCCCCC(=O)O)O',
 'CC(C)C1CCC(CC1)C2=NNN=C2',
 'C[C@H]1CCCC[C@@H]1C2=NNN=C2',
 'C[C@@H]1CCCC[C@@H]1C2=NNN=C2',
 'C1=CC(=C(C=C1C',
 'C[C@H]1CCCC[C@H]1C2=NNN=C2',
 'C[C@@H]1CCCC[C@H]1C2=NNN=C2',
 'CC1=CN(N=N1)CCCCCCCCCC(=O)O',
 'C[C@H]1CCC[C@@H](C1)C2=NNN=C2',
 'C[C@H]1CCC[C@H](C1)C2=NNN=C2',
 'C[C@@H]1CCC[C@H](C1)C2=NNN=C2',
 'CC[C@]1(CCNC1=O)CN=[N+]=[N-]',
 'CC[C@@]1(CCNC1=O)CN=[N+]=[N-]',
 'CC1(CCC[C@@H](C1)C2=NNN=C2)C',
 'CC1(CCC[C@H](C1)C2=NNN=C2)C',
 'C1[C@H]2[C@H]1CC(C2)C3=NNN=C3',
 'C1CC2(CCC1C3=NNN=C3)OCCO2',
 'CC1(CCC(CC1)C2=NNN=C2)C',
 'CCC[C@]1(CCNC1=O)CN=[N+]=[N-]',
 'C(CI)OCC1=NNN=C1Br',
 'CC1=NNN=C1COCCI',
 'CN1C(=CN=N1)C[C@@H](C(=O)O)O',
 'CN1C(=CN=N1)C[C@H](C(=O)O)O',
 'C[C@@]1(CCNC1=O)CN=[N+]=[N-]',
 'C[C@]1(CCNC1=O)CN=[N+]=[N-]',
 'CC1=C(NN=C1)CN=[N+]=[N-]',
 'CCC1=NSC(=N1)NCC2=C(N=NN2CC)C',
 'CCCCN1C(=NN=N1)CNC(=O)C=C',
 'C',
 'C',
 'CN1C=C(N=N1)C2=CC(=CC=C2)CCOC',
 'C',
 'CN(C1CC1)C2=NNN=N2',
 'CC1=NN=NN1CC2=CC(=C(N=C2)Cl)F',
 'C1=CC(=CC=C1CC',
 'C1=CC(=CC=C1CC',
 'C1=NNN=C1SCCN2C=NC(=N2)Br',
 'C1CC1(CSC2=NNN=C2)Br',
 'CC1=NN=NN1CC2=CC=CC=C2OC3CC3',
 'CN(C)C1=NC=C(C=C1)CSC2=NNN=C2',
 'C[C@@H](C1CCOCC1)SC2=NNN=C2',
 'C[C@H](C1CCOCC1)SC2=NNN=C2',
 'C=CC(=O)N1CCC[C@H]1CN2C=CN=N2',
 'C1CCCC(CC1)CN2C(=NN=N2)CCO',
 'C1C=C(S[C@H]1Cl)C2=NNN=N2',
 'C1C=C(S[C@@H]1Cl)C2=NNN=N2',
 'C[C@H]1C(=C(NN1C2=NNN=N2)C)Br',
 'C1CCC(CC1)NC2=NN=NN2CCCCCl',
 'COC(=O)C1=NC2=NN=NN2C(=O)C1',
 'CC1=CC2=NNN=C2C3=NSNC13',
 'C1=C(C(=NC=N1)N)CN=[N+]=[N-]',
 'CC1=NC2=NN=NN2C(=O)C1',
 'COC(=O)CC1=CN(N=N1)CC2CCCCC2',
 'C[C@@H]1C(=O)N2C(=NN=N2)N=N1',
 'C[C@H]1C(=O)N2C(=NN=N2)N=N1',
 'C1=NN=NN1CC(=O)NCC=O',
 'CC1=C(CN(N1)C)C2=NNN=N2',
 'C1C(=NN=C1N2C=NN=N2)C(=O)O',
 'CC1=NC(=O)C2=C(N=NN2C1)C(=O)O',
 'C1C(=C(N=N1)N2C=NN=N2)C(=O)O',
 'C1C=CO[C@H]1/C=N\\NC2=NNN=N2',
 'C1C=CO[C@@H]1/C=N\\NC2=NNN=N2',
 'C1CCC(CC1)N2C(=NN=N2)NCCCCCl',
 'C[C@@H]1C(=C(NN1C2=NNN=N2)C)N',
 'C[C@H]1C(=C(NN1C2=NNN=N2)C)N',
 'C1C(=NC2=NN=NN2C1=O)C(=O)O',
 'C1C=C(O[C@H]1Br)C2=NNN=N2',
 'C1C=C(O[C@@H]1Br)C2=NNN=N2',
 'C[C@H]1CC=C(N1C)C2=NNN=N2',
 'C[C@@H]1CC=C(N1C)C2=NNN=N2',
 'C1CCC(CC1)NC2=NN(N=N2)CCCCCl',
 'C1C(=NC(=O)S1)CN=[N+]=[N-]',
 'CC1=NC(=O)C2=CN=NN2C1',
 'C[C@H]1CC=C(S1)CNC2=NNN=N2',
 'C[C@@H]1CC=C(S1)CNC2=NNN=N2',
 'CN1C(=CN=N1)CN(C)C2=CC=CC=C2F',
 'COC(=O)C1=NNN=C1C2CCCCC2',
 'C1=CC(=CC=C1NC(=O)CC',
 'CCC1=NN=C(S1)N2C=NN=N2',
 'CN(CCC',
 'CCN(CCN1C=CN=N1)C2=CC=CC=C2',
 'C1=CC(=NC=C1CO)CN=[N+]=[N-]',
 'CCCN1C2=C(C(=NC(=N2)C)Cl)N=N1',
 'CCCN1C2=C(C(=NC(=N2)Cl)N)N=N1',
 'CCCN1C2=C(C(=NC=N2)Cl)N=N1',
 'CCCN1C(=C(N=N1)C(=O)O)N',
 'CC(C)(CN=[N+]=[N-])N(C)C',
 'CCCN1C2=C(C(=NC(=N2)N)NC)N=N1',
 'CN1C(=NN=N1)SC(=O)N2CCOCC2',
 'C',
 'C',
 'C',
 'C',
 'CC1=CC(=NC(=C1C',
 'CC1=CC(=NC(=C1C',
 'C=CCN(CC1=CC=C(C=C1)C',
 'C=CCN(CC1=CC=C(C=C1)C',
 'C[C@H](CN(C)CC1=NN(N=C1)C)C',
 'C[C@@H](CN(C)CC1=NN(N=C1)C)C',
 'CCN(C[C@@H](C)C',
 'CCN(C[C@H](C)C',
 'C[C@H](CCC',
 'C[C@@H](CCC',
 'C[C@](C',
 'C[C@@](C',
 'CC1(CCCN([C@H]1C',
 'CC1(CCCN([C@@H]1C',
 'CCC(=O)N1CC[C@H](C1)N2N=CC=N2',
 'C1[C@H]([C@H](CO1)O)N2C=CN=N2',
 'C1C[C@](CNC1)(C2=NNN=C2)O',
 'C1C[C@@](CNC1)(C2=NNN=C2)O',
 'C1[C@H]([C@H](CO1)N2C=CN=N2)N',
 'C1CNC[C@]1(C2=NNN=C2)O',
 'C1CNC[C@@]1(C2=NNN=C2)O',
 'C1CC2=NNN=C2CO[C@@H]1CN',
 'C1C[C@@](CNC1)(CN2C=CN=N2)O',
 'C1=CN=C(C=C1N=[N+]=[N-])N',
 'C1CC2=NNN=C2CO[C@@H]1CO',
 'C1=CC(=C(C=C1C=O)C',
 'C1CC2=NNN=C2CO[C@H]1CO',
 'C1C[C@H]([C@@H]1N=[N+]=[N-])O',
 'C1C[C@](CNC1)(CN2C=CN=N2)O',
 'C1=C(NC(=O)NC1=O)CN=[N+]=[N-]',
 'C1=CC(=NN2C1=NC(=N2)N)Cl',
 'C1=NNC(=N1)CN=[N+]=[N-]',
 'C1CC2(CCC1CN=[N+]=[N-])OCCO2',
 'C1CC2=NNN=C2CO[C@H]1CN',
 'C1=CC2=NN(N=C2C=C1)CC(=O)Cl',
 'COC(=O)[C@@H]1CCC2=NNN=C2CO1',
 'COC(=O)[C@H]1CCC2=NNN=C2CO1',
 'CC(C)CCC1=CN(N=N1)CCN',
 'CN1C=C(N=N1)C[C@@H](C(=O)O)O',
 'CN1C=C(N=N1)C[C@H](C(=O)O)O',
 'C1CNC[C@H]1N2C=C(N=N2)C3CC3',
 'C1CNC[C@@H]1N2C=C(N=N2)C3CC3',
 'CCOC(=O)[C@H]1CCC2=NN(N=C12)N',
 'CCOC(=O)[C@H]1CCC2=NN(N=C12)C',
 'CC[C@H](C(=O)O)N1C=C(N=N1)CF',
 'CC[C@@H](C(=O)O)N1C=C(N=N1)CF',
 'C[C@H](C(=O)O)N1C=C(N=N1)CF',
 'C[C@@H](C(=O)O)N1C=C(N=N1)CF',
 'CC[C@H](C(=O)OC)N1C=C(N=N1)CF',
 'C[C@@H](C(=O)OC)N1C=C(N=N1)CF',
 'C[C@H](C(=O)OC)N1C=C(N=N1)CF',
 'C1=CN(N=N1)[C@H](CCN)C(F)(F)F',
 'C1=CN(N=N1)[C@H](CCO)C(F)(F)F',
 'CC[C@H](C(=O)OC)N1C(=CN=N1)CO',
 'C[C@@H](C(=O)OC)N1C(=CN=N1)CO',
 'C[C@H](C(=O)OC)N1C(=CN=N1)CO',
 'C1C[C@@H](OC1)CN2C(=CN=N2)CO',
 'C1C[C@H](OC1)CN2C(=CN=N2)CO',
 'C1C[C@@H](CCNC1)N2C=CN=N2',
 'C1C[C@H](CCNC1)N2C=CN=N2',
 'C1=CN(N=N1)[C@H](CN)C(F)(F)F',
 'C1=CN(N=N1)[C@@H](CN)C(F)(F)F',
 'CC(C)N1C2=C(CC[C@H](C2)N)N=N1',
 'CCN1C2=C(CC[C@H](C2)N)N=N1',
 'CCN1C2=C(CC[C@@H](C2)N)N=N1',
 'C[C@H](C1CCOCC1)N=[N+]=[N-]',
 'C[C@@H](C1CCOCC1)N=[N+]=[N-]',
 'CN1C2=C(C[C@H](CC2)N)N=N1',
 'CN1C2=C(C[C@@H](CC2)N)N=N1',
 'CC1=NN=NN1N[C@]2(CC=CC=C2)O',
 'CC1=NN=NN1N[C@@]2(CC=CC=C2)O',
 'C1CCO[C@H](C1)N2N=C(N=N2)CCl',
 'C1CCO[C@@H](C1)N2N=C(N=N2)CCl',
 'C1[C@H](O1)CCN=[N+]=[N-]',
 'C1[C@@H](O1)CCN=[N+]=[N-]',
 'C1CC1C2=NN(N=N2)C[C@H](CCC',
 'C1CC1C2=NN(N=N2)C[C@@H](CCC',
 'C1CO[C@H](CN1)CN2C=CN=N2',
 'C1CO[C@@H](CN1)CN2C=CN=N2',
 'C1CCC2=NN=C(N2CC1)[C@@H](C',
 'C1CCC2=NN=C(N2CC1)[C@H](C',
 'C[C@@H](C1=CN(N=N1)C(C)C)O',
 'C[C@H](C1=CN(N=N1)C(C)C)O',
 'CC(C)N1C(=NN=N1)[C@H]2CCNC2',
 'CC(C)N1C(=NN=N1)[C@@H]2CCNC2',
 'CN1C=C(N=N1)/C=C\\C(=O)O',
 'C[C@H]1CC=C(O1)C2=NNN=N2',
 'C[C@@H]1CC=C(O1)C2=NNN=N2',
 'C1=CN(N=N1)C[C@H](CN)O',
 'C1=CN(N=N1)C[C@@H](CN)O',
 'C1C[C@H](CNC1)N=[N+]=[N-]',
 'C1C[C@@H](CNC1)N=[N+]=[N-]',
 'CO[C@H]1CCCC[C@H]1N=[N+]=[N-]',
 'CCN1C=C(N=N1)[C@H](C)O',
 'CCN1C=C(N=N1)[C@@H](C)O',
 'CN1CCCC[C@H]1CCSC2=NN=NN2CC=C',
 'CN1C2=CC=CC=C2N=C1[C@H](C',
 'CN1C2=CC=CC=C2N=C1[C@@H](C',
 'C1CC2=NN=NN2[C@H]1C(=O)O',
 'C1CC2=NN=NN2[C@@H]1C(=O)O',
 'CC1=CN(N=N1)[C@@H](C)C(=O)OC',
 'CC1=CN(N=N1)[C@H](C)C(=O)OC',
 'CC1=CN=NN1[C@@H](C)C(=O)OC',
 'CC1=CN=NN1[C@H](C)C(=O)OC',
 'CC1=CN(N=N1)[C@H](C)C(=O)O',
 'CN1CC[C@H](C1=O)N=[N+]=[N-]',
 'CN1CC[C@@H](C1=O)N=[N+]=[N-]',
 'C[C@](CN1N=C2C=CC=CC2=N1)(C',
 'C[C@@](CN1N=C2C=CC=CC2=N1)(C',
 'C1CNC[C@H]1CN2C=CN=N2',
 'C1CNC[C@@H]1CN2C=CN=N2',
 'C1C[C@H](N2C(=CN=N2)C1)C(=O)O',
 'C[C@H](C(=O)O)N1N=CC=N1',
 'C[C@@H](C(=O)O)N1N=CC=N1',
 'C1CC2=NNN=C2CO[C@@H]1C(=O)O',
 'C1CC2=NNN=C2CO[C@H]1C(=O)O',
 'CN1C=C(N=N1)[C@H](C',
 'CN1C=C(N=N1)[C@@H](C',
 'C1C=C[C@H](S1)C2=NNN=N2',
 'C1C=C[C@@H](S1)C2=NNN=N2',
 'COCCOC1=NC=C(C=C1)C2=NNN=N2',
 'C1=CC=C2C(=C1)N=CN2CC3=NNN=N3',
 'C1=CC=C(C=C1)COCCC2=NNN=N2',
 'CC1=CC(=NC(=N1)NC2=NNN=N2)C',
 'C1COCCC1CCC2=NNN=N2',
 'C1CCC(CC1)CCC2=NNN=N2',
 'C(C1=NNN=N1)SCC(=O)O',
 'C1C=C(S[C@H]1Br)C2=NNN=N2',
 'C1C=C(S[C@@H]1Br)C2=NNN=N2',
 'C1CSC=C1C2=NNN=N2',
 'C1C=CC(=N1)C2=NNN=N2',
 'C1C[C@@H](NC1)CN=[N+]=[N-]',
 'COCCNCC1=CN(N=N1)C[C@H]2CCOC2',
 'CCOC(=O)[C@H](CN=[N+]=[N-])O',
 'C1C[C@H](SC1)C2=NNN=N2',
 'C1C[C@@H](SC1)C2=NNN=N2',
 'CC(C)(C)CC[C@H](C1=NNN=C1)N',
 'CC(C)(C)CC[C@@H](C1=NNN=C1)N',
 'C[C@H](C1=NNN=C1)O',
 'C[C@@H](C1=NNN=C1)O',
 'C1CS[C@H](CS1)C2=NNN=N2',
 'C1CS[C@@H](CS1)C2=NNN=N2',
 'CCCNCC1=CN(N=N1)C[C@H]2CCOC2',
 'CCCNCC1=CN(N=N1)C[C@@H]2CCOC2',
 'CCNCC1=CN(N=N1)C[C@H]2CCOC2',
 'CCNCC1=CN(N=N1)C[C@@H]2CCOC2',
 'CNCC1=CN(N=N1)C[C@H]2CCOC2',
 'C1COC[C@H]1CN2C=C(N=N2)C=O',
 'C1COC[C@@H]1CN2C=C(N=N2)C=O',
 'CC[C@H](CN1C=C(N=N1)C=O)O',
 'CC[C@@H](CN1C=C(N=N1)C=O)O',
 'C1C[C@@H](OC1)CN2C=C(N=N2)C=O',
 'C1C[C@H](OC1)CN2C=C(N=N2)C=O',
 'C[C@H](CC1=CN(N=N1)C)O',
 'CN1C=C(N=N1)C2=CC(=CC=C2)OC',
 'C[C@@H](CC1=CN(N=N1)C)O',
 'CN1C(=CN=N1)CBr',
 'C[C@@H](C1=CN=NN1C)Br',
 'C1[C@H]([C@H](CN1)O)N2C=CN=N2',
 'CNCC1=CN(N=N1)C[C@@H]2CCOC2',
 'C1=CC=C(C=C1)N2C(=CN=N2)CBr',
 'C[C@H](N1C=CN=N1)O',
 'C[C@@H](N1C=CN=N1)O',
 'C[C@H](CN1C=CN=N1)C2=CC=CC=C2',
 'CN(C)C1=CC=C(C=C1)C2=NNN=C2C',
 'CN1C=C(N=N1)N2CCC[C@H](C2=O)N',
 'CN1C=C(N=N1)N2CCC[C@H](C2)N',
 'CN1C=C(N=N1)N2CCC[C@@H](C2)N',
 'CN1C=C(N=N1)/C=N\\N',
 'CN1C=C(N=N1)/C=N\\O',
 'CC1=C(N=NN1[C@H]2CCNC2)C(=O)O',
 'C1CNC[C@H]1N2C=C(N=N2)C(=O)O',
 'C1CNC[C@@H]1N2C=C(N=N2)C(=O)O',
 'CC1=CN(N=N1)[C@@H](C)C(=O)O',
 'C1COC[C@H]1N2C=C(N=N2)CN',
 'C1COC[C@@H]1N2C=C(N=N2)CN',
 'CC1=C(C=CC(=C1)I)N2C=NN=N2',
 'C1=CC=C(C(=C1)C',
 'CC1=NN=NN1CC2=C(C=CC=C2Cl)Cl',
 'CC1=NN=NN1CC(=O)N2CCOCC2',
 'CC1=NN=NN1CC2=CC=C(C=C2)Cl',
 'CC1=NN=NN1CC2=CC(=CC=C2)Br',
 'CC(C)[C@H](C(=O)O)N1C=NN=N1',
 'C1C[C@@H](OC1)CN2C=C(N=N2)CO',
 'C1C[C@H](OC1)CN2C=C(N=N2)CO',
 'C1COC[C@H]1CN2C=C(N=N2)CO',
 'C1COC[C@@H]1CN2C=C(N=N2)CO',
 'CC[C@H](CN1C=C(N=N1)CN)O',
 'CC[C@@H](CN1C=C(N=N1)CN)O',
 'C[S@](=O)CCN=[N+]=[N-]',
 'C[S@@](=O)CCN=[N+]=[N-]',
 'C1CCO[C@@H](C1)OCCN=[N+]=[N-]',
 'C1C=C[C@H](O1)C2=NNN=N2',
 'C1C=C[C@@H](O1)C2=NNN=N2',
 'CN1N=C(N=N1)C[C@H]2CC(=O)N2',
 'C1CC(=O)C[C@H]1N=[N+]=[N-]',
 'C1CC(=O)C[C@@H]1N=[N+]=[N-]',
 'CC[C@H](CN=[N+]=[N-])O',
 'C1C[C@H](OC1)CN=[N+]=[N-]',
 'C1C[C@@H](OC1)CN=[N+]=[N-]',
 'C1C[C@H](CC(=O)C1)N=[N+]=[N-]',
 'CCC[C@H](NC1=CC=CC=C1C',
 'CCC[C@@H](NC1=CC=CC=C1C',
 'C1=CC(=CC=C1C[C@H](C',
 'C1=CC(=CC=C1C[C@@H](C',
 'CC(C)[C@H](C(=O)O)SCC1=NNN=N1',
 'COC1=CC=C(C=C1)CN2C(=NN=N2)Cl',
 'C1CC2=NN=NN2CC[C@@H]1CO',
 'C1CC2=NN=NN2CC[C@H]1CO',
 'CC1CC(C1)N2C3=C(CCNC3)N=N2',
 'C1CC2=NN=NN2CC[C@H]1N',
 'C1CC2=NN=NN2CC[C@@H]1N',
 'CC1=NC(=CC=C1)CCN=[N+]=[N-]',
 'C1CC2=NN=NN2CC[C@H]1C(=O)O',
 'C1CC2=NN=NN2CC[C@@H]1C(=O)O',
 'C1CC(C1)N2C3=C(CCNC3)N=N2',
 'C1=NN(N=C1Br)CC(=O)O',
 'C1CNC[C@@]1(CN2C=CN=N2)O',
 'C1CNC[C@]1(CN2C=CN=N2)O',
 'C[C@@H](CC1=CN(N=N1)C(C)C)N',
 'CO[C@H]1COC[C@@H]1N=[N+]=[N-]',
 'CO[C@H]1COC[C@H]1N=[N+]=[N-]',
 'CCOC1CC(C1)N=[N+]=[N-]',
 'CC(C)(C)N1C2=C(CCNC2)N=N1',
 'CN1C2=C([C@H](NCC2)COC)N=N1',
 'CN1C2=C([C@@H](NCC2)COC)N=N1',
 'CC(=O)NCC1=C2CNCCCN2N=N1',
 'C1CNCC2=C(N=NN2C1)CN3C=NN=C3',
 'CCOCC1=C2CNCCCN2N=N1',
 'CCN1C2=C(CNC[C@H]2COC)N=N1',
 'CCN1C2=C(CNC[C@@H]2COC)N=N1',
 'CCOC[C@@H]1CNCC2=C1N(N=N2)C',
 'CCOC[C@H]1CNCC2=C1N(N=N2)C',
 'CN1C2=C(CNC[C@@H]2COC)N=N1',
 'CN1C2=C(CNC[C@H]2COC)N=N1',
 'CCN1C2=C(CNC[C@H]2COCC)N=N1',
 'CCN1C2=C(CNC[C@@H]2COCC)N=N1',
 'C1CNCC2=C(N=NN2C1)COCC3CC3',
 'CCN1C2=C([C@@H](NCC2)COC)N=N1',
 'CCN1C2=C([C@H](NCC2)COC)N=N1',
 'C1CNCC2=C(N=NN2C1)CN3C=CN=C3',
 'C1CC2=C(C1)N(N=N2)CCN',
 'C1=CC=C(C(=C1)N)N2C=C(N=N2)CF',
 'C1=C(N=NN1CCCC(=O)O)CF',
 'C1=C(N=NN1CCC(=O)O)CF',
 'COC(=O)CCN1C=C(N=N1)CF',
 'CC(C)(C(=O)OC)N1C=C(N=N1)CO',
 'CC(C)(C)OC(=O)CN1C=C(N=N1)CF',
 'COC(=O)CN1C=C(N=N1)CF',
 'CN1N=C(N=N1)C2=CC=CC=C2C(=O)O',
 ...]
In [6]:
print len(smiles)
55389

Мы заменяем азидную группу на модифицированный ибупрофен, если SMILES не битая. Нам нужны модификации, более водорастворимые, чем ибупрофен (они выводятся). В конце выведем самую водорастворимую и запомним ее номер. Как видно из блока выше, всего SMILES 55389. Надо, конечно, запускать все, но это очень много и долго, поэтому я остановлюсь на первых 1500.

In [7]:
#for smi in smiles[:1500]:
good_new_smiles=[] 
N=0
#all_mol = len(smiles)
all_mol = 1500
min = Lipinksy.rdMolDescriptors.CalcCrippenDescriptors(ibu)[0]
min_index = 0
#Новую молекулу лучше создавать в try из-за возможных битых Smiles
for smi in smiles[:all_mol]:    
    if 'N=[N+]=[N-]' in smi:
        newsmi=smi.replace('N=[N+]=[N-]','N1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O') #replace with template
    else:
        continue

    try:
        newmol=Chem.MolFromSmiles(newsmi)
        if Lipinksy.NumHDonors(newmol) <= 5 and Lipinksy.NumHAcceptors(newmol) <= 10 and Lipinksy.rdMolDescriptors.CalcExactMolWt(newmol) <=500 and Lipinksy.rdMolDescriptors.CalcCrippenDescriptors(newmol)[0] <= Lipinksy.rdMolDescriptors.CalcCrippenDescriptors(ibu)[0]:
            good_new_smiles.append(newmol)
            AllChem.Compute2DCoords(newmol)
            display(newmol)
            
            print N
            N=N+1
            if Lipinksy.rdMolDescriptors.CalcCrippenDescriptors(newmol)[0] < min: 
                min = Lipinksy.rdMolDescriptors.CalcCrippenDescriptors(newmol)[0]
                min_index = N-1

    except:
        pass
print 'good molecules: ', float(N)/float(all_mol)*100,'%'   
print 'molecule with minimal log P: %i with number %i' %(min,min_index)
display(good_new_smiles[min_index])
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
good molecules:  8.13333333333 %
molecule with minimal log P: 0 with number 37

Самая лучшая молекула имеет номер 37. Теперь мы можем визуализировать ее в 3D

In [8]:
m3d=Chem.AddHs(good_new_smiles[37])
Chem.AllChem.EmbedMolecule(m3d)
AllChem.MMFFOptimizeMolecule(m3d,maxIters=500,nonBondedThresh=200 )
Out[8]:
0
In [9]:
import nglview as nv
nv.show_rdkit(m3d)


Рис 1. 3D визуализация самой лучшей находки из первых 1500 SMILES.