Докинг низкомолекулярных лигандов в структуру белка

In [21]:
import numpy as np
import copy

# Отображение структур
import IPython.display
import ipywidgets
from IPython.display import display,display_svg,SVG,Image

# Open Drug Discovery Toolkit
import oddt
import oddt.docking
import oddt.interactions

# Органика
from rdkit.Chem import Draw
from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole

import pandas as pd
In [2]:
import mdtraj as md
u = md.load('lys_lig.B99990001.pdb')
pdb = u.topology
for i,r in enumerate(pdb.atoms):
    print(i,r, end=("\t" if i % 8 else "\n"))
    #посмотрим на атомы
0 SER1-N
1 SER1-CA	2 SER1-CB	3 SER1-OG	4 SER1-C	5 SER1-O	6 GLY2-N	7 GLY2-CA	8 GLY2-C
9 GLY2-O	10 LYS3-N	11 LYS3-CA	12 LYS3-CB	13 LYS3-CG	14 LYS3-CD	15 LYS3-CE	16 LYS3-NZ
17 LYS3-C	18 LYS3-O	19 TYR4-N	20 TYR4-CA	21 TYR4-CB	22 TYR4-CG	23 TYR4-CD1	24 TYR4-CD2
25 TYR4-CE1	26 TYR4-CE2	27 TYR4-CZ	28 TYR4-OH	29 TYR4-C	30 TYR4-O	31 ILE5-N	32 ILE5-CA
33 ILE5-CB	34 ILE5-CG1	35 ILE5-CG2	36 ILE5-CD1	37 ILE5-C	38 ILE5-O	39 SER6-N	40 SER6-CA
41 SER6-CB	42 SER6-OG	43 SER6-C	44 SER6-O	45 TRP7-N	46 TRP7-CA	47 TRP7-CB	48 TRP7-CG
49 TRP7-CD2	50 TRP7-CD1	51 TRP7-NE1	52 TRP7-CE2	53 TRP7-CE3	54 TRP7-CZ2	55 TRP7-CZ3	56 TRP7-CH2
57 TRP7-C	58 TRP7-O	59 GLU8-N	60 GLU8-CA	61 GLU8-CB	62 GLU8-CG	63 GLU8-CD	64 GLU8-OE1
65 GLU8-OE2	66 GLU8-C	67 GLU8-O	68 ASP9-N	69 ASP9-CA	70 ASP9-CB	71 ASP9-CG	72 ASP9-OD1
73 ASP9-OD2	74 ASP9-C	75 ASP9-O	76 SER10-N	77 SER10-CA	78 SER10-CB	79 SER10-OG	80 SER10-C
81 SER10-O	82 CYS11-N	83 CYS11-CA	84 CYS11-CB	85 CYS11-SG	86 CYS11-C	87 CYS11-O	88 SER12-N
89 SER12-CA	90 SER12-CB	91 SER12-OG	92 SER12-C	93 SER12-O	94 TYR13-N	95 TYR13-CA	96 TYR13-CB
97 TYR13-CG	98 TYR13-CD1	99 TYR13-CD2	100 TYR13-CE1	101 TYR13-CE2	102 TYR13-CZ	103 TYR13-OH	104 TYR13-C
105 TYR13-O	106 LEU14-N	107 LEU14-CA	108 LEU14-CB	109 LEU14-CG	110 LEU14-CD1	111 LEU14-CD2	112 LEU14-C
113 LEU14-O	114 GLN15-N	115 GLN15-CA	116 GLN15-CB	117 GLN15-CG	118 GLN15-CD	119 GLN15-OE1	120 GLN15-NE2
121 GLN15-C	122 GLN15-O	123 LEU16-N	124 LEU16-CA	125 LEU16-CB	126 LEU16-CG	127 LEU16-CD1	128 LEU16-CD2
129 LEU16-C	130 LEU16-O	131 GLN17-N	132 GLN17-CA	133 GLN17-CB	134 GLN17-CG	135 GLN17-CD	136 GLN17-OE1
137 GLN17-NE2	138 GLN17-C	139 GLN17-O	140 LYS18-N	141 LYS18-CA	142 LYS18-CB	143 LYS18-CG	144 LYS18-CD
145 LYS18-CE	146 LYS18-NZ	147 LYS18-C	148 LYS18-O	149 TYR19-N	150 TYR19-CA	151 TYR19-CB	152 TYR19-CG
153 TYR19-CD1	154 TYR19-CD2	155 TYR19-CE1	156 TYR19-CE2	157 TYR19-CZ	158 TYR19-OH	159 TYR19-C	160 TYR19-O
161 GLU20-N	162 GLU20-CA	163 GLU20-CB	164 GLU20-CG	165 GLU20-CD	166 GLU20-OE1	167 GLU20-OE2	168 GLU20-C
169 GLU20-O	170 ARG21-N	171 ARG21-CA	172 ARG21-CB	173 ARG21-CG	174 ARG21-CD	175 ARG21-NE	176 ARG21-CZ
177 ARG21-NH1	178 ARG21-NH2	179 ARG21-C	180 ARG21-O	181 CYS22-N	182 CYS22-CA	183 CYS22-CB	184 CYS22-SG
185 CYS22-C	186 CYS22-O	187 GLU23-N	188 GLU23-CA	189 GLU23-CB	190 GLU23-CG	191 GLU23-CD	192 GLU23-OE1
193 GLU23-OE2	194 GLU23-C	195 GLU23-O	196 LEU24-N	197 LEU24-CA	198 LEU24-CB	199 LEU24-CG	200 LEU24-CD1
201 LEU24-CD2	202 LEU24-C	203 LEU24-O	204 ALA25-N	205 ALA25-CA	206 ALA25-CB	207 ALA25-C	208 ALA25-O
209 LYS26-N	210 LYS26-CA	211 LYS26-CB	212 LYS26-CG	213 LYS26-CD	214 LYS26-CE	215 LYS26-NZ	216 LYS26-C
217 LYS26-O	218 ALA27-N	219 ALA27-CA	220 ALA27-CB	221 ALA27-C	222 ALA27-O	223 LEU28-N	224 LEU28-CA
225 LEU28-CB	226 LEU28-CG	227 LEU28-CD1	228 LEU28-CD2	229 LEU28-C	230 LEU28-O	231 LYS29-N	232 LYS29-CA
233 LYS29-CB	234 LYS29-CG	235 LYS29-CD	236 LYS29-CE	237 LYS29-NZ	238 LYS29-C	239 LYS29-O	240 LYS30-N
241 LYS30-CA	242 LYS30-CB	243 LYS30-CG	244 LYS30-CD	245 LYS30-CE	246 LYS30-NZ	247 LYS30-C	248 LYS30-O
249 GLY31-N	250 GLY31-CA	251 GLY31-C	252 GLY31-O	253 GLY32-N	254 GLY32-CA	255 GLY32-C	256 GLY32-O
257 LEU33-N	258 LEU33-CA	259 LEU33-CB	260 LEU33-CG	261 LEU33-CD1	262 LEU33-CD2	263 LEU33-C	264 LEU33-O
265 ALA34-N	266 ALA34-CA	267 ALA34-CB	268 ALA34-C	269 ALA34-O	270 ASP35-N	271 ASP35-CA	272 ASP35-CB
273 ASP35-CG	274 ASP35-OD1	275 ASP35-OD2	276 ASP35-C	277 ASP35-O	278 PHE36-N	279 PHE36-CA	280 PHE36-CB
281 PHE36-CG	282 PHE36-CD1	283 PHE36-CD2	284 PHE36-CE1	285 PHE36-CE2	286 PHE36-CZ	287 PHE36-C	288 PHE36-O
289 LYS37-N	290 LYS37-CA	291 LYS37-CB	292 LYS37-CG	293 LYS37-CD	294 LYS37-CE	295 LYS37-NZ	296 LYS37-C
297 LYS37-O	298 GLY38-N	299 GLY38-CA	300 GLY38-C	301 GLY38-O	302 TYR39-N	303 TYR39-CA	304 TYR39-CB
305 TYR39-CG	306 TYR39-CD1	307 TYR39-CD2	308 TYR39-CE1	309 TYR39-CE2	310 TYR39-CZ	311 TYR39-OH	312 TYR39-C
313 TYR39-O	314 SER40-N	315 SER40-CA	316 SER40-CB	317 SER40-OG	318 SER40-C	319 SER40-O	320 LEU41-N
321 LEU41-CA	322 LEU41-CB	323 LEU41-CG	324 LEU41-CD1	325 LEU41-CD2	326 LEU41-C	327 LEU41-O	328 GLU42-N
329 GLU42-CA	330 GLU42-CB	331 GLU42-CG	332 GLU42-CD	333 GLU42-OE1	334 GLU42-OE2	335 GLU42-C	336 GLU42-O
337 ASN43-N	338 ASN43-CA	339 ASN43-CB	340 ASN43-CG	341 ASN43-OD1	342 ASN43-ND2	343 ASN43-C	344 ASN43-O
345 TRP44-N	346 TRP44-CA	347 TRP44-CB	348 TRP44-CG	349 TRP44-CD2	350 TRP44-CD1	351 TRP44-NE1	352 TRP44-CE2
353 TRP44-CE3	354 TRP44-CZ2	355 TRP44-CZ3	356 TRP44-CH2	357 TRP44-C	358 TRP44-O	359 ILE45-N	360 ILE45-CA
361 ILE45-CB	362 ILE45-CG1	363 ILE45-CG2	364 ILE45-CD1	365 ILE45-C	366 ILE45-O	367 CYS46-N	368 CYS46-CA
369 CYS46-CB	370 CYS46-SG	371 CYS46-C	372 CYS46-O	373 THR47-N	374 THR47-CA	375 THR47-CB	376 THR47-OG1
377 THR47-CG2	378 THR47-C	379 THR47-O	380 ALA48-N	381 ALA48-CA	382 ALA48-CB	383 ALA48-C	384 ALA48-O
385 PHE49-N	386 PHE49-CA	387 PHE49-CB	388 PHE49-CG	389 PHE49-CD1	390 PHE49-CD2	391 PHE49-CE1	392 PHE49-CE2
393 PHE49-CZ	394 PHE49-C	395 PHE49-O	396 HIS50-N	397 HIS50-CA	398 HIS50-ND1	399 HIS50-CG	400 HIS50-CB
401 HIS50-NE2	402 HIS50-CD2	403 HIS50-CE1	404 HIS50-C	405 HIS50-O	406 GLU51-N	407 GLU51-CA	408 GLU51-CB
409 GLU51-CG	410 GLU51-CD	411 GLU51-OE1	412 GLU51-OE2	413 GLU51-C	414 GLU51-O	415 SER52-N	416 SER52-CA
417 SER52-CB	418 SER52-OG	419 SER52-C	420 SER52-O	421 GLY53-N	422 GLY53-CA	423 GLY53-C	424 GLY53-O
425 TYR54-N	426 TYR54-CA	427 TYR54-CB	428 TYR54-CG	429 TYR54-CD1	430 TYR54-CD2	431 TYR54-CE1	432 TYR54-CE2
433 TYR54-CZ	434 TYR54-OH	435 TYR54-C	436 TYR54-O	437 ASN55-N	438 ASN55-CA	439 ASN55-CB	440 ASN55-CG
441 ASN55-OD1	442 ASN55-ND2	443 ASN55-C	444 ASN55-O	445 THR56-N	446 THR56-CA	447 THR56-CB	448 THR56-OG1
449 THR56-CG2	450 THR56-C	451 THR56-O	452 ALA57-N	453 ALA57-CA	454 ALA57-CB	455 ALA57-C	456 ALA57-O
457 SER58-N	458 SER58-CA	459 SER58-CB	460 SER58-OG	461 SER58-C	462 SER58-O	463 THR59-N	464 THR59-CA
465 THR59-CB	466 THR59-OG1	467 THR59-CG2	468 THR59-C	469 THR59-O	470 ASN60-N	471 ASN60-CA	472 ASN60-CB
473 ASN60-CG	474 ASN60-OD1	475 ASN60-ND2	476 ASN60-C	477 ASN60-O	478 TYR61-N	479 TYR61-CA	480 TYR61-CB
481 TYR61-CG	482 TYR61-CD1	483 TYR61-CD2	484 TYR61-CE1	485 TYR61-CE2	486 TYR61-CZ	487 TYR61-OH	488 TYR61-C
489 TYR61-O	490 ASN62-N	491 ASN62-CA	492 ASN62-CB	493 ASN62-CG	494 ASN62-OD1	495 ASN62-ND2	496 ASN62-C
497 ASN62-O	498 PRO63-N	499 PRO63-CA	500 PRO63-CD	501 PRO63-CB	502 PRO63-CG	503 PRO63-C	504 PRO63-O
505 PRO64-N	506 PRO64-CA	507 PRO64-CD	508 PRO64-CB	509 PRO64-CG	510 PRO64-C	511 PRO64-O	512 ASP65-N
513 ASP65-CA	514 ASP65-CB	515 ASP65-CG	516 ASP65-OD1	517 ASP65-OD2	518 ASP65-C	519 ASP65-O	520 LYS66-N
521 LYS66-CA	522 LYS66-CB	523 LYS66-CG	524 LYS66-CD	525 LYS66-CE	526 LYS66-NZ	527 LYS66-C	528 LYS66-O
529 SER67-N	530 SER67-CA	531 SER67-CB	532 SER67-OG	533 SER67-C	534 SER67-O	535 THR68-N	536 THR68-CA
537 THR68-CB	538 THR68-OG1	539 THR68-CG2	540 THR68-C	541 THR68-O	542 ASP69-N	543 ASP69-CA	544 ASP69-CB
545 ASP69-CG	546 ASP69-OD1	547 ASP69-OD2	548 ASP69-C	549 ASP69-O	550 TYR70-N	551 TYR70-CA	552 TYR70-CB
553 TYR70-CG	554 TYR70-CD1	555 TYR70-CD2	556 TYR70-CE1	557 TYR70-CE2	558 TYR70-CZ	559 TYR70-OH	560 TYR70-C
561 TYR70-O	562 GLY71-N	563 GLY71-CA	564 GLY71-C	565 GLY71-O	566 ILE72-N	567 ILE72-CA	568 ILE72-CB
569 ILE72-CG1	570 ILE72-CG2	571 ILE72-CD1	572 ILE72-C	573 ILE72-O	574 PHE73-N	575 PHE73-CA	576 PHE73-CB
577 PHE73-CG	578 PHE73-CD1	579 PHE73-CD2	580 PHE73-CE1	581 PHE73-CE2	582 PHE73-CZ	583 PHE73-C	584 PHE73-O
585 GLN74-N	586 GLN74-CA	587 GLN74-CB	588 GLN74-CG	589 GLN74-CD	590 GLN74-OE1	591 GLN74-NE2	592 GLN74-C
593 GLN74-O	594 ILE75-N	595 ILE75-CA	596 ILE75-CB	597 ILE75-CG1	598 ILE75-CG2	599 ILE75-CD1	600 ILE75-C
601 ILE75-O	602 ASN76-N	603 ASN76-CA	604 ASN76-CB	605 ASN76-CG	606 ASN76-OD1	607 ASN76-ND2	608 ASN76-C
609 ASN76-O	610 SER77-N	611 SER77-CA	612 SER77-CB	613 SER77-OG	614 SER77-C	615 SER77-O	616 ARG78-N
617 ARG78-CA	618 ARG78-CB	619 ARG78-CG	620 ARG78-CD	621 ARG78-NE	622 ARG78-CZ	623 ARG78-NH1	624 ARG78-NH2
625 ARG78-C	626 ARG78-O	627 TRP79-N	628 TRP79-CA	629 TRP79-CB	630 TRP79-CG	631 TRP79-CD2	632 TRP79-CD1
633 TRP79-NE1	634 TRP79-CE2	635 TRP79-CE3	636 TRP79-CZ2	637 TRP79-CZ3	638 TRP79-CH2	639 TRP79-C	640 TRP79-O
641 TRP80-N	642 TRP80-CA	643 TRP80-CB	644 TRP80-CG	645 TRP80-CD2	646 TRP80-CD1	647 TRP80-NE1	648 TRP80-CE2
649 TRP80-CE3	650 TRP80-CZ2	651 TRP80-CZ3	652 TRP80-CH2	653 TRP80-C	654 TRP80-O	655 CYS81-N	656 CYS81-CA
657 CYS81-CB	658 CYS81-SG	659 CYS81-C	660 CYS81-O	661 ASN82-N	662 ASN82-CA	663 ASN82-CB	664 ASN82-CG
665 ASN82-OD1	666 ASN82-ND2	667 ASN82-C	668 ASN82-O	669 ASP83-N	670 ASP83-CA	671 ASP83-CB	672 ASP83-CG
673 ASP83-OD1	674 ASP83-OD2	675 ASP83-C	676 ASP83-O	677 TYR84-N	678 TYR84-CA	679 TYR84-CB	680 TYR84-CG
681 TYR84-CD1	682 TYR84-CD2	683 TYR84-CE1	684 TYR84-CE2	685 TYR84-CZ	686 TYR84-OH	687 TYR84-C	688 TYR84-O
689 LYS85-N	690 LYS85-CA	691 LYS85-CB	692 LYS85-CG	693 LYS85-CD	694 LYS85-CE	695 LYS85-NZ	696 LYS85-C
697 LYS85-O	698 THR86-N	699 THR86-CA	700 THR86-CB	701 THR86-OG1	702 THR86-CG2	703 THR86-C	704 THR86-O
705 PRO87-N	706 PRO87-CA	707 PRO87-CD	708 PRO87-CB	709 PRO87-CG	710 PRO87-C	711 PRO87-O	712 ARG88-N
713 ARG88-CA	714 ARG88-CB	715 ARG88-CG	716 ARG88-CD	717 ARG88-NE	718 ARG88-CZ	719 ARG88-NH1	720 ARG88-NH2
721 ARG88-C	722 ARG88-O	723 SER89-N	724 SER89-CA	725 SER89-CB	726 SER89-OG	727 SER89-C	728 SER89-O
729 LYS90-N	730 LYS90-CA	731 LYS90-CB	732 LYS90-CG	733 LYS90-CD	734 LYS90-CE	735 LYS90-NZ	736 LYS90-C
737 LYS90-O	738 ASN91-N	739 ASN91-CA	740 ASN91-CB	741 ASN91-CG	742 ASN91-OD1	743 ASN91-ND2	744 ASN91-C
745 ASN91-O	746 THR92-N	747 THR92-CA	748 THR92-CB	749 THR92-OG1	750 THR92-CG2	751 THR92-C	752 THR92-O
753 CYS93-N	754 CYS93-CA	755 CYS93-CB	756 CYS93-SG	757 CYS93-C	758 CYS93-O	759 ASN94-N	760 ASN94-CA
761 ASN94-CB	762 ASN94-CG	763 ASN94-OD1	764 ASN94-ND2	765 ASN94-C	766 ASN94-O	767 ILE95-N	768 ILE95-CA
769 ILE95-CB	770 ILE95-CG1	771 ILE95-CG2	772 ILE95-CD1	773 ILE95-C	774 ILE95-O	775 ASP96-N	776 ASP96-CA
777 ASP96-CB	778 ASP96-CG	779 ASP96-OD1	780 ASP96-OD2	781 ASP96-C	782 ASP96-O	783 CYS97-N	784 CYS97-CA
785 CYS97-CB	786 CYS97-SG	787 CYS97-C	788 CYS97-O	789 LYS98-N	790 LYS98-CA	791 LYS98-CB	792 LYS98-CG
793 LYS98-CD	794 LYS98-CE	795 LYS98-NZ	796 LYS98-C	797 LYS98-O	798 VAL99-N	799 VAL99-CA	800 VAL99-CB
801 VAL99-CG1	802 VAL99-CG2	803 VAL99-C	804 VAL99-O	805 LEU100-N	806 LEU100-CA	807 LEU100-CB	808 LEU100-CG
809 LEU100-CD1	810 LEU100-CD2	811 LEU100-C	812 LEU100-O	813 LEU101-N	814 LEU101-CA	815 LEU101-CB	816 LEU101-CG
817 LEU101-CD1	818 LEU101-CD2	819 LEU101-C	820 LEU101-O	821 GLY102-N	822 GLY102-CA	823 GLY102-C	824 GLY102-O
825 ASP103-N	826 ASP103-CA	827 ASP103-CB	828 ASP103-CG	829 ASP103-OD1	830 ASP103-OD2	831 ASP103-C	832 ASP103-O
833 ASP104-N	834 ASP104-CA	835 ASP104-CB	836 ASP104-CG	837 ASP104-OD1	838 ASP104-OD2	839 ASP104-C	840 ASP104-O
841 ILE105-N	842 ILE105-CA	843 ILE105-CB	844 ILE105-CG1	845 ILE105-CG2	846 ILE105-CD1	847 ILE105-C	848 ILE105-O
849 SER106-N	850 SER106-CA	851 SER106-CB	852 SER106-OG	853 SER106-C	854 SER106-O	855 PRO107-N	856 PRO107-CA
857 PRO107-CD	858 PRO107-CB	859 PRO107-CG	860 PRO107-C	861 PRO107-O	862 ALA108-N	863 ALA108-CA	864 ALA108-CB
865 ALA108-C	866 ALA108-O	867 ILE109-N	868 ILE109-CA	869 ILE109-CB	870 ILE109-CG1	871 ILE109-CG2	872 ILE109-CD1
873 ILE109-C	874 ILE109-O	875 LYS110-N	876 LYS110-CA	877 LYS110-CB	878 LYS110-CG	879 LYS110-CD	880 LYS110-CE
881 LYS110-NZ	882 LYS110-C	883 LYS110-O	884 CYS111-N	885 CYS111-CA	886 CYS111-CB	887 CYS111-SG	888 CYS111-C
889 CYS111-O	890 ALA112-N	891 ALA112-CA	892 ALA112-CB	893 ALA112-C	894 ALA112-O	895 LYS113-N	896 LYS113-CA
897 LYS113-CB	898 LYS113-CG	899 LYS113-CD	900 LYS113-CE	901 LYS113-NZ	902 LYS113-C	903 LYS113-O	904 ARG114-N
905 ARG114-CA	906 ARG114-CB	907 ARG114-CG	908 ARG114-CD	909 ARG114-NE	910 ARG114-CZ	911 ARG114-NH1	912 ARG114-NH2
913 ARG114-C	914 ARG114-O	915 VAL115-N	916 VAL115-CA	917 VAL115-CB	918 VAL115-CG1	919 VAL115-CG2	920 VAL115-C
921 VAL115-O	922 VAL116-N	923 VAL116-CA	924 VAL116-CB	925 VAL116-CG1	926 VAL116-CG2	927 VAL116-C	928 VAL116-O
929 SER117-N	930 SER117-CA	931 SER117-CB	932 SER117-OG	933 SER117-C	934 SER117-O	935 ASP118-N	936 ASP118-CA
937 ASP118-CB	938 ASP118-CG	939 ASP118-OD1	940 ASP118-OD2	941 ASP118-C	942 ASP118-O	943 PRO119-N	944 PRO119-CA
945 PRO119-CD	946 PRO119-CB	947 PRO119-CG	948 PRO119-C	949 PRO119-O	950 ASN120-N	951 ASN120-CA	952 ASN120-CB
953 ASN120-CG	954 ASN120-OD1	955 ASN120-ND2	956 ASN120-C	957 ASN120-O	958 GLY121-N	959 GLY121-CA	960 GLY121-C
961 GLY121-O	962 MET122-N	963 MET122-CA	964 MET122-CB	965 MET122-CG	966 MET122-SD	967 MET122-CE	968 MET122-C
969 MET122-O	970 GLY123-N	971 GLY123-CA	972 GLY123-C	973 GLY123-O	974 ALA124-N	975 ALA124-CA	976 ALA124-CB
977 ALA124-C	978 ALA124-O	979 TRP125-N	980 TRP125-CA	981 TRP125-CB	982 TRP125-CG	983 TRP125-CD2	984 TRP125-CD1
985 TRP125-NE1	986 TRP125-CE2	987 TRP125-CE3	988 TRP125-CZ2	989 TRP125-CZ3	990 TRP125-CH2	991 TRP125-C	992 TRP125-O
993 VAL126-N	994 VAL126-CA	995 VAL126-CB	996 VAL126-CG1	997 VAL126-CG2	998 VAL126-C	999 VAL126-O	1000 ALA127-N
1001 ALA127-CA	1002 ALA127-CB	1003 ALA127-C	1004 ALA127-O	1005 TRP128-N	1006 TRP128-CA	1007 TRP128-CB	1008 TRP128-CG
1009 TRP128-CD2	1010 TRP128-CD1	1011 TRP128-NE1	1012 TRP128-CE2	1013 TRP128-CE3	1014 TRP128-CZ2	1015 TRP128-CZ3	1016 TRP128-CH2
1017 TRP128-C	1018 TRP128-O	1019 LYS129-N	1020 LYS129-CA	1021 LYS129-CB	1022 LYS129-CG	1023 LYS129-CD	1024 LYS129-CE
1025 LYS129-NZ	1026 LYS129-C	1027 LYS129-O	1028 LYS130-N	1029 LYS130-CA	1030 LYS130-CB	1031 LYS130-CG	1032 LYS130-CD
1033 LYS130-CE	1034 LYS130-NZ	1035 LYS130-C	1036 LYS130-O	1037 TYR131-N	1038 TYR131-CA	1039 TYR131-CB	1040 TYR131-CG
1041 TYR131-CD1	1042 TYR131-CD2	1043 TYR131-CE1	1044 TYR131-CE2	1045 TYR131-CZ	1046 TYR131-OH	1047 TYR131-C	1048 TYR131-O
1049 CYS132-N	1050 CYS132-CA	1051 CYS132-CB	1052 CYS132-SG	1053 CYS132-C	1054 CYS132-O	1055 LYS133-N	1056 LYS133-CA
1057 LYS133-CB	1058 LYS133-CG	1059 LYS133-CD	1060 LYS133-CE	1061 LYS133-NZ	1062 LYS133-C	1063 LYS133-O	1064 GLY134-N
1065 GLY134-CA	1066 GLY134-C	1067 GLY134-O	1068 LYS135-N	1069 LYS135-CA	1070 LYS135-CB	1071 LYS135-CG	1072 LYS135-CD
1073 LYS135-CE	1074 LYS135-NZ	1075 LYS135-C	1076 LYS135-O	1077 ASN136-N	1078 ASN136-CA	1079 ASN136-CB	1080 ASN136-CG
1081 ASN136-OD1	1082 ASN136-ND2	1083 ASN136-C	1084 ASN136-O	1085 LEU137-N	1086 LEU137-CA	1087 LEU137-CB	1088 LEU137-CG
1089 LEU137-CD1	1090 LEU137-CD2	1091 LEU137-C	1092 LEU137-O	1093 SER138-N	1094 SER138-CA	1095 SER138-CB	1096 SER138-OG
1097 SER138-C	1098 SER138-O	1099 GLN139-N	1100 GLN139-CA	1101 GLN139-CB	1102 GLN139-CG	1103 GLN139-CD	1104 GLN139-OE1
1105 GLN139-NE2	1106 GLN139-C	1107 GLN139-O	1108 TRP140-N	1109 TRP140-CA	1110 TRP140-CB	1111 TRP140-CG	1112 TRP140-CD2
1113 TRP140-CD1	1114 TRP140-NE1	1115 TRP140-CE2	1116 TRP140-CE3	1117 TRP140-CZ2	1118 TRP140-CZ3	1119 TRP140-CH2	1120 TRP140-C
1121 TRP140-O	1122 THR141-N	1123 THR141-CA	1124 THR141-CB	1125 THR141-OG1	1126 THR141-CG2	1127 THR141-C	1128 THR141-O
1129 GLN142-N	1130 GLN142-CA	1131 GLN142-CB	1132 GLN142-CG	1133 GLN142-CD	1134 GLN142-OE1	1135 GLN142-NE2	1136 GLN142-C
1137 GLN142-O	1138 GLY143-N	1139 GLY143-CA	1140 GLY143-C	1141 GLY143-O	1142 CYS144-N	1143 CYS144-CA	1144 CYS144-CB
1145 CYS144-SG	1146 CYS144-C	1147 CYS144-O	1148 LYS145-N	1149 LYS145-CA	1150 LYS145-CB	1151 LYS145-CG	1152 LYS145-CD
1153 LYS145-CE	1154 LYS145-NZ	1155 LYS145-C	1156 LYS145-O	1157 LEU146-N	1158 LEU146-CA	1159 LEU146-CB	1160 LEU146-CG
1161 LEU146-CD1	1162 LEU146-CD2	1163 LEU146-C	1164 LEU146-O	1165 LEU146-OXT	1166 NDG147-C1	1167 NDG147-C2	1168 NDG147-C3
1169 NDG147-C4	1170 NDG147-C5	1171 NDG147-C6	1172 NDG147-C7	1173 NDG147-C8	1174 NDG147-O5	1175 NDG147-O3	1176 NDG147-O4
1177 NDG147-O6	1178 NDG147-O7	1179 NDG147-N2	1180 NDG147-O1	1181 NAG148-C1	1182 NAG148-C2	1183 NAG148-C3	1184 NAG148-C4
1185 NAG148-C5	1186 NAG148-C6	1187 NAG148-C7	1188 NAG148-C8	1189 NAG148-N2	1190 NAG148-O3	1191 NAG148-O4	1192 NAG148-O5
1193 NAG148-O6	1194 NAG148-O7	1195 NAG149-C1	1196 NAG149-C2	1197 NAG149-C3	1198 NAG149-C4	1199 NAG149-C5	1200 NAG149-C6
1201 NAG149-C7	1202 NAG149-C8	1203 NAG149-N2	1204 NAG149-O3	1205 NAG149-O4	1206 NAG149-O5	1207 NAG149-O6	1208 NAG149-O7
In [3]:
# сохраним pdb без лиганда
u2 = u.atom_slice(range(1166))
u2.save_pdb('lys_lig-clean.pdb', force_overwrite=True)
In [4]:
#определим геометрический центр лиганда
lig = u.atom_slice(range(1166, 1209))
center = np.mean(lig.xyz, axis=1) * 10  # В ангстремы
center = center.reshape(3,)
center
Out[4]:
array([48.94363 , 44.634903, 29.850237], dtype=float32)
In [5]:
prot = next(oddt.toolkits.rdk.readfile('pdb', 'lys_lig-clean.pdb'))
print('Is the first mol in model is protein?',prot.protein,':) and MW of this mol is:', prot.molwt )
Is the first mol in model is protein? False :) and MW of this mol is: 16605.929999999968

В смысле, не белок?

In [6]:
smiles = ["CC(=O)NC1C(O)OC(CO)C(O)C1O",  # NAG
          "OC(=O)NC1C(O)OC(CO)C(O)C1O",  # -OH
          "[NH3+]C(=O)NC1C(O)OC(CO)C(O)C1O",  # -NH3+
          "C(=O)NC1C(O)OC(CO)C(O)C1O",  # -H
          "C=1(C=CC=CC1)C(=O)NC1C(O)OC(CO)C(O)C1O",  # -Ph
          "C(C([O-])=O)(=O)NC1C(O)OC(CO)C(O)C1O"  # -COO-
         ]
mols= []
images =[]

for s in smiles:
    m = oddt.toolkits.rdk.readstring('smi', s)
    m.make3D(forcefield='mmff94', steps=150)

    mols.append(m)
    print(Chem.rdMolDescriptors.CalcMolFormula(m.Mol))
    images.append((SVG(copy.deepcopy(m).write('svg'))))
    
display(*images)
C8H15NO6
C7H13NO7
C7H15N2O6+
C7H13NO6
C13H17NO6
C8H12NO8-
O N H HO O OH OH OH
HO O N H HO O OH OH OH
H3N+ O N H HO O OH OH OH
O N H HO O OH OH OH
O N H OH O HO OH OH
O- O O N H HO O OH OH OH

Докинг

In [7]:
center
Out[7]:
array([48.94363 , 44.634903, 29.850237], dtype=float32)
In [14]:
# create docking object
# центром докинга дадим геометрический центр лиганда,
# найденный нами ранее - приблизительное положение сайта связывания
dock_obj= oddt.docking.AutodockVina.autodock_vina(
    protein=prot,size=(20,20,20),center=center,
    executable='C:\\Program Files (x86)\\The Scripps Research Institute\\Vina\\vina.exe',autocleanup=True, num_modes=9)

print(dock_obj.tmp_dir)
print(" ".join(dock_obj.params))
#C:\\Program Files (x86)\\The Scripps Research Institute\\Vina\\vina.exe
/tmp\autodock_vina_xgcdgr2g
--center_x 48.94363 --center_y 44.634903 --center_z 29.850237 --size_x 20 --size_y 20 --size_z 20 --exhaustiveness 8 --num_modes 9 --energy_range 3

center_x/y/z - геометрический центр лиганда
size_x/y/z - размер области, в которой будет производиться поиск
exhaustiveness = параметр, который говорит программе, насколько тщательно производить поиск
num_modes - количество конформаций лиганда в активном центре, которые будут найдены программой
energy_range - максимально допустимый разброс энергии для разных моделей одного и того же лиганда

In [15]:
res = dock_obj.dock(mols,prot)
In [16]:
# отсортируем результаты по аффинности (параметр vina_affinity):
res_sorted = sorted(res, key = lambda x : float(x.data['vina_affinity']))
In [19]:
#что вышло
for i,r in enumerate(res):
    formula = Chem.rdMolDescriptors.CalcMolFormula(r.Mol)
    print("%4d%10s%8s%8s%8s" %(i, formula, r.data['vina_affinity'],  r.data['vina_rmsd_ub'], r.residues[0].name))
   0  C8H15NO6    -5.1   0.000     UNL
   1  C8H15NO6    -5.1   3.177     UNL
   2  C8H15NO6    -5.0   2.105     UNL
   3  C8H15NO6    -4.9   4.417     UNL
   4  C8H15NO6    -4.8   4.224     UNL
   5  C8H15NO6    -4.8   2.412     UNL
   6  C8H15NO6    -4.8   3.907     UNL
   7  C8H15NO6    -4.7   2.422     UNL
   8  C8H15NO6    -4.7   3.629     UNL
   9  C7H13NO7    -5.3   0.000     UNL
  10  C7H13NO7    -5.0   2.803     UNL
  11  C7H13NO7    -4.9   5.133     UNL
  12  C7H13NO7    -4.9   4.865     UNL
  13  C7H13NO7    -4.8   4.128     UNL
  14  C7H13NO7    -4.8   3.321     UNL
  15  C7H13NO7    -4.8   4.656     UNL
  16  C7H13NO7    -4.7   5.546     UNL
  17  C7H13NO7    -4.7   5.447     UNL
  18C7H15N2O6+    -5.0   0.000     UNL
  19C7H15N2O6+    -5.0   6.216     UNL
  20C7H15N2O6+    -5.0   2.529     UNL
  21C7H15N2O6+    -4.8   5.403     UNL
  22C7H15N2O6+    -4.8   6.082     UNL
  23C7H15N2O6+    -4.8   5.235     UNL
  24C7H15N2O6+    -4.6   5.904     UNL
  25C7H15N2O6+    -4.6   4.217     UNL
  26C7H15N2O6+    -4.5   3.947     UNL
  27  C7H13NO6    -4.9   0.000     UNL
  28  C7H13NO6    -4.6   4.198     UNL
  29  C7H13NO6    -4.5   4.506     UNL
  30  C7H13NO6    -4.5   5.955     UNL
  31  C7H13NO6    -4.5   5.312     UNL
  32  C7H13NO6    -4.3   3.434     UNL
  33  C7H13NO6    -4.3   4.208     UNL
  34  C7H13NO6    -4.3   7.549     UNL
  35  C7H13NO6    -4.2   6.400     UNL
  36 C13H17NO6    -7.1   0.000     UNL
  37 C13H17NO6    -6.9   2.519     UNL
  38 C13H17NO6    -6.8   6.963     UNL
  39 C13H17NO6    -6.7   2.998     UNL
  40 C13H17NO6    -6.3   1.753     UNL
  41 C13H17NO6    -6.0   8.361     UNL
  42 C13H17NO6    -6.0   7.231     UNL
  43 C13H17NO6    -5.8   8.147     UNL
  44 C13H17NO6    -5.8   9.545     UNL
  45 C8H12NO8-    -5.9   0.000     UNL
  46 C8H12NO8-    -5.8   6.041     UNL
  47 C8H12NO8-    -5.5   4.530     UNL
  48 C8H12NO8-    -5.5   2.720     UNL
  49 C8H12NO8-    -5.3   6.015     UNL
  50 C8H12NO8-    -5.3   3.428     UNL
  51 C8H12NO8-    -5.3   6.116     UNL
  52 C8H12NO8-    -5.2   6.945     UNL
  53 C8H12NO8-    -5.1   6.329     UNL

Анализ докинга

In [27]:
hbtotal = []
hbstrict = []
phob = []
formulas = []
for i,r in enumerate(res_sorted):
    formula = Chem.rdMolDescriptors.CalcMolFormula(r.Mol)
    formulas.append(formula)
    int1, int2, strict = oddt.interactions.hbonds(prot,r)
    hbtotal.append(len(int1))
    hbstrict.append(strict.sum())
    ph1, ph2 = oddt.interactions.hydrophobic_contacts(prot,r)
    phob.append(len(ph1))
sort_table = pd.DataFrame({'Formula': formulas,
                   'Affinity, kcal/mol':[r.data['vina_affinity'] for r in res_sorted],
                   'Total number of h-bonds':hbtotal, 'Strict number of h-bonds':hbstrict, 'Hydrophobic bonds':phob, 
                    'RMSD':[r.data['vina_rmsd_ub'] for r in res_sorted]})
In [28]:
sort_table[:]
Out[28]:
Formula Affinity, kcal/mol Total number of h-bonds Strict number of h-bonds Hydrophobic bonds RMSD
0 C13H17NO6 -7.1 2 2 2 0.000
1 C13H17NO6 -6.9 2 2 1 2.519
2 C13H17NO6 -6.8 5 5 14 6.963
3 C13H17NO6 -6.7 2 2 4 2.998
4 C13H17NO6 -6.3 3 2 3 1.753
5 C13H17NO6 -6.0 5 5 3 8.361
6 C13H17NO6 -6.0 4 3 4 7.231
7 C8H12NO8- -5.9 5 4 0 0.000
8 C13H17NO6 -5.8 1 0 5 8.147
9 C13H17NO6 -5.8 5 5 3 9.545
10 C8H12NO8- -5.8 6 6 0 6.041
11 C8H12NO8- -5.5 6 5 0 4.530
12 C8H12NO8- -5.5 4 3 0 2.720
13 C7H13NO7 -5.3 3 2 0 0.000
14 C8H12NO8- -5.3 3 1 0 6.015
15 C8H12NO8- -5.3 4 2 0 3.428
16 C8H12NO8- -5.3 4 4 0 6.116
17 C8H12NO8- -5.2 2 2 0 6.945
18 C8H15NO6 -5.1 4 1 5 0.000
19 C8H15NO6 -5.1 2 1 1 3.177
20 C8H12NO8- -5.1 2 1 0 6.329
21 C8H15NO6 -5.0 1 0 1 2.105
22 C7H13NO7 -5.0 3 2 0 2.803
23 C7H15N2O6+ -5.0 5 5 0 0.000
24 C7H15N2O6+ -5.0 3 2 0 6.216
25 C7H15N2O6+ -5.0 6 5 0 2.529
26 C8H15NO6 -4.9 1 1 1 4.417
27 C7H13NO7 -4.9 5 4 0 5.133
28 C7H13NO7 -4.9 8 5 0 4.865
29 C7H13NO6 -4.9 5 5 0 0.000
30 C8H15NO6 -4.8 3 2 1 4.224
31 C8H15NO6 -4.8 2 2 0 2.412
32 C8H15NO6 -4.8 1 1 1 3.907
33 C7H13NO7 -4.8 4 4 0 4.128
34 C7H13NO7 -4.8 5 5 0 3.321
35 C7H13NO7 -4.8 3 2 0 4.656
36 C7H15N2O6+ -4.8 7 5 0 5.403
37 C7H15N2O6+ -4.8 2 2 0 6.082
38 C7H15N2O6+ -4.8 2 0 0 5.235
39 C8H15NO6 -4.7 2 2 1 2.422
40 C8H15NO6 -4.7 1 1 0 3.629
41 C7H13NO7 -4.7 4 1 0 5.546
42 C7H13NO7 -4.7 2 2 0 5.447
43 C7H15N2O6+ -4.6 3 1 0 5.904
44 C7H15N2O6+ -4.6 1 1 0 4.217
45 C7H13NO6 -4.6 2 2 0 4.198
46 C7H15N2O6+ -4.5 1 1 0 3.947
47 C7H13NO6 -4.5 3 3 0 4.506
48 C7H13NO6 -4.5 4 3 0 5.955
49 C7H13NO6 -4.5 2 1 0 5.312
50 C7H13NO6 -4.3 3 3 0 3.434
51 C7H13NO6 -4.3 4 2 0 4.208
52 C7H13NO6 -4.3 3 3 0 7.549
53 C7H13NO6 -4.2 2 2 0 6.400

Гидрофобные взаимодействия выявились у лигандов с ароматическими группами в составе. Самая лучшшая афииность у лиганда C13H17NO6. Неожиданно мало водородных связей у лучших хитов.

Визуализируем: посмотрим лучший (номер 36) и худший (номер 35) лиганд, а также лиганд с наибольшим количеством гидрофобных связей (номер 38).

In [44]:
display(Image('lig_36.png'))

Ароматическая группа лучшего лиганда находится в гидрофобном кармане белка (валином и CH2 группой аспарагина). К тому же, дополнительно позицию лиганжа укрепляют 2 водородные связи.

In [52]:
display(Image('lig_35.png'))

Худший лиганд. Всего одна водородная связь. Расположен в том же кармане, что и предыдущий лиганд. Позиция не усилена дополнительно гидрофобными взаимодействиями или стекингом.

In [57]:
display(Image('lig_38.png'))

Лиганд с наибольшим количеством гидрофобных связей. Дополнительно имеется 5 водородных связей. Высокая афинность. Странно, что по энергии связывания лиганд проигрывает лучшему хиту.