# импортируем библиотеки
from Bio.PDB import *
import numpy as np 
import pandas as pd

# Parse the local PDB file "6l7k.pdb" (nothing is downloaded here) and keep the
# resulting structure under the id "NMR_6L7K". The loops below iterate over its
# models.
p = PDBParser()
structure = p.get_structure(id="NMR_6L7K", file="6l7k.pdb")

# Backbone atoms: for every model in the structure, measure two N...O
# distances between residues 70 and 78 of chain A. Each list ends up with one
# value per model, rounded to 2 decimals.
h_bond_main1 = []  # residue 70 N  <->  residue 78 O
h_bond_main2 = []  # residue 70 O  <->  residue 78 N

for nmr_model in structure:
    chain_a = nmr_model["A"]

    n_70 = chain_a[70]["N"].get_coord()
    o_78 = chain_a[78]["O"].get_coord()

    o_70 = chain_a[70]["O"].get_coord()
    n_78 = chain_a[78]["N"].get_coord()

    # Euclidean distance between the Leu-78 oxygen and the Cys-70 nitrogen
    # (residue names as given in the original notes).
    offset_n70_o78 = n_70 - o_78
    h_bond_main1.append(np.sqrt(np.square(offset_n70_o78).sum()).round(2))

    # Euclidean distance between the Cys-70 oxygen and the Leu-78 nitrogen.
    offset_o70_n78 = o_70 - n_78
    h_bond_main2.append(np.sqrt(np.square(offset_o70_n78).sum()).round(2))

# Side-chain atoms: for every model, measure the distance between atom "OH" of
# residue 14 and atom "NH2" of residue 126 in chain A (Tyr-14 oxygen and
# Arg-126 nitrogen per the original notes). One value per model, rounded to
# 2 decimals.
# NOTE(review): only the NH2 atom of residue 126 is measured; confirm that
# ignoring the other side-chain nitrogens is intended.
h_bond_side = []

for nmr_model in structure:
    chain_a = nmr_model["A"]
    oh_14 = chain_a[14]["OH"].get_coord()
    nh2_126 = chain_a[126]["NH2"].get_coord()

    offset = oh_14 - nh2_126
    squared_length = np.square(offset).sum()
    h_bond_side.append(np.sqrt(squared_length).round(2))

# Loops on the surface of the protein globule: for every model, measure the
# distance between atom "NZ" of residue 88 and atom "OE1" of residue 107 in
# chain A (Lys-88 nitrogen and Glu-107 oxygen per the original notes). One
# value per model, rounded to 2 decimals.
# NOTE(review): only OE1 of residue 107 is measured; confirm that the other
# carboxylate oxygen is deliberately left out.
h_bond_surface = []

for nmr_model in structure:
    chain_a = nmr_model["A"]
    nz_88 = chain_a[88]["NZ"].get_coord()
    oe1_107 = chain_a[107]["OE1"].get_coord()

    separation = nz_88 - oe1_107
    length = np.sqrt(np.square(separation).sum())
    h_bond_surface.append(length.round(2))

# Column labels, one per measured atom pair, in the same order as the distance
# lists zipped together below.
h_bonds_res = [
    'Cys70_N_and_Leu78_O',
    'Cys70_O_and_Leu78_N',
    'Tyr14_O_and_Arg126_N',
    'Glu107_O_and_Lys-88_N',
]

# Assemble the table row by row: each row holds the four distances collected
# for one model.
per_model_rows = list(zip(h_bond_main1, h_bond_main2, h_bond_side, h_bond_surface))
dist_all = pd.DataFrame(per_model_rows, columns=h_bonds_res)
dist_all.head()

# Per-column summary statistics, rounded to 2 decimals.
summary_stats = dist_all.describe().round(2)
print(summary_stats)

       Cys70_N_and_Leu78_O  Cys70_O_and_Leu78_N  Tyr14_O_and_Arg126_N  \
count                20.00                20.00                 20.00   
mean                  2.85                 3.13                  3.40   
std                   0.06                 0.14                  0.82   
min                   2.73                 2.79                  2.38   
25%                   2.80                 3.18                  2.78   
50%                   2.86                 3.19                  3.08   
75%                   2.88                 3.19                  4.08   
max                   3.00                 3.21                  4.88   

       Glu107_O_and_Lys-88_N  
count                  20.00  
mean                    7.61  
std                     0.96  
min                     5.76  
25%                     6.93  
50%                     7.61  
75%                     8.16  
max                     9.56

# Distance cutoff below which an atom pair is counted as hydrogen-bonded.
# Same units as the values in dist_all (presumably angstroms, as in PDB
# coordinates -- confirm).
H_BOND_CUTOFF = 3.5

h_filter = dist_all < H_BOND_CUTOFF
# Boolean-mask indexing keeps the table's shape; values that fail the cutoff
# become NaN.
dist_all_filter = dist_all[h_filter]

# Take the number of models from the table itself instead of hard-coding 20,
# so the counts and percentages stay correct for an ensemble of any size.
n_models = len(dist_all)
numb_rows = dist_all_filter.count()  # non-NaN entries per column
percent_rows = (numb_rows / n_models) * 100

print(numb_rows)  # number of models in which each hydrogen bond is present
print(percent_rows)  # the same, as a percentage of all models

Cys70_N_and_Leu78_O      20
Cys70_O_and_Leu78_N      20
Tyr14_O_and_Arg126_N     12
Glu107_O_and_Lys-88_N     0
dtype: int64
Cys70_N_and_Leu78_O      100.0
Cys70_O_and_Leu78_N      100.0
Tyr14_O_and_Arg126_N      60.0
Glu107_O_and_Lys-88_N      0.0
dtype: float64

dist_all_filter.head()  # entries that failed the cutoff show as NaN rather than False (see below)

# Reference distances from the X-ray crystallography model, listed in the same
# order as h_bonds_res.
X_ray_dist = [2.8, 2.7, 3.5, 3.2]

# Summary table: one row per hydrogen bond, combining the X-ray reference with
# the NMR statistics computed above.
summary_columns = [
    'X_ray_dist',
    'Number_of_Models',
    'Percentage_of_Models',
    'Max_NMR_dist',
    'Min_NMR_dist',
    'Mean_NMR_dist',
]
summary_rows = list(
    zip(
        X_ray_dist,
        numb_rows,
        percent_rows,
        dist_all.max(),
        dist_all.min(),
        dist_all.mean(),
    )
)
final_df = pd.DataFrame(summary_rows, columns=summary_columns, index=h_bonds_res)
final_df

	Cys70_N_and_Leu78_O	Cys70_O_and_Leu78_N	Tyr14_O_and_Arg126_N	Glu107_O_and_Lys-88_N
0	2.80	3.19	2.85	6.44
1	2.80	3.18	4.22	8.04
2	2.87	3.20	4.06	6.92
3	2.86	3.19	2.38	8.82
4	2.73	2.79	4.16	6.95

	X_ray_dist	Number_of_Models	Percentage_of_Models	Max_NMR_dist	Min_NMR_dist	Mean_NMR_dist
Cys70_N_and_Leu78_O	2.8	20	100.0	3.00	2.73	2.850
Cys70_O_and_Leu78_N	2.7	20	100.0	3.21	2.79	3.131
Tyr14_O_and_Arg126_N	3.5	12	60.0	4.88	2.38	3.401
Glu107_O_and_Lys-88_N	3.2	0	0.0	9.56	5.76	7.613

Задание 1. Расчёт расстояний в ЯМР-моделях