import numpy as np, pandas as pd, prody as prd


# Load structure 7AVZ and collect, for every residue that contains an atom
# named "CA", a [residue, mean B-factor] pair.
s1 = prd.parsePDB("7AVZ")
mean_betas = []
for residue in s1.iterResidues():
    if "CA" not in residue.getNames():
        continue
    mean_beta = np.mean(residue.getBetas())
    mean_betas.append([residue, mean_beta])

# Ascending copy ordered by mean B-factor; `mean_betas` itself keeps the
# iteration order of the residues.
betas = list(mean_betas)
betas.sort(key=lambda entry: entry[1])

@> PDB file is found in working directory (7avz.pdb.gz).
@> 2214 atoms and 1 coordinate set(s) were parsed in 0.03s.


# Entry with the lowest mean B-factor (`betas` is sorted in ascending order of the mean).
betas[0]

[<Residue: ALA 708 from Chain A from 7AVZ (5 atoms)>, 40.33]


# Entry with the highest mean B-factor (last element of the ascending list).
betas[-1]

[<Residue: SER 661 from Chain A from 7AVZ (6 atoms)>, 103.745]


# Standard deviation of the atomic B-factors of residue number 661, the
# residue reported above as having the highest mean B-factor.
# NOTE(review): "resnum 661" is not restricted to a chain -- confirm that no
# other chain or heteroatom group shares this residue number.
res_max = s1.select("resnum 661")
res_max_betas = res_max.getBetas()
np.std(res_max_betas)

16.095086983300217


# Standard deviation of the atomic B-factors of residue number 708, the
# residue reported above as having the lowest mean B-factor.
# NOTE(review): "resnum 708" is not restricted to a chain -- confirm that no
# other chain or heteroatom group shares this residue number.
res_min = s1.select("resnum 708")
res_min_betas = res_min.getBetas()
np.std(res_min_betas)

2.5889457313740665


# Centre of the whole structure, weighted by whatever getMasses() returns.
# NOTE(review): if no masses were ever assigned to the structure, getMasses()
# may return None, which would make this an unweighted centroid -- confirm.
prot_mass_center = prd.calcCenter(s1, weights=s1.getMasses())

# Parallel lists, one entry per residue that contains an atom named "CA":
# the mean B-factor and the distance from the residue centre to the
# whole-structure centre.
betas_new, distances = [], []
for residue in s1.iterResidues():
    if "CA" not in residue.getNames():
        continue
    betas_new.append(np.mean(residue.getBetas()))
    mass_center = prd.calcCenter(residue, weights=residue.getMasses())
    distances.append(prd.calcDistance(prot_mass_center, mass_center))


import seaborn as sns, matplotlib.pyplot as plt
# Set the seaborn figure size (9 x 6) for the plot below.
sns.set(rc={'figure.figsize':(9,6)}) 
# Scatter of per-residue mean B-factor against the residue's distance to the
# whole-structure centre.
plot = sns.scatterplot(x=distances, y=betas_new)
plot.set_xlabel('Расстояние до центра масс, А')
plot.set_ylabel('B-фактор')
# Kept as the last statement: the notebook echoes its return value (a Text object).
plot.set_title("B-фактор = f(d(остаток, центр масс))")

Text(0.5, 1.0, 'B-фактор = f(d(остаток, центр масс))')


from scipy.stats import spearmanr
# Spearman rank correlation between distance to the structure centre and mean B-factor.
spearmanr(distances, betas_new)

SpearmanrResult(correlation=0.6411401910293151, pvalue=1.9916967142226675e-32)


from scipy.stats import pearsonr
# Pearson (linear) correlation between distance to the structure centre and mean B-factor.
pearsonr(distances, betas_new)

(0.5985658106681595, 1.9319328149618057e-27)


# Subdomain 1: residue numbers 562-677 and 731-742.
dom1 = s1.select("resnum 562 to 677 731 to 742")
# NOTE(review): if no masses were ever assigned to the structure, getMasses()
# may return None, which would make this an unweighted centroid -- confirm.
dom1_mass_center = prd.calcCenter(dom1, weights=dom1.getMasses())
r1 = list(range(562, 678))
# The stop value is 743 so that residue 742 is included, in agreement with
# the "731 to 742" selection used for the subdomain centre above.
r2 = list(range(731, 743))

# Parallel lists, one entry per subdomain-1 residue that contains an atom
# named "CA": the mean B-factor and the distance from the residue centre to
# the subdomain-1 centre.
betas_new_dom1 = []
distances_dom1 = []
for residue in s1.iterResidues():
    if "CA" not in residue.getNames():
        continue
    resnum = residue.getResnum()
    if resnum not in r1 and resnum not in r2:
        continue

    beta = np.mean(residue.getBetas())
    betas_new_dom1.append(beta)

    mass_center = prd.calcCenter(residue, weights=residue.getMasses())
    distance = prd.calcDistance(dom1_mass_center, mass_center)
    distances_dom1.append(distance)
        
        
# Subdomain 2: residue numbers 678-730 and 763-862.
dom2 = s1.select("resnum 678 to 730 763 to 862")
# NOTE(review): if no masses were ever assigned to the structure, getMasses()
# may return None, which would make this an unweighted centroid -- confirm.
dom2_mass_center = prd.calcCenter(dom2, weights=dom2.getMasses())
r1 = list(range(678, 731))
r2 = list(range(763, 863))

# Parallel lists, one entry per subdomain-2 residue that contains an atom
# named "CA": the mean B-factor and the distance from the residue centre to
# the subdomain-2 centre.
betas_new_dom2, distances_dom2 = [], []
for residue in s1.iterResidues():
    if "CA" not in residue.getNames():
        continue
    if residue.getResnum() not in r1 and residue.getResnum() not in r2:
        continue

    beta = np.mean(residue.getBetas())
    betas_new_dom2.append(beta)

    mass_center = prd.calcCenter(residue, weights=residue.getMasses())
    distance = prd.calcDistance(dom2_mass_center, mass_center)
    distances_dom2.append(distance)
        
# Wider figure (14 x 6) holding two side-by-side panels, one per subdomain.
sns.set(rc={'figure.figsize':(14,6)})       
fig, ax =plt.subplots(1,2)
# Left panel: subdomain 1, mean B-factor against distance to the subdomain-1 centre.
plot = sns.scatterplot(x=distances_dom1, y=betas_new_dom1, ax=ax[0])
plot.set_xlabel('Расстояние до центра с/д 1')
plot.set_ylabel('B-фактор')
plot.set_title("B-фактор в остатках субдомена 1")
# Right panel: subdomain 2, mean B-factor against distance to the subdomain-2 centre.
plot = sns.scatterplot(x=distances_dom2, y=betas_new_dom2, ax=ax[1])
plot.set_xlabel('Расстояние до центра с/д 2')
plot.set_ylabel('B-фактор')
# Kept as the last statement: the notebook echoes its return value (a Text object).
plot.set_title("B-фактор в остатках субдомена 2")

Text(0.5, 1.0, 'B-фактор в остатках субдомена 2')


# Residue-number ranges of the two subdomains (two ranges each).
r1_1 = list(range(562, 678))
r2_1 = list(range(731, 743))
r1_2 = list(range(678, 731))
r2_2 = list(range(763, 863))
# One membership set per subdomain, built once outside the loop.
dom1_resnums = set(r1_1) | set(r2_1)
dom2_resnums = set(r1_2) | set(r2_2)

# Parallel lists, one entry per residue that contains an atom named "CA":
#   betas_all  - mean B-factor of the residue
#   ds_to_mc   - distance from the residue centre to the whole-structure centre
#   ds_to_sdc  - distance from the residue centre to the centre of the
#                subdomain the residue belongs to (NaN if it belongs to neither)
betas_all = []
ds_to_mc = []
ds_to_sdc = []
for residue in s1.iterResidues():
    if "CA" not in residue.getNames():
        continue

    betas_all.append(np.mean(residue.getBetas()))

    mass_center = prd.calcCenter(residue, weights=residue.getMasses())
    ds_to_mc.append(prd.calcDistance(prot_mass_center, mass_center))

    resnum = residue.getResnum()
    if resnum in dom1_resnums:
        distance1 = prd.calcDistance(dom1_mass_center, mass_center)
    elif resnum in dom2_resnums:
        distance1 = prd.calcDistance(dom2_mass_center, mass_center)
    else:
        # Use NaN rather than a string sentinel so the list stays numeric:
        # a single "NA" would turn np.array(ds_to_sdc) into a string array.
        distance1 = np.nan
        # Report the residue number that is outside both subdomains.
        print(resnum)
    ds_to_sdc.append(distance1)


# Display the centre coordinates computed for subdomain 2.
dom2_mass_center

array([ 6.66366584, 22.96348023, -3.94233646])


# Display the centre coordinates computed for subdomain 1.
dom1_mass_center

array([ 27.16646747,  36.55479335, -12.95993441])


from scipy.optimize import curve_fit

def func(X, a, b, c, n1, n2):
    """Evaluate ``a + b * d1**n1 + c * d2**n2`` for the pair ``X = (d1, d2)``.

    Parameters
    ----------
    X : pair
        Two-element sequence ``(d1, d2)`` of the independent variables;
        each element is passed to ``np.power`` as the base.
    a : constant offset.
    b, c : multipliers of the first and second power term.
    n1, n2 : exponents applied to ``d1`` and ``d2`` respectively.

    Returns
    -------
    The model value, with whatever shape NumPy broadcasting yields.
    """
    first_var, second_var = X
    first_term = b * np.power(first_var, n1)
    second_term = c * np.power(second_var, n2)
    return a + first_term + second_term

# Independent variables of the fit: distance to the whole-structure centre
# (d1) and distance to the centre of the residue's own subdomain (d2).
d1, d2 = np.array(ds_to_mc), np.array(ds_to_sdc)

# Starting guess for (a, b, c, n1, n2).
p0 = (40.0, 0.05, 0.05, 1, 1)
# Fit `func` to the mean B-factors; maxfev caps the number of function evaluations.
popt, pcov = curve_fit(func, (d1, d2), betas_all, p0=p0, maxfev=1200)
popt

array([34.31995044,  0.07013715,  1.09585979,  1.70806096,  0.99494132])


# One-standard-deviation uncertainty of each fitted parameter: the square
# root of the diagonal of the covariance matrix returned by curve_fit.
param_variances = np.diag(pcov)
perr = np.sqrt(param_variances)
perr

array([6.81991929, 0.13535577, 1.80637028, 0.5472898 , 0.46368291])


# Two side-by-side panels (14 x 6): observed data on the left, fitted model on the right.
sns.set(rc={'figure.figsize':(14,6)})       
fig, ax =plt.subplots(1,2)
# Left panel: observed mean B-factor against distance to the whole-structure centre.
plot = sns.scatterplot(x=distances, y=betas_new, ax=ax[0])
plot.set_xlabel('Расстояние до центра масс')
plot.set_ylabel('B-фактор')
plot.set_title("Данные")
# Right panel: values predicted by `func` with the fitted parameters, plotted against d1.
plot = sns.scatterplot(x=d1, y=func((d1,d2), *popt), ax=ax[1])
plot.set_xlabel('Расстояние до центра')
plot.set_ylabel('B-фактор')
# Fix the y-range of the model panel to 38..105.
plot.set_ylim(38, 105)
# Kept as the last statement: the notebook echoes its return value (a Text object).
plot.set_title("Модель")

Text(0.5, 1.0, 'Модель')


# Load the semicolon-separated summary table and display it.
summ = pd.read_csv("bioinf_pr4/summary.csv", sep=";")
summ

	Harmonics set	Resolution (A)	Data integrity (%)	F noise (% of phi)	Phase noise (% of phi)	Quality
0	0_10	3.0 A	100%	0	0	bad
1	0_20	1.5 A	100%	0	0	medium
2	0_25	1.2 A	100%	0	0	perfect
3	0_30	1.0 A	100%	0	0	perfect
4	0_25	1.2 A	100%	10	10	perfect
5	0_25	1.2 A	100%	0	20	good
6	0_25	1.2 A	100%	20	0	perfect
7	1_10, 15_25	1.2 A	80%	10	10	perfect
8	4_25	1.2 A	84%	10	10	medium
9	3_7, 10_25	1.2 A	80%	10	10	good
10	2_7, 10_17, 20_25	1.2 A	76%	10	10	perfect

Задание 1¶

Задание 2¶

Задание 3¶