import prody as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import pandas
Перед работой в Jupyter Notebook pdb-файл был предварительно обработан в PyMOL (в файле protein-only.pdb присутствует только молекула белка).
# Load the protein-only structure (path is specific to the author's machine).
prot = pd.parsePDB('/home/julybel/study/3d/prak-03/protein-only.pdb')

# Pair every residue with the mean B-factor of its atoms.
sp = [(res, np.mean(res.getBetas())) for res in prot.iterResidues()]

# Report the residues with the lowest and the highest mean B-factor.
# min()/max() scan the list once each instead of sorting it twice; like the
# stable sort they replace, they return the first extreme element on ties.
print('MIN:\t', min(sp, key=lambda pair: pair[1]))
print('MAX:\t', max(sp, key=lambda pair: pair[1]))
Окраска в PyMOL по B-фактору:
> spectrum b, deepteal_white_firebrick, minimum=10, maximum=50
# Centre of the whole protein, weighted by whatever getMasses() returns.
# NOTE(review): if the parsed structure carries no mass data, getMasses()
# presumably returns None and calcCenter then yields an unweighted
# (geometric) centre -- confirm against the ProDy documentation.
prot_mcenter = pd.calcCenter(prot, weights=prot.getMasses())

# For every residue collect (distance from the protein centre to the residue
# centre, mean B-factor of the residue's atoms).
residue_stats = [
    (
        pd.calcDistance(
            prot_mcenter,
            pd.calcCenter(residue, weights=residue.getMasses()),
        ),
        np.mean(residue.getBetas()),
    )
    for residue in prot.iterResidues()
]

# Split the pairs into the two parallel lists used by the plots below.
distances = [distance for distance, _ in residue_stats]
betas = [beta for _, beta in residue_stats]

# Correlation between distance from the centre and mean B-factor.
pearson_result = stats.pearsonr(distances, betas)
spearman_result = stats.spearmanr(distances, betas)
print('Pearson correlation (rho, p-value):\n', pearson_result, '\n')
print('Spearman correlation (rho, p-value):\n', spearman_result, '\n')
# Scatter plot of mean B-factor against distance from the protein centre,
# with a linear regression fit.
fig, ax = plt.subplots(figsize=(12, 8))
# x and y are passed by keyword: seaborn >= 0.12 rejects them as positional
# arguments, while the keyword form also works on older releases.
sns.regplot(x=distances, y=betas, color='forestgreen', ax=ax)
ax.set_title('Dependancy of B-factor on distance between mass centers\nattempt of linear approximation\n',
             fontdict={'fontsize' : 20})
ax.set_xlabel('Distance between mass centers (Å)', fontdict={'fontsize' : 16})
ax.set_ylabel('B-factor', fontdict={'fontsize' : 16})
# Same scatter plot, fitted with a third-order polynomial regression and
# without a confidence band (ci=None).
fig, ax = plt.subplots(figsize=(12, 8))
# x and y are passed by keyword: seaborn >= 0.12 rejects them as positional
# arguments, while the keyword form also works on older releases.
sns.regplot(x=distances, y=betas, color='forestgreen',
            order=3, ci=None, ax=ax)
ax.set_title('Dependancy of B-factor on distance between mass centers\nfit with a higher-order polynomial regression (order=3)\n',
             fontdict={'fontsize' : 20})
ax.set_xlabel('Distance between mass centers (Å)', fontdict={'fontsize' : 16})
ax.set_ylabel('B-factor', fontdict={'fontsize' : 16})
# Same data, with the regression fitted on log(x); x_estimator=np.mean makes
# seaborn plot a mean estimate for each unique x value instead of raw points.
fig, ax = plt.subplots(figsize=(12, 8))
# x and y are passed by keyword: seaborn >= 0.12 rejects them as positional
# arguments, while the keyword form also works on older releases.
sns.regplot(x=distances, y=betas, color='forestgreen',
            x_estimator=np.mean, logx=True, ax=ax)
ax.set_title('Dependancy of B-factor on distance between mass centers\nfit the regression model using log(x)\n',
             fontdict={'fontsize' : 20})
ax.set_xlabel('Distance between mass centers (Å)', fontdict={'fontsize' : 16})
ax.set_ylabel('B-factor', fontdict={'fontsize' : 16})