In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import roc_curve
In [ ]:
 
In [ ]:
# Preview the first rows of df1.
# NOTE(review): df1 is not defined in any visible cell — confirm it is created
# earlier in the notebook so a fresh "Restart & Run All" does not fail here.
df1.head()
In [ ]:
# Distribution of `len` over the rows whose `in_2dom` flag equals True.
# The explicit `== True` is kept: the column's dtype is not visible here.
in_2dom_mask = df1['in_2dom'] == True
df1.loc[in_2dom_mask, 'len'].hist(bins=40)
In [ ]:
# Distribution of `len` over the rows whose `in_hmm` flag equals True.
# The explicit `== True` is kept: the column's dtype is not visible here.
in_hmm_mask = df1['in_hmm'] == True
df1.loc[in_hmm_mask, 'len'].hist(bins=40)
In [ ]:
# Load the HMM results table; fields are separated by runs of whitespace.
# The separator is a raw string: '\s' in a plain literal is an invalid escape
# sequence and triggers a warning on current Python versions.
# reset_index() turns whatever index read_csv built into ordinary columns
# (the ROC cell below reads one of them as `hmm.level_0`).
hmm = pd.read_csv('./pr9/hmm_res_df.txt', sep=r'\s+').reset_index()
hmm.head()
In [ ]:
# Plot the HMM score of every row, in table order, to eyeball its distribution.
# A Series is plotted against its index, so the x axis is the row index.
fig, ax = plt.subplots()
ax.plot(hmm['score'])
# Label the axes so the figure can be read on its own.
ax.set_xlabel('row index')
ax.set_ylabel('score')
plt.show()
In [ ]:
# Label every row: True when its `level_0` value is contained in `ac_l`.
# NOTE(review): `ac_l` is not defined in any visible cell — confirm it exists
# before this cell on a fresh kernel.
hmm['in_2dom'] = hmm.level_0.isin(ac_l)
score = hmm["score"]

# ROC curve of the score against the in_2dom labels.
fpr, tpr, thres = roc_curve(hmm["in_2dom"], score)

# Turn the rates back into counts to get the accuracy at each threshold.
n_pos = hmm["in_2dom"].sum()
n_neg = (~hmm["in_2dom"]).sum()
tp = tpr * n_pos
tn = (1 - fpr) * n_neg
acc = (tp + tn) / hmm["in_2dom"].shape[0]
am = np.argmax(acc)  # position of the threshold with the highest accuracy

# Draw the curve and mark the best-accuracy operating point on it.
fig, ax = plt.subplots()
ax.plot(fpr, tpr)
ax.plot([fpr[am]], [tpr[am]], color="C1", marker="+", markersize=20)
ax.set_xlabel('fpr')
ax.set_ylabel('tpr')
ax.text(fpr[am] + 0.03, tpr[am] - 0.07, f"threshold = {thres[am]}")
plt.show()
In [ ]:
# Confusion-matrix counts at every ROC threshold, rebuilt from the rates.
# Relies on fpr, tpr and thres left in the kernel by the ROC cell.
n_pos = hmm["in_2dom"].sum()
n_neg = (~hmm["in_2dom"]).sum()
tp = tpr * n_pos
fp = fpr * n_neg
tn = (1 - fpr) * n_neg  # not used by F1; kept so all four counts stay defined together
fn = (1 - tpr) * n_pos
f1 = 2*tp/(2*tp+fp+fn)

am_f1 = np.argmax(f1)  # position of the threshold with the highest F1

# Draw the ROC curve and mark the best-F1 operating point; the chosen
# threshold and its F1 value are shown in the annotation.
fig, ax = plt.subplots()
ax.plot(fpr, tpr)
ax.plot([fpr[am_f1]], [tpr[am_f1]], color="C1", marker="+", markersize=20)
ax.set_xlabel('fpr')
ax.set_ylabel('tpr')
ax.text(fpr[am_f1] + 0.03, tpr[am_f1] - 0.15, f"threshold = {thres[am_f1]}\nf1 = {f1.max():.3f}")
plt.show()