# Load the genome assembly and build one continuous nucleotide string.
# The file is opened in a `with` block so the handle is closed as soon as
# reading finishes, even if an error occurs part-way through.
lst = []
with open('GCA_000008865.2_ASM886v2_genomic.fna', mode='r') as fasta:
    for line in fasta:
        x = line.strip()
        # Any line containing '>' is treated as a FASTA header and skipped;
        # every other line is kept as sequence data.
        if '>' not in x:
            lst.append(x)
# All non-header lines are joined in file order. If the file holds more than
# one record, the records end up concatenated back to back in this string.
sequence = ''.join(lst)
# Count how many times the dinucleotide 'TA' occurs in the genome.
# str.count() reports non-overlapping matches, but 'TA' can never overlap
# itself (a match would need the same base to be both 'A' and 'T'), so this
# equals the sliding-window count over every adjacent pair of bases while
# avoiding a Python-level loop that slices the string at each position.
counter = sequence.count('TA')
print(counter)
# Output: 365293
from scipy import stats

# One-sided ('less') binomial test: is the observed number of 'TA'
# dinucleotides lower than expected if each position were 'TA' with
# probability 0.248 * 0.247?
# NOTE(review): 0.248 and 0.247 are presumably the genome-wide T and A base
# frequencies -- confirm against the analysis that produced them.
# NOTE(review): the number of trials is len(sequence); strictly there are
# len(sequence) - 1 adjacent-base windows, a negligible difference here.
#
# `counter` is used instead of repeating its printed value (365293) so the
# test stays in sync with the count if the input file changes.
if hasattr(stats, 'binomtest'):
    # SciPy >= 1.7: binomtest() returns a result object carrying the p-value.
    # The older binom_test() was deprecated in 1.10 and removed in 1.12.
    p_value = stats.binomtest(
        counter, len(sequence), 0.248 * 0.247, alternative='less'
    ).pvalue
else:
    # Fallback for SciPy releases that predate binomtest().
    p_value = stats.binom_test(counter, len(sequence), 0.248 * 0.247, 'less')
p_value
# Output: 0.0