# Load the genome FASTA and concatenate every non-header line into one string.
# The `with` block guarantees the file handle is closed once reading finishes.
with open('GCA_000008865.2_ASM886v2_genomic.fna', mode='r') as fasta:
    lst = []
    for line in fasta:
        x = line.strip()
        # FASTA header lines begin with '>'; everything else is sequence data.
        if not x.startswith('>'):
            lst.append(x)

# NOTE(review): if the file contains several records, they are joined
# end-to-end here, so a motif match could span the boundary between two
# records. Confirm this is acceptable for the downstream count.
sequence = ''.join(lst)
# Count every position at which the 6-mer motif occurs in `sequence`.
motif = 'GAATTC'

# The last valid start index is len(sequence) - len(motif), so the range bound
# must be len(sequence) - len(motif) + 1. The previous bound of
# len(sequence) - 6 skipped the final window and missed a motif located at the
# very end of the sequence. A sequence shorter than the motif yields an empty
# range and therefore a count of 0.
counter = sum(
    1
    for i in range(len(sequence) - len(motif) + 1)
    if sequence.startswith(motif, i)
)
print(counter)
# Output recorded from the original run: 1133
from scipy import stats

# Per-position probability of the motif under the null hypothesis.
# NOTE(review): the derivation of this constant is not shown here; presumably
# it comes from the genome's base frequencies -- confirm.
p_null = 0.0002392315

# One-sided exact binomial test: is the observed motif count lower than
# expected under p_null? The observed count is taken from `counter` instead of
# a hand-copied literal so the test always matches the computed value.
# NOTE(review): the number of trials is len(sequence), as in the original; the
# number of possible 6-mer start positions is len(sequence) - 5 -- confirm
# which is intended.
if hasattr(stats, 'binomtest'):
    # stats.binom_test was deprecated in SciPy 1.10 and removed in 1.12;
    # binomtest returns a result object whose .pvalue holds the p-value.
    p_value = stats.binomtest(
        counter, len(sequence), p_null, alternative='less'
    ).pvalue
else:
    # Fallback for SciPy releases that predate binomtest.
    p_value = stats.binom_test(
        counter, len(sequence), p_null, alternative='less'
    )
print(p_value)
# Output recorded from the original run: 7.897818517501663e-79