from collections import Counter
from math import sqrt

# Path to the genome assembly (FASTA) that is analysed.
fasta = "/home/olya/Downloads/GCF_016415705.1_ASM1641570v1_genomic.fna"

# Observed hit counts, taken from the fuzzpro output.
observed_forward = 807
observed_both = 1644
# Hits not found on the forward strand (both-strand total minus forward).
observed_compl = observed_both - observed_forward

# Base composition of the motif on each strand: base -> number of occurrences.
# Only the composition matters for the expected frequency, not the base order.
FORWARD_COMPOSITION = {"A": 2, "G": 4}
COMPL_COMPOSITION = {"C": 4, "T": 2}


def read_fasta_records(lines):
    """Split FASTA text into per-record sequences.

    Args:
        lines: iterable of text lines (e.g. an open file).

    Returns:
        A list with one upper-cased sequence string per record, in file
        order. Sequence lines that appear before any ``>`` header are
        collected into a record of their own.
    """
    records = []
    current = None  # list of line fragments of the record being read
    for raw in lines:
        line = raw.strip().upper()
        if line.startswith(">"):
            current = []
            records.append(current)
            continue
        if not line:
            continue
        if current is None:
            # Headerless data: keep it instead of silently dropping it.
            current = []
            records.append(current)
        current.append(line)
    return ["".join(parts) for parts in records]


def base_frequencies(records):
    """Return the frequency of A, C, G and T over all records.

    The denominator is the total sequence length, so any other symbols
    present in the sequences lower the four frequencies accordingly.

    Raises:
        ValueError: if the records contain no sequence characters at all.
    """
    total = sum(len(record) for record in records)
    if total == 0:
        raise ValueError("no sequence data found in FASTA input")
    counts = Counter()
    for record in records:
        counts.update(record)
    return {base: counts[base] / total for base in "ACGT"}


def window_count(records, k):
    """Return how many length-``k`` windows fit inside the records.

    Windows are counted per record, so no window spans the junction
    between two different records; records shorter than ``k`` add nothing.
    """
    return sum(max(len(record) - k + 1, 0) for record in records)


def composition_probability(freq, composition):
    """Return the probability of a word with the given base composition.

    Positions are treated as independent: the result is the product of
    ``freq[base] ** count`` over all entries of ``composition``.
    """
    probability = 1.0
    for base, count in composition.items():
        probability *= freq[base] ** count
    return probability


def z_score(observed, expected):
    """Return ``(observed - expected) / sqrt(expected)``.

    The square root of the expected count is used as the standard
    deviation (Poisson-style approximation).

    Raises:
        ValueError: if ``expected`` is not positive, in which case the
            score is undefined.
    """
    if expected <= 0:
        raise ValueError("expected count must be positive to compute a z-score")
    return (observed - expected) / sqrt(expected)


if __name__ == "__main__":
    with open(fasta) as f:
        records = read_fasta_records(f)

    L = sum(len(record) for record in records)
    freq = base_frequencies(records)

    # Both compositions describe words of the same length.
    motif_length = sum(FORWARD_COMPOSITION.values())
    n_windows = window_count(records, motif_length)

    # Expected number of motif occurrences on each strand.
    p_forward = composition_probability(freq, FORWARD_COMPOSITION)
    expected_forward = n_windows * p_forward
    p_compl = composition_probability(freq, COMPL_COMPOSITION)
    expected_compl = n_windows * p_compl

    # Z-scores of the observed counts against the expectation.
    z_forward = z_score(observed_forward, expected_forward)
    z_compl = z_score(observed_compl, expected_compl)

    print("Forward:")
    print("Observed:", observed_forward)
    print("Expected:", expected_forward)
    print("Z-score:", z_forward)

    print("Complement:")
    print("Observed:", observed_compl)
    print("Expected:", expected_compl)
    print("Z-score:", z_compl)