# -*- coding: utf-8 -*-

# Input files; both paths are relative to the current working directory.
sd = "./SD.out"                     # fuzznuc output with the motif positions
gff = "./GCF_002157855.1_ASM215785v1_feature_table.txt"  # tab-separated feature table (read with split('\t'))

# Inclusive bounds on the motif-to-CDS coordinate difference for a hit to be
# counted as correctly positioned.
MIN_DIST = 5
MAX_DIST = 12

# Motif hits read from the fuzznuc report: one dict per hit holding integer
# "start"/"end" coordinates and the "strand" column as text.
int_have = []

with open(sd) as f:
    # One pass handles every row, including the first one; a separate
    # special case for the first hit is unnecessary because the same filter
    # applies to it.
    for line in f:
        line = line.strip()
        # Only rows that begin with a digit are treated as hits. This also
        # skips blank lines, '#' comment lines and textual column headers.
        if not line or not line[0].isdigit():
            continue
        parts = line.split()
        # A hit needs at least start, end and strand columns.
        if len(parts) < 3:
            continue
        try:
            int_have.append({
                "start": int(parts[0]),
                "end":   int(parts[1]),
                "strand": parts[2]
            })
        except ValueError:
            # Digit-led line whose first two columns are not both integers:
            # not a hit row, so it is ignored.
            continue
# CDS records read from the feature table: one dict per row whose first
# column is "CDS", with integer "start"/"end" (columns 8 and 9, 1-based) and
# the "strand" column (column 10) as text.
# NOTE(review): the genomic accession column is not consulted, so rows from
# different sequences are pooled together — confirm this is intended when the
# assembly contains more than one replicon.
CDS = []

with open(gff) as table:
    for row in table:
        # Header/comment lines start with '#'.
        if row.startswith('#'):
            continue
        fields = row.split('\t')
        # Skip rows too short to contain the strand column and rows that
        # describe anything other than a CDS.
        if len(fields) < 10 or fields[0] != "CDS":
            continue
        try:
            record = {
                "start": int(fields[7]),
                "end": int(fields[8]),
                "strand": fields[9],
            }
        except (ValueError, IndexError):
            # Non-numeric coordinates: ignore the row.
            continue
        CDS.append(record)

# Number of motif hits lying MIN_DIST..MAX_DIST (inclusive) upstream of the
# start codon of at least one CDS on the same strand.
correct = 0

# Index the start-codon coordinate of every CDS by strand, so that each motif
# needs only (MAX_DIST - MIN_DIST + 1) set lookups instead of a scan over all
# CDS records. On "+" the start codon is at the CDS "start"; for any other
# strand value the CDS "end" is used (start codon on the complementary strand).
start_codons = {}
for cds in CDS:
    anchor = cds["start"] if cds["strand"] == "+" else cds["end"]
    start_codons.setdefault(cds["strand"], set()).add(anchor)

for motif in int_have:
    anchors = start_codons.get(motif["strand"])
    if not anchors:
        # No CDS on this strand at all.
        continue

    # dist is a plain coordinate difference:
    #   "+" strand: dist = cds_start - motif_end
    #   otherwise : dist = motif_start - cds_end
    # so the acceptable start-codon coordinates form a contiguous window.
    if motif["strand"] == "+":
        lo, hi = motif["end"] + MIN_DIST, motif["end"] + MAX_DIST
    else:
        lo, hi = motif["start"] - MAX_DIST, motif["start"] - MIN_DIST

    # Each motif is counted at most once, however many CDS it matches.
    if any(pos in anchors for pos in range(lo, hi + 1)):
        correct += 1

# Summary report. The user-facing messages are in Russian.
total = len(int_have)

print("Всего находок мотива AGGAGG :", total)
# The range in the label is taken from the constants so that it cannot drift
# from the thresholds actually used for counting.
print(f"Находок в позиции {MIN_DIST}‑{MAX_DIST} п.н. до старт-кодона :", correct)
if total:
    print(f"Доля правильных находок : {100 * correct / total:.2f}%")
else:
    # Name the file that was actually read; the previous message referred to
    # a file name that this script never opens.
    print(f"Нет данных в {sd}.")

