import sys

def parse_SD(filename, target_contig="NZ_CP026671.1"):
    """Collect (start, end) coordinate pairs listed for one contig.

    The file is read line by line. A line containing both ``# Sequence:``
    and ``from:`` switches the current contig to the token that follows
    ``Sequence:`` (with any colons removed). While the current contig
    equals *target_contig*, every line whose first non-blank character is
    a digit and that has at least two whitespace-separated fields
    contributes ``(int(field0), int(field1))``.

    NOTE(review): the layout looks like an EMBOSS pattern-search report;
    confirm against the tool that produces the input.

    Args:
        filename: Path of the report to read.
        target_contig: Contig name whose rows are collected.

    Returns:
        List of ``(start, end)`` integer tuples in file order.
    """
    hits = []
    active_contig = None

    with open(filename, 'r') as report:
        for raw in report:
            # Section header: remember which contig the next rows belong to.
            if '# Sequence:' in raw and 'from:' in raw:
                tokens = raw.split()
                if 'Sequence:' in tokens:
                    name_pos = tokens.index('Sequence:') + 1
                    active_contig = tokens[name_pos].replace(':', '')

            if active_contig == target_contig:
                fields = raw.split()
                # Data rows start with a digit and carry at least start and end.
                if len(fields) >= 2 and fields[0][0].isdigit():
                    hits.append((int(fields[0]), int(fields[1])))

    return hits

def parse_start_codons(filename, target_contig="NZ_CP026671.1"):
    """Collect one coordinate per CDS row of a tab-separated table.

    Lines starting with ``#`` are skipped. A row is used when column 0 is
    ``CDS`` and column 6 equals *target_contig*. Column 9 holds the strand:
    for ``+`` the value of column 7 is taken, for ``-`` the value of
    column 8; rows with any other strand value are ignored.

    NOTE(review): the column layout appears to match an NCBI feature
    table (start in column 7, end in column 8); confirm with the input.

    Args:
        filename: Path of the tab-separated table.
        target_contig: Contig name that column 6 must match.

    Returns:
        List of integer coordinates in file order.
    """
    # Which column supplies the coordinate for each strand.
    coord_column = {'+': 7, '-': 8}
    positions = []

    with open(filename, 'r') as table:
        for row in table:
            if row.startswith('#'):
                continue
            cols = row.strip().split('\t')
            if cols[0] != 'CDS' or cols[6] != target_contig:
                continue
            strand = cols[9]
            if strand in coord_column:
                positions.append(int(cols[coord_column[strand]]))

    return positions

def main():
    """Report how many AGGAGG hits end shortly before a CDS start coordinate.

    Command-line arguments:
        sys.argv[1]: file passed to ``parse_SD`` (pattern hits).
        sys.argv[2]: file passed to ``parse_start_codons`` (CDS table).

    Prints the total number of hits, the number of hits whose end lies
    0-25 positions before at least one start coordinate, and that share
    as a percentage (0 when there are no hits).

    Exits with a usage message when fewer than two file paths are given.
    """
    from bisect import bisect_left

    if len(sys.argv) < 3:
        prog = sys.argv[0] if sys.argv else "script"
        sys.exit(f"Usage: {prog} <pattern_hits_file> <feature_table_file>")

    target = "NZ_CP026671.1"
    max_gap = 25  # accepted distance from hit end to start coordinate: 0-25

    pattern = parse_SD(sys.argv[1], target)
    print(f"Находок AGGAGG вcего: {len(pattern)}")

    # Sorted so the nearest coordinate at or after a hit end is found by
    # binary search instead of scanning every coordinate for every hit.
    start_codons = sorted(parse_start_codons(sys.argv[2], target))

    # NOTE(review): the distance is always "coordinate - hit end", whatever
    # the strand of the gene or of the hit; confirm this is intended for
    # minus-strand genes.
    correct = 0
    for _, pat_end in pattern:
        nearest = bisect_left(start_codons, pat_end)
        # start_codons[nearest] is the smallest coordinate >= pat_end, so a
        # hit qualifies exactly when that one is within max_gap.
        if nearest < len(start_codons) and start_codons[nearest] - pat_end <= max_gap:
            correct += 1

    percent = correct / len(pattern) * 100 if pattern else 0
    print(f"Находок AGGAGG перед старт-кодонами: {correct}")
    print(f"Процент: {percent:.1f}%")


# Run the CLI only when executed as a script, so that importing this module
# (e.g. to reuse the parsers) does not read sys.argv or print anything.
if __name__ == "__main__":
    main()