import numpy as np

def complementary_strand_maker(seq):
    """Return the reverse complement of a DNA sequence.

    Each nucleotide is replaced by its Watson-Crick partner (A<->T, G<->C)
    and the result is reversed.

    Args:
        seq: Iterable of upper-case nucleotide characters ('A', 'T', 'G', 'C').

    Returns:
        The reverse-complement strand as a string.

    Raises:
        KeyError: If ``seq`` contains any symbol other than A, T, G or C
            (including lower-case letters or 'N').
    """
    pairing = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}
    # Build the complement in one join instead of repeated string
    # concatenation, then reverse it.
    return ''.join([pairing[nucleotide] for nucleotide in seq])[::-1]

def dic_maker(distance, path=None):
    """Select genes that start far enough after the previous same-strand gene.

    The annotation file is read as a fixed-width feature table: the first 21
    characters of a line hold the feature key and the remainder holds the
    location (this looks like the GenBank feature-table layout -- confirm
    against the actual file).  Only ``gene`` features are considered.  A gene
    is kept when its first coordinate minus the last coordinate of the
    previous gene on the same strand is strictly greater than ``distance``;
    the first gene on each strand is compared against coordinate 0.

    For a kept gene the locus tag is taken from the line right after the
    ``gene`` line when it starts with ``/locus_tag=``, otherwise from the
    first double-quoted value on the line after that.

    Args:
        distance: Minimum gap (exclusive) between the previous gene's end and
            this gene's start.
        path: Annotation file to read.  Defaults to the project's
            ``T_peptonophilus_annotation.txt``.

    Returns:
        dict mapping locus tag -> ``([start, end], is_complement)`` in file
        order, where ``is_complement`` is True for ``complement(...)``
        locations.

    Raises:
        ValueError: If a gene location is not of the plain ``a..b`` /
            ``complement(a..b)`` form (e.g. ``join(...)`` or ``<1..200``).
        IndexError: If no quoted value is found where the locus tag is
            expected.
    """
    if path is None:
        path = 'C:/Users/jfedo/OneDrive/Documents/public_html/term4/pr9/T_peptonophilus_annotation.txt'
    genes = {}
    # Coordinates of the most recent gene seen on each strand, keyed by
    # is_complement; [0, 0] stands for "no gene yet".
    last_seen = {False: [0, 0], True: [0, 0]}
    with open(path, 'r') as txt:
        # A single iterator is shared by the loop and the look-ahead reads so
        # that qualifier lines consumed below are not visited again.
        lines = iter(txt)
        for raw in lines:
            if raw[:21].strip() != 'gene':
                continue
            location = raw[21:].strip()
            is_complement = location.startswith('complement')
            if is_complement:
                # Keep what follows the last '(' and drop the closing
                # parenthesis, regardless of the line ending.
                location = location.split('(')[-1].rstrip(')')
            coords = [int(c) for c in location.split('..')]
            if coords[0] - last_seen[is_complement][1] > distance:
                qualifier = next(lines, '').strip()
                if not qualifier.startswith('/locus_tag='):
                    # Some other qualifier comes first; the tag is expected
                    # on the following line.
                    qualifier = next(lines, '')
                genes[qualifier.split('"')[1]] = (coords, is_complement)
            # The strand's reference gene advances whether or not this gene
            # was kept.
            last_seen[is_complement] = coords
    return genes

def nuc_dic_maker(structure, distance, end):
    """Extract a nucleotide fragment for every gene selected by ``dic_maker``.

    Only the first line of the genome file is read and used as the sequence.

    Args:
        structure: ``'genes'`` extracts the gene itself
            (``genome[start - 1:stop]``); any other value extracts the
            ``distance``-long window that ends ``end`` positions before the
            gene's first coordinate
            (``genome[start - distance - end:start - end]``).
        distance: Passed to ``dic_maker`` as the minimum gap, and used as the
            window length for non-gene structures.
        end: Offset between the window's end and the gene's first coordinate.

    Returns:
        dict mapping locus tag -> extracted sequence.  Fragments of genes
        flagged as complement are passed through
        ``complementary_strand_maker``.

    NOTE(review): when ``start - distance - end`` is negative the slice start
    wraps around to the end of the sequence -- confirm whether such genes
    should be skipped or clamped.
    NOTE(review): for complement genes the window is still taken before the
    lower coordinate and then reverse-complemented -- confirm this is the
    intended flank.
    """
    with open('C:/Users/jfedo/OneDrive/Documents/public_html/term4/pr9/T_peptonophilus_genome.txt', 'r') as genome_file:
        annotations = dic_maker(distance)
        genome = genome_file.readline()
        whole_gene = structure == 'genes'
        sequences = {}
        for locus_tag, (coords, on_complement) in annotations.items():
            if whole_gene:
                # Coordinates are 1-based and inclusive.
                fragment = genome[coords[0] - 1:coords[1]]
            else:
                window_stop = coords[0] - end
                fragment = genome[window_stop - distance:window_stop]
            if on_complement:
                fragment = complementary_strand_maker(fragment)
            sequences[locus_tag] = fragment
    return sequences

def writer(structure, distance=100, end=1):
    """Write the sequences produced by ``nuc_dic_maker`` as FASTA-style records.

    Each record is written as ``>locus_tag`` on one line followed by the
    sequence on the next.  The output file is
    ``T_peptonophilus_{structure}.txt`` in the project directory.

    Args:
        structure: Forwarded to ``nuc_dic_maker`` and used in the output
            file name.
        distance: Forwarded to ``nuc_dic_maker``.
        end: Forwarded to ``nuc_dic_maker``.
    """
    # Build the sequences before opening the output: mode 'w' truncates the
    # file immediately, so a failure while reading the inputs would otherwise
    # leave an empty file behind (and destroy a previous result).
    sequences = nuc_dic_maker(structure, distance, end)
    with open(f'C:/Users/jfedo/OneDrive/Documents/public_html/term4/pr9/T_peptonophilus_{structure}.txt', 'w') as txt:
        txt.writelines(f'>{gene}\n{sequence}\n' for gene, sequence in sequences.items())

def pwm_maker(values):
    """Build a log-odds position weight matrix from equal-length sequences.

    Rows follow the symbol order A, T, G, C.  With ``x`` the number of
    sequences carrying a symbol at a position, ``total`` the number of
    occurrences of that symbol over all sequences and positions, ``n`` the
    number of sequences and ``length`` the motif length:

    * ``pwm[i][j] = ln((x + 0.25) / (total + 1) * length)``
    * ``z`` is the sum over all cells of ``f * log2(f / (total + 1) * n *
      length)`` with ``f = x / n + 0.25``.

    Args:
        values: Non-empty sequence of strings.  The first ``len(values[0])``
            characters of every string are used; extra characters are
            ignored.

    Returns:
        Tuple ``(pwm, consensus, z)``: ``pwm`` is a ``(4, length)`` float
        array; ``consensus`` lists, position by position, every symbol whose
        score equals the column maximum (so tied positions contribute more
        than one symbol); ``z`` is the accumulated information score.

    Raises:
        IndexError: If ``values`` is empty or a string is shorter than the
            first one.
    """
    symbols = ['A', 'T', 'G', 'C']
    n_seqs = len(values)
    length = len(values[0])
    # One row per sequence, one column per motif position.
    chars = np.array([[seq[j] for j in range(length)] for seq in values], dtype=str)
    # counts[i, j]: how many sequences carry symbols[i] at position j.
    # Computed once and reused for both the matrix and the score.
    counts = np.array([(chars == symbol).sum(axis=0) for symbol in symbols])
    # totals[i]: occurrences of symbols[i] over the whole alignment.
    totals = counts.sum(axis=1)
    pwm = np.log((counts + 0.25) / (totals[:, None] + 1) * length)

    z = 0
    consensus = []
    for j in range(length):
        column = pwm[:, j]
        best = column.max()
        for i, symbol in enumerate(symbols):
            f = counts[i, j] / n_seqs + 0.25
            z += f * np.log2(f / (totals[i] + 1) * n_seqs * length)
            if column[i] == best:
                consensus.append(symbol)
    return pwm, consensus, z

def signal_pwm_maker(file, threshold=0.05):
    """Build a PWM from the leading significant rows of a tab-separated file.

    The first line is skipped as a header.  Rows are then read in order; the
    second-to-last field is parsed as a number and the last field is
    collected as a sequence until a row whose number is ``>= threshold`` is
    met or the file ends.  The number of collected sequences is printed.
    (The column layout matches FIMO's TSV output, where the last two columns
    are the q-value and the matched sequence -- confirm for the actual file.)

    Args:
        file: Base name (without ``.tsv``) of the file in the project
            directory.
        threshold: Reading stops at the first row whose second-to-last field
            is greater than or equal to this value.

    Returns:
        The ``(pwm, consensus, z)`` tuple returned by ``pwm_maker`` for the
        collected sequences.

    Raises:
        ValueError: If the second-to-last field of a data row is not numeric.
        IndexError: Raised by ``pwm_maker`` when no sequence was collected.
    """
    sequences = []
    with open(f'C:/Users/jfedo/OneDrive/Documents/public_html/term4/pr9/{file}.tsv', 'r') as txt:
        txt.readline()  # header row
        for row in txt:
            # Drop the line ending so it does not become part of the last
            # field (it would otherwise add a bogus column to the PWM).
            fields = row.rstrip('\n').split('\t')
            # Blank lines and '#' comment lines carry no data; skipping them
            # also lets the loop end cleanly at end of file.
            if len(fields) < 2 or row.startswith('#'):
                continue
            if float(fields[-2]) >= threshold:
                break
            sequences.append(fields[-1])
    print(len(sequences))
    return pwm_maker(sequences)

# writer('genes')
# writer('promoters')
# writer('intergene', 200, 101)
# print(pwm_maker(list(nuc_dic_maker('promoters', 100, 1).values())))
# print(signal_pwm_maker('fimo'))
