#! /usr/bin/env python3
from sys import argv



# The script parses each FASTA alignment into a dictionary that maps a sequence name to its aligned sequence.


# Text printed for the -h option.
HELP_TEXT = '''NAME
        comparing_alignments.py - performing a comparison of multiple alignments and displaying identical columns in various formats

SYNOPSIS
        ./comparing_alignments.py -h [...] OR ./comparing_alignments.py [-a] [-b] [-c] aligment_file_1 aligment_file_2

OPTIONS
        -h  information on the command
        -a  all output data formats in a row
        -b  output format in the form of blocks of identical columns
        -c  output format in the form of a number of identical columns in two alignments

SEE ALSO
        additional information:
        https://colab.research.google.com/drive/1txe0mcSXwnNGYwuOLe_MfoZwB8B1ucsq?usp=sharing'''

# Short message shown when the command line cannot be interpreted.
USAGE_HINT = ("usage: comparing_alignments.py [-a | -b | -c] "
              "alignment_file_1 alignment_file_2 (use -h for help)")

# Output modes accepted as the first command-line argument.
OUTPUT_MODES = ('-a', '-b', '-c')


def parse_fasta(text):
    """Parse FASTA text into a dict mapping header line -> sequence.

    The sequence of a record is the concatenation of all lines that follow
    its header. Surrounding whitespace (including '\\r' from CRLF files) is
    stripped from the header and from every sequence line.

    Raises:
        ValueError: if non-blank text precedes the first '>' or if two
            records share the same header.
    """
    preamble, *records = text.split(">")
    if preamble.strip():
        raise ValueError("text before the first '>' header is not valid FASTA")

    alignment = {}
    for record in records:
        lines = record.splitlines()
        if not lines:
            # A '>' followed by nothing at all carries no record.
            continue
        name = lines[0].strip()
        if name in alignment:
            raise ValueError(f"duplicate sequence name: {name!r}")
        alignment[name] = "".join(line.strip() for line in lines[1:])
    return alignment


def read_alignment(path):
    """Read the FASTA file at *path* and return it parsed by parse_fasta."""
    with open(path, 'r') as handle:
        return parse_fasta(handle.read())


def residue_indices(sequence):
    """Encode an aligned sequence as a list of residue ordinals.

    Every '-' becomes 0; every other character becomes its 1-based position
    among the non-gap characters of the sequence.
    """
    codes = []
    residues_seen = 0
    for symbol in sequence:
        if symbol == '-':
            codes.append(0)
        else:
            residues_seen += 1
            codes.append(residues_seen)
    return codes


def alignment_columns(alignment, names):
    """Return the alignment columns as tuples of residue ordinals.

    Each tuple holds one entry per name in *names* (in that order), so
    columns built from two alignments with the same *names* are directly
    comparable. Column number k (1-based) is element k-1 of the result.

    Raises:
        ValueError: if *names* is empty, a name is absent from *alignment*,
            the sequences differ in length, or the sequences are empty.
    """
    if not names:
        raise ValueError("alignment contains no sequences")
    missing = [name for name in names if name not in alignment]
    if missing:
        raise ValueError(
            "sequences missing from alignment: " + ", ".join(missing))

    rows = [residue_indices(alignment[name]) for name in names]
    lengths = {len(row) for row in rows}
    if len(lengths) > 1:
        raise ValueError("aligned sequences differ in length")
    if lengths == {0}:
        raise ValueError("aligned sequences are empty")
    return list(zip(*rows))


def find_identical_columns(columns1, columns2):
    """Return [number1, number2] pairs of equal columns (1-based numbers).

    Pairs are ordered by number1, then by number2.

    NOTE: a column made only of gaps (all zeros) equals every other all-gap
    column, so such a column can be paired more than once.
    """
    # Index the second alignment once instead of rescanning it per column.
    numbers_by_column = {}
    for number2, column in enumerate(columns2, start=1):
        numbers_by_column.setdefault(column, []).append(number2)

    return [
        [number1, number2]
        for number1, column in enumerate(columns1, start=1)
        for number2 in numbers_by_column.get(column, ())
    ]


def find_blocks(identical_columns):
    """Group identical-column pairs into runs and return (starts, ends).

    A run continues only while the column number advances by exactly one in
    BOTH alignments; otherwise the reported length (computed from the
    alignment-1 range) would not match the alignment-2 range.
    Returns two parallel lists holding the first and last pair of each run;
    both are empty when there are no identical columns.
    """
    starts = []
    ends = []
    previous = None
    for pair in identical_columns:
        contiguous = (
            previous is not None
            and pair[0] == previous[0] + 1
            and pair[1] == previous[1] + 1
        )
        if not contiguous:
            if previous is not None:
                ends.append(previous)
            starts.append(pair)
        previous = pair
    if previous is not None:
        ends.append(previous)
    return starts, ends


def summary_lines(sequence_count, length1, length2, identical_count):
    """Return the general statistics section of the '-a' report."""
    share1 = 100 * identical_count / length1
    share2 = 100 * identical_count / length2
    return [
        f"number of sequences: {sequence_count}",
        f"alignment length 1: {length1}",
        f"alignment length 2: {length2}",
        '',
        f"% of aligned columns of alignment length 1: {share1:.2f}%",
        f"% of aligned columns of alignment length 2: {share2:.2f}%",
        '',
        '',
    ]


def block_lines(identical_columns, blank_line_after_each=False):
    """Return the 'blocks' section; optionally add a blank line per block."""
    lines = ["blocks of equally aligned columns:", '']
    starts, ends = find_blocks(identical_columns)
    for number, (start, end) in enumerate(zip(starts, ends), start=1):
        lines.append(f"block {number}:")
        lines.append(f"    length: {end[0]-start[0]+1}")
        lines.append(
            f"    start and end columns in alignment 1: {start[0]}-{end[0]}")
        lines.append(
            f"    start and end columns in alignment 2: {start[1]}-{end[1]}")
        if blank_line_after_each:
            lines.append('')
    return lines


def column_number_lines(identical_columns):
    """Return the section listing every pair of identical column numbers."""
    lines = ["numbers of equally aligned columns:", "MSA1 MSA2"]
    lines.extend(f"{pair[0]} {pair[1]}" for pair in identical_columns)
    return lines


def build_report(mode, sequence_count, length1, length2, identical_columns):
    """Return the output lines for *mode* ('-a', '-b' or '-c').

    Raises:
        ValueError: if *mode* is not one of OUTPUT_MODES.
    """
    if mode == '-a':
        return (
            summary_lines(sequence_count, length1, length2,
                          len(identical_columns))
            + block_lines(identical_columns, blank_line_after_each=True)
            + ['']
            + column_number_lines(identical_columns)
        )
    if mode == '-b':
        return block_lines(identical_columns)
    if mode == '-c':
        return column_number_lines(identical_columns)
    raise ValueError(f"unknown output mode: {mode!r}")


def main(args):
    """Run the command-line tool; *args* has the layout of sys.argv.

    With two arguments both are alignment files and the full ('-a') report
    is printed. With three or more, the first is the output mode and the
    next two are the files (any further arguments are ignored).
    Usage and input errors terminate the program via SystemExit with a
    message instead of an unhandled traceback.
    """
    options = args[1:]
    if not options:
        raise SystemExit(USAGE_HINT)
    if options[0] == '-h':
        print(HELP_TEXT)
        return

    if len(options) == 2:
        mode, path1, path2 = '-a', options[0], options[1]
    elif len(options) >= 3:
        mode, path1, path2 = options[0], options[1], options[2]
    else:
        raise SystemExit(USAGE_HINT)
    if mode not in OUTPUT_MODES:
        raise SystemExit(f"unknown option {mode!r}\n{USAGE_HINT}")

    try:
        alignment1 = read_alignment(path1)
        alignment2 = read_alignment(path2)
        # The first file defines which sequences are compared and their order.
        names = list(alignment1)
        columns1 = alignment_columns(alignment1, names)
        columns2 = alignment_columns(alignment2, names)
    except (OSError, ValueError) as error:
        raise SystemExit(f"error: {error}") from None

    identical_columns = find_identical_columns(columns1, columns2)
    report = build_report(
        mode, len(names), len(columns1), len(columns2), identical_columns)
    print("\n".join(report))


if __name__ == "__main__":
    main(argv)