def fasta_AC(fasta):
    """Collect the accessions found on the header lines of a FASTA file.

    A header is any line that begins with '>'. Its accession is the text
    between that '>' and the first '|' of the stripped line, or everything
    after the '>' when the header contains no '|'.

    Args:
        fasta: Path of the FASTA file to read.

    Returns:
        A set with one entry per distinct header accession.
    """
    with open(fasta, mode='r') as handle:
        return {
            header.strip().split('|')[0][1:]
            for header in handle
            if header.startswith('>')
        }
# Accessions that appear in the headers of both FASTA files; report how many.
common_AC = fasta_AC('protein-matching-aves.fasta').intersection(
    fasta_AC('protein-sequences.fasta')
)
print(len(common_AC))

def ac_into_fasta(fasta, ac, out_path='domain+tax.fasta'):
    """Copy the FASTA records whose accession is in ``ac`` to a new file.

    A record is a header line (starting with '>') plus every following line
    up to the next header. The accession of a header is the text between the
    '>' and the first '|' of the stripped line. Lines that precede the first
    header are never written.

    Args:
        fasta: Path of the FASTA file to read.
        ac: Container of accession strings to keep; anything supporting
            ``in`` works, a set gives constant-time lookups.
        out_path: Path of the file to write. Defaults to 'domain+tax.fasta'.
            The file is created or overwritten.
    """
    # Open the input first so a missing input file raises before the
    # output file is created or truncated.
    with open(fasta, mode='r') as source, open(out_path, mode='w') as out:
        keep = False
        for line in source:
            if line.startswith('>'):
                # Each header decides whether its whole record is copied.
                keep = line.strip().split('|')[0][1:] in ac
            if keep:
                out.write(line)
# Write the records of protein-sequences.fasta whose accession is in common_AC.
ac_into_fasta(fasta='protein-sequences.fasta', ac=common_AC)

