//searchmet.py
#! /usr/bin/env python
import gzip
def scan_first_residues(lines):
    """Count sequences whose first residue is methionine (``M``).

    ``lines`` is any iterable of text lines. Every line that starts with
    ``SQ`` is treated as a sequence header, and the line right after it as
    the first line of that sequence (this matches a UniProt-style flat
    file -- the format is assumed, it is not named anywhere in the script).

    Returns ``(count, totalcount, offenders)``: the number of sequences
    starting with ``M``, the number of ``SQ`` headers seen, and a list of
    ``(header, first_line)`` pairs for every sequence that does not start
    with ``M``.
    """
    lines = iter(lines)  # one shared iterator, so next() below advances the loop too
    count = 0
    totalcount = 0
    offenders = []
    for line in lines:
        if not line.startswith('SQ'):
            continue
        totalcount += 1
        # A header on the very last line has nothing after it; treat the
        # missing sequence as empty instead of failing on first[0].
        first = next(lines, '').strip()
        if first.startswith('M'):
            count += 1
        else:
            offenders.append((line.rstrip('\r\n'), first))
    return count, totalcount, offenders


def main():
    """Ask for a gzip-compressed file and report non-methionine starts."""
    print('This program calculates if there are any proteins that have not methionine as their first aminoacid.')
    print('Where to get information from?')
    # Text mode gives real str lines (no repr() artefacts in the output);
    # errors='replace' keeps a stray byte from aborting the whole scan.
    with gzip.open(input(), 'rt', encoding='utf-8', errors='replace') as input_file:
        count, totalcount, offenders = scan_first_residues(input_file)
    for info, first in offenders:
        print(info)
        print(first)
    # No SQ lines at all: report 0 % instead of dividing by zero.
    fraction = count / totalcount * 100 if totalcount else 0.0
    print(f"Occurence of methionine: {count} in {totalcount} proteins,\nfraction: {fraction:.02f}")


if __name__ == "__main__":
    main()

//searchenclass.py
#! /usr/bin/env python
import gzip
# First digit of an EC number -> name of the enzyme class it denotes.
# Insertion order is the order in which the classes are reported.
EC_CLASS_NAMES = {
    '1': 'oxidoreductases',
    '2': 'transferases',
    '3': 'hydrolases',
    '4': 'lyases',
    '5': 'isomerases',
    '6': 'ligases',
    '7': 'translocases',
}


def count_enzyme_classes(lines):
    """Tally entries by the class of their first ``EC=`` annotation.

    ``lines`` is any iterable of text lines in which entries are terminated
    by a line starting with ``//``. Only the first line containing ``EC=``
    in each entry is considered; the character right after that first
    ``EC=`` selects the class.

    Returns ``(enzymes, count, totalcount, unrecognised)``:

    * ``enzymes`` -- dict mapping class name to number of entries,
    * ``count`` -- entries that had any ``EC=`` line (recognised or not),
    * ``totalcount`` -- number of ``//`` terminators seen,
    * ``unrecognised`` -- entries whose class character is not ``1``-``7``.
    """
    enzymes = dict.fromkeys(EC_CLASS_NAMES.values(), 0)
    count = 0
    totalcount = 0
    unrecognised = 0
    new = True  # True until the current entry's first EC= line has been seen
    for line in lines:
        if new and 'EC=' in line:
            # Look at the digit that follows the first "EC=", rather than
            # testing for 'EC=1', 'EC=2', ... anywhere in the line.
            class_digit = line.partition('EC=')[2][:1]
            name = EC_CLASS_NAMES.get(class_digit)
            if name is None:
                unrecognised += 1
            else:
                enzymes[name] += 1
            count += 1
            new = False
        elif line.startswith('//'):
            new = True
            totalcount += 1
    return enzymes, count, totalcount, unrecognised


def _percentage(part, whole):
    """Return ``part`` as a percentage of ``whole``; 0.0 when ``whole`` is 0."""
    return part / whole * 100 if whole else 0.0


def main():
    """Ask for a gzip-compressed file and report the enzyme class shares."""
    print('This program calculates the fraction of each type of enzyme proteins in a proteome.')
    print('Where to get information from?')
    # Text mode gives real str lines; errors='replace' keeps a stray byte
    # from aborting the scan.
    with gzip.open(input(), 'rt', encoding='utf-8', errors='replace') as input_file:
        enzymes, count, totalcount, unrecognised = count_enzyme_classes(input_file)
    for _ in range(unrecognised):
        print("Something wrong")
    fraction = _percentage(count, totalcount)
    print(f"Occurence of all enzymes: {count} in {totalcount} proteins,\nfraction: {fraction:.02f}")
    for i, j in enzymes.items():
        frac = _percentage(j, count)
        print(f"Occurence of {i}: {j} ({frac:.02f}%) in {count} enzymes")


if __name__ == "__main__":
    main()

//searchamprot.py
#! /usr/bin/env python
import gzip
# One-letter codes of the 20 residues that are counted, in report order.
AMINO_ACIDS = 'ARNDCEQGHILKMFPSTWYV'


def count_residues(lines):
    """Count residues over every sequence found in ``lines``.

    ``lines`` is any iterable of text lines. A line starting with ``SQ``
    opens a sequence; every following line up to (not including) the next
    line starting with ``//`` is sequence data. Characters that are not one
    of ``AMINO_ACIDS`` (spaces, newlines, ...) are ignored.

    Input that ends before a closing ``//`` simply ends the last sequence,
    so a truncated file cannot hang the scan.

    Returns ``(counts, totalcount)``: a dict mapping each residue letter to
    its number of occurrences, and the number of ``SQ`` headers seen.
    """
    counts = dict.fromkeys(AMINO_ACIDS, 0)
    totalcount = 0
    in_sequence = False
    for line in lines:
        if in_sequence:
            if line.startswith('//'):
                in_sequence = False
            else:
                for residue in counts:
                    counts[residue] += line.count(residue)
        elif line.startswith('SQ'):
            totalcount += 1
            in_sequence = True
    return counts, totalcount


def find_extremes(counts):
    """Return ``(most, highest, least, lowest)`` for a residue-count dict.

    ``most`` / ``least`` are strings holding every residue tied for the
    highest / lowest count, in the dict's own order.
    """
    highest = max(counts.values())
    lowest = min(counts.values())
    most = ''.join(residue for residue, n in counts.items() if n == highest)
    least = ''.join(residue for residue, n in counts.items() if n == lowest)
    return most, highest, least, lowest


def _percentage(part, whole):
    """Return ``part`` as a percentage of ``whole``; 0.0 when ``whole`` is 0."""
    return part / whole * 100 if whole else 0.0


def main():
    """Ask for a gzip-compressed file and report overall residue usage."""
    print('This program calculates the most popular aminoacid in the entire protein.')
    print('Where to get information from?')
    # Text mode gives real str lines; errors='replace' keeps a stray byte
    # from aborting the scan.
    with gzip.open(input(), 'rt', encoding='utf-8', errors='replace') as input_file:
        amacid, totalcount = count_residues(input_file)
    namacid = sum(amacid.values())
    ansmax, valuemax, ansmin, valuemin = find_extremes(amacid)
    for i, j in amacid.items():
        frac = _percentage(j, namacid)
        print(f"{i}: {frac:.02f}% ({j})", end='\t')
    fractionmax = _percentage(valuemax, namacid)
    fractionmin = _percentage(valuemin, namacid)
    print(f"\nOccurence of the most popular aminoacid(s) {ansmax}: {valuemax} ({fractionmax:.02f}%) in {namacid} aminoacids ({totalcount} proteins).")
    print(f"Occurence of the least popular aminoacid(s) {ansmin}: {valuemin} ({fractionmin:.02f}%) in {namacid} aminoacids ({totalcount} proteins).")


if __name__ == "__main__":
    main()

//searchampr.py
#! /usr/bin/env python
import gzip
# One-letter codes of the 20 residues that are counted, in scan order.
AMINO_ACIDS = 'ARNDCEQGHILKMFPSTWYV'


def _protein_compositions(lines):
    """Yield one residue-count dict per sequence found in ``lines``.

    A line starting with ``SQ`` opens a sequence; the lines after it, up to
    the next line starting with ``//``, are sequence data. A sequence that
    is still open when the input ends is yielded as well, so truncated
    input terminates instead of looping forever.
    """
    counts = None  # None while outside a sequence
    for line in lines:
        if counts is not None:
            if line.startswith('//'):
                yield counts
                counts = None
            else:
                for residue in counts:
                    counts[residue] += line.count(residue)
        elif line.startswith('SQ'):
            counts = dict.fromkeys(AMINO_ACIDS, 0)
    if counts is not None:
        yield counts


def tally_extreme_residues(lines):
    """Tally how often each residue is the most / least common in a protein.

    For every sequence in ``lines`` the residue(s) with the highest count
    and the residue(s) with the lowest count are determined (ties all
    count). Sequences with no residues at all are counted as proteins but
    contribute to neither tally, since nothing in them can be ranked.

    Returns ``(most_tally, least_tally, totalcount)``. Each tally maps a
    residue letter to the number of proteins in which it was an extreme;
    keys appear in order of first occurrence. ``totalcount`` is the number
    of sequences seen.
    """
    most_tally = {}
    least_tally = {}
    totalcount = 0
    for counts in _protein_compositions(lines):
        totalcount += 1
        highest = max(counts.values())
        if highest == 0:
            continue
        lowest = min(counts.values())
        for residue, n in counts.items():
            if n == highest:
                most_tally[residue] = most_tally.get(residue, 0) + 1
            if n == lowest:
                least_tally[residue] = least_tally.get(residue, 0) + 1
    return most_tally, least_tally, totalcount


def most_frequent(tally):
    """Return ``(letters, value)``: the keys sharing the largest tally value.

    ``letters`` is a string of all tied keys in the tally's own order; an
    empty tally gives ``('', 0)``.
    """
    if not tally:
        return '', 0
    value = max(tally.values())
    return ''.join(key for key, n in tally.items() if n == value), value


def _percentage(part, whole):
    """Return ``part`` as a percentage of ``whole``; 0.0 when ``whole`` is 0."""
    return part / whole * 100 if whole else 0.0


def main():
    """Ask for a gzip-compressed file and report per-protein extremes."""
    print('This program calculates the most popular aminoacid among the most popular for each protein.')
    print('Where to get information from?')
    # Text mode gives real str lines; errors='replace' keeps a stray byte
    # from aborting the scan.
    with gzip.open(input(), 'rt', encoding='utf-8', errors='replace') as input_file:
        most_tally, least_tally, totalcount = tally_extreme_residues(input_file)
    ansmax, valuemax = most_frequent(most_tally)
    a = ''.join(most_tally)
    print(f"Most popular: {a}")
    # valuemin is the highest number of proteins in which one residue was
    # the least common -- i.e. the "most popular least popular" residue.
    ansmin, valuemin = most_frequent(least_tally)
    a = ''.join(least_tally)
    print(f"Least popular: {a}")
    fractionmax = _percentage(valuemax, totalcount)
    fractionmin = _percentage(valuemin, totalcount)
    print(f"\nOccurence of the most popular aminoacid(s) {ansmax}: {valuemax} ({fractionmax:.02f}%) proteins in ({totalcount} proteins).")
    print(f"Occurence of the least popular aminoacid(s) {ansmin}: {valuemin} ({fractionmin:.02f}%) proteins in ({totalcount} proteins).")


if __name__ == "__main__":
    main()