//searchtm.py
#! /usr/bin/env python
import gzip
def transmembrane_fraction(path):
    """Count transmembrane entries in a gzip-compressed flat file.

    An entry ends at a line starting with ``//``.  An entry is counted at
    most once, on its first ``KW`` line that contains ``Transmembrane``;
    that line is echoed to stdout, prefixed with the 1-based number of the
    entry it belongs to.

    Args:
        path: Path of the gzip-compressed input file.

    Returns:
        A ``(count, totalcount)`` tuple: the number of entries with a
        matching keyword line and the number of ``//``-terminated entries.
    """
    count = 0
    totalcount = 0
    pending = True  # True until the current entry has been counted
    # Text mode yields decoded lines, so matches print as plain text rather
    # than as the tail of a bytes repr; ``with`` closes the file on errors.
    with gzip.open(path, 'rt', encoding='utf-8', errors='replace') as handle:
        for line in handle:
            if pending and line.startswith('KW') and 'Transmembrane' in line:
                print(totalcount + 1, line.rstrip('\n'))
                count += 1
                pending = False
            elif line.startswith('//'):
                pending = True
                totalcount += 1
    return count, totalcount


def main():
    """Prompt for the input path and report the transmembrane fraction."""
    print('This program calculates the fraction of transmembrane proteins in a proteome.')
    print('Where to get information from?')
    count, totalcount = transmembrane_fraction(input())
    if totalcount == 0:
        # No terminated entry was seen: a percentage would divide by zero.
        print("No protein entries found; the fraction is undefined.")
        return
    fraction = count / totalcount * 100
    print(f"Occurence of transmembranes: {count} in {totalcount} proteins,\nfraction: {fraction:.02f}")


if __name__ == "__main__":
    main()


//searchen.py
#! /usr/bin/env python
import gzip
def enzyme_fraction(path):
    """Count enzyme entries in a gzip-compressed flat file.

    An entry ends at a line starting with ``//``.  An entry is counted at
    most once, on its first line that contains ``EC=``; that line is
    echoed to stdout.

    Args:
        path: Path of the gzip-compressed input file.

    Returns:
        A ``(count, totalcount)`` tuple: the number of entries with an
        ``EC=`` line and the number of ``//``-terminated entries.
    """
    count = 0
    totalcount = 0
    pending = True  # True until the current entry has been counted
    # Text mode yields decoded lines, so matches print as plain text rather
    # than as the tail of a bytes repr; ``with`` closes the file on errors.
    with gzip.open(path, 'rt', encoding='utf-8', errors='replace') as handle:
        for line in handle:
            if pending and 'EC=' in line:
                print(line.rstrip('\n'))
                count += 1
                pending = False
            elif line.startswith('//'):
                pending = True
                totalcount += 1
    return count, totalcount


def main():
    """Prompt for the input path and report the enzyme fraction."""
    print('This program calculates the fraction of enzyme proteins in a proteome.')
    print('Where to get information from?')
    count, totalcount = enzyme_fraction(input())
    if totalcount == 0:
        # No terminated entry was seen: a percentage would divide by zero.
        print("No protein entries found; the fraction is undefined.")
        return
    fraction = count / totalcount * 100
    print(f"Occurence of enzymes: {count} in {totalcount} proteins,\nfraction: {fraction:.02f}")


if __name__ == "__main__":
    main()

//searchdnab.py
#! /usr/bin/env python
import gzip
def dna_binding_fraction(path):
    """Count DNA-binding protein entries in a gzip-compressed flat file.

    An entry ends at a line starting with ``//``.  An entry is counted at
    most once, on its first ``DE`` line that contains
    ``DNA-binding protein``.

    Args:
        path: Path of the gzip-compressed input file.

    Returns:
        A ``(count, totalcount)`` tuple: the number of entries with a
        matching description line and the number of ``//``-terminated
        entries.
    """
    count = 0
    totalcount = 0
    pending = True  # True until the current entry has been counted
    # Text mode yields decoded lines instead of bytes reprs; ``with``
    # closes the file even if reading fails.
    with gzip.open(path, 'rt', encoding='utf-8', errors='replace') as handle:
        for line in handle:
            # Require the DE line code at the start of the line: a plain
            # substring test also matches "DE" inside unrelated lines.
            if pending and line.startswith('DE') and 'DNA-binding protein' in line:
                count += 1
                pending = False
            elif line.startswith('//'):
                pending = True
                totalcount += 1
    return count, totalcount


def main():
    """Prompt for the input path and report the DNA-binding fraction."""
    print('This program calculates the fraction of DNA-binding proteins in a proteome.')
    print('Where to get information from?')
    count, totalcount = dna_binding_fraction(input())
    if totalcount == 0:
        # No terminated entry was seen: a percentage would divide by zero.
        print("No protein entries found; the fraction is undefined.")
        return
    fraction = count / totalcount * 100
    print(f"Occurence of DNA-binding proteins: {count} in {totalcount} proteins,\nfraction: {fraction:.02f}")


if __name__ == "__main__":
    main()

//searchgal.py
#! /usr/bin/env python
import gzip
def galactosidase_fraction(path):
    """Count galactosidase entries in a gzip-compressed flat file.

    An entry ends at a line starting with ``//``.  An entry is counted at
    most once, on its first ``DE`` line that contains ``galactosidase``.

    Args:
        path: Path of the gzip-compressed input file.

    Returns:
        A ``(count, totalcount)`` tuple: the number of entries with a
        matching description line and the number of ``//``-terminated
        entries.
    """
    count = 0
    totalcount = 0
    pending = True  # True until the current entry has been counted
    # Text mode yields decoded lines instead of bytes reprs; ``with``
    # closes the file even if reading fails.
    with gzip.open(path, 'rt', encoding='utf-8', errors='replace') as handle:
        for line in handle:
            # Require the DE line code at the start of the line: a plain
            # substring test also matches "DE" inside unrelated lines.
            if pending and line.startswith('DE') and 'galactosidase' in line:
                count += 1
                pending = False
            elif line.startswith('//'):
                pending = True
                totalcount += 1
    return count, totalcount


def main():
    """Prompt for the input path and report the galactosidase fraction."""
    print('This program calculates the fraction of galactosidases in a proteome.')
    print('Where to get information from?')
    count, totalcount = galactosidase_fraction(input())
    if totalcount == 0:
        # No terminated entry was seen: a percentage would divide by zero.
        print("No protein entries found; the fraction is undefined.")
        return
    fraction = count / totalcount * 100
    print(f"Occurence of galactosidase proteins: {count} in {totalcount} proteins,\nfraction: {fraction:.02f}")


if __name__ == "__main__":
    main()


//searchhsp.py
#! /usr/bin/env python
import gzip
def heat_shock_fraction(path):
    """Count heat-shock protein entries in a gzip-compressed flat file.

    An entry ends at a line starting with ``//``.  Within an entry only
    the first ``DE`` line containing ``shock`` is examined: it is echoed
    to stdout, and the entry is counted when that line also contains
    ``Heat``.  Later lines of the same entry are ignored.

    Args:
        path: Path of the gzip-compressed input file.

    Returns:
        A ``(count, totalcount)`` tuple: the number of entries counted as
        heat-shock proteins and the number of ``//``-terminated entries.
    """
    count = 0
    totalcount = 0
    pending = True  # True until a "shock" DE line was seen in this entry
    # Text mode yields decoded lines, so matches print as plain text rather
    # than as the tail of a bytes repr; ``with`` closes the file on errors.
    with gzip.open(path, 'rt', encoding='utf-8', errors='replace') as handle:
        for line in handle:
            # Require the DE line code at the start of the line: a plain
            # substring test also matches "DE" inside unrelated lines.
            if pending and line.startswith('DE') and 'shock' in line:
                if 'Heat' in line:
                    count += 1
                print(line.rstrip('\n'))
                pending = False
            elif line.startswith('//'):
                pending = True
                totalcount += 1
    return count, totalcount


def main():
    """Prompt for the input path and report the heat-shock fraction."""
    print('This program calculates the fraction of heat-shock proteins in a proteome.')
    print('Where to get information from?')
    count, totalcount = heat_shock_fraction(input())
    if totalcount == 0:
        # No terminated entry was seen: a percentage would divide by zero.
        print("No protein entries found; the fraction is undefined.")
        return
    fraction = count / totalcount * 100
    print(f"Occurence of heat-shock proteins: {count} in {totalcount} proteins,\nfraction: {fraction:.02f}")


if __name__ == "__main__":
    main()

//searchamprotdev.py
#! /usr/bin/env python
import gzip
def count_amino_acids(path):
    """Tally amino-acid letters in the sequence sections of a flat file.

    A sequence section starts on the line after one beginning with ``SQ``
    and runs until a line beginning with ``//`` or the end of the file.
    Only the twenty one-letter codes used as keys of the result are
    tallied; every other character is ignored.

    Args:
        path: Path of the gzip-compressed input file.

    Returns:
        A dict mapping each of the twenty one-letter codes to the number
        of times it occurs in the sequence sections.
    """
    amacid = dict.fromkeys('ARNDCEQGHILKMFPSTWYV', 0)
    in_sequence = False
    # Text mode yields decoded lines instead of bytes reprs; ``with``
    # closes the file even if reading fails.
    with gzip.open(path, 'rt', encoding='utf-8', errors='replace') as handle:
        # A single pass with a state flag ends at end of file, so a file
        # that lacks the final "//" cannot keep the reader looping.
        for line in handle:
            if line.startswith('//'):
                in_sequence = False
            elif in_sequence:
                for residue in line:
                    if residue in amacid:
                        amacid[residue] += 1
            elif line.startswith('SQ'):
                in_sequence = True
    return amacid


def polar_charge_ratio(amacid):
    """Return the ratio of charged to uncharged polar residue counts.

    Charged residues are K, R, H, D and E; uncharged polar residues are
    S, T, C, G, N and Y.

    Args:
        amacid: Mapping from one-letter amino-acid code to its count.

    Returns:
        The ratio as a float, or ``None`` when there are no uncharged
        polar residues and the ratio is undefined.
    """
    pchar = sum(amacid[residue] for residue in 'KRHDE')
    punchar = sum(amacid[residue] for residue in 'STCGNY')
    if punchar == 0:
        return None
    return pchar / punchar


def main():
    """Prompt for the input path and report the polar residue ratio."""
    # NOTE(review): the banner mentions the most popular amino acid, but
    # the report below is a charged/uncharged ratio - confirm the wording.
    print('This program calculates the most popular aminoacid in the entire protein.')
    print('Where to get information from?')
    ratio = polar_charge_ratio(count_amino_acids(input()))
    if ratio is None:
        print("No polar uncharged amino acids found; the ratio is undefined.")
        return
    print(f"Ratio of polar charged versus polar uncharged amino acids: {ratio:.02f}.")


if __name__ == "__main__":
    main()