# //searchmet.py
#! /usr/bin/env python
import gzip
import re


def percentage(part, whole):
    """Return ``part / whole * 100``, or ``0.0`` when ``whole`` is zero.

    The guard keeps the reports below from crashing with
    ZeroDivisionError on an empty input file.
    """
    if not whole:
        return 0.0
    return part / whole * 100


def count_initial_methionine(lines):
    """Count sequences and how many of them start with ``M``.

    ``lines`` is any iterable of text lines. A line starting with ``SQ``
    announces a sequence; the line immediately after it is taken as the
    first chunk of residues (presumably UniProtKB flat-file layout --
    confirm against the real input).

    Returns ``(with_met, total, exceptions)``. ``exceptions`` lists one
    ``(sq_line, first_sequence_line)`` pair, both stripped of line ends,
    for every sequence whose first residue is not ``M``.
    """
    stream = iter(lines)
    with_met = 0
    total = 0
    exceptions = []
    for line in stream:
        if not line.startswith('SQ'):
            continue
        total += 1
        # Pull the following line from the same iterator; '' at EOF keeps
        # a truncated file from raising instead of being reported.
        first = next(stream, '').strip()
        if first.startswith('M'):
            with_met += 1
        else:
            exceptions.append((line.rstrip('\r\n'), first))
    return with_met, total, exceptions


def searchmet_main():
    """Prompt for a gzip file and report how many sequences start with M."""
    print('This program calculates if there are any proteins that have not methionine as their first aminoacid.')
    print('Where to get information from?')
    # Text mode replaces the old str(bytes)[2:] trick, which left a
    # literal "\n'" tail on every printed line.
    with gzip.open(input(), 'rt', encoding='utf-8', errors='replace') as input_file:
        count, totalcount, exceptions = count_initial_methionine(input_file)
    for info, first in exceptions:
        print(info)
        print(first)
    fraction = percentage(count, totalcount)
    print(f"Occurence of methionine: {count} in {totalcount} proteins,\nfraction: {fraction:.02f}")


# //searchenclass.py
#! /usr/bin/env python

# First digit of an EC number -> enzyme class name used in the report.
ENZYME_CLASSES = {
    '1': 'oxidoreductases',
    '2': 'transferases',
    '3': 'hydrolases',
    '4': 'lyases',
    '5': 'isomerases',
    '6': 'ligases',
    '7': 'translocases',
}

# Captures the leading digit of the first "EC=<digit>" on a line.
EC_CLASS_RE = re.compile(r'EC=(\d)')


def count_enzyme_classes(lines):
    """Tally enzyme classes over entries terminated by ``//`` lines.

    Only the first line containing ``EC=`` inside each entry is used.
    The class is taken from the first EC number on that line; the old
    ``'EC=1' in line`` chain picked the lowest class mentioned anywhere
    on the line instead.

    Returns ``(enzymes, count, totalcount, unknown)``:
    ``enzymes`` maps class name to number of entries, ``count`` is the
    number of entries with any ``EC=`` line (classified or not),
    ``totalcount`` is the number of ``//`` terminators seen, and
    ``unknown`` is how many ``EC=`` lines had no class digit 1-7.
    """
    enzymes = {name: 0 for name in ENZYME_CLASSES.values()}
    count = 0
    totalcount = 0
    unknown = 0
    awaiting_ec = True
    for line in lines:
        if awaiting_ec and 'EC=' in line:
            found = EC_CLASS_RE.search(line)
            name = ENZYME_CLASSES.get(found.group(1)) if found else None
            if name is None:
                unknown += 1
            else:
                enzymes[name] += 1
            # As before, an unclassifiable EC line still counts as an enzyme.
            count += 1
            awaiting_ec = False
        elif line.startswith('//'):
            awaiting_ec = True
            totalcount += 1
    return enzymes, count, totalcount, unknown


def searchenclass_main():
    """Prompt for a gzip file and report the share of each enzyme class."""
    print('This program calculates the fraction of each type of enzyme proteins in a proteome.')
    print('Where to get information from?')
    with gzip.open(input(), 'rt', encoding='utf-8', errors='replace') as input_file:
        enzymes, count, totalcount, unknown = count_enzyme_classes(input_file)
    for _ in range(unknown):
        print("Something wrong")
    fraction = percentage(count, totalcount)
    print(f"Occurence of all enzymes: {count} in {totalcount} proteins,\nfraction: {fraction:.02f}")
    for name, hits in enzymes.items():
        frac = percentage(hits, count)
        print(f"Occurence of {name}: {hits} ({frac:.02f}%) in {count} enzymes")


if __name__ == '__main__':
    # The dump ran the scripts back to back at top level; keep that order.
    searchmet_main()
    searchenclass_main()

# //searchamprot.py #!
# /usr/bin/env python   <- tail of the "#!" line that the dump split in two
import gzip

# One-letter codes that are counted, in the order the report iterates them.
AMINO_ACIDS = 'ARNDCEQGHILKMFPSTWYV'


def count_residues(lines):
    """Count amino-acid letters over every sequence in ``lines``.

    ``lines`` is any iterable of text lines. A line starting with ``SQ``
    opens a sequence; every following line up to (not including) the
    next line starting with ``//`` is scanned, and each character that
    is one of ``AMINO_ACIDS`` is counted.

    Returns ``(amacid, namacid, totalcount)``: per-letter counts, total
    counted letters, and number of ``SQ`` lines seen.
    """
    stream = iter(lines)
    amacid = dict.fromkeys(AMINO_ACIDS, 0)
    namacid = 0
    totalcount = 0
    for line in stream:
        if not line.startswith('SQ'):
            continue
        totalcount += 1
        # Consume the sequence from the same iterator. Running out of
        # input simply ends the loop; the old readline() loop spun
        # forever on a file whose last entry lacked the "//" line.
        for seq in stream:
            if seq.startswith('//'):
                break
            for residue in seq:
                if residue in amacid:
                    amacid[residue] += 1
                    namacid += 1
    return amacid, namacid, totalcount


def most_and_least_common(counts):
    """Return ``(most, most_count, least, least_count)`` for ``counts``.

    Keys that tie for the maximum (or minimum) are concatenated in
    dictionary order. An empty mapping yields ``('-', 0, '-', 0)``.

    NOTE(review): only the maximum half of this logic survived in the
    source; the minimum half is reconstructed as its mirror image --
    confirm against the original script.
    """
    if not counts:
        return '-', 0, '-', 0
    highest = max(counts.values())
    lowest = min(counts.values())
    most = ''.join(key for key, hits in counts.items() if hits == highest)
    least = ''.join(key for key, hits in counts.items() if hits == lowest)
    return most, highest, least, lowest


def searchamprot_main():
    """Prompt for a gzip file and report the most/least frequent residues."""
    print('This program calculates the most popular aminoacid in the entire protein.')
    print('Where to get information from?')
    with gzip.open(input(), 'rt', encoding='utf-8', errors='replace') as input_file:
        amacid, namacid, totalcount = count_residues(input_file)
    ansmax, valuemax, ansmin, valuemin = most_and_least_common(amacid)
    fractionmax = valuemax / namacid * 100 if namacid else 0.0
    fractionmin = valuemin / namacid * 100 if namacid else 0.0
    # NOTE(review): the original report lines were lost from the source;
    # the wording below is a reconstruction -- confirm before relying on it.
    print(f"Most popular aminoacid(s) {ansmax}: {valuemax} ({fractionmax:.02f}%) of {namacid} residues in {totalcount} proteins.")
    print(f"Least popular aminoacid(s) {ansmin}: {valuemin} ({fractionmin:.02f}%) of {namacid} residues in {totalcount} proteins.")


if __name__ == '__main__':
    searchamprot_main()

# ---------------------------------------------------------------------------
# NOTE(review): unrecoverable remainder of the dump, kept verbatim below.
# It looks as if text between "<" and ">" was stripped, which removed the end
# of searchamprot.py and the start of at least one further script that worked
# on per-protein results (`ans[0]`, `ans[1]`). Restore from the original
# files; `ans`, `a` and the initial values are not defined in what is left.
#
# if jvaluemax: valuemax=j ansmax=i elif j==valuemax: ansmax+=i
# if jvaluemax: ansmax=i valuemax=ans[0].count(i)
# elif ans[0].count(i)==valuemax: ansmax+=i
# print(f"Most popular: {a}") a='' for i in ans[1]: if i not in a: a+=i
# for i in a: if ans[1].count(i)>valuemin: ansmin=i valuemin=ans[1].count(i)
# elif ans[1].count(i)==valuemin: ansmin+=i print(f"Least popular: {a}")
# fractionmax = valuemax/totalcount*100 fractionmin = valuemin/totalcount*100
# print(f"\nOccurence of the most popular aminoacid(s) {ansmax}: {valuemax} ({fractionmax:.02f}%) proteins in ({totalcount} proteins).")
# print(f"Occurence of the least popular aminoacid(s) {ansmin}: {valuemin} ({fractionmin:.02f}%) proteins in ({totalcount} proteins).")
# input_file.close()