//searchtm.py #! /usr/bin/env python import gzip print('This program calculates the fraction of transmembrane proteins in a proteome.') print('Where to get information from?') input_file = gzip.open(input(), 'r') count = 0 totalcount = 0 new = True for line in input_file: line = str(line)[2:] if line.startswith('KW') and ("Transmembrane" in line) and new: print(totalcount+1, line) count += 1 new=False elif line.startswith('//'): new = True totalcount += 1 fraction = count/totalcount*100 #print(totalcount) print(f"Occurence of transmembranes: {count} in {totalcount} proteins,\nfraction: {fraction:.02f}") input_file.close() //searchen.py #! /usr/bin/env python import gzip print('This program calculates the fraction of enzyme proteins in a proteome.') print('Where to get information from?') input_file = gzip.open(input(), 'r') count = 0 totalcount = 0 new = True for line in input_file: line = str(line)[2:] if ("EC=" in line) and new: print(line) count += 1 new=False elif line.startswith('//'): new = True totalcount += 1 fraction = count/totalcount*100 #print(totalcount) print(f"Occurence of enzymes: {count} in {totalcount} proteins,\nfraction: {fraction:.02f}") input_file.close() //searchdnab.py #! /usr/bin/env python import gzip print('This program calculates the fraction of DNA-binding proteins in a proteome.') print('Where to get information from?') input_file = gzip.open(input(), 'r') count = 0 totalcount = 0 new = True for line in input_file: line = str(line)[2:] if ("DNA-binding protein" in line) and ("DE" in line) and new: #print(line) count += 1 new=False elif line.startswith('//'): #print(input_file.readline()) new = True totalcount += 1 fraction = count/totalcount*100 #print(totalcount) print(f"Occurence of DNA-binding proteins: {count} in {totalcount} proteins,\nfraction: {fraction:.02f}") input_file.close() //searchgal.py #! /usr/bin/env python import gzip print('This program calculates the fraction of galactosidases in a proteome.') print('Where to get information from?') input_file = gzip.open(input(), 'r') count = 0 totalcount = 0 new = True for line in input_file: line = str(line)[2:] if ("galactosidase" in line) and ("DE" in line) and new: #print(line) count += 1 new=False elif line.startswith('//'): #print(input_file.readline()) new = True totalcount += 1 fraction = count/totalcount*100 #print(totalcount) print(f"Occurence of galactosidase proteins: {count} in {totalcount} proteins,\nfraction: {fraction:.02f}") input_file.close() //searchhsp.py #! /usr/bin/env python import gzip print('This program calculates the fraction of heat-shock proteins in a proteome.') print('Where to get information from?') input_file = gzip.open(input(), 'r') count = 0 totalcount = 0 new = True for line in input_file: line = str(line)[2:] if ("shock" in line) and ("DE" in line) and new: if "Heat" in line: count+=1 print(line) new=False elif line.startswith('//'): #print(input_file.readline()) new = True totalcount += 1 fraction = count/totalcount*100 #print(totalcount) print(f"Occurence of heat-shock proteins: {count} in {totalcount} proteins,\nfraction: {fraction:.02f}") input_file.close() //searchamprotdev.py #! /usr/bin/env python import gzip print('This program calculates the most popular aminoacid in the entire protein.') print('Where to get information from?') input_file = gzip.open(input(), 'r') amacid = {'A':0, 'R':0, 'N':0, 'D':0, 'C':0, 'E':0, 'Q':0, 'G':0, 'H':0, 'I':0, 'L':0, 'K':0, 'M':0, 'F':0, 'P':0, 'S':0, 'T':0, 'W':0, 'Y':0, 'V':0} namacid = 0 totalcount = 0 for line in input_file: line = str(line)[2:] if line.startswith('SQ'): totalcount+=1 info = line seq = str(input_file.readline())[2:] while not (seq.startswith('//')): for i in seq: if i in amacid: amacid[i]+=1 namacid+=1 seq = str(input_file.readline())[2:] pchar = amacid['K']+amacid['R']+amacid['H']+amacid['D']+amacid['E'] punchar = amacid['S']+amacid['T']+amacid['C']+amacid['G']+amacid['N']+amacid['Y'] print(f"Ratio of polar charged versus polar uncharged amino acids: {(pchar/punchar):.02f}.") input_file.close()