import gzip

file_path = "../../term2/pr8/UP000636949.swiss.gz"


def collect_feature_entries(lines):
    """Collect the IDs of entries that carry HELIX / TRANSMEM features.

    ``lines`` is any iterable of text lines in the Swiss-Prot flat-file
    layout (for example an open text-mode file handle).

    Returns a ``(helix_ids, transmem_ids)`` tuple of sets holding the entry
    names (second token of the ``ID`` line) that have at least one feature
    with the corresponding key.
    """
    helix_ids = set()
    transmem_ids = set()
    entry_id = None

    for row in lines:
        if row.startswith("ID "):
            fields = row.split()
            # A malformed ID line without a name must not crash the scan.
            entry_id = fields[1] if len(fields) > 1 else None
        elif row.startswith("//"):
            # End of entry: do not attribute stray lines to the previous ID.
            entry_id = None
        elif row.startswith("FT ") and entry_id:
            # The feature key occupies columns 6-13. Qualifier/continuation
            # lines leave these columns blank, so free text that merely
            # mentions "HELIX" or "TRANSMEM" is not counted as a feature.
            feature_key = row[5:13].strip()
            if feature_key == "HELIX":
                helix_ids.add(entry_id)
            elif feature_key == "TRANSMEM":
                transmem_ids.add(entry_id)

    return helix_ids, transmem_ids


def main():
    """Scan the proteome file at ``file_path`` and print the three counts."""
    with gzip.open(file_path, "rt") as f:
        helix_set, tm_set = collect_feature_entries(f)

    print("helix:", len(helix_set))
    print("transmem:", len(tm_set))
    print("both:", len(helix_set & tm_set))


if __name__ == "__main__":
    main()
