#!/bin/sh

# Build ids_ordered.txt: a random sample of 10 records taken from
# swissprot_chunk.txt (presumably Swiss-Prot flat-file entries -- confirm),
# one record per line in the form "<ID> <AC fields...> <DR fields...>".

# extract_ids: read flat-file text on stdin and write one space-separated
# line per entry to stdout (the last line carries no trailing newline).
extract_ids() {
	# Carriage returns are stripped FIRST: the sed expression below anchors
	# on end of line ("$"), and a trailing CR from CRLF input would otherwise
	# keep a final " -" placeholder (e.g. "...; -.") from being removed.
	tr -d '\r' \
		| awk '
			# An empty line is emitted before every ID so that entries can
			# be told apart after the newlines are folded away further down.
			$1 == "ID" { print ""; print $2 }
			# AC: keep everything except the line tag.
			$1 == "AC" { $1 = ""; print $0 }
			# DR: drop the line tag and the field that follows it.
			$1 == "DR" { $1 = $2 = ""; print $0 }
		' \
		| sed -r 's/[;.]//g; s/\<(-|(Genomic|Unassigned)_DNA|mRNA|FAINT|4)\>//g; s/ -( |$)//g' \
		| tr '\n' ':' \
		| sed -r 's/::/\n/g; s/:/ /g; s/ +/ /g; s/^ +//; s/ +$//;'
	# After tr, "::" only occurs where the empty pre-ID line was, so it is
	# turned back into the record separator; every other ":" was a line break
	# inside one entry and becomes a plain space.
}

# sort -R puts the records in random order; head keeps the first 10 of them.
extract_ids < swissprot_chunk.txt \
	| sort -R \
	| head -n 10 \
	> ids_ordered.txt

# Run seqret once per record of ids_ordered.txt and collect everything it
# writes to stdout in exam/example.fasta. The first whitespace-separated
# field of each line is passed as "sw:<field>" (presumably an EMBOSS
# database:entry address -- confirm against the local EMBOSS setup).
mkdir -p exam
# The list is read on fd 3 so that seqret keeps the script's own stdin.
# The "|| [ -n ... ]" part still handles a last line without a newline.
while read -r entry _ <&3 || [ -n "$entry" ]; do
	# Skip blank lines instead of issuing a bare "sw:" query.
	[ -n "$entry" ] || continue
	seqret "sw:$entry" -stdout -auto
done 3< ids_ordered.txt > exam/example.fasta

# Write exam/mapping.csv: every record of ids_ordered.txt with its
# whitespace-separated fields in random order, and the records themselves in
# random order as well.

# shuffle_fields: copy stdin to stdout, randomly reordering the
# whitespace-separated fields of each line and joining them with single
# spaces. The body runs in a subshell so the helper variable does not leak.
shuffle_fields() (
	# Prefer "python" as before, but fall back to "python3" on systems that
	# no longer ship an unversioned interpreter.
	for py in python python3; do
		command -v "$py" > /dev/null 2>&1 && break
	done
	# The snippet is valid under both Python 2 and Python 3 (the previous
	# print statement was Python 2-only syntax).
	"$py" -c '
import random
import sys

for line in sys.stdin:
    fields = line.split()
    random.shuffle(fields)
    print(" ".join(fields))
'
)

mkdir -p exam
# sort -R puts the shuffled records themselves in random order.
shuffle_fields < ids_ordered.txt \
	| sort -R \
	> exam/mapping.csv
