Kodomo

Пользователь

Учебная страница курса биоинформатики,
год поступления 2012

GO анализ

Файлы

Manual

assayed_genes.tab

de.genes.tab

Загрузка пакетов

source("https://bioconductor.org/biocLite.R")
biocLite("goseq")
library(goseq)

supportedGeneIDs()
head(supportedOrganisms())

#gene length for mm10
biocLite("TxDb.Mmusculus.UCSC.mm10.ensGene")
library(TxDb.Mmusculus.UCSC.mm10.ensGene)
biocLite("org.Mm.eg.db")

Читаем данные

#universe of genes
assayed.genes=scan("assayed_genes.tab", what = character())
#DE genes
de.genes=scan("de.genes.tab", what = character())
gene.vector=as.integer(assayed.genes%in%de.genes)
names(gene.vector)=assayed.genes
table(gene.vector)

Probability Weighting Function

pwf <- nullp(gene.vector, "mm10", "ensGene")

GO анализ

g.annot=getgo(assayed.genes, "mm10", "ensGene", fetch.cats=c("GO:CC","GO:BP","GO:MF")) # Cellular Component; Biological Process; Molecular Function

GO.wall = goseq(pwf, "mm10", "ensGene", gene2cat = g.annot, test.cats="GO:BP", method = "Wallenius")
enriched.GO=subset(GO.wall, p.adjust(GO.wall$over_represented_pvalue, method="BH")<0.05, select=c("category", "over_represented_pvalue", "numDEInCat", "term", "ontology"))

Визуализация

sm=enriched.GO[enriched.GO$ontology=="BP",]
smm=sm[1:15,]
ggplot(smm, aes(smm$term, smm$numDEInCat, fill=smm$over_represented_pvalue))+ geom_bar(stat="identity", colour="black",position="dodge")+scale_fill_gradient2(low='white', mid='yellow', high='red', space='Lab')+coord_flip()+theme(axis.title.y=element_blank())+ylab("Number of DE genes")+labs(fill="p-value")+ggtitle("GO enrichment")