Учебная страница курса биоинформатики,
год поступления 2012
GO анализ
Файлы
Загрузка пакетов
source("https://bioconductor.org/biocLite.R") biocLite("goseq") library(goseq) supportedGeneIDs() head(supportedOrganisms()) #gene length for mm10 biocLite("TxDb.Mmusculus.UCSC.mm10.ensGene") library(TxDb.Mmusculus.UCSC.mm10.ensGene) biocLite("org.Mm.eg.db")
Читаем данные
#universe of genes assayed.genes=scan("assayed_genes.tab", what = character()) #DE genes de.genes=scan("de.genes.tab", what = character()) gene.vector=as.integer(assayed.genes%in%de.genes) names(gene.vector)=assayed.genes table(gene.vector)
Probability Weighting Function
pwf <- nullp(gene.vector, "mm10", "ensGene")
GO анализ
g.annot=getgo(assayed.genes, "mm10", "ensGene", fetch.cats=c("GO:CC","GO:BP","GO:MF")) # Cellular Component; Biological Process; Molecular Function GO.wall = goseq(pwf, "mm10", "ensGene", gene2cat = g.annot, test.cats="GO:BP", method = "Wallenius") enriched.GO=subset(GO.wall, p.adjust(GO.wall$over_represented_pvalue, method="BH")<0.05, select=c("category", "over_represented_pvalue", "numDEInCat", "term", "ontology"))
Визуализация
sm=enriched.GO[enriched.GO$ontology=="BP",] smm=sm[1:15,] ggplot(smm, aes(smm$term, smm$numDEInCat, fill=smm$over_represented_pvalue))+ geom_bar(stat="identity", colour="black",position="dodge")+scale_fill_gradient2(low='white', mid='yellow', high='red', space='Lab')+coord_flip()+theme(axis.title.y=element_blank())+ylab("Number of DE genes")+labs(fill="p-value")+ggtitle("GO enrichment")