Learning Bioinformatics: Affymetrix microarray analysis using affy package in R

#Install affy package:
source("http://bioconductor.org/biocLite.R")
biocLite("affy")

#Load affy:
library("affy")

#Set the working directory where the CEL files from your microarray experiment are located:
setwd("/home/folder_with_your_CEL_files")
Data <- ReadAffy()

#Use rma to background correct and normalize probe levels:
eset <- rma(Data)
e <- exprs(eset)

Index1 <- which(eset$sample == 1:3)
Index2 <- which(eset$sample == 4:6)

#Calculate d value (logFC) , samples 1-3 vs samples 4-6:
d <- rowMeans(e[, Index1]) - rowMeans(e[, Index2])
a <- rowMeans(e)

#Plot MA plot:
png('MA_plot.png')
plot(a,d)
dev.off()

#Modify pData of eset
pd <- data.frame(experiment = c(1, 1, 1, 2, 2, 2), replicate = c(1, 2, 3, 1, 2, 3))
rownames(pd) <- sampleNames(eset)
pData(eset) <- pd

#Do a t-test, with no multiple correction:
source("http://bioconductor.org/biocLite.R")
biocLite("genefilter")

library("genefilter")
tt <- rowttests(e, factor(eset$experiment))

lod <- -log10(tt$p.value)

#Plot a Volcano
png('Volcano.png')
plot(d, lod, cex = 0.25, main = "Volcano plot for $t$-test")
abline(h = 2)
dev.off()

table(tt$p.value <= 0.01)
FALSE TRUE
53403 1272

#Install limma package:
source("http://bioconductor.org/biocLite.R")
biocLite("limma")

library("limma")
design <- model.matrix(~eset$experiment)
fit <- lmFit(eset, design)
ebayes <- eBayes(fit)

#Top 10 genes, multiple testing correction, Benjamini and Hochberg, generate html report:
tab <- topTable(ebayes, coef = 2, adjust.method = "BH", n = 10)
genenames <- as.character(tab$ID)

#Install annotate package:
source("http://bioconductor.org/biocLite.R")
biocLite("annotate")
library("annotate")

annotation(eset)
[1] "hgu133plus2"

#Install hgu133plus2.db package:
source("http://bioconductor.org/biocLite.R")
biocLite("hgu133plus2.db")
library("hgu133plus2.db")

map <- getAnnMap("ENTREZID", "hgu133plus2", load=TRUE, type=c("env", "db"))
Warning message:
getAnnMap: package hgu133plus2 not available, using hgu133plus2.db instead

ll <- getEG(genenames, "hgu133plus2.db")
sym <- getSYMBOL(genenames, "hgu133plus2.db")

tab <- data.frame(sym, signif(tab[, -1], 3))

tab <- data.frame(rownames(tab), tab)
colnames(tab)[1] <- c("Probe ID")

ll <- list(ll)
htmlpage(ll, filename="Multiple_testing_corrected_Top10_genes.html", title="HTML report", othernames=tab, table.head=c("Locus ID",colnames(tab)), table.center=TRUE, digits=6)

#Significant genes, multiple testing correction, Benjamini and Hochberg:
tab <- topTable(ebayes, coef = 2, adjust.method = "BH", n=1000)
tab <- subset(tab, adj.P.Val<=0.05)
genenames <- as.character(tab$ID)
sym <- getSYMBOL(genenames, "hgu133plus2.db")
ll <- getEG(genenames, "hgu133plus2.db")
tab <- data.frame(sym, signif(tab[, -1], 3))
dim(tab)

tab <- data.frame(rownames(tab), tab)
colnames(tab)[1] <- c("Probe ID")

ll <- list(ll)
htmlpage(ll, filename="Multiple_testing_corrected.html", title="HTML report", othernames=tab, table.head=c("Locus ID",colnames(tab)), table.center=TRUE, digits=6)

#Significant genes, p-value 0.01, no correction
tt <- subset(tt, p.value <= 0.01)
tt <- tt[order(tt$p.value), ]
genenames <- as.character(rownames(tt))
sym <- getSYMBOL(genenames, "hgu133plus2.db")
ll <- getEG(genenames, "hgu133plus2.db")

tt <- data.frame(ll, sym, signif(tt, 3))
tt <- data.frame(rownames(tt), tt)
colnames(tt)[1] <- c("Probe ID")
colnames(tt)[2] <- c("Locus ID")

ll <- list(ll)
htmlpage(ll, filename="No_correction.html", title="HTML report", othernames=tt, table.head=c("Locus ID",colnames(tt)), table.center=TRUE, digits=6)

#Significant genes, p-value 0.05, no correction
tt <- rowttests(e, factor(eset$experiment))
tt <- subset(tt, p.value <= 0.05)
tt <- tt[order(tt$p.value), ]
genenames <- as.character(rownames(tt))
sym <- getSYMBOL(genenames, "hgu133plus2.db")
ll <- getEG(genenames, "hgu133plus2.db")

tt <- data.frame(ll, sym, signif(tt, 3))
tt <- data.frame(rownames(tt), tt)
colnames(tt)[1] <- c("Probe ID")
colnames(tt)[2] <- c("Locus ID")

ll <- list(ll)
htmlpage(ll, filename="No_correction_pvalue_0.05.html", title="HTML report", othernames=tt, table.head=c("Locus ID",colnames(tt)), table.center=TRUE, digits=6)

#All genes, no correction
tt <- rowttests(e, factor(eset$experiment))
tt <- tt[order(tt$p.value), ]
genenames <- as.character(rownames(tt))
sym <- getSYMBOL(genenames, "hgu133plus2.db")
ll <- getEG(genenames, "hgu133plus2.db")

tt <- data.frame(ll, sym, signif(tt, 3))
tt <- data.frame(rownames(tt), tt)
colnames(tt)[1] <- c("Probe ID")
colnames(tt)[2] <- c("Locus ID")

ll <- list(ll)
htmlpage(ll, filename="No_correction_all.html", title="HTML report", othernames=tt, table.head=c("Locus ID",colnames(tt)), table.center=TRUE, digits=6)

#Significant genes, p-value 0.05, no correction, separated up- and down- regulated genes, fold change>2
tt <- rowttests(e, factor(eset$experiment))
tt <- subset(tt, p.value <= 0.05)
tt_up <- subset(tt, dm>1)
tt_down <- subset(tt, dm< -1)

tt_up <- tt_up[order(tt_up$dm, decreasing=T), ]
tt_down <- tt_down[order(tt_down$dm, decreasing=F), ]

genenames_up <- as.character(rownames(tt_up))
genenames_down <- as.character(rownames(tt_down))

sym_up <- getSYMBOL(genenames_up, "hgu133plus2.db")
ll_up <- getEG(genenames_up, "hgu133plus2.db")
sym_down <- getSYMBOL(genenames_down, "hgu133plus2.db")
ll_down <- getEG(genenames_down, "hgu133plus2.db")

tt_up <- data.frame(ll_up, sym_up, signif(tt_up, 3))
tt_up <- data.frame(rownames(tt_up), tt_up)
colnames(tt_up)[1] <- c("Probe ID")
colnames(tt_up)[2] <- c("Locus ID")

tt_down <- data.frame(ll_down, sym_down, signif(tt_down, 3))
tt_down <- data.frame(rownames(tt_down), tt_down)
colnames(tt_down)[1] <- c("Probe ID")
colnames(tt_down)[2] <- c("Locus ID")

ll_up <- list(ll_up)
htmlpage(ll_up, filename="No_correction_pvalue_0.05_upreg.html", title="HTML report", othernames=tt_up, table.head=c("Locus ID",colnames(tt_up)), table.center=TRUE, digits=6)

ll_down <- list(ll_down)
htmlpage(ll_down, filename="No_correction_pvalue_0.05_downreg.html", title="HTML report", othernames=tt_down, table.head=c("Locus ID",colnames(tt_down)), table.center=TRUE, digits=6)

#Significant genes, p-value 0.05, no correction, separated up- and down- regulated genes, fold change>1.5
tt <- rowttests(e, factor(eset$experiment))
tt <- subset(tt, p.value <= 0.05)
tt_up <- subset(tt, dm> 0.5849)
tt_down <- subset(tt, dm< -0.5849)

tt_up <- tt_up[order(tt_up$dm, decreasing=T), ]
tt_down <- tt_down[order(tt_down$dm, decreasing=F), ]

genenames_up <- as.character(rownames(tt_up))
genenames_down <- as.character(rownames(tt_down))

sym_up <- getSYMBOL(genenames_up, "hgu133plus2.db")
ll_up <- getEG(genenames_up, "hgu133plus2.db")
sym_down <- getSYMBOL(genenames_down, "hgu133plus2.db")
ll_down <- getEG(genenames_down, "hgu133plus2.db")

tt_up <- data.frame(ll_up, sym_up, signif(tt_up, 3))
tt_up <- data.frame(rownames(tt_up), tt_up)
colnames(tt_up)[1] <- c("Probe ID")
colnames(tt_up)[2] <- c("Locus ID")

tt_down <- data.frame(ll_down, sym_down, signif(tt_down, 3))
tt_down <- data.frame(rownames(tt_down), tt_down)
colnames(tt_down)[1] <- c("Probe ID")
colnames(tt_down)[2] <- c("Locus ID")

ll_up <- list(ll_up)
htmlpage(ll_up, filename="No_correction_pvalue_0.05_upreg1.5.html", title="HTML report", othernames=tt_up, table.head=c("Locus ID",colnames(tt_up)), table.center=TRUE, digits=6)

ll_down <- list(ll_down)
htmlpage(ll_down, filename="No_correction_pvalue_0.05_downreg1.5.html", title="HTML report", othernames=tt_down, table.head=c("Locus ID",colnames(tt_down)), table.center=TRUE, digits=6)

Learning Bioinformatics

Wednesday, October 16, 2013

Affymetrix microarray analysis using affy package in R

No comments:

Post a Comment