Wednesday, October 16, 2013

Affymetrix microarray analysis using affy package in R

#Install affy package: 
source("http://bioconductor.org/biocLite.R") 
biocLite("affy")

#Load affy:
library("affy")

#Set the working directory where the CEL files from your microarray experiment are located:
setwd("/home/folder_with_your_CEL_files")
Data <- ReadAffy()

#Use rma to background correct and normalize probe levels:
eset <- rma(Data)
e <- exprs(eset)

Index1 <- which(eset$sample == 1:3)

Index2 <- which(eset$sample == 4:6)

#Calculate d value (logFC) , samples 1-3 vs samples 4-6:
d <- rowMeans(e[, Index1]) - rowMeans(e[, Index2])
a <- rowMeans(e) 

#Plot MA plot:
png('MA_plot.png')
plot(a,d)
dev.off()


#Modify pData of eset
pd <- data.frame(experiment = c(1, 1, 1, 2, 2, 2), replicate = c(1, 2, 3, 1, 2, 3))
rownames(pd) <- sampleNames(eset)

pData(eset) <- pd

#Do a t-test, with no multiple correction:
source("http://bioconductor.org/biocLite.R") 
biocLite("genefilter")

library("genefilter")
tt <- rowttests(e, factor(eset$experiment))

lod <- -log10(tt$p.value)

#Plot a Volcano 
png('Volcano.png')
plot(d, lod, cex = 0.25, main = "Volcano plot for $t$-test")
abline(h = 2)
dev.off() 

  

table(tt$p.value <= 0.01)
FALSE  TRUE
53403  1272 


#Install limma package: 
source("http://bioconductor.org/biocLite.R") 
biocLite("limma")

library("limma")
design <- model.matrix(~eset$experiment)

fit <- lmFit(eset, design)
ebayes <- eBayes(fit)

#Top 10 genes, multiple testing correction, Benjamini and Hochberg, generate html report:
tab <- topTable(ebayes, coef = 2, adjust.method = "BH", n = 10)
genenames <- as.character(tab$ID)

#Install annotate package: 
source("http://bioconductor.org/biocLite.R") 
biocLite("annotate") 
library("annotate") 

annotation(eset)
[1] "hgu133plus2"
 

#Install hgu133plus2.db package: 
source("http://bioconductor.org/biocLite.R") 
biocLite("hgu133plus2.db") 
library("hgu133plus2.db")

map <- getAnnMap("ENTREZID", "hgu133plus2", load=TRUE, type=c("env", "db"))
Warning message:
getAnnMap: package hgu133plus2 not available, using hgu133plus2.db instead 


ll <- getEG(genenames, "hgu133plus2.db")
sym <- getSYMBOL(genenames, "hgu133plus2.db")

tab <- data.frame(sym, signif(tab[, -1], 3))

tab <- data.frame(rownames(tab), tab) 
colnames(tab)[1] <- c("Probe ID") 

ll <- list(ll)
htmlpage(ll, filename="Multiple_testing_corrected_Top10_genes.html", title="HTML report", othernames=tab, table.head=c("Locus ID",colnames(tab)), table.center=TRUE, digits=6)

#Significant genes, multiple testing correction, Benjamini and Hochberg:
tab <- topTable(ebayes, coef = 2, adjust.method = "BH", n=1000)
tab <- subset(tab, adj.P.Val<=0.05)
 
genenames <- as.character(tab$ID)
sym <- getSYMBOL(genenames, "hgu133plus2.db") 
ll <- getEG(genenames, "hgu133plus2.db")
tab <- data.frame(sym, signif(tab[, -1], 3))
dim(tab) 


tab <- data.frame(rownames(tab), tab) 
colnames(tab)[1] <- c("Probe ID") 

ll <- list(ll)
htmlpage(ll, filename="Multiple_testing_corrected.html", title="HTML report", othernames=tab, table.head=c("Locus ID",colnames(tab)), table.center=TRUE, digits=6)
 

#Significant genes, p-value 0.01, no correction
tt <- subset(tt, p.value <= 0.01)
tt <- tt[order(tt$p.value), ]
genenames <- as.character(rownames(tt))
sym <- getSYMBOL(genenames, "hgu133plus2.db")

ll <- getEG(genenames, "hgu133plus2.db")

tt <- data.frame(ll, sym, signif(tt, 3))
tt <- data.frame(rownames(tt), tt)
colnames(tt)[1] <- c("Probe ID") 
colnames(tt)[2] <- c("Locus ID")

ll <- list(ll)
htmlpage(ll, filename="No_correction.html", title="HTML report", othernames=tt, table.head=c("Locus ID",colnames(tt)), table.center=TRUE, digits=6)

#Significant genes, p-value 0.05, no correction
tt <- rowttests(e, factor(eset$experiment)) 
tt <- subset(tt, p.value <= 0.05)
tt <- tt[order(tt$p.value), ]
genenames <- as.character(rownames(tt))
sym <- getSYMBOL(genenames, "hgu133plus2.db")

ll <- getEG(genenames, "hgu133plus2.db")

tt <- data.frame(ll, sym, signif(tt, 3))
tt <- data.frame(rownames(tt), tt)
colnames(tt)[1] <- c("Probe ID") 
colnames(tt)[2] <- c("Locus ID")

ll <- list(ll)
htmlpage(ll, filename="No_correction_pvalue_0.05.html", title="HTML report", othernames=tt, table.head=c("Locus ID",colnames(tt)), table.center=TRUE, digits=6)

#All genes, no correction
tt <- rowttests(e, factor(eset$experiment)) 
tt <- tt[order(tt$p.value), ]
genenames <- as.character(rownames(tt))
sym <- getSYMBOL(genenames, "hgu133plus2.db")

ll <- getEG(genenames, "hgu133plus2.db")

tt <- data.frame(ll, sym, signif(tt, 3))
tt <- data.frame(rownames(tt), tt)
colnames(tt)[1] <- c("Probe ID") 
colnames(tt)[2] <- c("Locus ID")

ll <- list(ll)
htmlpage(ll, filename="No_correction_all.html", title="HTML report", othernames=tt, table.head=c("Locus ID",colnames(tt)), table.center=TRUE, digits=6)

#Significant genes, p-value 0.05, no correction, separated up- and down- regulated genes, fold change>2
tt <- rowttests(e, factor(eset$experiment)) 
tt <- subset(tt, p.value <= 0.05)
tt_up <- subset(tt, dm>1)
tt_down <- subset(tt, dm< -1)

tt_up <- tt_up[order(tt_up$dmdecreasing=T), ]
tt_down <- tt_down[order(tt_down$dm, decreasing=F), ] 

genenames_up <- as.character(rownames(tt_up))
genenames_down <- as.character(rownames(tt_down))

sym_up <- getSYMBOL(genenames_up, "hgu133plus2.db")
ll_up <- getEG(genenames_up, "hgu133plus2.db")
sym_down <- getSYMBOL(genenames_down, "hgu133plus2.db")
ll_down <- getEG(genenames_down, "hgu133plus2.db")

tt_up <- data.frame(ll_up, sym_up, signif(tt_up, 3))
tt_up <- data.frame(rownames(tt_up), tt_up)
colnames(tt_up)[1] <- c("Probe ID") 
colnames(tt_up)[2] <- c("Locus ID")


tt_down <- data.frame(ll_down, sym_down, signif(tt_down, 3))
tt_down <- data.frame(rownames(tt_down), tt_down)
colnames(tt_down)[1] <- c("Probe ID") 
colnames(tt_down)[2] <- c("Locus ID")

ll_up <- list(ll_up)
htmlpage(ll_up, filename="No_correction_pvalue_0.05_upreg.html", title="HTML report", othernames=tt_up, table.head=c("Locus ID",colnames(tt_up)), table.center=TRUE, digits=6)

ll_down <- list(ll_down)
htmlpage(ll_down, filename="No_correction_pvalue_0.05_downreg.html", title="HTML report", othernames=tt_down, table.head=c("Locus ID",colnames(tt_down)), table.center=TRUE, digits=6)


#Significant genes, p-value 0.05, no correction, separated up- and down- regulated genes, fold change>1.5
tt <- rowttests(e, factor(eset$experiment)) 
tt <- subset(tt, p.value <= 0.05)
tt_up <- subset(tt, dm> 0.5849)
tt_down <- subset(tt, dm< -0.5849)

tt_up <- tt_up[order(tt_up$dm, decreasing=T), ]
tt_down <- tt_down[order(tt_down$dm, decreasing=F), ] 

genenames_up <- as.character(rownames(tt_up))
genenames_down <- as.character(rownames(tt_down))

sym_up <- getSYMBOL(genenames_up, "hgu133plus2.db")
ll_up <- getEG(genenames_up, "hgu133plus2.db")
sym_down <- getSYMBOL(genenames_down, "hgu133plus2.db")
ll_down <- getEG(genenames_down, "hgu133plus2.db")

tt_up <- data.frame(ll_up, sym_up, signif(tt_up, 3))
tt_up <- data.frame(rownames(tt_up), tt_up)
colnames(tt_up)[1] <- c("Probe ID") 
colnames(tt_up)[2] <- c("Locus ID")

tt_down <- data.frame(ll_down, sym_down, signif(tt_down, 3))
tt_down <- data.frame(rownames(tt_down), tt_down)
colnames(tt_down)[1] <- c("Probe ID") 
colnames(tt_down)[2] <- c("Locus ID")

ll_up <- list(ll_up)
htmlpage(ll_up, filename="No_correction_pvalue_0.05_upreg1.5.html", title="HTML report", othernames=tt_up, table.head=c("Locus ID",colnames(tt_up)), table.center=TRUE, digits=6)

ll_down <- list(ll_down)
htmlpage(ll_down, filename="No_correction_pvalue_0.05_downreg1.5.html", title="HTML report", othernames=tt_down, table.head=c("Locus ID",colnames(tt_down)), table.center=TRUE, digits=6)

Tuesday, October 15, 2013

DNA Methylation with Lumi

  • source("http://bioconductor.org/biocLite.R")
    biocLite("lumi")
    http://www.bioconductor.org/packages/2.12/bioc/vignettes/lumi/inst/doc/methylationAnalysis.R
  • library(lumi)
  • sampleInfo <- read.table("../../../barcode.txt",sep="\t",header=T)
  • example.lumiMethy <- importMethyIDAT(sampleInfo, dataPath=getwd(),lib=NULL)
  • ### R code from vignette source 'vignettes/lumi/inst/doc/methylationAnalysis.Rnw'
    
    ###################################################
    ### code chunk number 1: load library
    ###################################################
    library(lumi)
    
    
    ###################################################
    ### code chunk number 2: load example dataset
    ###################################################
    ## load example data (a methyLumiM object)
    #data(example.lumiMethy)
    ## summary of the example data
    #example.lumiMethy
    ## print sample Names
    sampleNames(example.lumiMethy)
    
    
    ###################################################
    ### code chunk number 3: sampleRelation
    ###################################################
    plotSampleRelation(example.lumiMethy, method='mds', cv.Th=0) 
    
    
    ###################################################
    ### code chunk number 4: sampleRelationTree
    ###################################################
    plotSampleRelation(example.lumiMethy, method='cluster', cv.Th=0) 
    
    
    ###################################################
    ### code chunk number 5: load example titration dataset
    ###################################################
    ## load the tritration data (a methyLumiM object)
    #data(example.methyTitration)
    ## summary of the example data
    #example.methyTitration
    ## print sample Names
    #sampleNames(example.methyTitration)
    
    
    ###################################################
    ### code chunk number 6: sampleRelationTitration
    ###################################################
    plotSampleRelation(example.methyTitration, method='mds', cv.Th=0) 
    
    
    ###################################################
    ### code chunk number 7: densityMTitration
    ###################################################
    ## plot the density
    density(example.methyTitration, xlab="M-value") 
    
    
    ###################################################
    ### code chunk number 8: densityM
    ###################################################
    ## specify the colors of control and treatment samples
    sampleColor <- rep(1, ncol(example.lumiMethy))
    sampleColor[grep("Treat", sampleNames(example.lumiMethy))] <- 2 
    
    density(example.lumiMethy, col=sampleColor, xlab="M-value") ## plot the density
    
    
    ###################################################
    ### code chunk number 9: boxplotM
    ###################################################
    ## Because the distribution of M-value has two modes, we use a boxplot different from regular ones
    boxplot(example.lumiMethy)
    
    
    ###################################################
    ### code chunk number 10: densityColorBiasBoth
    ###################################################
    plotColorBias1D(example.lumiMethy)
    
    
    ###################################################
    ### code chunk number 11: densityColorBiasMethy
    ###################################################
    plotColorBias1D(example.lumiMethy, channel='methy')
    
    
    ###################################################
    ### code chunk number 12: densityColorBiasUnmethy
    ###################################################
    plotColorBias1D(example.lumiMethy, channel='unmethy')
    
    
    ###################################################
    ### code chunk number 13: boxplotColorBiasMethy
    ###################################################
    boxplotColorBias(example.lumiMethy, channel='methy')
    
    
    ###################################################
    ### code chunk number 14: boxplotColorBiasUnmethy
    ###################################################
    boxplotColorBias(example.lumiMethy, channel='unmethy')
    
    
    ###################################################
    ### code chunk number 15: color balance summary
    ###################################################
    ## summary of color balance information of individual samples
    colorBiasSummary(example.lumiMethy[,1:8], channel='methy')
    
    
    ###################################################
    ### code chunk number 16: scatterColorBias1
    ###################################################
    plotColorBias2D(example.lumiMethy, selSample=1, cex=2)
    
    
    ###################################################
    ### code chunk number 17: boxplotColorBiasSum
    ###################################################
    boxplotColorBias(example.lumiMethy, channel='sum')
    
    
    ###################################################
    ### code chunk number 18: densityColorBiasSum
    ###################################################
    plotColorBias1D(example.lumiMethy, channel='sum')
    
    
    ###################################################
    ### code chunk number 19: densityIntensity
    ###################################################
    density(estimateIntensity(example.lumiMethy), xlab="log2(CpG-site Intensity)")
    
    
    ###################################################
    ### code chunk number 20: boxplotIntensity
    ###################################################
    boxplot(estimateIntensity(example.lumiMethy))
    
    
    ###################################################
    ### code chunk number 21: pairsColor
    ###################################################
    ## get the color channel information
    colorChannel <- as.character(pData(featureData(example.lumiMethy))[, "COLOR_CHANNEL"])
    ## replace the "Red" and "Grn" as color names defined in R
    colorChannel[colorChannel == 'Red'] <- 'red'
    colorChannel[colorChannel == 'Grn'] <- 'green'
    ## select a subet of sample for pair plot
    selSample <- c( "Ctrl1", "Ctrl1.rep", "Treat1", "Treat1.rep")
    ## plot pair plot with the dots in scatter plot colored based on the color channels
    pairs(estimateIntensity(example.lumiMethy[, selSample]), dotColor= colorChannel, main="Pair plot of CpG-site Intensity")
    
    
    ###################################################
    ### code chunk number 22: color balance adjustment
    ###################################################
    ## summary of color balance information of individual samples
    lumiMethy.c.adj <- lumiMethyC(example.lumiMethy)
    
    
    ###################################################
    ### code chunk number 23: densityColorBiasSumAdj
    ###################################################
    plotColorBias1D(lumiMethy.c.adj, channel='sum')
    
    
    ###################################################
    ### code chunk number 24: boxplotColorBiasSumAdj
    ###################################################
    boxplotColorBias(lumiMethy.c.adj, channel='sum')
    
    
    ###################################################
    ### code chunk number 25: scatterColorBias1Adj
    ###################################################
    ## plot the color balance adjusted scatter plot of two color channels
    plotColorBias2D(lumiMethy.c.adj, selSample=1, cex=2)
    
    
    ###################################################
    ### code chunk number 26: pairsColorAdj
    ###################################################
    ## plot pairwise plot after color balance adjustment 
    pairs(estimateIntensity(lumiMethy.c.adj[, selSample]), dotColor= colorChannel, main="Pair plot of CpG-site Intensity")
    
    
    ###################################################
    ### code chunk number 27: background adjustment
    ###################################################
    ##separately adjust backgrounds of two color channels
    lumiMethy.b.adj <- lumiMethyB(example.lumiMethy, method="bgAdjust2C", separateColor=TRUE)
    
    ##background adjustment of individual samples
    lumiMethy.bc.adj <- lumiMethyB(lumiMethy.c.adj, method="bgAdjust2C")
    
    
    ###################################################
    ### code chunk number 28: bgDensityMethy
    ###################################################
    ## plot the background mode of methylated probe data of first five example samples
    plotColorBias1D(example.lumiMethy[,1:5], channel='methy', xlim=c(-1000,5000), logMode=FALSE)
    
    
    ###################################################
    ### code chunk number 29: bgAdjDensityMethy
    ###################################################
    ## plot the background mode of methylated probe data of first five example samples
    plotColorBias1D(lumiMethy.b.adj [,1:5], channel='methy', xlim=c(-1000,5000), logMode=FALSE)
    
    
    ###################################################
    ### code chunk number 30: bcAdjDensityMethy
    ###################################################
    ## plot the background mode of methylated probe data of first five example samples
    plotColorBias1D(lumiMethy.bc.adj [,1:5], channel='methy', xlim=c(-1000,5000), logMode=FALSE)
    
    
    ###################################################
    ### code chunk number 31: Normalization
    ###################################################
    ## Perform SSN normalization based on color balance adjusted data
    lumiMethy.c.ssn <- lumiMethyN(lumiMethy.c.adj, method='ssn')
    ## Perform quantile normalization based on color balance adjusted data
    lumiMethy.c.quantile <- lumiMethyN(lumiMethy.c.adj, method='quantile')
    
    
    ###################################################
    ### code chunk number 32: sampleRelationTreeNormalized
    ###################################################
    plotSampleRelation(lumiMethy.c.ssn, method='cluster', cv.Th=0) 
    
    
    ###################################################
    ### code chunk number 33: densitySSN
    ###################################################
    ## plot the density of M-values after SSN normalization
    density(lumiMethy.c.ssn, col= sampleColor, main="Density plot after SSN normalization")
    
    
    ###################################################
    ### code chunk number 34: densityQuantile
    ###################################################
    ## plot the density of M-values after quantile normalization
    density(lumiMethy.c.quantile, col= sampleColor, main="Density plot after quantile normalization")
    
    
    ###################################################
    ### code chunk number 35: densityIntensityNormalizedSSN
    ###################################################
    density(estimateIntensity(lumiMethy.c.ssn), col= sampleColor,  xlab="log2(CpG-site Intensity)")
    
    
    ###################################################
    ### code chunk number 36: densityIntensityNormalizedQ
    ###################################################
    density(estimateIntensity(lumiMethy.c.quantile), col= sampleColor,  xlab="log2(CpG-site Intensity)")
    
    
    ###################################################
    ### code chunk number 37: boxplotColorBiasNormalized
    ###################################################
    boxplotColorBias(lumiMethy.c.quantile, channel='sum')
    
    
    ###################################################
    ### code chunk number 38: pairMNormalize
    ###################################################
    ## select a subet of sample for pair plot
    selSample <- c( "Ctrl1", "Ctrl1.rep", "Treat1", "Treat1.rep")
    ## plot pair plot with the dots in scatter plot colored based on the color channels
    pairs(lumiMethy.c.quantile[, selSample], dotColor= colorChannel, main="Pair plot of M-value after normalization")
    
    
    ###################################################
    ### code chunk number 39: gammaFit
    ###################################################
    ## Fit the two component gamma mixture model of the first sample of example.lumiMethy
    fittedGamma <- gammaFitEM(exprs(example.lumiMethy)[,1], plotMode=TRUE)
    
    
    ###################################################
    ### code chunk number 40: estimate methylation status based on M-value
    ###################################################
    ## estimate the methylation status based on the results of gammaFitEM
    methyCall <- methylationCall(fittedGamma)
    table(methyCall)
    
    
    ###################################################
    ### code chunk number 41: estimate methylation status  of a LumiMethyM object
    ###################################################
    ## estimate the methylation status of a LumiMethyM object
    methyCall <- lumiMethyStatus(example.lumiMethy[,1:4])
    head(methyCall)
    
    ## retrieve the methylation probability matrix
    methyProb <- attr(methyCall, "probability")
    head(methyProb)
    
    
    ###################################################
    ### code chunk number 42: user defined preprocessing functions (eval = FALSE)
    ###################################################
    ## ## suppose "userB" is a user defined background adjustment method
    ##  lumiMethy.b.adj <- lumiMethyB(example.lumiMethy, method=userB, separateColor=TRUE) # not Run
    ## 
    ## ## suppose "userC" is a user defined color balance adjustment method
    ##  lumiMethy.c.adj <- lumiMethyC(example.lumiMethy, method=userC, separateColor=TRUE) # not Run
    ## 
    ## ## suppose "userN" is a user defined probe level normalization method
    ##  lumiMethy.c.n <- lumiMethyN(lumiMethy.c.adj, method= userN, separateColor=TRUE) # not Run
    
    
    ###################################################
    ### code chunk number 43: Color channel information
    ###################################################
    ## retrieve the featureData information
    ff <- pData(featureData(example.lumiMethy))
    
    ## show the color channel information
    head(ff)
    
    ## add user provided color channel information if it is not existed in the featureData
    # example.lumiMethy <- addAnnotationInfo(example.lumiMethy, lib="IlluminaHumanMethylation27k.db")
    
    
    ###################################################
    ### code chunk number 44: options to separately process each color channel (eval = FALSE)
    ###################################################
    ## ## suppose "userB" is a user defined background adjustment method
    ##  lumiMethy.b.adj <- lumiMethyB(example.lumiMethy,  separateColor=TRUE) 
    ## 
    ## ## suppose "userC" is a user defined color balance adjustment method
    ##  lumiMethy.c.adj <- lumiMethyC(example.lumiMethy,  separateColor=TRUE) 
    ## 
    ## ## suppose "userN" is a user defined probe level normalization method
    ##  lumiMethy.c.n <- lumiMethyN(lumiMethy.c.adj,  separateColor=TRUE) 
    
    
    ###################################################
    ### code chunk number 45: Estimate detection call of a CpG site (eval = FALSE)
    ###################################################
    ## ## Estimate the detection call of a CpG site
    ## presentCount <- detectionCall(example.lumiMethy)
    
    
    ###################################################
    ### code chunk number 46: Preprocessing methylation microarray (eval = FALSE)
    ###################################################
    ## 
    ## library(lumi)
    ## ## specify the file name
    ## # fileName <- 'Example_Illumina_Methylation_profile.txt' 
    ## ## load the data
    ## # example.lumiMethy <- lumiMethyR(fileName, lib="IlluminaHumanMethylation27k.db")  # Not Run
    ## 
    ## ## Quality and color balance assessment
    ## data(example.lumiMethy)
    ## ## summary of the example data
    ## example.lumiMethy
    ## 
    ## ## preprocessing and quality control after normalization
    ## plotColorBias1D(example.lumiMethy, channel='sum')
    ## boxplotColorBias(example.lumiMethy, channel='sum')
    ## ## select interested sample to further check color balance in 2D scatter plot
    ## plotColorBias2D(example.lumiMethy, selSample=1)
    ## 
    ## ## Color balance adjustment between two color channels
    ## lumiMethy.c.adj <- lumiMethyC(example.lumiMethy)
    ## ## Check color balance after color balance adjustment
    ## boxplotColorBias(lumiMethy.c.adj, channel='sum')
    ## 
    ## ## Background adjustment is skipped because the SSN normalization includes background adjustment
    ## 
    ## ## Normalization
    ## ## Perform SSN normalization based on color balance adjusted data
    ## lumiMethy.c.ssn <- lumiMethyN(lumiMethy.c.adj, method='ssn')
    ## 
    ## ## Or we can perform quantile normalization based on color balance adjusted data
    ## # lumiMethy.c.q <- lumiMethyN(lumiMethy.c.adj, method='quantile')
    ## 
    ## ## plot the density of M-values after SSN normalization
    ## density(lumiMethy.c.ssn, main="Density plot of M-value after SSN normalization")
    ## ## comparing with the density of M-values before normalization
    ## density(example.lumiMethy, main="Density plot of M-value of the raw data")
    ## 
    ## ## output the normlized M-value as a Tab-separated text file
    myAnnot<- data.frame(
    SYMBOL=sapply(contents(IlluminaHumanMethylation450kSYMBOL), paste, collapse=", ")
    ) write.exprs(lumiMethy.c.ssn, file='processedMethylationExampleData.txt') exp<- read.table('processedMethylationExampleData.txt',header=T,sep="\t") colnames(exp)[1]<- "Probe_ID" exp$symbol <- myAnnot[exp$Probe_ID,] exp <- exp[, c(length(colnames(exp)),1:(length(colnames(exp))-1))] write.table(exp, file="DM_Mvalue.txt", col.names=T, row.names=F, sep="\t", quote=F)
    ###################################################
    ### code chunk number 47: sessionInfo
    ###################################################
    toLatex(sessionInfo())
    
    

DNA Methylation (IlluminaHumanMethylation450k) analysis



# 1: Reading Raw Files (.idat)
  1. Create Target/csv/sample file
    • TCGA (not a sample.txt or csv file)(https://sagebionetworks.jira.com/wiki/pages/viewpage.action?pageId=11206919&src=search)
      • Download the data, Use .sdrf file provided with the downloaded data to make the targets file that facilitates data reading using minfi functions into R;
      • For target file, the only important column is Basename that indicates where the files are for every patients if _Grn.idat or _Red.idat can be attached (find creat_target.pl)
    • GEO
      • ...
  2. Read .idat files

    • library(minfi)
    • targets <- read.table("target.txt", header=T, sep="\t")
    • RGSet<-read.450k.exp(targets=targets)
      • #Get the methylation matrix that will allow to extract methylated and unmethylated probes:
    • Mset.raw<-preprocessRaw(RGSet) 
      • #this doesn't apply any normalization although the package has a few methods available including the methods implemented in GenomeStudio
# 2: DNA methylation data normalization
  • Easy way:
    • m<-getMeth(Mset.raw)
    • u<-getUnmeth(Mset.raw)
    • mval<-log2((m+0.01)/(u+0.01)) #alpha=0.01
    • x<-apply(mval,2,function(x) which(is.na(x)))
    • beta_val<-m/(m+u) #alpha=0
  • Complicated way (minfi + shinyMethyl)
    • require(minfi)
    • require(minfiData)
    • phenoData <- pData(RGSet)
    • phenoData[,1:6]
    • manifest <- getManifest(RGSet)
    • manifest
    • head(getProbeInfo(manifest))
    • typeIProbes <- getProbeInfo(manifest, type = "I")$Name
    • head(typeIProbes)
    • snpProbesI <- getProbeInfo(manifest, type = "SnpI")$Name
    • snpProbesII <- getProbeInfo(manifest, type = "SnpI")$Name
    • head(snpProbesI)
    • Quality Control
      • detP <- detectionP(RGSet)
      • failed <- detP > 0.01
      • head(failed, n=3)
      • colMeans(failed)
      • sum(rowMeans(failed)>0.5)
      • MSet <- preprocessRaw(RGSet)
      • MSet
      • head(getMeth(MSet)[,1:3])
      • head(getUnmeth(MSet)[,1:3])
      • qc <- getQC(MSet)
      • head(qc)
      • plotQC(qc)
      • densityPlot(RGSet, sampGroups = phenoData$Sample_Group, main= "Beta", xlab = "Beta")
      • densityBeanPlot(RGSet, sampGroups = phenoData$Sample_Group)
      • controlStripPlot(RGSet, controls="BISULFITE CONVERSION II")
      • qcReport(RGSet, pdf= "qcReport.pdf")
    • Starting with ShinyMethyl
      • source("E:/AML/shinyMethyl-master/R/extractFromRGSet450k.R")
      • extractedDataMinfiLab <- extractFromRGSet450k(RGSet,file = "extractedDataMinfiLab.Rda")
      • source("/data/minfiLab/myRunApp.R")
      • myRunApp(appDir = "/data/minfiLab/shinyMethyl")
    • Proprocessing and normalization
      • MSet.illumina <- preprocessIllumina(RGSet, bg.correct = TRUE,normalize = "controls")
      • MSet.swan <- preprocessSWAN(RGSet)
    • MethylSet and RatioSet
      • getBeta(MSet, type = "Illumina")[1:4,1:3]
      • getM(MSet)[1:4,1:3]
      • ratioSet <- ratioConvert(MSet, what = "both", keepCN = TRUE)
      • ratioSet
    • Map to Genome
      • gRatioSet <- mapToGenome(ratioSet, genomeBuild = "hg19", mergeManifest = TRUE)
      • gRatioSet
      • showClass("GenomicRatioSet")
      • getBeta(gRatioSet)
      • getM(gRatioSet)
      • getCN(gRatioSet)
      • sampleNames <- sampleNames(gRatioSet)
      • probeNames <- featureNames(gRatioSet)
      • pheno <- pData(gRatioSet)
      • gRanges <- rowData(gRatioSet)
      • head(gRanges, n= 3)