# make sure you have these packages installed
# -------------------------------------------
source("http://bioconductor.org/biocLite.R")
biocLite(c("minfi","IlluminaHumanMethylation450kmanifest",
"charm","GenomicRanges"))
library(minfi)
library(charm)
library(GenomicRanges)
# preprocess 450k array data
# --------------------------
f <- read.450k.sheet(".", "450k_SampleSheet")
raw <- read.450k.exp(targets = f, verbose=TRUE)
ppi <- preprocessIllumina(raw, bg.correct = TRUE, normalize = "controls")
b <- getBeta(ppi)
colnames(b) <- f$Sample_Name # give descriptive column names
# read probe annotation (for chr22 probes)
anno <- read.csv("450_probes_subset.csv", stringsAsFactors=FALSE, header=TRUE)
probeids <- intersect(rownames(b), anno$Name)
m <- match(probeids, anno$Name)
anno <- anno[m, ]
m <- match(probeids, rownames(b))
b <- b[m, ]
k <- !is.na(anno$MAPINFO) & rowSums( is.na(b) )==0
anno <- anno[k,]
b <- b[k,]
gr450k <- GRanges(seqnames=paste("chr",anno$CHR,sep=""),
ranges=IRanges(start=anno$MAPINFO,width=1))
# load RRBS data
# --------------
rf <- dir(,".bedRrbs")
names(rf) <- gsub("_chr22.bedRrbs","",rf)
rd <- lapply(rf, function(u) {
d <- read.table(u, header = FALSE, stringsAsFactors = FALSE)
d[,c(1:3,5:6,11)]
})
# have a look at how RRBS data comes
lapply(rd,head)
# function to convert to methylation levels
mapNeg2PosRRBS <- function(p) {
xs <- split(p, p$V6)
pos <- xs[["+"]]
neg <- xs[["-"]]
key <- paste(pos$V1, pos$V3-1, sep = ".")
keyn <- paste(neg$V1, neg$V2, sep = ".")
# finding the pairs
m.pos <- match(key, keyn)
n <- is.na(m.pos)
# positive strand
x <- data.frame(chr=pos$V1, position=pos$V2+1, n=pos$V5,
nC=round( pos$V5*pos$V11/100 ))
# negative strand that match with positive strand
mn <- m.pos[!n]
x$n[!n] <- x$n[!n] + neg$V5[mn]
x$nC[!n] <- x$nC[!n] + round( neg$V5[mn]*neg$V11[mn]/100 )
# negative strand only
m.neg <- match(keyn, key)
nas.neg <- is.na(m.neg)
y <- data.frame(chr=neg$V1, position=neg$V2, n=neg$V5,
nC=round( neg$V5*neg$V11/100 ))[nas.neg, ]
res <- rbind(x,y)
GRanges(seqnames=res$chr, IRanges(start=res$position, width=1),
n=res$n, nC=res$nC)
}
rdc <- lapply(rd, mapNeg2PosRRBS)
# look at converted data
rdc
names(rdc)
colnames(b)
# do spot checks: match up RRBS and 450k readout for same CpG sites
v <- values(rdc[[1]])
meth <- v$nC / v$n
fo <- findOverlaps(gr450k, rdc[[1]])
plot(b[fo@queryHits,1],meth[fo@subjectHits],xlab="450k", ylab="RRBS")
# make design matrices for DMR finding
grp <- gsub("_[1-3]","",colnames(b))
d1 <- model.matrix(~grp)
d0 <- d1[,1,drop=FALSE]
b1 <- b*.98+.01
range(b)
range(b1)
o <- order(anno$CHR, anno$MAPINFO)
# find differentially methylated regions
pns <- clusterMaker(anno$CHR, anno$MAPINFO, maxGap=500)
dmrs <- dmrFind(p=b1[o,], mod=d1, mod0=d0, coef=2, pns=pns[o],
chr=anno$CHR[o], pos=anno$MAPINFO[o], svs=0,
use.limma=TRUE, use="swald", Q=0.97)
# Exercise 1. Make histograms of "beta" values (3 LNCaP replicates,
# 3 PrEC replicates). Are there (global) differences between cancer and
# normal? What if you restrict to probes only in CpG islands?
# (You can get this information for chr22 in the annotation file).
# Exercise 2. Does the concordance between 450k and RRBS improve
# when you require a higher depth in the RRBS data?
# Exercise 3. For the DMRs found using 450k array data, make a plot
# of the RRBS data in the same regions to verify that they are
# calling the same diff. methylated regions.
Monday, November 18, 2013
DNA methylation R exercises - FGCZ
http://www.fgcz.ch/education/StatMethodsExpression/05_DNAme_R_exercises.txt
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment