Tuesday, November 12, 2013

Query a MySQL Database from R using RMySQL


mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -D hg19 -N \
 -e 'select concat("curl http://genome.ucsc.edu/cgi-bin/das/hg19/dna?segment=",chrom,":",txStart-35,",",txStart-1) from knownGene where strand="+"'  |\
 sh > result.concatenated.xml


12345678910111213141516171819202122232425262728
#Install the package if you've never done so
install.packages("RMySQL")
 
#Load the package
library(RMySQL)
 
# Set up a connection to your database management system.
# I'm using the public MySQL server for the UCSC genome browser (no password)
mychannel <- dbConnect(MySQL(), user="genome", host="genome-mysql.cse.ucsc.edu")
 
# Function to make it easier to query
query <- function(...) dbGetQuery(mychannel, ...)
 
# Get the UCSC gene name, start and end sites for the first 10 genes on Chromosome 12
query("SELECT name, chrom, txStart, txEnd FROM mm9.knownGene WHERE chrom='chr12' LIMIT 10;")
 
# Results are returned as a data.frame:
# name chrom txStart txEnd
# 1 uc007mwj.2 chr12 3235525 3250374
# 2 uc007mwg.2 chr12 3235790 3239112
# 3 uc007mwh.2 chr12 3235790 3239288
# 4 uc007mwi.2 chr12 3235790 3250374
# 5 uc007mwk.1 chr12 3236610 3249997
# 6 uc011yjq.1 chr12 3237284 3241410
# 7 uc007mwl.2 chr12 3247427 3309969
# 8 uc007mwm.1 chr12 3365131 3406494
# 9 uc007mwn.1 chr12 3365131 3406494
# 10 uc007mwp.2 chr12 3403882 3426747

No comments:

Post a Comment