GenABEL Exercise

R:
 # Load files: read the quantitative-trait data set into GenABEL.
 library(GenABEL)
 # Convert the transposed-ped genotype files (.tped/.tfam) into GenABEL's raw
 # genotype file, which load.gwaa.data() reads below.
 convert.snp.tped(
   tped = "gwa_gabel_qtl.tped",
   tfam = "gwa_gabel_qtl.tfam",
   out = "gwa_gabel_qtl.raw",
   strand = "u"
 )
 # TRUE is spelled out: T is an ordinary variable and can be reassigned.
 g.dat <- load.gwaa.data(
   phen = "gwa_gabel_qtl.praw",
   gen = "gwa_gabel_qtl.raw",
   force = TRUE
 )
 # Inspect the structure of the loaded object: its slots, the slots of the
 # genotype part, and the phenotype column names.
 slotNames(g.dat)
 slotNames(g.dat@gtdata)
 colnames(g.dat@phdata)
 # sample size
 sample.size <- g.dat@gtdata@nids
 # number of SNPs
 snps.total <- g.dat@gtdata@nsnps
 print(c(sample.size, snps.total))
 # Trait: numeric summary of the `disease` phenotype column, then a density
 # histogram (freq = FALSE) with a rug of the individual observations.
 summary(g.dat@phdata$disease)
 hist(
   g.dat@phdata$disease,
   main = "Quantitative Phenotype data summary",
   xlab = "Systolic pressure measure",
   freq = FALSE,
   breaks = 20,
   col = "gray"
 )
 rug(g.dat@phdata$disease)
 ###
 # tests for association
 ###
 # GLM test: scan.glm() fits the model once per SNP, with CRSNP standing in
 # for the SNP in the formula string.
 test.snp <- scan.glm("disease ~ CRSNP", family = gaussian(), data = g.dat)
 names(test.snp)
 # Significance threshold reused by every test in this exercise.
 alpha <- 5e-8
 # which() keeps only positions where the comparison is TRUE. Plain logical
 # indexing would also return an NA entry for every SNP whose p-value is NA.
 sig.snp <- which(test.snp$P1df < alpha)
 test.snp$snpnames[sig.snp]
 test.snp$P1df[sig.snp]
 # Score test
 test.qt <- qtscore(disease, data = g.dat, trait = "gaussian")
 slotNames(test.qt)
 names(test.qt@results)
 # Inflation factor stored with the scan.
 test.qt@lambda
 descriptives.scan(test.qt)
 # Extract the results table once instead of once per expression.
 res.qt <- results(test.qt)
 # which() drops SNPs with NA p-values, which logical indexing would report
 # as NA rows.
 hits.qt <- which(res.qt$P1df < alpha)
 rownames(res.qt)[hits.qt]
 res.qt$P1df[hits.qt]
 # Same filter applied to the Pc1df column (the corrected p-values).
 res.qt$Pc1df[which(res.qt$Pc1df < alpha)]
 # QQ plot: sorted observed p-values against the expected quantiles
 # i / (n + 1), both shown on the -log10 scale.
 obs <- sort(results(test.qt)$P1df)
 # seq_along() is safe when obs is empty, where 1:length(obs) gives c(1, 0).
 ept <- seq_along(obs) / (length(obs) + 1)
 plot(
   -log10(ept),
   -log10(obs),
   main = "GWAS QQ plot, qtl",
   xlab = "Expected -log10(pvalue)",
   ylab = "Observed -log10(pvalue)"
 )
 # Reference line y = x: where the points fall if p-values are uniform.
 abline(0, 1, col = "red")
 # Dashed line at the significance threshold. It is derived from alpha so the
 # plot agrees with the hit lists; a fixed height of 8 corresponds to
 # p = 1e-8, not to alpha = 5e-8.
 abline(h = -log10(alpha), lty = 2)
 # Manhattan plot
 plot(test.qt, col = "black")
 # Adding confounders: repeat the score test with sex as a covariate.
 test.qt.sex <- qtscore(disease ~ sex, data = g.dat, trait = "gaussian")
 # Select hits using the p-values of the sex-adjusted scan itself. Indexing
 # its SNP names with the unadjusted test.qt p-values would simply repeat the
 # unadjusted hit list. which() also drops NA p-values.
 res.qt.sex <- results(test.qt.sex)
 rownames(res.qt.sex)[which(res.qt.sex$P1df < alpha)]
 # Regress the trait on sex alone. The phenotype data frame is passed
 # directly, so lm() does not have to coerce the gwaa.data object.
 summary(lm(disease ~ sex, data = g.dat@phdata))
 ###
 # MDS
 ###
 # Pairwise IBS matrix between individuals, weighted by allele frequency.
 gkin <- ibs(g.dat, weight = "freq")
 gkin[1:10, 1:10]
 # Classical multidimensional scaling on the distances 0.5 - gkin, keeping up
 # to 10 axes and returning the eigenvalues (eig = TRUE).
 cps.full <- cmdscale(as.dist(0.5 - gkin), eig = TRUE, k = 10)
 names(cps.full)
 cps <- cps.full$points
 # First two axes; the plotting symbol comes from the popn phenotype column.
 plot(cps[, 1], cps[, 2], pch = g.dat@phdata$popn)
 # NOTE(review): the legend position and the TSI/MEX/CEU labels assume popn is
 # coded 1, 2, 3 in that order and that the points fall in this coordinate
 # range. Confirm against the phenotype file.
 legend(-0.16, 0.06, c("TSI", "MEX", "CEU"), pch = c(1, 2, 3))
 ###
 # Corrected test
 ###
 # Incorporating PCs as predictors
 # Name the MDS axes C1, C2, ... from the actual number of columns.
 # cmdscale() can return fewer than the requested axes, in which case a fixed
 # vector of ten names would fail.
 colnames(cps) <- paste0("C", seq_len(ncol(cps)))
 # Work on a copy so g.dat keeps its original phenotype table.
 gpc.dat <- g.dat
 gpc.dat@phdata <- cbind(g.dat@phdata, cps)
 # Per-SNP GLM adjusted for the first five axes.
 test.pc.a <- scan.glm(
   "disease ~ CRSNP + C1 + C2 + C3 + C4 + C5",
   family = gaussian(),
   data = gpc.dat
 )
 # which() drops SNPs with NA p-values, which logical indexing would report
 # as NA entries.
 sig.pc.a <- which(test.pc.a$P1df < alpha)
 test.pc.a$snpnames[sig.pc.a]
 test.pc.a$P1df[sig.pc.a]
 # Score test with the same five axes as covariates, and its inflation factor.
 test.pc.b <- qtscore(
   disease ~ C1 + C2 + C3 + C4 + C5,
   data = gpc.dat,
   trait = "gaussian"
 )
 test.pc.b@lambda
 # scree plot: each of the first ten eigenvalues as a share of the sum of all
 # eigenvalues returned by cmdscale().
 plot(
   cps.full$eig[1:10] / sum(cps.full$eig),
   axes = FALSE,
   type = "b",
   xlab = "Components",
   ylim = c(0, 0.05),
   ylab = "Proportion of Variations",
   main = "MDS analysis scree plot"
 )
 # Default axes are suppressed above so that the x axis can carry one tick
 # per component.
 axis(1, 1:10)
 axis(2)
 # cumulative plot: running total of the same shares.
 plot(
   cumsum(cps.full$eig[1:10]) / sum(cps.full$eig),
   axes = FALSE,
   type = "b",
   ylim = c(0, 0.2),
   xlab = "Components",
   ylab = "Proportion of Variations",
   main = "MDS analysis cumulative plot"
 )
 axis(1, 1:10)
 axis(2)
 # Genomic control
 # Uncorrected GIF
 test.qt@lambda
 # Corrected p-value: SNPs whose Pc1df value is below alpha.
 res.gc <- results(test.qt)
 # which() drops SNPs with NA p-values, which logical indexing would report
 # as NA entries.
 hits.gc <- which(res.gc$Pc1df < alpha)
 row.names(res.gc)[hits.gc]
 res.gc$Pc1df[hits.gc]
 # Check for inflation of statistic: observed 1-df chi-square statistics
 # against the chi-square quantiles expected under the null.
 obs <- sort(results(test.qt)$chi2.1df)
 # qchisq() is increasing in its probability argument, so the expected
 # quantiles are already in order and need no extra sort().
 ept <- qchisq(seq_along(obs) / (length(obs) + 1), df = 1)
 plot(
   ept,
   obs,
   main = "Genomic control (slope is the inflation factor)",
   xlab = "Expected chisq, 1df",
   ylab = "Observed chisq, 1df"
 )
 # Red: no inflation (slope 1). Dashed: slope equal to the estimated lambda.
 abline(0, 1, col = "red")
 abline(0, test.qt@lambda$estimate, lty = 2)
 # Definition of GIF
 # Conventional definition: median observed statistic divided by the median
 # of a 1-df chi-square. qchisq(0.5, df = 1) is about 0.455; the often quoted
 # 0.456 is a rounded value. obs is used because sort() has already removed
 # any NA statistics.
 median(obs) / qchisq(0.5, df = 1)
 # GenABEL definition: slope of observed on expected statistics. coef() is
 # used rather than `$coef`, which relies on partial name matching.
 coef(lm(obs ~ ept))[2]
 # QQ plot of the corrected p-values (Pc1df), on the -log10 scale.
 obs <- sort(results(test.qt)$Pc1df)
 # seq_along() is safe when obs is empty, where 1:length(obs) gives c(1, 0).
 ept <- seq_along(obs) / (length(obs) + 1)
 plot(
   -log10(ept),
   -log10(obs),
   main = "GWAS QQ plot adj. via Genomic Control",
   xlab = "Expected -log10(pvalue)",
   ylab = "Observed -log10(pvalue)"
 )
 abline(0, 1, col = "red")
 # Dashed line at the significance threshold alpha. A fixed height of 8 would
 # mark p = 1e-8 rather than alpha = 5e-8.
 abline(h = -log10(alpha), lty = 2)
 # EIGENSTRAT
 # Copy the IBS matrix and overwrite its diagonal with the Var column of
 # hom(g.dat) before passing it to egscore() as the kin argument.
 adj.gkin <- gkin
 diag(adj.gkin) <- hom(g.dat)$Var
 # naxes = 3 is default value; two axes are requested here.
 test.eg <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = 2)
 descriptives.scan(test.eg)
 res.eg <- results(test.eg)
 # which() drops SNPs with NA p-values, which logical indexing would report
 # as NA entries.
 hits.eg <- which(res.eg$P1df < alpha)
 snp.eg <- row.names(res.eg)[hits.eg]
 pvalue.eg <- res.eg$P1df[hits.eg]
 lambda.eg <- test.eg@lambda
 snp.eg
 pvalue.eg
 lambda.eg
 # Change #PCs: rerun the EIGENSTRAT score test with 1 to 10 axes and print
 # the inflation factor estimate from each run.
 for (k in seq_len(10)) {
   test.tmp <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = k)
   print(test.tmp@lambda[["estimate"]])
 }
 # QQ plot of the Pc1df p-values from the EIGENSTRAT scan.
 obs <- sort(results(test.eg)$Pc1df)
 # seq_along() is safe when obs is empty, where 1:length(obs) gives c(1, 0).
 ept <- seq_along(obs) / (length(obs) + 1)
 # plot() is used as in the other QQ plots of this script. Both vectors are
 # already sorted, so qqplot()'s own sorting step added nothing.
 plot(
   -log10(ept),
   -log10(obs),
   main = "GWAS QQ plot adj. w/ EIGENSTRAT",
   xlab = "Expected -log10(pvalue)",
   ylab = "Observed -log10(pvalue)"
 )
 abline(0, 1, col = "red")
 # Dashed line at the significance threshold alpha. A fixed height of 8 would
 # mark p = 1e-8 rather than alpha = 5e-8.
 abline(h = -log10(alpha), lty = 2)
 # Manhattan plot comparison: the uncorrected scan in black, with the
 # EIGENSTRAT scan overlaid in gray using symbol 3.
 plot(test.qt, col = "black")
 add.plot(test.eg, col = "gray", pch = 3)
 # The legend carries the series colours so that its entries match the
 # plotted points.
 # NOTE(review): pch = 1 for the first entry assumes the default symbol of the
 # scan plot method. Confirm.
 legend(
   "topright",
   c("Original plot", "After correction w/ EIGENSTRAT"),
   pch = c(1, 3),
   col = c("black", "gray")
 )
 ###
 # Basic test, binary trait
 ###
 # load files to GenABEL: convert the transposed-ped files, then load them
 # together with the phenotype file.
 convert.snp.tped(
   tped = "gwa_gabel.tped",
   tfam = "gwa_gabel.tfam",
   out = "gwa_gabel.raw",
   strand = "u"
 )
 # TRUE is spelled out: T is an ordinary variable and can be reassigned.
 b.dat <- load.gwaa.data(
   phen = "gwa_gabel.praw",
   gen = "gwa_gabel.raw",
   force = TRUE
 )
 slotNames(b.dat)
 slotNames(b.dat@gtdata)
 colnames(b.dat@phdata)
 # sample size
 b.dat@gtdata@nids
 # number of cases and controls: rows with disease equal to 1 and to 0.
 # na.rm = TRUE leaves missing phenotypes out of both counts.
 case.size <- sum(b.dat@phdata$disease == 1, na.rm = TRUE)
 control.size <- sum(b.dat@phdata$disease == 0, na.rm = TRUE)
 case.size
 control.size
 # number of SNPs
 snpsb.total <- b.dat@gtdata@nsnps
 # GLM test: per-SNP logistic regression, with CRSNP standing in for the SNP
 # in the formula string.
 testb.snp <- scan.glm("disease ~ CRSNP", family = binomial(), data = b.dat)
 names(testb.snp)
 # Significance threshold for the binary-trait tests.
 alpha <- 5e-8
 # which() keeps only positions where the comparison is TRUE. Plain logical
 # indexing would also return an NA entry for every SNP whose p-value is NA.
 sigb.snp <- which(testb.snp$P1df < alpha)
 testb.snp$snpnames[sigb.snp]
 testb.snp$P1df[sigb.snp]
 # Score test on the binary trait.
 testb.qt <- qtscore(disease, data = b.dat, trait = "binomial")
 slotNames(testb.qt)
 descriptives.scan(testb.qt)
 # Extract the results table once instead of once per expression.
 resb.qt <- results(testb.qt)
 # which() drops SNPs with NA p-values, which logical indexing would report
 # as NA rows.
 hitsb.qt <- which(resb.qt$P1df < alpha)
 row.names(resb.qt)[hitsb.qt]
 resb.qt$P1df[hitsb.qt]
 # Same filter applied to the Pc1df column.
 resb.qt$Pc1df[which(resb.qt$Pc1df < alpha)]
Personal tools

Search

Namespaces

Views

Actions

Widgets

Widgets

Recent changes

Wanted pages

Who is online?

Tools

GenABEL Exercise

From Statistical Genetics Courses

GenABEL Exercise

Navigation menu

Short Courses

Software

Course Materials