AdvGeneMap2018Commands
From Statistical Genetics Courses
Revision as of 17:38, 3 January 2018 by Serveradmin (Talk | contribs)
GenABEL
# ---- Shell: export the cleaned GWAS data as transposed PLINK files ----
# Two exports of the same genotypes, differing only in the phenotype column
# taken from pheno.phen: "Aff" (-> gwa_gabel.*) and "systolic"
# (-> gwa_gabel_qtl.*).
plink --file GWAS_clean4 --pheno pheno.phen --pheno-name Aff --transpose --recode --out gwa_gabel --noweb
plink --file GWAS_clean4 --pheno pheno.phen --pheno-name systolic --transpose --recode --out gwa_gabel_qtl --noweb

# ---- Start an R session; everything below is R code ----
R
library(GenABEL)

# ---- Quantitative trait: import data ----
# Convert the transposed PLINK files to GenABEL's raw genotype format, then
# load genotypes together with the phenotype file.
# NOTE(review): gwa_gabel_qtl.praw is not created by any command shown here;
# it is assumed to already exist in the working directory.
convert.snp.tped(
  tped = "gwa_gabel_qtl.tped",
  tfam = "gwa_gabel_qtl.tfam",
  out = "gwa_gabel_qtl.raw",
  strand = "u"
)
g.dat <- load.gwaa.data(
  phen = "gwa_gabel_qtl.praw",
  gen = "gwa_gabel_qtl.raw",
  force = TRUE
)

# Inspect the structure of the loaded object.
slotNames(g.dat)
slotNames(g.dat@gtdata)
colnames(g.dat@phdata)

# Number of individuals and number of SNPs.
sample.size <- g.dat@gtdata@nids
snps.total <- g.dat@gtdata@nsnps
print(c(sample.size, snps.total))

# ---- Quantitative trait: phenotype summary ----
summary(g.dat@phdata$disease)
hist(
  g.dat@phdata$disease,
  main = "Quantitative Phenotype data summary",
  xlab = "Systolic pressure",
  freq = FALSE,
  breaks = 20,
  col = "gray"
)
rug(g.dat@phdata$disease)

# ---- Quantitative trait: per-SNP linear regression scan ----
test.snp <- scan.glm("disease ~ CRSNP", family = gaussian(), data = g.dat)
names(test.snp)

# Significance threshold used for every hit list below.
alpha <- 5e-8
test.snp$snpnames[test.snp$P1df < alpha]
test.snp$P1df[test.snp$P1df < alpha]

# ---- Quantitative trait: score test scan ----
test.qt <- qtscore(disease, data = g.dat, trait = "gaussian")
slotNames(test.qt)
names(test.qt@results)
head(results(test.qt))
test.qt@lambda
descriptives.scan(test.qt)

# SNPs below the threshold: names, 1-df p-values, and the Pc1df p-values.
row.names(results(test.qt))[results(test.qt)$P1df < alpha]
results(test.qt)$P1df[results(test.qt)$P1df < alpha]
results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]

# QQ plot of observed vs. expected -log10 p-values.
obs <- sort(results(test.qt)$P1df)
ept <- ppoints(obs)
plot(
  -log10(ept), -log10(obs),
  main = "GWAS QQ plot, qtl",
  xlab = "Expected -log10(pvalue)",
  ylab = "Observed -log10(pvalue)"
)
abline(0, 1, col = "red")   # identity line
abline(h = 8, lty = 2)      # horizontal reference line at -log10(p) = 8

# Plot of the scan results.
plot(test.qt, col = "black")

# ---- Quantitative trait: score test adjusted for sex ----
test.qt.sex <- qtscore(disease ~ sex, data = g.dat, trait = "gaussian")
# Select hits using the sex-adjusted scan's own p-values (the unadjusted
# test.qt p-values would just reproduce the unadjusted hit list).
row.names(results(test.qt.sex))[results(test.qt.sex)$P1df < alpha]
summary(lm(disease ~ sex, data = g.dat))

# ---- Binary trait: import data ----
# NOTE(review): gwa_gabel.praw is likewise assumed to already exist.
convert.snp.tped(
  tped = "gwa_gabel.tped",
  tfam = "gwa_gabel.tfam",
  out = "gwa_gabel.raw",
  strand = "u"
)
b.dat <- load.gwaa.data(
  phen = "gwa_gabel.praw",
  gen = "gwa_gabel.raw",
  force = TRUE
)
slotNames(b.dat)
slotNames(b.dat@gtdata)
colnames(b.dat@phdata)
# ---- Binary trait: sample and marker counts ----
b.dat@gtdata@nids
case.size <- length(which(b.dat@phdata$disease == 1))
control.size <- length(which(b.dat@phdata$disease == 0))
case.size
control.size
snpsb.total <- b.dat@gtdata@nsnps

# ---- Binary trait: per-SNP logistic regression scan ----
testb.snp <- scan.glm("disease ~ CRSNP", family = binomial(), data = b.dat)
names(testb.snp)

# Significance threshold used for every hit list below.
alpha <- 5e-8
testb.snp$snpnames[testb.snp$P1df < alpha]
testb.snp$P1df[testb.snp$P1df < alpha]

# ---- Binary trait: score test scan ----
testb.qt <- qtscore(disease, data = b.dat, trait = "binomial")
slotNames(testb.qt)
descriptives.scan(testb.qt)
row.names(results(testb.qt))[results(testb.qt)$P1df < alpha]
results(testb.qt)$P1df[results(testb.qt)$P1df < alpha]
results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha]

# ---- Population structure: IBS matrix and classical MDS ----
gkin <- ibs(g.dat, weight = "freq")
gkin[1:10, 1:10]

# Turn the IBS matrix into distances (0.5 - gkin) and keep 10 MDS components.
cps.full <- cmdscale(as.dist(0.5 - gkin), eig = TRUE, k = 10)
names(cps.full)
cps <- cps.full$points

# First two components; plotting symbol taken from the popn phenotype column.
plot(cps[, 1], cps[, 2], pch = g.dat@phdata$popn)
legend("topright", c("TSI", "MEX", "CEU"), pch = c(1, 2, 3))

# ---- Association adjusted for the leading MDS components ----
# Name the components C1..C10 and append them to a copy of the phenotype data.
colnames(cps) <- paste0("C", 1:10)
gpc.dat <- g.dat
gpc.dat@phdata <- cbind(g.dat@phdata, cps)

test.pc.a <- scan.glm(
  "disease ~ CRSNP + C1 + C2 + C3 + C4 + C5",
  family = gaussian(),
  data = gpc.dat
)
test.pc.a$snpnames[test.pc.a$P1df < alpha]
test.pc.a$P1df[test.pc.a$P1df < alpha]

test.pc.b <- qtscore(
  disease ~ C1 + C2 + C3 + C4 + C5,
  data = gpc.dat,
  trait = "gaussian"
)
test.pc.b@lambda

# ---- MDS eigenvalue plots ----
# Share of the eigenvalue total carried by each of the first 10 components.
plot(
  cps.full$eig[1:10] / sum(cps.full$eig),
  axes = FALSE,
  type = "b",
  xlab = "Components",
  ylim = c(0, 0.05),
  ylab = "Proportion of Variations",
  main = "MDS analysis scree plot"
)
axis(1, 1:10)
axis(2)

# Cumulative share over the first 10 components.
plot(
  cumsum(cps.full$eig[1:10]) / sum(cps.full$eig),
  axes = FALSE,
  type = "b",
  ylim = c(0, 0.2),
  xlab = "Components",
  ylab = "Proportion of Variations",
  main = "MDS analysis cumulative plot"
)
axis(1, 1:10)
axis(2)

# ---- Genomic control on the unadjusted quantitative-trait scan ----
row.names(results(test.qt))[results(test.qt)$Pc1df < alpha]
results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]
test.qt@lambda

# Observed vs. expected 1-df chi-square statistics.
obs <- sort(results(test.qt)$chi2.1df)
ept <- sort(qchisq(ppoints(obs), df = 1))
plot(
  ept, obs,
  main = "Genomic control (lambda = slope \nof the dashed line)",
  xlab = "Expected chisq, 1df",
  ylab = "Observed chisq, 1df"
)
abline(0, 1, col = "red")                  # identity line
abline(0, test.qt@lambda[[1]], lty = 2)    # dashed line with slope lambda

# Median observed statistic divided by 0.456 (approximately the median of a
# 1-df chi-square distribution), for comparison with test.qt@lambda.
median(results(test.qt)$chi2.1df) / 0.456

# QQ plot of the Pc1df p-values.
obs <- sort(results(test.qt)$Pc1df)
ept <- ppoints(obs)
plot(
  -log10(ept), -log10(obs),
  main = "GWAS QQ plot adj. via Genomic Control",
  xlab = "Expected -log10(pvalue)",
  ylab = "Observed -log10(pvalue)"
)
abline(0, 1, col = "red")
abline(h = 8, lty = 2)

# ---- EIGENSTRAT-style adjustment (egscore) ----
# Copy the IBS matrix and overwrite its diagonal with hom(g.dat)$Var before
# passing it to egscore as the kinship matrix.
adj.gkin <- gkin
diag(adj.gkin) <- hom(g.dat)$Var

test.eg <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = 2)
descriptives.scan(test.eg)
snp.eg <- row.names(results(test.eg))[results(test.eg)$P1df < alpha]
pvalue.eg <- results(test.eg)$P1df[results(test.eg)$P1df < alpha]
lambda.eg <- test.eg@lambda
snp.eg
pvalue.eg
lambda.eg

# Print the lambda estimate for 1 to 10 axes.
for (k in seq_len(10)) {
  test.tmp <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = k)
  print(test.tmp@lambda$estimate)
}

# QQ plot of the Pc1df p-values from the egscore scan.
obs <- sort(results(test.eg)$Pc1df)
ept <- ppoints(obs)
plot(
  -log10(ept), -log10(obs),
  main = "GWAS QQ plot adj. w/ EIGENSTRAT",
  xlab = "Expected -log10(pvalue)",
  ylab = "Observed -log10(pvalue)"
)
abline(0, 1, col = "red")
abline(h = 8, lty = 2)

# Overlay the egscore results on the plot of the original scan.
plot(test.qt, col = "black")
add.plot(test.eg, col = "gray", pch = 3)
legend(
  "topright",
  c("Original plot", "After correction w/ EIGENSTRAT"),
  pch = c(1, 3)
)