Difference between revisions of "AdvGeneMap2018Commands"

From Statistical Genetics Courses

Jump to: navigation, search
(Created page with "==GeneABEL== plink --file GWAS_clean4 --pheno pheno.phen --pheno-name Aff --transpose --recode --out gwa_gabel --noweb<br /> plink --file GWAS_clean4 --pheno pheno.phen --phe...")
 
Line 1: Line 1:
 
==GenABEL==
 
==GenABEL==
 
 
plink --file GWAS_clean4 --pheno pheno.phen --pheno-name Aff --transpose --recode --out gwa_gabel --noweb<br /> plink --file GWAS_clean4 --pheno pheno.phen --pheno-name systolic --transpose --recode --out gwa_gabel_qtl --noweb<br /> R<br /> library(GenABEL)<br /> convert.snp.tped(tped = "gwa_gabel_qtl.tped", tfam = "gwa_gabel_qtl.tfam", out = "gwa_gabel_qtl.raw", strand = "u")<br /> g.dat &lt;- load.gwaa.data(phen = "gwa_gabel_qtl.praw", gen = "gwa_gabel_qtl.raw", force = T)<br /> slotNames(g.dat)<br /> slotNames(g.dat@gtdata)<br /> colnames(g.dat@phdata)<br /> sample.size &lt;- g.dat@gtdata@nids<br /> snps.total &lt;- g.dat@gtdata@nsnps<br /> print(c(sample.size, snps.total))<br /> summary(g.dat@phdata$disease)<br /> hist(g.dat@phdata$disease, main="Quantitative Phenotype data summary", xlab = "Systolic pressure", freq = F,breaks=20, col="gray")<br /> rug(g.dat@phdata$disease)<br /> test.snp &lt;- scan.glm('disease ~ CRSNP', family = gaussian(), data = g.dat)<br /> names(test.snp) <br /> alpha &lt;- 5e-8<br /> test.snp$snpnames[test.snp$P1df < alpha]<br /> test.snp$P1df[test.snp$P1df < alpha]<br /> test.qt &lt;- qtscore(disease, data = g.dat, trait = "gaussian")<br /> slotNames(test.qt)<br /> names(test.qt@results)<br /> head(results(test.qt))<br /> test.qt@lambda<br /> descriptives.scan(test.qt)<br /> row.names(results(test.qt))[results(test.qt)$P1df < alpha]<br /> results(test.qt)$P1df[results(test.qt)$P1df < alpha] results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]<br /> obs &lt;- sort(results(test.qt)$P1df) <br /> ept &lt;- ppoints(obs) <br /> plot(-log10(ept), -log10(obs), main = "GWAS QQ plot, qtl", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")<br /> abline(0, 1, col = "red")<br /> abline(h = 8, lty = 2)<br /> plot(test.qt, col = "black")<br /> test.qt.sex &lt;- qtscore(disease ~ sex, data = g.dat, trait = "gaussian")<br /> row.names(results(test.qt.sex))[results(test.qt)$P1df < alpha]<br /> summary(lm(disease ~ sex, data = g.dat))<br 
/> convert.snp.tped(tped = "gwa_gabel.tped", tfam = "gwa_gabel.tfam", out = "gwa_gabel.raw", strand = "u")<br /> b.dat &lt;- load.gwaa.data(phen = "gwa_gabel.praw", gen = "gwa_gabel.raw", force = T)<br /> slotNames(b.dat)<br /> slotNames(b.dat@gtdata)<br /> colnames(b.dat@phdata)<br /> b.dat@gtdata@nids<br /> case.size &lt;- length(which(b.dat@phdata$disease == 1))<br /> control.size &lt;- length(which(b.dat@phdata$disease == 0))<br /> case.size <br /> control.size <br /> snpsb.total &lt;- b.dat@gtdata@nsnps<br /> testb.snp &lt;- scan.glm('disease ~ CRSNP', family = binomial(), data = b.dat)<br /> names(testb.snp) <br /> alpha &lt;- 5e-8<br /> testb.snp$snpnames[testb.snp$P1df < alpha]<br /> testb.snp$P1df[testb.snp$P1df < alpha]<br /> testb.qt &lt;- qtscore(disease, data = b.dat, trait = "binomial")<br /> slotNames(testb.qt)<br /> descriptives.scan(testb.qt)<br /> row.names(results(testb.qt))[results(testb.qt)$P1df < alpha]<br /> results(testb.qt)$P1df[results(testb.qt)$P1df < alpha]<br /> results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha] <br /> gkin &lt;- ibs(g.dat, weight = "freq")<br /> gkin[1:10,1:10]<br /> cps.full &lt;- cmdscale(as.dist(.5 - gkin), eig = T, k = 10)<br /> names(cps.full)<br /> cps &lt;- cps.full$points<br /> plot(cps[,1], cps[,2], pch = g.dat@phdata$popn)<br /> legend("topright", c("TSI","MEX", "CEU"), pch = c(1,2,3)) <br /> colnames(cps)&lt;-c('C1','C2','C3','C4','C5','C6','C7','C8','C9','C10') <br /> gpc.dat &lt;- g.dat<br /> gpc.dat@phdata&lt;-cbind(g.dat@phdata, cps)<br /> test.pc.a &lt;- scan.glm('disease ~ CRSNP + C1 + C2 + C3 + C4 + C5', family=gaussian(), data = gpc.dat)<br /> test.pc.a$snpnames[test.pc.a$P1df < alpha]<br /> test.pc.a$P1df[test.pc.a$P1df < alpha]<br /> test.pc.b &lt;- qtscore(disease ~ C1 + C2 + C3 + C4 + C5, data = gpc.dat, trait = "gaussian") <br /> test.pc.b@lambda<br /> plot(cps.full$eig[1:10]/sum(cps.full$eig), axes = F, type = "b", xlab = "Components", ylim = c(0,0.05), ylab = "Proportion of Variations", 
main = "MDS analysis scree plot") <br /> axis(1, 1:10)<br /> axis(2)<br /> plot(cumsum(cps.full$eig[1:10])/sum(cps.full$eig), axes = F, type = "b", ylim = c(0,0.2), xlab = "Components", ylab = "Proportion of Variations", main = "MDS analysis cumulative plot") <br /> axis(1, 1:10)<br /> axis(2)<br /> row.names(results(test.qt))[results(test.qt)$Pc1df < alpha]<br /> results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]<br /> test.qt@lambda<br /> obs &lt;- sort(results(test.qt)$chi2.1df)<br /> ept &lt;- sort(qchisq(ppoints(obs), df = 1)) <br /> plot(ept, obs, main = "Genomic control (lambda = slope of the dashed line)", xlab="Expected chisq, 1df", ylab="Observed chisq, 1df")<br /> abline(0, 1, col = "red")<br /> abline(0, test.qt@lambda[1], lty = 2)<br /> median(results(test.qt)$chi2.1df)/0.456<br /> obs &lt;- sort(results(test.qt)$Pc1df)<br /> ept &lt;- ppoints(obs) <br /> plot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. via Genomic Control", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")<br /> abline(0, 1, col = "red")<br /> abline(h = 8, lty = 2) <br /> adj.gkin = gkin<br /> diag(adj.gkin) = hom(g.dat)$Var<br /> test.eg &lt;- egscore(disease, data = g.dat, kin = adj.gkin, naxes = 2)<br /> descriptives.scan(test.eg)<br /> snp.eg &lt;- row.names(results(test.eg))[results(test.eg)$P1df < alpha]<br /> pvalue.eg &lt;- results(test.eg)$P1df[results(test.eg)$P1df < alpha] lambda.eg &lt;- test.eg@lambda<br /> snp.eg <br /> pvalue.eg<br /> lambda.eg<br /> for (k in 1:10){<br /> test.tmp &lt;- egscore(disease, data = g.dat, kin = adj.gkin, naxes = k)<br /> print(test.tmp@lambda$estimate)<br /> }<br /> obs &lt;- sort(results(test.eg)$Pc1df)<br /> ept &lt;- ppoints(obs) <br /> plot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. 
w/ EIGENSTRAT", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")<br /> abline(0, 1, col = "red")<br /> abline(h = 8, lty = 2)<br /> plot(test.qt, col = "black")<br /> add.plot(test.eg, col = "gray", pch = 3)<br /> legend("topright", c("Original plot","After correction w/ EIGENSTRAT"), pch = c(1,3))
 
plink --file GWAS_clean4 --pheno pheno.phen --pheno-name Aff --transpose --recode --out gwa_gabel --noweb<br /> plink --file GWAS_clean4 --pheno pheno.phen --pheno-name systolic --transpose --recode --out gwa_gabel_qtl --noweb<br /> R<br /> library(GenABEL)<br /> convert.snp.tped(tped = "gwa_gabel_qtl.tped", tfam = "gwa_gabel_qtl.tfam", out = "gwa_gabel_qtl.raw", strand = "u")<br /> g.dat &lt;- load.gwaa.data(phen = "gwa_gabel_qtl.praw", gen = "gwa_gabel_qtl.raw", force = T)<br /> slotNames(g.dat)<br /> slotNames(g.dat@gtdata)<br /> colnames(g.dat@phdata)<br /> sample.size &lt;- g.dat@gtdata@nids<br /> snps.total &lt;- g.dat@gtdata@nsnps<br /> print(c(sample.size, snps.total))<br /> summary(g.dat@phdata$disease)<br /> hist(g.dat@phdata$disease, main="Quantitative Phenotype data summary", xlab = "Systolic pressure", freq = F,breaks=20, col="gray")<br /> rug(g.dat@phdata$disease)<br /> test.snp &lt;- scan.glm('disease ~ CRSNP', family = gaussian(), data = g.dat)<br /> names(test.snp) <br /> alpha &lt;- 5e-8<br /> test.snp$snpnames[test.snp$P1df < alpha]<br /> test.snp$P1df[test.snp$P1df < alpha]<br /> test.qt &lt;- qtscore(disease, data = g.dat, trait = "gaussian")<br /> slotNames(test.qt)<br /> names(test.qt@results)<br /> head(results(test.qt))<br /> test.qt@lambda<br /> descriptives.scan(test.qt)<br /> row.names(results(test.qt))[results(test.qt)$P1df < alpha]<br /> results(test.qt)$P1df[results(test.qt)$P1df < alpha] results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]<br /> obs &lt;- sort(results(test.qt)$P1df) <br /> ept &lt;- ppoints(obs) <br /> plot(-log10(ept), -log10(obs), main = "GWAS QQ plot, qtl", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")<br /> abline(0, 1, col = "red")<br /> abline(h = 8, lty = 2)<br /> plot(test.qt, col = "black")<br /> test.qt.sex &lt;- qtscore(disease ~ sex, data = g.dat, trait = "gaussian")<br /> row.names(results(test.qt.sex))[results(test.qt)$P1df < alpha]<br /> summary(lm(disease ~ sex, data = g.dat))<br 
/> convert.snp.tped(tped = "gwa_gabel.tped", tfam = "gwa_gabel.tfam", out = "gwa_gabel.raw", strand = "u")<br /> b.dat &lt;- load.gwaa.data(phen = "gwa_gabel.praw", gen = "gwa_gabel.raw", force = T)<br /> slotNames(b.dat)<br /> slotNames(b.dat@gtdata)<br /> colnames(b.dat@phdata)<br /> b.dat@gtdata@nids<br /> case.size &lt;- length(which(b.dat@phdata$disease == 1))<br /> control.size &lt;- length(which(b.dat@phdata$disease == 0))<br /> case.size <br /> control.size <br /> snpsb.total &lt;- b.dat@gtdata@nsnps<br /> testb.snp &lt;- scan.glm('disease ~ CRSNP', family = binomial(), data = b.dat)<br /> names(testb.snp) <br /> alpha &lt;- 5e-8<br /> testb.snp$snpnames[testb.snp$P1df < alpha]<br /> testb.snp$P1df[testb.snp$P1df < alpha]<br /> testb.qt &lt;- qtscore(disease, data = b.dat, trait = "binomial")<br /> slotNames(testb.qt)<br /> descriptives.scan(testb.qt)<br /> row.names(results(testb.qt))[results(testb.qt)$P1df < alpha]<br /> results(testb.qt)$P1df[results(testb.qt)$P1df < alpha]<br /> results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha] <br /> gkin &lt;- ibs(g.dat, weight = "freq")<br /> gkin[1:10,1:10]<br /> cps.full &lt;- cmdscale(as.dist(.5 - gkin), eig = T, k = 10)<br /> names(cps.full)<br /> cps &lt;- cps.full$points<br /> plot(cps[,1], cps[,2], pch = g.dat@phdata$popn)<br /> legend("topright", c("TSI","MEX", "CEU"), pch = c(1,2,3)) <br /> colnames(cps)&lt;-c('C1','C2','C3','C4','C5','C6','C7','C8','C9','C10') <br /> gpc.dat &lt;- g.dat<br /> gpc.dat@phdata&lt;-cbind(g.dat@phdata, cps)<br /> test.pc.a &lt;- scan.glm('disease ~ CRSNP + C1 + C2 + C3 + C4 + C5', family=gaussian(), data = gpc.dat)<br /> test.pc.a$snpnames[test.pc.a$P1df < alpha]<br /> test.pc.a$P1df[test.pc.a$P1df < alpha]<br /> test.pc.b &lt;- qtscore(disease ~ C1 + C2 + C3 + C4 + C5, data = gpc.dat, trait = "gaussian") <br /> test.pc.b@lambda<br /> plot(cps.full$eig[1:10]/sum(cps.full$eig), axes = F, type = "b", xlab = "Components", ylim = c(0,0.05), ylab = "Proportion of Variations", 
main = "MDS analysis scree plot") <br /> axis(1, 1:10)<br /> axis(2)<br /> plot(cumsum(cps.full$eig[1:10])/sum(cps.full$eig), axes = F, type = "b", ylim = c(0,0.2), xlab = "Components", ylab = "Proportion of Variations", main = "MDS analysis cumulative plot") <br /> axis(1, 1:10)<br /> axis(2)<br /> row.names(results(test.qt))[results(test.qt)$Pc1df < alpha]<br /> results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]<br /> test.qt@lambda<br /> obs &lt;- sort(results(test.qt)$chi2.1df)<br /> ept &lt;- sort(qchisq(ppoints(obs), df = 1)) <br /> plot(ept, obs, main = "Genomic control (lambda = slope of the dashed line)", xlab="Expected chisq, 1df", ylab="Observed chisq, 1df")<br /> abline(0, 1, col = "red")<br /> abline(0, test.qt@lambda[1], lty = 2)<br /> median(results(test.qt)$chi2.1df)/0.456<br /> obs &lt;- sort(results(test.qt)$Pc1df)<br /> ept &lt;- ppoints(obs) <br /> plot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. via Genomic Control", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")<br /> abline(0, 1, col = "red")<br /> abline(h = 8, lty = 2) <br /> adj.gkin = gkin<br /> diag(adj.gkin) = hom(g.dat)$Var<br /> test.eg &lt;- egscore(disease, data = g.dat, kin = adj.gkin, naxes = 2)<br /> descriptives.scan(test.eg)<br /> snp.eg &lt;- row.names(results(test.eg))[results(test.eg)$P1df < alpha]<br /> pvalue.eg &lt;- results(test.eg)$P1df[results(test.eg)$P1df < alpha] lambda.eg &lt;- test.eg@lambda<br /> snp.eg <br /> pvalue.eg<br /> lambda.eg<br /> for (k in 1:10){<br /> test.tmp &lt;- egscore(disease, data = g.dat, kin = adj.gkin, naxes = k)<br /> print(test.tmp@lambda$estimate)<br /> }<br /> obs &lt;- sort(results(test.eg)$Pc1df)<br /> ept &lt;- ppoints(obs) <br /> plot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. 
w/ EIGENSTRAT", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")<br /> abline(0, 1, col = "red")<br /> abline(h = 8, lty = 2)<br /> plot(test.qt, col = "black")<br /> add.plot(test.eg, col = "gray", pch = 3)<br /> legend("topright", c("Original plot","After correction w/ EIGENSTRAT"), pch = c(1,3))

Revision as of 17:38, 3 January 2018

GenABEL

# Export GWAS_clean4 in transposed format twice, once per phenotype column
# of pheno.phen. The .tped/.tfam outputs are read by convert.snp.tped() below.

# Phenotype "Aff" -> gwa_gabel.*
plink --file GWAS_clean4 \
      --pheno pheno.phen --pheno-name Aff \
      --transpose --recode \
      --out gwa_gabel --noweb

# Phenotype "systolic" -> gwa_gabel_qtl.*
plink --file GWAS_clean4 \
      --pheno pheno.phen --pheno-name systolic \
      --transpose --recode \
      --out gwa_gabel_qtl --noweb

# Start an interactive R session; the remaining commands are typed into R.
R
library(GenABEL)

# ---- Quantitative trait: load data ----
# Convert the transposed PLINK export into GenABEL's raw genotype file.
convert.snp.tped(
  tped = "gwa_gabel_qtl.tped",
  tfam = "gwa_gabel_qtl.tfam",
  out = "gwa_gabel_qtl.raw",
  strand = "u"
)
# NOTE(review): "gwa_gabel_qtl.praw" is not created by any command shown
# here -- confirm the phenotype file exists before running.
g.dat <- load.gwaa.data(
  phen = "gwa_gabel_qtl.praw",
  gen = "gwa_gabel_qtl.raw",
  force = TRUE
)

# Inspect the loaded object: slots, phenotype columns, and the two counts
# stored in the genotype slot.
slotNames(g.dat)
slotNames(g.dat@gtdata)
colnames(g.dat@phdata)
sample.size <- g.dat@gtdata@nids
snps.total <- g.dat@gtdata@nsnps
print(c(sample.size, snps.total))

# ---- Phenotype distribution ----
summary(g.dat@phdata$disease)
hist(
  g.dat@phdata$disease,
  main = "Quantitative Phenotype data summary",
  xlab = "Systolic pressure",
  freq = FALSE, breaks = 20, col = "gray"
)
rug(g.dat@phdata$disease)

# ---- Per-SNP GLM scan ----
test.snp <- scan.glm("disease ~ CRSNP", family = gaussian(), data = g.dat)
names(test.snp)
alpha <- 5e-8 # significance threshold reused by every scan below
test.snp$snpnames[test.snp$P1df < alpha]
test.snp$P1df[test.snp$P1df < alpha]

# ---- qtscore() scan ----
test.qt <- qtscore(disease, data = g.dat, trait = "gaussian")
slotNames(test.qt)
names(test.qt@results)
head(results(test.qt))
test.qt@lambda
descriptives.scan(test.qt)
row.names(results(test.qt))[results(test.qt)$P1df < alpha]
# The next two expressions must be separate statements; on one line without
# a separator they do not parse.
results(test.qt)$P1df[results(test.qt)$P1df < alpha]
results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]

# ---- QQ plot of the unadjusted p-values ----
obs <- sort(results(test.qt)$P1df)
ept <- ppoints(obs)
plot(
  -log10(ept), -log10(obs),
  main = "GWAS QQ plot, qtl",
  xlab = "Expected -log10(pvalue)",
  ylab = "Observed -log10(pvalue)"
)
abline(0, 1, col = "red") # identity line
# Reference line at -log10(p) = 8, i.e. p = 1e-8; alpha = 5e-8 itself sits
# at about 7.3 on this scale.
abline(h = 8, lty = 2)
# GenABEL's plot method for the scan object (presumably a Manhattan-style
# plot -- confirm against the package documentation).
plot(test.qt, col = "black")

# ---- Same scan with sex as a covariate ----
test.qt.sex <- qtscore(disease ~ sex, data = g.dat, trait = "gaussian")
# Select hits using the sex-adjusted p-values, not those of test.qt.
row.names(results(test.qt.sex))[results(test.qt.sex)$P1df < alpha]
# NOTE(review): lm() is handed the gwaa.data object directly; if that is
# rejected, g.dat@phdata is the phenotype data frame.
summary(lm(disease ~ sex, data = g.dat))
# ---- Binary trait: load data ----
# Same conversion and loading steps as for the quantitative trait, applied
# to the gwa_gabel.* files.
convert.snp.tped(
  tped = "gwa_gabel.tped",
  tfam = "gwa_gabel.tfam",
  out = "gwa_gabel.raw",
  strand = "u"
)
# NOTE(review): "gwa_gabel.praw" is not created by any command shown here --
# confirm the phenotype file exists before running.
b.dat <- load.gwaa.data(
  phen = "gwa_gabel.praw",
  gen = "gwa_gabel.raw",
  force = TRUE
)
slotNames(b.dat)
slotNames(b.dat@gtdata)
colnames(b.dat@phdata)
b.dat@gtdata@nids

# Count samples coded 1 and 0 in the disease column; rows with a missing
# phenotype are excluded from both counts.
case.size <- sum(b.dat@phdata$disease == 1, na.rm = TRUE)
control.size <- sum(b.dat@phdata$disease == 0, na.rm = TRUE)
case.size
control.size
snpsb.total <- b.dat@gtdata@nsnps

# ---- Per-SNP logistic GLM scan ----
testb.snp <- scan.glm("disease ~ CRSNP", family = binomial(), data = b.dat)
names(testb.snp)
alpha <- 5e-8 # re-set here so this section can be run on its own
testb.snp$snpnames[testb.snp$P1df < alpha]
testb.snp$P1df[testb.snp$P1df < alpha]

# ---- qtscore() scan with trait = "binomial" ----
testb.qt <- qtscore(disease, data = b.dat, trait = "binomial")
slotNames(testb.qt)
descriptives.scan(testb.qt)
row.names(results(testb.qt))[results(testb.qt)$P1df < alpha]
results(testb.qt)$P1df[results(testb.qt)$P1df < alpha]
results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha]
# ---- Population structure via classical MDS ----
# Pairwise matrix from ibs(); 0.5 - gkin is then used as the distance for
# cmdscale().
gkin <- ibs(g.dat, weight = "freq")
gkin[1:10, 1:10]
cps.full <- cmdscale(as.dist(.5 - gkin), eig = TRUE, k = 10)
names(cps.full)
cps <- cps.full$points

# First two MDS coordinates, plotting symbol taken from the popn column.
# NOTE(review): the legend assumes popn is coded 1 = TSI, 2 = MEX, 3 = CEU --
# confirm against the phenotype file.
plot(cps[, 1], cps[, 2], pch = g.dat@phdata$popn)
legend("topright", c("TSI", "MEX", "CEU"), pch = c(1, 2, 3))

# Name the ten coordinates C1..C10 and append them to a copy of the
# phenotype data so they can be used as covariates.
colnames(cps) <- paste0("C", 1:10)
gpc.dat <- g.dat
gpc.dat@phdata <- cbind(g.dat@phdata, cps)

# ---- Scans adjusted for the first five components ----
test.pc.a <- scan.glm(
  "disease ~ CRSNP + C1 + C2 + C3 + C4 + C5",
  family = gaussian(),
  data = gpc.dat
)
test.pc.a$snpnames[test.pc.a$P1df < alpha]
test.pc.a$P1df[test.pc.a$P1df < alpha]
test.pc.b <- qtscore(
  disease ~ C1 + C2 + C3 + C4 + C5,
  data = gpc.dat,
  trait = "gaussian"
)
test.pc.b@lambda

# ---- Scree plot: share of the eigenvalue sum per component ----
plot(
  cps.full$eig[1:10] / sum(cps.full$eig),
  axes = FALSE, type = "b",
  xlab = "Components", ylim = c(0, 0.05),
  ylab = "Proportion of Variations",
  main = "MDS analysis scree plot"
)
axis(1, 1:10)
axis(2)

# ---- Cumulative share over the first ten components ----
plot(
  cumsum(cps.full$eig[1:10]) / sum(cps.full$eig),
  axes = FALSE, type = "b", ylim = c(0, 0.2),
  xlab = "Components",
  ylab = "Proportion of Variations",
  main = "MDS analysis cumulative plot"
)
axis(1, 1:10)
axis(2)
# ---- Genomic control on the unadjusted quantitative-trait scan ----
# Work from one copy of the results table instead of re-extracting it.
qt.res <- results(test.qt)
gc.hits <- qt.res$Pc1df < alpha
row.names(qt.res)[gc.hits]
qt.res$Pc1df[gc.hits]
test.qt@lambda

# Chi-square QQ plot: sorted observed 1-df statistics against the matching
# chi-square(1) quantiles.
chisq.obs <- sort(qt.res$chi2.1df)
chisq.exp <- sort(qchisq(ppoints(chisq.obs), df = 1))
plot(
  chisq.exp, chisq.obs,
  main = "Genomic control (lambda = slope of the dashed line)",
  xlab = "Expected chisq, 1df",
  ylab = "Observed chisq, 1df"
)
abline(0, 1, col = "red")                 # slope 1: no inflation
abline(0, test.qt@lambda[1], lty = 2)     # slope taken from the scan's lambda

# Manual check of lambda: observed median statistic over 0.456, which is
# approximately qchisq(0.5, df = 1).
median(qt.res$chi2.1df) / 0.456

# QQ plot of the Pc1df p-values on the -log10 scale.
p.obs <- sort(qt.res$Pc1df)
p.exp <- ppoints(p.obs)
plot(
  -log10(p.exp), -log10(p.obs),
  main = "GWAS QQ plot adj. via Genomic Control",
  xlab = "Expected -log10(pvalue)",
  ylab = "Observed -log10(pvalue)"
)
abline(0, 1, col = "red")
abline(h = 8, lty = 2)                    # reference line at p = 1e-8
# ---- egscore() scan adjusted for leading axes of the kinship matrix ----
# Copy the kinship matrix and overwrite its diagonal with hom(g.dat)$Var
# before passing it to egscore().
adj.gkin <- gkin
diag(adj.gkin) <- hom(g.dat)$Var
test.eg <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = 2)
descriptives.scan(test.eg)

# SNPs below alpha, their p-values, and the scan's lambda. These are three
# separate assignments; on one line without a separator they do not parse.
snp.eg <- row.names(results(test.eg))[results(test.eg)$P1df < alpha]
pvalue.eg <- results(test.eg)$P1df[results(test.eg)$P1df < alpha]
lambda.eg <- test.eg@lambda
snp.eg
pvalue.eg
lambda.eg

# Print the lambda estimate for each number of axes from 1 to 10.
for (k in 1:10) {
  test.tmp <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = k)
  print(test.tmp@lambda$estimate)
}

# QQ plot of the Pc1df p-values from the naxes = 2 scan.
obs <- sort(results(test.eg)$Pc1df)
ept <- ppoints(obs)
plot(
  -log10(ept), -log10(obs),
  main = "GWAS QQ plot adj. w/ EIGENSTRAT",
  xlab = "Expected -log10(pvalue)",
  ylab = "Observed -log10(pvalue)"
)
abline(0, 1, col = "red")
abline(h = 8, lty = 2) # reference line at p = 1e-8

# Overlay the adjusted scan on the original scan's plot.
plot(test.qt, col = "black")
add.plot(test.eg, col = "gray", pch = 3)
legend(
  "topright",
  c("Original plot", "After correction w/ EIGENSTRAT"),
  pch = c(1, 3)
)