Changes

AdvGeneMap2018Commands

143 bytes removed, 15:16, 23 January 2018
__NOTITLE__
__FORCETOC__
 
===GenABEL===
 
# Load files
library(GenABEL)
# Convert the tped/tfam pair into the .raw genotype file that
# load.gwaa.data() reads below.
convert.snp.tped(
  tped = "gwa_gabel_qtl.tped",
  tfam = "gwa_gabel_qtl.tfam",
  out = "gwa_gabel_qtl.raw",
  strand = "u"
)
g.dat <- load.gwaa.data(
  phen = "gwa_gabel_qtl.praw",
  gen = "gwa_gabel_qtl.raw",
  force = TRUE
)
# Inspect the structure of the loaded object
slotNames(g.dat)
slotNames(g.dat@gtdata)
colnames(g.dat@phdata)
# sample size
sample.size <- g.dat@gtdata@nids
# Density-scaled histogram of the phenotype, with a rug of the raw values
hist(
  g.dat@phdata$disease,
  main = "Quantitative Phenotype data summary",
  xlab = "Systolic pressure measure",
  freq = FALSE,
  breaks = 20,
  col = "gray"
)
rug(g.dat@phdata$disease)
###
# tests for association
# NOTE(review): test.snp is not created anywhere in the commands shown here;
# presumably a scan.glm() call was dropped from this excerpt -- confirm.
names(test.snp)
alpha <- 5e-8
# SNPs with P1df below alpha, then their p-values
test.snp$snpnames[test.snp$P1df < alpha]
test.snp$P1df[test.snp$P1df < alpha]
# Score test
test.qt <- qtscore(disease, data = g.dat, trait = "gaussian")
slotNames(test.qt)
names(test.qt@results)
test.qt@lambda
descriptives.scan(test.qt)
# SNP names with P1df below alpha, their P1df values, and the Pc1df values
# below alpha
rownames(results(test.qt))[results(test.qt)$P1df < alpha]
results(test.qt)$P1df[results(test.qt)$P1df < alpha]
results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]
# QQ plot
# Sorted P1df values from the score test; the plotting commands that use
# `obs` are not part of the commands shown here.
obs <- sort(results(test.qt)$P1df)
# Manhattan plot
plot(test.qt, col = "black")
# Adding confounders
test.qt.sex <- qtscore(disease ~ sex, data = g.dat, trait = "gaussian")
# Select on the sex-adjusted scan's own p-values so that the row names and
# the logical mask come from the same result table.
rownames(results(test.qt.sex))[results(test.qt.sex)$P1df < alpha]
summary(lm(disease ~ sex, data = g.dat))
###
# MDS
###
gkin <- ibs(g.dat, weight = "freq")
gkin[1:10, 1:10]
# 0.5 - gkin is passed to as.dist() to serve as the dissimilarity input for
# classical scaling; k = 10 dimensions are requested and eigenvalues are kept.
cps.full <- cmdscale(as.dist(.5 - gkin), eig = TRUE, k = 10)
names(cps.full)
cps <- cps.full$points
# First two coordinates, plotting symbol taken from the popn phenotype column
plot(cps[, 1], cps[, 2], pch = g.dat@phdata$popn)
legend(-0.16, 0.06, c("TSI", "MEX", "CEU"), pch = c(1, 2, 3))
###
# Corrected test
###
# Incorporating PCs as predictors
# Name the 10 MDS coordinate columns C1..C10 so they can be used in formulas
colnames(cps) <- paste0("C", 1:10)
# Work on a copy of g.dat whose phenotype table also carries the coordinates
gpc.dat <- g.dat
gpc.dat@phdata <- cbind(g.dat@phdata, cps)
test.pc.a <- scan.glm(
  "disease ~ CRSNP + C1 + C2 + C3 + C4 + C5",
  family = gaussian(),
  data = gpc.dat
)
# SNPs with P1df below alpha, then their p-values
test.pc.a$snpnames[test.pc.a$P1df < alpha]
test.pc.a$P1df[test.pc.a$P1df < alpha]
test.pc.b <- qtscore(
  disease ~ C1 + C2 + C3 + C4 + C5,
  data = gpc.dat,
  trait = "gaussian"
)
test.pc.b@lambda
# scree plot
plot(
  cps.full$eig[1:10] / sum(cps.full$eig),
  axes = FALSE,
  type = "b",
  xlab = "Components",
  ylim = c(0, 0.05),
  ylab = "Proportion of Variations",
  main = "MDS analysis scree plot"
)
axis(1, 1:10)
axis(2)
# cumulative plot
plot(
  cumsum(cps.full$eig[1:10]) / sum(cps.full$eig),
  axes = FALSE,
  type = "b",
  ylim = c(0, 0.2),
  xlab = "Components",
  ylab = "Proportion of Variations",
  main = "MDS analysis cumulative plot"
)
axis(1, 1:10)
axis(2)
# Genomic control
# Uncorrected GIF
test.qt@lambda
# Corrected p-value
row.names(results(test.qt))[results(test.qt)$Pc1df < alpha]
results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]
# Check for inflation of statistic
# NOTE(review): the plot() call that the abline() calls below draw on, and
# the definition of `ept` used further down, are not among the commands
# shown here -- confirm against the full script.
obs <- sort(results(test.qt)$chi2.1df)
abline(0, 1, col = "red")
abline(0, test.qt@lambda[1], lty = 2)
# Definition of GIF
# Conventional definition
# GenABEL definition
# Slope of the regression of obs on ept (second coefficient of the fit)
coef(lm(obs ~ ept))[2]
# QQ plot
obs <- sort(results(test.qt)$Pc1df)
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
# EIGENSTRAT
# Copy the kinship matrix and overwrite its diagonal with hom(g.dat)$Var
adj.gkin <- gkin
diag(adj.gkin) <- hom(g.dat)$Var
# naxes = 3 is default value
test.eg <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = 2)
descriptives.scan(test.eg)
# SNPs with P1df below alpha, their p-values, and the scan's lambda
snp.eg <- row.names(results(test.eg))[results(test.eg)$P1df < alpha]
pvalue.eg <- results(test.eg)$P1df[results(test.eg)$P1df < alpha]
lambda.eg <- test.eg@lambda
snp.eg
pvalue.eg
lambda.eg
# Change #PCs
# NOTE(review): test.tmp is never assigned in the commands shown here and k
# is unused in the loop body; presumably a line such as
# test.tmp <- egscore(..., naxes = k) was dropped from this excerpt -- confirm.
for (k in 1:10) {
  print(test.tmp@lambda$estimate)
}
# QQ plot
obs <- sort(results(test.eg)$Pc1df)
add.plot(test.eg, col = "gray", pch = 3)
legend(
  "topright",
  c("Original plot", "After correction w/ EIGENSTRAT"),
  pch = c(1, 3)
)
###
# Basic test, binary trait
###
# load files to GenABEL
convert.snp.tped(
  tped = "gwa_gabel.tped",
  tfam = "gwa_gabel.tfam",
  out = "gwa_gabel.raw",
  strand = "u"
)
b.dat <- load.gwaa.data(
  phen = "gwa_gabel.praw",
  gen = "gwa_gabel.raw",
  force = TRUE
)
slotNames(b.dat)
slotNames(b.dat@gtdata)
colnames(b.dat@phdata)
# sample size
b.dat@gtdata@nids
# number of cases and controls
# Count of individuals whose disease value equals 1
case.size <- length(which(b.dat@phdata$disease == 1))
case.size
# NOTE(review): control.size is not assigned in the commands shown here;
# its definition was presumably dropped from this excerpt -- confirm.
control.size
# number of SNPs
snpsb.total <- b.dat@gtdata@nsnps
# GLM test
testb.snp <- scan.glm("disease ~ CRSNP", family = binomial(), data = b.dat)
names(testb.snp)
alpha <- 5e-8
# SNPs with P1df below alpha, then their p-values
testb.snp$snpnames[testb.snp$P1df < alpha]
testb.snp$P1df[testb.snp$P1df < alpha]
# Score test
testb.qt <- qtscore(disease, data = b.dat, trait = "binomial")
slotNames(testb.qt)
descriptives.scan(testb.qt)
# SNP names with P1df below alpha, their P1df values, and the Pc1df values
# below alpha
row.names(results(testb.qt))[results(testb.qt)$P1df < alpha]
results(testb.qt)$P1df[results(testb.qt)$P1df < alpha]
results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha]
===Plink - Part 1 - Data QC===
 
# Shell commands: PLINK is invoked from the command line, not from within R.
plink --file GWAS
plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind
# R commands: inspect PLINK output files from the working directory.
setwd("to_your_working_directory/")
dups <- read.table("duplicates.genome", header = TRUE)
# Pairs whose PI_HAT exceeds 0.4
problem_pairs <- dups[which(dups$PI_HAT > 0.4), ]
problem_pairs
# Same selection with the lower cut-off of 0.05, showing only the ID and
# PI_HAT columns
problem_pairs <- dups[which(dups$PI_HAT > 0.05), ]
myvars <- c("FID1", "IID1", "FID2", "IID2", "PI_HAT")
problem_pairs[myvars]
hardy <- read.table("plink.hwe", header = TRUE)
names(hardy)
# Rows of the HWE table with P below 0.0000009
hwe_prob <- hardy[which(hardy$P < 0.0000009), ]
hwe_prob
q()
##########
plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4
===Plink - Part 2 - Controlling for Substructure===
 
# Shell command (PLINK); the R commands start after the "in R" marker below.
plink --file GWAS_clean4 --genome --cluster --mds-plot 10
#### in R
broadqq &lt;-function(pvals, title)
{
 
observed &lt;- sort(pvals)
lobs &lt;- -(log10(observed))
broadqq(aff_C1C2.add.p, "Some Trait Adjusted for PC1 and PC2")
dev.off()
# Rows with P below 0.0000001 in the unadjusted results
gws_unadj <- aff_unadj[which(aff_unadj$P < 0.0000001), ]
gws_unadj
# Rows with P below 0.0000001 in the aff_C1C2 results
gws_adjusted <- aff_C1C2[which(aff_C1C2$P < 0.0000001), ]
gws_adjusted
===VAT===
 
# Shell commands for the vtools command-line tool.
vtools -h
vtools init VATDemo
Bureaucrat, administrator
1,252
edits