Changes

AdvGeneMap2018Commands

505 bytes added, 15:20, 23 January 2018
===GenABEL===
 
# Load files
library(GenABEL)
# Convert the PLINK tped/tfam pair to a GenABEL raw genotype file, then load
# it together with the phenotype file into one object.
convert.snp.tped(
  tped = "gwa_gabel_qtl.tped",
  tfam = "gwa_gabel_qtl.tfam",
  out = "gwa_gabel_qtl.raw",
  strand = "u"
)
g.dat <- load.gwaa.data(
  phen = "gwa_gabel_qtl.praw",
  gen = "gwa_gabel_qtl.raw",
  force = TRUE
)
slotNames(g.dat)
slotNames(g.dat@gtdata)
# Trait
summary(g.dat@phdata$disease)
hist(
  g.dat@phdata$disease,
  main = "Quantitative Phenotype data summary",
  xlab = "Systolic pressure measure",
  freq = FALSE,
  breaks = 20,
  col = "gray"
)
rug(g.dat@phdata$disease)
###
###
# GLM test
test.snp <- scan.glm("disease ~ CRSNP", family = gaussian(), data = g.dat)
names(test.snp)
# Significance threshold reused by every test below.
alpha <- 5e-8
test.snp$P1df[test.snp$P1df < alpha]
# Score test
test.qt <- qtscore(disease, data = g.dat, trait = "gaussian")
slotNames(test.qt)
names(test.qt@results)
# QQ plot: sorted observed p-values against evenly spaced expected quantiles.
obs <- sort(results(test.qt)$P1df)
ept <- seq_along(obs) / (length(obs) + 1)
plot(
  -log10(ept), -log10(obs),
  main = "GWAS QQ plot, qtl",
  xlab = "Expected -log10(pvalue)",
  ylab = "Observed -log10(pvalue)"
)
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
# Manhattan plot
plot(test.qt, col = "black")
# Adding confounders
test.qt.sex <- qtscore(disease ~ sex, data = g.dat, trait = "gaussian")
# Select on the sex-adjusted p-values; the original indexed these row names
# with the unadjusted test.qt results.
rownames(results(test.qt.sex))[results(test.qt.sex)$P1df < alpha]
summary(lm(disease ~ sex, data = g.dat))
###
# MDS
###
# Pairwise identity-by-state matrix between samples.
gkin <- ibs(g.dat, weight = "freq")
gkin[1:10, 1:10]
# Classical MDS on the distance 0.5 - IBS, keeping the first 10 components.
cps.full <- cmdscale(as.dist(.5 - gkin), eig = TRUE, k = 10)
names(cps.full)
cps <- cps.full$points
# cmdscale() returns the coordinate matrix without column names; label the
# columns C1, C2, ... so the C1..C5 covariates in the formulas below exist in
# the phenotype table after cbind().
colnames(cps) <- paste0("C", seq_len(ncol(cps)))
plot(cps[, 1], cps[, 2], pch = g.dat@phdata$popn)
legend(-0.16, 0.06, c("TSI", "MEX", "CEU"), pch = c(1, 2, 3))
###
# Corrected test
gpc.dat <- g.dat
gpc.dat@phdata <- cbind(g.dat@phdata, cps)
test.pc.a <- scan.glm(
  "disease ~ CRSNP + C1 + C2 + C3 + C4 + C5",
  family = gaussian(),
  data = gpc.dat
)
test.pc.a$snpnames[test.pc.a$P1df < alpha]
test.pc.a$P1df[test.pc.a$P1df < alpha]
test.pc.b <- qtscore(
  disease ~ C1 + C2 + C3 + C4 + C5,
  data = gpc.dat,
  trait = "gaussian"
)
test.pc.b@lambda
# scree plot
plot(
  cps.full$eig[1:10] / sum(cps.full$eig),
  axes = FALSE,
  type = "b",
  xlab = "Components",
  ylim = c(0, 0.05),
  ylab = "Proportion of Variations",
  main = "MDS analysis scree plot"
)
axis(1, 1:10)
axis(2)
# cumulative plot
plot(
  cumsum(cps.full$eig[1:10]) / sum(cps.full$eig),
  axes = FALSE,
  type = "b",
  ylim = c(0, 0.2),
  xlab = "Components",
  ylab = "Proportion of Variations",
  main = "MDS analysis cumulative plot"
)
axis(1, 1:10)
axis(2)
# Check for inflation of statistic
obs <- sort(results(test.qt)$chi2.1df)
ept <- sort(qchisq(seq_along(obs) / (length(obs) + 1), df = 1))
plot(
  ept, obs,
  main = "Genomic control (slope is the inflation factor)",
  xlab = "Expected chisq, 1df",
  ylab = "Observed chisq, 1df"
)
abline(0, 1, col = "red")
# Dashed line through the origin with the estimated lambda as its slope.
abline(0, test.qt@lambda[1], lty = 2)
# Definition of GIF
# Conventional definition
obs <- sort(results(test.qt)$Pc1df)
ept <- seq_along(obs) / (length(obs) + 1)
plot(
  -log10(ept), -log10(obs),
  main = "GWAS QQ plot adj. via Genomic Control",
  xlab = "Expected -log10(pvalue)",
  ylab = "Observed -log10(pvalue)"
)
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
# EIGENSTRAT
# Start from the IBS matrix and replace its diagonal with hom(g.dat)$Var
# before passing it to egscore() as the kinship matrix.
adj.gkin <- gkin
diag(adj.gkin) <- hom(g.dat)$Var
# naxes = 3 is default value
# (This comment used to share a line with the assignment below and swallowed
# it, so test.eg was never created.)
test.eg <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = 2)
descriptives.scan(test.eg)
snp.eg <- row.names(results(test.eg))[results(test.eg)$P1df < alpha]
# Change #PCs
# Print the lambda estimate for 1 to 10 adjustment axes.
for (k in 1:10) {
  test.tmp <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = k)
  print(test.tmp@lambda$estimate)
}
obs <- sort(results(test.eg)$Pc1df)
ept <- seq_along(obs) / (length(obs) + 1)
qqplot(
  -log10(ept), -log10(obs),
  main = "GWAS QQ plot adj. w/ EIGENSTRAT",
  xlab = "Expected -log10(pvalue)",
  ylab = "Observed -log10(pvalue)"
)
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
# Manhattan plot comparison
plot(test.qt, col = "black")
add.plot(test.eg, col = "gray", pch = 3)
legend(
  "topright",
  c("Original plot", "After correction w/ EIGENSTRAT"),
  pch = c(1, 3)
)
###
# Basic test, binary trait
###
# load files to GenABEL
convert.snp.tped(
  tped = "gwa_gabel.tped",
  tfam = "gwa_gabel.tfam",
  out = "gwa_gabel.raw",
  strand = "u"
)
b.dat <- load.gwaa.data(
  phen = "gwa_gabel.praw",
  gen = "gwa_gabel.raw",
  force = TRUE
)
slotNames(b.dat)
slotNames(b.dat@gtdata)
b.dat@gtdata@nids
# number of cases and controls
# disease == 1 is counted as a case, disease == 0 as a control; missing
# values are not counted in either group.
case.size <- sum(b.dat@phdata$disease == 1, na.rm = TRUE)
control.size <- sum(b.dat@phdata$disease == 0, na.rm = TRUE)
case.size
control.size
snpsb.total <- b.dat@gtdata@nsnps
# GLM test
testb.snp <- scan.glm("disease ~ CRSNP", family = binomial(), data = b.dat)
names(testb.snp)
alpha <- 5e-8
testb.snp$P1df[testb.snp$P1df < alpha]
# Score test
testb.qt <- qtscore(disease, data = b.dat, trait = "binomial")
slotNames(testb.qt)
descriptives.scan(testb.qt)
# Significant p-values from the P1df and Pc1df result columns.
results(testb.qt)$P1df[results(testb.qt)$P1df < alpha]
results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha]
 
===Plink - Part 1 - Data QC===
#### in R - open R by simply typing R
setwd("to_your_working_directory/")
# Read the sex-check table and list the rows whose STATUS is "PROBLEM".
sexcheck <- read.table("GWAS_sex_checking.sexcheck", header = TRUE)
names(sexcheck)
sex_problem <- sexcheck[which(sexcheck$STATUS == "PROBLEM"), ]
sex_problem
q()
#### in R
setwd("to_your_working_directory/")
dups <- read.table("duplicates.genome", header = TRUE)
# First pass: pairs with PI_HAT above 0.4.
problem_pairs <- dups[which(dups$PI_HAT > 0.4), ]
problem_pairs
# Second pass: lower the cut-off to 0.05 and show only the ID and PI_HAT
# columns.
problem_pairs <- dups[which(dups$PI_HAT > 0.05), ]
myvars <- c("FID1", "IID1", "FID2", "IID2", "PI_HAT")
problem_pairs[myvars]
q()
# Shell command (not R): run PLINK with --het on the GWAS_clean3 fileset.
# The R code that follows reads plink.het, presumably the file written by
# this run - confirm the output prefix.
plink --file GWAS_clean3 --het
###### in R
Dataset <- read.table(
  "plink.het",
  header = TRUE,
  sep = "",
  na.strings = "NA",
  dec = ".",
  strip.white = TRUE
)
mean(Dataset$F)
sd(Dataset$F)
# Write a histogram of the standardised F column to hist.jpeg.
jpeg("hist.jpeg", height = 1000, width = 1000)
hist(scale(Dataset$F), xlim = c(-4, 4))
dev.off()
q()
# Shell command (not R): run PLINK with --hardy on GWAS_clean3, taking the
# phenotype named Aff from pheno.txt. The R code that follows reads
# plink.hwe, presumably the file written by this run - confirm.
plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy
##### in R
hardy <- read.table("plink.hwe", header = TRUE)
names(hardy)
# Keep the rows whose P column is below 9e-7.
hwe_prob <- hardy[which(hardy$P < 0.0000009), ]
hwe_prob
q()
===Plink - Part 2 - Controlling for Substructure===
# Shell command (not R): run PLINK with --genome --cluster --mds-plot 10 on
# the GWAS_clean4 fileset.
# NOTE(review): the R code that follows reads mds_components.txt; confirm how
# that file is derived from this run's output.
plink --file GWAS_clean4 --genome --cluster --mds-plot 10
#### in R
mydata <- read.table("mds_components.txt", header = TRUE)
# Give each Group value (1, 2, 3) its own plotting symbol.
mydata$pch[mydata$Group == 1] <- 15
mydata$pch[mydata$Group == 2] <- 16
mydata$pch[mydata$Group == 3] <- 2
# Plot component C1 against C2 into mds.jpeg.
jpeg("mds.jpeg", height = 500, width = 500)
plot(mydata$C1, mydata$C2, pch = mydata$pch)
dev.off()
q()
# Draw a QQ plot of p-values on the -log10 scale.
#
# pvals: numeric vector of p-values (sort() drops any NA values).
# title: main title for the plot.
#
# Observed values are the sorted p-values; expected values are
# i / (n + 1) for i = 1..n. A red identity line is drawn first and the
# points are added on top of it.
broadqq <- function(pvals, title) {
  observed <- sort(pvals)
  lobs <- -(log10(observed))
  expected <- seq_along(observed)
  lexp <- -(log10(expected / (length(expected) + 1)))
  # The identity line used to stop at 7, which falls short when the smallest
  # p-value is below 1e-7; extend it to cover the plotted range.
  diag_end <- max(7, lobs)
  plot(
    c(0, diag_end), c(0, diag_end),
    col = "red", lwd = 3, type = "l",
    xlab = "Expected (-logP)", ylab = "Observed (-logP)",
    xlim = c(0, max(lobs)), ylim = c(0, max(lobs)),
    las = 1, xaxs = "i", yaxs = "i", bty = "l",
    main = title
  )
  points(lexp, lobs, pch = 23, cex = .4, bg = "black")
}

# Stack the unadjusted and the PC1/PC2-adjusted QQ plots in one JPEG.
jpeg("qqplot_compare.jpeg", height = 1000, width = 500)
par(mfrow = c(2, 1))
aff_unadj <- read.table("unadj.assoc.logistic", header = TRUE)
# Only the rows whose TEST column is "ADD" are plotted.
aff_unadj.add.p <- aff_unadj[aff_unadj$TEST == "ADD", ]$P
broadqq(aff_unadj.add.p, "Some Trait Unadjusted")
aff_C1C2 <- read.table("PC1-PC2.assoc.logistic", header = TRUE)
aff_C1C2.add.p <- aff_C1C2[aff_C1C2$TEST == "ADD", ]$P
broadqq(aff_C1C2.add.p, "Some Trait Adjusted for PC1 and PC2")
dev.off()
# Rows with P below 1e-7 in each analysis.
gws_unadj <- aff_unadj[which(aff_unadj$P < 0.0000001), ]
gws_unadj
gws_adjusted <- aff_C1C2[which(aff_C1C2$P < 0.0000001), ]
gws_adjusted
 
 
===VAT===
 
# Create the VATDemo project and look at the DP field across all variants.
vtools -h
vtools init VATDemo
head GenotypeSummary.txt
vtools output variant "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
# Same summary restricted to variants whose filter field is PASS.
vtools select variant "filter='PASS'" --count
vtools select variant "filter='PASS'" -o "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
# Add per-variant genotype statistics as new fields.
vtools update variant --from_stat 'total=#(GT)' 'num=#(alt)' 'het=#(het)' 'hom=#(hom)' 'other=#(other)' 'minDP=min(DP_geno)' 'maxDP=max(DP_geno)' 'meanDP=avg(DP_geno)' 'maf=maf()'
vtools show fields
vtools show table variant
# Recompute the statistics using only genotypes with DP_geno > 10.
vtools update variant --from_stat 'totalGD10=#(GT)' 'numGD10=#(alt)' 'hetGD10=#(het)' 'homGD10=#(hom)' 'otherGD10=#(other)' 'mafGD10=maf()' --genotypes "DP_geno > 10"
vtools show fields
vtools show table variant
vtools output variant chr pos maf mafGD10 --header --limit 20
# Set RACE from the sample file name: 0 for YRI, 1 for CEU.
vtools phenotype --set "RACE=0" --samples "filename like 'YRI%'"
vtools phenotype --set "RACE=1" --samples "filename like 'CEU%'"
vtools show samples --limit 10
# Per-population MAF, again limited to genotypes with DP_geno > 10.
vtools update variant --from_stat 'CEU_mafGD10=maf()' --genotypes 'DP_geno>10' --samples "RACE=1"
vtools update variant --from_stat 'YRI_mafGD10=maf()' --genotypes 'DP_geno>10' --samples "RACE=0"
vtools output variant chr pos mafGD10 CEU_mafGD10 YRI_mafGD10 --header --limit 10
vtools phenotype --from_stat 'CEU_totalGD10=#(GT)' 'CEU_numGD10=#(alt)' --genotypes 'DP_geno>10' --samples "RACE=1"
vtools phenotype --from_stat 'YRI_totalGD10=#(GT)' 'YRI_numGD10=#(alt)' --genotypes 'DP_geno>10' --samples "RACE=0"
# The field names were run together as one word; they are the sample name
# plus the four phenotype fields created by the two commands above.
vtools phenotype --output sample_name CEU_totalGD10 CEU_numGD10 YRI_totalGD10 YRI_numGD10 --header
vtools execute ANNOVAR geneanno
vtools show tables
# Remove genotypes with DP_geno < 10, then collect the variants matching the
# mut_type / region_type condition into table v_funct.
vtools remove genotypes "DP_geno<10" -v0
vtools select variant "mut_type like 'non%' or mut_type like 'stop%' or region_type='splicing'" -t v_funct
vtools show tables
vtools show samples --limit 5
# Build a CEU-only (RACE=1) sub-project in ./ceu.
vtools select variant --samples "RACE=1" -t CEU
mkdir -p ceu
cd ceu
vtools init ceu --parent ../ --variants CEU --samples "RACE=1" --build hg19
vtools show project
# Common variants: CEU MAF >= 0.05. Rare variants: v_funct entries with
# CEU MAF < 0.01.
vtools select variant "CEU_mafGD10>=0.05" -t common_ceu
vtools select v_funct "CEU_mafGD10<0.01" -t rare_ceu
vtools use refGene
# NOTE(review): no command shown here writes EA_RV_VT.asso.res - confirm the
# 'vtools associate' step that produces it before these two lines are run.
less EA_RV_VT.asso.res
sort -g -k6 EA_RV_VT.asso.res | head
vtools select rare_ceu "refGene.name2='ABCC1'" -o chr pos ref alt CEU_mafGD10 numGD10 mut_type --header
cd ..
# Build a YRI-only (RACE=0) sub-project in ./yri.
vtools select variant --samples "RACE=0" -t YRI
mkdir -p yri
cd yri
vtools init yri --parent ../ --variants YRI --samples "RACE=0" --build hg19
# Common variants: YRI MAF >= 0.05. Rare variants: v_funct entries with
# YRI MAF < 0.01.
vtools select variant "YRI_mafGD10>=0.05" -t common_yri
vtools select v_funct "YRI_mafGD10<0.01" -t rare_yri
vtools use refGene
# Test the common variants against BMI with SEX as covariate; results are
# redirected to YA_CV.asso.res.
vtools associate common_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db YA_CV > YA_CV.asso.res
Bureaucrat, administrator
1,252
edits