Difference between revisions of "AdvGeneMap2018Commands"

Revision as of 17:09, 22 January 2018

Data QC Plink

#PLINK

plink --file GWAS

plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind

plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5

plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5

plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean

plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean

plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean

plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2

plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking

#### in R - open R by simply typing R

setwd("to_your_working_directory/")

# Load the report written by the preceding `--check-sex --out GWAS_sex_checking`
# run and list the samples whose STATUS column reads "PROBLEM".
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned; `<-` is the conventional assignment operator.
sexcheck <- read.table("GWAS_sex_checking.sexcheck", header = TRUE)
names(sexcheck)
# which() drops rows where the comparison is NA instead of returning NA rows.
sex_problem <- sexcheck[which(sexcheck$STATUS == "PROBLEM"), ]
sex_problem

q()

##################################

plink --file GWAS_clean2 --genome --out duplicates

#### in R

setwd("to_your_working_directory/")

# Load the pairwise table written by `--genome --out duplicates` and inspect
# sample pairs by their PI_HAT value.
# `TRUE` replaces the reassignable shorthand `T`; `<-` replaces `=`.
dups <- read.table("duplicates.genome", header = TRUE)

# First pass: pairs with PI_HAT above 0.4, printed with every column.
problem_pairs <- dups[which(dups$PI_HAT > 0.4), ]
problem_pairs

# Second pass: the looser 0.05 cut-off OVERWRITES problem_pairs; only the
# identifying columns and PI_HAT are printed this time.
problem_pairs <- dups[which(dups$PI_HAT > 0.05), ]
myvars <- c("FID1", "IID1", "FID2", "IID2", "PI_HAT")
problem_pairs[myvars]

q()

######

plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3

plink --file GWAS_clean3 --het

###### in R

# Read the default-named output of the preceding `plink ... --het` run.
Dataset <- read.table(
  "plink.het",
  header = TRUE,
  sep = "",
  na.strings = "NA",
  dec = ".",
  strip.white = TRUE
)

# Mean and standard deviation of the F column, printed to the console.
mean(Dataset$F)
sd(Dataset$F)

# Histogram of the standardised F values (scale() centres and rescales),
# restricted to [-4, 4] and saved as a 1000 x 1000 pixel JPEG.
jpeg(filename = "hist.jpeg", width = 1000, height = 1000)
hist(scale(Dataset$F), xlim = c(-4, 4))
dev.off()

q()

######

plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy 

##### in R

# Load the default-named output of the preceding `plink ... --hardy` run and
# list the rows whose P column is below 0.0000009.
# `TRUE` replaces the reassignable shorthand `T`; `<-` replaces `=`.
hardy <- read.table("plink.hwe", header = TRUE)
names(hardy)
# which() drops rows where P is NA instead of returning NA rows.
hwe_prob <- hardy[which(hardy$P < 0.0000009), ]
hwe_prob


q()


##########

plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4

###############################################
##### Part 2: controlling for substructure#####
###############################################

plink --file GWAS_clean4 --genome --cluster --mds-plot 10

#### in R

# Plot the first two MDS components (columns C1 and C2) from
# mds_components.txt, with the plotting symbol chosen by the Group column.
# `TRUE` replaces the reassignable shorthand `T`; `<-` replaces `=`.
mydata <- read.table("mds_components.txt", header = TRUE)

# Plotting symbol per group: 1 -> 15, 2 -> 16, 3 -> 2.
# Rows whose Group is none of these are not assigned a symbol here.
mydata$pch[mydata$Group == 1] <- 15
mydata$pch[mydata$Group == 2] <- 16
mydata$pch[mydata$Group == 3] <- 2

# Save the scatter plot as a 500 x 500 pixel JPEG.
jpeg("mds.jpeg", height = 500, width = 500)
plot(mydata$C1, mydata$C2, pch = mydata$pch)
dev.off()

q()

######

plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj

plink --file GWAS_clean4 --genome --cluster --pca 10 header

plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1 --logistic --adjust --out PC1

plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1-PC2 --logistic --adjust --out PC1-PC2

#### in R

# Draw a QQ plot of p-values on the -log10 scale on the current device.
#
# Arguments:
#   pvals  numeric vector of p-values; NA entries are dropped by sort().
#   title  main title passed to plot().
#
# Observed values are the sorted p-values. The expected value for rank i of
# n is i / (n + 1). Both are shown as -log10. The red y = x reference line
# spans the full plotted range rather than stopping at a fixed value.
#
# Stops with an error if there is no finite -log10 p-value to plot (for
# example empty or all-NA input). Returns NULL invisibly; the function is
# called for its plotting side effect.
broadqq <- function(pvals, title) {
  observed <- sort(pvals)
  lobs <- -(log10(observed))

  # seq_along() is empty for empty input, unlike 1:length(x), which
  # would yield c(1, 0).
  expected <- seq_along(observed)
  lexp <- -(log10(expected / (length(expected) + 1)))

  # A p-value of exactly 0 becomes Inf on the -log10 scale. Axis limits are
  # taken from the finite values only, so such points fall outside the
  # plotted range instead of making plot() fail on a non-finite limit.
  finite_lobs <- lobs[is.finite(lobs)]
  if (length(finite_lobs) == 0) {
    stop("broadqq: no finite -log10 p-values to plot", call. = FALSE)
  }
  axis_max <- max(finite_lobs)

  # The reference line runs from the origin to the axis maximum so it
  # always reaches the edge of the plot.
  plot(c(0, axis_max), c(0, axis_max), col = "red", lwd = 3, type = "l",
       xlab = "Expected (-logP)", ylab = "Observed (-logP)",
       xlim = c(0, axis_max), ylim = c(0, axis_max),
       las = 1, xaxs = "i", yaxs = "i", bty = "l", main = title)
  points(lexp, lobs, pch = 23, cex = .4, bg = "black")
  invisible(NULL)
}

# Stack two QQ plots (unadjusted on top, PC1+PC2-adjusted below) in one
# 500 x 1000 pixel JPEG, then list the hits below the 0.0000001 threshold.
# Requires broadqq() to be defined in the session.
jpeg("qqplot_compare.jpeg", height = 1000, width = 500)
par(mfrow = c(2, 1))

# Only rows with TEST == "ADD" contribute p-values to the plots.
aff_unadj <- read.table("unadj.assoc.logistic", header = TRUE)
aff_unadj.add.p <- aff_unadj[aff_unadj$TEST == "ADD", ]$P
broadqq(aff_unadj.add.p, "Some Trait Unadjusted")

aff_C1C2 <- read.table("PC1-PC2.assoc.logistic", header = TRUE)
aff_C1C2.add.p <- aff_C1C2[aff_C1C2$TEST == "ADD", ]$P
broadqq(aff_C1C2.add.p, "Some Trait Adjusted for PC1 and PC2")
dev.off()

# The hit tables use the same TEST == "ADD" restriction as the plots above.
# NOTE(review): the covariate-adjusted file presumably also holds one row per
# covariate for each SNP; filtering on P alone would list those rows as hits.
# Confirm against the PLINK output format.
gws_unadj <- aff_unadj[which(aff_unadj$TEST == "ADD" &
                             aff_unadj$P < 0.0000001), ]
gws_unadj
gws_adjusted <- aff_C1C2[which(aff_C1C2$TEST == "ADD" &
                               aff_C1C2$P < 0.0000001), ]
gws_adjusted

Personal tools

Search

Namespaces

Views

Actions

Widgets

Widgets

Recent changes

Wanted pages

Who is online?

Tools

Difference between revisions of "AdvGeneMap2018Commands"

From Statistical Genetics Courses

Revision as of 17:09, 22 January 2018

Data QC Plink

Navigation menu

Short Courses

Software

Course Materials

@@ Line 1: / Line 1: @@
 __NOTITLE__
+==Data QC Plink==
-==ANNOVAR==
+  #PLINK
-  table_annovar.pl
+  plink --file GWAS
-  table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Gene.vcf -remove -nastring . -protocol refGene -operation g -vcfinput
- cat APOC3_Gene.vcf.hg19_multianno.txt
+  plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind
- table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Gene.vcf -remove -nastring . -protocol refGene,knownGene,ensGene -operation g,g,g -arg '-splicing
--exonicsplicing','-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing' -vcfinput
+  plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5
- awk -F'\t' '{print $1,$2,$6,$7,$8,$9,$10}' APOC3_Gene.vcf.hg19_multianno.txt
- table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Region.vcf -remove -nastring . -protocol phastConsElements46way -operation r -vcfinput
+  plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5
- table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Region.vcf -remove -nastring . -protocol gwasCatalog -operation r -vcfinput
- table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Filter.vcf -remove -nastring . -protocol
+  plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean
- gnomad_genome,gnomad_exome,popfreq_max_20150413,gme,avsnp150,dbnsfp33a,dbscsnv11,cadd13gt20,clinvar_20170905,gwava -operation f,f,f,f,f,f,f,f,f,f -vcfinput
- awk -F'\t' '{print $1,$2,$103,$104}' APOC3_Filter.vcf.hg19_multianno.txt
+  plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean
- awk -F'\t' '{print $1,$2,$6,$14}' APOC3_Filter.vcf.hg19_multianno.txt
- awk -F'\t' '{print $1,$2,$15,$16,$17,$18,$19,$20,$21,$22}' APOC3_Filter.vcf.hg19_multianno.txt
+  plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean
- awk -F'\t' '{print $1,$2,$36,$86,$70}' APOC3_Filter.vcf.hg19_multianno.txt
- awk -F'\t' '{print $1,$2,$99,$100}' APOC3_Filter.vcf.hg19_multianno.txt
+  plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2
- table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_ANN.vcf -remove -nastring . -protocol
- refGene,knownGene,ensGene,wgRna,targetScanS,phastConsElements46way,tfbsConsSites,gwasCatalog,gnomad_genome,gnomad_exome,popfreq_max_20150413,gme,avsnp150,dbnsfp33a,dbscsnv11,cadd13gt20,clinvar_20170905,gwava -operation g,g,g,r,r,r,r,r,f,f,f,f,f,f,f,f,f,f -arg '-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing',,,,,,,,,,,,,,, -vcfinput
+  plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking
-==GeneABEL==
+ #### in R - open R by simply typing R
-  plink --file GWAS_clean4 --pheno pheno.phen --pheno-name Aff --transpose --recode --out gwa_gabel --noweb
+ setwd("to_your_working_directory/")
- plink --file GWAS_clean4 --pheno pheno.phen --pheno-name systolic --transpose --recode --out gwa_gabel_qtl --noweb
- R
- library(GenABEL)
- convert.snp.tped(tped = "gwa_gabel_qtl.tped", tfam = "gwa_gabel_qtl.tfam", out = "gwa_gabel_qtl.raw", strand = "u")
- g.dat &lt;- load.gwaa.data(phen = "gwa_gabel_qtl.praw", gen = "gwa_gabel_qtl.raw", force = T)
- slotNames(g.dat)
- slotNames(g.dat@gtdata)
- colnames(g.dat@phdata)
- sample.size &lt;- g.dat@gtdata@nids
- snps.total &lt;- g.dat@gtdata@nsnps
- print(c(sample.size, snps.total))
- summary(g.dat@phdata$disease)
- hist(g.dat@phdata$disease, main="Quantitative Phenotype data summary", xlab = "Systolic pressure", freq = F,breaks=20, col="gray")
- rug(g.dat@phdata$disease)
- test.snp &lt;- scan.glm('disease ~ CRSNP', family = gaussian(), data = g.dat)
- names(test.snp)
- alpha &lt;- 5e-8
- test.snp$snpnames[test.snp$P1df < alpha]
- test.snp$P1df[test.snp$P1df < alpha]
- test.qt &lt;- qtscore(disease, data = g.dat, trait = "gaussian")
- slotNames(test.qt)
- names(test.qt@results)
- head(results(test.qt))
- test.qt@lambda
- descriptives.scan(test.qt)
- row.names(results(test.qt))[results(test.qt)$P1df < alpha]
- results(test.qt)$P1df[results(test.qt)$P1df < alpha] results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]
- obs &lt;- sort(results(test.qt)$P1df)
- ept &lt;- ppoints(obs)
- plot(-log10(ept), -log10(obs), main = "GWAS QQ plot, qtl", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")
-  abline(0, 1, col = "red")
- abline(h = 8, lty = 2)
- plot(test.qt, col = "black")
- test.qt.sex &lt;- qtscore(disease ~ sex, data = g.dat, trait = "gaussian")
- row.names(results(test.qt.sex))[results(test.qt)$P1df < alpha]
- summary(lm(disease ~ sex, data = g.dat))
- convert.snp.tped(tped = "gwa_gabel.tped", tfam = "gwa_gabel.tfam", out = "gwa_gabel.raw", strand = "u")
- b.dat &lt;- load.gwaa.data(phen = "gwa_gabel.praw", gen = "gwa_gabel.raw", force = T)
- slotNames(b.dat)
- slotNames(b.dat@gtdata)
- colnames(b.dat@phdata)
- b.dat@gtdata@nids
- case.size &lt;- length(which(b.dat@phdata$disease == 1))
- control.size &lt;- length(which(b.dat@phdata$disease == 0))
- case.size
- control.size
- snpsb.total &lt;- b.dat@gtdata@nsnps
- testb.snp &lt;- scan.glm('disease ~ CRSNP', family = binomial(), data = b.dat)
- names(testb.snp)
- alpha &lt;- 5e-8
- testb.snp$snpnames[testb.snp$P1df < alpha]
- testb.snp$P1df[testb.snp$P1df < alpha]
- testb.qt &lt;- qtscore(disease, data = b.dat, trait = "binomial")
- slotNames(testb.qt)
- descriptives.scan(testb.qt)
- row.names(results(testb.qt))[results(testb.qt)$P1df < alpha]
- results(testb.qt)$P1df[results(testb.qt)$P1df < alpha]
- results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha]
- gkin &lt;- ibs(g.dat, weight = "freq")
- gkin[1:10,1:10]
- cps.full &lt;- cmdscale(as.dist(.5 - gkin), eig = T, k = 10)
- names(cps.full)
- cps &lt;- cps.full$points
- plot(cps[,1], cps[,2], pch = g.dat@phdata$popn)
- legend("topright", c("TSI","MEX", "CEU"), pch = c(1,2,3))
- colnames(cps)&lt;-c('C1','C2','C3','C4','C5','C6','C7','C8','C9','C10')
- gpc.dat &lt;- g.dat
- gpc.dat@phdata&lt;-cbind(g.dat@phdata, cps)
- test.pc.a &lt;- scan.glm('disease ~ CRSNP + C1 + C2 + C3 + C4 + C5', family=gaussian(), data = gpc.dat)
- test.pc.a$snpnames[test.pc.a$P1df < alpha]
- test.pc.a$P1df[test.pc.a$P1df < alpha]
- test.pc.b &lt;- qtscore(disease ~  C1 + C2 + C3 + C4 + C5, data = gpc.dat, trait = "gaussian")
- test.pc.b@lambda
- plot(cps.full$eig[1:10]/sum(cps.full$eig), axes = F, type = "b", xlab = "Components",  ylim = c(0,0.05), ylab = "Proportion of Variations", main = "MDS analysis scree plot")
- axis(1, 1:10)
- axis(2)
- plot(cumsum(cps.full$eig[1:10])/sum(cps.full$eig), axes = F, type = "b", ylim = c(0,0.2), xlab = "Components", ylab = "Proportion of Variations", main = "MDS analysis cumulative plot")
- axis(1, 1:10)
- axis(2)
- row.names(results(test.qt))[results(test.qt)$Pc1df < alpha]
- results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]
- test.qt@lambda
- obs &lt;- sort(results(test.qt)$chi2.1df)
- ept &lt;- sort(qchisq(ppoints(obs), df = 1))
- plot(ept, obs, main = "Genomic control (lambda = slope of the dashed line)", xlab="Expected chisq, 1df", ylab="Observed chisq, 1df")
- abline(0, 1, col = "red")
- abline(0, test.qt@lambda[1], lty = 2)
- median(results(test.qt)$chi2.1df)/0.456
- obs &lt;- sort(results(test.qt)$Pc1df)
- ept &lt;- ppoints(obs)
- plot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. via Genomic Control", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")
- abline(0, 1, col = "red")
- abline(h = 8, lty = 2)
- adj.gkin = gkin
- diag(adj.gkin) = hom(g.dat)$Var
- test.eg &lt;- egscore(disease, data = g.dat, kin = adj.gkin, naxes = 2)
- descriptives.scan(test.eg)
- snp.eg &lt;- row.names(results(test.eg))[results(test.eg)$P1df < alpha]
- pvalue.eg &lt;- results(test.eg)$P1df[results(test.eg)$P1df < alpha] lambda.eg &lt;- test.eg@lambda snp.eg  pvalue.eg lambda.eg
- for (k in 1:10){
-  test.tmp &lt;- egscore(disease, data = g.dat, kin = adj.gkin, naxes = k)
- print(test.tmp@lambda$estimate)
- }
- obs &lt;- sort(results(test.eg)$Pc1df)
- ept &lt;- ppoints(obs)
- plot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. w/ EIGENSTRAT", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")
- abline(0, 1, col = "red")
- abline(h = 8, lty = 2)
- plot(test.qt, col = "black")
- add.plot(test.eg, col = "gray", pch = 3)
- legend("topright", c("Original plot","After correction w/ EIGENSTRAT"), pch = c(1,3))==GWAS Data QC==
- plink --file GWAS --noweb
-  plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind --noweb
-  plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5 --noweb
-  plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5 --noweb
-  plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean --noweb
-  plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean --noweb
-  plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean --noweb
-  plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2 --noweb
-  plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking --noweb
-  R
   sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T)
   names(sexcheck)
   sex_problem = sexcheck[which(sexcheck$STATUS=="PROBLEM"),]
   sex_problem
   q()
-  plink --file GWAS_clean2 --genome --out duplicates --noweb
-  R
+ ##################################
+  plink --file GWAS_clean2 --genome --out duplicates
+ #### in R
+ setwd("to_your_working_directory/")
   dups = read.table("duplicates.genome", header = T)
   problem_pairs = dups[which(dups$PI_HAT > 0.4),]
@@ Line 160: / Line 47: @@
   myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT")
   problem_pairs[myvars]
   q()
-  plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3 --noweb
-  plink --file GWAS_clean3 --het --noweb
+ ######
-  R
-  Dataset &lt;- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".",
+  plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3
- strip.white=TRUE)
+  plink --file GWAS_clean3 --het
+ ###### in R
+  Dataset <- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".", strip.white=TRUE)
   mean(Dataset$F)
   sd(Dataset$F)
   jpeg("hist.jpeg", height=1000, width=1000)
   hist(scale(Dataset$F), xlim=c(-4,4))
   dev.off()
   q()
-  plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy --noweb
-  R
+ ######
+  plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy
+ ##### in R
   hardy = read.table("plink.hwe", header = T)
   names(hardy)
   hwe_prob = hardy[which(hardy$P < 0.0000009),]
   hwe_prob
   q()
-  plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4 --noweb==GWAS Control Substructure==
-  plink --file GWAS_clean4 --genome --mds-plot 10 --noweb
-  R
+ ##########
+  plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4
+ ###############################################
+ ##### Part 2: controlling for substructure#####
+ ###############################################
+  plink --file GWAS_clean4 --genome --cluster --mds-plot 10
+ #### in R
   mydata = read.table("mds_components.txt", header=T)
-  mydata$pch[mydata$Group==1 ] &lt;-15
-  mydata$pch[mydata$Group==2 ] &lt;-16
+  mydata$pch[mydata$Group==1 ] <-15
-  mydata$pch[mydata$Group==3 ] &lt;-2
+  mydata$pch[mydata$Group==2 ] <-16
-  jpeg("mds.jpeg", height=1000, width=1000)
+  mydata$pch[mydata$Group==3 ] <-2
+  jpeg("mds.jpeg", height=500, width=500)
   plot(mydata$C1, mydata$C2 ,pch=mydata$pch)
   dev.off()
   q()
-  plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj --noweb
-  plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.mds --covar-name C1 --logistic --adjust --out C1 --noweb
+ ######
-  plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.mds --covar-name C1-C2 --logistic --adjust --out C1-C2 --noweb
-  R
+  plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj
-  broadqq &lt;-function(pvals, title)
+ plink --file GWAS_clean4 --genome --cluster --pca 10 header
+  plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1 --logistic --adjust --out PC1
+  plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1-PC2 --logistic --adjust --out PC1-PC2
+ #### in R
+  broadqq <-function(pvals, title)
   {
- &nbsp;&nbsp;&nbsp;&nbsp;observed &lt;- sort(pvals)
+ 	observed <- sort(pvals)
- &nbsp;&nbsp;&nbsp;&nbsp;lobs &lt;- -(log10(observed))
+ 	lobs <- -(log10(observed))
-  &nbsp;&nbsp;&nbsp;&nbsp;expected &lt;- c(1:length(observed))
- &nbsp;&nbsp;&nbsp;&nbsp;lexp &lt;- -(log10(expected / (length(expected)+1)))
+ 	expected <- c(1:length(observed))
-  &nbsp;&nbsp;&nbsp;&nbsp;plot(c(0,7), c(0,7), col="red", lwd=3, type="l", xlab="Expected (-logP)", ylab="Observed (-logP)", xlim=c(0,max(lobs)), ylim=c(0,max(lobs)), las=1, xaxs="i", yaxs="i", bty="l", main = title)
+ 	lexp <- -(log10(expected / (length(expected)+1)))
- &nbsp;&nbsp;&nbsp;&nbsp;points(lexp, lobs, pch=23, cex=.4, bg="black") }
-  jpeg("qqplot_compare.jpeg", height=1000, width=1000)
+ 	plot(c(0,7), c(0,7), col="red", lwd=3, type="l", xlab="Expected (-logP)", ylab="Observed (-logP)", xlim=c(0,max(lobs)), ylim=c(0,max(lobs)), las=1, xaxs="i", yaxs="i", bty="l", main = title)
+ 	points(lexp, lobs, pch=23, cex=.4, bg="black") }
+  jpeg("qqplot_compare.jpeg", height=1000, width=500)
   par(mfrow=c(2,1))
-  aff_unadj&lt;-read.table("unadj.assoc.logistic", header=TRUE)
+  aff_unadj<-read.table("unadj.assoc.logistic", header=TRUE)
-  aff_unadj.add.p&lt;-aff_unadj[aff_unadj$TEST==c("ADD"),]$P
+  aff_unadj.add.p<-aff_unadj[aff_unadj$TEST==c("ADD"),]$P
   broadqq(aff_unadj.add.p,"Some Trait Unadjusted")
-  aff_C1C2&lt;-read.table("C1-C2.assoc.logistic", header=TRUE)
+  aff_C1C2<-read.table("PC1-PC2.assoc.logistic", header=TRUE)
-  aff_C1C2.add.p&lt;-aff_C1C2[aff_C1C2$TEST==c("ADD"),]$P
+  aff_C1C2.add.p<-aff_C1C2[aff_C1C2$TEST==c("ADD"),]$P
-  broadqq(aff_C1C2.add.p, "Some Trait Adjusted")
+  broadqq(aff_C1C2.add.p, "Some Trait Adjusted for PC1 and PC2")
   dev.off()
   gws_unadj = aff_unadj[which(aff_unadj$P < 0.0000001),]
   gws_unadj
   gws_adjusted = aff_C1C2[which(aff_C1C2$P < 0.0000001),]
   gws_adjusted
- q()
-==VAT==
- vtools -h
- vtools init VATDemo
- vtools import *.vcf.gz --var_info DP filter --geno_info DP_geno --build hg18 -j1
- vtools liftover hg19
- head phenotypes.csv
- vtools phenotype --from_file phenotypes.csv --delimiter ","
- vtools show project
- vtools show tables
- vtools show table variant
- vtools show samples
- vtools show genotypes
- vtools show fields
- vtools select variant --count
- vtools show genotypes &gt; GenotypeSummary.txt
- head GenotypeSummary.txt
- vtools output variant "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
- vtools select variant "filter='PASS'" --count
- vtools select variant "filter='PASS'" -o "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
- vtools update variant --from_stat 'total=#(GT)' 'num=#(alt)' 'het=#(het)' 'hom=#(hom)' 'other=#(other)' 'minDP=min(DP_geno)' 'maxDP=max(DP_geno)' 'meanDP=avg(DP_geno)' 'maf=maf()'
- vtools show fields
- vtools show table variant
- vtools update variant --from_stat 'totalGD10=#(GT)' 'numGD10=#(alt)' 'hetGD10=#(het)' 'homGD10=#(hom)' 'otherGD10=#(other)' 'mafGD10=maf()' --genotypes "DP_geno &gt; 10"
- vtools show fields
- vtools show table variant
- vtools output variant chr pos maf mafGD10 --header --limit 20
- vtools phenotype --set "RACE=0" --samples "filename like 'YRI%'"
- vtools phenotype --set "RACE=1" --samples "filename like 'CEU%'"
- vtools show samples --limit 10
- vtools update variant --from_stat 'CEU_mafGD10=maf()' --genotypes 'DP_geno&gt;10' --samples "RACE=1"
- vtools update variant --from_stat 'YRI_mafGD10=maf()' --genotypes 'DP_geno&gt;10' --samples "RACE=0"
- vtools output variant chr pos mafGD10 CEU_mafGD10 YRI_mafGD10 --header --limit 10
- vtools phenotype --from_stat 'CEU_totalGD10=#(GT)' 'CEU_numGD10=#(alt)' --genotypes 'DP_geno&gt;10' --samples "RACE=1"
- vtools phenotype --from_stat 'YRI_totalGD10=#(GT)' 'YRI_numGD10=#(alt)' --genotypes 'DP_geno&gt;10' --samples "RACE=0"
- vtools phenotype --output sample_name CEU_totalGD10 CEU_numGD10 YRI_totalGD10 YRI_numGD10 --header
- vtools select variant 'maf&gt;=0.01' -t variant_MAFge01 'Variants that have MAF &gt;= 0.01'
- vtools show tables
- vtools execute KING --var_table variant_MAFge01
- vtools_report plot_pheno_fields KING_MDS1 KING_MDS2 RACE --dot KING.mds.race.pdf --discrete_color Dark2
- vtools_report plot_pheno_fields KING_MDS1 KING_MDS2 panel --dot KING.mds.panel.pdf --discrete_color Dark2
- vtools execute ANNOVAR geneanno
- vtools output variant chr pos ref alt mut_type --limit 20 --header
- vtools_report trans_ratio variant -n num
- vtools_report trans_ratio variant -n numGD10
- vtools select variant "DP&lt;15" -t to_remove
- vtools show tables
- vtools remove variants to_remove -v0
- vtools show tables
- vtools remove genotypes "DP_geno&lt;10" -v0  <br />vtools select variant "mut_type like 'non%' or mut_type like 'stop%' or region_type='splicing'" -t v_funct  <br />vtools show tables  <br />vtools show samples --limit 5  <br />vtools select variant --samples "RACE=1" -t CEU  <br />mkdir -p ceu <br />cd ceu <br />vtools init ceu --parent ../ --variants CEU --samples "RACE=1" --build hg19
- vtools show project
- vtools select variant "CEU_mafGD10&gt;=0.05" -t common_ceu
- vtools select v_funct "CEU_mafGD10&lt;0.01" -t rare_ceu  <br />vtools use refGene  <br />vtools show annotation refGene  <br />vtools associate -h  <br />vtools show tests  <br />vtools show test LinRegBurden <br />vtools associate common_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db EA_CV &gt; EA_CV.asso.res
- grep -i error *.log
- less EA_CV.asso.res
- sort -g -k7 EA_CV.asso.res | head
- vtools show fields
- vtools associate rare_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db EA_RV &gt; EA_RV.asso.res
- grep -i error *.log | tail -22
- less EA_RV.asso.res
- sort -g -k6 EA_RV.asso.res | head
- vtools associate rare_ceu BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db EA_RV &gt; EA_RV_VT.asso.res
- grep -i error *.log | tail -22
- less EA_RV_VT.asso.res
- sort -g -k6 EA_RV_VT.asso.res | head
- vtools select rare_ceu "refGene.name2='ABCC1'" -o chr pos ref alt CEU_mafGD10 numGD10 mut_type --header
- vtools_report plot_association qq -o QQRV -b --label_top 2 -f 6 &lt; EA_RV.asso.res
- vtools_report plot_association manhattan -o MHRV -b --label_top 5 --color Dark2 --chrom_prefix None -f 6 &lt; EA_RV.asso.res <br />vtools associate rare_ceu BMI --covariate SEX KING_MDS1 KING_MDS2 -m "LinRegBurden --name RVMDS2 --alternative 2" -g refGene.name2 -j1 --to_db EA_RV &gt; EA_RV_MDS2.asso.res
- vtools_report plot_association qq -o QQRV_MDS2 -b --label_top 2 -f 6 &lt; EA_RV_MDS2.asso.res  <br />cd ..  <br />vtools select variant --samples "RACE=0" -t YRI <br />mkdir -p yri <br />cd yri <br />vtools init yri --parent ../ --variants YRI --samples "RACE=0" --build hg19 <br />vtools select variant "YRI_mafGD10&gt;=0.05" -t common_yri
- vtools select v_funct "YRI_mafGD10&lt;0.01" -t rare_yri  <br />vtools use refGene  <br />vtools associate common_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db YA_CV &gt; YA_CV.asso.res
- vtools associate rare_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db YA_RV &gt; YA_RV.asso.res
- vtools associate rare_yri BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db YA_RV &gt; YA_RV_VT.asso.res
- cd ..
- vtools_report meta_analysis ceu/EA_RV_VT.asso.res yri/YA_RV_VT.asso.res --beta 5 --pval 6 --se 7 -n 2 --link 1 &gt; META_RV_VT.asso.res
- cut -f1,3 META_RV_VT.asso.res | head