Difference between revisions of "AdvGeneMap2018Commands"

From Statistical Genetics Courses

Jump to: navigation, search
(Plink - Part 2 - Controlling for Substructure)
Line 5: Line 5:
  
 
===GenABEL===
 
===GenABEL===
 +
 
  # Load files
 
  # Load files
 
  library(GenABEL)
 
  library(GenABEL)
  convert.snp.tped(tped = "gwa_gabel_qtl.tped", tfam = "gwa_gabel_qtl.tfam", out = "gwa_gabel_qtl.raw", strand = "u")
+
  convert.snp.tped(tped &eq; "gwa_gabel_qtl.tped", tfam &eq; "gwa_gabel_qtl.tfam", out &eq; "gwa_gabel_qtl.raw", strand &eq; "u")
  g.dat <- load.gwaa.data(phen = "gwa_gabel_qtl.praw", gen = "gwa_gabel_qtl.raw", force = T)
+
  g.dat <- load.gwaa.data(phen &eq; "gwa_gabel_qtl.praw", gen &eq; "gwa_gabel_qtl.raw", force &eq; T)
 
  slotNames(g.dat)
 
  slotNames(g.dat)
 
  slotNames(g.dat@gtdata)
 
  slotNames(g.dat@gtdata)
Line 19: Line 20:
 
  # Trait
 
  # Trait
 
  summary(g.dat@phdata$disease)
 
  summary(g.dat@phdata$disease)
  hist(g.dat@phdata$disease, main="Quantitative Phenotype data summary", xlab = "Systolic pressure measure", freq = F,breaks=20, col="gray")
+
  hist(g.dat@phdata$disease, main&eq;"Quantitative Phenotype data summary", xlab &eq; "Systolic pressure measure", freq &eq; F,breaks&eq;20, col&eq;"gray")
 
  rug(g.dat@phdata$disease)  
 
  rug(g.dat@phdata$disease)  
 
  ###
 
  ###
Line 25: Line 26:
 
  ###
 
  ###
 
  # GLM test
 
  # GLM test
  test.snp <- scan.glm('disease ~ CRSNP', family = gaussian(), data = g.dat)
+
  test.snp <- scan.glm('disease ~ CRSNP', family &eq; gaussian(), data &eq; g.dat)
 
  names(test.snp)
 
  names(test.snp)
 
  alpha <- 5e-8   
 
  alpha <- 5e-8   
Line 31: Line 32:
 
  test.snp$P1df[test.snp$P1df < alpha]
 
  test.snp$P1df[test.snp$P1df < alpha]
 
  # Score test
 
  # Score test
  test.qt &lt;- qtscore(disease, data = g.dat, trait = "gaussian")
+
  test.qt &lt;- qtscore(disease, data &eq; g.dat, trait &eq; "gaussian")
 
  slotNames(test.qt)
 
  slotNames(test.qt)
 
  names(test.qt@results)
 
  names(test.qt@results)
Line 42: Line 43:
 
  obs &lt;- sort(results(test.qt)$P1df)  
 
  obs &lt;- sort(results(test.qt)$P1df)  
 
  ept &lt;- c(1:length(obs)) / (length(obs) + 1)  
 
  ept &lt;- c(1:length(obs)) / (length(obs) + 1)  
  plot(-log10(ept), -log10(obs), main = "GWAS QQ plot, qtl", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")
+
  plot(-log10(ept), -log10(obs), main &eq; "GWAS QQ plot, qtl", xlab&eq;"Expected -log10(pvalue)", ylab&eq;"Observed -log10(pvalue)")
  abline(0, 1, col = "red")
+
  abline(0, 1, col &eq; "red")
  abline(h = 8, lty = 2)
+
  abline(h &eq; 8, lty &eq; 2)
 
  # Manhattan plot         
 
  # Manhattan plot         
  plot(test.qt, col = "black")
+
  plot(test.qt, col &eq; "black")
 
  # Adding confounders
 
  # Adding confounders
  test.qt.sex &lt;- qtscore(disease ~ sex, data = g.dat, trait = "gaussian")
+
  test.qt.sex &lt;- qtscore(disease ~ sex, data &eq; g.dat, trait &eq; "gaussian")
 
  rownames(results(test.qt.sex))[results(test.qt)$P1df < alpha]
 
  rownames(results(test.qt.sex))[results(test.qt)$P1df < alpha]
  summary(lm(disease ~ sex, data = g.dat))
+
  summary(lm(disease ~ sex, data &eq; g.dat))
 
  ###
 
  ###
 
  # MDS
 
  # MDS
 
  ###
 
  ###
  gkin &lt;- ibs(g.dat, weight = "freq")
+
  gkin &lt;- ibs(g.dat, weight &eq; "freq")
 
  gkin[1:10,1:10]
 
  gkin[1:10,1:10]
  cps.full &lt;- cmdscale(as.dist(.5 - gkin), eig = T, k = 10)
+
  cps.full &lt;- cmdscale(as.dist(.5 - gkin), eig &eq; T, k &eq; 10)
 
  names(cps.full)  
 
  names(cps.full)  
 
  cps &lt;- cps.full$points  
 
  cps &lt;- cps.full$points  
  plot(cps[,1], cps[,2], pch = g.dat@phdata$popn)
+
  plot(cps[,1], cps[,2], pch &eq; g.dat@phdata$popn)
  legend(-0.16, 0.06, c("TSI","MEX", "CEU"), pch = c(1,2,3))   
+
  legend(-0.16, 0.06, c("TSI","MEX", "CEU"), pch &eq; c(1,2,3))   
 
  ###
 
  ###
 
  # Corrected test
 
  # Corrected test
Line 68: Line 69:
 
  gpc.dat &lt;- g.dat
 
  gpc.dat &lt;- g.dat
 
  gpc.dat@phdata&lt;-cbind(g.dat@phdata, cps)
 
  gpc.dat@phdata&lt;-cbind(g.dat@phdata, cps)
  test.pc.a &lt;- scan.glm('disease ~ CRSNP + C1 + C2 + C3 + C4 + C5', family=gaussian(), data = gpc.dat)  
+
  test.pc.a &lt;- scan.glm('disease ~ CRSNP + C1 + C2 + C3 + C4 + C5', family&eq;gaussian(), data &eq; gpc.dat)  
 
  test.pc.a$snpnames[test.pc.a$P1df < alpha]
 
  test.pc.a$snpnames[test.pc.a$P1df < alpha]
 
  test.pc.a$P1df[test.pc.a$P1df < alpha]
 
  test.pc.a$P1df[test.pc.a$P1df < alpha]
  test.pc.b &lt;- qtscore(disease ~  C1 + C2 + C3 + C4 + C5, data = gpc.dat, trait = "gaussian")
+
  test.pc.b &lt;- qtscore(disease ~  C1 + C2 + C3 + C4 + C5, data &eq; gpc.dat, trait &eq; "gaussian")
 
  test.pc.b@lambda
 
  test.pc.b@lambda
 
  # scree plot
 
  # scree plot
  plot(cps.full$eig[1:10]/sum(cps.full$eig), axes = F, type = "b", xlab = "Components",  ylim = c(0,0.05), ylab = "Proportion of Variations", main = "MDS analysis scree plot")  
+
  plot(cps.full$eig[1:10]/sum(cps.full$eig), axes &eq; F, type &eq; "b", xlab &eq; "Components",  ylim &eq; c(0,0.05), ylab &eq; "Proportion of Variations", main &eq; "MDS analysis scree plot")  
 
  axis(1, 1:10)
 
  axis(1, 1:10)
 
  axis(2)
 
  axis(2)
 
  # cumulative plot
 
  # cumulative plot
  plot(cumsum(cps.full$eig[1:10])/sum(cps.full$eig), axes = F, type = "b", ylim = c(0,0.2), xlab = "Components", ylab = "Proportion of Variations", main = "MDS analysis cumulative plot")  
+
  plot(cumsum(cps.full$eig[1:10])/sum(cps.full$eig), axes &eq; F, type &eq; "b", ylim &eq; c(0,0.2), xlab &eq; "Components", ylab &eq; "Proportion of Variations", main &eq; "MDS analysis cumulative plot")  
 
  axis(1, 1:10)
 
  axis(1, 1:10)
 
  axis(2)
 
  axis(2)
Line 89: Line 90:
 
  # Check for inflation of statistic  
 
  # Check for inflation of statistic  
 
  obs &lt;- sort(results(test.qt)$chi2.1df)
 
  obs &lt;- sort(results(test.qt)$chi2.1df)
  ept &lt;- sort(qchisq(1:length(obs) / (length(obs) + 1), df = 1))  
+
  ept &lt;- sort(qchisq(1:length(obs) / (length(obs) + 1), df &eq; 1))  
  plot(ept, obs, main = "Genomic control (slope is the inflation factor)", xlab="Expected chisq, 1df", ylab="Observed chisq, 1df")
+
  plot(ept, obs, main &eq; "Genomic control (slope is the inflation factor)", xlab&eq;"Expected chisq, 1df", ylab&eq;"Observed chisq, 1df")
  abline(0, 1, col = "red")
+
  abline(0, 1, col &eq; "red")
  abline(0, test.qt@lambda[1], lty = 2)
+
  abline(0, test.qt@lambda[1], lty &eq; 2)
 
  # Definition of GIF
 
  # Definition of GIF
 
  # Conventional definition
 
  # Conventional definition
Line 101: Line 102:
 
  obs &lt;- sort(results(test.qt)$Pc1df)
 
  obs &lt;- sort(results(test.qt)$Pc1df)
 
  ept &lt;- c(1:length(obs)) / (length(obs) + 1)
 
  ept &lt;- c(1:length(obs)) / (length(obs) + 1)
  plot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. via Genomic Control", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")
+
  plot(-log10(ept), -log10(obs), main &eq; "GWAS QQ plot adj. via Genomic Control", xlab&eq;"Expected -log10(pvalue)", ylab&eq;"Observed -log10(pvalue)")
  abline(0, 1, col = "red")
+
  abline(0, 1, col &eq; "red")
  abline(h = 8, lty = 2)
+
  abline(h &eq; 8, lty &eq; 2)
 
  # EIGENSTRAT
 
  # EIGENSTRAT
  adj.gkin = gkin
+
  adj.gkin &eq; gkin
  diag(adj.gkin) = hom(g.dat)$Var
+
  diag(adj.gkin) &eq; hom(g.dat)$Var
  # naxes = 3 is default value
+
  # naxes &eq; 3 is default value
  test.eg &lt;- egscore(disease, data = g.dat, kin = adj.gkin, naxes = 2)
+
  test.eg &lt;- egscore(disease, data &eq; g.dat, kin &eq; adj.gkin, naxes &eq; 2)
 
  descriptives.scan(test.eg)
 
  descriptives.scan(test.eg)
 
  snp.eg &lt;- row.names(results(test.eg))[results(test.eg)$P1df < alpha]
 
  snp.eg &lt;- row.names(results(test.eg))[results(test.eg)$P1df < alpha]
Line 118: Line 119:
 
  # Change #PCs
 
  # Change #PCs
 
  for (k in 1:10){  
 
  for (k in 1:10){  
  test.tmp &lt;- egscore(disease, data = g.dat, kin = adj.gkin, naxes = k)
+
  test.tmp &lt;- egscore(disease, data &eq; g.dat, kin &eq; adj.gkin, naxes &eq; k)
 
  print(test.tmp@lambda$estimate)
 
  print(test.tmp@lambda$estimate)
 
  }
 
  }
Line 124: Line 125:
 
  obs &lt;- sort(results(test.eg)$Pc1df)
 
  obs &lt;- sort(results(test.eg)$Pc1df)
 
  ept &lt;- c(1:length(obs)) / (length(obs) + 1)  
 
  ept &lt;- c(1:length(obs)) / (length(obs) + 1)  
  qqplot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. w/ EIGENSTRAT", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")
+
  qqplot(-log10(ept), -log10(obs), main &eq; "GWAS QQ plot adj. w/ EIGENSTRAT", xlab&eq;"Expected -log10(pvalue)", ylab&eq;"Observed -log10(pvalue)")
  abline(0, 1, col = "red")
+
  abline(0, 1, col &eq; "red")
  abline(h = 8, lty = 2)
+
  abline(h &eq; 8, lty &eq; 2)
 
  # Manhattan plot comparison
 
  # Manhattan plot comparison
  plot(test.qt, col = "black")
+
  plot(test.qt, col &eq; "black")
  add.plot(test.eg, col = "gray", pch = 3)
+
  add.plot(test.eg, col &eq; "gray", pch &eq; 3)
  legend("topright", c("Original plot","After correction w/ EIGENSTRAT"), pch = c(1,3))   
+
  legend("topright", c("Original plot","After correction w/ EIGENSTRAT"), pch &eq; c(1,3))   
 
  ###
 
  ###
 
  # Basic test, binary trait
 
  # Basic test, binary trait
 
  ###
 
  ###
 
  # load files to GenABEL
 
  # load files to GenABEL
  convert.snp.tped(tped = "gwa_gabel.tped", tfam = "gwa_gabel.tfam", out = "gwa_gabel.raw", strand = "u")
+
  convert.snp.tped(tped &eq; "gwa_gabel.tped", tfam &eq; "gwa_gabel.tfam", out &eq; "gwa_gabel.raw", strand &eq; "u")
  b.dat &lt;- load.gwaa.data(phen = "gwa_gabel.praw", gen = "gwa_gabel.raw", force = T)
+
  b.dat &lt;- load.gwaa.data(phen &eq; "gwa_gabel.praw", gen &eq; "gwa_gabel.raw", force &eq; T)
 
  slotNames(b.dat)
 
  slotNames(b.dat)
 
  slotNames(b.dat@gtdata)
 
  slotNames(b.dat@gtdata)
Line 143: Line 144:
 
  b.dat@gtdata@nids
 
  b.dat@gtdata@nids
 
  # number of cases and controls
 
  # number of cases and controls
  case.size &lt;- length(which(b.dat@phdata$disease == 1))
+
  case.size &lt;- length(which(b.dat@phdata$disease &eq;&eq; 1))
  control.size &lt;- length(which(b.dat@phdata$disease == 0))
+
  control.size &lt;- length(which(b.dat@phdata$disease &eq;&eq; 0))
 
  case.size  
 
  case.size  
 
  control.size  
 
  control.size  
Line 150: Line 151:
 
  snpsb.total &lt;- b.dat@gtdata@nsnps
 
  snpsb.total &lt;- b.dat@gtdata@nsnps
 
  # GLM test
 
  # GLM test
  testb.snp &lt;- scan.glm('disease ~ CRSNP', family = binomial(), data = b.dat)
+
  testb.snp &lt;- scan.glm('disease ~ CRSNP', family &eq; binomial(), data &eq; b.dat)
 
  names(testb.snp)   
 
  names(testb.snp)   
 
  alpha &lt;- 5e-8
 
  alpha &lt;- 5e-8
Line 156: Line 157:
 
  testb.snp$P1df[testb.snp$P1df < alpha]
 
  testb.snp$P1df[testb.snp$P1df < alpha]
 
  # Score test
 
  # Score test
  testb.qt &lt;- qtscore(disease, data = b.dat, trait = "binomial")
+
  testb.qt &lt;- qtscore(disease, data &eq; b.dat, trait &eq; "binomial")
 
  slotNames(testb.qt)
 
  slotNames(testb.qt)
 
  descriptives.scan(testb.qt)
 
  descriptives.scan(testb.qt)
Line 162: Line 163:
 
  results(testb.qt)$P1df[results(testb.qt)$P1df < alpha]  
 
  results(testb.qt)$P1df[results(testb.qt)$P1df < alpha]  
 
  results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha]
 
  results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha]
 
  
 
===Plink - Part 1 - Data QC===
 
===Plink - Part 1 - Data QC===
Line 177: Line 177:
 
  #### in R - open R by simply typing R
 
  #### in R - open R by simply typing R
 
  setwd("to_your_working_directory/")
 
  setwd("to_your_working_directory/")
  sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T)
+
  sexcheck &eq; read.table("GWAS_sex_checking.sexcheck", header&eq;T)
 
  names(sexcheck)
 
  names(sexcheck)
  sex_problem = sexcheck[which(sexcheck$STATUS=="PROBLEM"),]
+
  sex_problem &eq; sexcheck[which(sexcheck$STATUS&eq;&eq;"PROBLEM"),]
 
  sex_problem
 
  sex_problem
 
  q()
 
  q()
Line 186: Line 186:
 
  #### in R
 
  #### in R
 
  setwd("to_your_working_directory/")
 
  setwd("to_your_working_directory/")
  dups = read.table("duplicates.genome", header = T)
+
  dups &eq; read.table("duplicates.genome", header &eq; T)
  problem_pairs = dups[which(dups$PI_HAT > 0.4),]
+
  problem_pairs &eq; dups[which(dups$PI_HAT > 0.4),]
 
  problem_pairs
 
  problem_pairs
  problem_pairs = dups[which(dups$PI_HAT > 0.05),]
+
  problem_pairs &eq; dups[which(dups$PI_HAT > 0.05),]
  myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT")
+
  myvars &eq; c("FID1", "IID1", "FID2", "IID2", "PI_HAT")
 
  problem_pairs[myvars]
 
  problem_pairs[myvars]
 
  q()
 
  q()
Line 197: Line 197:
 
  plink --file GWAS_clean3 --het
 
  plink --file GWAS_clean3 --het
 
  ###### in R
 
  ###### in R
  Dataset &lt;- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".", strip.white=TRUE)
+
  Dataset &lt;- read.table("plink.het", header&eq;TRUE, sep&eq;"", na.strings&eq;"NA", dec&eq;".", strip.white&eq;TRUE)
 
  mean(Dataset$F)
 
  mean(Dataset$F)
 
  sd(Dataset$F)
 
  sd(Dataset$F)
  jpeg("hist.jpeg", height=1000, width=1000)
+
  jpeg("hist.jpeg", height&eq;1000, width&eq;1000)
  hist(scale(Dataset$F), xlim=c(-4,4))
+
  hist(scale(Dataset$F), xlim&eq;c(-4,4))
 
  dev.off()
 
  dev.off()
 
  q()
 
  q()
Line 207: Line 207:
 
  plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy  
 
  plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy  
 
  ##### in R
 
  ##### in R
  hardy = read.table("plink.hwe", header = T)
+
  hardy &eq; read.table("plink.hwe", header &eq; T)
 
  names(hardy)
 
  names(hardy)
  hwe_prob = hardy[which(hardy$P < 0.0000009),]
+
  hwe_prob &eq; hardy[which(hardy$P < 0.0000009),]
 
  hwe_prob
 
  hwe_prob
 
  q()
 
  q()
Line 216: Line 216:
  
  
===Plink - Part 2 - Controlling for Substructure===
+
 
 +
 
 +
 
 +
===Plink - Part 2 - Controlling for Substructure===
 +
 
  plink --file GWAS_clean4 --genome --cluster --mds-plot 10
 
  plink --file GWAS_clean4 --genome --cluster --mds-plot 10
 
  #### in R
 
  #### in R
  mydata = read.table("mds_components.txt", header=T)
+
  mydata &eq; read.table("mds_components.txt", header&eq;T)
  mydata$pch&#91;mydata$Group==1 &#93; &lt;-15
+
  mydata$pch[mydata$Group&eq;&eq;1 ] &lt;-15
  mydata$pch&#91;mydata$Group==2 &#93; &lt;-16
+
  mydata$pch[mydata$Group&eq;&eq;2 ] &lt;-16
  mydata$pch&#91;mydata$Group==3 &#93; &lt;-2
+
  mydata$pch[mydata$Group&eq;&eq;3 ] &lt;-2
  jpeg("mds.jpeg", height=500, width=500)
+
  jpeg("mds.jpeg", height&eq;500, width&eq;500)
  plot(mydata$C1, mydata$C2 ,pch=mydata$pch)
+
  plot(mydata$C1, mydata$C2 ,pch&eq;mydata$pch)
 
  dev.off()
 
  dev.off()
 
  q()
 
  q()
Line 235: Line 239:
 
  broadqq &lt;-function(pvals, title)
 
  broadqq &lt;-function(pvals, title)
 
  {
 
  {
observed &lt;- sort(pvals)
+
observed &lt;- sort(pvals)
lobs &lt;- -(log10(observed))
+
lobs &lt;- -(log10(observed))
expected &lt;- c(1:length(observed))  
+
expected &lt;- c(1:length(observed))
lexp &lt;- -(log10(expected / (length(expected)+1)))
+
lexp &lt;- -(log10(expected / (length(expected)+1)))
plot(c(0,7), c(0,7), col="red", lwd=3, type="l", xlab="Expected (-logP)", ylab="Observed (-logP)", xlim=c(0,max(lobs)), ylim=c(0,max(lobs)), las=1, xaxs="i", yaxs="i", bty="l", main = title)
+
plot(c(0,7), c(0,7), col&eq;"red", lwd&eq;3, type&eq;"l", xlab&eq;"Expected (-logP)", ylab&eq;"Observed (-logP)", xlim&eq;c(0,max(lobs)), ylim&eq;c(0,max(lobs)), las&eq;1, xaxs&eq;"i", yaxs&eq;"i", bty&eq;"l", main &eq; title)
points(lexp, lobs, pch=23, cex=.4, bg="black") }
+
points(lexp, lobs, pch&eq;23, cex&eq;.4, bg&eq;"black") }
  jpeg("qqplot_compare.jpeg", height=1000, width=500)
+
 
  par(mfrow=c(2,1))
+
  jpeg("qqplot_compare.jpeg", height&eq;1000, width&eq;500)
  aff_unadj&lt;-read.table("unadj.assoc.logistic", header=TRUE)
+
  par(mfrow&eq;c(2,1))
  aff_unadj.add.p&lt;-aff_unadj&#91;aff_unadj$TEST==c("ADD"),&#93;$P
+
  aff_unadj&lt;-read.table("unadj.assoc.logistic", header&eq;TRUE)
 +
  aff_unadj.add.p&lt;-aff_unadj[aff_unadj$TEST&eq;&eq;c("ADD"),]$P
 
  broadqq(aff_unadj.add.p,"Some Trait Unadjusted")
 
  broadqq(aff_unadj.add.p,"Some Trait Unadjusted")
  aff_C1C2&lt;-read.table("PC1-PC2.assoc.logistic", header=TRUE)
+
  aff_C1C2&lt;-read.table("PC1-PC2.assoc.logistic", header&eq;TRUE)
  aff_C1C2.add.p&lt;-aff_C1C2&#91;aff_C1C2$TEST==c("ADD"),&#93;$P
+
  aff_C1C2.add.p&lt;-aff_C1C2[aff_C1C2$TEST&eq;&eq;c("ADD"),]$P
 
  broadqq(aff_C1C2.add.p, "Some Trait Adjusted for PC1 and PC2")
 
  broadqq(aff_C1C2.add.p, "Some Trait Adjusted for PC1 and PC2")
 
  dev.off()
 
  dev.off()
  gws_unadj = aff_unadj&#91;which(aff_unadj$P &lt; 0.0000001),&#93;
+
  gws_unadj &eq; aff_unadj[which(aff_unadj$P &lt; 0.0000001),]
 
  gws_unadj
 
  gws_unadj
  gws_adjusted = aff_C1C2&#91;which(aff_C1C2$P &lt; 0.0000001),&#93;
+
  gws_adjusted &eq; aff_C1C2[which(aff_C1C2$P &lt; 0.0000001),]
 
  gws_adjusted
 
  gws_adjusted
 +
 +
  
  
 
===VAT===
 
===VAT===
 +
 
  vtools -h
 
  vtools -h
 
  vtools init VATDemo
 
  vtools init VATDemo
Line 273: Line 281:
 
  head GenotypeSummary.txt
 
  head GenotypeSummary.txt
 
  vtools output variant "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
 
  vtools output variant "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
  vtools select variant "filter=’PASS’" --count
+
  vtools select variant "filter&eq;’PASS’" --count
  vtools select variant "filter=’PASS’" -o "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
+
  vtools select variant "filter&eq;’PASS’" -o "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
  vtools update variant --from_stat ’total=#(GT)’ ’num=#(alt)’ ’het=#(het)’ ’hom=#(hom)’ ’other=#(other)’ ’minDP=min(DP_geno)’ ’maxDP=max(DP_geno)’ ’meanDP=avg(DP_geno)’ ’maf=maf()’
+
  vtools update variant --from_stat ’total&eq;#(GT)’ ’num&eq;#(alt)’ ’het&eq;#(het)’ ’hom&eq;#(hom)’ ’other&eq;#(other)’ ’minDP&eq;min(DP_geno)’ ’maxDP&eq;max(DP_geno)’ ’meanDP&eq;avg(DP_geno)’ ’maf&eq;maf()’
 
  vtools show fields
 
  vtools show fields
 
  vtools show table variant
 
  vtools show table variant
  vtools update variant --from_stat ’totalGD10=#(GT)’ ’numGD10=#(alt)’ ’hetGD10=#(het)’ ’homGD10=#(hom)’ ’otherGD10=#(other)’ ’mafGD10=maf()’ --genotypes "DP_geno &gt; 10"
+
  vtools update variant --from_stat ’totalGD10&eq;#(GT)’ ’numGD10&eq;#(alt)’ ’hetGD10&eq;#(het)’ ’homGD10&eq;#(hom)’ ’otherGD10&eq;#(other)’ ’mafGD10&eq;maf()’ --genotypes "DP_geno &gt; 10"
 
  vtools show fields
 
  vtools show fields
 
  vtools show table variant
 
  vtools show table variant
 
  vtools output variant chr pos maf mafGD10 --header --limit 20
 
  vtools output variant chr pos maf mafGD10 --header --limit 20
  vtools phenotype --set "RACE=0" --samples "filename like ’YRI%’"
+
  vtools phenotype --set "RACE&eq;0" --samples "filename like ’YRI%’"
  vtools phenotype --set "RACE=1" --samples "filename like ’CEU%’"
+
  vtools phenotype --set "RACE&eq;1" --samples "filename like ’CEU%’"
 
  vtools show samples --limit 10
 
  vtools show samples --limit 10
  vtools update variant --from_stat ’CEU_mafGD10=maf()’ --genotypes ’DP_geno&gt;10’ --samples "RACE=1"
+
  vtools update variant --from_stat ’CEU_mafGD10&eq;maf()’ --genotypes ’DP_geno&gt;10’ --samples "RACE&eq;1"
  vtools update variant --from_stat ’YRI_mafGD10=maf()’ --genotypes ’DP_geno&gt;10’ --samples "RACE=0"
+
  vtools update variant --from_stat ’YRI_mafGD10&eq;maf()’ --genotypes ’DP_geno&gt;10’ --samples "RACE&eq;0"
 
  vtools output variant chr pos mafGD10 CEU_mafGD10 YRI_mafGD10 --header --limit 10
 
  vtools output variant chr pos mafGD10 CEU_mafGD10 YRI_mafGD10 --header --limit 10
  vtools phenotype --from_stat ’CEU_totalGD10=#(GT)’ ’CEU_numGD10=#(alt)’ --genotypes ’DP_geno&gt;10’ --samples "RACE=1"
+
  vtools phenotype --from_stat ’CEU_totalGD10&eq;#(GT)’ ’CEU_numGD10&eq;#(alt)’ --genotypes ’DP_geno&gt;10’ --samples "RACE&eq;1"
  vtools phenotype --from_stat ’YRI_totalGD10=#(GT)’ ’YRI_numGD10=#(alt)’ --genotypes ’DP_geno&gt;10’ --samples "RACE=0"
+
  vtools phenotype --from_stat ’YRI_totalGD10&eq;#(GT)’ ’YRI_numGD10&eq;#(alt)’ --genotypes ’DP_geno&gt;10’ --samples "RACE&eq;0"
 
  vtools phenotype --output sample_nameCEU_totalGD10CEU_numGD10YRI_totalGD10YRI_numGD10 --header
 
  vtools phenotype --output sample_nameCEU_totalGD10CEU_numGD10YRI_totalGD10YRI_numGD10 --header
 
  vtools execute ANNOVAR geneanno
 
  vtools execute ANNOVAR geneanno
Line 300: Line 308:
 
  vtools show tables
 
  vtools show tables
 
  vtools remove genotypes "DP_geno&lt;10" -v0
 
  vtools remove genotypes "DP_geno&lt;10" -v0
  vtools select variant "mut_type like ’non%’ or mut_type like ’stop%’ or region_type=’splicing’" -t v_funct  
+
  vtools select variant "mut_type like ’non%’ or mut_type like ’stop%’ or region_type&eq;’splicing’" -t v_funct  
 
  vtools show tables
 
  vtools show tables
 
  vtools show samples --limit 5
 
  vtools show samples --limit 5
  vtools select variant --samples "RACE=1" -t CEU
+
  vtools select variant --samples "RACE&eq;1" -t CEU
 
  mkdir -p ceu
 
  mkdir -p ceu
 
  cd ceu
 
  cd ceu
  vtools init ceu --parent ../ --variants CEU --samples "RACE=1" --build hg19 vtools show project
+
  vtools init ceu --parent ../ --variants CEU --samples "RACE&eq;1" --build hg19 vtools show project
  vtools select variant "CEU_mafGD10&gt;=0.05" -t common_ceu
+
  vtools select variant "CEU_mafGD10&gt;&eq;0.05" -t common_ceu
 
  vtools select v_funct "CEU_mafGD10&lt;0.01" -t rare_ceu
 
  vtools select v_funct "CEU_mafGD10&lt;0.01" -t rare_ceu
 
  vtools use refGene
 
  vtools use refGene
Line 327: Line 335:
 
  less EA_RV_VT.asso.res
 
  less EA_RV_VT.asso.res
 
  sort -g -k6 EA_RV_VT.asso.res | head
 
  sort -g -k6 EA_RV_VT.asso.res | head
  vtools select rare_ceu "refGene.name2=’ABCC1’" -o chr pos ref alt CEU_mafGD10 numGD10 mut_type --header
+
  vtools select rare_ceu "refGene.name2&eq;’ABCC1’" -o chr pos ref alt CEU_mafGD10 numGD10 mut_type --header
 
  cd ..
 
  cd ..
  vtools select variant --samples "RACE=0" -t YRI
+
  vtools select variant --samples "RACE&eq;0" -t YRI
 
  mkdir -p yri
 
  mkdir -p yri
 
  cd yri
 
  cd yri
  vtools init yri --parent ../ --variants YRI --samples "RACE=0" --build hg19 vtools select variant "YRI_mafGD10&gt;=0.05" -t common_yri vtools select v_funct "YRI_mafGD10&lt;0.01" -t rare_yri
+
  vtools init yri --parent ../ --variants YRI --samples "RACE&eq;0" --build hg19 vtools select variant "YRI_mafGD10&gt;&eq;0.05" -t common_yri vtools select v_funct "YRI_mafGD10&lt;0.01" -t rare_yri
 
  vtools use refGene
 
  vtools use refGene
 
  vtools associate common_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db YA_CV &gt; YA_CV.asso.res
 
  vtools associate common_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db YA_CV &gt; YA_CV.asso.res

Revision as of 15:20, 23 January 2018



GenABEL

# GenABEL walkthrough: a quantitative trait (files "gwa_gabel_qtl.*") followed
# by a binary trait (files "gwa_gabel.*"). Intended to be run interactively,
# top to bottom: later steps reuse objects created earlier (g.dat, alpha,
# test.qt, gkin, cps, cps.full, adj.gkin).

# Load files
library(GenABEL)
convert.snp.tped(
  tped = "gwa_gabel_qtl.tped", tfam = "gwa_gabel_qtl.tfam",
  out = "gwa_gabel_qtl.raw", strand = "u"
)
g.dat <- load.gwaa.data(
  phen = "gwa_gabel_qtl.praw", gen = "gwa_gabel_qtl.raw", force = TRUE
)
slotNames(g.dat)
slotNames(g.dat@gtdata)
colnames(g.dat@phdata)
# sample size
sample.size <- g.dat@gtdata@nids
# number of SNPs
snps.total <- g.dat@gtdata@nsnps
print(c(sample.size, snps.total))
# Trait
summary(g.dat@phdata$disease)
hist(
  g.dat@phdata$disease,
  main = "Quantitative Phenotype data summary",
  xlab = "Systolic pressure measure", freq = FALSE, breaks = 20, col = "gray"
)
rug(g.dat@phdata$disease)
###
# tests for association
###
# GLM test
test.snp <- scan.glm('disease ~ CRSNP', family = gaussian(), data = g.dat)
names(test.snp)
# Significance threshold used for every "< alpha" filter below
alpha <- 5e-8
test.snp$snpnames[test.snp$P1df < alpha]
test.snp$P1df[test.snp$P1df < alpha]
# Score test
test.qt <- qtscore(disease, data = g.dat, trait = "gaussian")
slotNames(test.qt)
names(test.qt@results)
test.qt@lambda
descriptives.scan(test.qt)
rownames(results(test.qt))[results(test.qt)$P1df < alpha]
results(test.qt)$P1df[results(test.qt)$P1df < alpha]
results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]
# QQ plot
obs <- sort(results(test.qt)$P1df)
# Expected quantiles i / (n + 1) for the sorted p-values
ept <- seq_along(obs) / (length(obs) + 1)
plot(
  -log10(ept), -log10(obs),
  main = "GWAS QQ plot, qtl",
  xlab = "Expected -log10(pvalue)", ylab = "Observed -log10(pvalue)"
)
abline(0, 1, col = "red")
# Horizontal reference line at -log10(p) = 8
abline(h = 8, lty = 2)
# Manhattan plot
plot(test.qt, col = "black")
# Adding confounders
test.qt.sex <- qtscore(disease ~ sex, data = g.dat, trait = "gaussian")
# Filter on the sex-adjusted p-values (the listing previously indexed these
# rows with the unadjusted test.qt p-values).
rownames(results(test.qt.sex))[results(test.qt.sex)$P1df < alpha]
summary(lm(disease ~ sex, data = g.dat))
###
# MDS
###
gkin <- ibs(g.dat, weight = "freq")
gkin[1:10, 1:10]
# Classical MDS on the distance 0.5 - gkin, keeping 10 dimensions
cps.full <- cmdscale(as.dist(.5 - gkin), eig = TRUE, k = 10)
names(cps.full)
cps <- cps.full$points
plot(cps[, 1], cps[, 2], pch = g.dat@phdata$popn)
legend(-0.16, 0.06, c("TSI", "MEX", "CEU"), pch = c(1, 2, 3))
###
# Corrected test
###
# Incorporating PCs as predictors
colnames(cps) <- c('C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10')
gpc.dat <- g.dat
gpc.dat@phdata <- cbind(g.dat@phdata, cps)
test.pc.a <- scan.glm(
  'disease ~ CRSNP + C1 + C2 + C3 + C4 + C5',
  family = gaussian(), data = gpc.dat
)
test.pc.a$snpnames[test.pc.a$P1df < alpha]
test.pc.a$P1df[test.pc.a$P1df < alpha]
test.pc.b <- qtscore(
  disease ~ C1 + C2 + C3 + C4 + C5,
  data = gpc.dat, trait = "gaussian"
)
test.pc.b@lambda
# scree plot
plot(
  cps.full$eig[1:10] / sum(cps.full$eig),
  axes = FALSE, type = "b", xlab = "Components", ylim = c(0, 0.05),
  ylab = "Proportion of Variations", main = "MDS analysis scree plot"
)
axis(1, 1:10)
axis(2)
# cumulative plot
plot(
  cumsum(cps.full$eig[1:10]) / sum(cps.full$eig),
  axes = FALSE, type = "b", ylim = c(0, 0.2), xlab = "Components",
  ylab = "Proportion of Variations", main = "MDS analysis cumulative plot"
)
axis(1, 1:10)
axis(2)
# Genomic control
# Uncorrected GIF
test.qt@lambda
# Corrected p-value
row.names(results(test.qt))[results(test.qt)$Pc1df < alpha]
results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]
# Check for inflation of statistic
obs <- sort(results(test.qt)$chi2.1df)
ept <- sort(qchisq(seq_along(obs) / (length(obs) + 1), df = 1))
plot(
  ept, obs,
  main = "Genomic control (slope is the inflation factor)",
  xlab = "Expected chisq, 1df", ylab = "Observed chisq, 1df"
)
abline(0, 1, col = "red")
abline(0, test.qt@lambda[1], lty = 2)
# Definition of GIF
# Conventional definition
# (0.456 approximates the median of a 1-df chi-square, qchisq(0.5, df = 1))
median(results(test.qt)$chi2.1df) / 0.456
# GenABEL definition
lm(obs ~ ept)$coef[2]
# QQ plot
obs <- sort(results(test.qt)$Pc1df)
ept <- seq_along(obs) / (length(obs) + 1)
plot(
  -log10(ept), -log10(obs),
  main = "GWAS QQ plot adj. via Genomic Control",
  xlab = "Expected -log10(pvalue)", ylab = "Observed -log10(pvalue)"
)
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
# EIGENSTRAT
adj.gkin <- gkin
diag(adj.gkin) <- hom(g.dat)$Var
# naxes = 3 is default value
test.eg <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = 2)
descriptives.scan(test.eg)
snp.eg <- row.names(results(test.eg))[results(test.eg)$P1df < alpha]
pvalue.eg <- results(test.eg)$P1df[results(test.eg)$P1df < alpha]
lambda.eg <- test.eg@lambda
snp.eg
pvalue.eg
lambda.eg
# Change #PCs
# Print the lambda estimate for each number of axes from 1 to 10
for (k in 1:10) {
  test.tmp <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = k)
  print(test.tmp@lambda$estimate)
}
# QQ plot
obs <- sort(results(test.eg)$Pc1df)
ept <- seq_along(obs) / (length(obs) + 1)
qqplot(
  -log10(ept), -log10(obs),
  main = "GWAS QQ plot adj. w/ EIGENSTRAT",
  xlab = "Expected -log10(pvalue)", ylab = "Observed -log10(pvalue)"
)
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
# Manhattan plot comparison
plot(test.qt, col = "black")
add.plot(test.eg, col = "gray", pch = 3)
legend(
  "topright",
  c("Original plot", "After correction w/ EIGENSTRAT"),
  pch = c(1, 3)
)
###
# Basic test, binary trait
###
# load files to GenABEL
convert.snp.tped(
  tped = "gwa_gabel.tped", tfam = "gwa_gabel.tfam",
  out = "gwa_gabel.raw", strand = "u"
)
b.dat <- load.gwaa.data(
  phen = "gwa_gabel.praw", gen = "gwa_gabel.raw", force = TRUE
)
slotNames(b.dat)
slotNames(b.dat@gtdata)
colnames(b.dat@phdata)
# sample size
b.dat@gtdata@nids
# number of cases and controls
case.size <- length(which(b.dat@phdata$disease == 1))
control.size <- length(which(b.dat@phdata$disease == 0))
case.size
control.size
# number of SNPs
snpsb.total <- b.dat@gtdata@nsnps
# GLM test
testb.snp <- scan.glm('disease ~ CRSNP', family = binomial(), data = b.dat)
names(testb.snp)
alpha <- 5e-8
testb.snp$snpnames[testb.snp$P1df < alpha]
testb.snp$P1df[testb.snp$P1df < alpha]
# Score test
testb.qt <- qtscore(disease, data = b.dat, trait = "binomial")
slotNames(testb.qt)
descriptives.scan(testb.qt)
row.names(results(testb.qt))[results(testb.qt)$P1df < alpha]
results(testb.qt)$P1df[results(testb.qt)$P1df < alpha]
results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha]

Plink - Part 1 - Data QC

# Data QC transcript: lines starting with "plink" are shell commands; the
# blocks introduced by "in R" are typed into an interactive R session.
plink --file GWAS
plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind
plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5
plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5
plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean
plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean
plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean
plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2
plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking
#### in R - open R by simply typing R
setwd("to_your_working_directory/")
sexcheck <- read.table("GWAS_sex_checking.sexcheck", header = TRUE)
names(sexcheck)
# Keep only the individuals whose STATUS column reads "PROBLEM"
sex_problem <- sexcheck[which(sexcheck$STATUS == "PROBLEM"), ]
sex_problem
q()
##################################
plink --file GWAS_clean2 --genome --out duplicates
#### in R
setwd("to_your_working_directory/")
dups <- read.table("duplicates.genome", header = TRUE)
problem_pairs <- dups[which(dups$PI_HAT > 0.4), ]
problem_pairs
# Same filter with a looser PI_HAT cut-off; overwrites the previous result
problem_pairs <- dups[which(dups$PI_HAT > 0.05), ]
myvars <- c("FID1", "IID1", "FID2", "IID2", "PI_HAT")
problem_pairs[myvars]
q()
######
# NOTE(review): IBS_excluded.txt is not written by any command shown here;
# presumably it is prepared by hand from problem_pairs above - confirm.
plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3
plink --file GWAS_clean3 --het
###### in R
Dataset <- read.table(
  "plink.het",
  header = TRUE, sep = "", na.strings = "NA", dec = ".", strip.white = TRUE
)
mean(Dataset$F)
sd(Dataset$F)
jpeg("hist.jpeg", height = 1000, width = 1000)
# Histogram of the standardized F column, written to hist.jpeg
hist(scale(Dataset$F), xlim = c(-4, 4))
dev.off()
q()
######
plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy 
##### in R
hardy <- read.table("plink.hwe", header = TRUE)
names(hardy)
hwe_prob <- hardy[which(hardy$P < 0.0000009), ]
hwe_prob
q()
##########
# NOTE(review): HWE_out.txt is not written by any command shown here;
# presumably it lists the SNPs found in hwe_prob above - confirm.
plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4



Plink - Part 2 - Controlling for Substructure

# Substructure transcript: lines starting with "plink" are shell commands; the
# block introduced by "in R" is typed into an interactive R session.
plink --file GWAS_clean4 --genome --cluster --mds-plot 10
#### in R
# NOTE(review): mds_components.txt is not written by any command shown here;
# the code below expects it to have Group, C1 and C2 columns - confirm.
mydata <- read.table("mds_components.txt", header = TRUE)
# One plotting symbol per Group value
mydata$pch[mydata$Group == 1] <- 15
mydata$pch[mydata$Group == 2] <- 16
mydata$pch[mydata$Group == 3] <- 2
jpeg("mds.jpeg", height = 500, width = 500)
plot(mydata$C1, mydata$C2, pch = mydata$pch)
dev.off()
q()
######
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj
plink --file GWAS_clean4 --genome --cluster --pca 10 header
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1 --logistic --adjust --out PC1
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1-PC2 --logistic --adjust --out PC1-PC2
#### in R
# Draw a QQ plot of observed vs. expected -log10 p-values on the current
# graphics device.
#
# pvals: numeric vector of p-values (sort() drops NA values).
# title: main title of the plot.
#
# Returns the value of the final points() call; called for its plot.
broadqq <- function(pvals, title) {
  observed <- sort(pvals)
  lobs <- -(log10(observed))
  # Expected quantiles i / (n + 1) for the sorted p-values, on the same scale
  expected <- seq_along(observed)
  lexp <- -(log10(expected / (length(expected) + 1)))
  # Red reference diagonal from (0, 0) to (7, 7); both axes are clipped to the
  # largest observed -log10(p).
  plot(
    c(0, 7), c(0, 7),
    col = "red", lwd = 3, type = "l",
    xlab = "Expected (-logP)", ylab = "Observed (-logP)",
    xlim = c(0, max(lobs)), ylim = c(0, max(lobs)),
    las = 1, xaxs = "i", yaxs = "i", bty = "l", main = title
  )
  points(lexp, lobs, pch = 23, cex = .4, bg = "black")
}

# Two QQ plots stacked in one image: unadjusted vs. adjusted for PC1 and PC2.
jpeg("qqplot_compare.jpeg", height = 1000, width = 500)
par(mfrow = c(2, 1))
# P values of the ADD rows from the unadjusted run.
aff_unadj <- read.table("unadj.assoc.logistic", header = TRUE)
aff_unadj.add.p <- aff_unadj[aff_unadj$TEST == "ADD", ]$P
broadqq(aff_unadj.add.p, "Some Trait Unadjusted")
# P values of the ADD rows from the run adjusted for PC1 and PC2.
aff_C1C2 <- read.table("PC1-PC2.assoc.logistic", header = TRUE)
aff_C1C2.add.p <- aff_C1C2[aff_C1C2$TEST == "ADD", ]$P
broadqq(aff_C1C2.add.p, "Some Trait Adjusted for PC1 and PC2")
dev.off()
# List the rows with P below 0.0000001 in each result table.
gws_unadj <- aff_unadj[which(aff_unadj$P < 0.0000001), ]
gws_unadj
gws_adjusted <- aff_C1C2[which(aff_C1C2$P < 0.0000001), ]
gws_adjusted



===VAT===

# Show the vtools help, then create a project named VATDemo.
vtools -h
vtools init VATDemo
# Import every *.vcf.gz file with DP and filter as variant info and DP_geno as genotype info, using build hg18.
vtools import *.vcf.gz --var_info DP filter --geno_info DP_geno --build hg18 -j1
# Add hg19 coordinates.
vtools liftover hg19
# Look at the phenotype file, then load it (comma-delimited).
head phenotypes.csv
vtools phenotype --from_file phenotypes.csv --delimiter ","
# Inspect the project: summary, tables, the variant table, samples, genotypes, and fields.
vtools show project
vtools show tables
vtools show table variant
vtools show samples
vtools show genotypes
vtools show fields
# Count the variants in the variant table.
vtools select variant --count
# Save the genotype summary to a file and look at its first lines.
vtools show genotypes > GenotypeSummary.txt
head GenotypeSummary.txt
# Summary statistics of DP over all variants.
vtools output variant "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
# Count the variants whose filter field is PASS, then print the same DP summary for them.
vtools select variant "filter='PASS'" --count
vtools select variant "filter='PASS'" -o "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
# Add per-variant genotype counts, DP_geno min/max/mean, and maf as new variant fields.
vtools update variant --from_stat 'total=#(GT)' 'num=#(alt)' 'het=#(het)' 'hom=#(hom)' 'other=#(other)' 'minDP=min(DP_geno)' 'maxDP=max(DP_geno)' 'meanDP=avg(DP_geno)' 'maf=maf()'
vtools show fields
vtools show table variant
# Same counts restricted to genotypes with DP_geno > 10, stored in the *GD10 fields.
vtools update variant --from_stat 'totalGD10=#(GT)' 'numGD10=#(alt)' 'hetGD10=#(het)' 'homGD10=#(hom)' 'otherGD10=#(other)' 'mafGD10=maf()' --genotypes "DP_geno > 10"
vtools show fields
vtools show table variant
# Compare maf and mafGD10 for the first 20 variants.
vtools output variant chr pos maf mafGD10 --header --limit 20
# Set RACE to 0 for samples whose filename starts with YRI and to 1 for those starting with CEU.
vtools phenotype --set "RACE=0" --samples "filename like 'YRI%'"
vtools phenotype --set "RACE=1" --samples "filename like 'CEU%'"
vtools show samples --limit 10
# Per-group maf using genotypes with DP_geno > 10.
vtools update variant --from_stat 'CEU_mafGD10=maf()' --genotypes 'DP_geno>10' --samples "RACE=1"
vtools update variant --from_stat 'YRI_mafGD10=maf()' --genotypes 'DP_geno>10' --samples "RACE=0"
vtools output variant chr pos mafGD10 CEU_mafGD10 YRI_mafGD10 --header --limit 10
# Per-sample genotype and alternative-allele counts for each group, stored as phenotypes.
vtools phenotype --from_stat 'CEU_totalGD10=#(GT)' 'CEU_numGD10=#(alt)' --genotypes 'DP_geno>10' --samples "RACE=1"
vtools phenotype --from_stat 'YRI_totalGD10=#(GT)' 'YRI_numGD10=#(alt)' --genotypes 'DP_geno>10' --samples "RACE=0"
# Print the sample name and the four new per-sample counts (one field per argument).
vtools phenotype --output sample_name CEU_totalGD10 CEU_numGD10 YRI_totalGD10 YRI_numGD10 --header
# Run the ANNOVAR geneanno pipeline and show the resulting mut_type for the first 20 variants.
vtools execute ANNOVAR geneanno
vtools output variant chr pos ref alt mut_type --limit 20 --header
# trans_ratio report using the num and numGD10 count fields.
vtools_report trans_ratio variant -n num
vtools_report trans_ratio variant -n numGD10
# Put variants with DP < 15 into table to_remove, then remove them from the project.
vtools select variant "DP<15" -t to_remove
vtools show tables
vtools remove variants to_remove -v0
vtools show tables
# Remove genotypes with DP_geno < 10.
vtools remove genotypes "DP_geno<10" -v0
# Select variants whose mut_type starts with "non" or "stop", or whose region_type is splicing, into table v_funct.
vtools select variant "mut_type like 'non%' or mut_type like 'stop%' or region_type='splicing'" -t v_funct
vtools show tables
vtools show samples --limit 5
# Select the variants seen in RACE=1 samples into table CEU and create a sub-project for them under ceu/.
vtools select variant --samples "RACE=1" -t CEU
mkdir -p ceu
cd ceu
vtools init ceu --parent ../ --variants CEU --samples "RACE=1" --build hg19
vtools show project
# Split into common variants (CEU_mafGD10 >= 0.05) and rare variants from v_funct (CEU_mafGD10 < 0.01).
vtools select variant "CEU_mafGD10>=0.05" -t common_ceu
vtools select v_funct "CEU_mafGD10<0.01" -t rare_ceu
# Attach the refGene annotation, used below to group variants by refGene.name2.
vtools use refGene
vtools show annotation refGene
# Look up the association command and the available tests.
vtools associate -h
vtools show tests
vtools show test LinRegBurden
# LinRegBurden test of BMI on the common variants with SEX as covariate.
vtools associate common_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db EA_CV > EA_CV.asso.res
grep -i error *.log
less EA_CV.asso.res
sort -g -k7 EA_CV.asso.res | head
vtools show fields
# LinRegBurden test on the rare variants, grouped by refGene.name2.
vtools associate rare_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db EA_RV > EA_RV.asso.res
grep -i error *.log | tail -10
less EA_RV.asso.res
sort -g -k6 EA_RV.asso.res | head
# VariableThresholdsQt test on the rare variants, grouped by refGene.name2.
vtools associate rare_ceu BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db EA_RV > EA_RV_VT.asso.res
grep -i error *.log | tail -10
less EA_RV_VT.asso.res
sort -g -k6 EA_RV_VT.asso.res | head
# List the rare variants assigned to ABCC1.
vtools select rare_ceu "refGene.name2='ABCC1'" -o chr pos ref alt CEU_mafGD10 numGD10 mut_type --header
cd ..
# Select the variants seen in RACE=0 samples into table YRI and create a sub-project for them under yri/.
vtools select variant --samples "RACE=0" -t YRI
mkdir -p yri
cd yri
vtools init yri --parent ../ --variants YRI --samples "RACE=0" --build hg19
# Split into common variants (YRI_mafGD10 >= 0.05) and rare variants from v_funct (YRI_mafGD10 < 0.01).
vtools select variant "YRI_mafGD10>=0.05" -t common_yri
vtools select v_funct "YRI_mafGD10<0.01" -t rare_yri
vtools use refGene
# LinRegBurden test of BMI on the common variants with SEX as covariate.
vtools associate common_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db YA_CV > YA_CV.asso.res
# LinRegBurden and VariableThresholdsQt tests on the rare variants, grouped by refGene.name2.
vtools associate rare_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db YA_RV > YA_RV.asso.res
vtools associate rare_yri BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db YA_RV > YA_RV_VT.asso.res
cd ..
# Combine the CEU and YRI VariableThresholdsQt results; the output goes to META_RV_VT.asso.res, which is the file read on the next line.
vtools_report meta_analysis ceu/EA_RV_VT.asso.res yri/YA_RV_VT.asso.res --beta 5 --pval 6 --se 7 -n 2 --link 1 > META_RV_VT.asso.res
# Show columns 1 and 3 of the first rows of the combined result.
cut -f1,3 META_RV_VT.asso.res | head