2016-july-berlin-commands

From Statistical Genetics Courses

Revision as of 12:19, 8 July 2016 by Serveradmin (Talk | contribs) (Annotation exercise)

Jump to: navigation, search

Annotation exercise

mkdir APOC3 && cd APOC3
vtools init APOC3
vtools import ../APOC3.vcf --format ../vcf.fmt --build hg19
vtools select variant --count
vtools use refGene
vtools show fields
vtools select variant --output chr pos ref alt refGene.name2
vtools use ../dbNSFP.DB
vtools show fields
vtools select variant 'id="rs76353203" or id="rs140621530" or id="rs147210663" or id="rs121918381" or id="rs121918382"' -t 5var
vtools select 5var --output chr pos ref alt id dbNSFP.GERP_RS dbNSFP.phyloP100way_vertebrate dbNSFP.phastCons100way_vertebrate
vtools select 5var --output chr pos ref alt dbNSFP.FATHMM_pred dbNSFP.FATHMM_score
vtools select 5var --output chr pos ref alt dbNSFP.Polyphen2_HDIV_pred dbNSFP.Polyphen2_HDIV_score dbNSFP.Polyphen2_HVAR_pred dbNSFP.Polyphen2_HVAR_score
vtools select 5var --output chr pos ref alt dbNSFP.LRT_pred dbNSFP.LRT_score
vtools select 5var --output chr pos ref alt dbNSFP.MutationTaster_pred
vtools select 5var --output chr pos ref alt dbNSFP.PROVEAN_pred dbNSFP.PROVEAN_score
vtools select 5var --output chr pos ref alt dbNSFP.SIFT_pred dbNSFP.SIFT_score
vtools select 5var --output chr pos ref alt dbNSFP.CADD_raw dbNSFP.CADD_phred

GATK/IGV exercise

java -jar GenomeAnalysisTK.jar --help
java -Xmx200m -jar GenomeAnalysisTK.jar -R human_g1k_v37.fa -T UnifiedGenotyper -glm BOTH -I child.bam -I father.bam -I mother.bam -L trio.intervals -o ug.vcf less -S ug.vcf
java -Xmx200m -jar GenomeAnalysisTK.jar -T VariantEval -R human_g1k_v37.fa -L trio.intervals -D dbsnp.vcf -eval ug.vcf -o ug.vcf.eval
less -S ug.vcf.eval
java -Xmx200m -jar GenomeAnalysisTK.jar -R human_g1k_v37.fa -T SelectVariants -V ug.vcf -mv -mvq 0 -o ug.dnm.vcf -ped trio.ped
java -Xmx500m -jar GenomeAnalysisTK.jar -R human_g1k_v37.fa -T HaplotypeCaller -I child.bam -I father.bam -I mother.bam -minPruning 4 -L 1:199325670-199325672 -mergeVariantsViaLD -o hc.vcf

PSEQ exercise

Data analysis:

pseq help
pseq help all
pseq myproj new-project --resources hg19
pseq myproj load-vcf --vcf CEU.exon.2010_03.genotypes.hg19.vcf.gz YRI.exon.2010_03.genotypes.hg19.vcf.gz
pseq myproj load-pheno --file phenotype.phe
pseq myproj v-view | head
pseq myproj i-view | head
pseq myproj summary
pseq myproj var-summary
pseq myproj ind-summary
pseq myproj loc-summary
pseq myproj ref-summary
pseq myproj seq-summary
pseq myproj file-summary
pseq myproj meta-summary
pseq myproj v-stats
pseq myproj i-stats | head
pseq myproj tag-file --id 1 --name CEU
pseq myproj tag-file --id 2 --name YRI
pseq myproj var-summary
pseq myproj v-freq | head
pseq myproj v-freq --mask file=CEU | head
pseq myproj v-freq --mask file=YRI | head
pseq myproj v-view --mask any.filter.ex | head
pseq myproj v-view --mask any.filter.ex | wc -l
pseq myproj v-view --mask any.filter | wc -l
pseq myproj var-set --group pass --mask any.filter.ex
pseq myproj var-summary
pseq myproj var-set --group pass_DP15 --mask include="DP>14" var=pass
pseq myproj var-summary
pseq myproj var-set --group pass_DP15_DPgeno10 --mask geno=DP:ge:11 var=pass_DP15
pseq myproj var-summary
pseq myproj var-set --group pass_DP15_DPgeno10_CEU --mask file=CEU var=pass_DP15_DPgeno10
pseq myproj var-summary
pseq myproj var-set --group pass_DP15_DPgeno10_CEU_HWE --mask hwe=5.7e-7:1 var=pass_DP15_DPgeno10_CEU
pseq myproj var-summary
pseq myproj var-set --group pass_DP15_DPgeno10_CEU_HWE_MAFgt05 --mask maf=0.05:0.5 var=pass_DP15_DPgeno10_CEU_HWE
pseq myproj var-summary
pseq myproj var-set --group pass_DP15_DPgeno10_CEU_HWE_MAFlt01 --mask "mac=1 maf=0.01" var=pass_DP15_DPgeno10_CEU_HWE
pseq myproj var-summary
pseq myproj glm --phenotype BMI --covar SEX --mask var=pass_DP15_DPgeno10_CEU_HWE_MAFgt05 > SNV_CEU.result
head SNV_CEU.result
cat SNV_CEU.result | awk '{if(FNR==1) print $0; if(NR>1) print $0 | "sort -k9"}' | grep -v "NA\s\+NA\s\+NA" | head
pseq myproj assoc --tests skat --phenotype BMI --covar SEX --mask var=pass_DP15_DPgeno10_CEU_HWE_MAFlt01 loc.group=refseq > SKAT_CEU.result
pseq myproj assoc --tests skat --phenotype BMI --covar SEX --mask include="DP>14" geno=DP:ge:11 file=CEU hwe=5.7e-7:1 "mac=1 maf=0.01" loc.group=refseq > SKAT_CEU.result
head -20 SKAT_CEU.result
cat SKAT_CEU.result | grep SKAT | grep -v "P=NA" | sort -k6 | head -15

Exercise analyzing YRI samples:

pseq myproj var-set --group pass_DP15_DPgeno10_YRI --mask file=YRI var=pass_DP15_DPgeno10
pseq myproj var-summary
pseq myproj var-set --group pass_DP15_DPgeno10_YRI_HWE --mask hwe=5.7e-7:1 var=pass_DP15_DPgeno10_YRI
pseq myproj var-summary
pseq myproj var-set --group pass_DP15_DPgeno10_YRI_HWE_MAFgt05 --mask maf=0.05:0.5 var=pass_DP15_DPgeno10_YRI_HWE
pseq myproj var-summary
pseq myproj var-set --group pass_DP15_DPgeno10_YRI_HWE_MAFlt01 --mask "mac=1 maf=0.01" var=pass_DP15_DPgeno10_YRI_HWE
pseq myproj var-summary
pseq myproj glm --phenotype BMI --covar SEX --mask var=pass_DP15_DPgeno10_YRI_HWE_MAFgt05 > SNV_YRI.result
head SNV_YRI.result
cat SNV_YRI.result | awk '{if(FNR==1) print $0; if(NR>1) print $0 | "sort -k9"}' | grep -v "NA\s\+NA\s\+NA" | head
pseq myproj assoc --tests skat --phenotype BMI --covar SEX --mask include="DP>14" geno=DP:ge:11 file=YRI hwe=5.7e-7:1 "mac=1 maf=0.01" loc.group=refseq > SKAT_YRI.result
head -20 SKAT_YRI.result
cat SKAT_YRI.result | grep SKAT | grep -v "P=NA" | sort -k6 | head -15

Popgen exercise R programs

popgen_drift.q

#############################################################
### Michael Nothnagel, michael.nothnagel@uni-koeln.de ###
### Simulating genetic drift ###
#############################################################
N.s = c(10, 100, 1000, 10000)
n.gen = 50
n.rep = 100 # === Simulate genetic drift === #
for (n in N.s) {
freqs = matrix(NA, ncol=n.gen+1, nrow=n.rep)
for (i in 1:n.rep) {
alleles = c(rep(0,n/2), rep(1,n/2))
freqs[i,1] = sum(alleles==1) / length(alleles)
for (j in 1:n.gen) {
alleles = sample (alleles, length(alleles), replace=T)
freqs[i,j+1] = sum(alleles==1) / length(alleles)
}
}
assign (paste("freqs.n", n, sep=""), freqs)
} summary(freqs.n10 [,n.gen+1])
summary(freqs.n100 [,n.gen+1])
summary(freqs.n1000 [,n.gen+1])
summary(freqs.n10000[,n.gen+1]) # === Graph allele frequency changes === #
pdf("drift_plot.pdf", paper="special", height=4*2, width=4*2, onefile=F)
split.screen(c(2,2))
screen(1)
plot(x=0, y=0, type="n", xlim=c(0,n.gen), ylim=c(0,1), xlab="Generation", ylab="Allele frequency", main="N=10")
lines (c(0,n.gen), rep(0.5, 2), lty=3, col="#AAAAAA")
freqs = get(paste("freqs.n", 10 , sep=""))
for (i in 1:n.rep) {
lines(0:n.gen,freqs[i,], col="#44AAAA")
}
screen(2)
plot(x=0, y=0, type="n", xlim=c(0,n.gen), ylim=c(0,1), xlab="Generation", ylab="Allele frequency", main="N=100")
lines (c(0,n.gen), rep(0.5, 2), lty=3, col="#AAAAAA")
freqs = get(paste("freqs.n", 100 , sep=""))
for (i in 1:n.rep) {
lines(0:n.gen,freqs[i,], col="#44AAAA")
}
screen(3)
plot(x=0, y=0, type="n", xlim=c(0,n.gen), ylim=c(0,1), xlab="Generation", ylab="Allele frequency", main="N=1000")
lines (c(0,n.gen), rep(0.5, 2), lty=3, col="#AAAAAA")
freqs = get(paste("freqs.n", 1000 , sep=""))
for (i in 1:n.rep) {
lines(0:n.gen,freqs[i,], col="#44AAAA")
}
screen(4)
plot(x=0, y=0, type="n", xlim=c(0,n.gen), ylim=c(0,1), xlab="Generation", ylab="Allele frequency", main="N=10000")
lines (c(0,n.gen), rep(0.5, 2), lty=3, col="#AAAAAA")
freqs = get(paste("freqs.n", 10000, sep=""))
for (i in 1:n.rep) {
lines(0:n.gen,freqs[i,], col="#44AAAA")
}
close.screen(all.screens=T)
dev.off() # === Graph allele frequency after 50 generations === #
pdf("drift_hist.pdf", paper="special", height=4*2, width=4*2, onefile=F)
split.screen(c(2,2))
screen(1)
hist(freqs.n10 [,n.gen+1], main="N=10", xlab="Allele frequency", xlim=c(0,1))
screen(2)
hist(freqs.n100 [,n.gen+1], main="N=100", xlab="Allele frequency", xlim=c(0,1))
screen(3)
hist(freqs.n1000 [,n.gen+1], main="N=1000", xlab="Allele frequency", xlim=c(0,1))
screen(4)
hist(freqs.n10000[,n.gen+1], main="N=10000", xlab="Allele frequency", xlim=c(0,1))
close.screen(all.screens=T)
dev.off()

popgen.selection.q

#############################################################
### Michael Nothnagel, michael.nothnagel@uni-koeln.de ###
### Calculation allele frequency changes due to selection ###
#############################################################
s.s = c(0.001, 0.01, 0.1)
h = 0.5
n.gen = 100 # === Calculate allele frequency changes === #
for (s in s.s) {
freqs = rep(NA, n.gen+1)
af = 0.5
freqs[1] = 1-af
for (j in 1:n.gen) {
omega = 1 - 2*af*(1-af)*h*s - (1-af)*(1-af)*s
f.het = (1-h*s)*2*af*(1-af)/omega
f.hom = af*af/omega
af = f.hom + f.het/2
freqs[j+1] = 1-af
}
assign (paste("freqs.s", s, sep=""), freqs)
} # === Report allele frequencies after 100 generations === #
for (s in s.s) {
cat("s=") ; cat(s) ; cat(": ")
freqs = get(paste("freqs.s", s, sep=""))
cat(freqs[n.gen+1]) ; cat("\n")
} # === Graph allele frequency changes === #
pdf("selection_plot.pdf", paper="special", height=4*2, width=4*2, onefile=F)
plot(x=0, y=0, type="n", xlim=c(0,n.gen), ylim=c(0,1), xlab="Generation", ylab="Allele frequency")
lines (c(0,n.gen), rep(0.5, 2), lty=3, col="#AAAAAA")
for (s in s.s) {
freqs = get(paste("freqs.s", s, sep=""))
lines(0:n.gen,freqs, col="#44AAAA")
}
dev.off()

Regression exercise

In R:

load("dbp.R")
ls()
dbp[1:5,]
#
result.snp12 = glm (affection ~ rs1112, family=binomial("logit"), data=dbp)
print (result.snp12)
print ( class (result.snp12) )
print ( summary(result.snp12) )
#
dev.geno = anova (result.snp12, test="Chi")
lrt.pvalue = pchisq(dev.geno[dim(dev.geno)[1],"Deviance"],
df=2, ncp=0, FALSE)
print ( lrt.pvalue )
#
print ( summary(result.snp12)$coefficients )
snp.beta = summary(result.snp12)$coefficients[2:3,1]
print ( snp.beta )
print ( exp(snp.beta) )
ci = confint (result.snp12)
print (ci)
print ( exp(ci) )
#
snp.data = dbp[,c("affection", "rs1112")]
summary(snp.data)
snp.data[,"rs1112"]
summary(snp.data)
#
result.all = glm (affection ~ rs1112, family=binomial("logit"),
data=snp.data)
dev.all = anova (result.all, test="Chi")
summary(result.all)
print(dev.all)
#
snp.data = dbp[,c("affection", "trait","sex", "age", "rs1112", "rs1117")]
summary(snp.data)
snp.data[,"rs1112"]
snp.data[,"rs1117"] #
result.adj = glm (affection ~ sex + rs1112 , family=binomial("logit"),
data=snp.data)
summary(result.adj)
#
result.adj = glm (affection ~ age + rs1112 , family=binomial("logit"),
data=snp.data)
summary(result.adj)
#
result.adj = glm (affection ~ sex + age + rs1112, family=binomial("logit"),
data=snp.data)
summary(result.adj)
#
result.adj = glm (affection ~ rs1117 + rs1112, family=binomial("logit"),
data=snp.data)
summary(result.adj)
anova (result.adj, test="Chi")
result.adj = glm (affection ~ rs1112 + rs1117, family=binomial("logit"),
data=snp.data)
summary(result.adj)
anova (result.adj, test="Chi")
#
result.adj = lm (trait ~ rs1112, data=snp.data)
summary(result.adj)
result.adj = lm (trait ~ sex + rs1112, data=snp.data)
summary(result.adj)
#
result.inter = glm (affection ~ sex * rs1112, family=binomial("logit"),
data=snp.data)
summary(result.inter)
result.inter = glm (affection ~ age * rs1112, family=binomial("logit"),
data=snp.data)
summary(result.inter)
#
result.inter = glm (affection ~ rs1112 * rs1117, family=binomial("logit"),
data=snp.data)
summary(result.inter)
#
q()

SEQPower exercise

spower -h
spower LOGIT -h
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --method "CFisher --name CMC" -r 100 -j 4 -l 1 -o exercise
spower show exercise.csv
spower show exercise.csv power*
spower show exercise.loci.csv
spower show exercise.loci.csv maf
spower show tests
spower show test SKAT
spower LOGIT Kryukov2009European1800.sfs --def_rare 0.01 --def_neutral -0.00001 0.00001 --moi A --proportion_detrimental 1 --proportion_protective 0 --OR_rare_detrimental 1.5 --OR_common_detrimental 1 --baseline_effect 0.01 --sample_size 1000 --p1 0.5 --limit 1 --alpha 0.05 --method "KBAC --name K1 --mafupper 0.01 --maflower 0 --alternative 1 --moi additive --permutations 1000 --adaptive 0.1" --replicates 1000 --jobs 4 -o exercise
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.2 --ORmax_rare_detrimental 3.0 --method CFisher -r 100 -j 4 -l 1 -o exercise
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.2 --ORmax_rare_detrimental 3.0 --method CFisher -r 100 -j 4 -l 1 -o exercise
spower show exercise.loci.csv effect*
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.2 --ORmax_rare_detrimental 3.0 --proportion_detrimental 0.8 --method CFisher -r 100 -j 4 -l 1 -o exercise
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_sites 0.05 --method CFisher -r 100 -j 4 -l 1 -o exercise
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_sites 0.05 --method CFisher -r 100 -j 4 -l 1 -o exercise
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_low_maf 0.000125 --method CFisher -r 100 -j 4 -l 1 -o exercise
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_low_maf 0.000125 --method CFisher -r 100 -j 4 -l 1 -o exercise
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --method "CFisher --alternative 1 --name CMC" "KBAC --permutations 1000 --alternative 1" "WSSRankTest --alternative 1 --name WSS" "VTtest --alternative 1 --permutations 1000" "SKAT disease" -r 100 -j 4 -l 1 -o exercise

spower LNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --method "CollapseQt --name CMC --alternative 2" -r 100 -j 4 -l 1 -o exercise
spower LNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --meanshiftmax_rare_detrimental 0.5 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise
spower ELNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --QT_thresholds 0.4 0.6 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise
spower ELNR Kryukov2009European1800.sfs --sample_size 1000 --p1 0.5 --meanshift_rare_detrimental 0.5 --QT_thresholds 0.4 0.6 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --method "GroupWrite ExerciseSimulation" -j 4 -o exercise -v1
spower show exercise.SEQPowerDB LOGIT method power title --condition "where power between 0.25 and 0.95"

for i in 1 1.5 2 2.5 3 3.5 4; do
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental $i --method "CFisher --name CMC$i" --title FixedOR$i -r 100 -j 4 -l 1 -o exercise2
done

VAT exercise

vtools -h
vtools init VATDemo
vtools import *.vcf.gz --var_info DP filter --geno_info DP_geno --build hg18 -j1
vtools liftover hg19
head phenotypes.csv
vtools phenotype --from_file phenotypes.csv --delimiter ","
vtools show project
vtools show tables
vtools show table variant
vtools show samples
vtools show genotypes
vtools show fields
vtools select variant --count
vtools show genotypes > GenotypeSummary.txt
head GenotypeSummary.txt
vtools output variant "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
vtools select variant "filter='PASS'" --count
vtools select variant "filter='PASS'" -o "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
vtools update variant --from_stat 'total=#(GT)' 'num=#(alt)' 'het=#(het)' 'hom=#(hom)' 'other=#(other)' 'minDP=min(DP_geno)' 'maxDP=max(DP_geno)' 'meanDP=avg(DP_geno)' 'maf=maf()'
vtools show fields
vtools show table variant
vtools update variant --from_stat 'totalGD10=#(GT)' 'numGD10=#(alt)' 'hetGD10=#(het)' 'homGD10=#(hom)' 'otherGD10=#(other)' 'mafGD10=maf()' --genotypes "DP_geno > 10"
vtools show fields
vtools show table variant
vtools output variant chr pos maf mafGD10 --header --limit 20
vtools phenotype --set "RACE=0" --samples "filename like 'YRI%'"
vtools phenotype --set "RACE=1" --samples "filename like 'CEU%'"
vtools show samples --limit 10
vtools update variant --from_stat 'CEU_mafGD10=maf()' --genotypes 'DP_geno>10' --samples "RACE=1"
vtools update variant --from_stat 'YRI_mafGD10=maf()' --genotypes 'DP_geno>10' --samples "RACE=0"
vtools output variant chr pos mafGD10 CEU_mafGD10 YRI_mafGD10 --header --limit 10
vtools phenotype --from_stat 'CEU_totalGD10=#(GT)' 'CEU_numGD10=#(alt)' --genotypes 'DP_geno>10' --samples "RACE=1"
vtools phenotype --from_stat 'YRI_totalGD10=#(GT)' 'YRI_numGD10=#(alt)' --genotypes 'DP_geno>10' --samples "RACE=0"
vtools phenotype --output sample_name CEU_totalGD10 CEU_numGD10 YRI_totalGD10 YRI_numGD10 --header
vtools select variant 'maf>=0.01' -t variant_MAFge01 'Variants that have MAF >= 0.01'
vtools show tables
vtools execute KING --var_table variant_MAFge01
vtools_report plot_pheno_fields KING_MDS1 KING_MDS2 RACE --dot KING.mds.race.pdf --discrete_color Dark2
vtools_report plot_pheno_fields KING_MDS1 KING_MDS2 panel --dot KING.mds.panel.pdf --discrete_color Dark2
vtools execute ANNOVAR geneanno
vtools output variant chr pos ref alt mut_type --limit 20 --header
vtools_report trans_ratio variant -n num
vtools_report trans_ratio variant -n numGD10
vtools select variant "DP<15" -t to_remove
vtools show tables
vtools remove variants to_remove -v0
vtools show tables
vtools remove genotypes "DP_geno<10" -v0
vtools select variant "mut_type like 'non%' or mut_type like 'stop%' or region_type='splicing'" -t v_funct
vtools show tables
vtools show samples --limit 5
vtools select variant --samples "RACE=1" -t CEU
mkdir -p ceu
cd ceu
vtools init ceu --parent ../ --variants CEU --samples "RACE=1" --build hg19
vtools show project
vtools select variant "CEU_mafGD10>=0.05" -t common_ceu
vtools select v_funct "CEU_mafGD10<0.01" -t rare_ceu
vtools use refGene
vtools show annotation refGene
vtools associate -h
vtools show tests
vtools show test LinRegBurden
vtools associate common_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db EA_CV > EA_CV.asso.res
grep -i error *.log
less EA_CV.asso.res
sort -g -k7 EA_CV.asso.res | head
vtools show fields
vtools associate rare_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db EA_RV > EA_RV.asso.res
grep -i error *.log | tail -10
less EA_RV.asso.res
sort -g -k6 EA_RV.asso.res | head
vtools associate rare_ceu BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db EA_RV > EA_RV_VT.asso.res
grep -i error *.log | tail -10
less EA_RV_VT.asso.res
sort -g -k6 EA_RV_VT.asso.res | head
vtools select rare_ceu "refGene.name2='ABCC1'" -o chr pos ref alt CEU_mafGD10 numGD10 mut_type --header
vtools_report plot_association qq -o QQRV -b --label_top 2 -f 6 < EA_RV.asso.res
vtools_report plot_association manhattan -o MHRV -b --label_top 5 --color Dark2 --chrom_prefix None -f 6 < EA_RV.asso.res
vtools associate rare_ceu BMI --covariate SEX KING_MDS1 KING_MDS2 -m "LinRegBurden --name RVMDS2 --alternative 2" -g refGene.name2 -j1 --to_db EA_RV > EA_RV_MDS2.asso.res
vtools_report plot_association qq -o QQRV_MDS2 -b --label_top 2 -f 6 < EA_RV_MDS2.asso.res
cd ..
vtools select variant --samples "RACE=0" -t YRI
mkdir -p yri; cd yri
vtools init yri --parent ../ --variants YRI --samples "RACE=0" --build hg19
vtools select variant "YRI_mafGD10>=0.05" -t common_yri
vtools select v_funct "YRI_mafGD10<0.01" -t rare_yri
vtools use refGene
vtools associate common_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db YA_CV > YA_CV.asso.res
vtools associate rare_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db YA_RV > YA_RV.asso.res
vtools associate rare_yri BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db YA_RV > YA_RV_VT.asso.res
cd ..
vtools_report meta_analysis ceu/EA_RV_VT.asso.res yri/YA_RV_VT.asso.res --beta 5 --pval 6 --se 7 -n 2 --link 1 > META_RV_VT.asso.res
cut -f1,3 META_RV_VT.asso.res | head==VMT==