2016-VAT VMT

From Statistical Genetics Courses

Jump to: navigation, search

VAT

# Print the top-level vtools help text.
vtools -h
# Create a new project called VATDemo in the current directory.
vtools init VATDemo
# Import every *.vcf.gz file in the current directory, declared as build hg18,
# requesting the DP and filter variant fields and the DP_geno genotype field.
vtools import *.vcf.gz --var_info DP filter --geno_info DP_geno --build hg18 -j1
# Add hg19 coordinates via liftover.
vtools liftover hg19
# Peek at the phenotype file, then load it using a comma as the delimiter.
head phenotypes.csv
vtools phenotype --from_file phenotypes.csv --delimiter ","
# Inspect what the project now contains.
vtools show project
vtools show tables
vtools show table variant
vtools show samples
vtools show genotypes
vtools show fields
# Report the number of variants in the master variant table.
vtools select variant --count
# Save the genotype listing to a file and look at its first lines.
vtools show genotypes > GenotypeSummary.txt
head GenotypeSummary.txt
# Summary statistics of the DP field over all variants.
vtools output variant \
  "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" \
  "lower_quartile(DP)" "upper_quartile(DP)" \
  --header

# Count the variants whose filter field is PASS, then print the same DP
# summary restricted to those variants.
vtools select variant "filter='PASS'" --count
vtools select variant "filter='PASS'" \
  -o "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" \
  "lower_quartile(DP)" "upper_quartile(DP)" \
  --header

# Add per-variant genotype statistics (counts, DP_geno min/max/mean, MAF)
# as new fields of the variant table, then check that they were created.
vtools update variant --from_stat \
  'total=#(GT)' 'num=#(alt)' 'het=#(het)' 'hom=#(hom)' 'other=#(other)' \
  'minDP=min(DP_geno)' 'maxDP=max(DP_geno)' 'meanDP=avg(DP_geno)' \
  'maf=maf()'
vtools show fields
vtools show table variant

# Recompute the counts and MAF using only genotypes with DP_geno > 10;
# the results are stored in the *GD10 fields.
vtools update variant --from_stat \
  'totalGD10=#(GT)' 'numGD10=#(alt)' 'hetGD10=#(het)' 'homGD10=#(hom)' \
  'otherGD10=#(other)' 'mafGD10=maf()' \
  --genotypes "DP_geno > 10"
vtools show fields
vtools show table variant

# Compare the unfiltered and depth-filtered MAF for the first 20 variants.
vtools output variant chr pos maf mafGD10 --header --limit 20
# Define a RACE phenotype from the sample file names:
# 0 for files starting with YRI, 1 for files starting with CEU.
vtools phenotype --set "RACE=0" --samples "filename like 'YRI%'"
vtools phenotype --set "RACE=1" --samples "filename like 'CEU%'"
vtools show samples --limit 10

# Population-specific MAF, computed from genotypes with DP_geno>10 only.
vtools update variant --from_stat 'CEU_mafGD10=maf()' \
  --genotypes 'DP_geno>10' --samples "RACE=1"
vtools update variant --from_stat 'YRI_mafGD10=maf()' \
  --genotypes 'DP_geno>10' --samples "RACE=0"
vtools output variant chr pos mafGD10 CEU_mafGD10 YRI_mafGD10 \
  --header --limit 10

# Per-sample genotype counts, stored as phenotypes for each population,
# followed by a listing of the new phenotype columns.
vtools phenotype --from_stat 'CEU_totalGD10=#(GT)' 'CEU_numGD10=#(alt)' \
  --genotypes 'DP_geno>10' --samples "RACE=1"
vtools phenotype --from_stat 'YRI_totalGD10=#(GT)' 'YRI_numGD10=#(alt)' \
  --genotypes 'DP_geno>10' --samples "RACE=0"
vtools phenotype \
  --output sample_name CEU_totalGD10 CEU_numGD10 YRI_totalGD10 YRI_numGD10 \
  --header
# Keep variants with maf>=0.01 in their own table and run the KING pipeline
# on that table.
vtools select variant 'maf>=0.01' \
  -t variant_MAFge01 'Variants that have MAF >= 0.01'
vtools show tables
vtools execute KING --var_table variant_MAFge01

# Plot KING_MDS1 against KING_MDS2, once with RACE and once with panel as
# the third field; each plot is written to a PDF.
vtools_report plot_pheno_fields KING_MDS1 KING_MDS2 RACE \
  --dot KING.mds.race.pdf --discrete_color Dark2
vtools_report plot_pheno_fields KING_MDS1 KING_MDS2 panel \
  --dot KING.mds.panel.pdf --discrete_color Dark2

# Run the ANNOVAR geneanno pipeline and look at the resulting mut_type field.
vtools execute ANNOVAR geneanno
vtools output variant chr pos ref alt mut_type --limit 20 --header

# Transition/transversion report, using num and then numGD10 as the
# count field.
vtools_report trans_ratio variant -n num
vtools_report trans_ratio variant -n numGD10

# Remove variants with DP<15 by collecting them in a table and deleting it.
vtools select variant "DP<15" -t to_remove
vtools show tables
vtools remove variants to_remove -v0
vtools show tables

# Remove individual genotypes with DP_geno<10.
vtools remove genotypes "DP_geno<10" -v0

# Table v_funct: variants whose mut_type starts with "non" or "stop",
# or whose region_type is splicing.
vtools select variant "mut_type like 'non%' or mut_type like 'stop%' or region_type='splicing'" -t v_funct
vtools show tables
vtools show samples --limit 5
# CEU (RACE=1) association analysis.
# Every command is on its own line: when several are run together on one
# line, the later commands are passed as extra arguments to the first one
# instead of being executed.

# Collect the variants selected through the RACE=1 samples into table CEU.
vtools select variant --samples "RACE=1" -t CEU

# Create a sub-project in ./ceu from the parent project, restricted to the
# CEU variant table and the RACE=1 samples.
mkdir -p ceu
cd ceu
vtools init ceu --parent ../ --variants CEU --samples "RACE=1" --build hg19
vtools show project

# Common variants (CEU MAF >= 0.05) and rare functional variants
# (CEU MAF < 0.01 within v_funct).
vtools select variant "CEU_mafGD10>=0.05" -t common_ceu
vtools select v_funct "CEU_mafGD10<0.01" -t rare_ceu

# Attach the refGene annotation database and review the available tests.
vtools use refGene
vtools show annotation refGene
vtools associate -h
vtools show tests
vtools show test LinRegBurden

# Association of BMI with the common variants, adjusting for SEX.
vtools associate common_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db EA_CV > EA_CV.asso.res
grep -i error *.log
less EA_CV.asso.res
# Numeric sort on column 7 (presumably the p-value column; confirm against
# the header of the result file).
sort -g -k7 EA_CV.asso.res | head
vtools show fields

# Burden test on the rare variants, grouped by refGene.name2.
vtools associate rare_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db EA_RV > EA_RV.asso.res
grep -i error *.log | tail -22
less EA_RV.asso.res
sort -g -k6 EA_RV.asso.res | head

# VariableThresholdsQt test on the same rare variants and grouping.
vtools associate rare_ceu BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db EA_RV > EA_RV_VT.asso.res
grep -i error *.log | tail -22
less EA_RV_VT.asso.res
sort -g -k6 EA_RV_VT.asso.res | head

# List the rare variants annotated to ABCC1.
vtools select rare_ceu "refGene.name2='ABCC1'" -o chr pos ref alt CEU_mafGD10 numGD10 mut_type --header

# QQ and Manhattan plots of the rare-variant burden results.
vtools_report plot_association qq -o QQRV -b --label_top 2 -f 6 < EA_RV.asso.res
vtools_report plot_association manhattan -o MHRV -b --label_top 5 --color Dark2 --chrom_prefix None -f 6 < EA_RV.asso.res

# Repeat the burden test with KING_MDS1 and KING_MDS2 as extra covariates,
# then draw its QQ plot.
vtools associate rare_ceu BMI --covariate SEX KING_MDS1 KING_MDS2 -m "LinRegBurden --name RVMDS2 --alternative 2" -g refGene.name2 -j1 --to_db EA_RV > EA_RV_MDS2.asso.res
vtools_report plot_association qq -o QQRV_MDS2 -b --label_top 2 -f 6 < EA_RV_MDS2.asso.res

# Return to the parent project directory.
cd ..
# YRI (RACE=0) association analysis, followed by a meta-analysis.
# Every command is on its own line: when several are run together on one
# line, the later commands (including `cd ..`) are passed as extra arguments
# to the first one instead of being executed.

# Collect the variants selected through the RACE=0 samples into table YRI.
vtools select variant --samples "RACE=0" -t YRI

# Create a sub-project in ./yri from the parent project, restricted to the
# YRI variant table and the RACE=0 samples.
mkdir -p yri
cd yri
vtools init yri --parent ../ --variants YRI --samples "RACE=0" --build hg19

# Common variants (YRI MAF >= 0.05) and rare functional variants
# (YRI MAF < 0.01 within v_funct).
vtools select variant "YRI_mafGD10>=0.05" -t common_yri
vtools select v_funct "YRI_mafGD10<0.01" -t rare_yri
vtools use refGene

# Common-variant test, rare-variant burden test and VariableThresholdsQt test.
vtools associate common_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db YA_CV > YA_CV.asso.res
vtools associate rare_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db YA_RV > YA_RV.asso.res
vtools associate rare_yri BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db YA_RV > YA_RV_VT.asso.res

# Back in the parent directory, meta-analyse the VariableThresholdsQt result
# files written in ceu/ and yri/. The numeric options are presumably column
# indices in those files; confirm against their headers.
cd ..
vtools_report meta_analysis ceu/EA_RV_VT.asso.res yri/YA_RV_VT.asso.res --beta 5 --pval 6 --se 7 -n 2 --link 1 > META_RV_VT.asso.res
# Show columns 1 and 3 of the first lines of the meta-analysis result.
cut -f1,3 META_RV_VT.asso.res | head

VMT

##################################
# Example 1: Autosomal recessive #
##################################
# Every command is on its own line: if the two selects that build ExAC0005
# and CADD20 are run together on one line, the second is passed as extra
# arguments to the first and CADD20 is never created as its own step.

# Create (or overwrite, because of --force) the VMT project and import the
# two VCF files using the NSHI.fmt format definition.
vtools init VMT --force
vtools import AR1.vcf.gz AR2.vcf.gz --format NSHI.fmt --build hg19 -j8
vtools show tables
vtools show samples

# Annotate: ANNOVAR geneanno pipeline, then attach dbNSFP.DB and refGene.
vtools execute ANNOVAR geneanno
vtools use dbNSFP.DB
vtools use refGene
vtools show fields

# Filter 1: ExAC_AF and ExAC_SAS_AF each missing or below 0.0005.
vtools select variant "(ExAC_AF is NULL or ExAC_AF<0.0005) AND (ExAC_SAS_AF is NULL or ExAC_SAS_AF<0.0005)" -t ExAC0005
# Filter 2: CADD_phred above 20 or missing.
vtools select ExAC0005 "CADD_phred>20 or CADD_phred is NULL" -t CADD20
# Filter 3: splicing variants, or variants with a mut_type other than
# 'synonymous SNV' and 'unknown'.
vtools select CADD20 "region_type is 'splicing' OR (mut_type is not NULL AND mut_type is not 'synonymous SNV' AND mut_type is not 'unknown')" -t ANNOVARtype

# Export the prediction fields of the variants found in dbNSFP, classify
# them with the helper script, and load the result back. The script
# presumably writes snv.txt.parsed, which the update step reads.
vtools select ANNOVARtype "dbNSFP.chr is not null" \
  --output chr pos ref alt sift_pred lrt_pred fathmm_pred \
  mutationtaster_pred mutationassessor_pred \
  polyphen2_hdiv_pred polyphen2_hvar_pred provean_pred \
  MetaLR_pred MetaSVM_pred >snv.txt
python choose_damaging_variants.py snv.txt
vtools update ANNOVARtype --format VMT_annotation.fmt --from_file snv.txt.parsed
vtools select ANNOVARtype "vmt_annotation='damaging_SNV'" -t damaging

# Damaging variants on chromosome 16 between positions 63600000 and 79700000.
vtools select damaging "chr=16 AND (pos>=63600000 AND pos<=79700000)" \
  -o chr pos ref alt region_type region_name mut_type function \
  rs_dbSNP141 ExAC_Adj_AF ExAC_SAS_AF CADD_phred \
  FATHMM_pred LRT_pred MetaLR_pred MetaSVM_pred \
  MutationAssessor_pred MutationTaster_pred \
  Polyphen2_HDIV_pred Polyphen2_HVAR_pred PROVEAN_pred SIFT_pred \
  "samples('geno_filter=GT=1')" "samples('geno_filter=GT=2')"

# Count genotypes (totX) and homozygous genotypes (homX) in samples L1 and
# L2, keep the variants where both counts equal 2, then drop the temporary
# fields.
vtools update damaging --from_stat "totX=#(GT)" "homX=#(hom)" --samples "sample_name='L1' OR sample_name='L2'" -j2
vtools select damaging "totX=homX AND totX=2" -t homL1_L2
vtools remove fields totX homX

# Report the candidate variants together with the samples carrying them.
vtools output homL1_L2 chr pos ref alt region_type region_name mut_type function \
  rs_dbSNP141 ExAC_Adj_AF ExAC_SAS_AF CADD_phred \
  FATHMM_pred LRT_pred MetaLR_pred MetaSVM_pred \
  MutationAssessor_pred MutationTaster_pred \
  Polyphen2_HDIV_pred Polyphen2_HVAR_pred PROVEAN_pred SIFT_pred \
  "samples('geno_filter=GT=1')" "samples('geno_filter=GT=2')"
##################################
# Example 2: Autosomal dominant  #
##################################
# Every command is on its own line: if the two selects that build
# ExAC0005_AD and CADD15_AD are run together on one line, the second is
# passed as extra arguments to the first and CADD15_AD is never created as
# its own step.

# Import the VCF file for this example using the NSHI.fmt format definition.
vtools import AD.vcf.gz --format NSHI.fmt --build hg19 -j8
vtools show tables
vtools show samples

# Filter 1: variants selected through samples AD1/AD2 with ExAC_AF and
# ExAC_NFE_AF each missing or below 0.0005.
vtools select variant "(ExAC_AF is NULL or ExAC_AF<0.0005) AND (ExAC_NFE_AF is NULL or ExAC_NFE_AF<0.0005)" --samples "sample_name='AD1' OR sample_name='AD2'" -t ExAC0005_AD
# Filter 2: CADD_phred above 15 or missing.
vtools select ExAC0005_AD "CADD_phred>15 or CADD_phred is NULL" -t CADD15_AD

# Export the prediction fields of the variants found in dbNSFP, classify
# them with the helper script, and load the result back. The script
# presumably writes snv.txt.parsed, which the update step reads.
vtools select CADD15_AD "dbNSFP.chr is not null" \
  --output chr pos ref alt sift_pred lrt_pred fathmm_pred \
  mutationtaster_pred mutationassessor_pred \
  polyphen2_hdiv_pred polyphen2_hvar_pred provean_pred \
  MetaLR_pred MetaSVM_pred >snv.txt
python choose_damaging_variants.py snv.txt
vtools update variant --format VMT_annotation.fmt --from_file snv.txt.parsed
vtools select CADD15_AD "vmt_annotation='damaging_SNV'" -t damaging_AD

# Count genotypes (totX) and heterozygous genotypes (hetX) in samples AD1
# and AD2, keep the variants where both counts equal 2, then drop the
# temporary fields.
vtools update damaging_AD --from_stat "totX=#(GT)" "hetX=#(het)" --samples "sample_name='AD1' OR sample_name='AD2'" -j2
vtools select damaging_AD "totX=hetX AND totX=2" -t hetAD_2
vtools remove fields totX hetX

# Report the candidate variants together with the samples carrying them.
vtools output hetAD_2 chr pos ref alt refGene.name2 \
  rs_dbSNP141 ExAC_Adj_AF ExAC_SAS_AF CADD_phred \
  FATHMM_pred LRT_pred MetaLR_pred MetaSVM_pred \
  MutationAssessor_pred MutationTaster_pred \
  Polyphen2_HDIV_pred Polyphen2_HVAR_pred PROVEAN_pred SIFT_pred \
  "samples('geno_filter=GT=1')" "samples('geno_filter=GT=2')"
##################################
#       Example 3: De Novo       #
##################################

# Import the VCF file for this example using the NSHI.fmt format definition.
vtools import de_novo.vcf.gz --format NSHI.fmt --build hg19 -j8
vtools show tables
vtools show samples

# Step 1: count genotypes (totX) and heterozygous genotypes (hetX) in
# sample Son and keep the variants where both counts equal 1.
vtools update variant \
  --from_stat "totX=#(GT)" "hetX=#(het)" \
  --samples "sample_name='Son'" \
  -j2
vtools select variant "totX=hetX AND totX=1" -t Son_het

# Step 2: on those variants, recompute totX and compute wtX over samples
# Dad and Mom. Keep variants where both counts equal 2 and where ExAC_AF
# and ExAC_NFE_AF are each missing or below 0.0005.
vtools update Son_het \
  --from_stat "totX=#(GT)" "wtX=#(wtGT)" \
  --samples "sample_name='Dad' OR sample_name='Mom'" \
  -j2
vtools select Son_het "(totX=wtX AND totX=2) and ((ExAC_AF is NULL or ExAC_AF<0.0005) AND (ExAC_NFE_AF is NULL or ExAC_NFE_AF<0.0005))" -t deNovo

# Report the candidate variants together with the samples carrying them.
vtools output deNovo chr pos ref alt refGene.name2 \
  rs_dbSNP141 ExAC_Adj_AF ExAC_NFE_AF CADD_phred \
  FATHMM_pred LRT_pred MetaLR_pred MetaSVM_pred \
  MutationAssessor_pred MutationTaster_pred \
  Polyphen2_HDIV_pred Polyphen2_HVAR_pred PROVEAN_pred SIFT_pred \
  "samples('geno_filter=GT=1')" "samples('geno_filter=GT=2')"