2016-VAT VMT
From Statistical Genetics Courses
Revision as of 13:25, 15 September 2016 by Serveradmin (Talk | contribs)
VAT
# VAT exercise, part 1: create the VATDemo project, import the data, compute
# summary statistics, and filter variants/genotypes.
# Run the commands one at a time, in order, from the directory that holds the
# *.vcf.gz files and phenotypes.csv. Each command must be on its own line;
# otherwise the shell passes everything that follows to the first vtools call.

# Show top-level help, then create the project.
vtools -h
vtools init VATDemo

# Import the VCF files (declared as build hg18), then lift over to hg19.
vtools import *.vcf.gz --var_info DP filter --geno_info DP_geno --build hg18 -j1
vtools liftover hg19

# Preview the phenotype file and load it into the project.
head phenotypes.csv
vtools phenotype --from_file phenotypes.csv --delimiter ","

# Inspect what the project now contains.
vtools show project
vtools show tables
vtools show table variant
vtools show samples
vtools show genotypes
vtools show fields
vtools select variant --count
vtools show genotypes > GenotypeSummary.txt
head GenotypeSummary.txt

# Summaries of the variant-level DP field: all variants, then filter='PASS' only.
vtools output variant "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
vtools select variant "filter='PASS'" --count
vtools select variant "filter='PASS'" -o "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header

# Store genotype counts, DP_geno summaries and MAF as new variant fields.
vtools update variant --from_stat 'total=#(GT)' 'num=#(alt)' 'het=#(het)' 'hom=#(hom)' 'other=#(other)' 'minDP=min(DP_geno)' 'maxDP=max(DP_geno)' 'meanDP=avg(DP_geno)' 'maf=maf()'
vtools show fields
vtools show table variant

# Same counts and MAF, restricted to genotypes with DP_geno > 10 (*GD10 fields).
vtools update variant --from_stat 'totalGD10=#(GT)' 'numGD10=#(alt)' 'hetGD10=#(het)' 'homGD10=#(hom)' 'otherGD10=#(other)' 'mafGD10=maf()' --genotypes "DP_geno > 10"
vtools show fields
vtools show table variant
vtools output variant chr pos maf mafGD10 --header --limit 20

# Set RACE from the sample file name: YRI* -> 0, CEU* -> 1.
vtools phenotype --set "RACE=0" --samples "filename like 'YRI%'"
vtools phenotype --set "RACE=1" --samples "filename like 'CEU%'"
vtools show samples --limit 10

# MAF at DP_geno>10 computed separately within each RACE group.
vtools update variant --from_stat 'CEU_mafGD10=maf()' --genotypes 'DP_geno>10' --samples "RACE=1"
vtools update variant --from_stat 'YRI_mafGD10=maf()' --genotypes 'DP_geno>10' --samples "RACE=0"
vtools output variant chr pos mafGD10 CEU_mafGD10 YRI_mafGD10 --header --limit 10

# Genotype counts at DP_geno>10 stored as phenotype fields, one pair per RACE group.
vtools phenotype --from_stat 'CEU_totalGD10=#(GT)' 'CEU_numGD10=#(alt)' --genotypes 'DP_geno>10' --samples "RACE=1"
vtools phenotype --from_stat 'YRI_totalGD10=#(GT)' 'YRI_numGD10=#(alt)' --genotypes 'DP_geno>10' --samples "RACE=0"
vtools phenotype --output sample_name CEU_totalGD10 CEU_numGD10 YRI_totalGD10 YRI_numGD10 --header

# Table of variants with maf >= 0.01, used as input to the KING pipeline.
vtools select variant 'maf>=0.01' -t variant_MAFge01 'Variants that have MAF >= 0.01'
vtools show tables
vtools execute KING --var_table variant_MAFge01

# Plot KING_MDS1 against KING_MDS2, colored by RACE and then by panel.
vtools_report plot_pheno_fields KING_MDS1 KING_MDS2 RACE --dot KING.mds.race.pdf --discrete_color Dark2
vtools_report plot_pheno_fields KING_MDS1 KING_MDS2 panel --dot KING.mds.panel.pdf --discrete_color Dark2

# Run the ANNOVAR geneanno pipeline and look at the resulting mut_type field.
vtools execute ANNOVAR geneanno
vtools output variant chr pos ref alt mut_type --limit 20 --header

# trans_ratio report using the num and numGD10 count fields.
vtools_report trans_ratio variant -n num
vtools_report trans_ratio variant -n numGD10

# Remove variants with DP<15, then genotypes with DP_geno<10.
vtools select variant "DP<15" -t to_remove
vtools show tables
vtools remove variants to_remove -v0
vtools show tables
vtools remove genotypes "DP_geno<10" -v0
# VAT exercise, part 2: select functional variants, create the CEU
# subproject, and run association tests on BMI.
# Run the commands one at a time, in order. The `less` calls are interactive
# (press q to return to the prompt).

# Table of variants selected on mut_type / region_type.
vtools select variant "mut_type like 'non%' or mut_type like 'stop%' or region_type='splicing'" -t v_funct
vtools show tables
vtools show samples --limit 5

# Variants seen in RACE=1 samples, then a subproject in ./ceu built from them.
vtools select variant --samples "RACE=1" -t CEU
mkdir -p ceu
cd ceu
vtools init ceu --parent ../ --variants CEU --samples "RACE=1" --build hg19
vtools show project

# Split by CEU_mafGD10: all variants >= 0.05, functional variants < 0.01.
vtools select variant "CEU_mafGD10>=0.05" -t common_ceu
vtools select v_funct "CEU_mafGD10<0.01" -t rare_ceu

# Link the refGene annotation (supplies refGene.name2 used for grouping below).
vtools use refGene
vtools show annotation refGene

# Help and the list of available association tests.
vtools associate -h
vtools show tests
vtools show test LinRegBurden

# common_ceu table: LinRegBurden on BMI with SEX as covariate (no -g grouping).
vtools associate common_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db EA_CV > EA_CV.asso.res
grep -i error *.log
less EA_CV.asso.res
sort -g -k7 EA_CV.asso.res | head
vtools show fields

# rare_ceu table grouped by refGene.name2: LinRegBurden.
vtools associate rare_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db EA_RV > EA_RV.asso.res
grep -i error *.log | tail -22
less EA_RV.asso.res
sort -g -k6 EA_RV.asso.res | head

# rare_ceu table grouped by refGene.name2: VariableThresholdsQt.
vtools associate rare_ceu BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db EA_RV > EA_RV_VT.asso.res
grep -i error *.log | tail -22
less EA_RV_VT.asso.res
sort -g -k6 EA_RV_VT.asso.res | head

# List the rare_ceu variants whose refGene.name2 is ABCC1.
vtools select rare_ceu "refGene.name2='ABCC1'" -o chr pos ref alt CEU_mafGD10 numGD10 mut_type --header

# QQ and Manhattan plots from the LinRegBurden results on rare_ceu.
vtools_report plot_association qq -o QQRV -b --label_top 2 -f 6 < EA_RV.asso.res
vtools_report plot_association manhattan -o MHRV -b --label_top 5 --color Dark2 --chrom_prefix None -f 6 < EA_RV.asso.res

# Repeat the rare_ceu LinRegBurden run with KING_MDS1 and KING_MDS2 added as covariates.
vtools associate rare_ceu BMI --covariate SEX KING_MDS1 KING_MDS2 -m "LinRegBurden --name RVMDS2 --alternative 2" -g refGene.name2 -j1 --to_db EA_RV > EA_RV_MDS2.asso.res
vtools_report plot_association qq -o QQRV_MDS2 -b --label_top 2 -f 6 < EA_RV_MDS2.asso.res

# Back to the parent project directory.
cd ..
# VAT exercise, part 3: create the YRI subproject, run the same association
# tests, then combine the CEU and YRI VariableThresholdsQt results.
# Run the commands one at a time, in order, starting in the parent project
# directory (the one that contains ceu/).

# Variants seen in RACE=0 samples, then a subproject in ./yri built from them.
vtools select variant --samples "RACE=0" -t YRI
mkdir -p yri
cd yri
vtools init yri --parent ../ --variants YRI --samples "RACE=0" --build hg19

# Split by YRI_mafGD10: all variants >= 0.05, functional variants < 0.01.
vtools select variant "YRI_mafGD10>=0.05" -t common_yri
vtools select v_funct "YRI_mafGD10<0.01" -t rare_yri

# Link the refGene annotation (supplies refGene.name2 used for grouping below).
vtools use refGene

# Association tests on BMI with SEX as covariate.
vtools associate common_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db YA_CV > YA_CV.asso.res
vtools associate rare_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db YA_RV > YA_RV.asso.res
vtools associate rare_yri BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db YA_RV > YA_RV_VT.asso.res

# Return to the parent directory: the meta-analysis paths below are relative to it.
cd ..
vtools_report meta_analysis ceu/EA_RV_VT.asso.res yri/YA_RV_VT.asso.res --beta 5 --pval 6 --se 7 -n 2 --link 1 > META_RV_VT.asso.res
cut -f1,3 META_RV_VT.asso.res | head
VMT
# VMT exercise: three variant-filtering examples in one project named VMT.
# Run the commands one at a time, in order. The banner comments and each
# command must sit on their own lines; a leading '#' otherwise comments out
# every command that shares its line.
# Input files referenced below: AR1.vcf.gz, AR2.vcf.gz, AD.vcf.gz,
# de_novo.vcf.gz, NSHI.fmt, VMT_annotation.fmt, choose_damaging_variants.py.

##################################
# Example 1: Autosomal recessive #
##################################
# Create the project (--force) and import the two AR VCF files.
vtools init VMT --force
vtools import AR1.vcf.gz AR2.vcf.gz --format NSHI.fmt --build hg19 -j8
vtools show tables
vtools show samples

# Run the ANNOVAR geneanno pipeline and link the dbNSFP and refGene annotations.
vtools execute ANNOVAR geneanno
vtools use dbNSFP.DB
vtools use refGene
vtools show fields

# Successive filters: ExAC frequency fields, CADD_phred, then region_type/mut_type.
vtools select variant "(ExAC_AF is NULL or ExAC_AF<0.0005) AND (ExAC_SAS_AF is NULL or ExAC_SAS_AF<0.0005)" -t ExAC0005
vtools select ExAC0005 "CADD_phred>20 or CADD_phred is NULL" -t CADD20
vtools select CADD20 "region_type is 'splicing' OR (mut_type is not NULL AND mut_type is not 'synonymous SNV' AND mut_type is not 'unknown')" -t ANNOVARtype

# Export the *_pred fields to snv.txt, run the helper script on it, and load
# snv.txt.parsed back into the project using VMT_annotation.fmt.
vtools select ANNOVARtype "dbNSFP.chr is not null" --output chr pos ref alt sift_pred lrt_pred fathmm_pred mutationtaster_pred mutationassessor_pred polyphen2_hdiv_pred polyphen2_hvar_pred provean_pred MetaLR_pred MetaSVM_pred >snv.txt
python choose_damaging_variants.py snv.txt
vtools update ANNOVARtype --format VMT_annotation.fmt --from_file snv.txt.parsed
vtools select ANNOVARtype "vmt_annotation='damaging_SNV'" -t damaging

# Report damaging variants on chr 16 between positions 63600000 and 79700000.
vtools select damaging "chr=16 AND (pos>=63600000 AND pos<=79700000)" -o chr pos ref alt region_type region_name mut_type function rs_dbSNP141 ExAC_Adj_AF ExAC_SAS_AF CADD_phred FATHMM_pred LRT_pred MetaLR_pred MetaSVM_pred MutationAssessor_pred MutationTaster_pred Polyphen2_HDIV_pred Polyphen2_HVAR_pred PROVEAN_pred SIFT_pred "samples('geno_filter=GT=1')" "samples('geno_filter=GT=2')"

# Keep variants where both L1 and L2 have a genotype and both are counted as hom
# (totX=homX AND totX=2); totX/homX are temporary fields and are removed afterwards.
vtools update damaging --from_stat "totX=#(GT)" "homX=#(hom)" --samples "sample_name='L1' OR sample_name='L2'" -j2
vtools select damaging "totX=homX AND totX=2" -t homL1_L2
vtools remove fields totX homX
vtools output homL1_L2 chr pos ref alt region_type region_name mut_type function rs_dbSNP141 ExAC_Adj_AF ExAC_SAS_AF CADD_phred FATHMM_pred LRT_pred MetaLR_pred MetaSVM_pred MutationAssessor_pred MutationTaster_pred Polyphen2_HDIV_pred Polyphen2_HVAR_pred PROVEAN_pred SIFT_pred "samples('geno_filter=GT=1')" "samples('geno_filter=GT=2')"

##################################
# Example 2: Autosomal dominant  #
##################################
# Import the AD VCF file into the same project.
vtools import AD.vcf.gz --format NSHI.fmt --build hg19 -j8
vtools show tables
vtools show samples

# Filters restricted to samples AD1/AD2: ExAC frequency fields, then CADD_phred.
vtools select variant "(ExAC_AF is NULL or ExAC_AF<0.0005) AND (ExAC_NFE_AF is NULL or ExAC_NFE_AF<0.0005)" --samples "sample_name='AD1' OR sample_name='AD2'" -t ExAC0005_AD
vtools select ExAC0005_AD "CADD_phred>15 or CADD_phred is NULL" -t CADD15_AD

# Export the *_pred fields, run the helper script, and load its output back.
vtools select CADD15_AD "dbNSFP.chr is not null" --output chr pos ref alt sift_pred lrt_pred fathmm_pred mutationtaster_pred mutationassessor_pred polyphen2_hdiv_pred polyphen2_hvar_pred provean_pred MetaLR_pred MetaSVM_pred >snv.txt
python choose_damaging_variants.py snv.txt
vtools update variant --format VMT_annotation.fmt --from_file snv.txt.parsed
vtools select CADD15_AD "vmt_annotation='damaging_SNV'" -t damaging_AD

# Keep variants where both AD1 and AD2 have a genotype and both are counted as het
# (totX=hetX AND totX=2); the temporary fields are removed afterwards.
vtools update damaging_AD --from_stat "totX=#(GT)" "hetX=#(het)" --samples "sample_name='AD1' OR sample_name='AD2'" -j2
vtools select damaging_AD "totX=hetX AND totX=2" -t hetAD_2
vtools remove fields totX hetX
vtools output hetAD_2 chr pos ref alt refGene.name2 rs_dbSNP141 ExAC_Adj_AF ExAC_SAS_AF CADD_phred FATHMM_pred LRT_pred MetaLR_pred MetaSVM_pred MutationAssessor_pred MutationTaster_pred Polyphen2_HDIV_pred Polyphen2_HVAR_pred PROVEAN_pred SIFT_pred "samples('geno_filter=GT=1')" "samples('geno_filter=GT=2')"

##################################
# Example 3: De Novo             #
##################################
# Import the de_novo VCF file into the same project.
vtools import de_novo.vcf.gz --format NSHI.fmt --build hg19 -j8
vtools show tables
vtools show samples

# Variants where sample Son has one genotype and it is counted as het.
vtools update variant --from_stat "totX=#(GT)" "hetX=#(het)" --samples "sample_name='Son'" -j2
vtools select variant "totX=hetX AND totX=1" -t Son_het

# Of those, keep variants where Dad and Mom both have a genotype counted by
# #(wtGT) (totX=wtX AND totX=2) and that pass the ExAC frequency filters.
vtools update Son_het --from_stat "totX=#(GT)" "wtX=#(wtGT)" --samples "sample_name='Dad' OR sample_name='Mom'" -j2
vtools select Son_het "(totX=wtX AND totX=2) and ((ExAC_AF is NULL or ExAC_AF<0.0005) AND (ExAC_NFE_AF is NULL or ExAC_NFE_AF<0.0005))" -t deNovo
vtools output deNovo chr pos ref alt refGene.name2 rs_dbSNP141 ExAC_Adj_AF ExAC_NFE_AF CADD_phred FATHMM_pred LRT_pred MetaLR_pred MetaSVM_pred MutationAssessor_pred MutationTaster_pred Polyphen2_HDIV_pred Polyphen2_HVAR_pred PROVEAN_pred SIFT_pred "samples('geno_filter=GT=1')" "samples('geno_filter=GT=2')"