Difference between revisions of "RV-TDT Exercise"
From Statistical Genetics Courses
Serveradmin (Talk | contribs) |
Serveradmin (Talk | contribs) |
||
(2 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
__NOTITLE__ | __NOTITLE__ | ||
− | ==RV-TDT | + | ==RV-TDT exercise== |
− | vtools init rvtdt | + | |
+ | vtools init rvtdt | ||
+ | vtools import --format vcf data/data.vcf --build hg19 | ||
+ | vtools phenotype --from_file data/phen.txt | ||
+ | # variant selection | ||
+ | vtools execute ANNOVAR geneanno | ||
+ | vtools select variant "variant.region_type like '%splicing%'or variant.mut_type like 'nonsynonymous%' or variant.mut_type like 'frameshift%' or variant.mut_type like 'stop%'" -t func_variant | ||
+ | # tped file | ||
+ | vtools export func_variant --format tped --samples 'phenotype is not null' > vat_raw.tped | ||
+ | sort -k4 -n vat_raw.tped | awk 'BEGIN{OFS="\t";prev="None";copy=1} {$2=$1"_"$4; $3=0; if($2==prev) {$2=$2"_"copy; copy=copy+1} else {prev=$2; copy=1}; print $0}' > vat_export.tped | ||
+ | # tfam file | ||
+ | vtools phenotype --out family sample_name pid mid sex phenotype > vat_export.tfam | ||
+ | # anno file | ||
+ | vtools use refGene-hg19_20130904 | ||
+ | vtools update func_variant --set 'maf=0.001' | ||
+ | vtools select func_variant -o chr pos refGene.name2 maf --header > vat_export.anno | ||
+ | # Mendelian error and recode | ||
+ | plink --noweb --tfile vat_export --me 1 1 --set-me-missing --make-bed --out "noME" | ||
+ | plink --bfile noME --recode12 --out "recode12_noME" | ||
+ | sort -n -k1 -k6 -k2 recode12_noME.ped | sed 's/ /\t/g' | cut -f1,3,4,5 --complement > linkage.ped | ||
+ | cut -f2 recode12_noME.map | awk 'BEGIN{OFS="\t";} {print "M",$0}' | sed '1i\I\tid\nA\tDisease' > linkage.dat | ||
+ | java -Xmx10000m -jar java/linkage2beagle.jar linkage.dat linkage.ped > pre_beagle.bgl | ||
+ | python script/pre_phase.py -i pre_beagle.bgl -a pre_beagle_withMissing.bgl | ||
+ | java -Xmx10000m -jar java/beagle.jar missing=0 trios=pre_beagle.bgl out=bgl_phased verbose=false redundant=true | ||
+ | gunzip bgl_phased.pre_beagle.bgl.phased.gz | ||
+ | python script/post_phase.py -a vat_export.anno -b bgl_phased.pre_beagle.bgl.phased -o genes/ | ||
+ | for g in `ls genes | grep tped | cut -d"." -f1 | head -20` | ||
+ | do | ||
+ | echo "runing rvTDT on gene "${g} | ||
+ | rvTDT exercise_proj -G ./genes/${g}.tped -P ./data/rvtdt.phen -M ./genes/${g}.map --adapt 500 --alpha 0.00001 --permut 2000 --lower_cutoff 0 --upper_cutoff 100 --minVariants 3 --maxMissRatio 1 | ||
+ | done | ||
+ | # Answer | ||
+ | vtools show tables | ||
+ | ls genes/ | grep tped | wc | ||
+ | cat exercise_proj_pval/*.pval | grep -v "^#" | sort -k2 | ||
+ | cat exercise_proj_pval/*.pval | grep -v "^#" | sort -k3 | ||
+ | # clean | ||
+ | rm -r exercise_proj* genes/* bgl* linkage* recode12* pre_beagle* vat_export.* |
Latest revision as of 17:42, 24 January 2019
RV-TDT exercise
vtools init rvtdt vtools import --format vcf data/data.vcf --build hg19 vtools phenotype --from_file data/phen.txt # variant selection vtools execute ANNOVAR geneanno vtools select variant "variant.region_type like '%splicing%'or variant.mut_type like 'nonsynonymous%' or variant.mut_type like 'frameshift%' or variant.mut_type like 'stop%'" -t func_variant # tped file vtools export func_variant --format tped --samples 'phenotype is not null' > vat_raw.tped sort -k4 -n vat_raw.tped | awk 'BEGIN{OFS="\t";prev="None";copy=1} {$2=$1"_"$4; $3=0; if($2==prev) {$2=$2"_"copy; copy=copy+1} else {prev=$2; copy=1}; print $0}' > vat_export.tped # tfam file vtools phenotype --out family sample_name pid mid sex phenotype > vat_export.tfam # anno file vtools use refGene-hg19_20130904 vtools update func_variant --set 'maf=0.001' vtools select func_variant -o chr pos refGene.name2 maf --header > vat_export.anno # Mendelian error and recode plink --noweb --tfile vat_export --me 1 1 --set-me-missing --make-bed --out "noME" plink --bfile noME --recode12 --out "recode12_noME" sort -n -k1 -k6 -k2 recode12_noME.ped | sed 's/ /\t/g' | cut -f1,3,4,5 --complement > linkage.ped cut -f2 recode12_noME.map | awk 'BEGIN{OFS="\t";} {print "M",$0}' | sed '1i\I\tid\nA\tDisease' > linkage.dat java -Xmx10000m -jar java/linkage2beagle.jar linkage.dat linkage.ped > pre_beagle.bgl python script/pre_phase.py -i pre_beagle.bgl -a pre_beagle_withMissing.bgl java -Xmx10000m -jar java/beagle.jar missing=0 trios=pre_beagle.bgl out=bgl_phased verbose=false redundant=true gunzip bgl_phased.pre_beagle.bgl.phased.gz python script/post_phase.py -a vat_export.anno -b bgl_phased.pre_beagle.bgl.phased -o genes/ for g in `ls genes | grep tped | cut -d"." -f1 | head -20` do echo "runing rvTDT on gene "${g} rvTDT exercise_proj -G ./genes/${g}.tped -P ./data/rvtdt.phen -M ./genes/${g}.map --adapt 500 --alpha 0.00001 --permut 2000 --lower_cutoff 0 --upper_cutoff 100 --minVariants 3 --maxMissRatio 1 done # Answer vtools show tables ls genes/ | grep tped | wc cat exercise_proj_pval/*.pval | grep -v "^#" | sort -k2 cat exercise_proj_pval/*.pval | grep -v "^#" | sort -k3 # clean rm -r exercise_proj* genes/* bgl* linkage* recode12* pre_beagle* vat_export.*