Difference between revisions of "RV-TDT Exercise"

From Statistical Genetics Courses

Jump to: navigation, search
(RV-TDT Exercise)
Line 1: Line 1:
 
__NOTITLE__
 
__NOTITLE__
  
==RV-TDT Exercise==
+
==RV-TDT exercise==
vtools init rvtdt<br /> vtools import --format vcf data/data.vcf --build hg19<br /> vtools phenotype --from_file data/phen.txt<br /> # variant selection<br /> vtools execute ANNOVAR geneanno<br /> vtools select variant "variant.region_type like '%splicing%'or variant.mut_type like 'nonsynonymous%' or variant.mut_type like 'frameshift%' or variant.mut_type like 'stop%'" -t func_variant<br /> # tped file<br /> vtools export func_variant --format tped --samples 'phenotype is not null' > vat_raw.tped<br /> sort -k4 -n vat_raw.tped | awk 'BEGIN{OFS="\t";prev="None";copy=1} {$2=$1"_"$4; $3=0; if($2==prev) {$2=$2"_"copy; copy=copy+1} else {prev=$2; copy=1}; print $0}' > vat_export.tped<br /> # tfam file<br /> vtools phenotype --out family sample_name pid mid sex phenotype > vat_export.tfam<br /> # anno file<br /> vtools use refGene-hg19_20130904<br /> vtools update func_variant --set 'maf=0.001' <br /> vtools select func_variant -o chr pos refGene.name2 maf --header > vat_export.anno<br /> # Mendelian error and recode<br /> plink --noweb --tfile vat_export --recode12 --me 1 1 --set-me-missing --out "recode12_noME"<br /> sort -n -k1 -k6 -k2 recode12_noME.ped | sed 's/ /\t/g' | cut -f1,3,4,5 --complement > linkage.ped<br /> cut -f2 recode12_noME.map | awk 'BEGIN{OFS="\t";} {print "M",$0}' | sed '1i\I\tid\nA\tDisease' > linkage.dat<br /> java -Xmx10000m -jar java/linkage2beagle.jar linkage.dat linkage.ped > pre_beagle.bgl<br /> python script/pre_phase.py -i pre_beagle.bgl -a pre_beagle_withMissing.bgl<br /> java -Xmx10000m -jar java/beagle.jar missing=0 trios=pre_beagle.bgl out=bgl_phased verbose=false redundant=true<br /> gunzip bgl_phased.pre_beagle.bgl.phased.gz<br /> python script/post_phase.py -a vat_export.anno -b bgl_phased.pre_beagle.bgl.phased -o genes/<br /> for g in `ls genes | grep tped | cut -d"." -f1 | head -20` <br /> do <br /> echo "runing rvTDT on gene "${g}<br /> rvTDT exercise_proj -G ./genes/${g}.tped -P ./data/rvtdt.phen -M ./genes/${g}.map --adapt 500 --alpha 0.00001 --permut 2000 --lower_cutoff 0 --upper_cutoff 100 -- minVariants 3 --maxMissRatio 1 <br /> done <br /> # Answer<br /> vtools show tables<br /> ls genes/ | grep tped | wc<br /> cat exercise_proj_pval/*.pval | grep -v "^#" | sort -k2<br /> cat exercise_proj_pval/*.pval | grep -v "^#" | sort -k3<br /> # clean<br /> rm -r exercise_proj* genes/* bgl* linkage* recode12* pre_beagle* vat_export.*
+
vtools init rvtdt
 +
vtools import --format vcf data/data.vcf --build hg19
 +
vtools phenotype --from_file data/phen.txt
 +
# variant selection
 +
vtools execute ANNOVAR geneanno
 +
vtools select variant "variant.region_type like '%splicing%'or variant.mut_type like 'nonsynonymous%' or variant.mut_type like 'frameshift%' or variant.mut_type like 'stop%'" -t func_variant
 +
# tped file
 +
vtools export func_variant --format tped --samples 'phenotype is not null' > vat_raw.tped
 +
sort -k4 -n vat_raw.tped | awk 'BEGIN{OFS="\t";prev="None";copy=1} {$2=$1"_"$4; $3=0; if($2==prev) {$2=$2"_"copy; copy=copy+1} else {prev=$2; copy=1}; print $0}' > vat_export.tped
 +
# tfam file
 +
vtools phenotype --out family sample_name pid mid sex phenotype > vat_export.tfam
 +
# anno file
 +
vtools use refGene-hg19_20130904
 +
vtools update func_variant --set 'maf=0.001'  
 +
vtools select func_variant -o chr pos refGene.name2 maf --header > vat_export.anno
 +
# Mendelian error and recode
 +
plink --noweb --tfile vat_export --recode12 --me 1 1 --set-me-missing --out "recode12_noME"
 +
sort -n -k1 -k6 -k2 recode12_noME.ped | sed 's/ /\t/g' | cut -f1,3,4,5 --complement > linkage.ped
 +
cut -f2 recode12_noME.map | awk 'BEGIN{OFS="\t";} {print "M",$0}' | sed '1i\I\tid\nA\tDisease' > linkage.dat
 +
java -Xmx10000m -jar java/linkage2beagle.jar linkage.dat linkage.ped > pre_beagle.bgl
 +
python script/pre_phase.py -i pre_beagle.bgl -a pre_beagle_withMissing.bgl
 +
java -Xmx10000m -jar java/beagle.jar missing=0 trios=pre_beagle.bgl out=bgl_phased verbose=false redundant=true
 +
gunzip bgl_phased.pre_beagle.bgl.phased.gz
 +
python script/post_phase.py -a vat_export.anno -b bgl_phased.pre_beagle.bgl.phased -o genes/
 +
for g in `ls genes | grep tped | cut -d"." -f1 | head -20`  
 +
do  
 +
    echo "runing rvTDT on gene "${g}
 +
    rvTDT exercise_proj -G ./genes/${g}.tped -P ./data/rvtdt.phen -M ./genes/${g}.map --adapt 500 --alpha 0.00001 --permut 2000 --lower_cutoff 0 --upper_cutoff 100 -- minVariants 3 --maxMissRatio 1  
 +
done  
 +
# Answer
 +
vtools show tables
 +
ls genes/ | grep tped | wc
 +
cat exercise_proj_pval/*.pval | grep -v "^#" | sort -k2
 +
cat exercise_proj_pval/*.pval | grep -v "^#" | sort -k3
 +
# clean
 +
rm -r exercise_proj* genes/* bgl* linkage* recode12* pre_beagle* vat_export.*

Revision as of 17:40, 23 February 2017

RV-TDT exercise

vtools init rvtdt
vtools import --format vcf data/data.vcf --build hg19
vtools phenotype --from_file data/phen.txt
# variant selection
vtools execute ANNOVAR  geneanno
vtools select variant "variant.region_type like '%splicing%'or variant.mut_type like 'nonsynonymous%' or variant.mut_type like 'frameshift%' or variant.mut_type like 'stop%'" -t func_variant
# tped file
vtools export func_variant --format tped --samples 'phenotype is not null'  > vat_raw.tped
sort -k4 -n vat_raw.tped | awk 'BEGIN{OFS="\t";prev="None";copy=1} {$2=$1"_"$4; $3=0; if($2==prev) {$2=$2"_"copy; copy=copy+1} else {prev=$2; copy=1}; print $0}' > vat_export.tped
# tfam file
vtools phenotype --out family sample_name pid mid sex phenotype > vat_export.tfam
# anno file
vtools use refGene-hg19_20130904
vtools update func_variant --set 'maf=0.001' 
vtools select func_variant -o chr pos refGene.name2 maf --header > vat_export.anno
# Mendelian error and recode
plink --noweb --tfile vat_export --recode12 --me 1 1 --set-me-missing --out "recode12_noME"
sort -n -k1 -k6 -k2 recode12_noME.ped | sed 's/ /\t/g' | cut -f1,3,4,5 --complement > linkage.ped
cut -f2 recode12_noME.map | awk 'BEGIN{OFS="\t";} {print "M",$0}' | sed '1i\I\tid\nA\tDisease' > linkage.dat
java -Xmx10000m -jar java/linkage2beagle.jar linkage.dat linkage.ped > pre_beagle.bgl
python script/pre_phase.py -i pre_beagle.bgl -a pre_beagle_withMissing.bgl
java -Xmx10000m -jar java/beagle.jar missing=0 trios=pre_beagle.bgl out=bgl_phased verbose=false redundant=true
gunzip bgl_phased.pre_beagle.bgl.phased.gz
python script/post_phase.py -a vat_export.anno -b bgl_phased.pre_beagle.bgl.phased -o genes/
for g in `ls genes | grep tped | cut -d"." -f1 | head -20` 
do 
    echo "runing rvTDT on gene "${g}
    rvTDT exercise_proj -G ./genes/${g}.tped -P ./data/rvtdt.phen -M ./genes/${g}.map --adapt 500 --alpha 0.00001 --permut 2000 --lower_cutoff 0 --upper_cutoff 100 -- minVariants 3 --maxMissRatio 1 
done 
# Answer
vtools show tables
ls genes/ | grep tped | wc
cat exercise_proj_pval/*.pval | grep -v "^#" | sort -k2
cat exercise_proj_pval/*.pval | grep -v "^#" | sort -k3
# clean
rm -r exercise_proj* genes/* bgl* linkage* recode12* pre_beagle* vat_export.*