Difference between revisions of "RV-TDT"

From Statistical Genetics Courses

Jump to: navigation, search
(Created page with "<pre> ### Variant Annotation vtools init rvtdt vtools import --format vcf data/data.vcf --build hg19 vtools phenotype --from_file data/phen.txt vtools execute ANNOVAR geneanno...")
 
Line 1: Line 1:
<pre>
+
__NOTITLE__
### Variant Annotation
+
 
 +
==RV-TDT==
 +
<pre>### Variant Annotation
 
vtools init rvtdt
 
vtools init rvtdt
 
vtools import --format vcf data/data.vcf --build hg19
 
vtools import --format vcf data/data.vcf --build hg19
Line 8: Line 10:
 
vtools export func_variant --format tped --samples 'phenotype is not null' &gt; vat_raw.tped
 
vtools export func_variant --format tped --samples 'phenotype is not null' &gt; vat_raw.tped
 
# set marker name as chr_pos, needs to avoid duplicate name
 
# set marker name as chr_pos, needs to avoid duplicate name
sort -k4 -n vat_raw.tped | awk 'BEGIN{OFS&#61;"\t";prev&#61;"None";copy&#61;1} {$2&#61;$1"_"$4; $3&#61;0; if($2&#61;&#61;prev) {$2&#61;$2"_"copy; copy&#61;copy+1} else {prev&#61;$2; copy&#61;1}; print $0}' &gt; vat_export.tped
+
sort -k4 -n vat_raw.tped | awk 'BEGIN{OFS="\t";prev="None";copy=1} {$2=$1"_"$4; $3=0; if($2==prev) {$2=$2"_"copy; copy=copy+1} else {prev=$2; copy=1}; print $0}' &gt; vat_export.tped
 
vtools phenotype --out family sample_name pid mid sex phenotype &gt; vat_export.tfam
 
vtools phenotype --out family sample_name pid mid sex phenotype &gt; vat_export.tfam
 
vtools use refGene-hg19_20130904
 
vtools use refGene-hg19_20130904
vtools update func_variant --set 'maf&#61;0.001' # set the maf to be 0.001
+
vtools update func_variant --set 'maf=0.001' # set the maf to be 0.001
 
vtools select func_variant -o chr pos refGene.name2 maf --header &gt; vat_export.anno
 
vtools select func_variant -o chr pos refGene.name2 maf --header &gt; vat_export.anno
  
 
### Phasing Trio
 
### Phasing Trio
 
plink --noweb --tfile vat_export --recode12 --me 1 1 --set-me-missing --out "recode12_noME"
 
plink --noweb --tfile vat_export --recode12 --me 1 1 --set-me-missing --out "recode12_noME"
sort -n -k1 -k6 -k2 recode12_noME.ped | sed 's/ /\t/g' | cut -f1,3,4,5 --complement &gt; linkage.ped cut -f2 recode12_noME.map | awk 'BEGIN{OFS&#61;"\t";} {print "M",$0}' | sed '1i\I\tid\nA\tDisease' &gt; linkage.dat
+
sort -n -k1 -k6 -k2 recode12_noME.ped | sed 's/ /\t/g' | cut -f1,3,4,5 --complement &gt; linkage.ped cut -f2 recode12_noME.map | awk 'BEGIN{OFS="\t";} {print "M",$0}' | sed '1i\I\tid\nA\tDisease' &gt; linkage.dat
 
java -Xmx10000m -jar java/linkage2beagle.jar linkage.dat linkage.ped &gt; pre_beagle.bgl
 
java -Xmx10000m -jar java/linkage2beagle.jar linkage.dat linkage.ped &gt; pre_beagle.bgl
 
python script/pre_phase.py -i pre_beagle.bgl -a pre_beagle_withMissing.bgl
 
python script/pre_phase.py -i pre_beagle.bgl -a pre_beagle_withMissing.bgl
java -Xmx10000m -jar java/beagle.jar missing&#61;0 trios&#61;pre_beagle.bgl out&#61;bgl_phased verbose&#61;false redundant&#61;true
+
java -Xmx10000m -jar java/beagle.jar missing=0 trios=pre_beagle.bgl out=bgl_phased verbose=false redundant=true
 
gunzip bgl_phased.pre_beagle.bgl.phased.gz
 
gunzip bgl_phased.pre_beagle.bgl.phased.gz
  

Revision as of 15:38, 7 June 2018

RV-TDT

### Variant Annotation
vtools init rvtdt
vtools import --format vcf data/data.vcf --build hg19
vtools phenotype --from_file data/phen.txt
vtools execute ANNOVAR geneanno
vtools select variant "variant.region_type like '%splicing%'or variant.mut_type like 'nonsynonymous%' or variant.mut_type like 'frameshift%' or variant.mut_type like 'stop%'" -t func_variant
vtools export func_variant --format tped --samples 'phenotype is not null' > vat_raw.tped
# set marker name as chr_pos, needs to avoid duplicate name
sort -k4 -n vat_raw.tped | awk 'BEGIN{OFS="\t";prev="None";copy=1} {$2=$1"_"$4; $3=0; if($2==prev) {$2=$2"_"copy; copy=copy+1} else {prev=$2; copy=1}; print $0}' > vat_export.tped
vtools phenotype --out family sample_name pid mid sex phenotype > vat_export.tfam
vtools use refGene-hg19_20130904
vtools update func_variant --set 'maf=0.001' # set the maf to be 0.001
vtools select func_variant -o chr pos refGene.name2 maf --header > vat_export.anno

### Phasing Trio
plink --noweb --tfile vat_export --recode12 --me 1 1 --set-me-missing --out "recode12_noME"
sort -n -k1 -k6 -k2 recode12_noME.ped | sed 's/ /\t/g' | cut -f1,3,4,5 --complement > linkage.ped cut -f2 recode12_noME.map | awk 'BEGIN{OFS="\t";} {print "M",$0}' | sed '1i\I\tid\nA\tDisease' > linkage.dat
java -Xmx10000m -jar java/linkage2beagle.jar linkage.dat linkage.ped > pre_beagle.bgl
python script/pre_phase.py -i pre_beagle.bgl -a pre_beagle_withMissing.bgl
java -Xmx10000m -jar java/beagle.jar missing=0 trios=pre_beagle.bgl out=bgl_phased verbose=false redundant=true
gunzip bgl_phased.pre_beagle.bgl.phased.gz

### RV-TDT Analysis
python script/post_phase.py -a vat_export.anno -b bgl_phased.pre_beagle.bgl.phased -o genes/
for g in `ls genes | grep tped | cut -d"." -f1 | head -20`
do
    echo "running rvTDT on gene "${g}
    rvTDT exercise_proj -G ./genes/${g}.tped -P ./data/rvtdt.phen -M ./genes/${g}.map --adapt 500 --alpha 0.00001 --permut 2000 --lower_cutoff 0 --upper_cutoff 100 --minVariants 3 --maxMissRatio 1 done
done