Changes

AdvGeneMap2018Commands

6,357 bytes added, 19:46, 23 January 2018
/* GxG Interaction */
 
===Functional Annotation===
# Invoke table_annovar.pl with no arguments (presumably prints its usage text — confirm).
table_annovar.pl
# Annotate APOC3.vcf against the refGene protocol with -operation g (hg19 build),
# writing output under the prefix APOC3_Gene.vcf.
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Gene.vcf -remove -nastring . -protocol refGene -operation g -vcfinput
# Display the multianno table written by the previous command.
cat APOC3_Gene.vcf.hg19_multianno.txt
# Same kind of run with three protocols (refGene, knownGene, ensGene); each one is
# given the extra arguments '-splicing 12 -exonicsplicing' through -arg.
# NOTE(review): reuses the APOC3_Gene.vcf output prefix of the earlier run.
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Gene.vcf -remove -nastring . -protocol refGene,knownGene,ensGene -operation g,g,g -arg '-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing' -vcfinput
# Print tab-separated columns 1, 2 and 6-10 of the multianno table.
awk -F'\t' '{print $1,$2,$6,$7,$8,$9,$10}' APOC3_Gene.vcf.hg19_multianno.txt
# Run with -operation r against phastConsElements46way, output prefix APOC3_Region.vcf.
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Region.vcf -remove -nastring . -protocol phastConsElements46way -operation r -vcfinput
# Run with -operation r against gwasCatalog.
# NOTE(review): same APOC3_Region.vcf output prefix as the previous command.
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Region.vcf -remove -nastring . -protocol gwasCatalog -operation r -vcfinput
# Run with -operation f against ten protocols, output prefix APOC3_Filter.vcf.
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Filter.vcf -remove -nastring . -protocol gnomad_genome,gnomad_exome,popfreq_max_20150413,gme,avsnp150,dbnsfp33a,dbscsnv11,cadd13gt20,clinvar_20170905,gwava -operation f,f,f,f,f,f,f,f,f,f -vcfinput
# The awk commands below print columns 1 and 2 plus selected columns of the
# APOC3_Filter multianno table. Which protocol each column number belongs to
# depends on the layout of that file — TODO confirm against its header line.
awk -F'\t' '{print $1,$2,$103,$104}' APOC3_Filter.vcf.hg19_multianno.txt
awk -F'\t' '{print $1,$2,$6,$14}' APOC3_Filter.vcf.hg19_multianno.txt
awk -F'\t' '{print $1,$2,$15,$16,$17,$18,$19,$20,$21,$22}' APOC3_Filter.vcf.hg19_multianno.txt
awk -F'\t' '{print $1,$2,$36,$86,$70}' APOC3_Filter.vcf.hg19_multianno.txt
awk -F'\t' '{print $1,$2,$99,$100}' APOC3_Filter.vcf.hg19_multianno.txt
# Single combined run over all eighteen protocols used above (operations g, r and f),
# output prefix APOC3_ANN.vcf; only the first three protocols receive -arg values.
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_ANN.vcf -remove -nastring . -protocol refGene,knownGene,ensGene,wgRna,targetScanS,phastConsElements46way,tfbsConsSites,gwasCatalog,gnomad_genome,gnomad_exome,popfreq_max_20150413,gme,avsnp150,dbnsfp33a,dbscsnv11,cadd13gt20,clinvar_20170905,gwava -operation g,g,g,r,r,r,r,r,f,f,f,f,f,f,f,f,f,f -arg '-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing',,,,,,,,,,,,,,, -vcfinput
===GenABEL===
# Print the Pc1df values of results(testb.qt) that are smaller than alpha.
# testb.qt and alpha are defined outside the lines shown here.
results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha]
 
===GxG Interaction===
 
# All commands read the same input pair: simcasecon.ped / simcasecon.map.
# Run with --assoc.
./plink --noweb --ped simcasecon.ped --map simcasecon.map --assoc
# Run with --fast-epistasis.
./plink --noweb --ped simcasecon.ped --map simcasecon.map --fast-epistasis
# --fast-epistasis again, this time with --case-only added.
./plink --noweb --ped simcasecon.ped --map simcasecon.map --fast-epistasis --case-only
# Run with --epistasis.
./plink --noweb --ped simcasecon.ped --map simcasecon.map --epistasis
# Recode with --recodeA under the output prefix "recoded"
# (presumably the source of recoded.raw read in the R session below — confirm).
./plink --noweb --ped simcasecon.ped --map simcasecon.map --recodeA --out recoded
# Write a binary fileset (--make-bed) under the output prefix "cassiformat".
./plink --noweb --ped simcasecon.ped --map simcasecon.map --make-bed --out cassiformat
R
# The following commands are in the R environment
# Load and display the table stored in cassi.out.
# NOTE(review): the command that produces cassi.out is not shown in this section.
je <- read.table("cassi.out", header = TRUE)
je

library(ORMDR)

# Read the recoded genotype table and look at its first rows.
recoded <- read.table("recoded.raw", header = TRUE)
head(recoded)

# Keep columns 7-106 (100 columns) and append PHENOTYPE - 1 as column 101,
# which is then named "casestatus".
# Presumably PHENOTYPE is coded 1/2, so the subtraction yields 0/1 — confirm.
newdata <- recoded[7:106]
ormdrdata <- cbind(newdata, recoded$PHENOTYPE - 1)
names(ormdrdata)[101] <- "casestatus"
head(ormdrdata)

# Three mdr.c runs with the response in column 101 and 10 cross-validation
# folds; only combi (1, 2, 3) differs between them.
mdr1 <- mdr.c(ormdrdata, colresp = 101, cs = 1, combi = 1, cv.fold = 10)
mdr1$min.comb
mdr2 <- mdr.c(ormdrdata, colresp = 101, cs = 1, combi = 2, cv.fold = 10)
mdr2$min.comb
mdr3 <- mdr.c(ormdrdata, colresp = 101, cs = 1, combi = 3, cv.fold = 10)
mdr3$min.comb

# Inspect test.erate for each run, then compare the means.
mdr1$test.erate
mdr2$test.erate
mdr3$test.erate
mdr1mean <- mean(mdr1$test.erate)
mdr2mean <- mean(mdr2$test.erate)
mdr3mean <- mean(mdr3$test.erate)
mdr1mean
mdr2mean
mdr3mean

# Inspect best.combi and min.comb for the combi = 2 and combi = 3 runs.
mdr2$best.combi
mdr2$min.comb
mdr3$best.combi
mdr3$min.comb

# Logistic regression of casestatus on snp1_2 and snp2_1 (as factors) with
# their interaction.
logreg12 <- glm(casestatus ~ factor(snp1_2) * factor(snp2_1),
                family = binomial, data = ormdrdata)
summary(logreg12)
anova(logreg12)
# Upper-tail chi-square p-values for fixed statistics on 4 and 8 df
# (presumably values read off the anova output above — confirm).
pchisq(701.68, 4, lower.tail = FALSE)
pchisq(703.82, 8, lower.tail = FALSE)

# Logistic regression with the three-way interaction of snp3_2, snp4_2, snp5_2.
logreg345 <- glm(casestatus ~ factor(snp3_2) * factor(snp4_2) * factor(snp5_2),
                 family = binomial, data = ormdrdata)
summary(logreg345)
anova(logreg345)
pchisq(45.6, 8, lower.tail = FALSE)
q()
### The following commands are in the linux shell
# Run BEAM3 on beam3data.txt with -o beam3results (presumably the output name — confirm).
./BEAM3 beam3data.txt -o beam3results
# Same run with the extra option -T 10 (meaning not shown here; see the BEAM3 documentation).
./BEAM3 beam3data.txt -o beam3results -T 10
===Plink - Part 1 - Data QC===
#### in R - open R by simply typing R
# Replace the placeholder path with your own working directory.
setwd("to_your_working_directory/")
# Read the sex-check table and list its column names.
sexcheck <- read.table("GWAS_sex_checking.sexcheck", header = TRUE)
names(sexcheck)
# Keep only the rows whose STATUS column equals "PROBLEM" and print them.
sex_problem <- sexcheck[which(sexcheck$STATUS == "PROBLEM"), ]
sex_problem
q()
#### in R
# Replace the placeholder path with your own working directory.
setwd("to_your_working_directory/")
# Read the pairwise table and list the pairs with PI_HAT above 0.4.
dups <- read.table("duplicates.genome", header = TRUE)
problem_pairs <- dups[which(dups$PI_HAT > 0.4), ]
problem_pairs
# Repeat with the lower threshold 0.05, showing only the ID and PI_HAT columns.
problem_pairs <- dups[which(dups$PI_HAT > 0.05), ]
myvars <- c("FID1", "IID1", "FID2", "IID2", "PI_HAT")
problem_pairs[myvars]
q()
#### in the linux shell
plink --file GWAS_clean3 --het
###### in R
# Read plink.het (presumably written by the plink --het command above — confirm).
Dataset <- read.table("plink.het", header = TRUE, sep = "",
                      na.strings = "NA", dec = ".", strip.white = TRUE)
# Mean and standard deviation of the F column.
mean(Dataset$F)
sd(Dataset$F)
# Histogram of the standardised F values, written to hist.jpeg.
jpeg("hist.jpeg", height = 1000, width = 1000)
hist(scale(Dataset$F), xlim = c(-4, 4))
dev.off()
q()
#### in the linux shell
plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy
##### in R
# Read plink.hwe (presumably written by the plink --hardy command above — confirm)
# and list its column names.
hardy <- read.table("plink.hwe", header = TRUE)
names(hardy)
# Keep and print the rows whose P column is below 0.0000009.
hwe_prob <- hardy[which(hardy$P < 0.0000009), ]
hwe_prob
q()
===Plink - Part 2 - Controlling for Substructure===
 
#### in the linux shell
plink --file GWAS_clean4 --genome --cluster --mds-plot 10
#### in R
mydata <- read.table("mds_components.txt", header = TRUE)
# Choose a plotting symbol per value of the Group column (1 -> 15, 2 -> 16, 3 -> 2).
mydata$pch[mydata$Group == 1] <- 15
mydata$pch[mydata$Group == 2] <- 16
mydata$pch[mydata$Group == 3] <- 2
# Scatter plot of column C1 against C2, written to mds.jpeg.
jpeg("mds.jpeg", height = 500, width = 500)
plot(mydata$C1, mydata$C2, pch = mydata$pch)
dev.off()
q()
#### in R
# Draw a QQ plot of observed against expected -log10 p-values.
#   pvals: numeric vector of p-values (sort() drops NA values).
#   title: main title of the plot.
# Draws on the current graphics device; called for its side effect only.
broadqq <- function(pvals, title) {
  observed <- sort(pvals)
  lobs <- -(log10(observed))
  # Expected quantiles: rank / (n + 1) for the sorted p-values.
  expected <- seq_along(observed)
  lexp <- -(log10(expected / (length(expected) + 1)))
  # Red reference line from (0, 0) to (7, 7); both axes run from 0 to max(lobs).
  plot(c(0, 7), c(0, 7), col = "red", lwd = 3, type = "l",
       xlab = "Expected (-logP)", ylab = "Observed (-logP)",
       xlim = c(0, max(lobs)), ylim = c(0, max(lobs)),
       las = 1, xaxs = "i", yaxs = "i", bty = "l", main = title)
  points(lexp, lobs, pch = 23, cex = .4, bg = "black")
}

# Two stacked QQ plots (unadjusted vs. adjusted), written to qqplot_compare.jpeg.
jpeg("qqplot_compare.jpeg", height = 1000, width = 500)
par(mfrow = c(2, 1))
# Use only the rows whose TEST column is "ADD".
aff_unadj <- read.table("unadj.assoc.logistic", header = TRUE)
aff_unadj.add.p <- aff_unadj[aff_unadj$TEST == "ADD", ]$P
broadqq(aff_unadj.add.p, "Some Trait Unadjusted")
aff_C1C2 <- read.table("PC1-PC2.assoc.logistic", header = TRUE)
aff_C1C2.add.p <- aff_C1C2[aff_C1C2$TEST == "ADD", ]$P
broadqq(aff_C1C2.add.p, "Some Trait Adjusted for PC1 and PC2")
dev.off()
# Rows of the unadjusted results with P below 0.0000001.
gws_unadj <- aff_unadj[which(aff_unadj$P < 0.0000001), ]
gws_unadj
gws_adjusted = aff_C1C2[which(aff_C1C2$P < 0.0000001),]
gws_adjusted

===RV-TDT===
### Variant Annotation
vtools init rvtdt
vtools import --format vcf data/data.vcf --build hg19
vtools phenotype --from_file data/phen.txt
vtools execute ANNOVAR geneanno
vtools select variant "variant.region_type like '%splicing%'or variant.mut_type like 'nonsynonymous%' or variant.mut_type like 'frameshift%' or variant.mut_type like 'stop%'" -t func_variant
vtools export func_variant --format tped --samples 'phenotype is not null' > vat_raw.tped
# set marker name as chr_pos, needs to avoid duplicate name
sort -k4 -n vat_raw.tped | awk 'BEGIN{OFS="\t";prev="None";copy=1} {$2=$1"_"$4; $3=0; if($2==prev) {$2=$2"_"copy; copy=copy+1} else {prev=$2; copy=1}; print $0}' > vat_export.tped
vtools phenotype --out family sample_name pid mid sex phenotype > vat_export.tfam
vtools use refGene-hg19_20130904
vtools update func_variant --set 'maf=0.001' # set the maf to be 0.001
vtools select func_variant -o chr pos refGene.name2 maf --header > vat_export.anno
### Phasing Trio
plink --noweb --tfile vat_export --recode12 --me 1 1 --set-me-missing --out "recode12_noME"
sort -n -k1 -k6 -k2 recode12_noME.ped | sed 's/ /\t/g' | cut -f1,3,4,5 --complement > linkage.ped
cut -f2 recode12_noME.map | awk 'BEGIN{OFS="\t";} {print "M",$0}' | sed '1i\I\tid\nA\tDisease' > linkage.dat
java -Xmx10000m -jar java/linkage2beagle.jar linkage.dat linkage.ped > pre_beagle.bgl
python script/pre_phase.py -i pre_beagle.bgl -a pre_beagle_withMissing.bgl
java -Xmx10000m -jar java/beagle.jar missing=0 trios=pre_beagle.bgl out=bgl_phased verbose=false redundant=true
gunzip bgl_phased.pre_beagle.bgl.phased.gz
### RV-TDT Analysis
python script/post_phase.py -a vat_export.anno -b bgl_phased.pre_beagle.bgl.phased -o genes/
for g in `ls genes | grep tped | cut -d"." -f1 | head -20`
do
echo "running rvTDT on gene "${g}
rvTDT exercise_proj -G ./genes/${g}.tped -P ./data/rvtdt.phen -M ./genes/${g}.map --adapt 500 --alpha 0.00001 --permut 2000 --lower_cutoff 0 --upper_cutoff 100 --minVariants 3 --maxMissRatio 1
done

===Seqspark===
hdfs dfs -put demo.vcf.bz2
hdfs dfs -put demo.tsv
seqspark annotation.conf
seqspark qc.conf
seqspark demo.conf
===VAT===
# Presumably prints the vtools help text (-h) — confirm.
vtools -h
# Run vtools init with the name VATDemo (presumably creates a project of that name — confirm).
vtools init VATDemo
Bureaucrat, administrator
1,252
edits