Changes

AdvGeneMap2018Commands

5,563 bytes added, 19:46, 23 January 2018
/* GxG Interaction */
===Functional Annotation===
table_annovar.pl
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Gene.vcf -remove -nastring . -protocol refGene -operation g -vcfinput
cat APOC3_Gene.vcf.hg19_multianno.txt
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Gene.vcf -remove -nastring . -protocol refGene,knownGene,ensGene -operation g,g,g -arg '-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing' -vcfinput
awk -F'\t' '{print $1,$2,$6,$7,$8,$9,$10}' APOC3_Gene.vcf.hg19_multianno.txt
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Region.vcf -remove -nastring . -protocol phastConsElements46way -operation r -vcfinput
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Region.vcf -remove -nastring . -protocol gwasCatalog -operation r -vcfinput
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Filter.vcf -remove -nastring . -protocol gnomad_genome,gnomad_exome,popfreq_max_20150413,gme,avsnp150,dbnsfp33a,dbscsnv11,cadd13gt20,clinvar_20170905,gwava -operation f,f,f,f,f,f,f,f,f,f -vcfinput
awk -F'\t' '{print $1,$2,$103,$104}' APOC3_Filter.vcf.hg19_multianno.txt
awk -F'\t' '{print $1,$2,$6,$14}' APOC3_Filter.vcf.hg19_multianno.txt
awk -F'\t' '{print $1,$2,$15,$16,$17,$18,$19,$20,$21,$22}' APOC3_Filter.vcf.hg19_multianno.txt
awk -F'\t' '{print $1,$2,$36,$86,$70}' APOC3_Filter.vcf.hg19_multianno.txt
awk -F'\t' '{print $1,$2,$99,$100}' APOC3_Filter.vcf.hg19_multianno.txt
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_ANN.vcf -remove -nastring . -protocol refGene,knownGene,ensGene,wgRna,targetScanS,phastConsElements46way,tfbsConsSites,gwasCatalog,gnomad_genome,gnomad_exome,popfreq_max_20150413,gme,avsnp150,dbnsfp33a,dbscsnv11,cadd13gt20,clinvar_20170905,gwava -operation g,g,g,r,r,r,r,r,f,f,f,f,f,f,f,f,f,f -arg '-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing',,,,,,,,,,,,,,, -vcfinput
===GenABEL===
# Load files
library(GenABEL)
# Convert the tped/tfam pair into the raw genotype file read below.
convert.snp.tped(
  tped = "gwa_gabel_qtl.tped",
  tfam = "gwa_gabel_qtl.tfam",
  out = "gwa_gabel_qtl.raw",
  strand = "u"
)
g.dat <- load.gwaa.data(
  phen = "gwa_gabel_qtl.praw",
  gen = "gwa_gabel_qtl.raw",
  force = TRUE
)
slotNames(g.dat)
slotNames(g.dat@gtdata)
# Trait
summary(g.dat@phdata$disease)
hist(
  g.dat@phdata$disease,
  main = "Quantitative Phenotype data summary",
  xlab = "Systolic pressure measure",
  freq = FALSE,
  breaks = 20,
  col = "gray"
)
rug(g.dat@phdata$disease)
###
###
# GLM test
test.snp <- scan.glm("disease ~ CRSNP", family = gaussian(), data = g.dat)
names(test.snp)
# Significance threshold applied to the 1-df p-values.
alpha <- 5e-8
test.snp$P1df[test.snp$P1df < alpha]
# Score test
test.qt <- qtscore(disease, data = g.dat, trait = "gaussian")
slotNames(test.qt)
names(test.qt@results)
# QQ plot: sorted observed p-values against evenly spaced expected quantiles.
obs <- sort(results(test.qt)$P1df)
ept <- seq_along(obs) / (length(obs) + 1)
plot(
  -log10(ept), -log10(obs),
  main = "GWAS QQ plot, qtl",
  xlab = "Expected -log10(pvalue)",
  ylab = "Observed -log10(pvalue)"
)
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
# Manhattan plot
plot(test.qt, col = "black")
# Adding confounders
test.qt.sex <- qtscore(disease ~ sex, data = g.dat, trait = "gaussian")
# Select SNPs by the sex-adjusted p-values, so the row names and the filter
# come from the same scan.
rownames(results(test.qt.sex))[results(test.qt.sex)$P1df < alpha]
summary(lm(disease ~ sex, data = g.dat))
###
# MDS
###
# Pairwise IBS matrix between samples, computed by ibs().
gkin <- ibs(g.dat, weight = "freq")
gkin[1:10, 1:10]
# Classical MDS on the dissimilarity 0.5 - gkin, requesting 10 dimensions.
cps.full <- cmdscale(as.dist(.5 - gkin), eig = TRUE, k = 10)
names(cps.full)
cps <- cps.full$points
# First two MDS coordinates; the plotting symbol is taken from phdata$popn.
plot(cps[, 1], cps[, 2], pch = g.dat@phdata$popn)
legend(-0.16, 0.06, c("TSI", "MEX", "CEU"), pch = c(1, 2, 3))
###
# Corrected test
gpc.dat <- g.dat
# cmdscale() returns its coordinate matrix without column names; label the
# columns C1, C2, ... so the model formulas below can refer to them.
pcs <- cps
colnames(pcs) <- paste0("C", seq_len(ncol(pcs)))
gpc.dat@phdata <- cbind(g.dat@phdata, pcs)
test.pc.a <- scan.glm(
  "disease ~ CRSNP + C1 + C2 + C3 + C4 + C5",
  family = gaussian(),
  data = gpc.dat
)
test.pc.a$snpnames[test.pc.a$P1df < alpha]
test.pc.a$P1df[test.pc.a$P1df < alpha]
test.pc.b <- qtscore(
  disease ~ C1 + C2 + C3 + C4 + C5,
  data = gpc.dat,
  trait = "gaussian"
)
test.pc.b@lambda
# scree plot: share of the eigenvalue sum carried by each of the first 10
# components
plot(
  cps.full$eig[1:10] / sum(cps.full$eig),
  axes = FALSE,
  type = "b",
  xlab = "Components",
  ylim = c(0, 0.05),
  ylab = "Proportion of Variations",
  main = "MDS analysis scree plot"
)
axis(1, 1:10)
axis(2)
# cumulative plot: running total of the same proportions
plot(
  cumsum(cps.full$eig[1:10]) / sum(cps.full$eig),
  axes = FALSE,
  type = "b",
  ylim = c(0, 0.2),
  xlab = "Components",
  ylab = "Proportion of Variations",
  main = "MDS analysis cumulative plot"
)
axis(1, 1:10)
axis(2)
# Check for inflation of statistic
obs <- sort(results(test.qt)$chi2.1df)
ept <- sort(qchisq(seq_along(obs) / (length(obs) + 1), df = 1))
plot(
  ept, obs,
  main = "Genomic control (slope is the inflation factor)",
  xlab = "Expected chisq, 1df",
  ylab = "Observed chisq, 1df"
)
abline(0, 1, col = "red")
# Dashed line through the origin with slope taken from the scan's lambda slot.
abline(0, test.qt@lambda[1], lty = 2)
# Definition of GIF
# Conventional definition
obs <- sort(results(test.qt)$Pc1df)
ept <- seq_along(obs) / (length(obs) + 1)
plot(
  -log10(ept), -log10(obs),
  main = "GWAS QQ plot adj. via Genomic Control",
  xlab = "Expected -log10(pvalue)",
  ylab = "Observed -log10(pvalue)"
)
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
# EIGENSTRAT
# Copy the IBS matrix and replace its diagonal with hom(g.dat)$Var before
# passing it to egscore() as the kinship matrix.
adj.gkin <- gkin
diag(adj.gkin) <- hom(g.dat)$Var
# naxes = 3 is default value
test.eg <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = 2)
descriptives.scan(test.eg)
snp.eg <- row.names(results(test.eg))[results(test.eg)$P1df < alpha]
# Change #PCs
for (k in 1:10) {
  test.tmp <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = k)
  print(test.tmp@lambda$estimate)
}
obs <- sort(results(test.eg)$Pc1df)
ept <- seq_along(obs) / (length(obs) + 1)
qqplot(
  -log10(ept), -log10(obs),
  main = "GWAS QQ plot adj. w/ EIGENSTRAT",
  xlab = "Expected -log10(pvalue)",
  ylab = "Observed -log10(pvalue)"
)
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
# Manhattan plot comparison
plot(test.qt, col = "black")
add.plot(test.eg, col = "gray", pch = 3)
legend(
  "topright",
  c("Original plot", "After correction w/ EIGENSTRAT"),
  pch = c(1, 3)
)
###
# Basic test, binary trait
###
# load files to GenABEL
convert.snp.tped(
  tped = "gwa_gabel.tped",
  tfam = "gwa_gabel.tfam",
  out = "gwa_gabel.raw",
  strand = "u"
)
b.dat <- load.gwaa.data(
  phen = "gwa_gabel.praw",
  gen = "gwa_gabel.raw",
  force = TRUE
)
slotNames(b.dat)
slotNames(b.dat@gtdata)
b.dat@gtdata@nids
# number of cases and controls
case.size <- length(which(b.dat@phdata$disease == 1))
control.size <- length(which(b.dat@phdata$disease == 0))
case.size
control.size
snpsb.total <- b.dat@gtdata@nsnps
# GLM test
testb.snp <- scan.glm("disease ~ CRSNP", family = binomial(), data = b.dat)
names(testb.snp)
alpha <- 5e-8
testb.snp$P1df[testb.snp$P1df < alpha]
# Score test
testb.qt <- qtscore(disease, data = b.dat, trait = "binomial")
slotNames(testb.qt)
descriptives.scan(testb.qt)
results(testb.qt)$P1df[results(testb.qt)$P1df < alpha]
results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha]
 
 
===GxG Interaction===
 
# Every PLINK run below reads the same ped/map pair; keep the shared part of
# the command line in one bash array.
plink_base=(./plink --noweb --ped simcasecon.ped --map simcasecon.map)
"${plink_base[@]}" --assoc
"${plink_base[@]}" --fast-epistasis
"${plink_base[@]}" --fast-epistasis --case-only
"${plink_base[@]}" --epistasis
"${plink_base[@]}" --recodeA --out recoded
"${plink_base[@]}" --make-bed --out cassiformat
# Start an interactive R session for the commands that follow.
R
# The following commands are in the R environment
je <- read.table("cassi.out", header = TRUE)
je
library(ORMDR)
recoded <- read.table("recoded.raw", header = TRUE)
head(recoded)
# Columns 7-106 are kept as predictors (presumably the 100 SNP columns of the
# --recodeA output; confirm against head(recoded)).
newdata <- recoded[7:106]
# Append PHENOTYPE - 1 as the response column (presumably recoding 1/2 to
# 0/1; confirm) and name it "casestatus".
ormdrdata <- cbind(newdata, recoded$PHENOTYPE - 1)
names(ormdrdata)[101] <- "casestatus"
head(ormdrdata)
# MDR with 1-, 2- and 3-SNP combinations, 10-fold cross-validation each.
mdr1 <- mdr.c(ormdrdata, colresp = 101, cs = 1, combi = 1, cv.fold = 10)
mdr1$min.comb
mdr2 <- mdr.c(ormdrdata, colresp = 101, cs = 1, combi = 2, cv.fold = 10)
mdr2$min.comb
mdr3 <- mdr.c(ormdrdata, colresp = 101, cs = 1, combi = 3, cv.fold = 10)
mdr3$min.comb
mdr1$test.erate
mdr2$test.erate
mdr3$test.erate
mdr1mean <- mean(mdr1$test.erate)
mdr2mean <- mean(mdr2$test.erate)
mdr3mean <- mean(mdr3$test.erate)
mdr1mean
mdr2mean
mdr3mean
mdr2$best.combi
mdr2$min.comb
mdr3$best.combi
mdr3$min.comb
# Logistic regression with full interaction between the two SNP factors.
logreg12 <- glm(
  casestatus ~ factor(snp1_2) * factor(snp2_1),
  family = binomial,
  data = ormdrdata
)
summary(logreg12)
anova(logreg12)
pchisq(701.68, 4, lower.tail = FALSE)
pchisq(703.82, 8, lower.tail = FALSE)
# Three-way interaction model.
logreg345 <- glm(
  casestatus ~ factor(snp3_2) * factor(snp4_2) * factor(snp5_2),
  family = binomial,
  data = ormdrdata
)
summary(logreg345)
anova(logreg345)
pchisq(45.6, 8, lower.tail = FALSE)
q()
### The following commands are in the linux shell
# Both BEAM3 runs use the same input file and output name.
beam3_input=beam3data.txt
beam3_output=beam3results
./BEAM3 "$beam3_input" -o "$beam3_output"
./BEAM3 "$beam3_input" -o "$beam3_output" -T 10
===Plink - Part 1 - Data QC===
 
===RV-TDT===
### Variant Annotation
vtools init rvtdt
vtools import --format vcf data/data.vcf --build hg19
vtools phenotype --from_file data/phen.txt
vtools execute ANNOVAR geneanno
# Keep splicing, nonsynonymous, frameshift and stop variants in table func_variant
vtools select variant "variant.region_type like '%splicing%' or variant.mut_type like 'nonsynonymous%' or variant.mut_type like 'frameshift%' or variant.mut_type like 'stop%'" -t func_variant
vtools export func_variant --format tped --samples 'phenotype is not null' > vat_raw.tped
# set marker name as chr_pos, needs to avoid duplicate name
sort -k4 -n vat_raw.tped | awk 'BEGIN{OFS="\t";prev="None";copy=1} {$2=$1"_"$4; $3=0; if($2==prev) {$2=$2"_"copy; copy=copy+1} else {prev=$2; copy=1}; print $0}' > vat_export.tped
vtools phenotype --out family sample_name pid mid sex phenotype > vat_export.tfam
vtools use refGene-hg19_20130904
vtools update func_variant --set 'maf=0.001' # set the maf to be 0.001
vtools select func_variant -o chr pos refGene.name2 maf --header > vat_export.anno
### Phasing Trio
plink --noweb --tfile vat_export --recode12 --me 1 1 --set-me-missing --out "recode12_noME"
# Build the linkage-format pedigree file: sort, tab-delimit, drop columns 1,3,4,5
sort -n -k1 -k6 -k2 recode12_noME.ped | sed 's/ /\t/g' | cut -f1,3,4,5 --complement > linkage.ped
# Build the matching .dat file: two header rows followed by one "M" row per marker
cut -f2 recode12_noME.map | awk 'BEGIN{OFS="\t";} {print "M",$0}' | sed '1i\I\tid\nA\tDisease' > linkage.dat
java -Xmx10000m -jar java/linkage2beagle.jar linkage.dat linkage.ped > pre_beagle.bgl
python script/pre_phase.py -i pre_beagle.bgl -a pre_beagle_withMissing.bgl
java -Xmx10000m -jar java/beagle.jar missing=0 trios=pre_beagle.bgl out=bgl_phased verbose=false redundant=true
gunzip bgl_phased.pre_beagle.bgl.phased.gz
### RV-TDT Analysis
python script/post_phase.py -a vat_export.anno -b bgl_phased.pre_beagle.bgl.phased -o genes/
# Run rvTDT on the first 20 genes that have a .tped file under genes/
for g in $(ls genes | grep tped | cut -d"." -f1 | head -20)
do
  echo "running rvTDT on gene ${g}"
  rvTDT exercise_proj -G "./genes/${g}.tped" -P ./data/rvtdt.phen -M "./genes/${g}.map" --adapt 500 --alpha 0.00001 --permut 2000 --lower_cutoff 0 --upper_cutoff 100 --minVariants 3 --maxMissRatio 1
done
 
 
 
===Seqspark===
# Upload the demo inputs to HDFS
for input_file in demo.vcf.bz2 demo.tsv
do
  hdfs dfs -put "$input_file"
done
# Run seqspark once per configuration file, in this order
for conf_file in annotation.conf qc.conf demo.conf
do
  seqspark "$conf_file"
done
===VAT===
head GenotypeSummary.txt
vtools output variant "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
vtools select variant "filter='PASS'" --count
vtools select variant "filter='PASS'" -o "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
# Per-variant genotype counts, depth summaries and MAF over all genotypes
vtools update variant --from_stat 'total=#(GT)' 'num=#(alt)' 'het=#(het)' 'hom=#(hom)' 'other=#(other)' 'minDP=min(DP_geno)' 'maxDP=max(DP_geno)' 'meanDP=avg(DP_geno)' 'maf=maf()'
vtools show fields
vtools show table variant
# Same statistics restricted to genotypes with DP_geno > 10
vtools update variant --from_stat 'totalGD10=#(GT)' 'numGD10=#(alt)' 'hetGD10=#(het)' 'homGD10=#(hom)' 'otherGD10=#(other)' 'mafGD10=maf()' --genotypes "DP_geno > 10"
vtools show fields
vtools show table variant
vtools output variant chr pos maf mafGD10 --header --limit 20
# Code population as RACE: 0 for YRI samples, 1 for CEU samples
vtools phenotype --set "RACE=0" --samples "filename like 'YRI%'"
vtools phenotype --set "RACE=1" --samples "filename like 'CEU%'"
vtools show samples --limit 10
vtools update variant --from_stat 'CEU_mafGD10=maf()' --genotypes 'DP_geno>10' --samples "RACE=1"
vtools update variant --from_stat 'YRI_mafGD10=maf()' --genotypes 'DP_geno>10' --samples "RACE=0"
vtools output variant chr pos mafGD10 CEU_mafGD10 YRI_mafGD10 --header --limit 10
vtools phenotype --from_stat 'CEU_totalGD10=#(GT)' 'CEU_numGD10=#(alt)' --genotypes 'DP_geno>10' --samples "RACE=1"
vtools phenotype --from_stat 'YRI_totalGD10=#(GT)' 'YRI_numGD10=#(alt)' --genotypes 'DP_geno>10' --samples "RACE=0"
vtools phenotype --output sample_name CEU_totalGD10 CEU_numGD10 YRI_totalGD10 YRI_numGD10 --header
vtools execute ANNOVAR geneanno
vtools show tables
# Remove genotypes with DP_geno below 10
vtools remove genotypes "DP_geno<10" -v0
# Save variants matching 'non%', 'stop%' or splicing to table v_funct
vtools select variant "mut_type like 'non%' or mut_type like 'stop%' or region_type='splicing'" -t v_funct
vtools show tables
vtools show samples --limit 5
# CEU subproject: variants seen in RACE=1 samples
vtools select variant --samples "RACE=1" -t CEU
mkdir -p ceu
cd ceu
vtools init ceu --parent ../ --variants CEU --samples "RACE=1" --build hg19
vtools show project
# Common variants (MAF >= 0.05) and rare functional variants (MAF < 0.01)
vtools select variant "CEU_mafGD10>=0.05" -t common_ceu
vtools select v_funct "CEU_mafGD10<0.01" -t rare_ceu
vtools use refGene
less EA_RV_VT.asso.res
sort -g -k6 EA_RV_VT.asso.res | head
vtools select rare_ceu "refGene.name2='ABCC1'" -o chr pos ref alt CEU_mafGD10 numGD10 mut_type --header
cd ..
# YRI subproject: variants seen in RACE=0 samples
vtools select variant --samples "RACE=0" -t YRI
mkdir -p yri
cd yri
vtools init yri --parent ../ --variants YRI --samples "RACE=0" --build hg19
# Common variants (MAF >= 0.05) and rare functional variants (MAF < 0.01)
vtools select variant "YRI_mafGD10>=0.05" -t common_yri
vtools select v_funct "YRI_mafGD10<0.01" -t rare_yri
vtools use refGene
# Association of common variants with BMI, SEX as covariate; results go to
# database YA_CV and the output is saved to a text file
vtools associate common_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db YA_CV > YA_CV.asso.res
Bureaucrat, administrator
1,252
edits