Changes

AdvGeneMap2018Commands

6,184 bytes added, 19:46, 23 January 2018
/* GxG Interaction */
__NOTITLE__
__FORCETOC__
==GeneABEL==
 ===Functional Annotation=== plink table_annovar.pl table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -file GWAS_clean4 out APOC3_Gene.vcf -remove -pheno phenonastring .phen -protocol refGene -phenooperation g -name Aff vcfinput cat APOC3_Gene.vcf.hg19_multianno.txt table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -transpose out APOC3_Gene.vcf -remove -recode nastring . -protocol refGene,knownGene,ensGene -operation g,g,g -arg '-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing' -vcfinput awk -F'\t' '{print $1,$2,$6,$7,$8,$9,$10}' APOC3_Gene.vcf.hg19_multianno.txt table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out gwa_gabel APOC3_Region.vcf -remove -nastring . -protocol phastConsElements46way -operation r -nowebvcfinput plink table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -file GWAS_clean4 out APOC3_Region.vcf -remove -pheno phenonastring .phen -protocol gwasCatalog -phenooperation r -name systolic vcfinput table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -transpose out APOC3_Filter.vcf -remove -recode nastring . -protocol gnomad_genome,gnomad_exome,popfreq_max_20150413,gme,avsnp150,dbnsfp33a,dbscsnv11,cadd13gt20,clinvar_20170905,gwava -operation f,f,f,f,f,f,f,f,f,f -vcfinput awk -F'\t' '{print $1,$2,$103,$104}' APOC3_Filter.vcf.hg19_multianno.txt awk -F'\t' '{print $1,$2,$6,$14}' APOC3_Filter.vcf.hg19_multianno.txt awk -F'\t' '{print $1,$2,$15,$16,$17,$18,$19,$20,$21,$22}' APOC3_Filter.vcf.hg19_multianno.txt awk -F'\t' '{print $1,$2,$36,$86,$70}' APOC3_Filter.vcf.hg19_multianno.txt awk -F'\t' '{print $1,$2,$99,$100}' APOC3_Filter.vcf.hg19_multianno.txt table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out gwa_gabel_qtl APOC3_ANN.vcf -remove -nowebnastring . 
-protocol refGene,knownGene,ensGene,wgRna,targetScanS,phastConsElements46way,tfbsConsSites,gwasCatalog,gnomad_genome,gnomad_exome,popfreq_max_20150413,gme,avsnp150,dbnsfp33a,dbscsnv11,cadd13gt20,clinvar_20170905,gwava -operation g,g,g,r,r,r,r,r,f,f,f,f,f,f,f,f,f,f -arg '-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing',,,,,,,,,,,,,,, -vcfinput ===GenABEL=== R# Load files
library(GenABEL)
convert.snp.tped(tped = "gwa_gabel_qtl.tped", tfam = "gwa_gabel_qtl.tfam", out = "gwa_gabel_qtl.raw", strand = "u")
slotNames(g.dat@gtdata)
colnames(g.dat@phdata)
# sample size
sample.size <- g.dat@gtdata@nids
# number of SNPs
snps.total <- g.dat@gtdata@nsnps
print(c(sample.size, snps.total)) # Trait
summary(g.dat@phdata$disease)
# Histogram of the phenotype column `disease`, with a rug of the raw values.
# NOTE(review): the xlab text looks like two labels merged by the wiki diff
# ("pressure" / "measure"); left byte-identical, confirm the intended wording.
hist(g.dat@phdata$disease, main = "Quantitative Phenotype data summary",
     xlab = "Systolic pressuremeasure", freq = FALSE, breaks = 20,
     col = "gray")
rug(g.dat@phdata$disease)

###
# tests for association
###

# GLM test
test.snp <- scan.glm('disease ~ CRSNP', family = gaussian(), data = g.dat)
names(test.snp)
# Threshold that the P1df values below are compared against.
alpha <- 5e-8
test.snp$snpnames[test.snp$P1df < alpha]
test.snp$P1df[test.snp$P1df < alpha]
# Score test
test.qt <- qtscore(disease, data = g.dat, trait = "gaussian")
slotNames(test.qt)
names(test.qt@results)
head(results(test.qt))
test.qt@lambda
descriptives.scan(test.qt)
# SNP names, then the P1df and Pc1df values, that fall below alpha.
row.names(results(test.qt))[results(test.qt)$P1df < alpha]
results(test.qt)$P1df[results(test.qt)$P1df < alpha]
results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]

# QQ plot
# Observed p-values in ascending order.
obs <- sort(results(test.qt)$P1df)
# Expected quantiles i / (n + 1) for i = 1..n.
ept <- seq_along(obs) / (length(obs) + 1)
plot(-log10(ept), -log10(obs), main = "GWAS QQ plot, qtl",
     xlab = "Expected -log10(pvalue)", ylab = "Observed -log10(pvalue)")
# Identity line (red) and a dashed horizontal reference at -log10(p) = 8.
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
# Manhattan plot
plot(test.qt, col = "black")
# Adding confounders
test.qt.sex <- qtscore(disease ~ sex, data = g.dat, trait = "gaussian")
# Select from the sex-adjusted scan using its own p-values; the original line
# indexed these rows with the unadjusted test.qt p-values.
row.names(results(test.qt.sex))[results(test.qt.sex)$P1df < alpha]
summary(lm(disease ~ sex, data = g.dat))
convert.snp.tped(tped = "gwa_gabel.tped", tfam = "gwa_gabel.tfam", out = "gwa_gabel.raw", strand = "u")### b.dat &lt;- load.gwaa.data(phen = "gwa_gabel.praw", gen = "gwa_gabel.raw", force = T)# MDS slotNames(b.dat) slotNames(b.dat@gtdata) colnames(b.dat@phdata) b.dat@gtdata@nids case.size &lt;- length(which(b.dat@phdata$disease == 1)) control.size &lt;- length(which(b.dat@phdata$disease == 0)) case.size control.size snpsb.total &lt;- b.dat@gtdata@nsnps testb.snp &lt;- scan.glm('disease ~ CRSNP', family = binomial(), data = b.dat) names(testb.snp) alpha &lt;- 5e-8 testb.snp$snpnames[testb.snp$P1df < alpha] testb.snp$P1df[testb.snp$P1df < alpha] testb.qt &lt;- qtscore(disease, data = b.dat, trait = "binomial") slotNames(testb.qt) descriptives.scan(testb.qt) row.names(results(testb.qt))[results(testb.qt)$P1df < alpha] results(testb.qt)$P1df[results(testb.qt)$P1df < alpha] results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha] ###
# Pairwise IBS matrix from ibs(), then classical MDS on (0.5 - gkin) treated
# as a distance, keeping 10 dimensions and the eigenvalues.
gkin <- ibs(g.dat, weight = "freq")
gkin[1:10, 1:10]
cps.full <- cmdscale(as.dist(.5 - gkin), eig = TRUE, k = 10)
names(cps.full)
cps <- cps.full$points
# First two MDS coordinates; the plotting symbol comes from the popn column.
plot(cps[, 1], cps[, 2], pch = g.dat@phdata$popn)
legend("topright", c("TSI", "MEX", "CEU"), pch = c(1, 2, 3))

###
# Corrected test
###

# Incorporating PCs as predictors
# Name the 10 MDS coordinates C1..C10 and append them to a copy of the
# phenotype table so they can be used as covariates.
colnames(cps) <- paste0("C", 1:10)
gpc.dat <- g.dat
gpc.dat@phdata <- cbind(g.dat@phdata, cps)
# GLM scan adjusted for the first five components.
test.pc.a <- scan.glm('disease ~ CRSNP + C1 + C2 + C3 + C4 + C5',
                      family = gaussian(), data = gpc.dat)
test.pc.a$snpnames[test.pc.a$P1df < alpha]
test.pc.a$P1df[test.pc.a$P1df < alpha]
# Score test with the same five covariates.
test.pc.b <- qtscore(disease ~ C1 + C2 + C3 + C4 + C5, data = gpc.dat,
                     trait = "gaussian")
test.pc.b@lambda
# scree plot
plot(cps.full$eig[1:10]/sum(cps.full$eig), axes = F, type = "b", xlab = "Components", ylim = c(0,0.05), ylab = "Proportion of Variations", main = "MDS analysis scree plot")
axis(1, 1:10)
axis(2)
# cumulative plot
plot(cumsum(cps.full$eig[1:10])/sum(cps.full$eig), axes = F, type = "b", ylim = c(0,0.2), xlab = "Components", ylab = "Proportion of Variations", main = "MDS analysis cumulative plot")
axis(1, 1:10)
axis(2)
# Genomic control
# Uncorrected GIF
test.qt@lambda
# Corrected p-value
row.names(results(test.qt))[results(test.qt)$Pc1df < alpha]
results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]
test.qt@lambda
# Check for inflation of statistic
obs <- sort(results(test.qt)$chi2.1df)
# Expected 1-df chi-square quantiles at probabilities i / (n + 1).
ept <- sort(qchisq(seq_along(obs) / (length(obs) + 1), df = 1))
plot(ept, obs, main = "Genomic control (lambda = slope of the dashed line)",
     xlab = "Expected chisq, 1df", ylab = "Observed chisq, 1df")
# Identity line (red) and a dashed line through the origin with slope lambda.
abline(0, 1, col = "red")
abline(0, test.qt@lambda[1], lty = 2)
# Definition of GIF
# Conventional definition
median(results(test.qt)$chi2.1df)/0.456
# GenABEL definition
lm(obs~ept)$coef[2]
# QQ plot
obs <- sort(results(test.qt)$Pc1df)
# Expected quantiles i / (n + 1) for i = 1..n.
ept <- seq_along(obs) / (length(obs) + 1)
plot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. via Genomic Control",
     xlab = "Expected -log10(pvalue)", ylab = "Observed -log10(pvalue)")
abline(0, 1, col = "red")
abline(h = 8, lty = 2)

# EIGENSTRAT
# Copy the IBS matrix and replace its diagonal with the Var column of hom().
adj.gkin <- gkin
diag(adj.gkin) <- hom(g.dat)$Var
# naxes = 3 is default value
test.eg <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = 2)
descriptives.scan(test.eg)
snp.eg <- row.names(results(test.eg))[results(test.eg)$P1df < alpha]
pvalue.eg <- results(test.eg)$P1df[results(test.eg)$P1df < alpha]
lambda.eg <- test.eg@lambda
snp.eg
pvalue.eg
lambda.eg
# Change #PCs
# Re-run egscore with naxes = 1..10 and print the lambda estimate each time.
for (k in 1:10) {
  test.tmp <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = k)
  print(test.tmp@lambda$estimate)
}
# QQ plot
obs <- sort(results(test.eg)$Pc1df)
# Expected quantiles i / (n + 1) for i = 1..n.
ept <- seq_along(obs) / (length(obs) + 1)
plot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. w/ EIGENSTRAT",
     xlab = "Expected -log10(pvalue)", ylab = "Observed -log10(pvalue)")
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
# Manhattan plot comparison
plot(test.qt, col = "black")
add.plot(test.eg, col = "gray", pch = 3)
legend("topright", c("Original plot","After correction w/ EIGENSTRAT"), pch = c(1,3)) ### # Basic test, binary trait ### # load files to GenABEL convert.snp.tped(tped ="gwa_gabel.tped", tfam = "gwa_gabel.tfam", out = "gwa_gabel.raw", strand = "u") b.dat &lt;- load.gwaa.data(phen = "gwa_gabel.praw", gen = "gwa_gabel.raw", force = T) slotNames(b.dat) slotNames(b.dat@gtdata) colnames(b.dat@phdata) # sample size b.dat@gtdata@nids # number of cases and controls case.size &lt;- length(which(b.dat@phdata$disease == 1)) control.size &lt;- length(which(b.dat@phdata$disease == 0)) case.size control.size # number of SNPs snpsb.total &lt;- b.dat@gtdata@nsnps # GLM test testb.snp &lt;- scan.glm('disease ~ CRSNP', family = binomial(), data = b.dat) names(testb.snp) alpha &lt;- 5e-8 testb.snp$snpnames[testb.snp$P1df < alpha] testb.snp$P1df[testb.snp$P1df < alpha] # Score test testb.qt &lt;- qtscore(disease, data = b.dat, trait = "binomial") slotNames(testb.qt) descriptives.scan(testb.qt) row.names(results(testb.qt))[results(testb.qt)$P1df < alpha] results(testb.qt)$P1df[results(testb.qt)$P1df < alpha] results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha]  ===GxG Interaction=GWAS Data QC==
./plink --file GWAS --noweb plink --file GWAS ped simcasecon.ped --mind 0map simcasecon.10 map --recode --out GWAS_clean_mind --nowebassoc ./plink --file GWAS_clean_mind noweb --maf 0ped simcasecon.05 ped --recode map simcasecon.map --out MAF_greater_5 fast--nowebepistasis ./plink --file GWAS_clean_mind noweb --exclude MAF_greater_5ped simcasecon.map ped --recode --out MAF_less_5 --noweb plink --file MAF_greater_5 --geno 0map simcasecon.05 map --recode fast-epistasis -out MAF_greater_5_clean -case-nowebonly ./plink --file MAF_less_5 noweb --geno 0ped simcasecon.01 ped --recode map simcasecon.map --out MAF_less_5_clean --nowebepistasis ./plink --file MAF_greater_5_clean noweb --merge MAF_less_5_cleanped simcasecon.ped MAF_less_5_clean--map simcasecon.map --recode recodeA --out GWAS_MAF_clean --nowebrecoded ./plink --file GWAS_MAF_clean noweb --mind 0ped simcasecon.03 ped --recode map simcasecon.map --out GWAS_clean2 make--noweb plink --file GWAS_clean2 --check-sex bed --out GWAS_sex_checking --nowebcassiformat
R
sexcheck # The following commands are in the R environment je &lt;-read.table("cassi.out", header= T) je library(ORMDR) recoded&lt;-read.table("recoded.raw", header=T) head(recoded) newdata&lt;-recoded[7:106] ormdrdata&lt;-cbind(newdata,recoded$PHENOTYPE-1) names(ormdrdata)[101]&lt;-"casestatus" head(ormdrdata) mdr1&lt;-mdr.c(ormdrdata, colresp=101, cs=1, combi=1, cv.fold = 10) mdr1$min.comb mdr2&lt;-mdr.c(ormdrdata, colresp=101, cs=1, combi=2, cv.fold = 10) mdr2$min.comb mdr3&lt;-mdr.c(ormdrdata, colresp=101, cs=1, combi=3, cv.fold = 10) mdr3$min.comb mdr1$test.erate mdr2$test.erate mdr3$test.erate mdr1mean&lt;-mean(mdr1$test.erate) mdr2mean&lt;-mean(mdr2$test.erate) mdr3mean&lt;-mean(mdr3$test.erate) mdr1mean mdr2mean mdr3mean mdr2$best.combi mdr2$min.comb mdr3$best.combi mdr3$min.comb logreg12&lt;-glm(casestatus ~ factor(snp1_2)*factor(snp2_1), family=binomial, data=ormdrdata) summary(logreg12) anova(logreg12) pchisq(701.68,4,lower.tail=F) pchisq(703.82,8,lower.tail=F) logreg345&lt;-glm(casestatus ~ factor(snp3_2)*factor(snp4_2)*factor(snp5_2), family=binomial, data=ormdrdata) summary(logreg345) anova(logreg345) pchisq(45.6,8,lower.tail=F) q() ### The following commands are in the linux shell ./BEAM3 beam3data.txt -o beam3results ./BEAM3 beam3data.txt -o beam3results -T 10 ===Plink - Part 1 - Data QC===  plink --file GWAS plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5 plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5 plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2 plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking #### in R - open R by simply typing R 
setwd("to_your_working_directory/")
# Read the PLINK --check-sex output and list rows whose STATUS is "PROBLEM".
sexcheck <- read.table("GWAS_sex_checking.sexcheck", header = TRUE)
names(sexcheck)
sex_problem <- sexcheck[which(sexcheck$STATUS == "PROBLEM"), ]
sex_problem
q()
##################################
plink --file GWAS_clean2 --genome --out duplicates --noweb
#### in R
setwd("to_your_working_directory/")
dups = read.table("duplicates.genome", header = T)
problem_pairs = dups[which(dups$PI_HAT > 0.4),]
problem_pairs
problem_pairs = dups[which(dups$PI_HAT > 0.05),]
myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT")
problem_pairs[myvars]
q()
######
plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3 --noweb
plink --file GWAS_clean3 --het --noweb
###### in R
Dataset <- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".", strip.white=TRUE)
mean(Dataset$F)
sd(Dataset$F)
jpeg("hist.jpeg", height=1000, width=1000)
hist(scale(Dataset$F), xlim=c(-4,4))
dev.off()
q()
######
plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy --noweb
##### in R
hardy = read.table("plink.hwe", header = T)
names(hardy)
hwe_prob = hardy[which(hardy$P < 0.0000009),]
hwe_prob
q()
########## plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4 --noweb  ==GWAS Control =Plink - Part 2 - Controlling for Substructure===  plink --file GWAS_clean4 --genome --cluster --mds-plot 10 --noweb #### in R mydata = &#61; read.table("mds_components.txt", header=&#61;T) mydata$pch[mydata$Group==&#61;&#61;1 ] &lt;-15 mydata$pch[mydata$Group==&#61;&#61;2 ] &lt;-16 mydata$pch[mydata$Group==&#61;&#61;3 ] &lt;-2 jpeg("mds.jpeg", height=1000&#61;500, width=1000&#61;500) plot(mydata$C1, mydata$C2 ,pch=&#61;mydata$pch)
dev.off()
q()
###### plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj plink --nowebfile GWAS_clean4 --genome --cluster --pca 10 header plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.mds eigenvec --covar-name C1 PC1 --logistic --adjust --out C1 --nowebPC1 plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.mds eigenvec --covar-name C1PC1-C2 PC2 --logistic --adjust --out C1-C2 -PC1-nowebPC2 #### in R
# Draw a QQ plot of observed against expected -log10 p-values.
#   pvals: numeric vector of p-values
#   title: main title of the plot
# Draws a red reference line from (0, 0) to (7, 7), then overlays the points.
broadqq <- function(pvals, title)
{
    observed <- sort(pvals)
    lobs <- -(log10(observed))

    # Expected quantiles i / (n + 1) on the -log10 scale.
    expected <- seq_along(observed)
    lexp <- -(log10(expected / (length(expected) + 1)))

    plot(c(0, 7), c(0, 7), col = "red", lwd = 3, type = "l",
         xlab = "Expected (-logP)", ylab = "Observed (-logP)",
         xlim = c(0, max(lobs)), ylim = c(0, max(lobs)),
         las = 1, xaxs = "i", yaxs = "i", bty = "l", main = title)
    points(lexp, lobs, pch = 23, cex = .4, bg = "black")
}

# Two stacked QQ plots written to one JPEG: unadjusted, then adjusted.
jpeg("qqplot_compare.jpeg", height = 1000, width = 500)
par(mfrow = c(2, 1))
aff_unadj <- read.table("unadj.assoc.logistic", header = TRUE)
# Keep only the rows whose TEST column is "ADD".
aff_unadj.add.p <- aff_unadj[aff_unadj$TEST == "ADD", ]$P
broadqq(aff_unadj.add.p, "Some Trait Unadjusted")
# NOTE(review): the file name was merged from "C1-C2" and "PC1-PC2" by the
# wiki diff; it must match the --out prefix of the adjusted plink run.
aff_C1C2 <- read.table("PC1-PC2.assoc.logistic", header = TRUE)
aff_C1C2.add.p <- aff_C1C2[aff_C1C2$TEST == "ADD", ]$P
broadqq(aff_C1C2.add.p, "Some Trait Adjusted for PC1 and PC2")
dev.off()
# Rows with P below 1e-7 in each result table.
gws_unadj <- aff_unadj[which(aff_unadj$P < 0.0000001), ]
gws_unadj
gws_adjusted <- aff_C1C2[which(aff_C1C2$P < 0.0000001), ]
gws_adjusted
    ===RV-TDT=== ### Variant Annotation vtools init rvtdt vtools import --format vcf data/data.vcf --build hg19 vtools phenotype --from_file data/phen.txt vtools execute ANNOVAR geneanno vtools select variant "variant.region_type like '%splicing%'or variant.mut_type like 'nonsynonymous%' or variant.mut_type like 'frameshift%' or variant.mut_type like 'stop%'" -t func_variant vtools export func_variant --format tped --samples 'phenotype is not null' &gt; vat_raw.tped # set marker name as chr_pos, needs to avoid duplicate name qsort -k4 -n vat_raw.tped | awk 'BEGIN{OFS&#61;"\t";prev&#61;"None";copy&#61;1} {$2&#61;$1"_"$4; $3&#61;0; if($2&#61;&#61;prev){$2&#61;$2"_"copy; copy&#61;copy+1} else {prev&#61;$2; copy&#61;1}; print $0}' &gt; vat_export.tped vtools phenotype --out family sample_name pid mid sex phenotype &gt; vat_export.tfam vtools use refGene-hg19_20130904 vtools update func_variant --set 'maf&#61;0.001' # set the maf to be 0.001 vtools select func_variant -o chr pos refGene.name2 maf --header &gt; vat_export.anno ### Phasing Trio plink --noweb --tfile vat_export --recode12 --me 1 1 --set-me-missing --out "recode12_noME" sort -n -k1 -k6 -k2 recode12_noME.ped | sed 's/ /\t/g' | cut -f1,3,4,5 --complement &gt; linkage.ped cut -f2 recode12_noME.map | awk 'BEGIN{OFS&#61;"\t";} {print "M",$0}' | sed '1i\I\tid\nA\tDisease' &gt; linkage.dat java -Xmx10000m -jar java/linkage2beagle.jar linkage.dat linkage.ped &gt; pre_beagle.bgl python script/pre_phase.py -i pre_beagle.bgl -a pre_beagle_withMissing.bgl java -Xmx10000m -jar java/beagle.jar missing&#61;0 trios&#61;pre_beagle.bgl out&#61;bgl_phased verbose&#61;false redundant&#61;true gunzip bgl_phased.pre_beagle.bgl.phased.gz ### RV-TDT Analysis python script/post_phase.py -a vat_export.anno -b bgl_phased.pre_beagle.bgl.phased -o genes/ for g in `ls genes | grep tped | cut -d"." 
-f1 | head -20` do echo "running rvTDT on gene "${g} rvTDT exercise_proj -G ./genes/${g}.tped -P ./data/rvtdt.phen -M ./genes/${g}.map --adapt 500 --alpha 0.00001 --permut 2000 --lower_cutoff 0 --upper_cutoff 100 --minVariants 3 --maxMissRatio 1 done done   ===Seqspark=== hdfs dfs -put demo.vcf.bz2 hdfs dfs -put demo.tsv seqspark annotation.conf seqspark qc.conf seqspark demo.conf ===VAT=== 
vtools -h
vtools init VATDemo
head GenotypeSummary.txt
vtools output variant "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
# Count PASS variants, then summarise DP over them.
vtools select variant "filter='PASS'" --count
vtools select variant "filter='PASS'" -o "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
# Per-variant genotype statistics over all genotypes.
vtools update variant --from_stat 'total=#(GT)' 'num=#(alt)' 'het=#(het)' 'hom=#(hom)' 'other=#(other)' 'minDP=min(DP_geno)' 'maxDP=max(DP_geno)' 'meanDP=avg(DP_geno)' 'maf=maf()'
vtools show fields
vtools show table variant
# Same statistics restricted to genotypes with DP_geno > 10.
vtools update variant --from_stat 'totalGD10=#(GT)' 'numGD10=#(alt)' 'hetGD10=#(het)' 'homGD10=#(hom)' 'otherGD10=#(other)' 'mafGD10=maf()' --genotypes "DP_geno > 10"
vtools show fields
vtools show table variant
vtools output variant chr pos maf mafGD10 --header --limit 20
# Set RACE from the sample file name: 0 for YRI*, 1 for CEU*.
vtools phenotype --set "RACE=0" --samples "filename like 'YRI%'"
vtools phenotype --set "RACE=1" --samples "filename like 'CEU%'"
vtools show samples --limit 10
# Per-population MAF from genotypes with DP_geno > 10.
vtools update variant --from_stat 'CEU_mafGD10=maf()' --genotypes 'DP_geno>10' --samples "RACE=1"
vtools update variant --from_stat 'YRI_mafGD10=maf()' --genotypes 'DP_geno>10' --samples "RACE=0"
vtools output variant chr pos mafGD10 CEU_mafGD10 YRI_mafGD10 --header --limit 10
# Per-sample genotype counts, by population.
vtools phenotype --from_stat 'CEU_totalGD10=#(GT)' 'CEU_numGD10=#(alt)' --genotypes 'DP_geno>10' --samples "RACE=1"
vtools phenotype --from_stat 'YRI_totalGD10=#(GT)' 'YRI_numGD10=#(alt)' --genotypes 'DP_geno>10' --samples "RACE=0"
vtools phenotype --output sample_name CEU_totalGD10 CEU_numGD10 YRI_totalGD10 YRI_numGD10 --header
# Variants with MAF >= 0.01 go into their own table, which is passed to KING.
vtools select variant 'maf>=0.01' -t variant_MAFge01 'Variants that have MAF >= 0.01'
vtools show tables
vtools execute KING --var_table variant_MAFge01
vtools_report plot_pheno_fields KING_MDS1 KING_MDS2 RACE --dot KING.mds.race.pdf --discrete_color Dark2
vtools_report plot_pheno_fields KING_MDS1 KING_MDS2 panel --dot KING.mds.panel.pdf --discrete_color Dark2
vtools execute ANNOVAR geneanno
vtools output variant chr pos ref alt mut_type --limit 20 --header
vtools remove variants to_remove -v0
vtools show tables
vtools remove genotypes "DP_geno&lt;10" -v0 <br />vtools select variant "mut_type like 'non’non%' or mut_type like 'stop’stop%' or region_type='splicing'’splicing’" -t v_funct <br />vtools show tables <br />vtools show samples --limit 5 <br />vtools select variant --samples "RACE=1" -t CEU <br />mkdir -p ceu <br /> cd ceu <br /> vtools init ceu --parent ../ --variants CEU --samples "RACE=1" --build hg19 vtools show project
vtools select variant "CEU_mafGD10&gt;=0.05" -t common_ceu
vtools select v_funct "CEU_mafGD10&lt;0.01" -t rare_ceu <br />vtools use refGene <br />vtools show annotation refGene <br />vtools associate -h <br />vtools show tests <br />vtools show test LinRegBurden <br /> vtools associate common_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db EA_CV &gt; EA_CV.asso.res
grep -i error *.log
less EA_CV.asso.res
sort -g -k7 EA_CV.asso.res | head
vtools show fields
vtools associate rare_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db EA_RV &gt; EA_RV.asso.res grep -i error *.log | tail -2210
less EA_RV.asso.res
sort -g -k6 EA_RV.asso.res | head
vtools associate rare_ceu BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db EA_RV &gt; EA_RV_VT.asso.res
grep -i error *.log | tail -2210
less EA_RV_VT.asso.res
sort -g -k6 EA_RV_VT.asso.res | head
vtools select rare_ceu "refGene.name2='ABCC1'’ABCC1’" -o chr pos ref alt CEU_mafGD10 numGD10 mut_type --header vtools_report plot_association qq -o QQRV -b --label_top 2 -f 6 &lt; EA_RVcd .asso.res vtools_report plot_association manhattan -o MHRV -b --label_top 5 --color Dark2 --chrom_prefix None -f 6 &lt; EA_RV.asso.res <br />vtools associate rare_ceu BMI --covariate SEX KING_MDS1 KING_MDS2 -m "LinRegBurden --name RVMDS2 --alternative 2" -g refGene.name2 -j1 --to_db EA_RV &gt; EA_RV_MDS2.asso.res vtools_report plot_association qq -o QQRV_MDS2 -b --label_top 2 -f 6 &lt; EA_RV_MDS2.asso.res <br />cd .. <br />vtools select variant --samples "RACE=0" -t YRI <br /> mkdir -p yri <br /> cd yri <br /> vtools init yri --parent ../ --variants YRI --samples "RACE=0" --build hg19 <br />vtools select variant "YRI_mafGD10&gt;=0.05" -t common_yri vtools select v_funct "YRI_mafGD10&lt;0.01" -t rare_yri <br />vtools use refGene <br />vtools associate common_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db YA_CV &gt; YA_CV.asso.res vtools associate rare_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db YA_RV &gt; YA_RV.asso.res vtools associate rare_yri BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db YA_RV &gt; YA_RV_VT.asso.res
cd ..
# Write the meta-analysis of the CEU and YRI VT results to the file that the
# following cut command reads.
vtools_report meta_analysis ceu/EA_RV_VT.asso.res yri/YA_RV_VT.asso.res --beta 5 --pval 6 --se 7 -n 2 --link 1 > META_RV_VT.asso.res
cut -f1,3 META_RV_VT.asso.res | head
Bureaucrat, administrator
1,252
edits