Changes

2015-march-berlin-commands

27,943 bytes added, 20:34, 5 May 2016
Created page with "__NOTITLE__ ==Annotation exercise== mkdir APOC3 && cd APOC3 vtools init APOC3 vtools import ../data/APOC3.vcf --format ../data/vcf.fmt --build hg19 vtools select..."
__NOTITLE__

==Annotation exercise==
mkdir APOC3 && cd APOC3
vtools init APOC3
vtools import ../data/APOC3.vcf --format ../data/vcf.fmt --build hg19
vtools select variant --count
vtools use refGene
vtools show fields
vtools select variant --output chr pos ref alt refGene.name2
vtools use ../data/dbNSFP.DB
vtools show fields
vtools select variant 'id="rs76353203" or id="rs140621530" or id="rs147210663" or id="rs121918381" or id="rs121918382"' -t 5var
vtools select 5var --output chr pos ref alt id dbNSFP.GERP_RS dbNSFP.phyloP100way_vertebrate dbNSFP.phastCons100way_vertebrate
vtools select 5var --output chr pos ref alt dbNSFP.FATHMM_pred dbNSFP.FATHMM_score
vtools select 5var --output chr pos ref alt dbNSFP.Polyphen2_HDIV_pred dbNSFP.Polyphen2_HDIV_score dbNSFP.Polyphen2_HVAR_pred dbNSFP.Polyphen2_HVAR_score
vtools select 5var --output chr pos ref alt dbNSFP.LRT_pred dbNSFP.LRT_score
vtools select 5var --output chr pos ref alt dbNSFP.MutationTaster_pred
vtools select 5var --output chr pos ref alt dbNSFP.PROVEAN_pred dbNSFP.PROVEAN_score
vtools select 5var --output chr pos ref alt dbNSFP.SIFT_pred dbNSFP.SIFT_score
vtools select 5var --output chr pos ref alt dbNSFP.CADD_raw dbNSFP.CADD_phred
==GATK exercise==
java -jar GenomeAnalysisTK.jar --help<br data-attributes="%20/" />java -Xmx200m -jar GenomeAnalysisTK.jar -R human_g1k_v37.fa -T UnifiedGenotyper -glm BOTH -I child.bam -I father.bam -I mother.bam -L trio.intervals -o ug.vcf
less -S ug.vcf<br data-attributes="%20/" />java -Xmx200m -jar GenomeAnalysisTK.jar -T VariantEval -R human_g1k_v37.fa -L trio.intervals -D dbsnp.vcf -eval ug.vcf -o ug.vcf.eval<br data-attributes="%20/" />less -S ug.vcf.eval<br data-attributes="%20/" />java -Xmx200m -jar GenomeAnalysisTK.jar -R human_g1k_v37.fa -T SelectVariants -V ug.vcf -mv -mvq 0 -o ug.dnm.vcf -ped trio.ped<br data-attributes="%20/" />java -Xmx500m -jar GenomeAnalysisTK.jar -R human_g1k_v37.fa -T HaplotypeCaller -I child.bam -I father.bam -I mother.bam -minPruning 4 -L 1:199325670-199325672 -mergeVariantsViaLD -o hc.vcf
==PLINKSEQ exercise==
Data analysis:

pseq help<br data-attributes="%20/" />pseq help all<br data-attributes="%20/" />pseq myproj new-project --resources hg19<br data-attributes="%20/" />pseq myproj load-vcf --vcf CEU.exon.2010_03.genotypes.hg19.vcf.gz YRI.exon.2010_03.genotypes.hg19.vcf.gz<br data-attributes="%20/" />pseq myproj load-pheno --file phenotype.phe<br data-attributes="%20/" />pseq myproj v-view | head<br data-attributes="%20/" />pseq myproj i-view | head<br data-attributes="%20/" />pseq myproj summary <br data-attributes="%20/" />pseq myproj var-summary<br data-attributes="%20/" />pseq myproj ind-summary<br data-attributes="%20/" />pseq myproj loc-summary<br data-attributes="%20/" />pseq myproj ref-summary<br data-attributes="%20/" />pseq myproj seq-summary<br data-attributes="%20/" />pseq myproj file-summary<br data-attributes="%20/" />pseq myproj meta-summary<br data-attributes="%20/" />pseq myproj v-stats<br data-attributes="%20/" />pseq myproj i-stats | head<br data-attributes="%20/" />pseq myproj tag-file --id 1 --name CEU<br data-attributes="%20/" />pseq myproj tag-file --id 2 --name YRI<br data-attributes="%20/" />pseq myproj var-summary<br data-attributes="%20/" />pseq myproj v-freq | head<br data-attributes="%20/" />pseq myproj v-freq --mask file=CEU | head <br data-attributes="%20/" />pseq myproj v-freq --mask file=YRI | head<br data-attributes="%20/" />pseq myproj v-view --mask any.filter.ex | head<br data-attributes="%20/" />pseq myproj v-view --mask any.filter.ex | wc -l<br data-attributes="%20/" />pseq myproj v-view --mask any.filter | wc -l<br data-attributes="%20/" />pseq myproj var-set --group pass --mask any.filter.ex<br data-attributes="%20/" />pseq myproj var-summary<br data-attributes="%20/" />pseq myproj var-set --group pass_DP15 --mask include="DP>14" var=pass<br data-attributes="%20/" />pseq myproj var-summary<br data-attributes="%20/" />pseq myproj var-set --group pass_DP15_DPgeno10 --mask geno=DP:ge:11 var=pass_DP15<br data-attributes="%20/" />pseq myproj var-summary<br data-attributes="%20/" />pseq myproj var-set --group pass_DP15_DPgeno10_CEU --mask file=CEU var=pass_DP15_DPgeno10<br data-attributes="%20/" />pseq myproj var-summary<br data-attributes="%20/" />pseq myproj var-set --group pass_DP15_DPgeno10_CEU_HWE --mask hwe=5.7e-7:1 var=pass_DP15_DPgeno10_CEU<br data-attributes="%20/" />pseq myproj var-summary<br data-attributes="%20/" />pseq myproj var-set --group pass_DP15_DPgeno10_CEU_HWE_MAFgt05 --mask maf=0.05:0.5 var=pass_DP15_DPgeno10_CEU_HWE<br data-attributes="%20/" />pseq myproj var-summary<br data-attributes="%20/" />pseq myproj var-set --group pass_DP15_DPgeno10_CEU_HWE_MAFlt01 --mask "mac=1 maf=0.01" var=pass_DP15_DPgeno10_CEU_HWE<br data-attributes="%20/" />pseq myproj var-summary<br data-attributes="%20/" />pseq myproj glm --phenotype BMI --covar SEX --mask var=pass_DP15_DPgeno10_CEU_HWE_MAFgt05 > SNV_CEU.result <br data-attributes="%20/" />head SNV_CEU.result<br data-attributes="%20/" />cat SNV_CEU.result | awk '{if(FNR==1) print $0; if(NR>1) print $0 | "sort -k9"}' | grep -v "NA\s\+NA\s\+NA" | head<br data-attributes="%20/" />pseq myproj assoc --tests fw vt --phenotype BMI <br data-attributes="%20/" />pseq myproj assoc --tests skat --phenotype BMI --covar SEX --mask var=pass_DP15_DPgeno10_CEU_HWE_MAFlt01 loc.group=refseq > SKAT_CEU.result<br data-attributes="%20/" />pseq myproj assoc --tests skat --phenotype BMI --covar SEX --mask include="DP>14" geno=DP:ge:11 file=CEU hwe=5.7e-7:1 "mac=1 maf=0.01" loc.group=refseq > SKAT_CEU.result<br data-attributes="%20/" />head -20 SKAT_CEU.result<br data-attributes="%20/" />cat SKAT_CEU.result | grep SKAT | grep -v "P=NA" | sort -k6 | head -15

Exercise&nbsp;analyzing YRI samples:

pseq myproj var-set --group pass_DP15_DPgeno10_YRI --mask file=YRI var=pass_DP15_DPgeno10<br data-attributes="%20/" />pseq myproj var-summary<br data-attributes="%20/" />pseq myproj var-set --group pass_DP15_DPgeno10_YRI_HWE --mask hwe=5.7e-7:1 var=pass_DP15_DPgeno10_YRI<br data-attributes="%20/" />pseq myproj var-summary<br data-attributes="%20/" />pseq myproj var-set --group pass_DP15_DPgeno10_YRI_HWE_MAFgt05 --mask maf=0.05:0.5 var=pass_DP15_DPgeno10_YRI_HWE<br data-attributes="%20/" />pseq myproj var-summary<br data-attributes="%20/" />pseq myproj var-set --group pass_DP15_DPgeno10_YRI_HWE_MAFlt01 --mask "mac=1 maf=0.01" var=pass_DP15_DPgeno10_YRI_HWE<br data-attributes="%20/" />pseq myproj var-summary<br data-attributes="%20/" />pseq myproj glm --phenotype BMI --covar SEX --mask var=pass_DP15_DPgeno10_YRI_HWE_MAFgt05 > SNV_YRI.result<br data-attributes="%20/" />head SNV_YRI.result<br data-attributes="%20/" />cat SNV_YRI.result | awk '{if(FNR==1) print $0; if(NR>1) print $0 | "sort -k9"}' | grep -v "NA\s\+NA\s\+NA" | head<br data-attributes="%20/" />pseq myproj assoc --tests skat --phenotype BMI --covar SEX --mask include="DP>14" geno=DP:ge:11 file=YRI hwe=5.7e-7:1 "mac=1 maf=0.01" loc.group=refseq > SKAT_YRI.result<br data-attributes="%20/" />head -20 SKAT_YRI.result<br data-attributes="%20/" />cat SKAT_YRI.result | grep SKAT | grep -v "P=NA" | sort -k6 | head -15

==Popgen exercise R programs==
popgen_drift.q

#############################################################<br data-attributes="%20/" />### Michael Nothnagel, michael.nothnagel@uni-koeln.de ###<br data-attributes="%20/" />### Simulating genetic drift ###<br data-attributes="%20/" />#############################################################
<br data-attributes="%20/" />N.s = c(10, 100, 1000, 10000)<br data-attributes="%20/" />n.gen = 50<br data-attributes="%20/" />n.rep = 100
# === Simulate genetic drift === #<br data-attributes="%20/" />for (n in N.s) {<br data-attributes="%20/" /> freqs = matrix(NA, ncol=n.gen+1, nrow=n.rep)<br data-attributes="%20/" /> for (i in 1:n.rep) {<br data-attributes="%20/" /> alleles = c(rep(0,n/2), rep(1,n/2))<br data-attributes="%20/" /> freqs[i,1] = sum(alleles==1) / length(alleles)<br data-attributes="%20/" /> for (j in 1:n.gen) {<br data-attributes="%20/" /> alleles = sample (alleles, length(alleles), replace=T)<br data-attributes="%20/" /> freqs[i,j+1] = sum(alleles==1) / length(alleles)<br data-attributes="%20/" /> }<br data-attributes="%20/" /> }<br data-attributes="%20/" /> assign (paste("freqs.n", n, sep=""), freqs)<br data-attributes="%20/" /> }
summary(freqs.n10 [,n.gen+1])<br data-attributes="%20/" />summary(freqs.n100 [,n.gen+1])<br data-attributes="%20/" />summary(freqs.n1000 [,n.gen+1])<br data-attributes="%20/" />summary(freqs.n10000[,n.gen+1])
# === Graph allele frequency changes === #<br data-attributes="%20/" />pdf("drift_plot.pdf", paper="special", height=4*2, width=4*2, onefile=F)<br data-attributes="%20/" /> split.screen(c(2,2))<br data-attributes="%20/" /> screen(1)<br data-attributes="%20/" /> plot(x=0, y=0, type="n", xlim=c(0,n.gen), ylim=c(0,1), xlab="Generation", ylab="Allele frequency", main="N=10")<br data-attributes="%20/" /> lines (c(0,n.gen), rep(0.5, 2), lty=3, col="#AAAAAA")<br data-attributes="%20/" /> freqs = get(paste("freqs.n", 10 , sep=""))<br data-attributes="%20/" /> for (i in 1:n.rep) {<br data-attributes="%20/" /> lines(0:n.gen,freqs[i,], col="#44AAAA")<br data-attributes="%20/" /> }<br data-attributes="%20/" /> screen(2)<br data-attributes="%20/" /> plot(x=0, y=0, type="n", xlim=c(0,n.gen), ylim=c(0,1), xlab="Generation", ylab="Allele frequency", main="N=100")<br data-attributes="%20/" /> lines (c(0,n.gen), rep(0.5, 2), lty=3, col="#AAAAAA")<br data-attributes="%20/" /> freqs = get(paste("freqs.n", 100 , sep=""))<br data-attributes="%20/" /> for (i in 1:n.rep) {<br data-attributes="%20/" /> lines(0:n.gen,freqs[i,], col="#44AAAA")<br data-attributes="%20/" /> }<br data-attributes="%20/" /> screen(3)<br data-attributes="%20/" /> plot(x=0, y=0, type="n", xlim=c(0,n.gen), ylim=c(0,1), xlab="Generation", ylab="Allele frequency", main="N=1000")<br data-attributes="%20/" /> lines (c(0,n.gen), rep(0.5, 2), lty=3, col="#AAAAAA")<br data-attributes="%20/" /> freqs = get(paste("freqs.n", 1000 , sep=""))<br data-attributes="%20/" /> for (i in 1:n.rep) {<br data-attributes="%20/" /> lines(0:n.gen,freqs[i,], col="#44AAAA")<br data-attributes="%20/" /> }<br data-attributes="%20/" /> screen(4)<br data-attributes="%20/" /> plot(x=0, y=0, type="n", xlim=c(0,n.gen), ylim=c(0,1), xlab="Generation", ylab="Allele frequency", main="N=10000")<br data-attributes="%20/" /> lines (c(0,n.gen), rep(0.5, 2), lty=3, col="#AAAAAA")<br data-attributes="%20/" /> freqs = get(paste("freqs.n", 10000, sep=""))<br data-attributes="%20/" /> for (i in 1:n.rep) {<br data-attributes="%20/" /> lines(0:n.gen,freqs[i,], col="#44AAAA")<br data-attributes="%20/" /> }<br data-attributes="%20/" /> close.screen(all.screens=T)<br data-attributes="%20/" />dev.off()
# === Graph allele frequency after 50 generations === #<br data-attributes="%20/" />pdf("drift_hist.pdf", paper="special", height=4*2, width=4*2, onefile=F)<br data-attributes="%20/" /> split.screen(c(2,2))<br data-attributes="%20/" /> screen(1)<br data-attributes="%20/" /> hist(freqs.n10 [,n.gen+1], main="N=10", xlab="Allele frequency", xlim=c(0,1))<br data-attributes="%20/" /> screen(2)<br data-attributes="%20/" /> hist(freqs.n100 [,n.gen+1], main="N=100", xlab="Allele frequency", xlim=c(0,1))<br data-attributes="%20/" /> screen(3)<br data-attributes="%20/" /> hist(freqs.n1000 [,n.gen+1], main="N=1000", xlab="Allele frequency", xlim=c(0,1))<br data-attributes="%20/" /> screen(4)<br data-attributes="%20/" /> hist(freqs.n10000[,n.gen+1], main="N=10000", xlab="Allele frequency", xlim=c(0,1))<br data-attributes="%20/" /> close.screen(all.screens=T)<br data-attributes="%20/" />dev.off()

popgen.selection.q

#############################################################<br data-attributes="%20/" />### Michael Nothnagel, michael.nothnagel@uni-koeln.de ###<br data-attributes="%20/" />### Calculation allele frequency changes due to selection ###<br data-attributes="%20/" />#############################################################
<br data-attributes="%20/" />s.s = c(0.001, 0.01, 0.1)<br data-attributes="%20/" />h = 0.5<br data-attributes="%20/" />n.gen = 100
# === Calculate allele frequency changes === #<br data-attributes="%20/" />for (s in s.s) {<br data-attributes="%20/" /> freqs = rep(NA, n.gen+1)<br data-attributes="%20/" /> af = 0.5<br data-attributes="%20/" /> freqs[1] = 1-af<br data-attributes="%20/" /> for (j in 1:n.gen) {<br data-attributes="%20/" /> omega = 1 - 2*af*(1-af)*h*s - (1-af)*(1-af)*s<br data-attributes="%20/" /> f.het = (1-h*s)*2*af*(1-af)/omega<br data-attributes="%20/" /> f.hom = af*af/omega<br data-attributes="%20/" /> af = f.hom + f.het/2<br data-attributes="%20/" /> freqs[j+1] = 1-af<br data-attributes="%20/" /> }<br data-attributes="%20/" /> assign (paste("freqs.s", s, sep=""), freqs)<br data-attributes="%20/" /> }
# === Report allele frequencies after 100 generations === #<br data-attributes="%20/" />for (s in s.s) {<br data-attributes="%20/" /> cat("s=") ; cat(s) ; cat(": ")<br data-attributes="%20/" /> freqs = get(paste("freqs.s", s, sep=""))<br data-attributes="%20/" /> cat(freqs[n.gen+1]) ; cat("\n")<br data-attributes="%20/" /> }
# === Graph allele frequency changes === #<br data-attributes="%20/" />pdf("selection_plot.pdf", paper="special", height=4*2, width=4*2, onefile=F)<br data-attributes="%20/" /> plot(x=0, y=0, type="n", xlim=c(0,n.gen), ylim=c(0,1), xlab="Generation", ylab="Allele frequency")<br data-attributes="%20/" /> lines (c(0,n.gen), rep(0.5, 2), lty=3, col="#AAAAAA")<br data-attributes="%20/" /> for (s in s.s) {<br data-attributes="%20/" /> freqs = get(paste("freqs.s", s, sep=""))<br data-attributes="%20/" /> lines(0:n.gen,freqs, col="#44AAAA")<br data-attributes="%20/" /> }<br data-attributes="%20/" />dev.off()
==<br data-attributes="%20class=%22Apple-interchange-newline%22">Regression exercise==
In R:

load("dbp.R")<br data-attributes="%20/" />ls()<br data-attributes="%20/" />dbp[1:5,]<br data-attributes="%20/" />#<br data-attributes="%20/" />result.snp12 = glm (affection ~ rs1112, family=binomial("logit"), data=dbp)<br data-attributes="%20/" />print (result.snp12)<br data-attributes="%20/" />print ( class (result.snp12) )<br data-attributes="%20/" />print ( summary(result.snp12) )<br data-attributes="%20/" />#<br data-attributes="%20/" />dev.geno = anova (result.snp12, test="Chi")<br data-attributes="%20/" />lrt.pvalue = pchisq(dev.geno[dim(dev.geno)[1],"Deviance"],<br data-attributes="%20/" /> df=2, ncp=0, FALSE)<br data-attributes="%20/" />print ( lrt.pvalue )<br data-attributes="%20/" />#<br data-attributes="%20/" />print ( summary(result.snp12)$coefficients )<br data-attributes="%20/" />snp.beta = summary(result.snp12)$coefficients[2:3,1]<br data-attributes="%20/" />print ( snp.beta )<br data-attributes="%20/" />print ( exp(snp.beta) )<br data-attributes="%20/" />ci = confint (result.snp12)<br data-attributes="%20/" />print (ci)<br data-attributes="%20/" />print ( exp(ci) )<br data-attributes="%20/" />#<br data-attributes="%20/" />snp.data = dbp[,c("affection", "rs1112")]<br data-attributes="%20/" />summary(snp.data)<br data-attributes="%20/" />snp.data[,"rs1112"]<br />summary(snp.data)<br data-attributes="%20/" />#<br data-attributes="%20/" />result.all = glm (affection ~ rs1112, family=binomial("logit"),<br data-attributes="%20/" /> data=snp.data)<br data-attributes="%20/" />dev.all = anova (result.all, test="Chi")<br data-attributes="%20/" />summary(result.all)<br data-attributes="%20/" />print(dev.all)<br data-attributes="%20/" />#<br data-attributes="%20/" />snp.data = dbp[,c("affection", "trait","sex", "age", "rs1112", "rs1117")]<br data-attributes="%20/" />summary(snp.data)<br data-attributes="%20/" />snp.data[,"rs1112"]<br />snp.data[,"rs1117"]
#<br data-attributes="%20/" />result.adj = glm (affection ~ sex + rs1112 , family=binomial("logit"),<br data-attributes="%20/" /> data=snp.data)<br data-attributes="%20/" />summary(result.adj)<br data-attributes="%20/" />#<br data-attributes="%20/" />result.adj = glm (affection ~ age + rs1112 , family=binomial("logit"),<br data-attributes="%20/" /> data=snp.data)<br data-attributes="%20/" />summary(result.adj)<br data-attributes="%20/" />#<br data-attributes="%20/" />result.adj = glm (affection ~ sex + age + rs1112, family=binomial("logit"),<br data-attributes="%20/" /> data=snp.data)<br data-attributes="%20/" />summary(result.adj)<br data-attributes="%20/" />#<br data-attributes="%20/" />result.adj = glm (affection ~ rs1117 + rs1112, family=binomial("logit"),<br data-attributes="%20/" /> data=snp.data)<br data-attributes="%20/" />summary(result.adj)<br data-attributes="%20/" />anova (result.adj, test="Chi")<br data-attributes="%20/" />result.adj = glm (affection ~ rs1112 + rs1117, family=binomial("logit"),<br data-attributes="%20/" /> data=snp.data)<br data-attributes="%20/" />summary(result.adj)<br data-attributes="%20/" />anova (result.adj, test="Chi")<br data-attributes="%20/" />#<br data-attributes="%20/" />result.adj = lm (trait ~ rs1112, data=snp.data)<br data-attributes="%20/" />summary(result.adj)<br data-attributes="%20/" />result.adj = lm (trait ~ sex + rs1112, data=snp.data)<br data-attributes="%20/" />summary(result.adj)<br data-attributes="%20/" />#<br data-attributes="%20/" />result.inter = glm (affection ~ sex * rs1112, family=binomial("logit"),<br data-attributes="%20/" /> data=snp.data)<br data-attributes="%20/" />summary(result.inter)<br data-attributes="%20/" />result.inter = glm (affection ~ age * rs1112, family=binomial("logit"),<br data-attributes="%20/" /> data=snp.data)<br data-attributes="%20/" />summary(result.inter)<br data-attributes="%20/" />#<br data-attributes="%20/" />result.inter = glm (affection ~ rs1112 * rs1117, family=binomial("logit"),<br data-attributes="%20/" /> data=snp.data)<br data-attributes="%20/" />summary(result.inter)<br data-attributes="%20/" />#<br data-attributes="%20/" />q()
==SEQPower exercise==

spower -h<br data-attributes="%20/" />spower LOGIT -h<br data-attributes="%20/" />spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --method "CFisher --name CMC" -r 100 -j 4 -l 1 -o exercise
<br data-attributes="%20/" />spower show exercise.csv<br data-attributes="%20/" />spower show exercise.csv power*<br data-attributes="%20/" />spower show exercise.loci.csv<br data-attributes="%20/" />spower show exercise.loci.csv maf<br data-attributes="%20/" />spower show tests<br data-attributes="%20/" />spower show test SKAT
<br data-attributes="%20/" />spower LOGIT Kryukov2009European1800.sfs --def_rare 0.01 --def_neutral -0.00001 0.00001 --moi A --proportion_detrimental 1 --proportion_protective 0 --OR_rare_detrimental 1.5 --OR_common_detrimental 1 --baseline_effect 0.01 --sample_size 1000 --p1 0.5 --limit 1 --alpha 0.05 --method "KBAC --name K1 --mafupper 0.01 --maflower 0 --alternative 1 --moi additive --permutations 1000 --adaptive 0.1" --replicates 1000 --jobs 4 -o exercise
<br data-attributes="%20/" />spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.2 --ORmax_rare_detrimental 3.0 --method CFisher -r 100 -j 4 -l 1 -o exercise
<br data-attributes="%20/" />spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.2 --ORmax_rare_detrimental 3.0 --method CFisher -r 100 -j 4 -l 1 -o exercise
<br data-attributes="%20/" />spower show exercise.loci.csv effect*
<br data-attributes="%20/" />spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.2 --ORmax_rare_detrimental 3.0 --proportion_detrimental 0.8 --method CFisher -r 100 -j 4 -l 1 -o exercise
<br data-attributes="%20/" />spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_sites 0.05 --method CFisher -r 100 -j 4 -l 1 -o exercise
<br data-attributes="%20/" />spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_sites 0.05 --method CFisher -r 100 -j 4 -l 1 -o exercise
<br data-attributes="%20/" />spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_low_maf 0.000125 --method CFisher -r 100 -j 4 -l 1 -o exercise
<br data-attributes="%20/" />spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_low_maf 0.000125 --method CFisher -r 100 -j 4 -l 1 -o exercise
<br data-attributes="%20/" />spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --method "CFisher --alternative 1 --name CMC" "KBAC --permutations 1000 --alternative 1" "WSSRankTest --alternative 1 --name WSS" "VTtest --alternative 1 --permutations 1000" "SKAT disease" -r 100 -j 4 -l 1 -o exercise<br data-attributes="%20/" /><br data-attributes="%20/" />spower LNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --method "CollapseQt --name CMC --alternative 2" -r 100 -j 4 -l 1 -o exercise
<br data-attributes="%20/" />spower LNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --meanshiftmax_rare_detrimental 0.5 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise
<br data-attributes="%20/" />spower ELNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --QT_thresholds 0.4 0.6 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise
<br data-attributes="%20/" />spower ELNR Kryukov2009European1800.sfs --sample_size 1000 --p1 0.5 --meanshift_rare_detrimental 0.5 --QT_thresholds 0.4 0.6 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise
<br data-attributes="%20/" />spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --method "GroupWrite ExerciseSimulation" -j 4 -o exercise -v1
<br data-attributes="%20/" />spower show exercise.SEQPowerDB LOGIT method power title --condition "where power between 0.25 and 0.95"<br data-attributes="%20/" />
<br data-attributes="%20/" />for i in 1 1.5 2 2.5 3 3.5 4; do<br data-attributes="%20/" />spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental $i --method "CFisher --name CMC$i" --title FixedOR$i -r 100 -j 4 -l 1 -o exercise2<br data-attributes="%20/" />done
==VAT exercise==

vtools -h
vtools init VATDemo
vtools import *.vcf.gz --var_info DP filter --geno_info DP_geno --build hg18 -j1
vtools liftover hg19
head phenotypes.csv
vtools phenotype --from_file phenotypes.csv --delimiter ","
vtools show project
vtools show tables
vtools show table variant
vtools show samples
vtools show genotypes
vtools show fields
vtools select variant --count
vtools show genotypes > GenotypeSummary.txt
head GenotypeSummary.txt
vtools output variant "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
vtools select variant "filter='PASS'" --count
vtools select variant "filter='PASS'" -o "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
vtools update variant --from_stat 'total=#(GT)' 'num=#(alt)' 'het=#(het)' 'hom=#(hom)' 'other=#(other)' 'minDP=min(DP_geno)' 'maxDP=max(DP_geno)' 'meanDP=avg(DP_geno)' 'maf=maf()'
vtools show fields
vtools show table variant
vtools update variant --from_stat 'totalGD10=#(GT)' 'numGD10=#(alt)' 'hetGD10=#(het)' 'homGD10=#(hom)' 'otherGD10=#(other)' 'mafGD10=maf()' --genotypes "DP_geno > 10"
vtools show fields
vtools show table variant
vtools output variant chr pos maf mafGD10 --header --limit 20
vtools phenotype --set "RACE=0" --samples "filename like 'YRI%'"
vtools phenotype --set "RACE=1" --samples "filename like 'CEU%'"
vtools show samples --limit 10
vtools update variant --from_stat 'CEU_mafGD10=maf()' --genotypes 'DP_geno>10' --samples "RACE=1"
vtools update variant --from_stat 'YRI_mafGD10=maf()' --genotypes 'DP_geno>10' --samples "RACE=0"
vtools output variant chr pos mafGD10 CEU_mafGD10 YRI_mafGD10 --header --limit 10
vtools phenotype --from_stat 'CEU_totalGD10=#(GT)' 'CEU_numGD10=#(alt)' --genotypes 'DP_geno>10' --samples "RACE=1"
vtools phenotype --from_stat 'YRI_totalGD10=#(GT)' 'YRI_numGD10=#(alt)' --genotypes 'DP_geno>10' --samples "RACE=0"
vtools phenotype --output sample_name CEU_totalGD10 CEU_numGD10 YRI_totalGD10 YRI_numGD10 --header
vtools select variant 'maf>=0.01' -t variant_MAFge01 'Variants that have MAF >= 0.01'
vtools show tables
vtools execute KING --var_table variant_MAFge01
vtools_report plot_pheno_fields KING_MDS1 KING_MDS2 RACE --dot KING.mds.race.pdf --discrete_color Dark2
vtools_report plot_pheno_fields KING_MDS1 KING_MDS2 panel --dot KING.mds.panel.pdf --discrete_color Dark2
vtools execute ANNOVAR geneanno
vtools output variant chr pos ref alt mut_type --limit 20 --header
vtools_report trans_ratio variant -n num
vtools_report trans_ratio variant -n numGD10
vtools select variant "DP<15" -t to_remove
vtools show tables
vtools remove variants to_remove -v0
vtools show tables
vtools remove genotypes "DP_geno<10" -v0 <br />vtools select variant "mut_type like 'non%' or mut_type like 'stop%' or region_type='splicing'" -t v_funct <br />vtools show tables <br />vtools show samples --limit 5 <br />vtools select variant --samples "RACE=1" -t CEU <br />mkdir -p ceu
cd ceu
vtools init ceu --parent ../ --variants CEU --samples "RACE=1" --build hg19
vtools show project
vtools select variant "CEU_mafGD10>=0.05" -t common_ceu
vtools select v_funct "CEU_mafGD10<0.01" -t rare_ceu <br />vtools use refGene <br />vtools show annotation refGene <br />vtools associate -h <br />vtools show tests <br />vtools show test LinRegBurden
vtools associate common_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db EA_CV > EA_CV.asso.res
grep -i error *.log
less EA_CV.asso.res
sort -g -k7 EA_CV.asso.res | head
vtools show fields
vtools associate rare_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db EA_RV > EA_RV.asso.res
grep -i error *.log | tail -22
less EA_RV.asso.res
sort -g -k6 EA_RV.asso.res | head
vtools associate rare_ceu BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db EA_RV > EA_RV_VT.asso.res
grep -i error *.log | tail -22
less EA_RV_VT.asso.res
sort -g -k6 EA_RV_VT.asso.res | head
vtools select rare_ceu "refGene.name2='ABCC1'" -o chr pos ref alt CEU_mafGD10 numGD10 mut_type --header
vtools_report plot_association qq -o QQRV -b --label_top 2 -f 6 < EA_RV.asso.res
vtools_report plot_association manhattan -o MHRV -b --label_top 5 --color Dark2 --chrom_prefix None -f 6 < EA_RV.asso.res <br />vtools associate rare_ceu BMI --covariate SEX KING_MDS1 KING_MDS2 -m "LinRegBurden --name RVMDS2 --alternative 2" -g refGene.name2 -j1 --to_db EA_RV > EA_RV_MDS2.asso.res
vtools_report plot_association qq -o QQRV_MDS2 -b --label_top 2 -f 6 < EA_RV_MDS2.asso.res <br />cd .. <br />vtools select variant --samples "RACE=0" -t YRI<br />mkdir -p yri<br />cd yri
vtools init yri --parent ../ --variants YRI --samples "RACE=0" --build hg19
vtools select variant "YRI_mafGD10>=0.05" -t common_yri
vtools select v_funct "YRI_mafGD10<0.01" -t rare_yri <br />vtools use refGene vtools associate common_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db YA_CV > YA_CV.asso.res
vtools associate rare_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db YA_RV > YA_RV.asso.res
vtools associate rare_yri BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db YA_RV > YA_RV_VT.asso.res
cd ..
vtools_report meta_analysis ceu/EA_RV_VT.asso.res yri/YA_RV_VT.asso.res --beta 5 --pval 6 --se 7 -n 2 --link 1 > META_RV_VT.asso.res
cut -f1,3 META_RV_VT.asso.res | head
Bureaucrat, administrator
1,252
edits