Changes

2016-july-berlin-commands

5,163 bytes removed, 12:27, 8 July 2016
/* Annotation exercise */
==Annotation exercise==
 
mkdir APOC3 && cd APOC3
vtools init APOC3
vtools import ../data/APOC3.vcf --format ../data/vcf.fmt --build hg19
vtools select 5var --output chr pos ref alt dbNSFP.SIFT_pred dbNSFP.SIFT_score
vtools select 5var --output chr pos ref alt dbNSFP.CADD_raw dbNSFP.CADD_phred
==GATK/IGV exercise==
java -jar GenomeAnalysisTK.jar --help<br />java -Xmx200m -jar GenomeAnalysisTK.jar -R human_g1k_v37.fa -T UnifiedGenotyper -glm BOTH -I child.bam -I father.bam -I mother.bam -L trio.intervals -o ug.vcf
less -S ug.vcf<br />java -Xmx200m -jar GenomeAnalysisTK.jar -T VariantEval -R human_g1k_v37.fa -L trio.intervals -D dbsnp.vcf -eval ug.vcf -o ug.vcf.eval<br />less -S ug.vcf.eval<br />java -Xmx200m -jar GenomeAnalysisTK.jar -R human_g1k_v37.fa -T SelectVariants -V ug.vcf -mv -mvq 0 -o ug.dnm.vcf -ped trio.ped<br />java -Xmx500m -jar GenomeAnalysisTK.jar -R human_g1k_v37.fa -T HaplotypeCaller -I child.bam -I father.bam -I mother.bam -minPruning 4 -L 1:199325670-199325672 -mergeVariantsViaLD -o hc.vcf
==PLINKSEQ PSEQ exercise==
Data analysis:
# === Report allele frequencies after 100 generations === #<br />for (s in s.s) {<br /> cat("s=") ; cat(s) ; cat(": ")<br /> freqs = get(paste("freqs.s", s, sep=""))<br /> cat(freqs[n.gen+1]) ; cat("\n")<br /> }
# === Graph allele frequency changes === #<br />pdf("selection_plot.pdf", paper="special", height=4*2, width=4*2, onefile=F)<br /> plot(x=0, y=0, type="n", xlim=c(0,n.gen), ylim=c(0,1), xlab="Generation", ylab="Allele frequency")<br /> lines (c(0,n.gen), rep(0.5, 2), lty=3, col="#AAAAAA")<br /> for (s in s.s) {<br /> freqs = get(paste("freqs.s", s, sep=""))<br /> lines(0:n.gen,freqs, col="#44AAAA")<br /> }<br />dev.off()
==Regression exercise==
In R:
load("dbp.R")<br />ls()<br />dbp[1:5,]<br />#<br />result.snp12 = glm (affection ~ rs1112, family=binomial("logit"), data=dbp)<br />print (result.snp12)<br />print ( class (result.snp12) )<br />print ( summary(result.snp12) )<br />#<br />dev.geno = anova (result.snp12, test="Chi")<br />lrt.pvalue = pchisq(dev.geno[dim(dev.geno)[1],"Deviance"],<br /> df=2, ncp=0, FALSE)<br />print ( lrt.pvalue )<br />#<br />print ( summary(result.snp12)$coefficients )<br />snp.beta = summary(result.snp12)$coefficients[2:3,1]<br />print ( snp.beta )<br />print ( exp(snp.beta) )<br />ci = confint (result.snp12)<br />print (ci)<br />print ( exp(ci) )<br />#<br />snp.data = dbp[,c("affection", "rs1112")]<br />summary(snp.data)<br />snp.data[,"rs1112"]<br />summary(snp.data)<br />#<br />result.all = glm (affection ~ rs1112, family=binomial("logit"),<br /> data=snp.data)<br />dev.all = anova (result.all, test="Chi")<br />summary(result.all)<br />print(dev.all)<br />#<br />snp.data = dbp[,c("affection", "trait","sex", "age", "rs1112", "rs1117")]<br />summary(snp.data)<br />snp.data[,"rs1112"]<br />snp.data[,"rs1117"]
#<br />result.adj = glm (affection ~ sex + rs1112 , family=binomial("logit"),<br /> data=snp.data)<br />summary(result.adj)<br />#<br />result.adj = glm (affection ~ age + rs1112 , family=binomial("logit"),<br /> data=snp.data)<br />summary(result.adj)<br />#<br />result.adj = glm (affection ~ sex + age + rs1112, family=binomial("logit"),<br /> data=snp.data)<br />summary(result.adj)<br />#<br />result.adj = glm (affection ~ rs1117 + rs1112, family=binomial("logit"),<br /> data=snp.data)<br />summary(result.adj)<br />anova (result.adj, test="Chi")<br />result.adj = glm (affection ~ rs1112 + rs1117, family=binomial("logit"),<br /> data=snp.data)<br />summary(result.adj)<br />anova (result.adj, test="Chi")<br />#<br />result.adj = lm (trait ~ rs1112, data=snp.data)<br />summary(result.adj)<br />result.adj = lm (trait ~ sex + rs1112, data=snp.data)<br />summary(result.adj)<br />#<br />result.inter = glm (affection ~ sex * rs1112, family=binomial("logit"),<br /> data=snp.data)<br />summary(result.inter)<br />result.inter = glm (affection ~ age * rs1112, family=binomial("logit"),<br /> data=snp.data)<br />summary(result.inter)<br />#<br />result.inter = glm (affection ~ rs1112 * rs1117, 
family=binomial("logit"),<br /> data=snp.data)<br />summary(result.inter)<br />#<br />q()
==RV-TDT exercise==
for g in `ls rvtdt_exercise_data | grep tped | cut -d"." -f1`
do
echo "running rvTDT on gene "${g}
./rvTDT exercise_proj -G ./rvtdt_exercise_data/${g}.tped \
-P ./rvtdt_exercise_data/rvtdt_exercise.phen \
-M ./rvtdt_exercise_data/${g}.map \
--adapt 500 --alpha 0.00001 --permut 2000 \
--lower_cutoff 0 --upper_cutoff 100 \
--minVariants 3 \
--maxMissRatio 1
done
 
 
==SEQPower exercise==
spower -h<br />spower LOGIT -h<br />spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --method "CFisher --name CMC" -r 100 -j 4 -l 1 -o exercise
==VAT exercise==
 
vtools -h
vtools init VATDemo
vtools remove variants to_remove -v0
vtools show tables
vtools remove genotypes "DP_geno<10" -v0
vtools select variant "mut_type like 'non%' or mut_type like 'stop%' or region_type='splicing'" -t v_funct
vtools show tables
vtools show samples --limit 5
vtools show project
vtools select variant "CEU_mafGD10>=0.05" -t common_ceu
vtools select v_funct "CEU_mafGD10<0.01" -t rare_ceu
vtools use refGene
vtools show annotation refGene
vtools associate -h
vtools select rare_ceu "refGene.name2='ABCC1'" -o chr pos ref alt CEU_mafGD10 numGD10 mut_type --header
vtools_report plot_association qq -o QQRV -b --label_top 2 -f 6 < EA_RV.asso.res
vtools_report plot_association manhattan -o MHRV -b --label_top 5 --color Dark2 --chrom_prefix None -f 6 < EA_RV.asso.res
vtools associate rare_ceu BMI --covariate SEX KING_MDS1 KING_MDS2 -m "LinRegBurden --name RVMDS2 --alternative 2" -g refGene.name2 -j1 --to_db EA_RV > EA_RV_MDS2.asso.res
vtools_report plot_association qq -o QQRV_MDS2 -b --label_top 2 -f 6 < EA_RV_MDS2.asso.res
cd ..
vtools select variant --samples "RACE=0" -t YRI
mkdir -p yri; cd yri
vtools init yri --parent ../ --variants YRI --samples "RACE=0" --build hg19
vtools select variant "YRI_mafGD10>=0.05" -t common_yri
vtools select v_funct "YRI_mafGD10<0.01" -t rare_yri
vtools use refGene
vtools associate common_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db YA_CV > YA_CV.asso.res
vtools associate rare_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db YA_RV > YA_RV.asso.res
vtools_report meta_analysis ceu/EA_RV_VT.asso.res yri/YA_RV_VT.asso.res --beta 5 --pval 6 --se 7 -n 2 --link 1 > META_RV_VT.asso.res
cut -f1,3 META_RV_VT.asso.res | head
==VMT==
##################################
# Example 1: Autosomal recessive #
##################################
vtools init VMT --force
vtools import AR1.vcf.gz AR2.vcf.gz --format NSHI.fmt --build hg19 -j8
vtools show tables
vtools show samples
vtools execute ANNOVAR geneanno
vtools use dbNSFP.DB
vtools use refGene
vtools show fields
vtools select variant "(ExAC_AF is NULL or ExAC_AF<0.0005) AND (ExAC_SAS_AF is NULL or ExAC_SAS_AF<0.0005)" -t ExAC0005
vtools select ExAC0005 "CADD_phred>20 or CADD_phred is NULL" -t CADD20
vtools select CADD20 "region_type is 'splicing' OR (mut_type is not NULL AND mut_type is not 'synonymous SNV' AND mut_type is not 'unknown')" -t ANNOVARtype
vtools select ANNOVARtype "dbNSFP.chr is not null" --output chr pos ref alt sift_pred lrt_pred fathmm_pred mutationtaster_pred mutationassessor_pred polyphen2_hdiv_pred polyphen2_hvar_pred provean_pred MetaLR_pred MetaSVM_pred >snv.txt
python choose_damaging_variants.py snv.txt
vtools update ANNOVARtype --format VMT_annotation.fmt --from_file snv.txt.parsed
vtools select ANNOVARtype "vmt_annotation='damaging_SNV'" -t damaging
vtools select damaging "chr=16 AND (pos>=63600000 AND pos<=79700000)" -o chr pos ref alt region_type region_name mut_type function rs_dbSNP141 ExAC_Adj_AF ExAC_SAS_AF CADD_phred FATHMM_pred LRT_pred MetaLR_pred MetaSVM_pred MutationAssessor_pred MutationTaster_pred Polyphen2_HDIV_pred Polyphen2_HVAR_pred PROVEAN_pred SIFT_pred "samples('geno_filter=GT=1')" "samples('geno_filter=GT=2')"
vtools update damaging --from_stat "totX=#(GT)" "homX=#(hom)" --samples "sample_name='L1' OR sample_name='L2'" -j2
vtools select damaging "totX=homX AND totX=2" -t homL1_L2
vtools remove fields totX homX
vtools output homL1_L2 chr pos ref alt region_type region_name mut_type function rs_dbSNP141 ExAC_Adj_AF ExAC_SAS_AF CADD_phred FATHMM_pred LRT_pred MetaLR_pred MetaSVM_pred MutationAssessor_pred MutationTaster_pred Polyphen2_HDIV_pred Polyphen2_HVAR_pred PROVEAN_pred SIFT_pred "samples('geno_filter=GT=1')" "samples('geno_filter=GT=2')"
##################################
# Example 2: Autosomal dominant #
##################################
vtools import AD.vcf.gz --format NSHI.fmt --build hg19 -j8
vtools show tables
vtools show samples
vtools select variant "(ExAC_AF is NULL or ExAC_AF<0.0005) AND (ExAC_NFE_AF is NULL or ExAC_NFE_AF<0.0005)" --samples "sample_name='AD1' OR sample_name='AD2'" -t ExAC0005_AD
vtools select ExAC0005_AD "CADD_phred>15 or CADD_phred is NULL" -t CADD15_AD
vtools select CADD15_AD "dbNSFP.chr is not null" --output chr pos ref alt sift_pred lrt_pred fathmm_pred mutationtaster_pred mutationassessor_pred polyphen2_hdiv_pred polyphen2_hvar_pred provean_pred MetaLR_pred MetaSVM_pred >snv.txt
python choose_damaging_variants.py snv.txt
vtools update variant --format VMT_annotation.fmt --from_file snv.txt.parsed
vtools select CADD15_AD "vmt_annotation='damaging_SNV'" -t damaging_AD
vtools update damaging_AD --from_stat "totX=#(GT)" "hetX=#(het)" --samples "sample_name='AD1' OR sample_name='AD2'" -j2
vtools select damaging_AD "totX=hetX AND totX=2" -t hetAD_2
vtools remove fields totX hetX
vtools output hetAD_2 chr pos ref alt refGene.name2 rs_dbSNP141 ExAC_Adj_AF ExAC_SAS_AF CADD_phred FATHMM_pred LRT_pred MetaLR_pred MetaSVM_pred MutationAssessor_pred MutationTaster_pred Polyphen2_HDIV_pred Polyphen2_HVAR_pred PROVEAN_pred SIFT_pred "samples('geno_filter=GT=1')" "samples('geno_filter=GT=2')"
##################################
# Example 3: De Novo #
##################################
vtools import de_novo.vcf.gz --format NSHI.fmt --build hg19 -j8
vtools show tables
vtools show samples
vtools update variant --from_stat "totX=#(GT)" "hetX=#(het)" --samples "sample_name='Son'" -j2
vtools select variant "totX=hetX AND totX=1" -t Son_het
vtools update Son_het --from_stat "totX=#(GT)" "wtX=#(wtGT)" --samples "sample_name='Dad' OR sample_name='Mom'" -j2
vtools select Son_het "(totX=wtX AND totX=2) and ((ExAC_AF is NULL or ExAC_AF<0.0005) AND (ExAC_NFE_AF is NULL or ExAC_NFE_AF<0.0005))" -t deNovo
vtools output deNovo chr pos ref alt refGene.name2 rs_dbSNP141 ExAC_Adj_AF ExAC_NFE_AF CADD_phred FATHMM_pred LRT_pred MetaLR_pred MetaSVM_pred MutationAssessor_pred MutationTaster_pred Polyphen2_HDIV_pred Polyphen2_HVAR_pred PROVEAN_pred SIFT_pred "samples('geno_filter=GT=1')" "samples('geno_filter=GT=2')"
Bureaucrat, administrator
1,252
edits