AdvGeneMap2018Commands

ANNOVAR

table_annovar.pl
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Gene.vcf -remove -nastring . -protocol refGene -operation g -vcfinput
cat APOC3_Gene.vcf.hg19_multianno.txt
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Gene.vcf -remove -nastring . -protocol refGene,knownGene,ensGene -operation g,g,g -arg '-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing' -vcfinput
awk -F'\t' '{print $1,$2,$6,$7,$8,$9,$10}' APOC3_Gene.vcf.hg19_multianno.txt
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Region.vcf -remove -nastring . -protocol phastConsElements46way -operation r -vcfinput
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Region.vcf -remove -nastring . -protocol gwasCatalog -operation r -vcfinput
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Filter.vcf -remove -nastring . -protocol gnomad_genome,gnomad_exome,popfreq_max_20150413,gme,avsnp150,dbnsfp33a,dbscsnv11,cadd13gt20,clinvar_20170905,gwava -operation f,f,f,f,f,f,f,f,f,f -vcfinput
awk -F'\t' '{print $1,$2,$103,$104}' APOC3_Filter.vcf.hg19_multianno.txt
awk -F'\t' '{print $1,$2,$6,$14}' APOC3_Filter.vcf.hg19_multianno.txt
awk -F'\t' '{print $1,$2,$15,$16,$17,$18,$19,$20,$21,$22}' APOC3_Filter.vcf.hg19_multianno.txt
awk -F'\t' '{print $1,$2,$36,$86,$70}' APOC3_Filter.vcf.hg19_multianno.txt
awk -F'\t' '{print $1,$2,$99,$100}' APOC3_Filter.vcf.hg19_multianno.txt
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_ANN.vcf -remove -nastring . -protocol refGene,knownGene,ensGene,wgRna,targetScanS,phastConsElements46way,tfbsConsSites,gwasCatalog,gnomad_genome,gnomad_exome,popfreq_max_20150413,gme,avsnp150,dbnsfp33a,dbscsnv11,cadd13gt20,clinvar_20170905,gwava -operation g,g,g,r,r,r,r,r,f,f,f,f,f,f,f,f,f,f -arg '-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing',,,,,,,,,,,,,,, -vcfinput

GenABEL

plink --file GWAS_clean4 --pheno pheno.phen --pheno-name Aff --transpose --recode --out gwa_gabel --noweb
plink --file GWAS_clean4 --pheno pheno.phen --pheno-name systolic --transpose --recode --out gwa_gabel_qtl --noweb
R
library(GenABEL)
convert.snp.tped(tped = "gwa_gabel_qtl.tped", tfam = "gwa_gabel_qtl.tfam", out = "gwa_gabel_qtl.raw", strand = "u")
g.dat <- load.gwaa.data(phen = "gwa_gabel_qtl.praw", gen = "gwa_gabel_qtl.raw", force = T)
slotNames(g.dat)
slotNames(g.dat@gtdata)
colnames(g.dat@phdata)
sample.size <- g.dat@gtdata@nids
snps.total <- g.dat@gtdata@nsnps
print(c(sample.size, snps.total))
summary(g.dat@phdata$disease)
hist(g.dat@phdata$disease, main="Quantitative Phenotype data summary", xlab = "Systolic pressure", freq = F,breaks=20, col="gray")
rug(g.dat@phdata$disease)
test.snp <- scan.glm('disease ~ CRSNP', family = gaussian(), data = g.dat)
names(test.snp)  
alpha <- 5e-8
test.snp$snpnames[test.snp$P1df < alpha]
test.snp$P1df[test.snp$P1df < alpha]
test.qt <- qtscore(disease, data = g.dat, trait = "gaussian")
slotNames(test.qt)
names(test.qt@results)
head(results(test.qt))
test.qt@lambda
descriptives.scan(test.qt)
row.names(results(test.qt))[results(test.qt)$P1df < alpha]
results(test.qt)$P1df[results(test.qt)$P1df < alpha]
results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]
obs <- sort(results(test.qt)$P1df) 
ept <- ppoints(obs) 
plot(-log10(ept), -log10(obs), main = "GWAS QQ plot, qtl", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
plot(test.qt, col = "black")
# Repeat the score test with sex as a covariate (formula form of qtscore).
test.qt.sex <- qtscore(disease ~ sex, data = g.dat, trait = "gaussian")
# List the SNPs below the alpha threshold in the sex-adjusted scan. The
# selection mask is taken from test.qt.sex itself; indexing with the
# unadjusted test.qt p-values would just reprint the unadjusted hits.
row.names(results(test.qt.sex))[results(test.qt.sex)$P1df < alpha]
# Fit disease ~ sex with lm() and print the model summary.
summary(lm(disease ~ sex, data = g.dat))
convert.snp.tped(tped = "gwa_gabel.tped", tfam = "gwa_gabel.tfam", out = "gwa_gabel.raw", strand = "u")
b.dat <- load.gwaa.data(phen = "gwa_gabel.praw", gen = "gwa_gabel.raw", force = T)
slotNames(b.dat)
slotNames(b.dat@gtdata)
colnames(b.dat@phdata)
b.dat@gtdata@nids
case.size <- length(which(b.dat@phdata$disease == 1))
control.size <- length(which(b.dat@phdata$disease == 0))
case.size 
control.size 
snpsb.total <- b.dat@gtdata@nsnps
testb.snp <- scan.glm('disease ~ CRSNP', family = binomial(), data = b.dat)
names(testb.snp)  
alpha <- 5e-8
testb.snp$snpnames[testb.snp$P1df < alpha]
testb.snp$P1df[testb.snp$P1df < alpha]
testb.qt <- qtscore(disease, data = b.dat, trait = "binomial")
slotNames(testb.qt)
descriptives.scan(testb.qt)
row.names(results(testb.qt))[results(testb.qt)$P1df < alpha]
results(testb.qt)$P1df[results(testb.qt)$P1df < alpha]
results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha]  
gkin <- ibs(g.dat, weight = "freq")
gkin[1:10,1:10]
cps.full <- cmdscale(as.dist(.5 - gkin), eig = T, k = 10)
names(cps.full)
cps <- cps.full$points
plot(cps[,1], cps[,2], pch = g.dat@phdata$popn)
legend("topright", c("TSI","MEX", "CEU"), pch = c(1,2,3))       
colnames(cps)<-c('C1','C2','C3','C4','C5','C6','C7','C8','C9','C10') 
gpc.dat <- g.dat
gpc.dat@phdata<-cbind(g.dat@phdata, cps)
test.pc.a <- scan.glm('disease ~ CRSNP + C1 + C2 + C3 + C4 + C5', family=gaussian(), data = gpc.dat)
test.pc.a$snpnames[test.pc.a$P1df < alpha]
test.pc.a$P1df[test.pc.a$P1df < alpha]
test.pc.b <- qtscore(disease ~  C1 + C2 + C3 + C4 + C5, data = gpc.dat, trait = "gaussian") 
test.pc.b@lambda
plot(cps.full$eig[1:10]/sum(cps.full$eig), axes = F, type = "b", xlab = "Components",  ylim = c(0,0.05), ylab = "Proportion of Variations", main = "MDS analysis scree plot") 
axis(1, 1:10)
axis(2)
plot(cumsum(cps.full$eig[1:10])/sum(cps.full$eig), axes = F, type = "b", ylim = c(0,0.2), xlab = "Components", ylab = "Proportion of Variations", main = "MDS analysis cumulative plot") 
axis(1, 1:10)
axis(2)
row.names(results(test.qt))[results(test.qt)$Pc1df < alpha]
results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]
test.qt@lambda
obs <- sort(results(test.qt)$chi2.1df)
ept <- sort(qchisq(ppoints(obs), df = 1)) 
plot(ept, obs, main = "Genomic control (lambda = slope of the dashed line)", xlab="Expected chisq, 1df", ylab="Observed chisq, 1df")
abline(0, 1, col = "red")
abline(0, test.qt@lambda[1], lty = 2)
median(results(test.qt)$chi2.1df)/0.456
obs <- sort(results(test.qt)$Pc1df)
ept <- ppoints(obs) 
plot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. via Genomic Control", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")
abline(0, 1, col = "red")
abline(h = 8, lty = 2) 
adj.gkin = gkin
diag(adj.gkin) = hom(g.dat)$Var
test.eg <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = 2)
descriptives.scan(test.eg)
snp.eg <- row.names(results(test.eg))[results(test.eg)$P1df < alpha]
pvalue.eg <- results(test.eg)$P1df[results(test.eg)$P1df < alpha]
lambda.eg <- test.eg@lambda
snp.eg
pvalue.eg
lambda.eg
for (k in 1:10){
 test.tmp <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = k)
print(test.tmp@lambda$estimate)
}
obs <- sort(results(test.eg)$Pc1df)
ept <- ppoints(obs) 
plot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. w/ EIGENSTRAT", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
plot(test.qt, col = "black")
add.plot(test.eg, col = "gray", pch = 3)
legend("topright", c("Original plot","After correction w/ EIGENSTRAT"), pch = c(1,3))

==GWAS Data QC==

plink --file GWAS --noweb
plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind --noweb
plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5 --noweb
plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5 --noweb
plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean --noweb
plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean --noweb
plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean --noweb
plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2 --noweb
plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking --noweb
R
sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T)
names(sexcheck)
sex_problem = sexcheck[which(sexcheck$STATUS=="PROBLEM"),]
sex_problem
q()
plink --file GWAS_clean2 --genome --out duplicates --noweb
R
dups = read.table("duplicates.genome", header = T)
problem_pairs = dups[which(dups$PI_HAT > 0.4),]
problem_pairs
problem_pairs = dups[which(dups$PI_HAT > 0.05),]
myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT")
problem_pairs[myvars]
q()
plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3 --noweb
plink --file GWAS_clean3 --het --noweb
R
Dataset <- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".",
strip.white=TRUE)
mean(Dataset$F)
sd(Dataset$F)
jpeg("hist.jpeg", height=1000, width=1000)
hist(scale(Dataset$F), xlim=c(-4,4))
dev.off()
q()
plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy --noweb
R
hardy = read.table("plink.hwe", header = T)
names(hardy)
hwe_prob = hardy[which(hardy$P < 0.0000009),]
hwe_prob
q()
plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4 --noweb

==GWAS Control Substructure==
plink --file GWAS_clean4 --genome --mds-plot 10 --noweb
R
mydata = read.table("mds_components.txt", header=T)
mydata$pch[mydata$Group==1 ] <-15
mydata$pch[mydata$Group==2 ] <-16
mydata$pch[mydata$Group==3 ] <-2
jpeg("mds.jpeg", height=1000, width=1000)
plot(mydata$C1, mydata$C2 ,pch=mydata$pch)
dev.off()
q()
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj --noweb
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.mds --covar-name C1 --logistic --adjust --out C1 --noweb
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.mds --covar-name C1-C2 --logistic --adjust --out C1-C2 --noweb
R
# Draw a QQ plot of observed versus expected -log10(p) values on the
# current graphics device.
#
# pvals: numeric vector of p-values; sort() drops missing values.
# title: main title passed to plot().
#
# Expected quantiles are i / (n + 1) for i = 1..n. A red y = x reference
# line is drawn first and the points are added on top. Stops with an
# error when no non-missing p-values are supplied.
broadqq <- function(pvals, title)
{
    observed <- sort(pvals)
    n_obs <- length(observed)
    if (n_obs == 0) {
        stop("broadqq: no non-missing p-values to plot", call. = FALSE)
    }
    lobs <- -log10(observed)
    lexp <- -log10(seq_len(n_obs) / (n_obs + 1))
    # Size the axes (and the reference diagonal) from the data instead of a
    # fixed 0..7 range, so the line spans the whole plot and no point is
    # clipped. p-values of exactly 0 give Inf and are left out of the limit.
    axis_max <- max(lobs[is.finite(lobs)], lexp)
    plot(c(0, axis_max), c(0, axis_max), col="red", lwd=3, type="l", xlab="Expected (-logP)", ylab="Observed (-logP)", xlim=c(0, axis_max), ylim=c(0, axis_max), las=1, xaxs="i", yaxs="i", bty="l", main = title)
    points(lexp, lobs, pch=23, cex=.4, bg="black")
}
jpeg("qqplot_compare.jpeg", height=1000, width=1000)
par(mfrow=c(2,1))
aff_unadj<-read.table("unadj.assoc.logistic", header=TRUE)
aff_unadj.add.p<-aff_unadj[aff_unadj$TEST==c("ADD"),]$P
broadqq(aff_unadj.add.p,"Some Trait Unadjusted")
aff_C1C2<-read.table("C1-C2.assoc.logistic", header=TRUE)
aff_C1C2.add.p<-aff_C1C2[aff_C1C2$TEST==c("ADD"),]$P
broadqq(aff_C1C2.add.p, "Some Trait Adjusted")
dev.off()
gws_unadj = aff_unadj[which(aff_unadj$P < 0.0000001),]
gws_unadj
gws_adjusted = aff_C1C2[which(aff_C1C2$P < 0.0000001),]
gws_adjusted
q()

VAT

vtools -h
vtools init VATDemo
vtools import *.vcf.gz --var_info DP filter --geno_info DP_geno --build hg18 -j1
vtools liftover hg19
head phenotypes.csv
vtools phenotype --from_file phenotypes.csv --delimiter ","
vtools show project
vtools show tables
vtools show table variant
vtools show samples
vtools show genotypes
vtools show fields
vtools select variant --count
vtools show genotypes > GenotypeSummary.txt
head GenotypeSummary.txt
vtools output variant "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
vtools select variant "filter='PASS'" --count
vtools select variant "filter='PASS'" -o "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
vtools update variant --from_stat 'total=#(GT)' 'num=#(alt)' 'het=#(het)' 'hom=#(hom)' 'other=#(other)' 'minDP=min(DP_geno)' 'maxDP=max(DP_geno)' 'meanDP=avg(DP_geno)' 'maf=maf()'
vtools show fields
vtools show table variant
vtools update variant --from_stat 'totalGD10=#(GT)' 'numGD10=#(alt)' 'hetGD10=#(het)' 'homGD10=#(hom)' 'otherGD10=#(other)' 'mafGD10=maf()' --genotypes "DP_geno > 10"
vtools show fields
vtools show table variant
vtools output variant chr pos maf mafGD10 --header --limit 20
vtools phenotype --set "RACE=0" --samples "filename like 'YRI%'"
vtools phenotype --set "RACE=1" --samples "filename like 'CEU%'"
vtools show samples --limit 10
vtools update variant --from_stat 'CEU_mafGD10=maf()' --genotypes 'DP_geno>10' --samples "RACE=1"
vtools update variant --from_stat 'YRI_mafGD10=maf()' --genotypes 'DP_geno>10' --samples "RACE=0"
vtools output variant chr pos mafGD10 CEU_mafGD10 YRI_mafGD10 --header --limit 10
vtools phenotype --from_stat 'CEU_totalGD10=#(GT)' 'CEU_numGD10=#(alt)' --genotypes 'DP_geno>10' --samples "RACE=1"
vtools phenotype --from_stat 'YRI_totalGD10=#(GT)' 'YRI_numGD10=#(alt)' --genotypes 'DP_geno>10' --samples "RACE=0"
vtools phenotype --output sample_name CEU_totalGD10 CEU_numGD10 YRI_totalGD10 YRI_numGD10 --header
vtools select variant 'maf>=0.01' -t variant_MAFge01 'Variants that have MAF >= 0.01'
vtools show tables
vtools execute KING --var_table variant_MAFge01
vtools_report plot_pheno_fields KING_MDS1 KING_MDS2 RACE --dot KING.mds.race.pdf --discrete_color Dark2
vtools_report plot_pheno_fields KING_MDS1 KING_MDS2 panel --dot KING.mds.panel.pdf --discrete_color Dark2
vtools execute ANNOVAR geneanno
vtools output variant chr pos ref alt mut_type --limit 20 --header
vtools_report trans_ratio variant -n num
vtools_report trans_ratio variant -n numGD10
vtools select variant "DP<15" -t to_remove
vtools show tables
vtools remove variants to_remove -v0
vtools show tables
vtools remove genotypes "DP_geno<10" -v0  
vtools select variant "mut_type like 'non%' or mut_type like 'stop%' or region_type='splicing'" -t v_funct  
vtools show tables  
vtools show samples --limit 5  
vtools select variant --samples "RACE=1" -t CEU  
mkdir -p ceu 
cd ceu 
vtools init ceu --parent ../ --variants CEU --samples "RACE=1" --build hg19
vtools show project
vtools select variant "CEU_mafGD10>=0.05" -t common_ceu
vtools select v_funct "CEU_mafGD10<0.01" -t rare_ceu  
vtools use refGene  
vtools show annotation refGene  
vtools associate -h  
vtools show tests  
vtools show test LinRegBurden 
vtools associate common_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db EA_CV > EA_CV.asso.res
grep -i error *.log
less EA_CV.asso.res
sort -g -k7 EA_CV.asso.res | head
vtools show fields
vtools associate rare_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db EA_RV > EA_RV.asso.res
grep -i error *.log | tail -22
less EA_RV.asso.res
sort -g -k6 EA_RV.asso.res | head
vtools associate rare_ceu BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db EA_RV > EA_RV_VT.asso.res
grep -i error *.log | tail -22
less EA_RV_VT.asso.res
sort -g -k6 EA_RV_VT.asso.res | head
vtools select rare_ceu "refGene.name2='ABCC1'" -o chr pos ref alt CEU_mafGD10 numGD10 mut_type --header
vtools_report plot_association qq -o QQRV -b --label_top 2 -f 6 < EA_RV.asso.res
vtools_report plot_association manhattan -o MHRV -b --label_top 5 --color Dark2 --chrom_prefix None -f 6 < EA_RV.asso.res 
vtools associate rare_ceu BMI --covariate SEX KING_MDS1 KING_MDS2 -m "LinRegBurden --name RVMDS2 --alternative 2" -g refGene.name2 -j1 --to_db EA_RV > EA_RV_MDS2.asso.res
vtools_report plot_association qq -o QQRV_MDS2 -b --label_top 2 -f 6 < EA_RV_MDS2.asso.res  
cd ..  
vtools select variant --samples "RACE=0" -t YRI 
mkdir -p yri 
cd yri 
vtools init yri --parent ../ --variants YRI --samples "RACE=0" --build hg19 
vtools select variant "YRI_mafGD10>=0.05" -t common_yri
vtools select v_funct "YRI_mafGD10<0.01" -t rare_yri  
vtools use refGene  
vtools associate common_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db YA_CV > YA_CV.asso.res
vtools associate rare_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db YA_RV > YA_RV.asso.res
vtools associate rare_yri BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db YA_RV > YA_RV_VT.asso.res
cd ..
vtools_report meta_analysis ceu/EA_RV_VT.asso.res yri/YA_RV_VT.asso.res --beta 5 --pval 6 --se 7 -n 2 --link 1 > META_RV_VT.asso.res
cut -f1,3 META_RV_VT.asso.res | head

Personal tools

Search

Namespaces

Views

Actions

Widgets

Widgets

Recent changes

Wanted pages

Who is online?

Tools

AdvGeneMap2018Commands

From Statistical Genetics Courses

ANNOVAR

GenABEL

VAT

Navigation menu

Short Courses

Software

Course Materials