Difference between revisions of "AdvGeneMap2018Commands"

Latest revision as of 19:46, 23 January 2018

table_annovar.pl
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Gene.vcf -remove -nastring . -protocol refGene -operation g -vcfinput
cat APOC3_Gene.vcf.hg19_multianno.txt
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Gene.vcf -remove -nastring . -protocol refGene,knownGene,ensGene -operation g,g,g -arg '-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing' -vcfinput
awk -F'\t' '{print $1,$2,$6,$7,$8,$9,$10}' APOC3_Gene.vcf.hg19_multianno.txt
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Region.vcf -remove -nastring . -protocol phastConsElements46way -operation r -vcfinput
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Region.vcf -remove -nastring . -protocol gwasCatalog -operation r -vcfinput
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_Filter.vcf -remove -nastring . -protocol gnomad_genome,gnomad_exome,popfreq_max_20150413,gme,avsnp150,dbnsfp33a,dbscsnv11,cadd13gt20,clinvar_20170905,gwava -operation f,f,f,f,f,f,f,f,f,f -vcfinput
awk -F'\t' '{print $1,$2,$103,$104}' APOC3_Filter.vcf.hg19_multianno.txt
awk -F'\t' '{print $1,$2,$6,$14}' APOC3_Filter.vcf.hg19_multianno.txt
awk -F'\t' '{print $1,$2,$15,$16,$17,$18,$19,$20,$21,$22}' APOC3_Filter.vcf.hg19_multianno.txt
awk -F'\t' '{print $1,$2,$36,$86,$70}' APOC3_Filter.vcf.hg19_multianno.txt
awk -F'\t' '{print $1,$2,$99,$100}' APOC3_Filter.vcf.hg19_multianno.txt
table_annovar.pl APOC3.vcf humandb/ -buildver hg19 -out APOC3_ANN.vcf -remove -nastring . -protocol refGene,knownGene,ensGene,wgRna,targetScanS,phastConsElements46way,tfbsConsSites,gwasCatalog,gnomad_genome,gnomad_exome,popfreq_max_20150413,gme,avsnp150,dbnsfp33a,dbscsnv11,cadd13gt20,clinvar_20170905,gwava -operation g,g,g,r,r,r,r,r,f,f,f,f,f,f,f,f,f,f -arg '-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing','-splicing 12 -exonicsplicing',,,,,,,,,,,,,,, -vcfinput

GenABEL

# Load files
library(GenABEL)
convert.snp.tped(tped = "gwa_gabel_qtl.tped", tfam = "gwa_gabel_qtl.tfam", out = "gwa_gabel_qtl.raw", strand = "u")
g.dat <- load.gwaa.data(phen = "gwa_gabel_qtl.praw", gen = "gwa_gabel_qtl.raw", force = T)
slotNames(g.dat)
slotNames(g.dat@gtdata)
colnames(g.dat@phdata)
# sample size
sample.size <- g.dat@gtdata@nids
# number of SNPs
snps.total <- g.dat@gtdata@nsnps
print(c(sample.size, snps.total)) 
# Trait
summary(g.dat@phdata$disease)
hist(g.dat@phdata$disease, main="Quantitative Phenotype data summary", xlab = "Systolic pressure measure", freq = F,breaks=20, col="gray")
rug(g.dat@phdata$disease) 
###
# tests for association
###
# GLM test
test.snp <- scan.glm('disease ~ CRSNP', family = gaussian(), data = g.dat)
names(test.snp)
alpha <- 5e-8  
test.snp$snpnames[test.snp$P1df < alpha]
test.snp$P1df[test.snp$P1df < alpha]
# Score test
test.qt <- qtscore(disease, data = g.dat, trait = "gaussian")
slotNames(test.qt)
names(test.qt@results)
test.qt@lambda
descriptives.scan(test.qt)
rownames(results(test.qt))[results(test.qt)$P1df < alpha]
results(test.qt)$P1df[results(test.qt)$P1df < alpha] 
results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]
# QQ plot
obs <- sort(results(test.qt)$P1df) 
ept <- c(1:length(obs)) / (length(obs) + 1) 
plot(-log10(ept), -log10(obs), main = "GWAS QQ plot, qtl", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
# Manhattan plot        
plot(test.qt, col = "black")
# Adding confounders
test.qt.sex <- qtscore(disease ~ sex, data = g.dat, trait = "gaussian")
rownames(results(test.qt.sex))[results(test.qt)$P1df < alpha]
summary(lm(disease ~ sex, data = g.dat))
###
# MDS
###
gkin <- ibs(g.dat, weight = "freq")
gkin[1:10,1:10]
cps.full <- cmdscale(as.dist(.5 - gkin), eig = T, k = 10)
names(cps.full) 
cps <- cps.full$points 
plot(cps[,1], cps[,2], pch = g.dat@phdata$popn)
legend(-0.16, 0.06, c("TSI","MEX", "CEU"), pch = c(1,2,3))  
###
# Corrected test
###
# Incorporating PCs as predictors
colnames(cps)<-c('C1','C2','C3','C4','C5','C6','C7','C8','C9','C10') 
gpc.dat <- g.dat
gpc.dat@phdata<-cbind(g.dat@phdata, cps)
test.pc.a <- scan.glm('disease ~ CRSNP + C1 + C2 + C3 + C4 + C5', family=gaussian(), data = gpc.dat) 
test.pc.a$snpnames[test.pc.a$P1df < alpha]
test.pc.a$P1df[test.pc.a$P1df < alpha]
test.pc.b <- qtscore(disease ~  C1 + C2 + C3 + C4 + C5, data = gpc.dat, trait = "gaussian")
test.pc.b@lambda
# scree plot
plot(cps.full$eig[1:10]/sum(cps.full$eig), axes = F, type = "b", xlab = "Components",  ylim = c(0,0.05), ylab = "Proportion of Variations", main = "MDS analysis scree plot") 
axis(1, 1:10)
axis(2)
# cumulative plot
plot(cumsum(cps.full$eig[1:10])/sum(cps.full$eig), axes = F, type = "b", ylim = c(0,0.2), xlab = "Components", ylab = "Proportion of Variations", main = "MDS analysis cumulative plot") 
axis(1, 1:10)
axis(2)
# Genomic control
# Uncorrected GIF
test.qt@lambda 
# Corrected p-value
row.names(results(test.qt))[results(test.qt)$Pc1df < alpha]
results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]
# Check for inflation of statistic 
obs <- sort(results(test.qt)$chi2.1df)
ept <- sort(qchisq(1:length(obs) / (length(obs) + 1), df = 1)) 
plot(ept, obs, main = "Genomic control (slope is the inflation factor)", xlab="Expected chisq, 1df", ylab="Observed chisq, 1df")
abline(0, 1, col = "red")
abline(0, test.qt@lambda[1], lty = 2)
# Definition of GIF
# Conventional definition
median(results(test.qt)$chi2.1df)/0.456
# GenABEL definition
lm(obs~ept)$coef[2]
# QQ plot
obs <- sort(results(test.qt)$Pc1df)
ept <- c(1:length(obs)) / (length(obs) + 1)
plot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. via Genomic Control", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
# EIGENSTRAT
adj.gkin = gkin
diag(adj.gkin) = hom(g.dat)$Var
# naxes = 3 is default value
test.eg <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = 2)
descriptives.scan(test.eg)
snp.eg <- row.names(results(test.eg))[results(test.eg)$P1df < alpha]
pvalue.eg <- results(test.eg)$P1df[results(test.eg)$P1df < alpha]
lambda.eg <- test.eg@lambda
snp.eg 
pvalue.eg
lambda.eg
# Change #PCs
for (k in 1:10){ 
test.tmp <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = k)
print(test.tmp@lambda$estimate)
}
# QQ plot
obs <- sort(results(test.eg)$Pc1df)
ept <- c(1:length(obs)) / (length(obs) + 1) 
qqplot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. w/ EIGENSTRAT", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
# Manhattan plot comparison
plot(test.qt, col = "black")
add.plot(test.eg, col = "gray", pch = 3)
legend("topright", c("Original plot","After correction w/ EIGENSTRAT"), pch = c(1,3))  
###
# Basic test, binary trait
###
# load files to GenABEL
convert.snp.tped(tped = "gwa_gabel.tped", tfam = "gwa_gabel.tfam", out = "gwa_gabel.raw", strand = "u")
b.dat <- load.gwaa.data(phen = "gwa_gabel.praw", gen = "gwa_gabel.raw", force = T)
slotNames(b.dat)
slotNames(b.dat@gtdata)
colnames(b.dat@phdata)
# sample size
b.dat@gtdata@nids
# number of cases and controls
case.size <- length(which(b.dat@phdata$disease == 1))
control.size <- length(which(b.dat@phdata$disease == 0))
case.size 
control.size 
# number of SNPs
snpsb.total <- b.dat@gtdata@nsnps
# GLM test
testb.snp <- scan.glm('disease ~ CRSNP', family = binomial(), data = b.dat)
names(testb.snp)  
alpha <- 5e-8
testb.snp$snpnames[testb.snp$P1df < alpha]
testb.snp$P1df[testb.snp$P1df < alpha]
# Score test
testb.qt <- qtscore(disease, data = b.dat, trait = "binomial")
slotNames(testb.qt)
descriptives.scan(testb.qt)
row.names(results(testb.qt))[results(testb.qt)$P1df < alpha]
results(testb.qt)$P1df[results(testb.qt)$P1df < alpha] 
results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha]

GxG Interaction

./plink --noweb --ped simcasecon.ped --map simcasecon.map --assoc
./plink --noweb --ped simcasecon.ped --map simcasecon.map --fast-epistasis
./plink --noweb --ped simcasecon.ped --map simcasecon.map --fast-epistasis --case-only
./plink --noweb --ped simcasecon.ped --map simcasecon.map --epistasis
./plink --noweb --ped simcasecon.ped --map simcasecon.map --recodeA --out recoded
./plink --noweb --ped simcasecon.ped --map simcasecon.map --make-bed --out cassiformat
R
# The following commands are in the R environment
je <-read.table("cassi.out", header=T)
je
library(ORMDR)
recoded<-read.table("recoded.raw", header=T)
head(recoded)
newdata<-recoded[7:106]
ormdrdata<-cbind(newdata,recoded$PHENOTYPE-1)
names(ormdrdata)[101]<-"casestatus"
head(ormdrdata)
mdr1<-mdr.c(ormdrdata, colresp=101, cs=1, combi=1, cv.fold = 10)
mdr1$min.comb
mdr2<-mdr.c(ormdrdata, colresp=101, cs=1, combi=2, cv.fold = 10)
mdr2$min.comb
mdr3<-mdr.c(ormdrdata, colresp=101, cs=1, combi=3, cv.fold = 10)
mdr3$min.comb
mdr1$test.erate
mdr2$test.erate
mdr3$test.erate
mdr1mean<-mean(mdr1$test.erate)
mdr2mean<-mean(mdr2$test.erate)
mdr3mean<-mean(mdr3$test.erate)
mdr1mean
mdr2mean
mdr3mean
mdr2$best.combi
mdr2$min.comb
mdr3$best.combi
mdr3$min.comb
logreg12<-glm(casestatus ~ factor(snp1_2)*factor(snp2_1), family=binomial,
data=ormdrdata)
summary(logreg12)
anova(logreg12)
pchisq(701.68,4,lower.tail=F)
pchisq(703.82,8,lower.tail=F)
logreg345<-glm(casestatus ~ factor(snp3_2)*factor(snp4_2)*factor(snp5_2),
family=binomial, data=ormdrdata)
summary(logreg345)
anova(logreg345)
pchisq(45.6,8,lower.tail=F)
q()
### The following commands are in the linux shell
./BEAM3 beam3data.txt -o beam3results
./BEAM3 beam3data.txt -o beam3results -T 10

Plink - Part 1 - Data QC

plink --file GWAS
plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind
plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5
plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5
plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean
plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean
plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean
plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2
plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking
#### in R - open R by simply typing R
setwd("to_your_working_directory/")
sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T)
names(sexcheck)
sex_problem = sexcheck[which(sexcheck$STATUS=="PROBLEM"),]
sex_problem
q()
##################################
plink --file GWAS_clean2 --genome --out duplicates
#### in R
setwd("to_your_working_directory/")
dups = read.table("duplicates.genome", header = T)
problem_pairs = dups[which(dups$PI_HAT > 0.4),]
problem_pairs
problem_pairs = dups[which(dups$PI_HAT > 0.05),]
myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT")
problem_pairs[myvars]
q()
######
plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3
plink --file GWAS_clean3 --het
###### in R
Dataset <- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".", strip.white=TRUE)
mean(Dataset$F)
sd(Dataset$F)
jpeg("hist.jpeg", height=1000, width=1000)
hist(scale(Dataset$F), xlim=c(-4,4))
dev.off()
q()
######
plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy 
##### in R
hardy = read.table("plink.hwe", header = T)
names(hardy)
hwe_prob = hardy[which(hardy$P < 0.0000009),]
hwe_prob
q()
##########
plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4

Plink - Part 2 - Controlling for Substructure

plink --file GWAS_clean4 --genome --cluster --mds-plot 10
#### in R
mydata = read.table("mds_components.txt", header=T)
mydata$pch[mydata$Group==1 ] <-15
mydata$pch[mydata$Group==2 ] <-16
mydata$pch[mydata$Group==3 ] <-2
jpeg("mds.jpeg", height=500, width=500)
plot(mydata$C1, mydata$C2 ,pch=mydata$pch)
dev.off()
q()
######
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj
plink --file GWAS_clean4 --genome --cluster --pca 10 header
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1 --logistic --adjust --out PC1
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1-PC2 --logistic --adjust --out PC1-PC2
#### in R
broadqq <-function(pvals, title)
{
    observed <- sort(pvals)
    lobs <- -(log10(observed))
    expected <- c(1:length(observed))
    lexp <- -(log10(expected / (length(expected)+1)))
    plot(c(0,7), c(0,7), col="red", lwd=3, type="l", xlab="Expected (-logP)", ylab="Observed (-logP)", xlim=c(0,max(lobs)), ylim=c(0,max(lobs)), las=1, xaxs="i", yaxs="i", bty="l", main = title)
    points(lexp, lobs, pch=23, cex=.4, bg="black") }
jpeg("qqplot_compare.jpeg", height=1000, width=500)
par(mfrow=c(2,1))
aff_unadj<-read.table("unadj.assoc.logistic", header=TRUE)
aff_unadj.add.p<-aff_unadj[aff_unadj$TEST==c("ADD"),]$P
broadqq(aff_unadj.add.p,"Some Trait Unadjusted")
aff_C1C2<-read.table("PC1-PC2.assoc.logistic", header=TRUE)
aff_C1C2.add.p<-aff_C1C2[aff_C1C2$TEST==c("ADD"),]$P
broadqq(aff_C1C2.add.p, "Some Trait Adjusted for PC1 and PC2")
dev.off()
gws_unadj = aff_unadj[which(aff_unadj$P < 0.0000001),]
gws_unadj
gws_adjusted = aff_C1C2[which(aff_C1C2$P < 0.0000001),]
gws_adjusted

RV-TDT

### Variant Annotation
vtools init rvtdt
vtools import --format vcf data/data.vcf --build hg19
vtools phenotype --from_file data/phen.txt
vtools execute ANNOVAR geneanno
vtools select variant "variant.region_type like '%splicing%'or variant.mut_type like 'nonsynonymous%' or variant.mut_type like 'frameshift%' or variant.mut_type like 'stop%'" -t func_variant
vtools export func_variant --format tped --samples 'phenotype is not null' > vat_raw.tped
# set marker name as chr_pos, needs to avoid duplicate name
sort -k4 -n vat_raw.tped | awk 'BEGIN{OFS="\t";prev="None";copy=1} {$2=$1"_"$4; $3=0; if($2==prev) {$2=$2"_"copy; copy=copy+1} else {prev=$2; copy=1}; print $0}' > vat_export.tped
vtools phenotype --out family sample_name pid mid sex phenotype > vat_export.tfam
vtools use refGene-hg19_20130904
vtools update func_variant --set 'maf=0.001' # set the maf to be 0.001
vtools select func_variant -o chr pos refGene.name2 maf --header > vat_export.anno

### Phasing Trio
plink --noweb --tfile vat_export --recode12 --me 1 1 --set-me-missing --out "recode12_noME"
sort -n -k1 -k6 -k2 recode12_noME.ped | sed 's/ /\t/g' | cut -f1,3,4,5 --complement > linkage.ped cut -f2 recode12_noME.map | awk 'BEGIN{OFS="\t";} {print "M",$0}' | sed '1i\I\tid\nA\tDisease' > linkage.dat
java -Xmx10000m -jar java/linkage2beagle.jar linkage.dat linkage.ped > pre_beagle.bgl
python script/pre_phase.py -i pre_beagle.bgl -a pre_beagle_withMissing.bgl
java -Xmx10000m -jar java/beagle.jar missing=0 trios=pre_beagle.bgl out=bgl_phased verbose=false redundant=true
gunzip bgl_phased.pre_beagle.bgl.phased.gz

### RV-TDT Analysis
python script/post_phase.py -a vat_export.anno -b bgl_phased.pre_beagle.bgl.phased -o genes/
for g in `ls genes | grep tped | cut -d"." -f1 | head -20`
do
    echo "running rvTDT on gene "${g}
    rvTDT exercise_proj -G ./genes/${g}.tped -P ./data/rvtdt.phen -M ./genes/${g}.map --adapt 500 --alpha 0.00001 --permut 2000 --lower_cutoff 0 --upper_cutoff 100 --minVariants 3 --maxMissRatio 1 done
done

Seqspark

hdfs dfs -put demo.vcf.bz2
hdfs dfs -put demo.tsv
seqspark annotation.conf
seqspark qc.conf
seqspark demo.conf

VAT

vtools -h
vtools init VATDemo
vtools import *.vcf.gz --var_info DP filter --geno_info DP_geno --build hg18 -j1
vtools liftover hg19
head phenotypes.csv
vtools phenotype --from_file phenotypes.csv --delimiter ","
vtools show project
vtools show tables
vtools show table variant
vtools show samples
vtools show genotypes
vtools show fields
vtools select variant --count
vtools show genotypes > GenotypeSummary.txt
head GenotypeSummary.txt
vtools output variant "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
vtools select variant "filter=’PASS’" --count
vtools select variant "filter=’PASS’" -o "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header
vtools update variant --from_stat ’total=#(GT)’ ’num=#(alt)’ ’het=#(het)’ ’hom=#(hom)’ ’other=#(other)’ ’minDP=min(DP_geno)’ ’maxDP=max(DP_geno)’ ’meanDP=avg(DP_geno)’ ’maf=maf()’
vtools show fields
vtools show table variant
vtools update variant --from_stat ’totalGD10=#(GT)’ ’numGD10=#(alt)’ ’hetGD10=#(het)’ ’homGD10=#(hom)’ ’otherGD10=#(other)’ ’mafGD10=maf()’ --genotypes "DP_geno > 10"
vtools show fields
vtools show table variant
vtools output variant chr pos maf mafGD10 --header --limit 20
vtools phenotype --set "RACE=0" --samples "filename like ’YRI%’"
vtools phenotype --set "RACE=1" --samples "filename like ’CEU%’"
vtools show samples --limit 10
vtools update variant --from_stat ’CEU_mafGD10=maf()’ --genotypes ’DP_geno>10’ --samples "RACE=1"
vtools update variant --from_stat ’YRI_mafGD10=maf()’ --genotypes ’DP_geno>10’ --samples "RACE=0"
vtools output variant chr pos mafGD10 CEU_mafGD10 YRI_mafGD10 --header --limit 10
vtools phenotype --from_stat ’CEU_totalGD10=#(GT)’ ’CEU_numGD10=#(alt)’ --genotypes ’DP_geno>10’ --samples "RACE=1"
vtools phenotype --from_stat ’YRI_totalGD10=#(GT)’ ’YRI_numGD10=#(alt)’ --genotypes ’DP_geno>10’ --samples "RACE=0"
vtools phenotype --output sample_nameCEU_totalGD10CEU_numGD10YRI_totalGD10YRI_numGD10 --header
vtools execute ANNOVAR geneanno
vtools output variant chr pos ref alt mut_type --limit 20 --header
vtools_report trans_ratio variant -n num
vtools_report trans_ratio variant -n numGD10
vtools select variant "DP<15" -t to_remove
vtools show tables
vtools remove variants to_remove -v0
vtools show tables
vtools remove genotypes "DP_geno<10" -v0
vtools select variant "mut_type like ’non%’ or mut_type like ’stop%’ or region_type=’splicing’" -t v_funct 
vtools show tables
vtools show samples --limit 5
vtools select variant --samples "RACE=1" -t CEU
mkdir -p ceu
cd ceu
vtools init ceu --parent ../ --variants CEU --samples "RACE=1" --build hg19 vtools show project
vtools select variant "CEU_mafGD10>=0.05" -t common_ceu
vtools select v_funct "CEU_mafGD10<0.01" -t rare_ceu
vtools use refGene
vtools show annotation refGene
vtools associate -h
vtools show tests
vtools show test LinRegBurden
vtools associate common_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db EA_CV > EA_CV.asso.res
grep -i error *.log
less EA_CV.asso.res
sort -g -k7 EA_CV.asso.res | head
vtools show fields
vtools associate rare_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db EA_RV  > EA_RV.asso.res
grep -i error *.log | tail -10
less EA_RV.asso.res
sort -g -k6 EA_RV.asso.res | head
vtools associate rare_ceu BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db EA_RV > EA_RV_VT.asso.res
grep -i error *.log | tail -10
less EA_RV_VT.asso.res
sort -g -k6 EA_RV_VT.asso.res | head
vtools select rare_ceu "refGene.name2=’ABCC1’" -o chr pos ref alt CEU_mafGD10 numGD10 mut_type --header
cd ..
vtools select variant --samples "RACE=0" -t YRI
mkdir -p yri
cd yri
vtools init yri --parent ../ --variants YRI --samples "RACE=0" --build hg19 vtools select variant "YRI_mafGD10>=0.05" -t common_yri vtools select v_funct "YRI_mafGD10<0.01" -t rare_yri
vtools use refGene
vtools associate common_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db YA_CV > YA_CV.asso.res
vtools associate rare_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db YA_RV > YA_RV.asso.res vtools associate rare_yri BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db YA_RV > YA_RV_VT.asso.res
cd ..
vtools_report meta_analysis ceu/EA_RV_VT.asso.res yri/YA_RV_VT.asso.res --beta 5 --pval 6 --se 7 -n 2 --link 1 > ME\ TA_RV_VT.asso.res
cut -f1,3 META_RV_VT.asso.res | head

Personal tools

Search

Namespaces

Views

Actions

Widgets

Widgets

Recent changes

Wanted pages

Who is online?

Tools

Difference between revisions of "AdvGeneMap2018Commands"

From Statistical Genetics Courses

Latest revision as of 19:46, 23 January 2018

Contents

Functional Annotation

GenABEL

GxG Interaction

Plink - Part 1 - Data QC

Plink - Part 2 - Controlling for Substructure

RV-TDT

Seqspark

VAT

Navigation menu

Short Courses

Software

Course Materials