Changes

2016-genetic-association-commands

6,207 bytes added, 19:24, 19 May 2017
==GeneABEL==
  plink --file GWAS_clean4 --pheno pheno.phen --pheno-name Aff --transpose --recode --out gwa_gabel--noweb plink --file GWAS_clean4 --pheno pheno.phen --pheno-name systolic --transpose --recode --out gwa_gabel_qtl--noweb
R
library(GenABEL)
cps <- cps.full$points
plot(cps[,1], cps[,2], pch = g.dat@phdata$popn)
legend(”topright”"topright", c("TSI","MEX", "CEU"), pch = c(1,2,3))
colnames(cps)<-c('C1','C2','C3','C4','C5','C6','C7','C8','C9','C10')
gpc.dat <- g.dat
plot(test.qt, col = "black")
add.plot(test.eg, col = "gray", pch = 3)
legend("topright", c("Original plot","After correction w/ EIGENSTRAT"), pch = c(1,3))==Imputation exercise==
==Imputation exercise== plink --file chr22_imputation_ex--noweb plink --file chr22_imputation_ex --maf 0.01 --mind 0.02 --geno 0.05 --hwe 0.001 --out qc_check--noweb plink --file chr22_imputation_ex --maf 0.01 --mind 0.02 --geno 0.05 --hwe 0.001 --recode --out chr22_clean1--noweb plink --file chr22_clean1 --maf 0.01 --mind 0.02 --geno 0.05 --hwe 0.001 --out qc_check_2--noweb plink --file chr22_clean1 --filter-cases --hwe 0.001 --recode --out chr22_cases_clean--noweb plink --file chr22_clean1 --filter-controls --recode --out chr22_controls_clean--noweb plink --file chr22_controls_clean --merge chr22_cases_clean.ped chr22_cases_clean.map -–hwe -hwe 0.001 --recode --out chr22_all_clean--noweb plink --file chr22_all_clean --logistic --out chr22_all_clean_geno--noweb
R
mydata = read.table(“chr22_all_clean_geno"chr22_all_clean_geno.assoc.logistic”logistic", header=T)
names(mydata)
plot(mydata$BP, -log10(mydata$P))
smallp
q()
mach1 --hapmapFormat -d chr22_mach_merlin.map -p chr22_mach_merlin.ped --haps genotypes_chr22_CEU_r22_nr.b36_fwd.phase.gz --snps genotypes_chr22_CEU_r22_nr.b36_fwd_legend.txt.gz --greedy --rounds 100 --mle --mldetails --autoflip -o chr22_HIHII plink --dosage chr22_HIHII_dose_mach4plink.txt.gz Zin --fam chr22_imputation_ex.fam --map chr22_imputed_snps_positions.map --out chr22_HIHII_dosage--noweb
R
dosage = read.table("chr22_HIHII_dosage.assoc.dosage", header= T)
names(dosage)
plot(dosage$BP, -log10(dosage$P))
dosagep = dosage[which(dosage$P < 5E-8),] dosagep = dosagep[order(dosagep$BP),] dosagep
interest = dosage[which(dosage$SNP=='rs715586'),]
interest
 
 
==PLINK_R==
Introduction
city[c(1,5:6)]
population[3]
population["Oslo"]
population[c("Berlin","Rome")]
population
capital
GWAS Data QC
cd plink/exercise/ plink --file GWAS--noweb plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind--noweb plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5--noweb plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5--noweb plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean--noweb plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean--noweb plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean--noweb plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2--noweb plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking --noweb
R
plink –-file GWAS_clean2 –-check-sex –-out GWAS_sex_checking
sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T)
names(sexcheck)
sex_problem
q()
plink --file GWAS_clean2 --genome --out duplicates--noweb
R
dups = read.table(“duplicates"duplicates.genome”genome", header = T)
problem_pairs = dups[which(dups$PI_HAT > 0.4),]
problem_pairs
problem_pairs[myvars]
q()
plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3--noweb plink --file GWAS_clean3 --het--noweb
R
Dataset &lt;- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".",
dev.off()
q()
plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy--noweb
R
hardy = read.table(“plink"plink.hwe”hwe", header = T)
names(hardy)
hwe_prob = hardy[which(hardy$P < 0.0000009),]
hwe_prob
q()
plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4--noweb
MultifactorialPart 1
plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.add --logistic --noweb
plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.sexage.add --logistic --sex --covar dbp.age.pheno --noweb
plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.snp1112.add --logistic --condition rs1112 --noweb
plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.snp1117.add --logistic --condition rs1117--noweb
plink --ped dbp.qt.ped --map dbp.map --map3 --out linreg.sex.add --linear --sex --noweb
plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.sex.inter.add --logistic --sex --interaction --noweb
plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.snp1112.inter.add --logistic --condition rs1112 --interaction --noweb
R
load("dbp.R")
ls()
dbp[1:5,]
result.snp12 = glm (affection ~ rs1112, family=binomial("logit"), data=dpbdbp)
print (result.snp12)
print ( class (result.snp12) )
print (ci)
print ( exp(ci) )
snp.data = dpbdbp[,c("affection", "rs1112")] summary(snp.data) snp.data[,"rs1112"]<-as.numeric(snp.data[,"rs1112"])-1
summary(snp.data)
snp.data[,"rs1112"] summary(snp.data) result.all = glm (affection ~ rs1112, family=binomial("logit"), data=snp.data) dev.all = anova (result.all, test="Chi")
summary(result.all)
print(dev.all)
snp.data = dpbdbp[,c("affection", "trait","sex", "age", "rs1112", "rs1117")]
summary(snp.data)
snp.data[,"rs1112"] <-as.numeric(snp.data[,"rs1112"])-1 snp.data[,"rs1117"]<-as.numeric(snp.data[,"rs1117"] )-1 result.adj = glm (affection ~ sex + rs1112 , family=binomial("logit"), data=snp.data)
summary(result.adj)
result.adj = glm (affection ~ age + rs1112 , family=binomial("logit"), data=snp.data)
result.adj = lm (trait ~ sex + rs1112, data=snp.data)
summary(result.adj)
 
Multifactorial Part 2
 
plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.sex.inter.add --logistic --sex --interaction --noweb
plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.snp1112.inter.add --logistic --condition rs1112 --interaction --noweb
R<br />load("dbp.R")<br />ls()<br />dbp[1:5,]<br />summary(dbp)
result.inter = glm (affection ~ sex * rs1112, family=binomial("logit"), data=snp.data)
summary(result.inter)
result.reg = glm (affection ~ sex + age + rs1112 + rs1117, family=binomial("logit"), data=snp.data)
summary(result.reg)
modelchoice.result <-step(result.reg)<br />summary(modelchoice.result) q()
 GWAS Control Substructure
plink --file GWAS_clean4 --genome --mds-plot 10--noweb
R
mydata = read.table("mds_components.txt", header=T)
dev.off()
q()
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj--noweb plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.mds --covar-name C1 --logistic --adjust --out C1--noweb plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.mds --covar-name C1-C2 --logistic --adjust --out C1-C2--noweb
R
broadqq &lt;-function(pvals, title)
adj.p.values
rownames(adj.p.values$adjp) = names(p.values[adj.p.values$index])
adj.p.values$adjp==SEQPower==
==PSEQ exercise==
Data analysis:
 
pseq help
pseq help all
pseq myproj new-project --resources hg19
pseq myproj load-vcf --vcf CEU.exon.2010_03.genotypes.hg19.vcf.gz YRI.exon.2010_03.genotypes.hg19.vcf.gz
pseq myproj load-pheno --file phenotype.phe
pseq myproj v-view | head
pseq myproj i-view | head
pseq myproj summary
pseq myproj var-summary
pseq myproj ind-summary
pseq myproj loc-summary
pseq myproj ref-summary
pseq myproj seq-summary
pseq myproj file-summary
pseq myproj meta-summary
pseq myproj v-stats
pseq myproj i-stats | head
pseq myproj tag-file --id 1 --name CEU
pseq myproj tag-file --id 2 --name YRI
pseq myproj var-summary
pseq myproj v-freq | head
pseq myproj v-freq --mask file=CEU | head
pseq myproj v-freq --mask file=YRI | head
pseq myproj v-view --mask any.filter.ex | head
pseq myproj v-view --mask any.filter.ex | wc -l
pseq myproj v-view --mask any.filter | wc -l
pseq myproj var-set --group pass --mask any.filter.ex
pseq myproj var-summary
pseq myproj var-set --group pass_DP15 --mask include="DP>14" var=pass
pseq myproj var-summary
pseq myproj var-set --group pass_DP15_DPgeno10 --mask geno=DP:ge:11 var=pass_DP15
pseq myproj var-summary
pseq myproj var-set --group pass_DP15_DPgeno10_CEU --mask file=CEU var=pass_DP15_DPgeno10
pseq myproj var-summary
pseq myproj var-set --group pass_DP15_DPgeno10_CEU_HWE --mask hwe=5.7e-7:1 var=pass_DP15_DPgeno10_CEU
pseq myproj var-summary
pseq myproj var-set --group pass_DP15_DPgeno10_CEU_HWE_MAFgt05 --mask maf=0.05:0.5 var=pass_DP15_DPgeno10_CEU_HWE
pseq myproj var-summary
pseq myproj var-set --group pass_DP15_DPgeno10_CEU_HWE_MAFlt01 --mask "mac=1 maf=0.01" var=pass_DP15_DPgeno10_CEU_HWE
pseq myproj var-summary
pseq myproj glm --phenotype BMI --covar SEX --mask var=pass_DP15_DPgeno10_CEU_HWE_MAFgt05 > SNV_CEU.result
head SNV_CEU.result
cat SNV_CEU.result | awk '{if(FNR==1) print $0; if(NR>1) print $0 | "sort -k9 2>/dev/null"}' | grep -v "NA\s\+NA\s\+NA" | head
pseq myproj assoc --tests fw vt --phenotype BMI
pseq myproj assoc --tests skat --phenotype BMI --covar SEX --mask var=pass_DP15_DPgeno10_CEU_HWE_MAFlt01 loc.group=refseq > SKAT_CEU.result
pseq myproj assoc --tests skat --phenotype BMI --covar SEX --mask include="DP>14" geno=DP:ge:11 file=CEU hwe=5.7e-7:1 "mac=1 maf=0.01" loc.group=refseq > SKAT_CEU.result
head -20 SKAT_CEU.result
cat SKAT_CEU.result | grep SKAT | grep -v "P=NA" | sort -k6 | head -15
 
Exercise&nbsp;analyzing YRI samples:
 
pseq myproj var-set --group pass_DP15_DPgeno10_YRI --mask file=YRI var=pass_DP15_DPgeno10
pseq myproj var-summary
pseq myproj var-set --group pass_DP15_DPgeno10_YRI_HWE --mask hwe=5.7e-7:1 var=pass_DP15_DPgeno10_YRI
pseq myproj var-summary
pseq myproj var-set --group pass_DP15_DPgeno10_YRI_HWE_MAFgt05 --mask maf=0.05:0.5 var=pass_DP15_DPgeno10_YRI_HWE
pseq myproj var-summary
pseq myproj var-set --group pass_DP15_DPgeno10_YRI_HWE_MAFlt01 --mask "mac=1 maf=0.01" var=pass_DP15_DPgeno10_YRI_HWE
pseq myproj var-summary
pseq myproj glm --phenotype BMI --covar SEX --mask var=pass_DP15_DPgeno10_YRI_HWE_MAFgt05 > SNV_YRI.result
head SNV_YRI.result
cat SNV_YRI.result | awk '{if(FNR==1) print $0; if(NR>1) print $0 | "sort -k9 2>/dev/null"}' | grep -v "NA\s\+NA\s\+NA" | head
pseq myproj assoc --tests skat --phenotype BMI --covar SEX --mask include="DP>14" geno=DP:ge:11 file=YRI hwe=5.7e-7:1 "mac=1 maf=0.01" loc.group=refseq > SKAT_YRI.result
head -20 SKAT_YRI.result
cat SKAT_YRI.result | grep SKAT | grep -v "P=NA" | sort -k6 | head -15==RV-TDT exercise==
vtools init rvtdt
vtools import --format vcf data/data.vcf --build hg19
vtools phenotype --from_file data/phen.txt
# variant selection
vtools execute ANNOVAR geneanno
vtools select variant "variant.region_type like '%splicing%'or variant.mut_type like 'nonsynonymous%' or variant.mut_type like 'frameshift%' or variant.mut_type like 'stop%'" -t func_variant
# tped file
vtools export func_variant --format tped --samples 'phenotype is not null' > vat_raw.tped
sort -k4 -n vat_raw.tped | awk 'BEGIN{OFS="\t";prev="None";copy=1} {$2=$1"_"$4; $3=0; if($2==prev) {$2=$2"_"copy; copy=copy+1} else {prev=$2; copy=1}; print $0}' > vat_export.tped
# tfam file
vtools phenotype --out family sample_name pid mid sex phenotype > vat_export.tfam
# anno file
vtools use refGene-hg19_20130904
vtools update func_variant --set 'maf=0.001'
vtools select func_variant -o chr pos refGene.name2 maf --header > vat_export.anno
# Mendelian error and recode
plink --noweb --tfile vat_export --recode12 --me 1 1 --set-me-missing --out "recode12_noME"
sort -n -k1 -k6 -k2 recode12_noME.ped | sed 's/ /\t/g' | cut -f1,3,4,5 --complement > linkage.ped
cut -f2 recode12_noME.map | awk 'BEGIN{OFS="\t";} {print "M",$0}' | sed '1i\I\tid\nA\tDisease' > linkage.dat
java -Xmx10000m -jar java/linkage2beagle.jar linkage.dat linkage.ped > pre_beagle.bgl
python script/pre_phase.py -i pre_beagle.bgl -a pre_beagle_withMissing.bgl
java -Xmx10000m -jar java/beagle.jar missing=0 trios=pre_beagle.bgl out=bgl_phased verbose=false redundant=true
gunzip bgl_phased.pre_beagle.bgl.phased.gz
python script/post_phase.py -a vat_export.anno -b bgl_phased.pre_beagle.bgl.phased -o genes/
for g in `ls genes | grep tped | cut -d"." -f1 | head -20`
do
echo "runing rvTDT on gene "${g}
rvTDT exercise_proj -G ./genes/${g}.tped -P ./data/rvtdt.phen -M ./genes/${g}.map --adapt 500 --alpha 0.00001 --permut 2000 --lower_cutoff 0 --upper_cutoff 100 --minVariants 3 --maxMissRatio 1
done
# Answer
vtools show tables
ls genes/ | grep tped | wc
cat exercise_proj_pval/*.pval | grep -v "^#" | sort -k2
cat exercise_proj_pval/*.pval | grep -v "^#" | sort -k3
# clean
rm -r exercise_proj* genes/* bgl* linkage* recode12* pre_beagle* vat_export.*
==SEQPower==
spower -h
spower LOGIT -h
spower show test SKAT
spower LOGIT Kryukov2009European1800.sfs --def_rare 0.01 --def_neutral -0.00001 0.00001 --moi A --proportion_detrimental 1 --proportion_protective 0 --OR_rare_detrimental 1.5 --OR_common_detrimental 1 --baseline_effect 0.01 --sample_size 1000 --p1 0.5 --limit 1 --alpha 0.05 --method "KBAC --name K1 --mafupper 0.01 --maflower 0 --alternative 1 --moi additive --permutations 1000 --adaptive 0.1" --replicates 1000 --jobs 4 -o exercise
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.2 --ORmax_rare_detrimental 3.0 --method CFisher -r 100 -j 4 -l 1 -o exercise
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.2 --ORmax_rare_detrimental 3.0 --method CFisher -r 100 -j 4 -l 1 -o exercise
spower show exercise.loci.csv effect*
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.2 --ORmax_rare_detrimental 3.0 --proportion_detrimental 0.8 --method CFisher -r 100 -j 4 -l 1 -o exercise
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_sites 0.05 --method CFisher -r 100 -j 4 -l 1 -o exercise
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_sites 0.05 --method CFisher -r 100 -j 4 -l 1 -o exercise
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_low_maf 0.000125 --method CFisher -r 100 -j 4 -l 1 -o exercise
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_low_maf 0.000125 --method CFisher -r 100 -j 4 -l 1 -o exercise
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --method "CFisher --alternative 1 --name CMC" "KBAC --permutations 1000 --alternative 1" "WSSRankTest --alternative 1 --name WSS" "VTtest --alternative 1 --permutations 1000" "SKAT disease" -r 100 -j 4 -l 1 -o exercise
spower show exercise.csv method power
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 -j 1 -l 1 -o exercise
spower LOGIT Kryukov2009European1800.sfs --power 0.8 --OR_rare_detrimental 1.5 -j 1 -l 1 -o exercise
spower LNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --method "CollapseQt --name CMC --alternative 2" -r 100 -j 4 -l 1 -o exercise
spower LNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --meanshiftmax_rare_detrimental 0.5 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise
spower ELNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --QT_thresholds 0.4 0.6 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise
spower show exercise.csv sample* power<br />spower ELNR Kryukov2009European1800.sfs --sample_size 1000 --p1 0.5 --meanshift_rare_detrimental 0.5 --QT_thresholds 0.4 0.6 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --method "GroupWrite ExerciseSimulation" -j 4 -o exercise -v1
spower show exercise.SEQPowerDB
spower show exercise.SEQPowerDB LOGIT
spower show exercise.SEQPowerDB LOGIT method power title --condition "where power between 0.25 and 0.95"
for i in 1 1.5 2 2.5 3 3.5 4; do
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental $i --method "CFisher --name CMC$i" --title FixedOR$i -r 100 -j 4 -l 1 -o exercise2
done<br />spower show exercise2.SEQPowerDB LOGIT method power title==Unphased== 
unphased.sh
unphased mypeds.ped –marker 1 2 3 –missing –permutation 10
unphased all.ped -window 2 -LD
unphased all.ped -window 2 -LD &gt;&gt; results.txt
 
==VAT==
ulimit -s 8000
vtools -h
vtools init VATDemo
vtools remove variants to_remove -v0
vtools show tables
vtools remove genotypes "DP_geno&lt;10" -v0 <br />vtools select variant "mut_type like 'non%' or mut_type like 'stop%' or region_type='splicing'" -t v_funct <br />vtools show tables <br />vtools show samples --limit 5 <br />vtools select variant --samples "RACE=1" -t CEU <br />mkdir -p ceu <br />cd ceu <br />vtools init ceu --parent ../ --variants CEU --samples "RACE=1" --build hg19
vtools show project
vtools select variant "CEU_mafGD10&gt;=0.05" -t common_ceu
vtools select v_funct "CEU_mafGD10&lt;0.01" -t rare_ceu <br />vtools use refGene <br />vtools show annotation refGene <br />vtools associate -h <br />vtools show tests <br />vtools show test LinRegBurden <br />vtools associate common_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db EA_CV &gt; EA_CV.asso.res
grep -i error *.log
less EA_CV.asso.res
vtools select rare_ceu "refGene.name2='ABCC1'" -o chr pos ref alt CEU_mafGD10 numGD10 mut_type --header
vtools_report plot_association qq -o QQRV -b --label_top 2 -f 6 &lt; EA_RV.asso.res
vtools_report plot_association manhattan -o MHRV -b --label_top 5 --color Dark2 --chrom_prefix None -f 6 &lt; EA_RV.asso.res <br />vtools associate rare_ceu BMI --covariate SEX KING_MDS1 KING_MDS2 -m "LinRegBurden --name RVMDS2 --alternative 2" -g refGene.name2 -j1 --to_db EA_RV &gt; EA_RV_MDS2.asso.res vtools_report plot_association qq -o QQRV_MDS2 -b --label_top 2 -f 6 &lt; EA_RV_MDS2.asso.res <br />cd .. <br />vtools select variant --samples "RACE=0" -t YRI <br />mkdir -p yri <br />cd yri <br />vtools init yri --parent ../ --variants YRI --samples "RACE=0" --build hg19 <br />vtools select variant "YRI_mafGD10&gt;=0.05" -t common_yri vtools select v_funct "YRI_mafGD10&lt;0.01" -t rare_yri <br />vtools use refGene <br />vtools associate common_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db YA_CV &gt; YA_CV.asso.res
vtools associate rare_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db YA_RV &gt; YA_RV.asso.res
vtools associate rare_yri BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db YA_RV &gt; YA_RV_VT.asso.res
Bureaucrat, administrator
1,252
edits