Changes - Statistical Genetics Courses

2016-genetic-association-commands

6,207 bytes added, 19:24, 19 May 2017

==GeneABEL==

plink --file GWAS_clean4 --pheno pheno.phen --pheno-name Aff --transpose --recode --out gwa_gabel--noweb plink --file GWAS_clean4 --pheno pheno.phen --pheno-name systolic --transpose --recode --out gwa_gabel_qtl--noweb

library(GenABEL)

cps <- cps.full$points

plot(cps[,1], cps[,2], pch = g.dat@phdata$popn)

legend(~~”topright”~~"topright", c("TSI","MEX", "CEU"), pch = c(1,2,3))

colnames(cps)<-c('C1','C2','C3','C4','C5','C6','C7','C8','C9','C10')

gpc.dat <- g.dat

plot(test.qt, col = "black")

add.plot(test.eg, col = "gray", pch = 3)

legend("topright", c("Original plot","After correction w/ EIGENSTRAT"), pch = c(1,3))==Imputation exercise==

~~==Imputation exercise==~~ plink --file chr22_imputation_ex--noweb plink --file chr22_imputation_ex --maf 0.01 --mind 0.02 --geno 0.05 --hwe 0.001 --out qc_check--noweb plink --file chr22_imputation_ex --maf 0.01 --mind 0.02 --geno 0.05 --hwe 0.001 --recode --out chr22_clean1--noweb plink --file chr22_clean1 --maf 0.01 --mind 0.02 --geno 0.05 --hwe 0.001 --out qc_check_2--noweb plink --file chr22_clean1 --filter-cases --hwe 0.001 --recode --out chr22_cases_clean--noweb plink --file chr22_clean1 --filter-controls --recode --out chr22_controls_clean--noweb plink --file chr22_controls_clean --merge chr22_cases_clean.ped chr22_cases_clean.map -~~–hwe~~ -hwe 0.001 --recode --out chr22_all_clean--noweb plink --file chr22_all_clean --logistic --out chr22_all_clean_geno--noweb

mydata = read.table(~~“chr22_all_clean_geno~~"chr22_all_clean_geno.assoc.~~logistic”~~logistic", header=T)

names(mydata)

plot(mydata$BP, -log10(mydata$P))

smallp

q()

mach1 --hapmapFormat -d chr22_mach_merlin.map -p chr22_mach_merlin.ped --haps genotypes_chr22_CEU_r22_nr.b36_fwd.phase.gz –--snps genotypes_chr22_CEU_r22_nr.b36_fwd_legend.txt.gz --greedy --rounds 100 --mle --mldetails --autoflip -o chr22_HIHII plink –--dosage chr22_HIHII_dose_mach4plink.txt.gz Zin –--fam chr22_imputation_ex.fam –--map chr22_imputed_snps_positions.map --out chr22_HIHII_dosage--noweb

dosage = read.table("chr22_HIHII_dosage.assoc.dosage", header= T)

names(dosage)

plot(dosage$BP, -log10(dosage$P))

dosagep = dosage[which(dosage$P < 5E-8),] dosagep = dosagep[order(dosagep$BP),] dosagep

interest = dosage[which(dosage$SNP=='rs715586'),]

interest

==PLINK_R==

Introduction

city[c(1,5:6)]

population[3]

population["Oslo"]

population[c("Berlin","Rome")]

population

capital

GWAS Data QC

~~cd plink/exercise/~~ plink --file GWAS--noweb plink –--file GWAS –--mind 0.10 –--recode –--out GWAS_clean_mind--noweb plink –--file GWAS_clean_mind –--maf 0.05 –--recode –--out MAF_greater_5--noweb plink –--file GWAS_clean_mind –--exclude MAF_greater_5.map –--recode –--out MAF_less_5--noweb plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean--noweb plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean--noweb plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean--noweb plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2--noweb plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking --noweb

~~plink –-file GWAS_clean2 –-check-sex –-out GWAS_sex_checking~~

sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T)

names(sexcheck)

sex_problem

q()

plink –--file GWAS_clean2 –--genome –--out duplicates--noweb

dups = read.table(~~“duplicates~~"duplicates.~~genome”~~genome", header = T)

problem_pairs = dups[which(dups$PI_HAT > 0.4),]

problem_pairs

problem_pairs[myvars]

q()

plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3--noweb plink –--file GWAS_clean3 –--het--noweb

Dataset <- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".",

dev.off()

q()

plink –--file GWAS_clean3 –--pheno pheno.txt –--pheno-name Aff –--hardy--noweb

hardy = read.table(~~“plink~~"plink.~~hwe”~~hwe", header = T)

names(hardy)

hwe_prob = hardy[which(hardy$P < 0.0000009),]

hwe_prob

q()

plink –--file GWAS_clean3 –--exclude HWE_out.txt –--recode –--out GWAS_clean4--noweb

MultifactorialPart 1

plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.add --logistic --noweb

plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.sexage.add --logistic --sex --covar dbp.age.pheno --noweb

plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.snp1112.add --logistic --condition rs1112 --noweb

plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.snp1117.add --logistic --condition rs1117--noweb

plink --ped dbp.qt.ped --map dbp.map --map3 --out linreg.sex.add --linear --sex --noweb

~~plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.sex.inter.add --logistic --sex --interaction --noweb~~

~~plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.snp1112.inter.add --logistic --condition rs1112 --interaction --noweb~~

load("dbp.R")

ls()

dbp[1:5,]

result.snp12 = glm (affection ~ rs1112, family=binomial("logit"), data=~~dpb~~dbp)

print (result.snp12)

print ( class (result.snp12) )

print (ci)

print ( exp(ci) )

snp.data = ~~dpb~~dbp[,c("affection", "rs1112")] summary(snp.data) snp.data[,"rs1112"]<-as.numeric(snp.data[,"rs1112"])-1

summary(snp.data)

~~snp.data[,"rs1112"] summary(snp.data)~~ result.all = glm (affection ~ rs1112, family=binomial("logit"), data=snp.data) dev.all = anova (result.all, test="Chi")

summary(result.all)

print(dev.all)

snp.data = ~~dpb~~dbp[,c("affection", "trait","sex", "age", "rs1112", "rs1117")]

summary(snp.data)

snp.data[,"rs1112"] <-as.numeric(snp.data[,"rs1112"])-1 snp.data[,"rs1117"]<-as.numeric(snp.data[,"rs1117"] )-1 result.adj = glm (affection ~ sex + rs1112 , family=binomial("logit"), data=snp.data)

summary(result.adj)

result.adj = glm (affection ~ age + rs1112 , family=binomial("logit"), data=snp.data)

result.adj = lm (trait ~ sex + rs1112, data=snp.data)

summary(result.adj)

Multifactorial Part 2

plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.sex.inter.add --logistic --sex --interaction --noweb

plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.snp1112.inter.add --logistic --condition rs1112 --interaction --noweb

R load("dbp.R") ls() dbp[1:5,] summary(dbp)

result.inter = glm (affection ~ sex * rs1112, family=binomial("logit"), data=snp.data)

summary(result.inter)

result.reg = glm (affection ~ sex + age + rs1112 + rs1117, family=binomial("logit"), data=snp.data)

summary(result.reg)

modelchoice.result <-step(result.reg) summary(modelchoice.result) q()

GWAS Control Substructure

plink –--file GWAS_clean4 –--genome –--mds-plot 10--noweb

mydata = read.table("mds_components.txt", header=T)

dev.off()

q()

plink –--file GWAS_clean4 –--pheno pheno.txt –--pheno-name Aff –--logistic --adjust –--out unadj--noweb plink –--file GWAS_clean4 –--pheno pheno.txt –--pheno-name Aff –--covar plink.mds –--covar-name C1 –--logistic –--adjust –--out C1--noweb plink –--file GWAS_clean4 –--pheno pheno.txt –--pheno-name Aff –--covar plink.mds –--covar-name C1-C2 –--logistic –--adjust –--out C1-C2--noweb

broadqq <-function(pvals, title)

adj.p.values

rownames(adj.p.values$adjp) = names(p.values[adj.p.values$index])

adj.p.values$adjp~~==SEQPower==~~

==PSEQ exercise==

Data analysis:

pseq help

pseq help all

pseq myproj new-project --resources hg19

pseq myproj load-vcf --vcf CEU.exon.2010_03.genotypes.hg19.vcf.gz YRI.exon.2010_03.genotypes.hg19.vcf.gz

pseq myproj load-pheno --file phenotype.phe

pseq myproj v-view | head

pseq myproj i-view | head

pseq myproj summary

pseq myproj var-summary

pseq myproj ind-summary

pseq myproj loc-summary

pseq myproj ref-summary

pseq myproj seq-summary

pseq myproj file-summary

pseq myproj meta-summary

pseq myproj v-stats

pseq myproj i-stats | head

pseq myproj tag-file --id 1 --name CEU

pseq myproj tag-file --id 2 --name YRI

pseq myproj var-summary

pseq myproj v-freq | head

pseq myproj v-freq --mask file=CEU | head

pseq myproj v-freq --mask file=YRI | head

pseq myproj v-view --mask any.filter.ex | head

pseq myproj v-view --mask any.filter.ex | wc -l

pseq myproj v-view --mask any.filter | wc -l

pseq myproj var-set --group pass --mask any.filter.ex

pseq myproj var-summary

pseq myproj var-set --group pass_DP15 --mask include="DP>14" var=pass

pseq myproj var-summary

pseq myproj var-set --group pass_DP15_DPgeno10 --mask geno=DP:ge:11 var=pass_DP15

pseq myproj var-summary

pseq myproj var-set --group pass_DP15_DPgeno10_CEU --mask file=CEU var=pass_DP15_DPgeno10

pseq myproj var-summary

pseq myproj var-set --group pass_DP15_DPgeno10_CEU_HWE --mask hwe=5.7e-7:1 var=pass_DP15_DPgeno10_CEU

pseq myproj var-summary

pseq myproj var-set --group pass_DP15_DPgeno10_CEU_HWE_MAFgt05 --mask maf=0.05:0.5 var=pass_DP15_DPgeno10_CEU_HWE

pseq myproj var-summary

pseq myproj var-set --group pass_DP15_DPgeno10_CEU_HWE_MAFlt01 --mask "mac=1 maf=0.01" var=pass_DP15_DPgeno10_CEU_HWE

pseq myproj var-summary

pseq myproj glm --phenotype BMI --covar SEX --mask var=pass_DP15_DPgeno10_CEU_HWE_MAFgt05 > SNV_CEU.result

head SNV_CEU.result

cat SNV_CEU.result | awk '{if(FNR==1) print $0; if(NR>1) print $0 | "sort -k9 2>/dev/null"}' | grep -v "NA\s\+NA\s\+NA" | head

pseq myproj assoc --tests fw vt --phenotype BMI

pseq myproj assoc --tests skat --phenotype BMI --covar SEX --mask var=pass_DP15_DPgeno10_CEU_HWE_MAFlt01 loc.group=refseq > SKAT_CEU.result

pseq myproj assoc --tests skat --phenotype BMI --covar SEX --mask include="DP>14" geno=DP:ge:11 file=CEU hwe=5.7e-7:1 "mac=1 maf=0.01" loc.group=refseq > SKAT_CEU.result

head -20 SKAT_CEU.result

cat SKAT_CEU.result | grep SKAT | grep -v "P=NA" | sort -k6 | head -15

Exercise analyzing YRI samples:

pseq myproj var-set --group pass_DP15_DPgeno10_YRI --mask file=YRI var=pass_DP15_DPgeno10

pseq myproj var-summary

pseq myproj var-set --group pass_DP15_DPgeno10_YRI_HWE --mask hwe=5.7e-7:1 var=pass_DP15_DPgeno10_YRI

pseq myproj var-summary

pseq myproj var-set --group pass_DP15_DPgeno10_YRI_HWE_MAFgt05 --mask maf=0.05:0.5 var=pass_DP15_DPgeno10_YRI_HWE

pseq myproj var-summary

pseq myproj var-set --group pass_DP15_DPgeno10_YRI_HWE_MAFlt01 --mask "mac=1 maf=0.01" var=pass_DP15_DPgeno10_YRI_HWE

pseq myproj var-summary

pseq myproj glm --phenotype BMI --covar SEX --mask var=pass_DP15_DPgeno10_YRI_HWE_MAFgt05 > SNV_YRI.result

head SNV_YRI.result

cat SNV_YRI.result | awk '{if(FNR==1) print $0; if(NR>1) print $0 | "sort -k9 2>/dev/null"}' | grep -v "NA\s\+NA\s\+NA" | head

pseq myproj assoc --tests skat --phenotype BMI --covar SEX --mask include="DP>14" geno=DP:ge:11 file=YRI hwe=5.7e-7:1 "mac=1 maf=0.01" loc.group=refseq > SKAT_YRI.result

head -20 SKAT_YRI.result

cat SKAT_YRI.result | grep SKAT | grep -v "P=NA" | sort -k6 | head -15==RV-TDT exercise==

vtools init rvtdt

vtools import --format vcf data/data.vcf --build hg19

vtools phenotype --from_file data/phen.txt

# variant selection

vtools execute ANNOVAR geneanno

vtools select variant "variant.region_type like '%splicing%'or variant.mut_type like 'nonsynonymous%' or variant.mut_type like 'frameshift%' or variant.mut_type like 'stop%'" -t func_variant

# tped file

vtools export func_variant --format tped --samples 'phenotype is not null' > vat_raw.tped

sort -k4 -n vat_raw.tped | awk 'BEGIN{OFS="\t";prev="None";copy=1} {$2=$1"_"$4; $3=0; if($2==prev) {$2=$2"_"copy; copy=copy+1} else {prev=$2; copy=1}; print $0}' > vat_export.tped

# tfam file

vtools phenotype --out family sample_name pid mid sex phenotype > vat_export.tfam

# anno file

vtools use refGene-hg19_20130904

vtools update func_variant --set 'maf=0.001'

vtools select func_variant -o chr pos refGene.name2 maf --header > vat_export.anno

# Mendelian error and recode

plink --noweb --tfile vat_export --recode12 --me 1 1 --set-me-missing --out "recode12_noME"

sort -n -k1 -k6 -k2 recode12_noME.ped | sed 's/ /\t/g' | cut -f1,3,4,5 --complement > linkage.ped

cut -f2 recode12_noME.map | awk 'BEGIN{OFS="\t";} {print "M",$0}' | sed '1i\I\tid\nA\tDisease' > linkage.dat

java -Xmx10000m -jar java/linkage2beagle.jar linkage.dat linkage.ped > pre_beagle.bgl

python script/pre_phase.py -i pre_beagle.bgl -a pre_beagle_withMissing.bgl

java -Xmx10000m -jar java/beagle.jar missing=0 trios=pre_beagle.bgl out=bgl_phased verbose=false redundant=true

gunzip bgl_phased.pre_beagle.bgl.phased.gz

python script/post_phase.py -a vat_export.anno -b bgl_phased.pre_beagle.bgl.phased -o genes/

for g in `ls genes | grep tped | cut -d"." -f1 | head -20`

echo "runing rvTDT on gene "${g}

rvTDT exercise_proj -G ./genes/${g}.tped -P ./data/rvtdt.phen -M ./genes/${g}.map --adapt 500 --alpha 0.00001 --permut 2000 --lower_cutoff 0 --upper_cutoff 100 --minVariants 3 --maxMissRatio 1

done

# Answer

vtools show tables

ls genes/ | grep tped | wc

cat exercise_proj_pval/*.pval | grep -v "^#" | sort -k2

cat exercise_proj_pval/*.pval | grep -v "^#" | sort -k3

# clean

rm -r exercise_proj* genes/* bgl* linkage* recode12* pre_beagle* vat_export.*

==SEQPower==

spower -h

spower LOGIT -h

spower show test SKAT

spower LOGIT Kryukov2009European1800.sfs --def_rare 0.01 --def_neutral -0.00001 0.00001 --moi A --proportion_detrimental 1 --proportion_protective 0 --OR_rare_detrimental 1.5 --OR_common_detrimental 1 --baseline_effect 0.01 --sample_size 1000 --p1 0.5 --limit 1 --alpha 0.05 --method "KBAC --name K1 --mafupper 0.01 --maflower 0 --alternative 1 --moi additive --permutations 1000 --adaptive 0.1" --replicates 1000 --jobs 4 -o exercise

~~spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.2 --ORmax_rare_detrimental 3.0 --method CFisher -r 100 -j 4 -l 1 -o exercise~~

spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.2 --ORmax_rare_detrimental 3.0 --method CFisher -r 100 -j 4 -l 1 -o exercise

spower show exercise.loci.csv effect*

spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.2 --ORmax_rare_detrimental 3.0 --proportion_detrimental 0.8 --method CFisher -r 100 -j 4 -l 1 -o exercise

spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_sites 0.05 --method CFisher -r 100 -j 4 -l 1 -o exercise

~~spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_sites 0.05 --method CFisher -r 100 -j 4 -l 1 -o exercise~~

~~spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_low_maf 0.000125 --method CFisher -r 100 -j 4 -l 1 -o exercise~~

spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_low_maf 0.000125 --method CFisher -r 100 -j 4 -l 1 -o exercise

spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --method "CFisher --alternative 1 --name CMC" "KBAC --permutations 1000 --alternative 1" "WSSRankTest --alternative 1 --name WSS" "VTtest --alternative 1 --permutations 1000" "SKAT disease" -r 100 -j 4 -l 1 -o exercise

spower show exercise.csv method power

spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 -j 1 -l 1 -o exercise

spower LOGIT Kryukov2009European1800.sfs --power 0.8 --OR_rare_detrimental 1.5 -j 1 -l 1 -o exercise

spower LNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --method "CollapseQt --name CMC --alternative 2" -r 100 -j 4 -l 1 -o exercise

spower LNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --meanshiftmax_rare_detrimental 0.5 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise

spower ELNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --QT_thresholds 0.4 0.6 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise

spower show exercise.csv sample* power spower ELNR Kryukov2009European1800.sfs --sample_size 1000 --p1 0.5 --meanshift_rare_detrimental 0.5 --QT_thresholds 0.4 0.6 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise

spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --method "GroupWrite ExerciseSimulation" -j 4 -o exercise -v1

spower show exercise.SEQPowerDB

spower show exercise.SEQPowerDB LOGIT

spower show exercise.SEQPowerDB LOGIT method power title --condition "where power between 0.25 and 0.95"

for i in 1 1.5 2 2.5 3 3.5 4; do

spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental $i --method "CFisher --name CMC$i" --title FixedOR$i -r 100 -j 4 -l 1 -o exercise2

done spower show exercise2.SEQPowerDB LOGIT method power title==Unphased==

unphased.sh

unphased mypeds.ped –marker 1 2 3 –missing –permutation 10

unphased all.ped -window 2 -LD

unphased all.ped -window 2 -LD >> results.txt

~~==VAT==~~

~~ulimit -s 8000~~

vtools -h

vtools init VATDemo

vtools remove variants to_remove -v0

vtools show tables

vtools remove genotypes "DP_geno<10" -v0 vtools select variant "mut_type like 'non%' or mut_type like 'stop%' or region_type='splicing'" -t v_funct vtools show tables vtools show samples --limit 5 vtools select variant --samples "RACE=1" -t CEU mkdir -p ceu cd ceu vtools init ceu --parent ../ --variants CEU --samples "RACE=1" --build hg19

vtools show project

vtools select variant "CEU_mafGD10>=0.05" -t common_ceu

vtools select v_funct "CEU_mafGD10<0.01" -t rare_ceu vtools use refGene vtools show annotation refGene vtools associate -h vtools show tests vtools show test LinRegBurden vtools associate common_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db EA_CV > EA_CV.asso.res

grep -i error *.log

less EA_CV.asso.res

vtools select rare_ceu "refGene.name2='ABCC1'" -o chr pos ref alt CEU_mafGD10 numGD10 mut_type --header

vtools_report plot_association qq -o QQRV -b --label_top 2 -f 6 < EA_RV.asso.res

vtools_report plot_association manhattan -o MHRV -b --label_top 5 --color Dark2 --chrom_prefix None -f 6 < EA_RV.asso.res vtools associate rare_ceu BMI --covariate SEX KING_MDS1 KING_MDS2 -m "LinRegBurden --name RVMDS2 --alternative 2" -g refGene.name2 -j1 --to_db EA_RV > EA_RV_MDS2.asso.res vtools_report plot_association qq -o QQRV_MDS2 -b --label_top 2 -f 6 < EA_RV_MDS2.asso.res cd .. vtools select variant --samples "RACE=0" -t YRI mkdir -p yri cd yri vtools init yri --parent ../ --variants YRI --samples "RACE=0" --build hg19 vtools select variant "YRI_mafGD10>=0.05" -t common_yri vtools select v_funct "YRI_mafGD10<0.01" -t rare_yri vtools use refGene vtools associate common_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db YA_CV > YA_CV.asso.res

vtools associate rare_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db YA_RV > YA_RV.asso.res

vtools associate rare_yri BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db YA_RV > YA_RV_VT.asso.res

Serveradmin

Bureaucrat, administrator

1,252

edits