Difference between revisions of "2016-genetic-association-commands"
From Statistical Genetics Courses
Serveradmin (Talk | contribs) |
Serveradmin (Talk | contribs) |
||
(42 intermediate revisions by the same user not shown) | |||
Line 2: | Line 2: | ||
==GenABEL== | ==GenABEL== | ||
− | plink --file GWAS_clean4 --pheno pheno.phen --pheno-name Aff --transpose --recode --out gwa_gabel | + | |
− | plink --file GWAS_clean4 --pheno pheno.phen --pheno-name systolic --transpose --recode --out gwa_gabel_qtl | + | plink --file GWAS_clean4 --pheno pheno.phen --pheno-name Aff --transpose --recode --out gwa_gabel --noweb |
+ | plink --file GWAS_clean4 --pheno pheno.phen --pheno-name systolic --transpose --recode --out gwa_gabel_qtl --noweb | ||
R | R | ||
library(GenABEL) | library(GenABEL) | ||
Line 67: | Line 68: | ||
cps <- cps.full$points | cps <- cps.full$points | ||
plot(cps[,1], cps[,2], pch = g.dat@phdata$popn) | plot(cps[,1], cps[,2], pch = g.dat@phdata$popn) | ||
− | legend( | + | legend("topright", c("TSI","MEX", "CEU"), pch = c(1,2,3)) |
colnames(cps)<-c('C1','C2','C3','C4','C5','C6','C7','C8','C9','C10') | colnames(cps)<-c('C1','C2','C3','C4','C5','C6','C7','C8','C9','C10') | ||
gpc.dat <- g.dat | gpc.dat <- g.dat | ||
Line 117: | Line 118: | ||
plot(test.qt, col = "black") | plot(test.qt, col = "black") | ||
add.plot(test.eg, col = "gray", pch = 3) | add.plot(test.eg, col = "gray", pch = 3) | ||
− | legend("topright", c("Original plot","After correction w/ EIGENSTRAT"), pch = c(1,3)) | + | legend("topright", c("Original plot","After correction w/ EIGENSTRAT"), pch = c(1,3))
+ | ==Imputation exercise== | ||
− | + | plink --file chr22_imputation_ex --noweb | |
− | plink --file chr22_imputation_ex | + | plink --file chr22_imputation_ex --maf 0.01 --mind 0.02 --geno 0.05 --hwe 0.001 --out qc_check --noweb |
− | plink --file chr22_imputation_ex --maf 0.01 --mind 0.02 --geno 0.05 --hwe 0.001 --out qc_check | + | plink --file chr22_imputation_ex --maf 0.01 --mind 0.02 --geno 0.05 --hwe 0.001 --recode --out chr22_clean1 --noweb |
− | plink --file chr22_imputation_ex --maf 0.01 --mind 0.02 --geno 0.05 --hwe 0.001 --recode --out chr22_clean1 | + | plink --file chr22_clean1 --maf 0.01 --mind 0.02 --geno 0.05 --hwe 0.001 --out qc_check_2 --noweb |
− | plink --file chr22_clean1 --maf 0.01 --mind 0.02 --geno 0.05 --hwe 0.001 --out qc_check_2 | + | plink --file chr22_clean1 --filter-cases --hwe 0.001 --recode --out chr22_cases_clean --noweb |
− | plink --file chr22_clean1 --filter-cases --hwe 0.001 --recode --out chr22_cases_clean | + | plink --file chr22_clean1 --filter-controls --recode --out chr22_controls_clean --noweb |
− | plink --file chr22_clean1 --filter-controls --recode --out chr22_controls_clean | + | plink --file chr22_controls_clean --merge chr22_cases_clean.ped chr22_cases_clean.map --hwe 0.001 --recode --out chr22_all_clean --noweb |
− | plink --file chr22_controls_clean --merge chr22_cases_clean.ped chr22_cases_clean.map - | + | plink --file chr22_all_clean --logistic --out chr22_all_clean_geno --noweb |
− | plink --file chr22_all_clean --logistic --out chr22_all_clean_geno | + | |
R | R | ||
− | mydata = read.table( | + | mydata = read.table("chr22_all_clean_geno.assoc.logistic", header=T) |
names(mydata) | names(mydata) | ||
plot(mydata$BP, -log10(mydata$P)) | plot(mydata$BP, -log10(mydata$P)) | ||
Line 137: | Line 137: | ||
smallp | smallp | ||
q() | q() | ||
− | mach1 --hapmapFormat -d chr22_mach_merlin.map -p chr22_mach_merlin.ped --haps genotypes_chr22_CEU_r22_nr.b36_fwd.phase.gz | + | mach1 --hapmapFormat -d chr22_mach_merlin.map -p chr22_mach_merlin.ped --haps genotypes_chr22_CEU_r22_nr.b36_fwd.phase.gz --snps genotypes_chr22_CEU_r22_nr.b36_fwd_legend.txt.gz --greedy --rounds 100 --mle --mldetails --autoflip -o chr22_HIHII |
− | plink | + | plink --dosage chr22_HIHII_dose_mach4plink.txt.gz Zin --fam chr22_imputation_ex.fam --map chr22_imputed_snps_positions.map --out chr22_HIHII_dosage --noweb |
R | R | ||
dosage = read.table("chr22_HIHII_dosage.assoc.dosage", header= T) | dosage = read.table("chr22_HIHII_dosage.assoc.dosage", header= T) | ||
names(dosage) | names(dosage) | ||
plot(dosage$BP, -log10(dosage$P)) | plot(dosage$BP, -log10(dosage$P)) | ||
− | dosagep = dosage[which(dosage$P < 5E-8),] dosagep = dosagep[order(dosagep$BP),] dosagep | + | dosagep = dosage[which(dosage$P < 5E-8),] dosagep = dosagep[order(dosagep$BP),] |
+ | dosagep | ||
interest = dosage[which(dosage$SNP=='rs715586'),] | interest = dosage[which(dosage$SNP=='rs715586'),] | ||
interest | interest | ||
− | |||
− | |||
==PLINK_R== | ==PLINK_R== | ||
Introduction | Introduction | ||
Line 186: | Line 185: | ||
city[c(1,5:6)] | city[c(1,5:6)] | ||
population[3] | population[3] | ||
+ | population["Oslo"] | ||
+ | population[c("Berlin","Rome")] | ||
population | population | ||
capital | capital | ||
Line 226: | Line 227: | ||
GWAS Data QC | GWAS Data QC | ||
− | + | plink --file GWAS --noweb | |
− | plink --file GWAS | + | plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind --noweb |
− | plink | + | plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5 --noweb |
− | plink | + | plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5 --noweb |
− | plink | + | plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean --noweb |
− | plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean | + | plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean --noweb |
− | plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean | + | plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean --noweb |
− | plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean | + | plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2 --noweb |
− | plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2 | + | plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking --noweb |
R | R | ||
− | |||
sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T) | sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T) | ||
names(sexcheck) | names(sexcheck) | ||
Line 242: | Line 242: | ||
sex_problem | sex_problem | ||
q() | q() | ||
− | plink | + | plink --file GWAS_clean2 --genome --out duplicates --noweb |
R | R | ||
− | dups = read.table( | + | dups = read.table("duplicates.genome", header = T) |
problem_pairs = dups[which(dups$PI_HAT > 0.4),] | problem_pairs = dups[which(dups$PI_HAT > 0.4),] | ||
problem_pairs | problem_pairs | ||
Line 251: | Line 251: | ||
problem_pairs[myvars] | problem_pairs[myvars] | ||
q() | q() | ||
− | plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3 | + | plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3 --noweb |
− | plink | + | plink --file GWAS_clean3 --het --noweb |
R | R | ||
Dataset <- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".", | Dataset <- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".", | ||
Line 262: | Line 262: | ||
dev.off() | dev.off() | ||
q() | q() | ||
− | plink | + | plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy --noweb |
R | R | ||
− | hardy = read.table( | + | hardy = read.table("plink.hwe", header = T) |
names(hardy) | names(hardy) | ||
hwe_prob = hardy[which(hardy$P < 0.0000009),] | hwe_prob = hardy[which(hardy$P < 0.0000009),] | ||
hwe_prob | hwe_prob | ||
q() | q() | ||
− | plink | + | plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4 --noweb |
− | Multifactorial | + | Multifactorial Part 1 |
plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.add --logistic --noweb | plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.add --logistic --noweb | ||
Line 279: | Line 279: | ||
plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.sexage.add --logistic --sex --covar dbp.age.pheno --noweb | plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.sexage.add --logistic --sex --covar dbp.age.pheno --noweb | ||
plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.snp1112.add --logistic --condition rs1112 --noweb | plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.snp1112.add --logistic --condition rs1112 --noweb | ||
− | plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.snp1117.add --logistic --condition rs1117 | + | plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.snp1117.add --logistic --condition rs1117 --noweb |
plink --ped dbp.qt.ped --map dbp.map --map3 --out linreg.sex.add --linear --sex --noweb | plink --ped dbp.qt.ped --map dbp.map --map3 --out linreg.sex.add --linear --sex --noweb | ||
− | |||
− | |||
R | R | ||
load("dbp.R") | load("dbp.R") | ||
ls() | ls() | ||
dbp[1:5,] | dbp[1:5,] | ||
− | result.snp12 = glm (affection ~ rs1112, family=binomial("logit"), data= | + | result.snp12 = glm (affection ~ rs1112, family=binomial("logit"), data=dbp) |
print (result.snp12) | print (result.snp12) | ||
print ( class (result.snp12) ) | print ( class (result.snp12) ) | ||
Line 301: | Line 299: | ||
print (ci) | print (ci) | ||
print ( exp(ci) ) | print ( exp(ci) ) | ||
− | snp.data = | + | snp.data = dbp[,c("affection", "rs1112")] |
+ | summary(snp.data) | ||
+ | snp.data[,"rs1112"]<-as.numeric(snp.data[,"rs1112"])-1 | ||
summary(snp.data) | summary(snp.data) | ||
− | + | result.all=glm(affection ~ rs1112, family=binomial("logit"), data=snp.data) | |
− | result.all = glm (affection ~ rs1112, family=binomial("logit"), data=snp.data) | + | dev.all = anova(result.all, test="Chi") |
− | dev.all = anova (result.all, test="Chi") | + | |
summary(result.all) | summary(result.all) | ||
print(dev.all) | print(dev.all) | ||
− | snp.data = | + | snp.data = dbp[,c("affection","trait","sex","age","rs1112","rs1117")] |
summary(snp.data) | summary(snp.data) | ||
− | snp.data[,"rs1112"] snp.data[,"rs1117"] result.adj = glm (affection ~ sex + rs1112 , family=binomial("logit"), data=snp.data) | + | snp.data[,"rs1112"]<-as.numeric(snp.data[,"rs1112"])-1 |
+ | snp.data[,"rs1117"]<-as.numeric(snp.data[,"rs1117"])-1 | ||
+ | result.adj = glm(affection ~ sex + rs1112, family=binomial("logit"), data=snp.data) | ||
summary(result.adj) | summary(result.adj) | ||
result.adj = glm (affection ~ age + rs1112 , family=binomial("logit"), data=snp.data) | result.adj = glm (affection ~ age + rs1112 , family=binomial("logit"), data=snp.data) | ||
Line 326: | Line 327: | ||
result.adj = lm (trait ~ sex + rs1112, data=snp.data) | result.adj = lm (trait ~ sex + rs1112, data=snp.data) | ||
summary(result.adj) | summary(result.adj) | ||
+ | |||
+ | Multifactorial Part 2 | ||
+ | |||
+ | plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.sex.inter.add --logistic --sex --interaction --noweb | ||
+ | plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.snp1112.inter.add --logistic --condition rs1112 --interaction --noweb | ||
+ | R<br />load("dbp.R")<br />ls()<br />dbp[1:5,]<br />summary(dbp) | ||
result.inter = glm (affection ~ sex * rs1112, family=binomial("logit"), data=snp.data) | result.inter = glm (affection ~ sex * rs1112, family=binomial("logit"), data=snp.data) | ||
summary(result.inter) | summary(result.inter) | ||
Line 334: | Line 341: | ||
result.reg = glm (affection ~ sex + age + rs1112 + rs1117, family=binomial("logit"), data=snp.data) | result.reg = glm (affection ~ sex + age + rs1112 + rs1117, family=binomial("logit"), data=snp.data) | ||
summary(result.reg) | summary(result.reg) | ||
− | modelchoice.result summary(modelchoice.result | + | modelchoice.result<-step(result.reg)<br />summary(modelchoice.result) |
− | + | ||
GWAS Control Substructure | GWAS Control Substructure | ||
− | plink | + | plink --file GWAS_clean4 --genome --mds-plot 10 --noweb |
R | R | ||
mydata = read.table("mds_components.txt", header=T) | mydata = read.table("mds_components.txt", header=T) | ||
Line 349: | Line 355: | ||
dev.off() | dev.off() | ||
q() | q() | ||
− | plink | + | plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj --noweb |
− | plink | + | plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.mds --covar-name C1 --logistic --adjust --out C1 --noweb |
− | plink | + | plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.mds --covar-name C1-C2 --logistic --adjust --out C1-C2 --noweb |
R | R | ||
broadqq <-function(pvals, title) | broadqq <-function(pvals, title) | ||
Line 389: | Line 395: | ||
adj.p.values | adj.p.values | ||
rownames(adj.p.values$adjp) = names(p.values[adj.p.values$index]) | rownames(adj.p.values$adjp) = names(p.values[adj.p.values$index]) | ||
− | adj.p.values$adjp | + | adj.p.values$adjp |
+ | ==PSEQ exercise== | ||
+ | Data analysis: | ||
+ | |||
+ | pseq help | ||
+ | pseq help all | ||
+ | pseq myproj new-project --resources hg19 | ||
+ | pseq myproj load-vcf --vcf CEU.exon.2010_03.genotypes.hg19.vcf.gz YRI.exon.2010_03.genotypes.hg19.vcf.gz | ||
+ | pseq myproj load-pheno --file phenotype.phe | ||
+ | pseq myproj v-view | head | ||
+ | pseq myproj i-view | head | ||
+ | pseq myproj summary | ||
+ | pseq myproj var-summary | ||
+ | pseq myproj ind-summary | ||
+ | pseq myproj loc-summary | ||
+ | pseq myproj ref-summary | ||
+ | pseq myproj seq-summary | ||
+ | pseq myproj file-summary | ||
+ | pseq myproj meta-summary | ||
+ | pseq myproj v-stats | ||
+ | pseq myproj i-stats | head | ||
+ | pseq myproj tag-file --id 1 --name CEU | ||
+ | pseq myproj tag-file --id 2 --name YRI | ||
+ | pseq myproj var-summary | ||
+ | pseq myproj v-freq | head | ||
+ | pseq myproj v-freq --mask file=CEU | head | ||
+ | pseq myproj v-freq --mask file=YRI | head | ||
+ | pseq myproj v-view --mask any.filter.ex | head | ||
+ | pseq myproj v-view --mask any.filter.ex | wc -l | ||
+ | pseq myproj v-view --mask any.filter | wc -l | ||
+ | pseq myproj var-set --group pass --mask any.filter.ex | ||
+ | pseq myproj var-summary | ||
+ | pseq myproj var-set --group pass_DP15 --mask include="DP>14" var=pass | ||
+ | pseq myproj var-summary | ||
+ | pseq myproj var-set --group pass_DP15_DPgeno10 --mask geno=DP:ge:11 var=pass_DP15 | ||
+ | pseq myproj var-summary | ||
+ | pseq myproj var-set --group pass_DP15_DPgeno10_CEU --mask file=CEU var=pass_DP15_DPgeno10 | ||
+ | pseq myproj var-summary | ||
+ | pseq myproj var-set --group pass_DP15_DPgeno10_CEU_HWE --mask hwe=5.7e-7:1 var=pass_DP15_DPgeno10_CEU | ||
+ | pseq myproj var-summary | ||
+ | pseq myproj var-set --group pass_DP15_DPgeno10_CEU_HWE_MAFgt05 --mask maf=0.05:0.5 var=pass_DP15_DPgeno10_CEU_HWE | ||
+ | pseq myproj var-summary | ||
+ | pseq myproj var-set --group pass_DP15_DPgeno10_CEU_HWE_MAFlt01 --mask "mac=1 maf=0.01" var=pass_DP15_DPgeno10_CEU_HWE | ||
+ | pseq myproj var-summary | ||
+ | pseq myproj glm --phenotype BMI --covar SEX --mask var=pass_DP15_DPgeno10_CEU_HWE_MAFgt05 > SNV_CEU.result | ||
+ | head SNV_CEU.result | ||
+ | cat SNV_CEU.result | awk '{if(FNR==1) print $0; if(NR>1) print $0 | "sort -k9 2>/dev/null"}' | grep -v "NA\s\+NA\s\+NA" | head | ||
+ | pseq myproj assoc --tests fw vt --phenotype BMI | ||
+ | pseq myproj assoc --tests skat --phenotype BMI --covar SEX --mask var=pass_DP15_DPgeno10_CEU_HWE_MAFlt01 loc.group=refseq > SKAT_CEU.result | ||
+ | pseq myproj assoc --tests skat --phenotype BMI --covar SEX --mask include="DP>14" geno=DP:ge:11 file=CEU hwe=5.7e-7:1 "mac=1 maf=0.01" loc.group=refseq > SKAT_CEU.result | ||
+ | head -20 SKAT_CEU.result | ||
+ | cat SKAT_CEU.result | grep SKAT | grep -v "P=NA" | sort -k6 | head -15 | ||
+ | |||
+ | Exercise analyzing YRI samples: | ||
+ | |||
+ | pseq myproj var-set --group pass_DP15_DPgeno10_YRI --mask file=YRI var=pass_DP15_DPgeno10 | ||
+ | pseq myproj var-summary | ||
+ | pseq myproj var-set --group pass_DP15_DPgeno10_YRI_HWE --mask hwe=5.7e-7:1 var=pass_DP15_DPgeno10_YRI | ||
+ | pseq myproj var-summary | ||
+ | pseq myproj var-set --group pass_DP15_DPgeno10_YRI_HWE_MAFgt05 --mask maf=0.05:0.5 var=pass_DP15_DPgeno10_YRI_HWE | ||
+ | pseq myproj var-summary | ||
+ | pseq myproj var-set --group pass_DP15_DPgeno10_YRI_HWE_MAFlt01 --mask "mac=1 maf=0.01" var=pass_DP15_DPgeno10_YRI_HWE | ||
+ | pseq myproj var-summary | ||
+ | pseq myproj glm --phenotype BMI --covar SEX --mask var=pass_DP15_DPgeno10_YRI_HWE_MAFgt05 > SNV_YRI.result | ||
+ | head SNV_YRI.result | ||
+ | cat SNV_YRI.result | awk '{if(FNR==1) print $0; if(NR>1) print $0 | "sort -k9 2>/dev/null"}' | grep -v "NA\s\+NA\s\+NA" | head | ||
+ | pseq myproj assoc --tests skat --phenotype BMI --covar SEX --mask include="DP>14" geno=DP:ge:11 file=YRI hwe=5.7e-7:1 "mac=1 maf=0.01" loc.group=refseq > SKAT_YRI.result | ||
+ | head -20 SKAT_YRI.result | ||
+ | cat SKAT_YRI.result | grep SKAT | grep -v "P=NA" | sort -k6 | head -15 | ||
+ | ==RV-TDT exercise== | ||
+ | vtools init rvtdt | ||
+ | vtools import --format vcf data/data.vcf --build hg19 | ||
+ | vtools phenotype --from_file data/phen.txt | ||
+ | # variant selection | ||
+ | vtools execute ANNOVAR geneanno | ||
+ | vtools select variant "variant.region_type like '%splicing%'or variant.mut_type like 'nonsynonymous%' or variant.mut_type like 'frameshift%' or variant.mut_type like 'stop%'" -t func_variant | ||
+ | # tped file | ||
+ | vtools export func_variant --format tped --samples 'phenotype is not null' > vat_raw.tped | ||
+ | sort -k4 -n vat_raw.tped | awk 'BEGIN{OFS="\t";prev="None";copy=1} {$2=$1"_"$4; $3=0; if($2==prev) {$2=$2"_"copy; copy=copy+1} else {prev=$2; copy=1}; print $0}' > vat_export.tped | ||
+ | # tfam file | ||
+ | vtools phenotype --out family sample_name pid mid sex phenotype > vat_export.tfam | ||
+ | # anno file | ||
+ | vtools use refGene-hg19_20130904 | ||
+ | vtools update func_variant --set 'maf=0.001' | ||
+ | vtools select func_variant -o chr pos refGene.name2 maf --header > vat_export.anno | ||
+ | # Mendelian error and recode | ||
+ | plink --noweb --tfile vat_export --recode12 --me 1 1 --set-me-missing --out "recode12_noME" | ||
+ | sort -n -k1 -k6 -k2 recode12_noME.ped | sed 's/ /\t/g' | cut -f1,3,4,5 --complement > linkage.ped | ||
+ | cut -f2 recode12_noME.map | awk 'BEGIN{OFS="\t";} {print "M",$0}' | sed '1i\I\tid\nA\tDisease' > linkage.dat | ||
+ | java -Xmx10000m -jar java/linkage2beagle.jar linkage.dat linkage.ped > pre_beagle.bgl | ||
+ | python script/pre_phase.py -i pre_beagle.bgl -a pre_beagle_withMissing.bgl | ||
+ | java -Xmx10000m -jar java/beagle.jar missing=0 trios=pre_beagle.bgl out=bgl_phased verbose=false redundant=true | ||
+ | gunzip bgl_phased.pre_beagle.bgl.phased.gz | ||
+ | python script/post_phase.py -a vat_export.anno -b bgl_phased.pre_beagle.bgl.phased -o genes/ | ||
+ | for g in `ls genes | grep tped | cut -d"." -f1 | head -20` | ||
+ | do | ||
+ | echo "runing rvTDT on gene "${g} | ||
+ | rvTDT exercise_proj -G ./genes/${g}.tped -P ./data/rvtdt.phen -M ./genes/${g}.map --adapt 500 --alpha 0.00001 --permut 2000 --lower_cutoff 0 --upper_cutoff 100 --minVariants 3 --maxMissRatio 1 | ||
+ | done | ||
+ | # Answer | ||
+ | vtools show tables | ||
+ | ls genes/ | grep tped | wc | ||
+ | cat exercise_proj_pval/*.pval | grep -v "^#" | sort -k2 | ||
+ | cat exercise_proj_pval/*.pval | grep -v "^#" | sort -k3 | ||
+ | # clean | ||
+ | rm -r exercise_proj* genes/* bgl* linkage* recode12* pre_beagle* vat_export.* | ||
+ | ==SEQPower== | ||
spower -h | spower -h | ||
spower LOGIT -h | spower LOGIT -h | ||
Line 401: | Line 512: | ||
spower show test SKAT | spower show test SKAT | ||
spower LOGIT Kryukov2009European1800.sfs --def_rare 0.01 --def_neutral -0.00001 0.00001 --moi A --proportion_detrimental 1 --proportion_protective 0 --OR_rare_detrimental 1.5 --OR_common_detrimental 1 --baseline_effect 0.01 --sample_size 1000 --p1 0.5 --limit 1 --alpha 0.05 --method "KBAC --name K1 --mafupper 0.01 --maflower 0 --alternative 1 --moi additive --permutations 1000 --adaptive 0.1" --replicates 1000 --jobs 4 -o exercise | spower LOGIT Kryukov2009European1800.sfs --def_rare 0.01 --def_neutral -0.00001 0.00001 --moi A --proportion_detrimental 1 --proportion_protective 0 --OR_rare_detrimental 1.5 --OR_common_detrimental 1 --baseline_effect 0.01 --sample_size 1000 --p1 0.5 --limit 1 --alpha 0.05 --method "KBAC --name K1 --mafupper 0.01 --maflower 0 --alternative 1 --moi additive --permutations 1000 --adaptive 0.1" --replicates 1000 --jobs 4 -o exercise | ||
− | |||
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.2 --ORmax_rare_detrimental 3.0 --method CFisher -r 100 -j 4 -l 1 -o exercise | spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.2 --ORmax_rare_detrimental 3.0 --method CFisher -r 100 -j 4 -l 1 -o exercise | ||
spower show exercise.loci.csv effect* | spower show exercise.loci.csv effect* | ||
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.2 --ORmax_rare_detrimental 3.0 --proportion_detrimental 0.8 --method CFisher -r 100 -j 4 -l 1 -o exercise | spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.2 --ORmax_rare_detrimental 3.0 --proportion_detrimental 0.8 --method CFisher -r 100 -j 4 -l 1 -o exercise | ||
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_sites 0.05 --method CFisher -r 100 -j 4 -l 1 -o exercise | spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_sites 0.05 --method CFisher -r 100 -j 4 -l 1 -o exercise | ||
− | |||
− | |||
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_low_maf 0.000125 --method CFisher -r 100 -j 4 -l 1 -o exercise | spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_low_maf 0.000125 --method CFisher -r 100 -j 4 -l 1 -o exercise | ||
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --method "CFisher --alternative 1 --name CMC" "KBAC --permutations 1000 --alternative 1" "WSSRankTest --alternative 1 --name WSS" "VTtest --alternative 1 --permutations 1000" "SKAT disease" -r 100 -j 4 -l 1 -o exercise | spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --method "CFisher --alternative 1 --name CMC" "KBAC --permutations 1000 --alternative 1" "WSSRankTest --alternative 1 --name WSS" "VTtest --alternative 1 --permutations 1000" "SKAT disease" -r 100 -j 4 -l 1 -o exercise | ||
+ | spower show exercise.csv method power | ||
+ | spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 -j 1 -l 1 -o exercise | ||
+ | spower LOGIT Kryukov2009European1800.sfs --power 0.8 --OR_rare_detrimental 1.5 -j 1 -l 1 -o exercise | ||
spower LNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --method "CollapseQt --name CMC --alternative 2" -r 100 -j 4 -l 1 -o exercise | spower LNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --method "CollapseQt --name CMC --alternative 2" -r 100 -j 4 -l 1 -o exercise | ||
spower LNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --meanshiftmax_rare_detrimental 0.5 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise | spower LNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --meanshiftmax_rare_detrimental 0.5 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise | ||
spower ELNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --QT_thresholds 0.4 0.6 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise | spower ELNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --QT_thresholds 0.4 0.6 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise | ||
− | spower ELNR Kryukov2009European1800.sfs --sample_size 1000 --p1 0.5 --meanshift_rare_detrimental 0.5 --QT_thresholds 0.4 0.6 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise | + | spower show exercise.csv sample* power<br />spower ELNR Kryukov2009European1800.sfs --sample_size 1000 --p1 0.5 --meanshift_rare_detrimental 0.5 --QT_thresholds 0.4 0.6 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise |
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --method "GroupWrite ExerciseSimulation" -j 4 -o exercise -v1 | spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --method "GroupWrite ExerciseSimulation" -j 4 -o exercise -v1 | ||
+ | spower show exercise.SEQPowerDB | ||
+ | spower show exercise.SEQPowerDB LOGIT | ||
spower show exercise.SEQPowerDB LOGIT method power title --condition "where power between 0.25 and 0.95" | spower show exercise.SEQPowerDB LOGIT method power title --condition "where power between 0.25 and 0.95" | ||
for i in 1 1.5 2 2.5 3 3.5 4; do | for i in 1 1.5 2 2.5 3 3.5 4; do | ||
spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental $i --method "CFisher --name CMC$i" --title FixedOR$i -r 100 -j 4 -l 1 -o exercise2 | spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental $i --method "CFisher --name CMC$i" --title FixedOR$i -r 100 -j 4 -l 1 -o exercise2 | ||
− | done | + | done<br />spower show exercise2.SEQPowerDB LOGIT method power title
+ | ==Unphased== | ||
− | ==Unphased== | + | |
− | + | ||
unphased.sh | unphased.sh | ||
unphased mypeds.ped -marker 1 2 3 -missing -permutation 10 | unphased mypeds.ped -marker 1 2 3 -missing -permutation 10 | ||
Line 428: | Line 539: | ||
unphased all.ped -window 2 -LD | unphased all.ped -window 2 -LD | ||
unphased all.ped -window 2 -LD >> results.txt | unphased all.ped -window 2 -LD >> results.txt | ||
− | |||
− | |||
− | |||
vtools -h | vtools -h | ||
vtools init VATDemo | vtools init VATDemo | ||
Line 478: | Line 586: | ||
vtools remove variants to_remove -v0 | vtools remove variants to_remove -v0 | ||
vtools show tables | vtools show tables | ||
− | vtools remove genotypes "DP_geno<10" -v0 vtools select variant "mut_type like 'non%' or mut_type like 'stop%' or region_type='splicing'" -t v_funct vtools show tables vtools show samples --limit 5 vtools select variant --samples "RACE=1" -t CEU mkdir -p ceu cd ceu vtools init ceu --parent ../ --variants CEU --samples "RACE=1" --build hg19 | + | vtools remove genotypes "DP_geno<10" -v0 <br />vtools select variant "mut_type like 'non%' or mut_type like 'stop%' or region_type='splicing'" -t v_funct <br />vtools show tables <br />vtools show samples --limit 5 <br />vtools select variant --samples "RACE=1" -t CEU <br />mkdir -p ceu <br />cd ceu <br />vtools init ceu --parent ../ --variants CEU --samples "RACE=1" --build hg19 |
vtools show project | vtools show project | ||
vtools select variant "CEU_mafGD10>=0.05" -t common_ceu | vtools select variant "CEU_mafGD10>=0.05" -t common_ceu | ||
− | vtools select v_funct "CEU_mafGD10<0.01" -t rare_ceu vtools use refGene vtools show annotation refGene vtools associate -h vtools show tests vtools show test LinRegBurden vtools associate common_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db EA_CV > EA_CV.asso.res | + | vtools select v_funct "CEU_mafGD10<0.01" -t rare_ceu <br />vtools use refGene <br />vtools show annotation refGene <br />vtools associate -h <br />vtools show tests <br />vtools show test LinRegBurden <br />vtools associate common_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db EA_CV > EA_CV.asso.res |
grep -i error *.log | grep -i error *.log | ||
less EA_CV.asso.res | less EA_CV.asso.res | ||
Line 496: | Line 604: | ||
vtools select rare_ceu "refGene.name2='ABCC1'" -o chr pos ref alt CEU_mafGD10 numGD10 mut_type --header | vtools select rare_ceu "refGene.name2='ABCC1'" -o chr pos ref alt CEU_mafGD10 numGD10 mut_type --header | ||
vtools_report plot_association qq -o QQRV -b --label_top 2 -f 6 < EA_RV.asso.res | vtools_report plot_association qq -o QQRV -b --label_top 2 -f 6 < EA_RV.asso.res | ||
− | vtools_report plot_association manhattan -o MHRV -b --label_top 5 --color Dark2 --chrom_prefix None -f 6 < EA_RV.asso.res | + | vtools_report plot_association manhattan -o MHRV -b --label_top 5 --color Dark2 --chrom_prefix None -f 6 < EA_RV.asso.res <br />vtools associate rare_ceu BMI --covariate SEX KING_MDS1 KING_MDS2 -m "LinRegBurden --name RVMDS2 --alternative 2" -g refGene.name2 -j1 --to_db EA_RV > EA_RV_MDS2.asso.res |
− | vtools_report plot_association qq -o QQRV_MDS2 -b --label_top 2 -f 6 < EA_RV_MDS2.asso.res cd .. vtools select variant --samples "RACE=0" -t YRI mkdir -p yri cd yri vtools init yri --parent ../ --variants YRI --samples "RACE=0" --build hg19 vtools select variant "YRI_mafGD10>=0.05" -t common_yri | + | vtools_report plot_association qq -o QQRV_MDS2 -b --label_top 2 -f 6 < EA_RV_MDS2.asso.res <br />cd .. <br />vtools select variant --samples "RACE=0" -t YRI <br />mkdir -p yri <br />cd yri <br />vtools init yri --parent ../ --variants YRI --samples "RACE=0" --build hg19 <br />vtools select variant "YRI_mafGD10>=0.05" -t common_yri |
− | vtools select v_funct "YRI_mafGD10<0.01" -t rare_yri vtools use refGene vtools associate common_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db YA_CV > YA_CV.asso.res | + | vtools select v_funct "YRI_mafGD10<0.01" -t rare_yri <br />vtools use refGene <br />vtools associate common_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db YA_CV > YA_CV.asso.res |
vtools associate rare_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db YA_RV > YA_RV.asso.res | vtools associate rare_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db YA_RV > YA_RV.asso.res | ||
vtools associate rare_yri BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db YA_RV > YA_RV_VT.asso.res | vtools associate rare_yri BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db YA_RV > YA_RV_VT.asso.res |
Latest revision as of 19:24, 19 May 2017
Contents
GenABEL
plink --file GWAS_clean4 --pheno pheno.phen --pheno-name Aff --transpose --recode --out gwa_gabel --noweb plink --file GWAS_clean4 --pheno pheno.phen --pheno-name systolic --transpose --recode --out gwa_gabel_qtl --noweb R library(GenABEL) convert.snp.tped(tped = "gwa_gabel_qtl.tped", tfam = "gwa_gabel_qtl.tfam", out = "gwa_gabel_qtl.raw", strand = "u") g.dat <- load.gwaa.data(phen = "gwa_gabel_qtl.praw", gen = "gwa_gabel_qtl.raw", force = T) slotNames(g.dat) slotNames(g.dat@gtdata) colnames(g.dat@phdata) sample.size <- g.dat@gtdata@nids snps.total <- g.dat@gtdata@nsnps print(c(sample.size, snps.total)) summary(g.dat@phdata$disease) hist(g.dat@phdata$disease, main="Quantitative Phenotype data summary", xlab = "Systolic pressure", freq = F,breaks=20, col="gray") rug(g.dat@phdata$disease) test.snp <- scan.glm('disease ~ CRSNP', family = gaussian(), data = g.dat) names(test.snp) alpha <- 5e-8 test.snp$snpnames[test.snp$P1df < alpha] test.snp$P1df[test.snp$P1df < alpha] test.qt <- qtscore(disease, data = g.dat, trait = "gaussian") slotNames(test.qt) names(test.qt@results) head(results(test.qt)) test.qt@lambda descriptives.scan(test.qt) row.names(results(test.qt))[results(test.qt)$P1df < alpha] results(test.qt)$P1df[results(test.qt)$P1df < alpha] results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha] obs <- sort(results(test.qt)$P1df) ept <- ppoints(obs) plot(-log10(ept), -log10(obs), main = "GWAS QQ plot, qtl", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)") abline(0, 1, col = "red") abline(h = 8, lty = 2) plot(test.qt, col = "black") test.qt.sex <- qtscore(disease ~ sex, data = g.dat, trait = "gaussian") row.names(results(test.qt.sex))[results(test.qt)$P1df < alpha] summary(lm(disease ~ sex, data = g.dat)) convert.snp.tped(tped = "gwa_gabel.tped", tfam = "gwa_gabel.tfam", out = "gwa_gabel.raw", strand = "u") b.dat <- load.gwaa.data(phen = "gwa_gabel.praw", gen = "gwa_gabel.raw", force = T) slotNames(b.dat) slotNames(b.dat@gtdata) colnames(b.dat@phdata) 
b.dat@gtdata@nids case.size <- length(which(b.dat@phdata$disease == 1)) control.size <- length(which(b.dat@phdata$disease == 0)) case.size control.size snpsb.total <- b.dat@gtdata@nsnps testb.snp <- scan.glm('disease ~ CRSNP', family = binomial(), data = b.dat) names(testb.snp) alpha <- 5e-8 testb.snp$snpnames[testb.snp$P1df < alpha] testb.snp$P1df[testb.snp$P1df < alpha] testb.qt <- qtscore(disease, data = b.dat, trait = "binomial") slotNames(testb.qt) descriptives.scan(testb.qt) row.names(results(testb.qt))[results(testb.qt)$P1df < alpha] results(testb.qt)$P1df[results(testb.qt)$P1df < alpha] results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha] gkin <- ibs(g.dat, weight = "freq") gkin[1:10,1:10] cps.full <- cmdscale(as.dist(.5 - gkin), eig = T, k = 10) names(cps.full) cps <- cps.full$points plot(cps[,1], cps[,2], pch = g.dat@phdata$popn) legend("topright", c("TSI","MEX", "CEU"), pch = c(1,2,3)) colnames(cps)<-c('C1','C2','C3','C4','C5','C6','C7','C8','C9','C10') gpc.dat <- g.dat gpc.dat@phdata<-cbind(g.dat@phdata, cps) test.pc.a <- scan.glm('disease ~ CRSNP + C1 + C2 + C3 + C4 + C5', family=gaussian(), data = gpc.dat) test.pc.a$snpnames[test.pc.a$P1df < alpha] test.pc.a$P1df[test.pc.a$P1df < alpha] test.pc.b <- qtscore(disease ~ C1 + C2 + C3 + C4 + C5, data = gpc.dat, trait = "gaussian") test.pc.b@lambda plot(cps.full$eig[1:10]/sum(cps.full$eig), axes = F, type = "b", xlab = "Components", ylim = c(0,0.05), ylab = "Proportion of Variations", main = "MDS analysis scree plot") axis(1, 1:10) axis(2) plot(cumsum(cps.full$eig[1:10])/sum(cps.full$eig), axes = F, type = "b", ylim = c(0,0.2), xlab = "Components", ylab = "Proportion of Variations", main = "MDS analysis cumulative plot") axis(1, 1:10) axis(2) row.names(results(test.qt))[results(test.qt)$Pc1df < alpha] results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha] test.qt@lambda obs <- sort(results(test.qt)$chi2.1df) ept <- sort(qchisq(ppoints(obs), df = 1)) plot(ept, obs, main = "Genomic control (lambda = slope 
of the dashed line)", xlab="Expected chisq, 1df", ylab="Observed chisq, 1df") abline(0, 1, col = "red") abline(0, test.qt@lambda[1], lty = 2) median(results(test.qt)$chi2.1df)/0.456 obs <- sort(results(test.qt)$Pc1df) ept <- ppoints(obs) plot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. via Genomic Control", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)") abline(0, 1, col = "red") abline(h = 8, lty = 2) adj.gkin = gkin diag(adj.gkin) = hom(g.dat)$Var test.eg <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = 2) descriptives.scan(test.eg) snp.eg <- row.names(results(test.eg))[results(test.eg)$P1df < alpha] pvalue.eg <- results(test.eg)$P1df[results(test.eg)$P1df < alpha] lambda.eg <- test.eg@lambda snp.eg pvalue.eg lambda.eg for (k in 1:10){ test.tmp <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = k) print(test.tmp@lambda$estimate) } obs <- sort(results(test.eg)$Pc1df) ept <- ppoints(obs) plot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. w/ EIGENSTRAT", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)") abline(0, 1, col = "red") abline(h = 8, lty = 2) plot(test.qt, col = "black") add.plot(test.eg, col = "gray", pch = 3) legend("topright", c("Original plot","After correction w/ EIGENSTRAT"), pch = c(1,3))
==Imputation exercise==
plink --file chr22_imputation_ex --noweb plink --file chr22_imputation_ex --maf 0.01 --mind 0.02 --geno 0.05 --hwe 0.001 --out qc_check --noweb plink --file chr22_imputation_ex --maf 0.01 --mind 0.02 --geno 0.05 --hwe 0.001 --recode --out chr22_clean1 --noweb plink --file chr22_clean1 --maf 0.01 --mind 0.02 --geno 0.05 --hwe 0.001 --out qc_check_2 --noweb plink --file chr22_clean1 --filter-cases --hwe 0.001 --recode --out chr22_cases_clean --noweb plink --file chr22_clean1 --filter-controls --recode --out chr22_controls_clean --noweb plink --file chr22_controls_clean --merge chr22_cases_clean.ped chr22_cases_clean.map --hwe 0.001 --recode --out chr22_all_clean --noweb plink --file chr22_all_clean --logistic --out chr22_all_clean_geno --noweb R mydata = read.table("chr22_all_clean_geno.assoc.logistic", header=T) names(mydata) plot(mydata$BP, -log10(mydata$P)) smallp = mydata[which(mydata$P < 1E-4),] smallp smallp = smallp[order(smallp$BP),] smallp q() mach1 --hapmapFormat -d chr22_mach_merlin.map -p chr22_mach_merlin.ped --haps genotypes_chr22_CEU_r22_nr.b36_fwd.phase.gz --snps genotypes_chr22_CEU_r22_nr.b36_fwd_legend.txt.gz --greedy --rounds 100 --mle --mldetails --autoflip -o chr22_HIHII plink --dosage chr22_HIHII_dose_mach4plink.txt.gz Zin --fam chr22_imputation_ex.fam --map chr22_imputed_snps_positions.map --out chr22_HIHII_dosage --noweb R dosage = read.table("chr22_HIHII_dosage.assoc.dosage", header= T) names(dosage) plot(dosage$BP, -log10(dosage$P)) dosagep = dosage[which(dosage$P < 5E-8),] dosagep = dosagep[order(dosagep$BP),] dosagep interest = dosage[which(dosage$SNP=='rs715586'),] interest
PLINK_R
Introduction
plink --ped dbp.cc.ped --map dbp.map --map3 --missing --noweb plink --ped dbp.cc.ped --map dbp.map --map3 --mind 0.10 --geno 0.05 --recode --out cleaned --noweb plink --ped cleaned.ped --map cleaned.map --freq --out cleaned --noweb plink --ped cleaned.ped --map cleaned.map --hardy --out cleaned --noweb plink --ped cleaned.ped --map cleaned.map --out cleaned.R --recode --tab --noweb R city = c("Oslo", "Bergen", "Munich", "Berlin", "Rome", "Milan") population = c(0.58, 0.25, 1.3, 3.4, 2.7, 1.3) country = factor( c("Norway" , "Norway", "Germany", "Germany", "Italy", "Italy" )) capital = c(TRUE, FALSE, FALSE, TRUE, TRUE, FALSE) updated = 2009 city population country capital c(city, city) c(population, updated) summary (city) summary (population) summary (country) summary (capital) is.numeric(city) is.character(city) is.factor(city) class (city) class (population) class (country) class (capital) length(city) names(population) = city population city [3] city [2:4] city[c(1,5:6)] population[3] population["Oslo"] population[c("Berlin","Rome")] population capital population[capital] population>=1.0 population[population>=1.0] cities = data.frame (city=city, pop=population, country=country, capital=capital, stringsAsFactors=F) cities length(cities) dim(cities) is.data.frame(cities) is.list(cities) colnames(cities) rownames(cities) cities$city cities[,1] cities[2,] cities[2,3] cities$pop[3] cities[capital,] cities[cities$pop>=1.0,] ls() save(cities, city, country, file="myobjects.R") write.table(cities, file="cities.txt") sink("cities.output.txt") print(cities) sink() dir() rm(list=ls()) ls() new.table = read.table ("cities.txt") ls() new.table load ("myobjects.R") ls() cities new.table q()
GWAS Data QC
plink --file GWAS --noweb plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind --noweb plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5 --noweb plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5 --noweb plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean --noweb plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean --noweb plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean --noweb plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2 --noweb plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking --noweb R sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T) names(sexcheck) sex_problem = sexcheck[which(sexcheck$STATUS=="PROBLEM"),] sex_problem q() plink --file GWAS_clean2 --genome --out duplicates --noweb R dups = read.table("duplicates.genome", header = T) problem_pairs = dups[which(dups$PI_HAT > 0.4),] problem_pairs problem_pairs = dups[which(dups$PI_HAT > 0.05),] myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT") problem_pairs[myvars] q() plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3 --noweb plink --file GWAS_clean3 --het --noweb R Dataset <- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".", strip.white=TRUE) mean(Dataset$F) sd(Dataset$F) jpeg("hist.jpeg", height=1000, width=1000) hist(scale(Dataset$F), xlim=c(-4,4)) dev.off() q() plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy --noweb R hardy = read.table("plink.hwe", header = T) names(hardy) hwe_prob = hardy[which(hardy$P < 0.0000009),] hwe_prob q() plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4 --noweb
Multifactorial Part 1
plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.add --logistic --noweb plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.add.ci --logistic --ci 0.95 --noweb plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.age.add --logistic --covar dbp.age.pheno --noweb plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.sex.add --logistic --sex --noweb plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.sexage.add --logistic --sex --covar dbp.age.pheno --noweb plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.snp1112.add --logistic --condition rs1112 --noweb plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.snp1117.add --logistic --condition rs1117 --noweb plink --ped dbp.qt.ped --map dbp.map --map3 --out linreg.sex.add --linear --sex --noweb R load("dbp.R") ls() dbp[1:5,] result.snp12 = glm (affection ~ rs1112, family=binomial("logit"), data=dbp) print (result.snp12) print ( class (result.snp12) ) print ( summary(result.snp12) ) dev.geno = anova (result.snp12, test="Chi") lrt.pvalue = pchisq(dev.geno[dim(dev.geno)[1],"Deviance"], df=2, ncp=0, FALSE) print ( lrt.pvalue ) print ( summary(result.snp12)$coefficients ) snp.beta = summary(result.snp12)$coefficients[2:3,1] print ( snp.beta ) print ( exp(snp.beta) ) ci = confint (result.snp12) print (ci) print ( exp(ci) ) snp.data = dbp[,c("affection", "rs1112")] summary(snp.data) snp.data[,"rs1112"]<-as.numeric(snp.data[,"rs1112"])-1 summary(snp.data) result.all=glm(affection ~ rs1112, family=binomial("logit"), data=snp.data) dev.all = anova(result.all, test="Chi") summary(result.all) print(dev.all) snp.data = dbp[,c("affection","trait","sex","age","rs1112","rs1117")] summary(snp.data) snp.data[,"rs1112"]<-as.numeric(snp.data[,"rs1112"])-1 snp.data[,"rs1117"]<-as.numeric(snp.data[,"rs1117"])-1 result.adj = glm(affection ~ sex + rs1112, family=binomial("logit"), data=snp.data) summary(result.adj) result.adj = glm (affection ~ age + rs1112 , family=binomial("logit"), data=snp.data) 
summary(result.adj) result.adj = glm (affection ~ sex + age + rs1112, family=binomial("logit"), data=snp.data) summary(result.adj) result.adj = glm (affection ~ rs1117 + rs1112, family=binomial("logit"), data=snp.data) summary(result.adj) anova (result.adj, test="Chi") result.adj = glm (affection ~ rs1112 + rs1117, family=binomial("logit"), data=snp.data) summary(result.adj) anova (result.adj, test="Chi") result.adj = lm (trait ~ rs1112, data=snp.data) summary(result.adj) result.adj = lm (trait ~ sex + rs1112, data=snp.data) summary(result.adj)
Multifactorial Part 2
plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.sex.inter.add --logistic --sex --interaction --noweb plink --ped dbp.cc.ped --map dbp.map --map3 --out logreg.snp1112.inter.add --logistic --condition rs1112 --interaction --noweb R
load("dbp.R")
ls()
dbp[1:5,]
summary(dbp) result.inter = glm (affection ~ sex * rs1112, family=binomial("logit"), data=snp.data) summary(result.inter) result.inter = glm (affection ~ age * rs1112, family=binomial("logit"), data=snp.data) summary(result.inter) result.inter = glm (affection ~ rs1112 * rs1117, family=binomial("logit"), data=snp.data) summary(result.inter) result.reg = glm (affection ~ sex + age + rs1112 + rs1117, family=binomial("logit"), data=snp.data) summary(result.reg) modelchoice.result<-step(result.reg)
summary(modelchoice.result)
GWAS Control Substructure
plink --file GWAS_clean4 --genome --mds-plot 10 --noweb R mydata = read.table("mds_components.txt", header=T) mydata$pch[mydata$Group==1 ] <-15 mydata$pch[mydata$Group==2 ] <-16 mydata$pch[mydata$Group==3 ] <-2 jpeg("mds.jpeg", height=1000, width=1000) plot(mydata$C1, mydata$C2 ,pch=mydata$pch) dev.off() q() plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj --noweb plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.mds --covar-name C1 --logistic --adjust --out C1 --noweb plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.mds --covar-name C1-C2 --logistic --adjust --out C1-C2 --noweb R broadqq <-function(pvals, title) { observed <- sort(pvals) lobs <- -(log10(observed)) expected <- c(1:length(observed)) lexp <- -(log10(expected / (length(expected)+1))) plot(c(0,7), c(0,7), col="red", lwd=3, type="l", xlab="Expected (-logP)", ylab="Observed (-logP)", xlim=c(0,max(lobs)), ylim=c(0,max(lobs)), las=1, xaxs="i", yaxs="i", bty="l", main = title) points(lexp, lobs, pch=23, cex=.4, bg="black") } jpeg("qqplot_compare.jpeg", height=1000, width=1000) par(mfrow=c(2,1)) aff_unadj<-read.table("unadj.assoc.logistic", header=TRUE) aff_unadj.add.p<-aff_unadj[aff_unadj$TEST==c("ADD"),]$P broadqq(aff_unadj.add.p,"Some Trait Unadjusted") aff_C1C2<-read.table("C1-C2.assoc.logistic", header=TRUE) aff_C1C2.add.p<-aff_C1C2[aff_C1C2$TEST==c("ADD"),]$P broadqq(aff_C1C2.add.p, "Some Trait Adjusted") dev.off() gws_unadj = aff_unadj[which(aff_unadj$P < 0.0000001),] gws_unadj gws_adjusted = aff_C1C2[which(aff_C1C2$P < 0.0000001),] gws_adjusted q()
Multiple Testing
plink --ped dbp.cc.ped --map dbp.map --map3 --out multtest --assoc --adjust --noweb plink --ped dbp.cc.ped --map dbp.map --map3 --out multperm5000 --assoc --mperm 5000 --noweb plink --ped dbp.cc.ped --map dbp.map --map3 --out multperm100000 --assoc --mperm 100000 --noweb R load("p.values.R") ls() p.values library (multtest) adj.p.values = mt.rawp2adjp(p.values,c("Bonferroni","Holm","SidakSS","BH")) adj.p.values rownames(adj.p.values$adjp) = names(p.values[adj.p.values$index]) adj.p.values$adjp
PSEQ exercise
Data analysis:
pseq help pseq help all pseq myproj new-project --resources hg19 pseq myproj load-vcf --vcf CEU.exon.2010_03.genotypes.hg19.vcf.gz YRI.exon.2010_03.genotypes.hg19.vcf.gz pseq myproj load-pheno --file phenotype.phe pseq myproj v-view | head pseq myproj i-view | head pseq myproj summary pseq myproj var-summary pseq myproj ind-summary pseq myproj loc-summary pseq myproj ref-summary pseq myproj seq-summary pseq myproj file-summary pseq myproj meta-summary pseq myproj v-stats pseq myproj i-stats | head pseq myproj tag-file --id 1 --name CEU pseq myproj tag-file --id 2 --name YRI pseq myproj var-summary pseq myproj v-freq | head pseq myproj v-freq --mask file=CEU | head pseq myproj v-freq --mask file=YRI | head pseq myproj v-view --mask any.filter.ex | head pseq myproj v-view --mask any.filter.ex | wc -l pseq myproj v-view --mask any.filter | wc -l pseq myproj var-set --group pass --mask any.filter.ex pseq myproj var-summary pseq myproj var-set --group pass_DP15 --mask include="DP>14" var=pass pseq myproj var-summary pseq myproj var-set --group pass_DP15_DPgeno10 --mask geno=DP:ge:11 var=pass_DP15 pseq myproj var-summary pseq myproj var-set --group pass_DP15_DPgeno10_CEU --mask file=CEU var=pass_DP15_DPgeno10 pseq myproj var-summary pseq myproj var-set --group pass_DP15_DPgeno10_CEU_HWE --mask hwe=5.7e-7:1 var=pass_DP15_DPgeno10_CEU pseq myproj var-summary pseq myproj var-set --group pass_DP15_DPgeno10_CEU_HWE_MAFgt05 --mask maf=0.05:0.5 var=pass_DP15_DPgeno10_CEU_HWE pseq myproj var-summary pseq myproj var-set --group pass_DP15_DPgeno10_CEU_HWE_MAFlt01 --mask "mac=1 maf=0.01" var=pass_DP15_DPgeno10_CEU_HWE pseq myproj var-summary pseq myproj glm --phenotype BMI --covar SEX --mask var=pass_DP15_DPgeno10_CEU_HWE_MAFgt05 > SNV_CEU.result head SNV_CEU.result cat SNV_CEU.result | awk '{if(FNR==1) print $0; if(NR>1) print $0 | "sort -k9 2>/dev/null"}' | grep -v "NA\s\+NA\s\+NA" | head pseq myproj assoc --tests fw vt --phenotype BMI pseq myproj assoc --tests skat --phenotype 
BMI --covar SEX --mask var=pass_DP15_DPgeno10_CEU_HWE_MAFlt01 loc.group=refseq > SKAT_CEU.result pseq myproj assoc --tests skat --phenotype BMI --covar SEX --mask include="DP>14" geno=DP:ge:11 file=CEU hwe=5.7e-7:1 "mac=1 maf=0.01" loc.group=refseq > SKAT_CEU.result head -20 SKAT_CEU.result cat SKAT_CEU.result | grep SKAT | grep -v "P=NA" | sort -k6 | head -15
Exercise analyzing YRI samples:
pseq myproj var-set --group pass_DP15_DPgeno10_YRI --mask file=YRI var=pass_DP15_DPgeno10 pseq myproj var-summary pseq myproj var-set --group pass_DP15_DPgeno10_YRI_HWE --mask hwe=5.7e-7:1 var=pass_DP15_DPgeno10_YRI pseq myproj var-summary pseq myproj var-set --group pass_DP15_DPgeno10_YRI_HWE_MAFgt05 --mask maf=0.05:0.5 var=pass_DP15_DPgeno10_YRI_HWE pseq myproj var-summary pseq myproj var-set --group pass_DP15_DPgeno10_YRI_HWE_MAFlt01 --mask "mac=1 maf=0.01" var=pass_DP15_DPgeno10_YRI_HWE pseq myproj var-summary pseq myproj glm --phenotype BMI --covar SEX --mask var=pass_DP15_DPgeno10_YRI_HWE_MAFgt05 > SNV_YRI.result head SNV_YRI.result cat SNV_YRI.result | awk '{if(FNR==1) print $0; if(NR>1) print $0 | "sort -k9 2>/dev/null"}' | grep -v "NA\s\+NA\s\+NA" | head pseq myproj assoc --tests skat --phenotype BMI --covar SEX --mask include="DP>14" geno=DP:ge:11 file=YRI hwe=5.7e-7:1 "mac=1 maf=0.01" loc.group=refseq > SKAT_YRI.result head -20 SKAT_YRI.result cat SKAT_YRI.result | grep SKAT | grep -v "P=NA" | sort -k6 | head -15
==RV-TDT exercise==
vtools init rvtdt vtools import --format vcf data/data.vcf --build hg19 vtools phenotype --from_file data/phen.txt # variant selection vtools execute ANNOVAR geneanno vtools select variant "variant.region_type like '%splicing%'or variant.mut_type like 'nonsynonymous%' or variant.mut_type like 'frameshift%' or variant.mut_type like 'stop%'" -t func_variant # tped file vtools export func_variant --format tped --samples 'phenotype is not null' > vat_raw.tped sort -k4 -n vat_raw.tped | awk 'BEGIN{OFS="\t";prev="None";copy=1} {$2=$1"_"$4; $3=0; if($2==prev) {$2=$2"_"copy; copy=copy+1} else {prev=$2; copy=1}; print $0}' > vat_export.tped # tfam file vtools phenotype --out family sample_name pid mid sex phenotype > vat_export.tfam # anno file vtools use refGene-hg19_20130904 vtools update func_variant --set 'maf=0.001' vtools select func_variant -o chr pos refGene.name2 maf --header > vat_export.anno # Mendelian error and recode 
plink --noweb --tfile vat_export --recode12 --me 1 1 --set-me-missing --out "recode12_noME" sort -n -k1 -k6 -k2 recode12_noME.ped | sed 's/ /\t/g' | cut -f1,3,4,5 --complement > linkage.ped cut -f2 recode12_noME.map | awk 'BEGIN{OFS="\t";} {print "M",$0}' | sed '1i\I\tid\nA\tDisease' > linkage.dat java -Xmx10000m -jar java/linkage2beagle.jar linkage.dat linkage.ped > pre_beagle.bgl python script/pre_phase.py -i pre_beagle.bgl -a pre_beagle_withMissing.bgl java -Xmx10000m -jar java/beagle.jar missing=0 trios=pre_beagle.bgl out=bgl_phased verbose=false redundant=true gunzip bgl_phased.pre_beagle.bgl.phased.gz python script/post_phase.py -a vat_export.anno -b bgl_phased.pre_beagle.bgl.phased -o genes/ for g in `ls genes | grep tped | cut -d"." -f1 | head -20` do echo "runing rvTDT on gene "${g} rvTDT exercise_proj -G ./genes/${g}.tped -P ./data/rvtdt.phen -M ./genes/${g}.map --adapt 500 --alpha 0.00001 --permut 2000 --lower_cutoff 0 --upper_cutoff 100 --minVariants 3 --maxMissRatio 1 done # Answer vtools show tables ls genes/ | grep tped | wc cat exercise_proj_pval/*.pval | grep -v "^#" | sort -k2 cat exercise_proj_pval/*.pval | grep -v "^#" | sort -k3 # clean rm -r exercise_proj* genes/* bgl* linkage* recode12* pre_beagle* vat_export.*
SEQPower
spower -h spower LOGIT -h spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --method "CFisher --name CMC" -r 100 -j 4 -l 1 -o exercise spower show exercise.csv spower show exercise.csv power* spower show exercise.loci.csv spower show exercise.loci.csv maf spower show tests spower show test SKAT spower LOGIT Kryukov2009European1800.sfs --def_rare 0.01 --def_neutral -0.00001 0.00001 --moi A --proportion_detrimental 1 --proportion_protective 0 --OR_rare_detrimental 1.5 --OR_common_detrimental 1 --baseline_effect 0.01 --sample_size 1000 --p1 0.5 --limit 1 --alpha 0.05 --method "KBAC --name K1 --mafupper 0.01 --maflower 0 --alternative 1 --moi additive --permutations 1000 --adaptive 0.1" --replicates 1000 --jobs 4 -o exercise spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.2 --ORmax_rare_detrimental 3.0 --method CFisher -r 100 -j 4 -l 1 -o exercise spower show exercise.loci.csv effect* spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.2 --ORmax_rare_detrimental 3.0 --proportion_detrimental 0.8 --method CFisher -r 100 -j 4 -l 1 -o exercise spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_sites 0.05 --method CFisher -r 100 -j 4 -l 1 -o exercise spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --missing_low_maf 0.000125 --method CFisher -r 100 -j 4 -l 1 -o exercise spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --method "CFisher --alternative 1 --name CMC" "KBAC --permutations 1000 --alternative 1" "WSSRankTest --alternative 1 --name WSS" "VTtest --alternative 1 --permutations 1000" "SKAT disease" -r 100 -j 4 -l 1 -o exercise spower show exercise.csv method power spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 -j 1 -l 1 -o exercise spower LOGIT Kryukov2009European1800.sfs --power 0.8 --OR_rare_detrimental 1.5 
-j 1 -l 1 -o exercise spower LNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --method "CollapseQt --name CMC --alternative 2" -r 100 -j 4 -l 1 -o exercise spower LNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --meanshiftmax_rare_detrimental 0.5 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise spower ELNR Kryukov2009European1800.sfs --sample_size 1000 --meanshift_rare_detrimental 0.2 --QT_thresholds 0.4 0.6 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise spower show exercise.csv sample* power
spower ELNR Kryukov2009European1800.sfs --sample_size 1000 --p1 0.5 --meanshift_rare_detrimental 0.5 --QT_thresholds 0.4 0.6 --method "CollapseQt --alternative 2" -r 100 -j 4 -l 1 -o exercise spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental 1.5 --method "GroupWrite ExerciseSimulation" -j 4 -o exercise -v1 spower show exercise.SEQPowerDB spower show exercise.SEQPowerDB LOGIT spower show exercise.SEQPowerDB LOGIT method power title --condition "where power between 0.25 and 0.95" for i in 1 1.5 2 2.5 3 3.5 4; do spower LOGIT Kryukov2009European1800.sfs --sample_size 1000 --OR_rare_detrimental $i --method "CFisher --name CMC$i" --title FixedOR$i -r 100 -j 4 -l 1 -o exercise2 done
spower show exercise2.SEQPowerDB LOGIT method power title
==Unphased==
unphased.sh unphased mypeds.ped -marker 1 2 3 -missing -permutation 10 unphased mypeds.ped -permutation 10 morepeds.ped unphased mypeds.ped -window 2 -reference 1 2 unphased mypeds.ped -window 2 -reference 1 2 1 1 unphased all.ped -window 2 -LD unphased all.ped -window 2 -LD >> results.txt vtools -h vtools init VATDemo vtools import *.vcf.gz --var_info DP filter --geno_info DP_geno --build hg18 -j1 vtools liftover hg19 head phenotypes.csv vtools phenotype --from_file phenotypes.csv --delimiter "," vtools show project vtools show tables vtools show table variant vtools show samples vtools show genotypes vtools show fields vtools select variant --count vtools show genotypes > GenotypeSummary.txt head GenotypeSummary.txt vtools output variant "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header vtools select variant "filter='PASS'" --count vtools select variant "filter='PASS'" -o "max(DP)" "min(DP)" "avg(DP)" "stdev(DP)" "lower_quartile(DP)" "upper_quartile(DP)" --header vtools update variant --from_stat 'total=#(GT)' 'num=#(alt)' 'het=#(het)' 'hom=#(hom)' 'other=#(other)' 'minDP=min(DP_geno)' 'maxDP=max(DP_geno)' 'meanDP=avg(DP_geno)' 'maf=maf()' vtools show fields vtools show table variant vtools update variant --from_stat 'totalGD10=#(GT)' 'numGD10=#(alt)' 'hetGD10=#(het)' 'homGD10=#(hom)' 'otherGD10=#(other)' 'mafGD10=maf()' --genotypes "DP_geno > 10" vtools show fields vtools show table variant vtools output variant chr pos maf mafGD10 --header --limit 20 vtools phenotype --set "RACE=0" --samples "filename like 'YRI%'" vtools phenotype --set "RACE=1" --samples "filename like 'CEU%'" vtools show samples --limit 10 vtools update variant --from_stat 'CEU_mafGD10=maf()' --genotypes 'DP_geno>10' --samples "RACE=1" vtools update variant --from_stat 'YRI_mafGD10=maf()' --genotypes 'DP_geno>10' --samples "RACE=0" vtools output variant chr pos mafGD10 CEU_mafGD10 
YRI_mafGD10 --header --limit 10 vtools phenotype --from_stat 'CEU_totalGD10=#(GT)' 'CEU_numGD10=#(alt)' --genotypes 'DP_geno>10' --samples "RACE=1" vtools phenotype --from_stat 'YRI_totalGD10=#(GT)' 'YRI_numGD10=#(alt)' --genotypes 'DP_geno>10' --samples "RACE=0" vtools phenotype --output sample_name CEU_totalGD10 CEU_numGD10 YRI_totalGD10 YRI_numGD10 --header vtools select variant 'maf>=0.01' -t variant_MAFge01 'Variants that have MAF >= 0.01' vtools show tables vtools execute KING --var_table variant_MAFge01 vtools_report plot_pheno_fields KING_MDS1 KING_MDS2 RACE --dot KING.mds.race.pdf --discrete_color Dark2 vtools_report plot_pheno_fields KING_MDS1 KING_MDS2 panel --dot KING.mds.panel.pdf --discrete_color Dark2 vtools execute ANNOVAR geneanno vtools output variant chr pos ref alt mut_type --limit 20 --header vtools_report trans_ratio variant -n num vtools_report trans_ratio variant -n numGD10 vtools select variant "DP<15" -t to_remove vtools show tables vtools remove variants to_remove -v0 vtools show tables vtools remove genotypes "DP_geno<10" -v0
vtools select variant "mut_type like 'non%' or mut_type like 'stop%' or region_type='splicing'" -t v_funct
vtools show tables
vtools show samples --limit 5
vtools select variant --samples "RACE=1" -t CEU
mkdir -p ceu
cd ceu
vtools init ceu --parent ../ --variants CEU --samples "RACE=1" --build hg19 vtools show project vtools select variant "CEU_mafGD10>=0.05" -t common_ceu vtools select v_funct "CEU_mafGD10<0.01" -t rare_ceu
vtools use refGene
vtools show annotation refGene
vtools associate -h
vtools show tests
vtools show test LinRegBurden
vtools associate common_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db EA_CV > EA_CV.asso.res grep -i error *.log less EA_CV.asso.res sort -g -k7 EA_CV.asso.res | head vtools show fields vtools associate rare_ceu BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db EA_RV > EA_RV.asso.res grep -i error *.log | tail -22 less EA_RV.asso.res sort -g -k6 EA_RV.asso.res | head vtools associate rare_ceu BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db EA_RV > EA_RV_VT.asso.res grep -i error *.log | tail -22 less EA_RV_VT.asso.res sort -g -k6 EA_RV_VT.asso.res | head vtools select rare_ceu "refGene.name2='ABCC1'" -o chr pos ref alt CEU_mafGD10 numGD10 mut_type --header vtools_report plot_association qq -o QQRV -b --label_top 2 -f 6 < EA_RV.asso.res vtools_report plot_association manhattan -o MHRV -b --label_top 5 --color Dark2 --chrom_prefix None -f 6 < EA_RV.asso.res
vtools associate rare_ceu BMI --covariate SEX KING_MDS1 KING_MDS2 -m "LinRegBurden --name RVMDS2 --alternative 2" -g refGene.name2 -j1 --to_db EA_RV > EA_RV_MDS2.asso.res vtools_report plot_association qq -o QQRV_MDS2 -b --label_top 2 -f 6 < EA_RV_MDS2.asso.res
cd ..
vtools select variant --samples "RACE=0" -t YRI
mkdir -p yri
cd yri
vtools init yri --parent ../ --variants YRI --samples "RACE=0" --build hg19
vtools select variant "YRI_mafGD10>=0.05" -t common_yri vtools select v_funct "YRI_mafGD10<0.01" -t rare_yri
vtools use refGene
vtools associate common_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -j1 --to_db YA_CV > YA_CV.asso.res vtools associate rare_yri BMI --covariate SEX -m "LinRegBurden --alternative 2" -g refGene.name2 -j1 --to_db YA_RV > YA_RV.asso.res vtools associate rare_yri BMI --covariate SEX -m "VariableThresholdsQt --alternative 2 -p 100000 --adaptive 0.0005" -g refGene.name2 -j1 --to_db YA_RV > YA_RV_VT.asso.res cd .. vtools_report meta_analysis ceu/EA_RV_VT.asso.res yri/YA_RV_VT.asso.res --beta 5 --pval 6 --se 7 -n 2 --link 1 > META_RV_VT.asso.res cut -f1,3 META_RV_VT.asso.res | head