Difference between revisions of "AdvGeneMap2018Commands"
From Statistical Genetics Courses
Serveradmin (Talk | contribs) (→ANNOVAR COMMANDS) |
Serveradmin (Talk | contribs) |
||
Line 1: | Line 1: | ||
__NOTITLE__ | __NOTITLE__ | ||
− | + | ==Data QC Plink== | |
− | == | + | #PLINK |
− | + | ||
− | + | plink --file GWAS | |
− | + | ||
− | + | plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind | |
− | + | ||
− | + | plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5 | |
− | + | ||
− | + | plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5 | |
− | + | ||
− | + | plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean | |
− | + | ||
− | + | plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean | |
− | + | ||
− | + | plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean | |
− | + | ||
− | + | plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2 | |
− | + | ||
− | + | plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking | |
− | + | ||
− | + | #### in R - open R by simply typing R | |
− | + | ||
− | plink --file | + | setwd("to_your_working_directory/") |
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | + | ||
− | plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind | + | |
− | plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5 | + | |
− | plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5 | + | |
− | plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean | + | |
− | plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean | + | |
− | plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean | + | |
− | plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2 | + | |
− | plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking | + | |
− | R | + | |
sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T) | sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T) | ||
names(sexcheck) | names(sexcheck) | ||
sex_problem = sexcheck[which(sexcheck$STATUS=="PROBLEM"),] | sex_problem = sexcheck[which(sexcheck$STATUS=="PROBLEM"),] | ||
sex_problem | sex_problem | ||
+ | |||
q() | q() | ||
− | plink --file GWAS_clean2 --genome --out duplicates | + | |
− | R | + | ################################## |
+ | |||
+ | plink --file GWAS_clean2 --genome --out duplicates | ||
+ | |||
+ | #### in R | ||
+ | |||
+ | setwd("to_your_working_directory/") | ||
+ | |||
dups = read.table("duplicates.genome", header = T) | dups = read.table("duplicates.genome", header = T) | ||
problem_pairs = dups[which(dups$PI_HAT > 0.4),] | problem_pairs = dups[which(dups$PI_HAT > 0.4),] | ||
Line 160: | Line 47: | ||
myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT") | myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT") | ||
problem_pairs[myvars] | problem_pairs[myvars] | ||
+ | |||
q() | q() | ||
− | plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3 | + | |
− | plink --file GWAS_clean3 --het | + | ###### |
− | R | + | |
− | Dataset | + | plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3 |
− | + | ||
+ | plink --file GWAS_clean3 --het | ||
+ | |||
+ | ###### in R | ||
+ | |||
+ | Dataset <- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".", strip.white=TRUE) | ||
+ | |||
mean(Dataset$F) | mean(Dataset$F) | ||
sd(Dataset$F) | sd(Dataset$F) | ||
+ | |||
jpeg("hist.jpeg", height=1000, width=1000) | jpeg("hist.jpeg", height=1000, width=1000) | ||
hist(scale(Dataset$F), xlim=c(-4,4)) | hist(scale(Dataset$F), xlim=c(-4,4)) | ||
dev.off() | dev.off() | ||
+ | |||
q() | q() | ||
− | plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy | + | |
− | R | + | ###### |
+ | |||
+ | plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy | ||
+ | |||
+ | ##### in R | ||
+ | |||
hardy = read.table("plink.hwe", header = T) | hardy = read.table("plink.hwe", header = T) | ||
names(hardy) | names(hardy) | ||
hwe_prob = hardy[which(hardy$P < 0.0000009),] | hwe_prob = hardy[which(hardy$P < 0.0000009),] | ||
hwe_prob | hwe_prob | ||
+ | |||
+ | |||
q() | q() | ||
− | plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4 | + | |
− | plink --file GWAS_clean4 --genome --mds-plot 10 | + | |
− | R | + | ########## |
+ | |||
+ | plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4 | ||
+ | |||
+ | ############################################### | ||
+ | ##### Part 2: controlling for substructure##### | ||
+ | ############################################### | ||
+ | |||
+ | plink --file GWAS_clean4 --genome --cluster --mds-plot 10 | ||
+ | |||
+ | #### in R | ||
+ | |||
mydata = read.table("mds_components.txt", header=T) | mydata = read.table("mds_components.txt", header=T) | ||
− | mydata$pch[mydata$Group==1 ] | + | |
− | mydata$pch[mydata$Group==2 ] | + | mydata$pch[mydata$Group==1 ] <-15 |
− | mydata$pch[mydata$Group==3 ] | + | mydata$pch[mydata$Group==2 ] <-16 |
− | jpeg("mds.jpeg", height= | + | mydata$pch[mydata$Group==3 ] <-2 |
+ | |||
+ | jpeg("mds.jpeg", height=500, width=500) | ||
plot(mydata$C1, mydata$C2 ,pch=mydata$pch) | plot(mydata$C1, mydata$C2 ,pch=mydata$pch) | ||
dev.off() | dev.off() | ||
+ | |||
q() | q() | ||
− | plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj -- | + | |
− | plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink. | + | ###### |
− | plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink. | + | |
− | R | + | plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj |
− | broadqq | + | |
+ | plink --file GWAS_clean4 --genome --cluster --pca 10 header | ||
+ | |||
+ | plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1 --logistic --adjust --out PC1 | ||
+ | |||
+ | plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1-PC2 --logistic --adjust --out PC1-PC2 | ||
+ | |||
+ | #### in R | ||
+ | |||
+ | broadqq <-function(pvals, title) | ||
{ | { | ||
− | + | observed <- sort(pvals) | |
− | + | lobs <- -(log10(observed)) | |
− | + | ||
− | + | expected <- c(1:length(observed)) | |
− | + | lexp <- -(log10(expected / (length(expected)+1))) | |
− | + | ||
− | jpeg("qqplot_compare.jpeg", height=1000, width= | + | plot(c(0,7), c(0,7), col="red", lwd=3, type="l", xlab="Expected (-logP)", ylab="Observed (-logP)", xlim=c(0,max(lobs)), ylim=c(0,max(lobs)), las=1, xaxs="i", yaxs="i", bty="l", main = title) |
+ | points(lexp, lobs, pch=23, cex=.4, bg="black") } | ||
+ | |||
+ | jpeg("qqplot_compare.jpeg", height=1000, width=500) | ||
par(mfrow=c(2,1)) | par(mfrow=c(2,1)) | ||
− | aff_unadj | + | aff_unadj<-read.table("unadj.assoc.logistic", header=TRUE) |
− | aff_unadj.add.p | + | aff_unadj.add.p<-aff_unadj[aff_unadj$TEST==c("ADD"),]$P |
broadqq(aff_unadj.add.p,"Some Trait Unadjusted") | broadqq(aff_unadj.add.p,"Some Trait Unadjusted") | ||
− | aff_C1C2 | + | aff_C1C2<-read.table("PC1-PC2.assoc.logistic", header=TRUE) |
− | aff_C1C2.add.p | + | aff_C1C2.add.p<-aff_C1C2[aff_C1C2$TEST==c("ADD"),]$P |
− | broadqq(aff_C1C2.add.p, "Some Trait Adjusted") | + | broadqq(aff_C1C2.add.p, "Some Trait Adjusted for PC1 and PC2") |
dev.off() | dev.off() | ||
+ | |||
gws_unadj = aff_unadj[which(aff_unadj$P < 0.0000001),] | gws_unadj = aff_unadj[which(aff_unadj$P < 0.0000001),] | ||
gws_unadj | gws_unadj | ||
gws_adjusted = aff_C1C2[which(aff_C1C2$P < 0.0000001),] | gws_adjusted = aff_C1C2[which(aff_C1C2$P < 0.0000001),] | ||
gws_adjusted | gws_adjusted | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− |
Revision as of 17:09, 22 January 2018
Data QC Plink
#PLINK plink --file GWAS plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5 plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5 plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2 plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking #### in R - open R by simply typing R setwd("to_your_working_directory/") sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T) names(sexcheck) sex_problem = sexcheck[which(sexcheck$STATUS=="PROBLEM"),] sex_problem q() ################################## plink --file GWAS_clean2 --genome --out duplicates #### in R setwd("to_your_working_directory/") dups = read.table("duplicates.genome", header = T) problem_pairs = dups[which(dups$PI_HAT > 0.4),] problem_pairs problem_pairs = dups[which(dups$PI_HAT > 0.05),] myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT") problem_pairs[myvars] q() ###### plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3 plink --file GWAS_clean3 --het ###### in R Dataset <- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".", strip.white=TRUE) mean(Dataset$F) sd(Dataset$F) jpeg("hist.jpeg", height=1000, width=1000) hist(scale(Dataset$F), xlim=c(-4,4)) dev.off() q() ###### plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy ##### in R hardy = read.table("plink.hwe", header = T) names(hardy) hwe_prob = hardy[which(hardy$P < 0.0000009),] hwe_prob q() ########## plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4 ############################################### ##### Part 2: controlling for substructure##### ############################################### plink --file GWAS_clean4 --genome --cluster --mds-plot 10 #### in R mydata = read.table("mds_components.txt", header=T) mydata$pch[mydata$Group==1 ] <-15 mydata$pch[mydata$Group==2 ] <-16 mydata$pch[mydata$Group==3 ] <-2 jpeg("mds.jpeg", height=500, width=500) plot(mydata$C1, mydata$C2 ,pch=mydata$pch) dev.off() q() ###### plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj plink --file GWAS_clean4 --genome --cluster --pca 10 header plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1 --logistic --adjust --out PC1 plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1-PC2 --logistic --adjust --out PC1-PC2 #### in R broadqq <-function(pvals, title) { observed <- sort(pvals) lobs <- -(log10(observed)) expected <- c(1:length(observed)) lexp <- -(log10(expected / (length(expected)+1))) plot(c(0,7), c(0,7), col="red", lwd=3, type="l", xlab="Expected (-logP)", ylab="Observed (-logP)", xlim=c(0,max(lobs)), ylim=c(0,max(lobs)), las=1, xaxs="i", yaxs="i", bty="l", main = title) points(lexp, lobs, pch=23, cex=.4, bg="black") } jpeg("qqplot_compare.jpeg", height=1000, width=500) par(mfrow=c(2,1)) aff_unadj<-read.table("unadj.assoc.logistic", header=TRUE) aff_unadj.add.p<-aff_unadj[aff_unadj$TEST==c("ADD"),]$P broadqq(aff_unadj.add.p,"Some Trait Unadjusted") aff_C1C2<-read.table("PC1-PC2.assoc.logistic", header=TRUE) aff_C1C2.add.p<-aff_C1C2[aff_C1C2$TEST==c("ADD"),]$P broadqq(aff_C1C2.add.p, "Some Trait Adjusted for PC1 and PC2") dev.off() gws_unadj = aff_unadj[which(aff_unadj$P < 0.0000001),] gws_unadj gws_adjusted = aff_C1C2[which(aff_C1C2$P < 0.0000001),] gws_adjusted