Difference between revisions of "AdvGeneMap2018Commands"

From Statistical Genetics Courses

Jump to: navigation, search
Line 2: Line 2:
  
 
===Plink - Part 1 - Data QC===
 
===Plink - Part 1 - Data QC===
  plink --file GWAS
+
plink --file GWAS
  plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind
+
plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind
  plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5
+
plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5
  plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5
+
plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5
  plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean
+
plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean
  plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean
+
plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean
  plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean
+
plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean
  plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2
+
plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2
  plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking
+
plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking
  #### in R - open R by simply typing R
+
#### in R - open R by simply typing R
  setwd("to_your_working_directory/")
+
setwd("to_your_working_directory/")
  sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T)
+
sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T)
  names(sexcheck)
+
names(sexcheck)
  sex_problem = sexcheck[which(sexcheck$STATUS=="PROBLEM"),]
+
sex_problem = sexcheck[which(sexcheck$STATUS=="PROBLEM"),]
  sex_problem
+
sex_problem
  q()
+
q()
  ##################################
+
##################################
  plink --file GWAS_clean2 --genome --out duplicates
+
plink --file GWAS_clean2 --genome --out duplicates
  #### in R
+
#### in R
  setwd("to_your_working_directory/")
+
setwd("to_your_working_directory/")
  dups = read.table("duplicates.genome", header = T)
+
dups = read.table("duplicates.genome", header = T)
  problem_pairs = dups[which(dups$PI_HAT > 0.4),]
+
problem_pairs = dups[which(dups$PI_HAT > 0.4),]
  problem_pairs
+
problem_pairs
  problem_pairs = dups[which(dups$PI_HAT > 0.05),]
+
problem_pairs = dups[which(dups$PI_HAT > 0.05),]
  myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT")
+
myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT")
  problem_pairs[myvars]
+
problem_pairs[myvars]
  q()
+
q()
  ######
+
######
  plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3
+
plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3
  plink --file GWAS_clean3 --het
+
plink --file GWAS_clean3 --het
  ###### in R
+
###### in R
  Dataset <- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".", strip.white=TRUE)
+
Dataset &lt;- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".", strip.white=TRUE)
  mean(Dataset$F)
+
mean(Dataset$F)
  sd(Dataset$F)
+
sd(Dataset$F)
  jpeg("hist.jpeg", height=1000, width=1000)
+
jpeg("hist.jpeg", height=1000, width=1000)
  hist(scale(Dataset$F), xlim=c(-4,4))
+
hist(scale(Dataset$F), xlim=c(-4,4))
  dev.off()
+
dev.off()
  q()
+
q()
  ######
+
######
  plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy  
+
plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy  
  ##### in R
+
##### in R
  hardy = read.table("plink.hwe", header = T)
+
hardy = read.table("plink.hwe", header = T)
  names(hardy)
+
names(hardy)
  hwe_prob = hardy&#91;which(hardy$P < 0.0000009),&#93;
+
hwe_prob = hardy&#91;which(hardy$P &lt; 0.0000009),&#93;
  hwe_prob
+
hwe_prob
  q()
+
q()
  ##########
+
##########
  plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4
+
plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4
 
+
  
 
===Plink - Part 2 - Controlling for Substructure===
 
===Plink - Part 2 - Controlling for Substructure===
  plink --file GWAS_clean4 --genome --cluster --mds-plot 10
+
plink --file GWAS_clean4 --genome --cluster --mds-plot 10
  #### in R
+
#### in R
  mydata = read.table("mds_components.txt", header=T)
+
mydata = read.table("mds_components.txt", header=T)
  mydata$pch&#91;mydata$Group==1 &#93; <-15
+
mydata$pch&#91;mydata$Group==1 &#93; &lt;-15
  mydata$pch&#91;mydata$Group==2 &#93; <-16
+
mydata$pch&#91;mydata$Group==2 &#93; &lt;-16
  mydata$pch&#91;mydata$Group==3 &#93; <-2
+
mydata$pch&#91;mydata$Group==3 &#93; &lt;-2
  jpeg("mds.jpeg", height=500, width=500)
+
jpeg("mds.jpeg", height=500, width=500)
  plot(mydata$C1, mydata$C2 ,pch=mydata$pch)
+
plot(mydata$C1, mydata$C2 ,pch=mydata$pch)
  dev.off()
+
dev.off()
  q()
+
q()
  ######
+
######
  plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj
+
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj
  plink --file GWAS_clean4 --genome --cluster --pca 10 header
+
plink --file GWAS_clean4 --genome --cluster --pca 10 header
  plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1 --logistic --adjust --out PC1
+
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1 --logistic --adjust --out PC1
  plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1-PC2 --logistic --adjust --out PC1-PC2
+
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1-PC2 --logistic --adjust --out PC1-PC2
  #### in R
+
#### in R
  broadqq <-function(pvals, title)
+
broadqq &lt;-function(pvals, title)
  {
+
{
  observed <- sort(pvals)
+
observed &lt;- sort(pvals)
  lobs <- -(log10(observed))
+
lobs &lt;- -(log10(observed))
  expected <- c(1:length(observed))  
+
expected &lt;- c(1:length(observed))  
  lexp <- -(log10(expected / (length(expected)+1)))
+
lexp &lt;- -(log10(expected / (length(expected)+1)))
  plot(c(0,7), c(0,7), col="red", lwd=3, type="l", xlab="Expected (-logP)", ylab="Observed (-logP)", xlim=c(0,max(lobs)), ylim=c(0,max(lobs)), las=1, xaxs="i", yaxs="i", bty="l", main = title)
+
plot(c(0,7), c(0,7), col="red", lwd=3, type="l", xlab="Expected (-logP)", ylab="Observed (-logP)", xlim=c(0,max(lobs)), ylim=c(0,max(lobs)), las=1, xaxs="i", yaxs="i", bty="l", main = title)
  points(lexp, lobs, pch=23, cex=.4, bg="black") }
+
points(lexp, lobs, pch=23, cex=.4, bg="black") }
  jpeg("qqplot_compare.jpeg", height=1000, width=500)
+
jpeg("qqplot_compare.jpeg", height=1000, width=500)
  par(mfrow=c(2,1))
+
par(mfrow=c(2,1))
  aff_unadj<-read.table("unadj.assoc.logistic", header=TRUE)
+
aff_unadj&lt;-read.table("unadj.assoc.logistic", header=TRUE)
  aff_unadj.add.p<-aff_unadj&#91;aff_unadj$TEST==c("ADD"),&#93;$P
+
aff_unadj.add.p&lt;-aff_unadj&#91;aff_unadj$TEST==c("ADD"),&#93;$P
  broadqq(aff_unadj.add.p,"Some Trait Unadjusted")
+
broadqq(aff_unadj.add.p,"Some Trait Unadjusted")
  aff_C1C2<-read.table("PC1-PC2.assoc.logistic", header=TRUE)
+
aff_C1C2&lt;-read.table("PC1-PC2.assoc.logistic", header=TRUE)
  aff_C1C2.add.p<-aff_C1C2&#91;aff_C1C2$TEST==c("ADD"),&#93;$P
+
aff_C1C2.add.p&lt;-aff_C1C2&#91;aff_C1C2$TEST==c("ADD"),&#93;$P
  broadqq(aff_C1C2.add.p, "Some Trait Adjusted for PC1 and PC2")
+
broadqq(aff_C1C2.add.p, "Some Trait Adjusted for PC1 and PC2")
  dev.off()
+
dev.off()
  gws_unadj = aff_unadj&#91;which(aff_unadj$P < 0.0000001),&#93;
+
gws_unadj = aff_unadj&#91;which(aff_unadj$P &lt; 0.0000001),&#93;
  gws_unadj
+
gws_unadj
  gws_adjusted = aff_C1C2&#91;which(aff_C1C2$P < 0.0000001),&#93;
+
gws_adjusted = aff_C1C2&#91;which(aff_C1C2$P &lt; 0.0000001),&#93;
  gws_adjusted
+
gws_adjusted

Revision as of 15:06, 23 January 2018

Plink - Part 1 - Data QC

plink --file GWAS
plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind
plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5
plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5
plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean
plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean
plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean
plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2
plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking
#### in R - open R by simply typing R
setwd("to_your_working_directory/")
sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T)
names(sexcheck)
sex_problem = sexcheck[which(sexcheck$STATUS=="PROBLEM"),]
sex_problem
q()
##################################
plink --file GWAS_clean2 --genome --out duplicates
#### in R
setwd("to_your_working_directory/")
dups = read.table("duplicates.genome", header = T)
problem_pairs = dups[which(dups$PI_HAT > 0.4),]
problem_pairs
problem_pairs = dups[which(dups$PI_HAT > 0.05),]
myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT")
problem_pairs[myvars]
q()
######
plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3
plink --file GWAS_clean3 --het
###### in R
Dataset <- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".", strip.white=TRUE)
mean(Dataset$F)
sd(Dataset$F)
jpeg("hist.jpeg", height=1000, width=1000)
hist(scale(Dataset$F), xlim=c(-4,4))
dev.off()
q()
######
plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy 
##### in R
hardy = read.table("plink.hwe", header = T)
names(hardy)
hwe_prob = hardy[which(hardy$P < 0.0000009),]
hwe_prob
q()
##########
plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4

Plink - Part 2 - Controlling for Substructure

plink --file GWAS_clean4 --genome --cluster --mds-plot 10
#### in R
mydata = read.table("mds_components.txt", header=T)
mydata$pch[mydata$Group==1 ] <-15
mydata$pch[mydata$Group==2 ] <-16
mydata$pch[mydata$Group==3 ] <-2
jpeg("mds.jpeg", height=500, width=500)
plot(mydata$C1, mydata$C2 ,pch=mydata$pch)
dev.off()
q()
######
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj
plink --file GWAS_clean4 --genome --cluster --pca 10 header
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1 --logistic --adjust --out PC1
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1-PC2 --logistic --adjust --out PC1-PC2
#### in R
broadqq <-function(pvals, title)
{
	observed <- sort(pvals)
	lobs <- -(log10(observed))
	expected <- c(1:length(observed)) 
	lexp <- -(log10(expected / (length(expected)+1)))
	plot(c(0,7), c(0,7), col="red", lwd=3, type="l", xlab="Expected (-logP)", ylab="Observed (-logP)", xlim=c(0,max(lobs)), ylim=c(0,max(lobs)), las=1, xaxs="i", yaxs="i", bty="l", main = title)
	points(lexp, lobs, pch=23, cex=.4, bg="black") }
jpeg("qqplot_compare.jpeg", height=1000, width=500)
par(mfrow=c(2,1))
aff_unadj<-read.table("unadj.assoc.logistic", header=TRUE)
aff_unadj.add.p<-aff_unadj[aff_unadj$TEST==c("ADD"),]$P
broadqq(aff_unadj.add.p,"Some Trait Unadjusted")
aff_C1C2<-read.table("PC1-PC2.assoc.logistic", header=TRUE)
aff_C1C2.add.p<-aff_C1C2[aff_C1C2$TEST==c("ADD"),]$P
broadqq(aff_C1C2.add.p, "Some Trait Adjusted for PC1 and PC2")
dev.off()
gws_unadj = aff_unadj[which(aff_unadj$P < 0.0000001),]
gws_unadj
gws_adjusted = aff_C1C2[which(aff_C1C2$P < 0.0000001),]
gws_adjusted