AdvGeneMap2018Commands

Data QC Plink

#PLINK

plink --file GWAS

plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind

plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5

plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5

plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean

plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean

plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean

plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2

plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking

#### in R - open R by simply typing R

setwd("to_your_working_directory/")

sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T)
names(sexcheck)
sex_problem = sexcheck[which(sexcheck$STATUS=="PROBLEM"),]
sex_problem

q()

##################################

plink --file GWAS_clean2 --genome --out duplicates

#### in R

setwd("to_your_working_directory/")

dups = read.table("duplicates.genome", header = T)
problem_pairs = dups[which(dups$PI_HAT > 0.4),]
problem_pairs
problem_pairs = dups[which(dups$PI_HAT > 0.05),]
myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT")
problem_pairs[myvars]

q()

######

plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3

plink --file GWAS_clean3 --het

###### in R

Dataset <- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".", strip.white=TRUE)

mean(Dataset$F)
sd(Dataset$F)

jpeg("hist.jpeg", height=1000, width=1000)
hist(scale(Dataset$F), xlim=c(-4,4))
dev.off()

q()

######

plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy 

##### in R

hardy = read.table("plink.hwe", header = T)
names(hardy)
hwe_prob = hardy[which(hardy$P < 0.0000009),]
hwe_prob


q()


##########

plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4

###############################################
##### Part 2: controlling for substructure#####
###############################################

plink --file GWAS_clean4 --genome --cluster --mds-plot 10

#### in R

mydata = read.table("mds_components.txt", header=T)

mydata$pch[mydata$Group==1 ] <-15
mydata$pch[mydata$Group==2 ] <-16
mydata$pch[mydata$Group==3 ] <-2

jpeg("mds.jpeg", height=500, width=500)
plot(mydata$C1, mydata$C2 ,pch=mydata$pch)
dev.off()

q()

######

plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj

plink --file GWAS_clean4 --genome --cluster --pca 10 header

plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1 --logistic --adjust --out PC1

plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1-PC2 --logistic --adjust --out PC1-PC2

#### in R

broadqq <-function(pvals, title)
{
	observed <- sort(pvals)
	lobs <- -(log10(observed))

	expected <- c(1:length(observed)) 
	lexp <- -(log10(expected / (length(expected)+1)))

	plot(c(0,7), c(0,7), col="red", lwd=3, type="l", xlab="Expected (-logP)", ylab="Observed (-logP)", xlim=c(0,max(lobs)), ylim=c(0,max(lobs)), las=1, xaxs="i", yaxs="i", bty="l", main = title)
	points(lexp, lobs, pch=23, cex=.4, bg="black") }

jpeg("qqplot_compare.jpeg", height=1000, width=500)
par(mfrow=c(2,1))
aff_unadj<-read.table("unadj.assoc.logistic", header=TRUE)
aff_unadj.add.p<-aff_unadj[aff_unadj$TEST==c("ADD"),]$P
broadqq(aff_unadj.add.p,"Some Trait Unadjusted")
aff_C1C2<-read.table("PC1-PC2.assoc.logistic", header=TRUE)
aff_C1C2.add.p<-aff_C1C2[aff_C1C2$TEST==c("ADD"),]$P
broadqq(aff_C1C2.add.p, "Some Trait Adjusted for PC1 and PC2")
dev.off()

gws_unadj = aff_unadj[which(aff_unadj$P < 0.0000001),]
gws_unadj
gws_adjusted = aff_C1C2[which(aff_C1C2$P < 0.0000001),]
gws_adjusted
Personal tools

Search

Namespaces

Views

Actions

Widgets

Widgets

Recent changes

Wanted pages

Who is online?

Tools

AdvGeneMap2018Commands

From Statistical Genetics Courses

Data QC Plink

Navigation menu

Short Courses

Software

Course Materials