Changes

AdvGeneMap2018Commands

1,350 bytes added, 14:57, 23 January 2018
__NOTITLE__
===Plink - Part 1 - Data QC==  #PLINK=
plink --file GWAS
plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5 plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5 plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2 plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking #### in R - open R by simply typing R setwd("to_your_working_directory/") sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T) names(sexcheck) sex_problem = sexcheck[which(sexcheck$STATUS=="PROBLEM"),] sex_problem q() ################################## plink --file GWAS_clean2 --genome --out duplicates #### in R setwd("to_your_working_directory/") dups = read.table("duplicates.genome", header = T) problem_pairs = dups[which(dups$PI_HAT > 0.4),] problem_pairs problem_pairs = dups[which(dups$PI_HAT > 0.05),] myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT") problem_pairs[myvars] q() ###### plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3 plink --file GWAS_clean3 --het ###### in R Dataset===Plink Part 2 - Controlling for Substructure===  plink --file GWAS_clean4 --genome --cluster --mds-plot 10
#### in R
mydata setwd("to_your_working_directory/") dups = read.table("mds_componentsduplicates.txtgenome", header=T) mydata$pchproblem_pairs = dups[mydatawhich(dups$Group==1 ] observed¶ lobsexpected ¶ lexp plot(c(PI_HAT > 0,7.4), c] problem_pairs problem_pairs = dups[which(dups$PI_HAT > 0,7.05), col] myvars =c("redFID1", lwd=3, type="lIID1", xlab="Expected (-logP)FID2", ylab="Observed (-logP)IID2", xlim=c"PI_HAT") problem_pairs[myvars] q(0,max(lobs)), ylim=c ###### plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3 plink --file GWAS_clean3 --het ###### in R Dataset <- read.table(0"plink.het",max(lobs)), lasheader=1TRUE, xaxssep="i", yaxsna.strings="iNA", btydec="l.", main strip.white= titleTRUE)points mean(lexpDataset$F) sd(Dataset$F) jpeg("hist.jpeg", lobsheight=1000, pchwidth=231000) hist(scale(Dataset$F), cexxlim=.c(-4, bg4)) dev.off() q() ###### plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy ##### in R hardy =read.table("blackplink.hwe", header = T) } names(hardy) hwe_prob = hardy[which(hardy$P < 0.0000009),] hwe_prob q() ########## plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4
===Plink - Part 2 - Controlling for Substructure===
plink --file GWAS_clean4 --genome --cluster --mds-plot 10
#### in R
mydata = read.table("mds_components.txt", header=T)
mydata$pch[mydata$Group==1 ] <-15
mydata$pch[mydata$Group==2 ] <-16
mydata$pch[mydata$Group==3 ] <-2
jpeg("mds.jpeg", height=500, width=500)
plot(mydata$C1, mydata$C2 ,pch=mydata$pch)
dev.off()
q()
######
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj
plink --file GWAS_clean4 --genome --cluster --pca 10 header
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1 --logistic --adjust --out PC1
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1-PC2 --logistic --adjust --out PC1-PC2
#### in R
broadqq <-function(pvals, title)
{
observed <- sort(pvals)
lobs <- -(log10(observed))
expected <- c(1:length(observed))
lexp <- -(log10(expected / (length(expected)+1)))
plot(c(0,7), c(0,7), col="red", lwd=3, type="l", xlab="Expected (-logP)", ylab="Observed (-logP)", xlim=c(0,max(lobs)), ylim=c(0,max(lobs)), las=1, xaxs="i", yaxs="i", bty="l", main = title)
points(lexp, lobs, pch=23, cex=.4, bg="black") }
jpeg("qqplot_compare.jpeg", height=1000, width=500)
par(mfrow=c(2,1))
Bureaucrat, administrator
1,252
edits