Changes - Statistical Genetics Courses

AdvGeneMap2018Commands

1,350 bytes added, 14:57, 23 January 2018

__NOTITLE__

===Plink - Part 1 - Data QC== ~~#PLINK~~=

plink --file GWAS

plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5 plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5 plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2 plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking #### in R - open R by simply typing R setwd("to_your_working_directory/") sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T) names(sexcheck) sex_problem = sexcheck[which(sexcheck$STATUS=="PROBLEM"),] sex_problem q() ################################## plink --file GWAS_clean2 --genome --out duplicates ~~#### in R~~ ~~setwd("to_your_working_directory/")~~ ~~dups = read.table("duplicates.genome", header = T)~~ ~~problem_pairs = dups[which(dups$PI_HAT > 0.4),]~~ ~~problem_pairs~~ ~~problem_pairs = dups[which(dups$PI_HAT > 0.05),]~~ ~~myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT")~~ ~~problem_pairs[myvars]~~ ~~q()~~ ~~######~~ ~~plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3~~ ~~plink --file GWAS_clean3 --het~~ ~~###### in R~~ ~~Dataset===Plink Part 2 - Controlling for Substructure===~~ ~~plink --file GWAS_clean4 --genome --cluster --mds-plot 10~~

#### in R

~~mydata~~ setwd("to_your_working_directory/") dups = read.table("~~mds_components~~duplicates.~~txt~~genome", header=T) ~~mydata$pch~~problem_pairs = dups[~~mydata~~which(dups$~~Group==1 ] observed¶ lobsexpected ¶ lexp plot(c(~~PI_HAT > 0,7.4), c] problem_pairs problem_pairs = dups[which(dups$PI_HAT > 0,7.05), ~~col~~] myvars =c("~~red~~FID1", ~~lwd=3, type=~~"lIID1", ~~xlab=~~"~~Expected (-logP)~~FID2", ~~ylab=~~"~~Observed (-logP)~~IID2", ~~xlim=c~~"PI_HAT") problem_pairs[myvars] q(~~0,max(lobs)~~)~~, ylim=c~~ ###### plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3 plink --file GWAS_clean3 --het ###### in R Dataset <- read.table(0"plink.het",~~max(lobs)), las~~header=1TRUE, ~~xaxs~~sep="i", ~~yaxs~~na.strings="iNA", ~~bty~~dec="l.", ~~main~~ strip.white= ~~title~~TRUE)~~points~~ mean(~~lexp~~Dataset$F) sd(Dataset$F) jpeg("hist.jpeg", ~~lobs~~height=1000, ~~pch~~width=231000) hist(scale(Dataset$F), ~~cex~~xlim=.c(-4, bg4)) dev.off() q() ###### plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy ##### in R hardy =read.table("~~black~~plink.hwe", header = T) } names(hardy) hwe_prob = hardy[which(hardy$P < 0.0000009),] hwe_prob q() ########## plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4

===Plink - Part 2 - Controlling for Substructure===

plink --file GWAS_clean4 --genome --cluster --mds-plot 10

#### in R

mydata = read.table("mds_components.txt", header=T)

mydata$pch[mydata$Group==1 ] <-15

mydata$pch[mydata$Group==2 ] <-16

mydata$pch[mydata$Group==3 ] <-2

jpeg("mds.jpeg", height=500, width=500)

plot(mydata$C1, mydata$C2 ,pch=mydata$pch)

dev.off()

q()

######

plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj

plink --file GWAS_clean4 --genome --cluster --pca 10 header

plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1 --logistic --adjust --out PC1

plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1-PC2 --logistic --adjust --out PC1-PC2

#### in R

broadqq <-function(pvals, title)

{

observed <- sort(pvals)

lobs <- -(log10(observed))

expected <- c(1:length(observed))

lexp <- -(log10(expected / (length(expected)+1)))

plot(c(0,7), c(0,7), col="red", lwd=3, type="l", xlab="Expected (-logP)", ylab="Observed (-logP)", xlim=c(0,max(lobs)), ylim=c(0,max(lobs)), las=1, xaxs="i", yaxs="i", bty="l", main = title)

points(lexp, lobs, pch=23, cex=.4, bg="black") }

jpeg("qqplot_compare.jpeg", height=1000, width=500)

par(mfrow=c(2,1))

Serveradmin

Bureaucrat, administrator

1,252

edits