Changes

AdvGeneMap2018Commands

32 bytes removed, 15:06, 23 January 2018
===Plink - Part 1 - Data QC===
plink --file GWAS plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5 plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5 plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2 plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking #### in R - open R by simply typing R setwd("to_your_working_directory/") sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T) names(sexcheck) sex_problem = sexcheck&#91;which(sexcheck$STATUS=="PROBLEM"),&#93; sex_problem q() ################################## plink --file GWAS_clean2 --genome --out duplicates #### in R setwd("to_your_working_directory/") dups = read.table("duplicates.genome", header = T) problem_pairs = dups&#91;which(dups$PI_HAT > &gt; 0.4),&#93; problem_pairs problem_pairs = dups&#91;which(dups$PI_HAT > &gt; 0.05),&#93; myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT") problem_pairs&#91;myvars&#93; q() ###### plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3 plink --file GWAS_clean3 --het ###### in R Dataset <&lt;- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".", strip.white=TRUE) mean(Dataset$F) sd(Dataset$F) jpeg("hist.jpeg", height=1000, width=1000) hist(scale(Dataset$F), xlim=c(-4,4)) dev.off() q() ###### plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy ##### in R hardy = read.table("plink.hwe", header = T) names(hardy) hwe_prob = hardy&#91;which(hardy$P < &lt; 0.0000009),&#93; hwe_prob q() ########## plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4 
===Plink - Part 2 - Controlling for Substructure===
plink --file GWAS_clean4 --genome --cluster --mds-plot 10 #### in R mydata = read.table("mds_components.txt", header=T) mydata$pch&#91;mydata$Group==1 &#93; <&lt;-15 mydata$pch&#91;mydata$Group==2 &#93; <&lt;-16 mydata$pch&#91;mydata$Group==3 &#93; <&lt;-2 jpeg("mds.jpeg", height=500, width=500) plot(mydata$C1, mydata$C2 ,pch=mydata$pch) dev.off() q() ###### plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj plink --file GWAS_clean4 --genome --cluster --pca 10 header plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1 --logistic --adjust --out PC1 plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.eigenvec --covar-name PC1-PC2 --logistic --adjust --out PC1-PC2 #### in R broadqq <&lt;-function(pvals, title) { observed <&lt;- sort(pvals) lobs <&lt;- -(log10(observed)) expected <&lt;- c(1:length(observed)) lexp <&lt;- -(log10(expected / (length(expected)+1))) plot(c(0,7), c(0,7), col="red", lwd=3, type="l", xlab="Expected (-logP)", ylab="Observed (-logP)", xlim=c(0,max(lobs)), ylim=c(0,max(lobs)), las=1, xaxs="i", yaxs="i", bty="l", main = title) points(lexp, lobs, pch=23, cex=.4, bg="black") } jpeg("qqplot_compare.jpeg", height=1000, width=500) par(mfrow=c(2,1)) aff_unadj<&lt;-read.table("unadj.assoc.logistic", header=TRUE) aff_unadj.add.p<&lt;-aff_unadj&#91;aff_unadj$TEST==c("ADD"),&#93;$P broadqq(aff_unadj.add.p,"Some Trait Unadjusted") aff_C1C2<&lt;-read.table("PC1-PC2.assoc.logistic", header=TRUE) aff_C1C2.add.p<&lt;-aff_C1C2&#91;aff_C1C2$TEST==c("ADD"),&#93;$P broadqq(aff_C1C2.add.p, "Some Trait Adjusted for PC1 and PC2") dev.off() gws_unadj = aff_unadj&#91;which(aff_unadj$P < &lt; 0.0000001),&#93; gws_unadj gws_adjusted = aff_C1C2&#91;which(aff_C1C2$P < &lt; 0.0000001),&#93; gws_adjusted
Bureaucrat, administrator
1,252
edits