Difference between revisions of "AdvGeneMap2018Commands"
From Statistical Genetics Courses
Serveradmin (Talk | contribs) |
Serveradmin (Talk | contribs) (→Data QC Plink) |
||
Line 1: | Line 1: | ||
__NOTITLE__ | __NOTITLE__ | ||
− | ==Data QC | + | ==Plink Data QC== |
#PLINK | #PLINK | ||
− | |||
plink --file GWAS | plink --file GWAS | ||
− | |||
plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind | plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind | ||
− | |||
plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5 | plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5 | ||
− | |||
plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5 | plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5 | ||
− | |||
plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean | plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean | ||
− | |||
plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean | plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean | ||
− | |||
plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean | plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean | ||
− | |||
plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2 | plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2 | ||
− | |||
plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking | plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking | ||
− | |||
#### in R - open R by simply typing R | #### in R - open R by simply typing R | ||
− | |||
setwd("to_your_working_directory/") | setwd("to_your_working_directory/") | ||
− | |||
sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T) | sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T) | ||
names(sexcheck) | names(sexcheck) | ||
sex_problem = sexcheck[which(sexcheck$STATUS=="PROBLEM"),] | sex_problem = sexcheck[which(sexcheck$STATUS=="PROBLEM"),] | ||
sex_problem | sex_problem | ||
− | |||
q() | q() | ||
− | |||
################################## | ################################## | ||
− | |||
plink --file GWAS_clean2 --genome --out duplicates | plink --file GWAS_clean2 --genome --out duplicates | ||
− | |||
#### in R | #### in R | ||
− | |||
setwd("to_your_working_directory/") | setwd("to_your_working_directory/") | ||
− | |||
dups = read.table("duplicates.genome", header = T) | dups = read.table("duplicates.genome", header = T) | ||
problem_pairs = dups[which(dups$PI_HAT > 0.4),] | problem_pairs = dups[which(dups$PI_HAT > 0.4),] | ||
Line 47: | Line 29: | ||
myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT") | myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT") | ||
problem_pairs[myvars] | problem_pairs[myvars] | ||
− | |||
q() | q() | ||
− | |||
###### | ###### | ||
− | |||
plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3 | plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3 | ||
− | |||
plink --file GWAS_clean3 --het | plink --file GWAS_clean3 --het | ||
− | |||
###### in R | ###### in R | ||
− | |||
Dataset <- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".", strip.white=TRUE) | Dataset <- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".", strip.white=TRUE) | ||
− | |||
mean(Dataset$F) | mean(Dataset$F) | ||
sd(Dataset$F) | sd(Dataset$F) | ||
− | |||
jpeg("hist.jpeg", height=1000, width=1000) | jpeg("hist.jpeg", height=1000, width=1000) | ||
hist(scale(Dataset$F), xlim=c(-4,4)) | hist(scale(Dataset$F), xlim=c(-4,4)) | ||
dev.off() | dev.off() | ||
− | |||
q() | q() | ||
− | |||
###### | ###### | ||
− | |||
plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy | plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy | ||
− | |||
##### in R | ##### in R | ||
− | |||
hardy = read.table("plink.hwe", header = T) | hardy = read.table("plink.hwe", header = T) | ||
names(hardy) | names(hardy) | ||
hwe_prob = hardy[which(hardy$P < 0.0000009),] | hwe_prob = hardy[which(hardy$P < 0.0000009),] | ||
hwe_prob | hwe_prob | ||
− | |||
− | |||
q() | q() | ||
− | |||
− | |||
########## | ########## | ||
− | |||
plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4 | plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4 | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− |
Revision as of 17:16, 22 January 2018
Plink Data QC
#PLINK plink --file GWAS plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5 plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5 plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2 plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking #### in R - open R by simply typing R setwd("to_your_working_directory/") sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T) names(sexcheck) sex_problem = sexcheck[which(sexcheck$STATUS=="PROBLEM"),] sex_problem q() ################################## plink --file GWAS_clean2 --genome --out duplicates #### in R setwd("to_your_working_directory/") dups = read.table("duplicates.genome", header = T) problem_pairs = dups[which(dups$PI_HAT > 0.4),] problem_pairs problem_pairs = dups[which(dups$PI_HAT > 0.05),] myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT") problem_pairs[myvars] q() ###### plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3 plink --file GWAS_clean3 --het ###### in R Dataset <- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".", strip.white=TRUE) mean(Dataset$F) sd(Dataset$F) jpeg("hist.jpeg", height=1000, width=1000) hist(scale(Dataset$F), xlim=c(-4,4)) dev.off() q() ###### plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy ##### in R hardy = read.table("plink.hwe", header = T) names(hardy) hwe_prob = hardy[which(hardy$P < 0.0000009),] hwe_prob q() ########## plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4