# Load the GWAS.ped / GWAS.map fileset with no filters, as a first sanity check.
plink --file GWAS

# First pass on samples: drop individuals with more than 10% missing genotypes.
plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind

# Split the SNPs by minor allele frequency so each group can get its own
# missingness threshold. Common SNPs (MAF >= 0.05) go here ...
plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5

# ... and the remaining SNPs (everything not listed in MAF_greater_5.map) here.
# NOTE(review): --exclude is given a full 4-column .map file rather than a
# plain list of SNP IDs; confirm that your PLINK version accepts this.
plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5

# Per-SNP missingness filter: 5% allowed for the common SNPs ...
plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean

# ... and a stricter 1% for the low-frequency SNPs.
plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean

# Merge the two *filtered* SNP sets back into one dataset. The low-MAF input
# must be the _clean fileset written by the --geno 0.01 step above; merging the
# unfiltered MAF_less_5 files would silently undo that filter.
plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean

# Second, stricter pass on samples: drop individuals with more than 3% missing
# genotypes.
plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2

# Sex check; the report is written to GWAS_sex_checking.sexcheck.
plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking

####in R

# Read the sex-check report written by the plink --check-sex step and list
# every sample whose STATUS column is "PROBLEM".
sexcheck <- read.table("GWAS_sex_checking.sexcheck", header = TRUE)
names(sexcheck)

# which() keeps only rows where the comparison is TRUE (rows with NA are dropped).
flagged_rows <- which(sexcheck$STATUS == "PROBLEM")
sex_problem <- sexcheck[flagged_rows, ]
sex_problem

q()


#################################

# Pairwise relatedness estimates for all sample pairs; the output file
# duplicates.genome (with its PI_HAT column) is read by the R snippet below.
plink --file GWAS_clean2 --genome --out duplicates

####in R

# Inspect the pairwise relatedness table written by plink --genome.
setwd("path_to_your_working_directory/")
dups <- read.table("duplicates.genome", header = TRUE)

# Pairs with PI_HAT above 0.4.
high_pihat_rows <- which(dups$PI_HAT > 0.4)
problem_pairs <- dups[high_pihat_rows, ]
problem_pairs
dim(problem_pairs)  # first value is the number of pairs

# Same check with the much lower 0.05 cut-off, showing only the ID columns
# and PI_HAT. Note that this overwrites problem_pairs from the 0.4 check.
problem_pairs <- dups[which(dups$PI_HAT > 0.05), ]
myvars <- c("FID1", "IID1", "FID2", "IID2", "PI_HAT")
pairs_summary <- problem_pairs[myvars]
pairs_summary
dim(pairs_summary)  # first value is the number of pairs

q()

#### in R

# Summarise the F column of plink.het and plot its standardised distribution.
# NOTE(review): plink.het is not produced by any plink command visible in this
# file; presumably it comes from a `plink --het` run -- confirm before use.

# Call setwd() (the original `setwd=(...)` only created a variable named setwd
# and never changed the working directory).
setwd("path_to_your_working_directory/")
Dataset <- read.table("plink.het", header = TRUE, sep = "", na.strings = "NA",
                      dec = ".", strip.white = TRUE)

# Mean and standard deviation of F (the original `mean=(Dataset$F)` assigned
# the column to a variable named mean instead of computing the mean).
mean(Dataset$F)
sd(Dataset$F)

## Generate the histogram as a JPEG file (works in a LINUX environment)
jpeg("hist.jpeg", height = 1000, width = 1000)
# scale() centres F and divides by its SD, so the x axis is in SD units.
hist(scale(Dataset$F), xlim = c(-4, 4))
dev.off()

q()


########

# Hardy-Weinberg test, taking the phenotype from column "Aff" of pheno.txt.
# The option is spelled --pheno-name (two dashes); the R snippet below reads
# the resulting plink.hwe.
# NOTE(review): GWAS_clean3 is not created by any command visible in this file;
# it must already exist before this step is run.
plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy


#### in R

# List the rows of the Hardy-Weinberg report whose p-value is below the cut-off.
# NOTE(review): a later plink step in this file excludes the SNPs listed in
# HWE.out.txt; this snippet only prints the flagged rows and does not write
# that file.
hardy <- read.table("plink.hwe", header = TRUE)
names(hardy)

hwe_cutoff <- 0.0000009
failing_rows <- which(hardy$P < hwe_cutoff)
hwe_prob <- hardy[failing_rows, ]
hwe_prob

q()

########

# Remove the SNPs listed in HWE.out.txt and write the result as GWAS_clean4.
# NOTE(review): neither GWAS_clean3 nor HWE.out.txt is created by a command
# visible in this file; both must exist before this step is run.
plink --file GWAS_clean3 --exclude HWE.out.txt --recode --out GWAS_clean4


## Please copy the GWAS_clean4 PLINK files into your working directory for the next day



