Difference between revisions of "AdvGeneMap2016Commands"

Latest revision as of 20:50, 4 January 2017

GeneABEL

plink --file GWAS_clean4 --pheno pheno.phen --pheno-name Aff --transpose --recode --out gwa_gabel --noweb
plink --file GWAS_clean4 --pheno pheno.phen --pheno-name systolic --transpose --recode --out gwa_gabel_qtl --noweb
R
library(GenABEL)
convert.snp.tped(tped = "gwa_gabel_qtl.tped", tfam = "gwa_gabel_qtl.tfam", out = "gwa_gabel_qtl.raw", strand = "u")
g.dat <- load.gwaa.data(phen = "gwa_gabel_qtl.praw", gen = "gwa_gabel_qtl.raw", force = T)
slotNames(g.dat)
slotNames(g.dat@gtdata)
colnames(g.dat@phdata)
sample.size <- g.dat@gtdata@nids
snps.total <- g.dat@gtdata@nsnps
print(c(sample.size, snps.total))
summary(g.dat@phdata$disease)
hist(g.dat@phdata$disease, main="Quantitative Phenotype data summary", xlab = "Systolic pressure", freq = F,breaks=20, col="gray")
rug(g.dat@phdata$disease)
test.snp <- scan.glm('disease ~ CRSNP', family = gaussian(), data = g.dat)
names(test.snp)  
alpha <- 5e-8
test.snp$snpnames[test.snp$P1df < alpha]
test.snp$P1df[test.snp$P1df < alpha]
test.qt <- qtscore(disease, data = g.dat, trait = "gaussian")
slotNames(test.qt)
names(test.qt@results)
head(results(test.qt))
test.qt@lambda
descriptives.scan(test.qt)
row.names(results(test.qt))[results(test.qt)$P1df < alpha]
results(test.qt)$P1df[results(test.qt)$P1df < alpha] results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]
obs <- sort(results(test.qt)$P1df) 
ept <- ppoints(obs) 
plot(-log10(ept), -log10(obs), main = "GWAS QQ plot, qtl", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
plot(test.qt, col = "black")
test.qt.sex <- qtscore(disease ~ sex, data = g.dat, trait = "gaussian")
row.names(results(test.qt.sex))[results(test.qt)$P1df < alpha]
summary(lm(disease ~ sex, data = g.dat))
convert.snp.tped(tped = "gwa_gabel.tped", tfam = "gwa_gabel.tfam", out = "gwa_gabel.raw", strand = "u")
b.dat <- load.gwaa.data(phen = "gwa_gabel.praw", gen = "gwa_gabel.raw", force = T)
slotNames(b.dat)
slotNames(b.dat@gtdata)
colnames(b.dat@phdata)
b.dat@gtdata@nids
case.size <- length(which(b.dat@phdata$disease == 1))
control.size <- length(which(b.dat@phdata$disease == 0))
case.size 
control.size 
snpsb.total <- b.dat@gtdata@nsnps
testb.snp <- scan.glm('disease ~ CRSNP', family = binomial(), data = b.dat)
names(testb.snp)  
alpha <- 5e-8
testb.snp$snpnames[testb.snp$P1df < alpha]
testb.snp$P1df[testb.snp$P1df < alpha]
testb.qt <- qtscore(disease, data = b.dat, trait = "binomial")
slotNames(testb.qt)
descriptives.scan(testb.qt)
row.names(results(testb.qt))[results(testb.qt)$P1df < alpha]
results(testb.qt)$P1df[results(testb.qt)$P1df < alpha]
results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha]  
gkin <- ibs(g.dat, weight = "freq")
gkin[1:10,1:10]
cps.full <- cmdscale(as.dist(.5 - gkin), eig = T, k = 10)
names(cps.full)
cps <- cps.full$points
plot(cps[,1], cps[,2], pch = g.dat@phdata$popn)
legend("topright", c("TSI","MEX", "CEU"), pch = c(1,2,3))       
colnames(cps)<-c('C1','C2','C3','C4','C5','C6','C7','C8','C9','C10') 
gpc.dat <- g.dat
gpc.dat@phdata<-cbind(g.dat@phdata, cps)
test.pc.a <- scan.glm('disease ~ CRSNP + C1 + C2 + C3 + C4 + C5', family=gaussian(), data = gpc.dat)
test.pc.a$snpnames[test.pc.a$P1df < alpha]
test.pc.a$P1df[test.pc.a$P1df < alpha]
test.pc.b <- qtscore(disease ~  C1 + C2 + C3 + C4 + C5, data = gpc.dat, trait = "gaussian") 
test.pc.b@lambda
plot(cps.full$eig[1:10]/sum(cps.full$eig), axes = F, type = "b", xlab = "Components",  ylim = c(0,0.05), ylab = "Proportion of Variations", main = "MDS analysis scree plot") 
axis(1, 1:10)
axis(2)
plot(cumsum(cps.full$eig[1:10])/sum(cps.full$eig), axes = F, type = "b", ylim = c(0,0.2), xlab = "Components", ylab = "Proportion of Variations", main = "MDS analysis cumulative plot") 
axis(1, 1:10)
axis(2)
row.names(results(test.qt))[results(test.qt)$Pc1df < alpha]
results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]
test.qt@lambda
obs <- sort(results(test.qt)$chi2.1df)
ept <- sort(qchisq(ppoints(obs), df = 1)) 
plot(ept, obs, main = "Genomic control (lambda = slope of the dashed line)", xlab="Expected chisq, 1df", ylab="Observed chisq, 1df")
abline(0, 1, col = "red")
abline(0, test.qt@lambda[1], lty = 2)
median(results(test.qt)$chi2.1df)/0.456
obs <- sort(results(test.qt)$Pc1df)
ept <- ppoints(obs) 
plot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. via Genomic Control", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")
abline(0, 1, col = "red")
abline(h = 8, lty = 2) 
adj.gkin = gkin
diag(adj.gkin) = hom(g.dat)$Var
test.eg <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = 2)
descriptives.scan(test.eg)
snp.eg <- row.names(results(test.eg))[results(test.eg)$P1df < alpha]
pvalue.eg <- results(test.eg)$P1df[results(test.eg)$P1df < alpha] lambda.eg <- test.eg@lambda
snp.eg 
pvalue.eg
lambda.eg
for (k in 1:10){
 test.tmp <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = k)
print(test.tmp@lambda$estimate)
}
obs <- sort(results(test.eg)$Pc1df)
ept <- ppoints(obs) 
plot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. w/ EIGENSTRAT", xlab="Expected -log10(pvalue)", ylab="Observed -log10(pvalue)")
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
plot(test.qt, col = "black")
add.plot(test.eg, col = "gray", pch = 3)
legend("topright", c("Original plot","After correction w/ EIGENSTRAT"), pch = c(1,3))

GWAS Data QC

plink --file GWAS --noweb
plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind --noweb
plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5 --noweb
plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5 --noweb
plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean --noweb
plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean --noweb
plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean --noweb
plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2 --noweb
plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking --noweb
R
sexcheck = read.table("GWAS_sex_checking.sexcheck", header=T)
names(sexcheck)
sex_problem = sexcheck[which(sexcheck$STATUS=="PROBLEM"),]
sex_problem
q()
plink --file GWAS_clean2 --genome --out duplicates --noweb
R
dups = read.table("duplicates.genome", header = T)
problem_pairs = dups[which(dups$PI_HAT > 0.4),]
problem_pairs
problem_pairs = dups[which(dups$PI_HAT > 0.05),]
myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT")
problem_pairs[myvars]
q()
plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3 --noweb
plink --file GWAS_clean3 --het --noweb
R
Dataset <- read.table("plink.het", header=TRUE, sep="", na.strings="NA", dec=".",
strip.white=TRUE)
mean(Dataset$F)
sd(Dataset$F)
jpeg("hist.jpeg", height=1000, width=1000)
hist(scale(Dataset$F), xlim=c(-4,4))
dev.off()
q()
plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy --noweb
R
hardy = read.table("plink.hwe", header = T)
names(hardy)
hwe_prob = hardy[which(hardy$P < 0.0000009),]
hwe_prob
q()
plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4 --noweb

GWAS Control Substructure

plink --file GWAS_clean4 --genome --mds-plot 10 --noweb
R
mydata = read.table("mds_components.txt", header=T)
mydata$pch[mydata$Group==1 ] <-15
mydata$pch[mydata$Group==2 ] <-16
mydata$pch[mydata$Group==3 ] <-2
jpeg("mds.jpeg", height=1000, width=1000)
plot(mydata$C1, mydata$C2 ,pch=mydata$pch)
dev.off()
q()
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj --noweb
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.mds --covar-name C1 --logistic --adjust --out C1 --noweb
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.mds --covar-name C1-C2 --logistic --adjust --out C1-C2 --noweb
R
broadqq <-function(pvals, title)
{
    observed <- sort(pvals)
    lobs <- -(log10(observed))
    expected <- c(1:length(observed))
    lexp <- -(log10(expected / (length(expected)+1)))
    plot(c(0,7), c(0,7), col="red", lwd=3, type="l", xlab="Expected (-logP)", ylab="Observed (-logP)", xlim=c(0,max(lobs)), ylim=c(0,max(lobs)), las=1, xaxs="i", yaxs="i", bty="l", main = title)
    points(lexp, lobs, pch=23, cex=.4, bg="black") }
jpeg("qqplot_compare.jpeg", height=1000, width=1000)
par(mfrow=c(2,1))
aff_unadj<-read.table("unadj.assoc.logistic", header=TRUE)
aff_unadj.add.p<-aff_unadj[aff_unadj$TEST==c("ADD"),]$P
broadqq(aff_unadj.add.p,"Some Trait Unadjusted")
aff_C1C2<-read.table("C1-C2.assoc.logistic", header=TRUE)
aff_C1C2.add.p<-aff_C1C2[aff_C1C2$TEST==c("ADD"),]$P
broadqq(aff_C1C2.add.p, "Some Trait Adjusted")
dev.off()
gws_unadj = aff_unadj[which(aff_unadj$P < 0.0000001),]
gws_unadj
gws_adjusted = aff_C1C2[which(aff_C1C2$P < 0.0000001),]
gws_adjusted
q()

Personal tools

Search

Namespaces

Views

Actions

Widgets

Widgets

Recent changes

Wanted pages

Who is online?

Tools

Difference between revisions of "AdvGeneMap2016Commands"

From Statistical Genetics Courses

Latest revision as of 20:50, 4 January 2017

GeneABEL

GWAS Data QC

GWAS Control Substructure

Navigation menu

Short Courses

Software

Course Materials