Changes

From Statistical Genetics Courses

Jump to: navigation, search

AdvGeneMap2017Commands

10,935 bytes added, 20:43, 4 January 2017
Created page with "==GeneABEL== plink --file GWAS_clean4 --pheno pheno.phen --pheno-name Aff --transpose --recode --out gwa_gabel --noweb plink --file GWAS_clean4 --pheno pheno.phen --pheno-na..."
==GenABEL==
# Export the cleaned GWAS data in transposed (tped/tfam) format, taking the phenotype column "Aff" from pheno.phen.
plink --file GWAS_clean4 --pheno pheno.phen --pheno-name Aff --transpose --recode --out gwa_gabel --noweb
# Same export with the phenotype column "systolic"; written under the gwa_gabel_qtl prefix.
plink --file GWAS_clean4 --pheno pheno.phen --pheno-name systolic --transpose --recode --out gwa_gabel_qtl --noweb
# Start an interactive R session for the GenABEL analysis that follows.
R
# Quantitative-trait association analysis with GenABEL.
library(GenABEL)

# Convert the PLINK transposed genotype files into GenABEL's raw format.
convert.snp.tped(tped = "gwa_gabel_qtl.tped", tfam = "gwa_gabel_qtl.tfam", out = "gwa_gabel_qtl.raw", strand = "u")
# NOTE(review): gwa_gabel_qtl.praw is not written by any command shown on this
# page; it has to exist before this call.
g.dat <- load.gwaa.data(phen = "gwa_gabel_qtl.praw", gen = "gwa_gabel_qtl.raw", force = TRUE)

# Inspect the loaded object and report sample and SNP counts.
slotNames(g.dat)
slotNames(g.dat@gtdata)
colnames(g.dat@phdata)
sample.size <- g.dat@gtdata@nids
snps.total <- g.dat@gtdata@nsnps
print(c(sample.size, snps.total))

# Summarise and plot the phenotype column.
summary(g.dat@phdata$disease)
hist(g.dat@phdata$disease, main = "Quantitative Phenotype data summary", xlab = "Systolic pressure", freq = FALSE, breaks = 20, col = "gray")
rug(g.dat@phdata$disease)

# Per-SNP linear regression; CRSNP stands for the SNP currently being tested.
test.snp <- scan.glm('disease ~ CRSNP', family = gaussian(), data = g.dat)
names(test.snp)
alpha <- 5e-8
test.snp$snpnames[test.snp$P1df < alpha]
test.snp$P1df[test.snp$P1df < alpha]

# Score test for the same trait, then list hits below alpha.
test.qt <- qtscore(disease, data = g.dat, trait = "gaussian")
slotNames(test.qt)
names(test.qt@results)
head(results(test.qt))
test.qt@lambda
descriptives.scan(test.qt)
row.names(results(test.qt))[results(test.qt)$P1df < alpha]
# These two statements were fused on one line, which is a syntax error in R.
results(test.qt)$P1df[results(test.qt)$P1df < alpha]
results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]

# QQ plot of the unadjusted p-values, followed by the scan plot.
obs <- sort(results(test.qt)$P1df)
ept <- ppoints(obs)
plot(-log10(ept), -log10(obs), main = "GWAS QQ plot, qtl", xlab = "Expected -log10(pvalue)", ylab = "Observed -log10(pvalue)")
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
plot(test.qt, col = "black")

# Repeat the score test with sex as a covariate. The significance filter now
# uses the sex-adjusted p-values (it previously indexed with test.qt's).
test.qt.sex <- qtscore(disease ~ sex, data = g.dat, trait = "gaussian")
row.names(results(test.qt.sex))[results(test.qt.sex)$P1df < alpha]
# Fit on the phenotype data frame itself, which is what lm() expects.
summary(lm(disease ~ sex, data = g.dat@phdata))
# Case/control association analysis with GenABEL.

# Convert the PLINK transposed files exported under the gwa_gabel prefix.
convert.snp.tped(tped = "gwa_gabel.tped", tfam = "gwa_gabel.tfam", out = "gwa_gabel.raw", strand = "u")
# NOTE(review): gwa_gabel.praw is not written by any command shown on this
# page; it has to exist before this call.
b.dat <- load.gwaa.data(phen = "gwa_gabel.praw", gen = "gwa_gabel.raw", force = TRUE)

# Inspect the loaded object.
slotNames(b.dat)
slotNames(b.dat@gtdata)
colnames(b.dat@phdata)
b.dat@gtdata@nids

# Count cases (disease == 1) and controls (disease == 0).
case.size <- length(which(b.dat@phdata$disease == 1))
control.size <- length(which(b.dat@phdata$disease == 0))
case.size
control.size
snpsb.total <- b.dat@gtdata@nsnps

# Per-SNP logistic regression; CRSNP stands for the SNP currently being tested.
testb.snp <- scan.glm('disease ~ CRSNP', family = binomial(), data = b.dat)
names(testb.snp)
alpha <- 5e-8
testb.snp$snpnames[testb.snp$P1df < alpha]
testb.snp$P1df[testb.snp$P1df < alpha]

# Score test for the binary trait, then list hits below alpha.
testb.qt <- qtscore(disease, data = b.dat, trait = "binomial")
slotNames(testb.qt)
descriptives.scan(testb.qt)
row.names(results(testb.qt))[results(testb.qt)$P1df < alpha]
results(testb.qt)$P1df[results(testb.qt)$P1df < alpha]
results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha]
# Population structure: IBS matrix, classical MDS, and tests adjusted for the
# leading components. Uses g.dat and alpha from earlier in the session.

gkin <- ibs(g.dat, weight = "freq")
gkin[1:10, 1:10]

# Classical MDS on the distance 0.5 - IBS, keeping 10 dimensions and eigenvalues.
cps.full <- cmdscale(as.dist(0.5 - gkin), eig = TRUE, k = 10)
names(cps.full)
cps <- cps.full$points

# Plot the first two components with the plotting symbol taken from popn.
# NOTE(review): the legend assumes popn codes 1, 2, 3 mean TSI, MEX, CEU - confirm.
plot(cps[, 1], cps[, 2], pch = g.dat@phdata$popn)
legend("topright", c("TSI", "MEX", "CEU"), pch = c(1, 2, 3))

# Attach the components to a copy of the data as phenotype columns C1..C10.
colnames(cps) <- paste0("C", 1:10)
gpc.dat <- g.dat
gpc.dat@phdata <- cbind(g.dat@phdata, cps)

# Association tests with the first five components as covariates.
test.pc.a <- scan.glm('disease ~ CRSNP + C1 + C2 + C3 + C4 + C5', family = gaussian(), data = gpc.dat)
test.pc.a$snpnames[test.pc.a$P1df < alpha]
test.pc.a$P1df[test.pc.a$P1df < alpha]
test.pc.b <- qtscore(disease ~ C1 + C2 + C3 + C4 + C5, data = gpc.dat, trait = "gaussian")
test.pc.b@lambda

# Scree plot: share of the eigenvalue sum carried by each of the first 10 components.
plot(cps.full$eig[1:10] / sum(cps.full$eig), axes = FALSE, type = "b", xlab = "Components", ylim = c(0, 0.05), ylab = "Proportion of Variations", main = "MDS analysis scree plot")
axis(1, 1:10)
axis(2)

# Cumulative version of the same plot.
plot(cumsum(cps.full$eig[1:10]) / sum(cps.full$eig), axes = FALSE, type = "b", ylim = c(0, 0.2), xlab = "Components", ylab = "Proportion of Variations", main = "MDS analysis cumulative plot")
axis(1, 1:10)
axis(2)
# Genomic control: inspect the corrected p-values (Pc1df) and the inflation
# factor of the unadjusted scan. Uses test.qt and alpha from earlier.

row.names(results(test.qt))[results(test.qt)$Pc1df < alpha]
results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]
test.qt@lambda

# Chi-square QQ plot; the dashed line has slope lambda.
obs <- sort(results(test.qt)$chi2.1df)
ept <- sort(qchisq(ppoints(obs), df = 1))
plot(ept, obs, main = "Genomic control (lambda = slope of the dashed line)", xlab = "Expected chisq, 1df", ylab = "Observed chisq, 1df")
abline(0, 1, col = "red")
abline(0, test.qt@lambda[1], lty = 2)

# Median-based estimate of lambda; 0.456 approximates the median of a 1-df
# chi-square distribution (qchisq(0.5, 1) is about 0.455).
median(results(test.qt)$chi2.1df) / 0.456

# QQ plot of the genomic-control-corrected p-values.
obs <- sort(results(test.qt)$Pc1df)
ept <- ppoints(obs)
plot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. via Genomic Control", xlab = "Expected -log10(pvalue)", ylab = "Observed -log10(pvalue)")
abline(0, 1, col = "red")
abline(h = 8, lty = 2)
# EIGENSTRAT-style adjustment with egscore(). Uses gkin, g.dat, test.qt and
# alpha from earlier in the session.

# Copy the IBS matrix and replace its diagonal with hom(g.dat)$Var.
adj.gkin <- gkin
diag(adj.gkin) <- hom(g.dat)$Var

# Score test adjusted for the first two axes of variation.
test.eg <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = 2)
descriptives.scan(test.eg)
snp.eg <- row.names(results(test.eg))[results(test.eg)$P1df < alpha]
pvalue.eg <- results(test.eg)$P1df[results(test.eg)$P1df < alpha]
lambda.eg <- test.eg@lambda
snp.eg
pvalue.eg
lambda.eg

# Show how the inflation estimate changes with 1 to 10 axes.
for (k in 1:10) {
    test.tmp <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = k)
    print(test.tmp@lambda$estimate)
}

# QQ plot of the corrected p-values from the adjusted scan.
obs <- sort(results(test.eg)$Pc1df)
ept <- ppoints(obs)
plot(-log10(ept), -log10(obs), main = "GWAS QQ plot adj. w/ EIGENSTRAT", xlab = "Expected -log10(pvalue)", ylab = "Observed -log10(pvalue)")
abline(0, 1, col = "red")
abline(h = 8, lty = 2)

# Overlay the adjusted scan on the original one.
plot(test.qt, col = "black")
add.plot(test.eg, col = "gray", pch = 3)
legend("topright", c("Original plot","After correction w/ EIGENSTRAT"), pch = c(1,3))

==Imputation exercise==
# Load the chromosome 22 dataset with no filters; the log gives its basic summary.
plink --file chr22_imputation_ex --noweb
# Dry run of the QC thresholds (MAF 1%, sample missingness 2%, SNP missingness 5%, HWE p 0.001); only the qc_check log is kept.
plink --file chr22_imputation_ex --maf 0.01 --mind 0.02 --geno 0.05 --hwe 0.001 --out qc_check --noweb
# Apply the same thresholds and write the filtered data as chr22_clean1.
plink --file chr22_imputation_ex --maf 0.01 --mind 0.02 --geno 0.05 --hwe 0.001 --recode --out chr22_clean1 --noweb
# Re-run the thresholds on the cleaned data to check what a second pass would still remove.
plink --file chr22_clean1 --maf 0.01 --mind 0.02 --geno 0.05 --hwe 0.001 --out qc_check_2 --noweb
# Extract cases only, applying the HWE filter within them.
plink --file chr22_clean1 --filter-cases --hwe 0.001 --recode --out chr22_cases_clean --noweb
# Extract controls only (no HWE filter on this step).
plink --file chr22_clean1 --filter-controls --recode --out chr22_controls_clean --noweb
# Merge controls and cases back together, apply the HWE filter again, and write chr22_all_clean.
plink --file chr22_controls_clean --merge chr22_cases_clean.ped chr22_cases_clean.map --hwe 0.001 --recode --out chr22_all_clean --noweb
# Logistic regression on the genotyped SNPs; results go to chr22_all_clean_geno.assoc.logistic.
plink --file chr22_all_clean --logistic --out chr22_all_clean_geno --noweb
# Start R to look at the association results.
R
# Read the logistic-regression results and look at the strongest signals.
mydata <- read.table("chr22_all_clean_geno.assoc.logistic", header = TRUE)
names(mydata)

# Association signal along the chromosome.
plot(mydata$BP, -log10(mydata$P))

# Rows with P below 1e-4: first in file order, then ordered by base-pair position.
hit.rows <- which(mydata$P < 1E-4)
smallp <- mydata[hit.rows, ]
smallp
bp.order <- order(smallp$BP)
smallp <- smallp[bp.order, ]
smallp
q()
# Impute with MACH against the phased CEU chromosome 22 reference (HapMap format), 100 rounds; output files use the prefix chr22_HIHII.
mach1 --hapmapFormat -d chr22_mach_merlin.map -p chr22_mach_merlin.ped --haps genotypes_chr22_CEU_r22_nr.b36_fwd.phase.gz --snps genotypes_chr22_CEU_r22_nr.b36_fwd_legend.txt.gz --greedy --rounds 100 --mle --mldetails --autoflip -o chr22_HIHII
# Association test on the imputed dosages (Zin = gzipped input).
# NOTE(review): chr22_HIHII_dose_mach4plink.txt.gz and chr22_imputed_snps_positions.map are not produced by any command shown on this page.
plink --dosage chr22_HIHII_dose_mach4plink.txt.gz Zin --fam chr22_imputation_ex.fam --map chr22_imputed_snps_positions.map --out chr22_HIHII_dosage --noweb
# Start R to look at the dosage association results.
R
# Read the dosage association results and look at genome-wide significant SNPs.
dosage <- read.table("chr22_HIHII_dosage.assoc.dosage", header = TRUE)
names(dosage)

# Association signal along the chromosome.
plot(dosage$BP, -log10(dosage$P))

# SNPs with P below 5e-8, ordered by base-pair position. These two statements
# were fused on one line, which is a syntax error in R.
dosagep <- dosage[which(dosage$P < 5E-8), ]
dosagep <- dosagep[order(dosagep$BP), ]
dosagep

# Result row for one SNP of interest.
interest <- dosage[which(dosage$SNP == 'rs715586'), ]
interest

==GWAS Data QC==
# Load the raw GWAS dataset with no filters; the log gives its basic summary.
plink --file GWAS --noweb
# Remove individuals missing more than 10% of their genotypes.
plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind --noweb
# Keep SNPs with minor allele frequency of at least 5%.
plink --file GWAS_clean_mind --maf 0.05 --recode --out MAF_greater_5 --noweb
# Exclude the SNPs listed in MAF_greater_5.map, leaving the SNPs with MAF below 5%.
plink --file GWAS_clean_mind --exclude MAF_greater_5.map --recode --out MAF_less_5 --noweb
# Common SNPs: drop those missing in more than 5% of individuals.
plink --file MAF_greater_5 --geno 0.05 --recode --out MAF_greater_5_clean --noweb
# Rarer SNPs: stricter 1% missingness threshold.
plink --file MAF_less_5 --geno 0.01 --recode --out MAF_less_5_clean --noweb
# Merge the two cleaned SNP sets back into one dataset.
plink --file MAF_greater_5_clean --merge MAF_less_5_clean.ped MAF_less_5_clean.map --recode --out GWAS_MAF_clean --noweb
# Second, stricter sample filter: remove individuals missing more than 3% of genotypes.
plink --file GWAS_MAF_clean --mind 0.03 --recode --out GWAS_clean2 --noweb
# Compare reported sex with sex inferred from the genotypes; results go to GWAS_sex_checking.sexcheck.
plink --file GWAS_clean2 --check-sex --out GWAS_sex_checking --noweb
# Start R to list the samples flagged by the sex check.
R
# List the samples whose sex check is flagged as PROBLEM.
sexcheck <- read.table("GWAS_sex_checking.sexcheck", header = TRUE)
names(sexcheck)
flagged.rows <- which(sexcheck$STATUS == "PROBLEM")
sex_problem <- sexcheck[flagged.rows, ]
sex_problem
q()
# Pairwise relatedness estimates for every pair of samples; written to duplicates.genome.
plink --file GWAS_clean2 --genome --out duplicates --noweb
# Start R to look for duplicated or related pairs.
R
# Find sample pairs with high PI_HAT in the pairwise relatedness output.
dups <- read.table("duplicates.genome", header = TRUE)

# Pairs with PI_HAT above 0.4, printed with every column.
close.rows <- which(dups$PI_HAT > 0.4)
problem_pairs <- dups[close.rows, ]
problem_pairs

# Looser threshold (PI_HAT above 0.05); print only the IDs and PI_HAT.
loose.rows <- which(dups$PI_HAT > 0.05)
problem_pairs <- dups[loose.rows, ]
myvars <- c("FID1", "IID1", "FID2", "IID2", "PI_HAT")
problem_pairs[myvars]
q()
# Remove the individuals listed in IBS_excluded.txt.
# NOTE(review): IBS_excluded.txt is not produced by any command shown on this page; it has to be prepared by hand.
plink --file GWAS_clean2 --remove IBS_excluded.txt --recode --out GWAS_clean3 --noweb
# Per-individual heterozygosity / inbreeding coefficient F; written to plink.het.
plink --file GWAS_clean3 --het --noweb
# Start R to examine the distribution of F.
R
# Examine the F column of the heterozygosity output.
Dataset <- read.table("plink.het", header = TRUE, sep = "", na.strings = "NA", dec = ".",
                      strip.white = TRUE)
mean(Dataset$F)
sd(Dataset$F)

# Histogram of the standardised F values, saved to hist.jpeg.
jpeg("hist.jpeg", height = 1000, width = 1000)
hist(scale(Dataset$F), xlim = c(-4, 4))
dev.off()
q()
# Hardy-Weinberg test statistics per SNP, using the phenotype column Aff from pheno.txt; written to plink.hwe.
plink --file GWAS_clean3 --pheno pheno.txt --pheno-name Aff --hardy --noweb
# Start R to list the SNPs that fail the test.
R
# List the rows of the Hardy-Weinberg output with P below 0.0000009.
hardy <- read.table("plink.hwe", header = TRUE)
names(hardy)
failing.rows <- which(hardy$P < 0.0000009)
hwe_prob <- hardy[failing.rows, ]
hwe_prob
q()
# Exclude the SNPs listed in HWE_out.txt and write the final cleaned dataset GWAS_clean4.
# NOTE(review): HWE_out.txt is not produced by any command shown on this page; it has to be prepared by hand.
plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4 --noweb


==GWAS Control Substructure==
# Pairwise relatedness plus multidimensional scaling with 10 components; the components are written to plink.mds.
plink --file GWAS_clean4 --genome --mds-plot 10 --noweb
# Start R to plot the first two components.
R
# Plot the first two MDS components, one plotting symbol per group.
# NOTE(review): mds_components.txt (with a Group column) is not produced by any
# command shown on this page; the plink step above writes plink.mds.
mydata <- read.table("mds_components.txt", header = TRUE)

# Plotting symbol by group code.
mydata$pch[mydata$Group == 1] <- 15
mydata$pch[mydata$Group == 2] <- 16
mydata$pch[mydata$Group == 3] <- 2

jpeg("mds.jpeg", height = 1000, width = 1000)
plot(mydata$C1, mydata$C2, pch = mydata$pch)
dev.off()
q()
# Logistic regression on Aff with no covariates; --adjust also writes multiple-testing-adjusted p-values. Output prefix: unadj.
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --logistic --adjust --out unadj --noweb
# Same test with MDS component C1 from plink.mds as a covariate. Output prefix: C1.
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.mds --covar-name C1 --logistic --adjust --out C1 --noweb
# Same test with components C1 through C2 as covariates. Output prefix: C1-C2.
plink --file GWAS_clean4 --pheno pheno.txt --pheno-name Aff --covar plink.mds --covar-name C1-C2 --logistic --adjust --out C1-C2 --noweb
# Start R to compare QQ plots of the unadjusted and adjusted results.
R
# Draw a QQ plot of observed against expected -log10 p-values.
#
# pvals: numeric vector of p-values; NA values are dropped by sort().
# title: main title of the plot.
#
# The expected quantiles are i / (n + 1) for i = 1..n. The red identity line
# spans the full plotted range (it was previously hard-coded to end at 7, so
# it stopped short whenever the largest observed -log10(p) exceeded 7).
# Stops with an error when there are no non-missing p-values.
# Returns NULL (the value of points()); called for its plotting side effect.
broadqq <- function(pvals, title) {
    observed <- sort(pvals)
    if (length(observed) == 0) {
        stop("broadqq: no non-missing p-values to plot", call. = FALSE)
    }
    lobs <- -(log10(observed))
    expected <- seq_along(observed)
    lexp <- -(log10(expected / (length(expected) + 1)))
    upper <- max(lobs)
    plot(c(0, upper), c(0, upper), col = "red", lwd = 3, type = "l", xlab = "Expected (-logP)", ylab = "Observed (-logP)", xlim = c(0, upper), ylim = c(0, upper), las = 1, xaxs = "i", yaxs = "i", bty = "l", main = title)
    points(lexp, lobs, pch = 23, cex = .4, bg = "black")
}
# Compare QQ plots of the unadjusted and C1-C2-adjusted logistic results,
# stacked in one image. Relies on broadqq() being defined in this session.
jpeg("qqplot_compare.jpeg", height = 1000, width = 1000)
par(mfrow = c(2, 1))

# Unadjusted model: p-values of the rows where TEST is ADD.
aff_unadj <- read.table("unadj.assoc.logistic", header = TRUE)
aff_unadj.add.p <- aff_unadj[aff_unadj$TEST == c("ADD"), ]$P
broadqq(aff_unadj.add.p, "Some Trait Unadjusted")

# Model adjusted for C1 and C2: same selection.
aff_C1C2 <- read.table("C1-C2.assoc.logistic", header = TRUE)
aff_C1C2.add.p <- aff_C1C2[aff_C1C2$TEST == c("ADD"), ]$P
broadqq(aff_C1C2.add.p, "Some Trait Adjusted")
dev.off()

# Rows with P below 0.0000001 in each result table. The filter runs over every
# row of the table, not only those where TEST is ADD.
gws_unadj <- aff_unadj[which(aff_unadj$P < 0.0000001), ]
gws_unadj
# This assignment and the print below were fused on one line, which is a
# syntax error in R.
gws_adjusted <- aff_C1C2[which(aff_C1C2$P < 0.0000001), ]
gws_adjusted
q()