Changes - Statistical Genetics Courses

AdvGeneMap2018Commands

143 bytes removed, 15:16, 23 January 2018

__NOTITLE__

__FORCETOC__

===GenABEL===

# Load files

library(GenABEL)

convert.snp.tped(tped = "gwa_gabel_qtl.tped", tfam = "gwa_gabel_qtl.tfam", out = "gwa_gabel_qtl.raw", strand = "u")

g.dat <- load.gwaa.data(phen = "gwa_gabel_qtl.praw", gen = "gwa_gabel_qtl.raw", force = T)

slotNames(g.dat)

slotNames(g.dat@gtdata)

colnames(g.dat@phdata)

# sample size

sample.size <- g.dat@gtdata@nids

hist(g.dat@phdata$disease, main="Quantitative Phenotype data summary", xlab = "Systolic pressure measure", freq = F,breaks=20, col="gray")

rug(g.dat@phdata$disease)

###

# tests for association

names(test.snp)

alpha <- 5e-8

test.snp$snpnames[test.snp$P1df < alpha]

test.snp$P1df[test.snp$P1df < alpha]

# Score test

test.qt <- qtscore(disease, data = g.dat, trait = "gaussian")

slotNames(test.qt)

names(test.qt@results)

test.qt@lambda

descriptives.scan(test.qt)

rownames(results(test.qt))[results(test.qt)$P1df < alpha]

results(test.qt)$P1df[results(test.qt)$P1df < alpha]

results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]

# QQ plot

obs <- sort(results(test.qt)$P1df)

# Manhattan plot

plot(test.qt, col = "black")

# Adding confounders

test.qt.sex <- qtscore(disease ~ sex, data = g.dat, trait = "gaussian")

rownames(results(test.qt.sex))[results(test.qt)$P1df < alpha]

summary(lm(disease ~ sex, data = g.dat))

###

# MDS

###

gkin <- ibs(g.dat, weight = "freq")

gkin[1:10,1:10]

cps.full <- cmdscale(as.dist(.5 - gkin), eig = T, k = 10)

names(cps.full)

cps <- cps.full$points

plot(cps[,1], cps[,2], pch = g.dat@phdata$popn)

legend(-0.16, 0.06, c("TSI","MEX", "CEU"), pch = c(1,2,3))

###

# Corrected test

###

# Incorporating PCs as predictors

colnames(cps)<-c('C1','C2','C3','C4','C5','C6','C7','C8','C9','C10')

gpc.dat <- g.dat

gpc.dat@phdata<-cbind(g.dat@phdata, cps)

test.pc.a <- scan.glm('disease ~ CRSNP + C1 + C2 + C3 + C4 + C5', family=gaussian(), data = gpc.dat)

test.pc.a$snpnames[test.pc.a$P1df < alpha]

test.pc.a$P1df[test.pc.a$P1df < alpha]

test.pc.b <- qtscore(disease ~ C1 + C2 + C3 + C4 + C5, data = gpc.dat, trait = "gaussian")

test.pc.b@lambda

# scree plot

plot(cps.full$eig[1:10]/sum(cps.full$eig), axes = F, type = "b", xlab = "Components", ylim = c(0,0.05), ylab = "Proportion of Variations", main = "MDS analysis scree plot")

axis(1, 1:10)

axis(2)

# cumulative plot

plot(cumsum(cps.full$eig[1:10])/sum(cps.full$eig), axes = F, type = "b", ylim = c(0,0.2), xlab = "Components", ylab = "Proportion of Variations", main = "MDS analysis cumulative plot")

axis(1, 1:10)

axis(2)

# Genomic control

# Uncorrected GIF

test.qt@lambda

# Corrected p-value

row.names(results(test.qt))[results(test.qt)$Pc1df < alpha]

results(test.qt)$Pc1df[results(test.qt)$Pc1df < alpha]

# Check for inflation of statistic

obs <- sort(results(test.qt)$chi2.1df)

abline(0, 1, col = "red")

abline(0, test.qt@lambda[1], lty = 2)

# Definition of GIF

# Conventional definition

# GenABEL definition

lm(obs~ept)$coef[2]

# QQ plot

obs <- sort(results(test.qt)$Pc1df)

abline(0, 1, col = "red")

abline(h = 8, lty = 2)

# EIGENSTRAT

adj.gkin = gkin

diag(adj.gkin) = hom(g.dat)$Var

# naxes = 3 is default value

test.eg <- egscore(disease, data = g.dat, kin = adj.gkin, naxes = 2)

descriptives.scan(test.eg)

snp.eg <- row.names(results(test.eg))[results(test.eg)$P1df < alpha]

pvalue.eg <- results(test.eg)$P1df[results(test.eg)$P1df < alpha]

lambda.eg <- test.eg@lambda

snp.eg

pvalue.eg

lambda.eg

# Change #PCs

for (k in 1:10){

print(test.tmp@lambda$estimate)

}

# QQ plot

obs <- sort(results(test.eg)$Pc1df)

add.plot(test.eg, col = "gray", pch = 3)

legend("topright", c("Original plot","After correction w/ EIGENSTRAT"), pch = c(1,3))

###

# Basic test, binary trait

###

# load files to GenABEL

convert.snp.tped(tped = "gwa_gabel.tped", tfam = "gwa_gabel.tfam", out = "gwa_gabel.raw", strand = "u")

b.dat <- load.gwaa.data(phen = "gwa_gabel.praw", gen = "gwa_gabel.raw", force = T)

slotNames(b.dat)

slotNames(b.dat@gtdata)

colnames(b.dat@phdata)

# sample size

b.dat@gtdata@nids

# number of cases and controls

case.size <- length(which(b.dat@phdata$disease == 1))

case.size

control.size

# number of SNPs

snpsb.total <- b.dat@gtdata@nsnps

# GLM test

testb.snp <- scan.glm('disease ~ CRSNP', family = binomial(), data = b.dat)

names(testb.snp)

alpha <- 5e-8

testb.snp$snpnames[testb.snp$P1df < alpha]

testb.snp$P1df[testb.snp$P1df < alpha]

# Score test

testb.qt <- qtscore(disease, data = b.dat, trait = "binomial")

slotNames(testb.qt)

descriptives.scan(testb.qt)

row.names(results(testb.qt))[results(testb.qt)$P1df < alpha]

results(testb.qt)$P1df[results(testb.qt)$P1df < alpha]

results(testb.qt)$Pc1df[results(testb.qt)$Pc1df < alpha]

===Plink - Part 1 - Data QC===

plink --file GWAS

plink --file GWAS --mind 0.10 --recode --out GWAS_clean_mind

setwd("to_your_working_directory/")

dups = read.table("duplicates.genome", header = T)

problem_pairs = dups[which(dups$PI_HAT > 0.4),]

problem_pairs

problem_pairs = dups[which(dups$PI_HAT > 0.05),]

myvars = c("FID1", "IID1", "FID2", "IID2", "PI_HAT")

problem_pairs[myvars]

hardy = read.table("plink.hwe", header = T)

names(hardy)

hwe_prob = hardy[which(hardy$P < 0.0000009),]

hwe_prob

q()

##########

plink --file GWAS_clean3 --exclude HWE_out.txt --recode --out GWAS_clean4

===Plink - Part 2 - Controlling for Substructure===

plink --file GWAS_clean4 --genome --cluster --mds-plot 10

#### in R

broadqq <-function(pvals, title)

{

observed <- sort(pvals)

lobs <- -(log10(observed))

broadqq(aff_C1C2.add.p, "Some Trait Adjusted for PC1 and PC2")

dev.off()

gws_unadj = aff_unadj[which(aff_unadj$P < 0.0000001),]

gws_unadj

gws_adjusted = aff_C1C2[which(aff_C1C2$P < 0.0000001),]

gws_adjusted

===VAT===

vtools -h

vtools init VATDemo

Serveradmin

Bureaucrat, administrator

1,252

edits