Background Color:
 
Background Pattern:
Reset
Thursday, November 23, 2017
menu click
Search

Tutorial

We will use some simulated ASP data in three different countries NL, B and D. The data have been analyzed using the 'merlin --npl' command and the results have been saved in 'merlinNL,out','merlinB.out' and 'merlinD.out'. The R code below assumes that all files used are present in the R working directory. All files required can be downloaded from the bottom of this page.

We first compile the Meta in R.R code,

source("META in R.R")

we then read the data.

data1 <- fREAD.MERLIN(FNAME="merlinNL.out", TRAIT="AFFECT", TYPE="NPL")
data1$STUDY <- "NL"
data2 <- fREAD.MERLIN(FNAME="merlinB.out", TRAIT="AFFECT", TYPE="NPL")
data2$STUDY <- "B"
data3 <- fREAD.MERLIN(FNAME="merlinD.out", TRAIT="AFFECT", TYPE="NPL")
data3$STUDY <- "D"
data <- rbind(data1,data2,data3)
names(data)[match("DELTA",names(data))] <- "GAMMA"

We need the standard error S of the linkage effect estimate GAMMA at each position, it can be computed approximately using the LOD score and estimate GAMMA of effect  using the approximate relation LOD =chi^2/4.6=(GAMMA/S)^2.

data$S <- sqrt( data$GAMMA^2 / (2*log(10)*abs(data$LOD)) )

S is ill defined when LOD=0 so we impute a conservative value for it, separately for each country: the 95% percentile of the standard error S at other positions on the chromosome

imput <- quantile(data$S[data$S!=Inf & data$STUDY=="NL"],probs=0.95,na.rm=T)
data$S[data$S==Inf & data$STUDY=="NL"] <- imput
imput <- quantile(data$S[data$S!=Inf & data$STUDY=="B"],probs=0.95,na.rm=T)
data$S[data$S==Inf & data$STUDY=="B"] <- imput
imput <- quantile(data$S[data$S!=Inf & data$STUDY=="D"],probs=0.95,na.rm=T)
data$S[data$S==Inf & data$STUDY=="D"] <- imput

We can now perform the META-ANALYSIS itself, we run the homogeneous analysis first:

fMETA(DATA=data, MODEL="hom", OUTPUT="meta.hom", GRID=seq(from=0,to=110,by=2))

we now read the results,

meta.hom <- read.table(file="meta.hom", header=T)

and plot the individual LOD scores and the meta-analysic LOD scores.

lwd <- 2
plot(meta.hom$POSITION, meta.hom$LOD,type="l", lwd=lwd, xlab="Position (cM)", ylab="LOD", ylim=c(0,3.5))
lines(data1$POSITION,data1$LOD, col=2, lwd=lwd)
lines(data2$POSITION,data2$LOD, col=3, lwd=lwd)
lines(data3$POSITION,data3$LOD, col=4, lwd=lwd)
legend(60,3.5,leg=c("Meta homogeneous","NL","B","D"), lwd=2, lty=c(1,1,1,1), col=c(1:4))

We may want to have a look at the precision of each study across the chromosome, roughly reflecting marker information and the number of families in each study

data$PRECISION <- 1/data$S^2
meta.hom$PRECISION <- 1/meta.hom$S^2
plot(meta.hom$POSITION, meta.hom$PRECISION,type="l", lwd=lwd, xlab="Position (cM)", ylab="Precision", ylim=c(0,300))
lines(data$POSITION[data$STUDY=='NL'],data$PRECISION[data$STUDY=='NL'], col=2, lwd=lwd)
lines(data$POSITION[data$STUDY=='B'],data$PRECISION[data$STUDY=='B'], col=3, lwd=lwd)
lines(data$POSITION[data$STUDY=='D'],data$PRECISION[data$STUDY=='D'], col=4, lwd=lwd)
legend(0,300,leg=c("Meta hom.","NL","B","D"), lwd=2, lty=c(1,1,1,1), col=c(1:4))

We may also be interested in looking at the linkage effect estimates to check whether there is any evidence for heterogeneity

plot(meta.hom$POSITION, meta.hom$GAMMA,type="l", lwd=lwd, xlab="Position (cM)", ylab="GAMMA", ylim=c(-0.05,0.35))
lines(data$POSITION[data$STUDY=='NL'],data$GAMMA[data$STUDY=='NL'], col=2, lwd=lwd)
lines(data$POSITION[data$STUDY=='B'],data$GAMMA[data$STUDY=='B'], col=3, lwd=lwd)
lines(data$POSITION[data$STUDY=='D'],data$GAMMA[data$STUDY=='D'], col=4, lwd=lwd)
legend(-2,0.36,leg=c("Meta hom.","NL","B","D"), lwd=2, lty=c(1,1,1,1), col=c(1:4))

which can be formally tested as follows (no evidence for heterogeneity here)

plot(meta.hom$POSITION, meta.hom$X2,type="l", lwd=lwd, xlab="Position (cM)", ylab=expression(X^2), ylim=c(0,6.5))
lines(c(0,110), c(qchisq(0.95, df=2),qchisq(0.95, df=2)), col="gray", lty=3)
text(x=c(90,98), y=c(qchisq(0.95, df=2) 0.05,qchisq(0.95, df=2)), labels=c(expression(chi[2]^2),"(0.95)"), cex=c(1.2,1))

We now add two new data sets whose individuals analysis results are stored in 'merlinF.out' and 'merlinE.out'

data4 <- fREAD.MERLIN(FNAME="merlinF.out", TRAIT="AFFECT", TYPE="NPL")
data4$STUDY <- "F"
data5 <- fREAD.MERLIN(FNAME="merlin.out", TRAIT="AFFECT", TYPE="NPL")
data5$STUDY <- "E"
data <- rbind(data1,data2,data3,data4,data5)
names(data)[match("DELTA",names(data))] <- "GAMMA"

data$S <- sqrt( data$GAMMA^2 / (2*log(10)*abs(data$LOD)) )

imput <- quantile(data$S[data$S!=Inf & data$STUDY=="NL"],probs=0.95,na.rm=T)
data$S[data$S==Inf & data$STUDY=="NL"] <- imput
imput <- quantile(data$S[data$S!=Inf & data$STUDY=="B"],probs=0.95,na.rm=T)
data$S[data$S==Inf & data$STUDY=="B"] <- imput
imput <- quantile(data$S[data$S!=Inf & data$STUDY=="D"],probs=0.95,na.rm=T)
data$S[data$S==Inf & data$STUDY=="D"] <- imput
imput <- quantile(data$S[data$S!=Inf & data$STUDY=="F"],probs=0.95,na.rm=T)
data$S[data$S==Inf & data$STUDY=="F"] <- imput
data$S[is.na(data$S) & data$STUDY=="F"] <- imput
imput <- quantile(data$S[data$S!=Inf & data$STUDY=="E"],probs=0.95,na.rm=T)
data$S[data$S==Inf & data$STUDY=="E"] <- imput

fMETA(DATA=data, MODEL="hom", OUTPUT="meta5.hom", GRID=seq(from=0,to=110,by=2))
fMETA(DATA=data, MODEL="het", OUTPUT="meta5.het", GRID=seq(from=0,to=110,by=2))
fMETA(DATA=data, MODEL="mixt", OUTPUT="meta5.mixt", GRID=seq(from=0,to=110,by=2))

meta.hom <- read.table(file="meta5.hom", header=T)DNN Promise - hpsierat
meta.het <- read.table(file="meta5.het", header=T)
meta.mixt <- read.table(file="meta5.mixt", header=T)

lwd <- 2
plot(meta.hom$POSITION, meta.hom$LOD,type="l", lwd=lwd, xlab="Position (cM)", ylab="LOD", ylim=c(0,3.5))
lines(data1$POSITION,data1$LOD, col=2, lwd=lwd)
lines(data2$POSITION,data2$LOD, col=3, lwd=lwd)
lines(data3$POSITION,data3$LOD, col=4, lwd=lwd)
lines(data4$POSITION,data4$LOD, col=5, lwd=lwd)
lines(data5$POSITION,data5$LOD, col=6, lwd=lwd)
lines(meta.het$POSITION, meta.het$LOD, lwd=lwd, lty=2)
lines(meta.mixt$POSITION, meta.mixt$LOD, lwd=lwd, lty=3)
legend(0,3.5,leg=c("Meta homogeneous","Meta heterogeneous","Meta 2-pt mixture","NL","B","D","F","E"), lwd=2, lty=c(1,2,3,1,1,1,1,1), col=c(1,1,1,2:6), ncol=2)

The 2 alternative models yield different results than the homogeneous effect model since there is now clear evidence for heterogeneity.

df <- 4
plot(meta.hom$POSITION, meta.hom$X2,type="l", lwd=lwd, xlab="Position (cM)", ylab=expression(X^2), ylim=c(0,15))
lines(c(0,110), c(qchisq(0.95, df=df),qchisq(0.95, df=df)), col="gray", lty=3)
text(x=c(10,18), y=c(qchisq(0.95, df=df)+0.05,qchisq(0.95, df=df)), labels=c(expression(chi[4]^2),"(0.95)"), cex=c(1.2,1))

The LOD scores for the 2 alternative heterogeneous and 2-point mixture models are unadjusted for the extra parameter introduced in those models. Nevertheless, we can obtain the corresponding point-wise adjusted p-values and LOD scores by parametric bootstrapping, this is fairly time consuming so we do this only at positions where the maximum unadjusted LOD scores was attained, i.e. at 48cM for the size heterogeneous model and at 50cM for the 2-point mixture model.

LOD.max <- meta.het$LOD[meta.het$POSITION==48]
fPAR.BOOT(S=data$S[data$POSITION==pos.max], MODEL="mixt", N=10^4, OUTPUT="boot@48cM.het")
parboot.out <- read.table(file="boot@48cM.het", header=T)
parboot.out$LOD[parboot.out$GAMMA < 0] <- 0
pval <- length(parboot.out$LOD[parboot.out$LOD > LOD.max]) / length(parboot.out$LOD)
LOD.adj <- qchisq( 1-2*pval,df=1)/(2*log(10))

The adjusted LOD score becomes 1.52 instead of 1.75 for the unadjusted version. For the 2-point mixture,

LOD.max <- meta.mixt$LOD[meta.mixt$POSITION==50]
fPAR.BOOT(S=data$S[data$POSITION==pos.max], MODEL="mixt", N=10^4, OUTPUT="boot@50cM.mix")
parboot.out <- read.table(file="boot@50cM.mix", header=T)
parboot.out$LOD[parboot.out$GAMMA < 0] <- 0
pval <- length(parboot.out$LOD[parboot.out$LOD > LOD.max]) / length(parboot.out$LOD)
LOD.adj <- qchisq( 1-2*pval,df=1)/(2*log(10))

the LOD score becomes 2.04 instead of 2.18.