############################################################################### # STATISTICAL ADJUSTMENT FOR SIGNAL CENSORING IN GENE EXPRESSION EXPERIMENTS # # Author: Dr Ernst Wit (ernst@stats.gla.ac.uk) # Department of Statistics # University of Glasgow # # Date: April 2002 # Revised July 1, 2003 # # Description of function CENSORING # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # System: Splus2000 # This function is written in the S-language as a script for Splus2000 # # Idea: The function calculates the maximum likelihood estimate of the parameters # for a Gamma(alpha, beta) pixel intensity model, when only the mean, median # variance and number of pixels are given. The algorithm is described in # Wit & McClure, "Statistical Adjustment of Signal Censoring in Gene Expression # Experiments," Bioinformatics, 19:9, p.1055-1060, 2003 # # Input: x = vector # x[1] = spot mean # x[2] = spot median # x[3] = spot variance # x[4] = number of pixels in spot # MaxInt = upper threshold of scanning intensity. # nsweep = number of iterations to find maximum likelihood estimate # plot = if T(rue), then the likelihood plots of alpha and beta are produced # for each iteration. # # Output: y = list # maximum likelihood estimate of alpha # maximum likelihood estimate of beta # maximum likelihood estimate of mean (alpha/beta) # log likelihood at MLE # # # Alternatives: this function can easily be adjusted to calculate the maximum likelihood # estimates for a lognormal, Weibull or any other two-parameter distribution. censoring<-function(x,MaxInt=2^16-1,nsweep=4,plot=F){ mn<-x[1] md<-x[2] vr<-x[3] npix<-x[4] moments1<-function(density,alpha,beta,lb,ub,precision=5000){ # This function calculates the mean and variance of a restricted # 2 parameter density "density(alpha,beta)" on the interval (lb,ub) x<-seq(lb,ub,length=precision) dens<-density(x,alpha,beta) norm<-sum(dens) if (norm==0){rtrn<-c(0,.1,.2)} else {ex<-sum(x*dens)/norm exsq<-sum(x^2*dens)/norm exqu<-sum(x^4*dens)/norm rtrn<-c(ex,exsq,exqu)} rtrn } moments2<-function(density,cdf,alpha,beta,lb,ub,precision=1000){ # This function calculates the mean and variance of a restricted # 2 parameter density "density(alpha,beta)" on the interval (lb,ub] # where "ub" is the upperbound and censoring point of density(alpha,beta). pi1<-1-cdf(ub,alpha,beta) pi2<-(1-pi1)-cdf(lb,alpha,beta) x<-seq(lb,ub,length=precision) dens<-density(x,alpha,beta) norm<-sum(dens) if ((pi1+pi2==0)|(norm==0)){rtrn<-c(0,0.1,0.2)} else{ ex<-(pi2*sum(x*dens)/norm+pi1*ub)/(pi1+pi2) exsq<-(pi2*sum(x^2*dens)/norm+pi1*ub^2)/(pi1+pi2) exqu<-(pi2*sum(x^4*dens)/norm+pi1*ub^4)/(pi1+pi2) rtrn<-c(ex,exsq,exqu)} rtrn } # Make sure that the number of pixels is odd if (npix/2==round(npix/2,0)){npix<-npix-1} h<-(npix+1)/2 # When using the Gamma(alpha,beta) we use the convention: # alpha = alpha # beta = beta # Set to use the lognormal density<-dgamma cdf<-pgamma # set up initial grid alpha.lb <- 0.0001 alpha.ub <- 180 beta.lb <- 1e-10 beta.ub <- .2 grid.density<-16 if (plot==T){like.contour<-matrix(ncol=grid.density+1,nrow=grid.density+1) par(mfrow=c(2,2))} max.lik<-c(0,0,0) for (k in 1:nsweep){ step.alpha<-(log(alpha.ub)-log(alpha.lb))/grid.density step.beta<-(log(beta.ub)-log(beta.lb))/grid.density grid.alpha<- exp(seq(log(alpha.lb),log(alpha.ub),step.alpha)) grid.beta<-exp(seq(log(beta.lb),log(beta.ub),step.beta)) xx<-0 for (i in grid.alpha){ xx<-xx+1 yy<-0 for (j in grid.beta){ yy<-yy+1 if (md==MaxInt){ like.median<-cdf(md,i,j)^h*(1-cdf(md,i,j))^(h+1) if (mn==MaxInt){ like.mean<-(1-cdf(mn,i,j))^h like.var<-1 } else {hlp1<-moments2(density,cdf,i,j,0,MaxInt,precision=60000) mu1<-hlp1[1] si1<-hlp1[2]-mu1^2 like.mean<-dnorm((npix*mn-(h+1)*md-h*mu1)/sqrt(h*si1)) mu1<-hlp1[2] si1<-hlp1[3]-mu1^2 like.var<-dnorm(((npix-1)*vr+npix*mn^2-(h+1)*md^2-h*mu1)/sqrt(h*si1)) } } else { like.median<-cdf(md,i,j)^h*density(md,i,j)*(1-cdf(md,i,j))^h hlp1<-moments1(density,i,j,0,md) hlp2<-moments2(density,cdf,i,j,md,MaxInt) mu1<-hlp1[1] si1<-hlp1[2]-mu1^2 mu2<-hlp2[1] si2<-hlp2[2]-mu2^2 like.mean<-dnorm(npix*mn-md,h*(mu1+mu2),sqrt(h*(si1+si2))) mu1<-hlp1[2] si1<-hlp1[3]-mu1^2 mu2<-hlp2[2] si2<-hlp2[3]-mu2^2 like.var<-dnorm((npix-1)*vr+npix*mn^2-md^2,h*(mu1+mu2),sqrt(h*(si1+si2))) } likelihood<-like.median*like.mean*like.var*1e50 if (! is.na(likelihood)){ if (likelihood>max.lik[1]){max.lik[1]<-likelihood max.lik[2]<-i max.lik[3]<-j}} if (plot==T){like.contour[xx,yy]<-likelihood} } } if (plot==T){image(grid.alpha,grid.beta,like.contour/max(like.contour),xlab="alpha",ylab="beta")} alpha.lb<-exp(log(max.lik[2])-step.alpha) alpha.ub<-exp(log(max.lik[2])+step.alpha) beta.lb<- exp(log(max.lik[3])-step.beta) beta.ub<- exp(log(max.lik[3])+step.beta) } list(corrected.mean=max.lik[2]/max.lik[3],alpha=max.lik[2],beta=max.lik[3],loglikelihood=log(max.lik[1])) }