############################################################################################################################ # FGX FUNCTION # ############################################################################################################################ #--------------------------------------------------------------------------------------------------------------------------# # Inputs of FGX function #--------------------------------------------------------------------------------------------------------------------------# # PMs : Perfect matches probes on log scale # # MMs : Mismatches probes on log scale # # gene.names : Names of the genes or EST used in each oligonucleotide # # n.probes : Number of probe pairs of each gene. The default is "NULL". # # # return.cov : Logical comment if the calculation of variances is desired. The default is "True". If it is "False", then # # only the estimates of variables are computed. # # cov.mat : Calculation of complete Fisher Information Matrix which gives the covariances and variances of variables # First two columns show the related values of mean of the background signal(mu_H) and the amount of specific # First two columns belong to the mean of the backgroung signal (mu_H) and the amount of specific # # hybridization to the mismatch probe (p), respectively. The rest shows the related values of signals. # # The default is "True". If it is "False", then only the variances are calculated in the same order. # # # max.cov.mat : Maximum number of genes whose covariances and variances are calculated by the inverse of Fisher # # Information matrix. The default is 500. If it is greater than 500, then these calculations are done by # # their explicit formulas rather than inverting the Information matrix. # #--------------------------------------------------------------------------------------------------------------------------# #--------------------------------------------------------------------------------------------------------------------------# # Outputs of FGX function #--------------------------------------------------------------------------------------------------------------------------# # p.est : MLE estimate of the amount of specific hybridization to the mismatch probe # # muH : MLE estimate of the mean of the background signal # # Signal : MLE estimate of the signals # # sigma.noadjust : MLE estimate of the overall standard deviation of signals without any adjustment in degree of # # freedom # # sigma.withadjust : MLE estimate of the overall standard deviation of signals with an adjustment in degree of freedom # # sigma : min(sigma.noadjust, sigma.withadjust) # # onlyvar : Estimated variance vector of the mean of the backgroung signal (mu_H), the amount of specific # # hybridization (p), and the signals, respectively # # cov.mat.est : Estimated Fisher Information matrix which give the covariances and variances of all variables # # # varterm.noadjust : MLE estimate of the overall variance of signals without any adjustment in degree of freedom # # varterm.withadjust: MLE estimate of the overall variance of signals with an adjustment in degree of freedom # # varterm : min(varterm.noadjust, varterm.withadjust) # # # model.warning : 0 or 1 value which indicates if there is any negative correlation between PMs and MMs. If it is 0, # # then FGX is suitable model for this data set, whereas, if it is 1, then there is a negative # # correlation between PMs and MMs which makes FGX invalid. # # PM : Logarithm of perfect matches probes # # MM : Logarithm of mismatches probes # #--------------------------------------------------------------------------------------------------------------------------# fgx<-function(PMs,MMs,gene.names=NULL,n.probes=NULL,return.cov=T,cov.mat=T,max.cov.mat=500){ #FIRST PART :calculation of parameter estimations #************************************************** #gene.names are not given #************************* if (is.null(gene.names)){ PM<- PMs MM<- MMs n.genes<- length(PM) n.probes<-1 S.PM<- sum((PM-mean(PM))^2) S.MM<- sum((MM-mean(MM))^2) S.PMMM<- sum((PM-mean(PM))*(MM-mean(MM))) if (S.PMMM<=0){ model.warning<-1 cat("\nSince there is a negative correlation between PMs and MMs,FGX is not a suitable model for this data set") return(list(model.warning=model.warning)) }else{ model.warning<-0 delta<- (S.PM-S.MM)^2+4*(S.PMMM^2) p.est<- (S.MM-S.PM+sqrt(delta))/(2*S.PMMM) muH<- (mean(PM)*p.est-mean(MM))/(p.est-1) Si<- (PM+p.est*MM-(1+p.est)*muH)/(1+p.est^2) varterm.withadjust<-n.probes*sum((PM-Si-muH)^2+(MM-p.est*Si-muH)^2)/(n.genes-2) varterm.noadjust<-n.probes*sum((PM-Si-muH)^2+(MM-p.est*Si-muH)^2)/(2*n.genes) sigma.withadjust<- sqrt(varterm.withadjust) sigma.noadjust<- sqrt(varterm.noadjust) sigma<-min(sigma.withadjust,sigma.noadjust) varterm<-min(varterm.withadjust,varterm.noadjust) } #gene.names are given #********************* }else{ PM<- tapply(PMs, gene.names, mean) MM<- tapply(MMs, gene.names, mean) n.genes<- length(PM) n.probes<- sum(gene.names==gene.names[1]) S.PM<- sum((PM-mean(PM))^2) S.MM<- sum((MM-mean(MM))^2) S.PMMM<- sum((PM-mean(PM))*(MM-mean(MM))) if (S.PMMM<=0){ model.warning<-1 cat("\nSince there is a negative correlation between PMs and MMs,FGX is not a suitable model for this data set") return(list(model.warning=model.warning)) }else{ model.warning<-0 delta<-(S.PM-S.MM)^2+4*(S.PMMM^2) p.est<- (S.MM-S.PM+sqrt(delta))/(2*S.PMMM) muH<- (mean(PM)*p.est-mean(MM))/(p.est-1) Si<- (PM+p.est*MM-(1+p.est)*muH)/(1+p.est^2) PM.mat<- matrix(PMs, nrow=n.genes, ncol=n.probes, byrow=T) MM.mat<- matrix(MMs, nrow=n.genes, ncol=n.probes, byrow=T) sumsquare<- 0 for (i1 in 1:n.genes) { for (i2 in 1:n.probes) { sumsquare<-sumsquare+(PM.mat[i1,i2]-Si[i1]-muH)^2+(MM.mat[i1,i2]-p.est*Si[i1]-muH)^2 } } varterm.withadjust<- sumsquare/(2*n.genes*n.probes-n.genes-2) varterm.noadjust<-sumsquare/(2*n.genes*n.probes) sigma.withadjust<- sqrt(varterm.withadjust) sigma.noadjust<- sqrt(varterm.noadjust) sigma<-min(sigma.withadjust,sigma.noadjust) varterm<-min(varterm.withadjust,varterm.noadjust) } #End of loop for "gene.names" #***************************** } #SECOND PART:Calculation of information matrix #************************************************ #Calculation based on "return.cov=T" #************************************* #Since standard deviation doesn`t need any adjustment in its degree of freedom, #but minimum variance changes regarding the data, varterm is chosen from min(vartem.noadjust,varterm.withadjust) if(return.cov){ dim.cov.mat<- n.genes+2 #Small dimensional information matrix #************************************** if (dim.cov.mat< max.cov.mat) { AA<- matrix(c(2*n.probes*n.genes/varterm, n.probes*sum(Si)/varterm, (n.probes*sum(Si))/varterm, (n.probes*sum(Si^2))/varterm), nrow=2,ncol=2,byrow=T) BB.line1<- rep(n.probes*(1+p.est)/varterm, n.genes) BB.line2<- rep(0,n.genes) for(i3 in 1:n.genes){ BB.line2[i3]<- (2*p.est*Si[i3]+muH-MM[i3])*(n.probes/varterm) } BB<- rbind(BB.line1,BB.line2) BB.trans<- t(BB) CC.inverse<- matrix(0,nrow=n.genes,ncol=n.genes) diag(CC.inverse)<- varterm/(n.probes*(1+p.est^2)) #Inverse of small dimensional information matrix #************************************************* matP<- (AA-BB%*%CC.inverse%*%BB.trans) P<- solve(matP) Q<- -t(CC.inverse%*%BB.trans%*%P) Q.trans<- t(Q) R<- CC.inverse-(CC.inverse%*%BB.trans%*%Q) row1<- cbind(P,Q) row2<- cbind(Q.trans,R) cov.mat.est<- rbind(row1,row2) #cov.mat=T #*********** onlyvar<- rep(0, (n.genes+2)) for (i10 in 1:(n.genes+2)){ onlyvar[i10]<-cov.mat.est[i10,i10] } if (cov.mat){ output<-list(p.est=p.est,muH=muH,Signal=Si,sigma.noadjust=sigma.noadjust, sigma.withadjust=sigma.withadjust,sigma=sigma,onlyvar=onlyvar,cov.mat.est=cov.mat.est, varterm.noadjust=varterm.noadjust,varterm.withadjust=varterm.withadjust, varterm=varterm,model.warning=model.warning,PM=PM,MM=MM) }else{ output<-list(p.est=p.est,muH=muH,Signal=Si,sigma.noadjust=sigma.noadjust, sigma.withadjust=sigma.withadjust,sigma=sigma,onlyvar=onlyvar, varterm.noadjust=varterm.noadjust,varterm.withadjust=varterm.withadjust, varterm=varterm,model.warning=model.warning,PM=PM,MM=MM) } }else{ #Large dimensional information matrix #************************************* Y<- 0 const<- 0 Term1<- 0 Term2<- 0 for(i4 in 1:n.genes){ Term1<- Term1+ (2*p.est*Si[i4]+muH-MM[i4])^2 Term2<- Term2+ (2*p.est*Si[i4]+muH-MM[i4]) } deno<- 1+p.est^2 numo<- 1+p.est #Explicit variances #******************** Y<-(2*n.genes-n.genes*(numo^2)/deno)*(sum(Si^2)-Term1/deno)-(sum(Si)-numo*Term2/deno)^2 const<- (varterm)/(n.probes*Y) var.p.est<- const*(2*n.genes-(n.genes*(numo^2)/deno)) var.muH<- const*(sum(Si^2)-Term1/deno) #Explicit covariances #********************** cov.muH.p.est<- const*(numo/deno*Term2-sum(Si)) cov.muH.Si<- rep(0,n.genes) cov.p.est.Si<- rep(0,n.genes) for (i5 in 1:n.genes){ cov.muH.Si[i5]<-(const*numo/deno)*(Term1/deno-sum(Si^2))+ const*(2*p.est*Si[i5]+muH-MM[i5])*(sum(Si)-numo*Term2/deno)/deno cov.p.est.Si[i5]<-(const*numo/deno)*(sum(Si)-numo*Term2/deno)+ const*(2*p.est*Si[i5]+muH-MM[i5])*(n.genes*(numo^2)/deno-2*n.genes)/deno } cov.SiSi<- matrix(0, nrow=n.genes, ncol=n.genes, byrow=T) for (i6 in 1:n.genes){ for(i7 in 1:n.genes){ if (i6==i7) { cov.SiSi[i6,i7]<-(const/(deno^3))*(Y*(deno^2)-(numo^2)*(Term1-deno*sum(Si^2))- 2*numo*(2*p.est*Si[i6]+muH-MM[i6])*(deno*sum(Si)-numo*Term2)+ (2*p.est*Si[i6]+muH-MM[i6])^2*(n.genes*(p.est^2)-2*n.genes*p.est+n.genes)) }else{ cov.SiSi[i6,i7]<--(const/(deno^3))*((numo^2)*(Term1-deno*sum(Si^2))+ numo*(2*p.est*Si[i6]+muH-MM[i6])*(deno*sum(Si)-numo*Term2)+ numo*(2*p.est*Si[i7]+muH-MM[i7])*(deno*sum(Si)-numo*Term2)+ (2*p.est*Si[i6]+muH-MM[i6])*(2*p.est*Si[i7]+muH-MM[i7])* (2*n.genes*p.est-n.genes*(p.est^2)-n.genes)) } } } #Inverse of large dimensional information matrix #************************************************* P<-matrix(c(var.muH,cov.muH.p.est,cov.muH.p.est,var.p.est),nrow=2,ncol=2,byrow=T) Q<- rbind(cov.muH.Si, cov.p.est.Si) Q.trans<- t(Q) row1<- cbind(P,Q) row2<- cbind(Q.trans,cov.SiSi) cov.mat.est<- rbind(row1,row2) #cov.mat=T for large dimensional information matrix #**************************************************** onlyvar1<- rep(0, (n.genes+2)) for (i11 in 1:(n.genes+2)){ onlyvar[i11]<-cov.mat.est[i11,i11] } if (cov.mat){ output<-list(p.est=p.est,muH=muH,Signal=Si,sigma.noadjust=sigma.noadjust, sigma.withadjust=sigma.withadjust,sigma=sigma,onlyvar=onlyvar, cov.mat.est=cov.mat.est,varterm=varterm,varterm.noadjust=varterm.noadjust, varterm.withadjust=varterm.withadjust,model.warning=model.warning,PM=PM,MM=MM) }else{ output<-list(p.est=p.est,muH=muH,Signal=Si,sigma.noadjust=sigma.noadjust, sigma.withadjust=sigma.withadjust,sigma=sigma,onlyvar=onlyvar, varterm=varterm,varterm.noadjust=varterm.noadjust,varterm.withadjust=varterm.withadjust, model.warning=model.warning,PM=PM,MM=MM) } #End of loop for "dim.cov.mat< or >max.cov.mat" #************************************************* } }else{ output <- list(p.est=p.est,muH=muH,Signal=Si,sigma.noadjust=sigma.noadjust,sigma.withadjust=sigma.withadjust, sigma=sigma,varterm=varterm,varterm.noadjust=varterm.noadjust, varterm.withadjust=varterm.withadjust,model.warning=model.warning,PM=PM,MM=MM) #End of loop for "return.cov" #***************************** } output } #*************************************************************************************************************** #***************************************************************************************************************