## ----------------------------------------------------------------------------- ## Name: AverageEstimate.R ## R code for the (Cancer Informatics) paper by Hongmei Jiang and R.W. Doerge: ## "Estimating the proportion of true null hypotheses for multiple comparisons" ## Date: December 2007 ## Contact: Hongmei Jiang hongmei@northwestern.edu ## R.W. Doerge doerge@purdue.edu ## Example: ## y <- runif(1000) ## Result <- AverageEstimate(y) ## sum(Result$significant) ## Result$pi0 ## ----------------------------------------------------------------------------- ############### Subfunction ############################ ## Compute the estimate of pi0 for a fixed value of B ## ######################################################## FixedB <- function(p, B) { ## Input: #p: a vector of p-values #B: an integer, the interval [0,1] is divided into B equal-length intervals ## Output: #pi0: an estimate of the proportion of true null hypotheses m <- length(p) t <- seq(0,1,length=B+1) # equally spaced points in the interval [0,1] NB <- rep(0,B) # number of p-values greater than t_i NBaverage <- rep(0,B) # average number of p-values in each of the (B-i+1) small intervals on [t_i,1] NS <- rep(0,B) # number of p-values in the interval [t_i, t_(i+1)] pi <- rep(0,B) # estimates of pi0 for(i in 1:B) { NB[i] <- length(p[p>=t[i]]) NBaverage[i] <- NB[i]/(B-(i-1)) NS[i] <- length(p[p>=t[i]]) - length(p[p>=t[i+1]]) pi[i] <- NB[i]/(1-t[i])/m } i <- min(which(NS <= NBaverage)) # Find change point i pi0 <- min(1, mean(pi[(i-1):B])) # average estiamte of pi0 return(pi0) } ############## main function ########################### ## (1) Compute the estimate of pi0 ## ## (2) Apply the adaptive FDR controlling procedure ## ######################################################## AverageEstimate <- function(p=NULL, Bvector=c(5, 10, 20, 50, 100), alpha=0.05) { ## Input: #p: a vector of p-values #Bvector: a vector of integer values where the interval [0,1] is divided into B equal-length intervals # When Bvector is an integer, number of intervals is consider as fixed. For example Bvector = 10; # When Bvector is a vector, bootstrap method is used to choose an optimal value of B #alpha: FDR signficance level so that the FDR is controlled below alpha #Numboot: number of bootstrap samples ## Output: #pi0: an estimate of the proportion of true null hypotheses #significant: a vector of indicator variables; # it is 1 if the corresponding p-value is significant # it is 0 if the corresponding p-value is not significant # check if the p-values are valid if (min(p)<0 || max(p)>1) print("Error: p-values are not in the interval of [0,1]") m <- length(p) # Total number p-values Bvector <- as.integer(Bvector) # Make sure Bvector is a vector of integers #Bvector has to be bigger than 1 if(min(Bvector) <=1) print ("Error: B has to be bigger than 1") ######## Estimate pi0 ######## if(length(Bvector) == 1) # fixed number of numbers, i.e., B is fixed { pi0 <- AverageEstimateFixedB(p, Bvector) } else { Numboot <- 100 OrigPi0Est <- rep(0, length(Bvector)) for (Bloop in 1:length(Bvector)) { OrigPi0Est[Bloop] <- FixedB(p, Bvector[Bloop]) } BootResult <- matrix(0, nrow=Numboot, ncol=length(Bvector)) # Contains the bootstrap results for(k in 1:Numboot) { p.boot <- sample(p, m, replace=TRUE) # bootstrap sample for (Bloop in 1:length(Bvector)) { BootResult[k,Bloop] <- FixedB(p.boot, Bvector[Bloop]) } } MeanPi0Est <- mean(OrigPi0Est) # avearge of pi0 estimates over the range of Bvector MSEestimate <- rep(0, length(Bvector)) # compute mean-squared error for (i in 1:length(Bvector)) { MSEestimate[i] <- (OrigPi0Est[i]- MeanPi0Est)^2 for (k in 1:Numboot) { MSEestimate[i] <- MSEestimate[i]+1/Numboot*(BootResult[k,i] - OrigPi0Est[i])^2 } } pi0 <- OrigPi0Est[MSEestimate==min(MSEestimate)] } # end of else ######## Apply the adaptive FDR controlling procedure ######## sorted.p <- sort(p) # sorted p-values order.p <- order(p) # order of the p-values m0 <- pi0*m # estimate of the number of true null i <- m crit <- i/m0*alpha while (sorted.p[i] > crit ) { i <- i-1 crit <- i/m0*alpha if (i==1) break } K <- i if (K==1 & sorted.p[K] <= 1/m0*alpha) K <- 1 if (K==1 & sorted.p[K] > 1/m0*alpha) K <- 0 significant <- rep(0,m) # indicator of significance of the p-values if (K > 0) significant[order.p[1:K]] <- 1 result <- list(pi0=pi0, significant=significant) result }