#instruction in the bottom f1 = function(y, lambda, t, freq) { k = length(t); k1 = k-1; support = cumprod( (1-t[1:k1])^(lambda[1:k1]-lambda[2:k]) ); support = c(1, support); ss = rep(support, freq); lam = rep(lambda, freq); ss*lam*(1-y)^(lam-1); } F1 = function(y, lambda, t, freq) { lam = rep(lambda, freq); 1 - f1(y, lambda, t, freq)*(1-y)/lam; } log.likelihood.tau.total = function(tau, y, z, t, sigma2, freq) { k = length(t); k1 = k-1; lambda = exp(tau) + 1; sigma.local = lambda[2:k]/(lambda[2:k]-1); sigma.local = 1; diff = tau[1:(k-1)] - tau[2:k]; diff = diff/sigma.local; result = -sum( diff^2 )/2/sigma2; result2 = f1(y, lambda, t, freq)[z==1]; result2 = log(result2); result + sum(result2); } sample.tau.one.component = function(i, tau, y, z, t, sigma2, sigma2.mean, freq) { alpha.old = log.likelihood.tau.total(tau, y, z, t, sigma2, freq); tau.single.old = tau[i]; tau.single.new = tau.single.old + rnorm( 1, 0, sqrt(sigma2.mean) ); tau[i] = tau.single.new; alpha.new = log.likelihood.tau.total(tau, y, z, t, sigma2, freq); alpha = exp( alpha.new - alpha.old ); if( runif(1) < alpha ) return(tau.single.new) else return(tau.single.old); } location = function(y, t) { n = length(y); ll = numeric(n); for(i in 1:n) ll[i] = which(t>y[i])[1]; ll <- factor(ll,levels=1:length(t)) ll; } localFDR = function(y, k, n.simu, v=.30*k) #v for prior of sigma2 { #the y is a vector of pvalues to be decomposed # t is the vector of kots not including 0 but including 1 y = sort(y); small = 100*.Machine$double.eps; y[y1-small] = 1-small; t = comp.t(y, k); n = length(y); #number of p-value zeta = 0.0001; #initial values pi0 = mean(y>.5)/.5; tau = rep(.5, k); sigma2 = .1; tau.mean = numeric(k); pi0.mean = 0; sigma2.mean = sigma2; pi0.save = numeric(n.simu); freq = table( location(y, t) ); lambda = exp(tau) + 1; k1 = k-1; for(i in 1:n.simu) { fdr = (1-pi0)*f1(y, lambda, t, freq); fdr = pi0/(fdr + pi0); z = rbinom(n, 1, 1-fdr); ## sum.z = sum(z); if(i>100) pi0 = rbeta(1, n-sum.z+1, sum.z+1); ## pi0.save[i] = pi0; for(j in k:1) tau[j] = sample.tau.one.component(j, tau, y, z, t, sigma2, sigma2.mean, freq); ## lambda = exp(tau) + 1; sigma.local = lambda[2:k]/(lambda[2:k]-1); sigma.local = 1; diff = tau[1:(k-1)] - tau[2:k]; diff = diff/sigma.local; scale = zeta*v + sum( diff^2 ); scale = scale/(v+k-1); sigma2 = scale*(k-1+v)/rchisq(1, k-1+v); #print(sigma2); print(tau); ################################################## ww = 1/i; tau.mean = tau.mean*(1-ww) + tau*ww; pi0.mean = pi0.mean*(1-ww) + pi0*ww; sigma2.mean = sigma2.mean*(1-ww) + sigma2*ww; lambda.mean = exp(tau.mean) + 1; ###################################################### if(i%%10!=0) next; print(lambda.mean); fdr = (1-pi0.mean)*f1(y, lambda.mean, t, freq); fdr = pi0.mean/(fdr + pi0.mean); FDR = (1-pi0.mean)*F1(y, lambda.mean, t, freq); FDR = 1 - FDR/(FDR + pi0.mean*y); #plot(y, f1(y, lambda.mean, t, freq), type="l"); plot(y, fdr, type="l"); lines(y, FDR, type="l"); print(i); print("posterior of pi0"); print(quantile(pi0.save[1:i], c(.025,.50,.975))); } F1.value = F1(y, lambda.mean, t, freq); F = pi0.mean*y + (1-pi0.mean)*F1.value; NPV = pi0.mean*(1-y)/(1-F); f.value = pi0.mean + (1-pi0.mean)*f1(y, lambda.mean, t, freq); as.data.frame( cbind(y, fdr, pFDR=FDR, F1=F1.value, NPV, F, f=f.value) ); } ##################################################################################### comp.t = function(pvalue, k) #select the knots t based on the quantiles of pavlue { pvalue = sort(pvalue); n = length(pvalue); t=numeric(k); for(j in 1:k) t[j] = pvalue[n*j/k]; t[k] = 1; start = which(t==1)[1]-1; diff = 1 - t[start]; step = diff/(k-start); for(i in (start+1):k) t[i] = t[start] + (i-start)*step; t; } # The statistical method implemented in this program is described in # Liao J. G., Lin Y. Selvanayagam, Z.E. and Shih, W.J. # A Mixture Model for Estimating the Local False Discovery Rate # in DNA Microarray Analysis # The input is a vector of pvalues to be adjsuted for. Let us generate one by simulation pvalue = c( runif(8000), rbeta(2000, 1, 3) ); # now decide the number of knots, say 60 of them k = 100; # decide the number of MCMC iterations in the program n.simu = 1000; # decide the smoothing parameter v, say .30*k; v = .3*k # now run the program result = localFDR(pvalue, k, n.simu, v); #after the program finishes, result will contain a data frame # 1st column is the raw pavlues # 2nd colunm is the estimated fdr (local fasle predictive value) # 3rd column is the estimated pFDR # 4th column is the estimated F1 or sensitivity # 5th column is the estimated negative predictive value # 6th column is the estimated F for model checking # 7th column is the estimated f