rm(list=ls(all=TRUE)) library(foreign) source("GeneLogit.txt") # Reading data read.data = function() { dataset = read.dta("golub_train.dta") y = dataset[,1] x = as.matrix(dataset[,-1]) print(dim(x)) dimnames(x) = NULL #remove the variable name print(dim(x)) list(x=x, y=y) } ###### Cervical Data set ################################# # Read data obj = read.data() x = obj$x y = obj$y rm(obj) ######################################## localFDR.cal(x, y, v=100) #see Liao et al, Bioinformatics 20, 2694-2701 tau = model.estimation(x, y) ######################################## # Prediction Error bootstrap.prediction(x, y, q=20) ###applying to test dataset ########################################## result = pena.logit(x, y, q=20, tau= .628) ########## applying to test dataset ################################# read.data = function() { dataset = read.dta("golub_test.dta") y = dataset[,1] x = as.matrix(dataset[,-1]) print(dim(x)) dimnames(x) = NULL print(dim(x)) list(x=x, y=y) } # Read data dataset.test = read.data() x = dataset.test$x y = dataset.test$y rm(dataset.test) b = result$b r1 = result$gene.selected xx = x[, r1] xx = cbind(rep(1, nrow(xx)), xx) temp = as.vector(xx %*% b) + log(20/14) - log(27/11) fitted = plogis(temp) plot(y, fitted) mean((y-fitted)^2) ratio = sum(y)/sum(fitted) fitted_adjusted = fitted*ratio plot(y, fitted_adjusted)