library(ggplot2)
library(glmnet)
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-10
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
set.seed(113)
Coef = rnorm(5, 0, sd=1)
X = matrix(rnorm(500, 0, sd=1), nrow = 5, ncol = 100)
Y = apply(X, 2, function(x) Coef%*%x)
df = data.frame(x = c("a", "b", "c", "d", "e"), coef = Coef, Category="True")
ggplot(df, aes(x=x, y=coef, fill=Category)) + geom_bar(stat = "Identity") + theme_bw() +
xlab("") + ylab("Coefficients")
Reg_analysis <- function(Xtrain, Ytrain, Xtest, Ytest, Ronly = FALSE) {
model = glmnet(t(Xtrain), Ytrain, alpha=0)
Ypred = predict.glmnet(model, newx = t(Xtest), s=0)
df = data.frame(True = Ytest, Predict = as.numeric(Ypred))
dfcoef = rbind(
data.frame(Var = c("a", "b", "c", "d", "e"), Coef = Coef, Category="True"),
data.frame(Var = c("a", "b", "c", "d", "e"), Coef = coef.glmnet(model, s=0)[2:6], Category="Trained")
)
dfcoef$Category = factor(dfcoef$Category)
plots = list()
err = max(1-sum((Ypred-Ytest)^2)/sum((Ypred-mean(Ypred))^2), 0)
if (!Ronly) {
plots[[1]] = ggplot(df, aes(y=True, x=Predict)) + geom_point() + xlab("Prected Y") + ylab("True Y") +
annotate("text", x=min(df$Predict)*0.5, y=max(df$True), label=paste0("R squared = ",as.character(round(err, digits = 2)))) + coord_cartesian(xlim = c(min(Ytest), max(Ytest)))
plots[[2]] = ggplot(dfcoef, aes(x=Var, y=Coef, fill=Category)) + geom_bar(stat = "Identity", position = "dodge") +
ylab("Coefficients")
return(plots)
} else{
return(err)
}
}
Only 10 training data and 200 validation data is used.
Nfeats = c(0,2,3,4,5,6)
Rs = list()
Xtrain = matrix(rnorm(50, 0, sd=1), nrow = 5, ncol = 10)
Ytrain = apply(Xtrain, 2, function(x) Coef%*%x)
Xextra = matrix(rnorm(1000, 0, sd=1), nrow = 100, ncol = 10)
Xtest = matrix(rnorm(1000, 0, sd=1), nrow = 5, ncol = 200)
Ytest = apply(Xtest, 2, function(x) Coef%*%x)
Xextra_test = matrix(rnorm(100*200, 0, sd=1), nrow = 100, ncol=200)
for (Nfeat in Nfeats) {
R <- Reg_analysis(rbind(Xtrain,Xextra[1:Nfeat,]), Ytrain, rbind(Xtest,Xextra_test[1:Nfeat, ]), Ytest)[[1]] +
ggtitle(paste0("dimension = ", as.character(Nfeat + 5))) + theme(aspect.ratio=1)
Rs[[length(Rs)+1]] = R
# plot(R)
}
plot_grid(plotlist = Rs, ncol=2)
Ntrains = c(10,50, seq(100,1000, by = 50))
Nfeats = c(5,10,15,20,25,30,50,100,500)
Noises = c(0,1,2,5)
Xtrain = matrix(rnorm(5 * max(Ntrains), 0, sd=1), nrow = 5, ncol = max(Ntrains))
Ytrain = apply(Xtrain, 2, function(x) Coef%*%x)
Xextra = matrix(rnorm(max(Nfeats)*max(Ntrains), 0, sd=1), nrow = max(Nfeats), ncol = max(Ntrains))
Xtest = matrix(rnorm(1000, 0, sd=1), nrow = 5, ncol = 200)
Ytest = apply(Xtest, 2, function(x) Coef%*%x)
Xextra_test = matrix(rnorm(max(Nfeats)*200, 0, sd=1), nrow = max(Nfeats), ncol=200)
df = data.frame()
for (Noise in Noises) {
Ytrain_use = Ytrain + rnorm(max(Ntrains), 0, Noise)
for (Ntrain in Ntrains){
for (Nfeat in Nfeats){
R <- Reg_analysis(rbind(Xtrain[,1:Ntrain],Xextra[1:Nfeat,1:Ntrain]), Ytrain_use[1:Ntrain],
rbind(Xtest,Xextra_test[1:Nfeat, ]), Ytest, Ronly = TRUE)
df = rbind(df, data.frame(R = R, Ntrain = Ntrain, dimension = Nfeat, Noise = Noise))
}
}
# plot(R)
}
#plot_grid(plotlist = Rs, ncol=2)
df$dimension = factor(df$dimension)
for (Noise in Noises){
plot(
ggplot(df[df$Noise==Noise,], aes(x=Ntrain, y=R, col=dimension)) +
geom_smooth(se=FALSE) + ggtitle(paste0("Noise level = ", as.character(Noise))) +
xlab("Number of training data") + ylab("Predictive performance (R squared)")
)
}
## `geom_smooth()` using method = 'loess'
## `geom_smooth()` using method = 'loess'
## `geom_smooth()` using method = 'loess'
## `geom_smooth()` using method = 'loess'