n <- 20 x <- runif(n,0,1) eps <- rnorm(n,0,1) y <- 2*x + eps A <- data.frame(x=x,y=y) model <- lm(data=A,y~x) A$prediction <- predict(model,newdata = A) mse <- mean((A$prediction-A$y)^2) x.new <- runif(n,0,1) #from same population eps <- rnorm(n,0,1) y.new <- 2*x.new + eps A.new <- data.frame(x=x.new,y=y.new) A.new$prediction <- predict(model,newdata = A.new) mse.new <- mean(abs(A.new$prediction-A.new$y)^2) mse;mse.new #repeat several times and save the two mses R <- 1000 MSE <- data.frame(mse.in=rep(0,R),mse.out=rep(0,R)) for(i in 1:R){ n <- 20 x <- runif(n,0,1) eps <- rnorm(n,0,1) y <- 2*x + eps A <- data.frame(x=x,y=y) model <- lm(data=A,y~x) A$prediction <- predict(model,newdata = A) MSE$mse.in[i] <- mean((A$prediction-A$y)^2) x.new <- runif(n,0,1); eps <- rnorm(n,0,1) y.new <- 2*x.new + eps A.new <- data.frame(x=x.new,y=y.new) A.new$prediction <- predict(model,newdata = A.new) MSE$mse.out[i] <- mean((A.new$prediction-A.new$y)^2) } plot(MSE) abline(a=0,b=1) MSE$worse <- ifelse(MSE$mse.out>=MSE$mse.in,1,0) summary(MSE)