Live code:
Live code
  
    LOOCV
  
Data
LOOCV
# Leave-one-out cross-validation (LOOCV) for the simple linear model
# abundance ~ WatrCont fitted to mite_dat.
#
# Each row is held out once: the model is fit on the remaining n - 1 rows and
# used to predict the held-out row.
n <- nrow(mite_dat)
rmses <- rep(NA_real_, n)  # one error per held-out row, preallocated as numeric
# seq_len(n) is empty when n == 0, whereas 1:n would iterate over c(1, 0).
for (i in seq_len(n)) {
  train_dat <- mite_dat[-i, ]
  test_dat <- mite_dat[i, ]
  mod <- lm(abundance ~ WatrCont, data = train_dat)
  pred <- predict(mod, newdata = test_dat)
  # With a single held-out row, the fold's RMSE sqrt(err^2) is simply |err|.
  rmses[i] <- sqrt((test_dat$abundance - pred)^2)
}
# LOOCV estimate: the average of the per-fold errors.
loocv_err <- mean(rmses)
loocv_err
[1] 9.513248
How does this compare to when I take the usual validation set approach?
# Validation-set approach: a single random 70/30 train/test split of mite_dat.
# The seed fixes which rows land in the training set.
set.seed(2)
train_ids <- sample(n, size = 0.7 * n)

# Rows drawn above form the training set; all remaining rows are the test set.
train_dat <- mite_dat[train_ids, ]
test_dat <- mite_dat[-train_ids, ]

# Fit on the training rows only, then predict the held-out rows.
mod <- lm(abundance ~ WatrCont, data = train_dat)
pred <- predict(mod, newdata = test_dat)

# Test RMSE: square root of the mean squared prediction error.
sq_err <- (test_dat$abundance - pred)^2
rmse_val <- sqrt(mean(sq_err))
rmse_val
[1] 10.73352
Also, if you run this with different seeds, you will get different estimated RMSEs!