# MEPS data
# read file MEPSpanel13.csv
# sample of 750 observations from panel 13
bmi <- read.table(file="http://instruction.bus.wisc.edu/jfrees/jfreesbooks/Regression%20Modeling/KyotoSageDataCode/MEPSpanel13.csv", header = TRUE, sep = ",")
str(bmi)
Figure 3.1: Distribution of BMI
Figure 3.2: *BMI versus Candidate Predictors**
Figure 3.3: BMI versus Risk Types
# Linear Model A
## function to calculate the PRESS statistic
PRESS <- function(mod){
pr <- residuals(mod)/(1 - hatvalues(mod))
press <- sum(pr^2)
return(press)
}
## Here's the model
Model.a <- lm(Log.BMI ~ Age + I(Age*Age) + Female +
Race.f + Comord.Cnt, data = bmi)
summary(Model.a)
AIC(Model.a)
PRESS(Model.a)
par(mfrow = c(1, 2), oma = c(0, 0, 2, 0))
plot(Model.a, which = c(1,2))
## read Out of Sample data
bmi.14 <- read.table(file = "http://instruction.bus.wisc.edu/jfrees/jfreesbooks/Regression%20Modeling/KyotoSageDataCode/MEPSpanel14.csv", header = TRUE, sep = ",")
## functions to calculate the SSPE statistic
SSPE <- function(mod, test){
yhat <- predict(mod, test)
y <- test$Log.BMI
sspe <- sum( (y-yhat)^2 )
return(sspe)
}
SSPE(Model.a, bmi.14)
[1] 29.07913
# Linear Model B
## Here's the model
Model.b <- lm(Log.BMI ~ AgeCat + Female +
Race.f + CANCER + DIABETES + EMPHYSEMA + ASTHMA + STROKE +
CORONARY + CHOLEST + HIGHBP, data = bmi)
summary(Model.b)
AIC(Model.b)
PRESS(Model.b)
SSPE(Model.b, bmi.14)
par(mfrow = c(1, 2), oma = c(0, 0, 2, 0))
plot(Model.b, which = c(1,2))