> ############# > # Section 1 > ############# > > x <- seq(0,8,by=0.1) > plot(x,dgamma(x,0.75), type="l", ylab="", xlab="", ylim=c(0,1.25), xaxs="i", yaxs="i") > plot(x,dgamma(x,1.0), type="l", ylab="", xlab="", ylim=c(0,1.25), xaxs="i",yaxs="i") > plot(x,dgamma(x,2.0), type="l", ylab="", xlab="", ylim=c(0,1.25), xaxs="i",yaxs="i") > > ########### > # Curves for Gamma PDF > ########### > > library(faraway) Warning message: package ‘faraway’ was built under R version 3.4.3 > data(wafer) > summary(wafer) x1 x2 x3 x4 resist -:8 -:8 -:8 -:8 Min. :165.7 +:8 +:8 +:8 +:8 1st Qu.:201.0 Median :214.2 Mean :229.3 3rd Qu.:259.3 Max. :339.9 > wafer x1 x2 x3 x4 resist 1 - - - - 193.4 2 + - - - 247.6 3 - + - - 168.2 4 + + - - 205.0 5 - - + - 303.4 6 + - + - 339.9 7 - + + - 226.3 8 + + + - 208.3 9 - - - + 220.0 10 + - - + 256.4 11 - + - + 165.7 12 + + - + 203.5 13 - - + + 285.0 14 + - + + 268.0 15 - + + + 169.1 16 + + + + 208.5 > > ############## > # This is an experimental design example. > # We need to understang > ############## > > llmdl <- lm(log(resist) ~ .^2, wafer) > rlmdl <- step(llmdl) Start: AIC=-76.51 log(resist) ~ (x1 + x2 + x3 + x4)^2 Df Sum of Sq RSS AIC - x1:x4 1 0.000060 0.033967 -78.479 - x1:x2 1 0.000377 0.034284 -78.331 - x2:x4 1 0.001942 0.035850 -77.616 0.033907 -76.507 - x1:x3 1 0.024402 0.058309 -69.833 - x2:x3 1 0.031771 0.065678 -67.929 - x3:x4 1 0.033499 0.067407 -67.514 Step: AIC=-78.48 log(resist) ~ x1 + x2 + x3 + x4 + x1:x2 + x1:x3 + x2:x3 + x2:x4 + x3:x4 Df Sum of Sq RSS AIC - x1:x2 1 0.000377 0.034344 -80.303 - x2:x4 1 0.001942 0.035910 -79.589 0.033967 -78.479 - x1:x3 1 0.024402 0.058369 -71.817 - x2:x3 1 0.031771 0.065738 -69.915 - x3:x4 1 0.033499 0.067466 -69.499 Step: AIC=-80.3 log(resist) ~ x1 + x2 + x3 + x4 + x1:x3 + x2:x3 + x2:x4 + x3:x4 Df Sum of Sq RSS AIC - x2:x4 1 0.001942 0.036286 -81.422 0.034344 -80.303 - x1:x3 1 0.024402 0.058746 -73.714 - x2:x3 1 0.031771 0.066115 -71.823 - x3:x4 1 0.033499 0.067843 -71.410 Step: AIC=-81.42 log(resist) ~ x1 + x2 + x3 + x4 + x1:x3 + x2:x3 + x3:x4 Df Sum of Sq RSS AIC 0.036286 -81.422 - x1:x3 1 0.024402 0.060688 -75.193 - x2:x3 1 0.031771 0.068057 -73.360 - x3:x4 1 0.033499 0.069786 -72.959 > summary(rlmdl) Call: lm(formula = log(resist) ~ x1 + x2 + x3 + x4 + x1:x3 + x2:x3 + x3:x4, data = wafer) Residuals: Min 1Q Median 3Q Max -0.081164 -0.036518 -0.000397 0.038557 0.083618 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 5.31111 0.04762 111.525 4.67e-14 *** x1+ 0.20088 0.04762 4.218 0.00292 ** x2+ -0.21073 0.04762 -4.425 0.00221 ** x3+ 0.43718 0.06735 6.491 0.00019 *** x4+ 0.03537 0.04762 0.743 0.47892 x1+:x3+ -0.15621 0.06735 -2.319 0.04896 * x2+:x3+ -0.17824 0.06735 -2.647 0.02941 * x3+:x4+ -0.18303 0.06735 -2.718 0.02635 * --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 0.06735 on 8 degrees of freedom Multiple R-squared: 0.9471, Adjusted R-squared: 0.9008 F-statistic: 20.46 on 7 and 8 DF, p-value: 0.000165 > > ############ > # The final model contains > # x1, x2, x3, x4, x1:x3, x2:x3, and x3:x4 > # Lognormal Model > # Q: What is the difference between lognormal model > # and gamma model. > ############ > > library(faraway) > gmdl <- glm(resist ~ .^2, family=Gamma(link=log), wafer) > rgmdl <- step(gmdl) Start: AIC=144.11 resist ~ (x1 + x2 + x3 + x4)^2 Df Deviance AIC - x1:x4 1 0.033944 142.12 - x1:x2 1 0.034281 142.17 - x2:x4 1 0.035808 142.40 0.033882 144.11 - x1:x3 1 0.057723 145.64 - x2:x3 1 0.064983 146.71 - x3:x4 1 0.067121 147.03 Step: AIC=142.14 resist ~ x1 + x2 + x3 + x4 + x1:x2 + x1:x3 + x2:x3 + x2:x4 + x3:x4 Df Deviance AIC - x1:x2 1 0.034343 140.21 - x2:x4 1 0.035869 140.48 0.033944 142.14 - x1:x3 1 0.057785 144.36 - x2:x3 1 0.064998 145.64 - x3:x4 1 0.067183 146.03 Step: AIC=140.33 resist ~ x1 + x2 + x3 + x4 + x1:x3 + x2:x3 + x2:x4 + x3:x4 Df Deviance AIC - x2:x4 1 0.036266 138.72 0.034343 140.33 - x1:x3 1 0.058187 143.19 - x2:x3 1 0.065399 144.66 - x3:x4 1 0.067434 145.08 Step: AIC=139.2 resist ~ x1 + x2 + x3 + x4 + x1:x3 + x2:x3 + x3:x4 Df Deviance AIC 0.036266 139.20 - x1:x3 1 0.060433 142.54 - x2:x3 1 0.067314 144.06 - x3:x4 1 0.069345 144.51 > summary(rgmdl) Call: glm(formula = resist ~ x1 + x2 + x3 + x4 + x1:x3 + x2:x3 + x3:x4, family = Gamma(link = log), data = wafer) Deviance Residuals: Min 1Q Median 3Q Max -0.083185 -0.036793 -0.000648 0.038199 0.081390 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 5.31195 0.04757 111.677 4.62e-14 *** x1+ 0.20029 0.04757 4.211 0.00295 ** x2+ -0.21101 0.04757 -4.436 0.00218 ** x3+ 0.43674 0.06727 6.493 0.00019 *** x4+ 0.03537 0.04757 0.744 0.47836 x1+:x3+ -0.15549 0.06727 -2.312 0.04957 * x2+:x3+ -0.17626 0.06727 -2.620 0.03064 * x3+:x4+ -0.18195 0.06727 -2.705 0.02687 * --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 (Dispersion parameter for Gamma family taken to be 0.004524942) Null deviance: 0.697837 on 15 degrees of freedom Residual deviance: 0.036266 on 8 degrees of freedom AIC: 139.2 Number of Fisher Scoring iterations: 4 > > ############# > # The form of final model is identical to the previous one, but > # this the the Gamma GLM > ############# > > sqrt(summary(rgmdl)$dispersion) [1] 0.06726769 > > ########## > # Compare it with the root-MSE of the previous model. > ########## > > library(MASS) > gamma.dispersion(rgmdl) [1] 0.002265746 > > ########## > # This is another estimate of > # the dispersion parameter. > ########## > > data(motorins) > motori <- motorins[motorins$Zone == 1,] > gl <- glm(Payment ~ offset(log(Insured)) + as.numeric(Kilometres) + Make+Bonus, family=Gamma(link=log), motori) > summary(gl) Call: glm(formula = Payment ~ offset(log(Insured)) + as.numeric(Kilometres) + Make + Bonus, family = Gamma(link = log), data = motori) Deviance Residuals: Min 1Q Median 3Q Max -2.03727 -0.63467 -0.09602 0.26508 2.70324 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 6.52731 0.17774 36.723 < 2e-16 *** as.numeric(Kilometres) 0.12007 0.03115 3.855 0.000143 *** Make2 0.40704 0.17824 2.284 0.023128 * Make3 0.15535 0.17955 0.865 0.387666 Make4 -0.34394 0.19150 -1.796 0.073546 . Make5 0.14467 0.18097 0.799 0.424726 Make6 -0.34557 0.17824 -1.939 0.053515 . Make7 0.06136 0.18244 0.336 0.736893 Make8 0.75038 0.18726 4.007 7.85e-05 *** Make9 0.03197 0.17824 0.179 0.857783 Bonus -0.20069 0.02150 -9.334 < 2e-16 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 (Dispersion parameter for Gamma family taken to be 0.5559565) Null deviance: 238.97 on 294 degrees of freedom Residual deviance: 155.06 on 284 degrees of freedom AIC: 7167.5 Number of Fisher Scoring iterations: 6 > > ########## > # This model studies rates. > # It models Payment/Insured. > ########## > > llg <- glm(log(Payment) ~ offset(log(Insured))+as.numeric(Kilometres)+Make+Bonus,family=gaussian,motori) > summary(llg) Call: glm(formula = log(Payment) ~ offset(log(Insured)) + as.numeric(Kilometres) + Make + Bonus, family = gaussian, data = motori) Deviance Residuals: Min 1Q Median 3Q Max -2.28524 -0.41757 0.02732 0.46135 2.47377 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 6.514035 0.186339 34.958 < 2e-16 *** as.numeric(Kilometres) 0.057132 0.032654 1.750 0.08126 . Make2 0.363869 0.186857 1.947 0.05248 . Make3 0.006916 0.188235 0.037 0.97072 Make4 -0.547855 0.200755 -2.729 0.00675 ** Make5 -0.021785 0.189720 -0.115 0.90866 Make6 -0.458814 0.186857 -2.455 0.01467 * Make7 -0.321179 0.191264 -1.679 0.09420 . Make8 0.209578 0.196310 1.068 0.28661 Make9 0.125446 0.186857 0.671 0.50254 Bonus -0.178061 0.022540 -7.900 6.17e-14 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 (Dispersion parameter for gaussian family taken to be 0.611019) Null deviance: 238.56 on 294 degrees of freedom Residual deviance: 173.53 on 284 degrees of freedom AIC: 704.64 Number of Fisher Scoring iterations: 2 > > ########## > # Compare lognormal model with Gamma-GLM. > # The results should be close. > ########## > > x <- seq(0,5,by=0.05) > par(mfrow=c(1,2)) > plot(x,dgamma(x,1/0.55597,scale=0.55597),type="l",ylab="", xlab="",yaxs="i",ylim=c(0,1)) > plot(x,dlnorm(x,meanlog=-0.30551,sdlog=sqrt(0.61102)),type="l", ylab="",xlab="",yaxs="i",ylim=c(0,1)) > > ############ > > x0 <- data.frame(Make="1",Kilometres=1,Bonus=1,Insured=100) > predict(gl,new=x0,se=T,type="response") $fit 1 63060.85 $se.fit 1 9711.378 $residual.scale [1] 0.7456249 > predict(llg,new=x0,se=T,type="response") $fit 1 10.99828 $se.fit [1] 0.1614463 $residual.scale [1] 0.781677 > c(exp(10.998),exp(10.998)*0.16145) [1] 59754.513 9647.366 > > ########### > # The above is > # prediction. > ########### > > ############## > # Section 3: Joint Modeling > ############## > > data(weldstrength) > lmod <- lm(Strength ~ Drying + Material + Preheating, weldstrength) > summary(lmod) Call: lm(formula = Strength ~ Drying + Material + Preheating, data = weldstrength) Residuals: Min 1Q Median 3Q Max -1.0500 -0.2000 0.0750 0.1062 1.1000 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 43.6250 0.2624 166.253 < 2e-16 *** Drying 2.1500 0.2624 8.194 2.94e-06 *** Material -3.1000 0.2624 -11.814 5.75e-08 *** Preheating -0.3750 0.2624 -1.429 0.178 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 0.5248 on 12 degrees of freedom Multiple R-squared: 0.9456, Adjusted R-squared: 0.932 F-statistic: 69.58 on 3 and 12 DF, p-value: 7.389e-08 > > ############# > # We want to account for both mean and variance by explanatory variables. > ############# > > X <- cbind(1,as.matrix(lmod$model[,2:4])) > hat.matrix <- X%*%(solve(t(X)%*%X))%*%t(X) > h <- diag(hat.matrix) > > ############# > > h <- influence(lmod)$hat > d <- residuals(lmod)^2/(1-h) > gmod <- glm(d ~ Material+Preheating,family=Gamma(link=log), weldstrength,weights=1-h) > > ############ > > w <- 1/fitted(gmod) > lmod <- lm(Strength ~ Drying + Material + Preheating, weldstrength, weights=w) > summary(lmod) Call: lm(formula = Strength ~ Drying + Material + Preheating, data = weldstrength, weights = w) Weighted Residuals: Min 1Q Median 3Q Max -1.6514 -0.4806 0.1426 0.4722 1.4518 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 43.66983 0.20558 212.419 < 2e-16 *** Drying 1.99527 0.09694 20.582 9.97e-11 *** Material -3.13548 0.20240 -15.491 2.69e-09 *** Preheating -0.24332 0.09977 -2.439 0.0312 * --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 0.8548 on 12 degrees of freedom Multiple R-squared: 0.9824, Adjusted R-squared: 0.978 F-statistic: 223.2 on 3 and 12 DF, p-value: 8.67e-11 > summary(gmod) Call: glm(formula = d ~ Material + Preheating, family = Gamma(link = log), data = weldstrength, weights = 1 - h) Deviance Residuals: Min 1Q Median 3Q Max -2.0632 -1.0292 -0.7659 0.2605 1.7474 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -1.0771 0.6144 -1.753 0.103 Material -2.7326 0.7095 -3.852 0.002 ** Preheating 0.4821 0.7095 0.680 0.509 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 (Dispersion parameter for Gamma family taken to be 1.50997) Null deviance: 43.193 on 15 degrees of freedom Residual deviance: 21.286 on 13 degrees of freedom AIC: -21.899 Number of Fisher Scoring iterations: 23 > > ########## > # The above is the weighted least square model. > ##########