> ###########
> # Section 1
> ###########
> 
> library(faraway)
> data(pulp)
> pulp
   bright operator
1    59.8        a
2    60.0        a
3    60.8        a
4    60.8        a
5    59.8        a
6    59.8        b
7    60.2        b
8    60.4        b
9    59.9        b
10   60.0        b
11   60.7        c
12   60.7        c
13   60.5        c
14   60.9        c
15   60.3        c
16   61.0        d
17   60.8        d
18   60.6        d
19   60.5        d
20   60.5        d
> op <- options(contrasts=c("contr.sum", "contr.poly"))
> 
> ##########
> # Change the constraint. 
> # Note that there are two major constrant: zero-sum and baseline.
> # Try the following two
> ##########
> 
> op <- options(contrasts=c("contr.treatment", "contr.treatment"))
> mod1 <- lm(bright ~ operator, pulp)
> summary(mod1)

Call:
lm(formula = bright ~ operator, data = pulp)

Residuals:
   Min     1Q Median     3Q    Max 
-0.440 -0.195 -0.070  0.175  0.560 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  60.2400     0.1458 413.243   <2e-16 ***
operatorb    -0.1800     0.2062  -0.873   0.3955    
operatorc     0.3800     0.2062   1.843   0.0839 .  
operatord     0.4400     0.2062   2.134   0.0486 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.326 on 16 degrees of freedom
Multiple R-squared:  0.4408,    Adjusted R-squared:  0.3359 
F-statistic: 4.204 on 3 and 16 DF,  p-value: 0.02261

> 
> op <- options(contrasts=c("contr.sum", "contr.sum"))
> mod2 <- lm(bright ~ operator, pulp)
> summary(mod2)

Call:
lm(formula = bright ~ operator, data = pulp)

Residuals:
   Min     1Q Median     3Q    Max 
-0.440 -0.195 -0.070  0.175  0.560 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 60.40000    0.07289 828.681   <2e-16 ***
operator1   -0.16000    0.12624  -1.267    0.223    
operator2   -0.34000    0.12624  -2.693    0.016 *  
operator3    0.22000    0.12624   1.743    0.101    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.326 on 16 degrees of freedom
Multiple R-squared:  0.4408,    Adjusted R-squared:  0.3359 
F-statistic: 4.204 on 3 and 16 DF,  p-value: 0.02261

> 
> ###########
> 
> op <- options(contrasts=c("contr.sum", "contr.poly"))
> mod3 <- lm(bright ~ operator, pulp)
> summary(mod3)

Call:
lm(formula = bright ~ operator, data = pulp)

Residuals:
   Min     1Q Median     3Q    Max 
-0.440 -0.195 -0.070  0.175  0.560 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 60.40000    0.07289 828.681   <2e-16 ***
operator1   -0.16000    0.12624  -1.267    0.223    
operator2   -0.34000    0.12624  -2.693    0.016 *  
operator3    0.22000    0.12624   1.743    0.101    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.326 on 16 degrees of freedom
Multiple R-squared:  0.4408,    Adjusted R-squared:  0.3359 
F-statistic: 4.204 on 3 and 16 DF,  p-value: 0.02261

> 
> ###########
> 
> lmod <- aov(bright ~ operator, pulp)
> summary(lmod)
            Df Sum Sq Mean Sq F value Pr(>F)  
operator     3   1.34  0.4467   4.204 0.0226 *
Residuals   16   1.70  0.1062                 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> 
> ###########
> # The above can be used to estimate both fixed-effect and random-effect
> # in the one-way model.
> ##########
> 
> library(ggplot2)
> ggplot(pulp, aes(x=operator, y=bright))+geom_point(position = position_jitter(width=0.1, height=0.0))
Warning messages:
1: In structure(NULL, class = "waiver") :
  Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
  Consider 'structure(list(), *)' instead.
2: In structure(NULL, class = "waiver") :
  Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
  Consider 'structure(list(), *)' instead.
3: In structure(NULL, class = "waiver") :
  Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
  Consider 'structure(list(), *)' instead.
4: In structure(NULL, class = "waiver") :
  Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
  Consider 'structure(list(), *)' instead.
5: In structure(NULL, class = "waiver") :
  Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
  Consider 'structure(list(), *)' instead.
6: In structure(NULL, class = "waiver") :
  Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
  Consider 'structure(list(), *)' instead.
7: In structure(NULL, class = "waiver") :
  Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
  Consider 'structure(list(), *)' instead.
8: In structure(NULL, class = "waiver") :
  Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
  Consider 'structure(list(), *)' instead.
> 
> ###########
> 
> coef(lmod)
(Intercept)   operator1   operator2   operator3 
      60.40       -0.16       -0.34        0.22 
> options(op)
> (0.447-0.106)/5
[1] 0.0682
> sqrt((0.447-0.106)/5)
[1] 0.2611513
> sqrt(0.1062)
[1] 0.3258834
> ###########
> # Understand the above estimators. It is based on the ANOVA method.
> ###########
> 
> library(lme4)
> mmod <- lmer(bright ~ 1+(1|operator), pulp)
> summary(mmod)
Linear mixed model fit by REML ['lmerMod']
Formula: bright ~ 1 + (1 | operator)
   Data: pulp

REML criterion at convergence: 18.6

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.4666 -0.7595 -0.1244  0.6281  1.6012 

Random effects:
 Groups   Name        Variance Std.Dev.
 operator (Intercept) 0.06808  0.2609  
 Residual             0.10625  0.3260  
Number of obs: 20, groups:  operator, 4

Fixed effects:
            Estimate Std. Error t value
(Intercept)  60.4000     0.1494   404.2
> 
> ###########
> # REML method. It is identical to the ANOVE method.
> ###########
> 
> sumary(mmod)
Fixed Effects:
coef.est  coef.se 
   60.40     0.15 

Random Effects:
 Groups   Name        Std.Dev.
 operator (Intercept) 0.26    
 Residual             0.33    
---
number of obs: 20, groups: operator, 4
AIC = 24.6, DIC = 14.4
deviance = 16.5 
> smod <- lmer(bright ~ 1+(1|operator), pulp, REML=FALSE)
> sumary(smod)
Fixed Effects:
coef.est  coef.se 
   60.40     0.13 

Random Effects:
 Groups   Name        Std.Dev.
 operator (Intercept) 0.21    
 Residual             0.33    
---
number of obs: 20, groups: operator, 4
AIC = 22.5, DIC = 16.5
deviance = 16.5 
> 
> ###########
> # The ML method
> # Q: what is the difference between the two methods.
> ###########
> 
> ###########
> # Section 2
> ###########
> 
> nullmod <- lm(bright ~ 1, pulp)
> y <- simulate(nullmod)
> t(y)
             1        2        3        4        5        6        7        8        9       10       11       12       13
sim_1 60.84266 61.09791 61.49466 60.49825 60.49319 60.77106 60.90779 60.51476 59.95737 60.26217 60.56345 60.46944 60.11805
           14       15       16       17       18       19       20
sim_1 60.9209 60.48808 60.19818 60.91522 60.52636 60.12637 60.45124
> 
> y <- simulate(nullmod)
> t(y)
             1        2        3        4        5        6        7        8        9      10      11       12       13
sim_1 60.09028 60.59748 60.35374 61.01448 60.87297 60.15631 60.41993 60.60805 59.90961 60.6126 60.4216 60.39088 60.28935
            14       15       16       17      18       19       20
sim_1 61.29074 60.34064 60.83731 59.82429 60.5684 59.89015 60.40686
> 
> y <- simulate(nullmod)
> t(y)
             1        2        3        4        5        6        7        8        9       10       11       12       13
sim_1 61.27655 60.86062 60.35686 60.46518 60.27082 60.86865 60.69358 60.24239 60.21036 60.10724 59.73932 60.21544 60.78132
           14       15       16       17       18       19       20
sim_1 61.0222 59.84705 60.48705 60.06609 60.66942 60.20886 60.61996

> lrtstat <- as.numeric(2*(logLik(smod)-logLik(nullmod)))
> pvalue <- pchisq(lrtstat,1,lower=FALSE)
> 
> ############
> # Test for random-effects. It is based on
> # the likelihood ratio test, but the method has
> # defects.
> ############
> 
> data.frame(lrtstat, pvalue)
   lrtstat    pvalue
1 2.568371 0.1090199
> y <- simulate(nullmod)
> lrstat <- numeric(1000)
> #  set.seed(123)
> 
> tt1 <- Sys.time()
> for(i in 1:1000){
+    y <- unlist(simulate(nullmod))
+    bnull <- lm(y ~ 1)
+    balt <- lmer(y ~ 1 + (1|operator), pulp, REML=FALSE)
+    lrstat[i] <- as.numeric(2*(logLik(balt)-logLik(bnull)))
+   }
> tt2 <- Sys.time()
> tt2-tt1
Time difference of 16.2553 secs
> 
> mean(lrstat < 0.00001)
[1] 0.691
> mean(lrstat > 2.5684)
[1] 0.02
> sqrt(0.019*0.981/1000)
[1] 0.004317291
> 
> ###########
> # The above is the bootstrap method.
> # This method is time-consuming.
> ###########
> 
> library(RLRsim)
> exactLRT(smod, nullmod)
No restrictions on fixed effects. REML-based inference preferable.

        simulated finite sample distribution of LRT. (p-value based on 10000 simulated values)

data:  
LRT = 2.5684, p-value = 0.0226

> exactRLRT(mmod)

        simulated finite sample distribution of RLRT.
        
        (p-value based on 10000 simulated values)

data:  
RLRT = 3.4701, p-value = 0.0214

Warning message:
In deviance.merMod(object, ...) :
  deviance() is deprecated for REML fits; use REMLcrit for the REML criterion or deviance(.,REML=FALSE) for deviance calculated at the REML fit
> VarCorr(mmod)
 Groups   Name        Std.Dev.
 operator (Intercept) 0.26093 
 Residual             0.32596 
> as.data.frame(VarCorr(mmod))
       grp        var1 var2       vcov     sdcor
1 operator (Intercept) <NA> 0.06808333 0.2609278
2 Residual        <NA> <NA> 0.10625000 0.3259601
> bsd <- numeric(1000)
> for(i in 1:1000){
+   y <- unlist(simulate(mmod))
+   bmod <- refit(mmod, y)
+   bsd[i] <- as.data.frame(VarCorr(bmod))$sdcor[1]
+ }
> quantile(bsd, c(0.025, 0.975))
     2.5%     97.5% 
0.0000000 0.5128652 
> confint(mmod, method="boot")
Computing bootstrap confidence intervals ...
                 2.5 %     97.5 %
.sig01       0.0000000  0.4871901
.sigma       0.2145954  0.4454772
(Intercept) 60.1326163 60.6914485
> 
> #############
> # The above is a convenient way by R.
> # We can use confidence intervals to test.
> ############
> 
> ##########
> # Section 3
> ##########
> 
> ranef(mmod)$operator
  (Intercept)
a  -0.1219403
b  -0.2591231
c   0.1676679
d   0.2133955
> (cc <- model.tables(lmod))
Tables of effects

 operator 
operator
    a     b     c     d 
-0.16 -0.34  0.22  0.28 
> cc[[1]]$operator/ranef(mmod)$operator
  (Intercept)
a    1.312118
b    1.312118
c    1.312118
d    1.312118
> library(lattice)
> dotplot(ranef(mmod, condVar=TRUE))
$operator

> fixef(mmod)+ranef(mmod)$operator
  (Intercept)
a    60.27806
b    60.14088
c    60.56767
d    60.61340
> 
> #############
> # Prediction is based on Bayesian way.
> # Posterior mean is the predicted value.
> #############
> 
> ###########
> # Section 4
> ###########
> 
> predict(mmod, re.form=~0)[1]
   1 
60.4 
> predict(mmod, newdata=data.frame(operator="a"))
       1 
60.27806 
> group.sd <- as.data.frame(VarCorr(mmod))$sdcor[1]
> resid.sd <- as.data.frame(VarCorr(mmod))$sdcor[2]
> 
> ############
> # Prediction of the observed values.
> # It exactly follows the model.
> ###########
> 
> pv <- numeric(1000)
> for(i in 1:1000){
+  y <- unlist(simulate(mmod))
+  bmod <- refit(mmod, y)
+  pv[i] <- predict(bmod, re.form=~0)[1] + rnorm(n=1,sd=group.sd) + rnorm(n=1,sd=resid.sd)
+ }
> quantile(pv, c(0.025, 0.975))
    2.5%    97.5% 
59.52879 61.26854 
> 
> ###########
> # It is a bootstrap method for confidence interval of the prediction.
> # It is the CI for the observed value.
> ###########
> 
> for(i in 1:1000){
+  y <- unlist(simulate(mmod, use.u=TRUE))
+  bmod <- refit(mmod, y)
+  pv[i] <- predict(bmod, newdata=data.frame(operator="a")) + rnorm(n=1,sd=resid.sd)
+ }
> quantile(pv, c(0.025, 0.975))
    2.5%    97.5% 
59.69448 60.97581 
> 
> ##########
> # it is the CI for the mean value.
> ##########
> 
> ############
> # Section 5
> ############
> 
> qqnorm(residuals(mmod),main="")
> plot(fitted(mmod),residuals(mmod),xlab="Fitted",ylab="Residuals")
> abline(h=0)
> 
> ###########
> # Residual plots.
> # We want to see whether it is random.
> ###########
> 
> ###########
> # Section 6
> ###########
> 
> library(faraway)
> data(penicillin)
> penicillin
   treat  blend yield
1      A Blend1    89
2      B Blend1    88
3      C Blend1    97
4      D Blend1    94
5      A Blend2    84
6      B Blend2    77
7      C Blend2    92
8      D Blend2    79
9      A Blend3    81
10     B Blend3    87
11     C Blend3    87
12     D Blend3    85
13     A Blend4    87
14     B Blend4    92
15     C Blend4    89
16     D Blend4    84
17     A Blend5    79
18     B Blend5    81
19     C Blend5    80
20     D Blend5    88
> 
> #############
> # Treatment must be fixed, but block can be random.
> ############
> 
> summary(penicillin)
 treat    blend       yield   
 A:5   Blend1:4   Min.   :77  
 B:5   Blend2:4   1st Qu.:81  
 C:5   Blend3:4   Median :87  
 D:5   Blend4:4   Mean   :86  
       Blend5:4   3rd Qu.:89  
                  Max.   :97  
> penicillin$Blend <- gl(5,4)
> gl(5,4)
 [1] 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 5 5 5 5
Levels: 1 2 3 4 5
> 
> penicillin
   treat  blend yield Blend
1      A Blend1    89     1
2      B Blend1    88     1
3      C Blend1    97     1
4      D Blend1    94     1
5      A Blend2    84     2
6      B Blend2    77     2
7      C Blend2    92     2
8      D Blend2    79     2
9      A Blend3    81     3
10     B Blend3    87     3
11     C Blend3    87     3
12     D Blend3    85     3
13     A Blend4    87     4
14     B Blend4    92     4
15     C Blend4    89     4
16     D Blend4    84     4
17     A Blend5    79     5
18     B Blend5    81     5
19     C Blend5    80     5
20     D Blend5    88     5
> summary(penicillin)
 treat    blend       yield    Blend
 A:5   Blend1:4   Min.   :77   1:4  
 B:5   Blend2:4   1st Qu.:81   2:4  
 C:5   Blend3:4   Median :87   3:4  
 D:5   Blend4:4   Mean   :86   4:4  
       Blend5:4   3rd Qu.:89   5:4  
                  Max.   :97        
> 
> ###########
> 
> ggplot(penicillin, aes(y=yield, x=treat, shape=Blend))+geom_point()+xlab("Treatment")
There were 12 warnings (use warnings() to see them)
> ggplot(penicillin, aes(y=yield, x=Blend, shape=treat))+geom_point()
There were 12 warnings (use warnings() to see them)
> 
> ############
> # Look at the above plots.
> ############
> 
> op <- options(contrasts=c("contr.sum", "contr.poly"))
> lmod <- aov(yield ~ blend + treat, penicillin)
> summary(lmod)
            Df Sum Sq Mean Sq F value Pr(>F)  
blend        4    264   66.00   3.504 0.0407 *
treat        3     70   23.33   1.239 0.3387  
Residuals   12    226   18.83                 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> coef(lmod)
(Intercept)      blend1      blend2      blend3      blend4      treat1      treat2      treat3 
         86           6          -3          -1           2          -2          -1           3 
> 
> ##########
> # We can use ANOVA method to estimate.
> # Q: how to derive the formula?
> ##########
> 
> mmod <- lmer(yield ~ treat + (1|blend), penicillin)
> sumary(mmod)
Fixed Effects:
            coef.est coef.se
(Intercept) 86.00     1.82  
treat1      -2.00     1.68  
treat2      -1.00     1.68  
treat3       3.00     1.68  

Random Effects:
 Groups   Name        Std.Dev.
 blend    (Intercept) 3.43    
 Residual             4.34    
---
number of obs: 20, groups: blend, 5
AIC = 118.6, DIC = 128
deviance = 117.3 
> 
> ###########
> # This is given by R. The result of REML is identical to that of the ANOVA
> ###########
> 
> options(op)
> ranef(mmod)$blend
       (Intercept)
Blend1   4.2878788
Blend2  -2.1439394
Blend3  -0.7146465
Blend4   1.4292929
Blend5  -2.8585859
> amod <- aov(yield ~ treat + Error(blend), penicillin)
> summary(amod)

Error: blend
          Df Sum Sq Mean Sq F value Pr(>F)
Residuals  4    264      66               

Error: Within
          Df Sum Sq Mean Sq F value Pr(>F)
treat      3     70   23.33   1.239  0.339
Residuals 12    226   18.83               
> anova(mmod)
Analysis of Variance Table
      Df Sum Sq Mean Sq F value
treat  3     70  23.333  1.2389
> 
> 
> library(pbkrtest)
> amod <- lmer(yield ~ treat + (1|blend), penicillin, REML=FALSE)
> nmod <- lmer(yield ~ 1 + (1|blend), penicillin, REML=FALSE)
> KRmodcomp(amod, nmod)
F-test with Kenward-Roger approximation; computing time: 0.03 sec.
large : yield ~ treat + (1 | blend)
small : yield ~ 1 + (1 | blend)
          stat      ndf      ddf F.scaling p.value
Ftest -11.9128   3.0000   2.5438         1       1
Warning message:
In deviance.merMod(object, ...) :
  deviance() is deprecated for REML fits; use REMLcrit for the REML criterion or deviance(.,REML=FALSE) for deviance calculated at the REML fit
> 
> aa <- KRmodcomp(amod, nmod)
Warning message:
In deviance.merMod(object, ...) :
  deviance() is deprecated for REML fits; use REMLcrit for the REML criterion or deviance(.,REML=FALSE) for deviance calculated at the REML fit
> names(aa)
[1] "test"    "type"    "aux"     "stats"   "f.large" "f.small" "ctime"   "L"      
> aa$stats$Fstat
[1] -11.9128

> ######
> # Boostrap
> ######
> 
> run <- 1000
> lstat <- rep(0,run)
> for(i in 1:run){
+  y <- unlist(simulate(nmod, use.u=TRUE))
+  mod.a <- lmer(yield ~ treat + (1|blend), penicillin, REML=FALSE)
+  mod.n <- lmer(yield ~ 1 + (1|blend), penicillin, REML=FALSE)
+  aa <- KRmodcomp(amod, nmod)
+  lstat[i] <- aa$stats$Fstat
+ }
There were 50 or more warnings (use warnings() to see the first 50)
> 
> mean(lstat[i]>aa$stats$Fstat)
> ##########
> # The observed likelihood ratio statistic for the test of 
> # fixed-effects.
> ##########
> 
> as.numeric(2*(logLik(amod)-logLik(nmod)))
[1] 4.047367
> 
> 1-pchisq(4.0474,3)
[1] 0.2563911
> 
> ##########
> # Test for fixed effects.
> # The p-value from the 
> # chi-sqiare distribution.
> ##########
> 
> lrstat <- numeric(1000)
> for(i in 1:1000){
+ ryield <- unlist(simulate(nmod))
+ nmodr <- refit(nmod, ryield)
+ amodr <- refit(amod, ryield)
+ lrstat[i] <- 2*(logLik(amodr)-logLik(nmodr))
+ }
> mean(lrstat > 4.0474)
[1] 0.342
> 
> #############
> # The above is the bootstrap p-value. Both can be used.
> #############
> 
> pmod <- PBmodcomp(amod, nmod)
> pmod
Parametric bootstrap test; time: 16.79 sec; samples: 1000 extremes: 335;
large : yield ~ treat + (1 | blend)
small : yield ~ 1 + (1 | blend)
         stat df p.value
LRT    4.0474  3  0.2564
PBtest 4.0474     0.3357
> summary(pmod)
Parametric bootstrap test; time: 16.79 sec; samples: 1000 extremes: 335;
large : yield ~ treat + (1 | blend)
small : yield ~ 1 + (1 | blend)
           stat     df    ddf p.value
PBtest   4.0474                0.3357
Gamma    4.0474                0.3477
Bartlett 3.2879 3.0000         0.3493
F        1.3491 3.0000 2.7427  0.4154
LRT      4.0474 3.0000         0.2564

> 
> ########
> 
> rmod <- lmer(yield ~ treat + (1|blend), penicillin)
> nlmod <- lm(yield ~ treat, penicillin)
> as.numeric(2*(logLik(rmod)-logLik(nlmod,REML=TRUE)))
[1] 2.762908
> 1-pchisq(2.7629,1)
[1] 0.09647321
> 
> ###########
> # This is the observed likelihood ratio statistic
> # for the test of random-effect.
> # We do not recommend chi-square test.
> ##########
> 
> lrstatf <- numeric(1000)
> for(i in 1:1000){
+    ryield <-  unlist(simulate(nlmod))
+    nlmodr <- lm(ryield ~ treat, penicillin)
+    rmodr <- refit(rmod, ryield)
+    lrstatf[i] <- 2*(logLik(rmodr)-logLik(nlmodr,REML=TRUE))
+   }
> mean(lrstatf < 0.00001)
[1] 0.998
> mean(lrstatf > 2.7629)
[1] 0.001
> summary(lrstatf)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-18.169 -15.900 -15.038 -14.707 -14.001   3.997 
> 
> library(RLRsim)
> exactRLRT(rmod)

        simulated finite sample distribution of RLRT.
        
        (p-value based on 10000 simulated values)

data:  
RLRT = 2.7629, p-value = 0.039

Warning message:
In deviance.merMod(object, ...) :
  deviance() is deprecated for REML fits; use REMLcrit for the REML criterion or deviance(.,REML=FALSE) for deviance calculated at the REML fit
> 
> ###############
> # Test for random-effects. The bootstrap method is recommended.
> ###############
> # Now, we look at the ML method.
> ###############
> 
> pmod <- PBmodcomp(amod, nmod)
> pmod
Parametric bootstrap test; time: 16.60 sec; samples: 1000 extremes: 341;
large : yield ~ treat + (1 | blend)
small : yield ~ 1 + (1 | blend)
         stat df p.value
LRT    4.0474  3  0.2564
PBtest 4.0474     0.3417
> summary(pmod)
Parametric bootstrap test; time: 16.60 sec; samples: 1000 extremes: 341;
large : yield ~ treat + (1 | blend)
small : yield ~ 1 + (1 | blend)
           stat     df    ddf p.value
PBtest   4.0474                0.3417
Gamma    4.0474                0.3547
Bartlett 3.2521 3.0000         0.3544
F        1.3491 3.0000 2.7316  0.4159
LRT      4.0474 3.0000         0.2564

> 
> ##########
> 
> rmod <- lmer(yield ~ treat + (1|blend), penicillin,REML=FALSE)
> nlmod <- lm(yield ~ treat, penicillin)
> as.numeric(2*(logLik(rmod,REML=FALSE)-logLik(nlmod,REML=FALSE)))
[1] 3.453634
> 1-pchisq(3.4536,1)
[1] 0.06311416
> 
> ###########
> 
> lrstatf <- numeric(1000)
> for(i in 1:1000){
+    ryield <-  unlist(simulate(nlmod))
+    nlmodr <- lm(ryield ~ treat, penicillin,REML=FALSE)
+    rmodr <- refit(rmod, ryield)
+    lrstatf[i] <- 2*(logLik(rmodr)-logLik(nlmodr,REML=FALSE))
+   }
There were 50 or more warnings (use warnings() to see the first 50)
> mean(lrstatf < 0.00001)
[1] 0.552
> mean(lrstatf > 3.4536)
[1] 0.042
> summary(lrstatf)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0000  0.0000  0.0000  0.5259  0.3443 10.2014 
> 
> library(RLRsim)
> exactRLRT(rmod)
Using restricted likelihood evaluated at ML estimators.
Refit with method="REML" for exact results.

        simulated finite sample distribution of RLRT.
        
        (p-value based on 10000 simulated values)

data:  
RLRT = 2.7629, p-value = 0.0384

> 
> ###########
> # The above is the ML method.
> ###########
> 
> ##########
> # Section 7
> ##########
> 
> library(faraway)
> data(irrigation)
> irrigation
   field irrigation variety yield
1     f1         i1      v1  35.4
2     f1         i1      v2  37.9
3     f2         i2      v1  36.7
4     f2         i2      v2  38.2
5     f3         i3      v1  34.8
6     f3         i3      v2  36.4
7     f4         i4      v1  39.5
8     f4         i4      v2  40.0
9     f5         i1      v1  41.6
10    f5         i1      v2  40.3
11    f6         i2      v1  42.7
12    f6         i2      v2  41.6
13    f7         i3      v1  43.6
14    f7         i3      v2  42.8
15    f8         i4      v1  44.5
16    f8         i4      v2  47.6
> summary(irrigation)
     field   irrigation variety     yield      
 f1     :2   i1:4       v1:8    Min.   :34.80  
 f2     :2   i2:4       v2:8    1st Qu.:37.60  
 f3     :2   i3:4               Median :40.15  
 f4     :2   i4:4               Mean   :40.23  
 f5     :2                      3rd Qu.:42.73  
 f6     :2                      Max.   :47.60  
 (Other):4                                     
> ggplot(irrigation, aes(y=yield, x=field, shape=irrigation, color=variety)) + geom_point()
There were 16 warnings (use warnings() to see them)
> 
> ############
> # Look at field and irrigation. They are confounded.
> # We cannot use the fixed-effect model for all of the variables. We need to fit a 
> # random effect model.
> ############
> 
> lmer(yield ~ irrigation * variety + (1|field) +(1|field:variety),  irrigation)
Error: number of levels of each grouping factor must be < number of observations
> 
> ############
> # The model fails.
> ############
> 
> lmod <- lmer(yield ~ irrigation * variety + (1|field), irrigation)
> sumary(lmod)
Fixed Effects:
                     coef.est coef.se
(Intercept)          40.23     1.47  
irrigation1          -1.43     2.54  
irrigation2          -0.42     2.54  
irrigation3          -0.83     2.54  
variety1             -0.37     0.36  
irrigation1:variety1  0.08     0.63  
irrigation2:variety1  0.27     0.63  
irrigation3:variety1  0.18     0.63  

Random Effects:
 Groups   Name        Std.Dev.
 field    (Intercept) 4.02    
 Residual             1.45    
---
number of obs: 16, groups: field, 8
AIC = 76.5, DIC = 80.7
deviance = 68.6 
> 
> ###########
> # This model avoids the problem.
> ###########
> 
> library(pbkrtest)
> lmoda <- lmer(yield ~ irrigation + variety + (1|field),data=irrigation)
> summary(lmoda)
Linear mixed model fit by REML ['lmerMod']
Formula: yield ~ irrigation + variety + (1 | field)
   Data: irrigation

REML criterion at convergence: 59

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-0.8751 -0.5615 -0.1090  0.6889  1.0931 

Random effects:
 Groups   Name        Variance Std.Dev.
 field    (Intercept) 16.541   4.067   
 Residual              1.426   1.194   
Number of obs: 16, groups:  field, 8

Fixed effects:
            Estimate Std. Error t value
(Intercept)  40.2250     1.4686  27.390
irrigation1  -1.4250     2.5437  -0.560
irrigation2  -0.4250     2.5437  -0.167
irrigation3  -0.8250     2.5437  -0.324
variety1     -0.3750     0.2985  -1.256

Correlation of Fixed Effects:
            (Intr) irrgt1 irrgt2 irrgt3
irrigation1  0.000                     
irrigation2  0.000 -0.333              
irrigation3  0.000 -0.333 -0.333       
variety1     0.000  0.000  0.000  0.000
> KRmodcomp(lmod, lmoda)
F-test with Kenward-Roger approximation; computing time: 0.05 sec.
large : yield ~ irrigation * variety + (1 | field)
small : yield ~ irrigation + variety + (1 | field)
         stat     ndf     ddf F.scaling p.value
Ftest  0.2032  3.0000 15.2469         1  0.8926
Warning message:
In deviance.merMod(object, ...) :
  deviance() is deprecated for REML fits; use REMLcrit for the REML criterion or deviance(.,REML=FALSE) for deviance calculated at the REML fit
> 
> #############
> # We want to test the interaction effect.
> #############
> 
> lmodi <- lmer(yield ~ irrigation + (1|field), irrigation)
> KRmodcomp(lmoda, lmodi)
F-test with Kenward-Roger approximation; computing time: 0.05 sec.
large : yield ~ irrigation + variety + (1 | field)
small : yield ~ irrigation + (1 | field)
        stat    ndf    ddf F.scaling p.value
Ftest  1.403  1.000 56.283         1  0.2412
Warning message:
In deviance.merMod(object, ...) :
  deviance() is deprecated for REML fits; use REMLcrit for the REML criterion or deviance(.,REML=FALSE) for deviance calculated at the REML fit
> 
> ##########
> # Test the variaty main effect.
> ##########
> 
> lmodv <- lmer(yield ~  variety + (1|field), irrigation)
> KRmodcomp(lmoda, lmodv)
F-test with Kenward-Roger approximation; computing time: 0.03 sec.
large : yield ~ irrigation + variety + (1 | field)
small : yield ~ variety + (1 | field)
        stat    ndf    ddf F.scaling p.value
Ftest 0.3653 3.0000 4.6228         1  0.7821
Warning message:
In deviance.merMod(object, ...) :
  deviance() is deprecated for REML fits; use REMLcrit for the REML criterion or deviance(.,REML=FALSE) for deviance calculated at the REML fit
> 
> ############
> # Test the irrigation main effect.
> ###########
> 
> plot(fitted(lmod),residuals(lmod),xlab="Fitted",ylab="Residuals")
> qqnorm(residuals(lmod),main="")
> 
> ###########
> # Look at the residual plot.
> ###########
> 
> library(RLRsim)
> exactRLRT(lmod)

        simulated finite sample distribution of RLRT.
        
        (p-value based on 10000 simulated values)

data:  
RLRT = 6.1118, p-value = 0.0089

Warning message:
In deviance.merMod(object, ...) :
  deviance() is deprecated for REML fits; use REMLcrit for the REML criterion or deviance(.,REML=FALSE) for deviance calculated at the REML fit
> 
> #############
> # Bootstrap p-value for the random effect.
> ############
> 
> mod <- lm(yield ~ irrigation * variety, data=irrigation)
> anova(mod)
Analysis of Variance Table

Response: yield
                   Df Sum Sq Mean Sq F value Pr(>F)
irrigation          3  40.19 13.3967  0.7318 0.5615
variety             1   2.25  2.2500  0.1229 0.7350
irrigation:variety  3   1.55  0.5167  0.0282 0.9931
Residuals           8 146.46 18.3075               
> 
> ############
> # The fixed-effect model
> ###########
> 
> 
> ##########
> # Section 8: This is the nested model. Very important.
> ##########
> 
> library(faraway)
> data(eggs)
> eggs
    Fat Lab Technician Sample
1  0.62   I        one      G
2  0.55   I        one      G
3  0.34   I        one      H
4  0.24   I        one      H
5  0.80   I        two      G
6  0.68   I        two      G
7  0.76   I        two      H
8  0.65   I        two      H
9  0.30  II        one      G
10 0.40  II        one      G
11 0.33  II        one      H
12 0.43  II        one      H
13 0.39  II        two      G
14 0.40  II        two      G
15 0.29  II        two      H
16 0.18  II        two      H
17 0.46 III        one      G
18 0.38 III        one      G
19 0.27 III        one      H
20 0.37 III        one      H
21 0.37 III        two      G
22 0.42 III        two      G
23 0.45 III        two      H
24 0.54 III        two      H
25 0.18  IV        one      G
26 0.47  IV        one      G
27 0.53  IV        one      H
28 0.32  IV        one      H
29 0.40  IV        two      G
30 0.37  IV        two      G
31 0.31  IV        two      H
32 0.43  IV        two      H
33 0.35   V        one      G
34 0.39   V        one      G
35 0.37   V        one      H
36 0.33   V        one      H
37 0.42   V        two      G
38 0.36   V        two      G
39 0.20   V        two      H
40 0.41   V        two      H
41 0.37  VI        one      G
42 0.43  VI        one      G
43 0.28  VI        one      H
44 0.36  VI        one      H
45 0.18  VI        two      G
46 0.20  VI        two      G
47 0.26  VI        two      H
48 0.06  VI        two      H
> summary(eggs)
      Fat          Lab    Technician Sample
 Min.   :0.0600   I  :8   one:24     G:24  
 1st Qu.:0.3075   II :8   two:24     H:24  
 Median :0.3700   III:8                    
 Mean   :0.3875   IV :8                    
 3rd Qu.:0.4300   V  :8                    
 Max.   :0.8000   VI :8                    
> 
> ############
> # Q: waht means nested.
> ############
> 
> library(ggplot2)
> ggplot(eggs, aes(y=Fat, x=Lab, color=Technician, shape=Sample)) + geom_point(position = position_jitter(width=0.1, height=0.0))+scale_color_grey()
There were 16 warnings (use warnings() to see them)
> 
> #############
> 
> cmod <- lmer(Fat ~ 1 + (1|Lab) + (1|Lab:Technician) +  (1|Lab:Technician:Sample), data=eggs)
> sumary(cmod)
Fixed Effects:
coef.est  coef.se 
    0.39     0.04 

Random Effects:
 Groups                Name        Std.Dev.
 Lab:Technician:Sample (Intercept) 0.06    
 Lab:Technician        (Intercept) 0.08    
 Lab                   (Intercept) 0.08    
 Residual                          0.08    
---
number of obs: 48, groups: Lab:Technician:Sample, 24; Lab:Technician, 12; Lab, 6
AIC = -54.2, DIC = -73.3
deviance = -68.8 
> cmod
Linear mixed model fit by REML ['lmerMod']
Formula: Fat ~ 1 + (1 | Lab) + (1 | Lab:Technician) + (1 | Lab:Technician:Sample)
   Data: eggs
REML criterion at convergence: -64.2351
Random effects:
 Groups                Name        Std.Dev.
 Lab:Technician:Sample (Intercept) 0.05536 
 Lab:Technician        (Intercept) 0.08355 
 Lab                   (Intercept) 0.07694 
 Residual                          0.08483 
Number of obs: 48, groups:  Lab:Technician:Sample, 24; Lab:Technician, 12; Lab, 6
Fixed Effects:
(Intercept)  
     0.3875  
> 
> ###########
> # Estimation of the nested model.
> ###########
> 
> cmodr <- lmer(Fat ~ 1 + (1|Lab) + (1|Lab:Technician), data=eggs)
> summary(cmodr)
Linear mixed model fit by REML ['lmerMod']
Formula: Fat ~ 1 + (1 | Lab) + (1 | Lab:Technician)
   Data: eggs

REML criterion at convergence: -62.6

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.17800 -0.42424  0.08043  0.67361  1.77543 

Random effects:
 Groups         Name        Variance Std.Dev.
 Lab:Technician (Intercept) 0.008002 0.08945 
 Lab            (Intercept) 0.005920 0.07694 
 Residual                   0.009239 0.09612 
Number of obs: 48, groups:  Lab:Technician, 12; Lab, 6

Fixed effects:
            Estimate Std. Error t value
(Intercept)  0.38750    0.04296   9.019
> cmodr
Linear mixed model fit by REML ['lmerMod']
Formula: Fat ~ 1 + (1 | Lab) + (1 | Lab:Technician)
   Data: eggs
REML criterion at convergence: -62.6317
Random effects:
 Groups         Name        Std.Dev.
 Lab:Technician (Intercept) 0.08945 
 Lab            (Intercept) 0.07694 
 Residual                   0.09612 
Number of obs: 48, groups:  Lab:Technician, 12; Lab, 6
Fixed Effects:
(Intercept)  
     0.3875  
> 
> ##########
> # Another model
> ##########
> 
> lrstat <- numeric(1000)
> 
> tt1 <- Sys.time()
> for(i in 1:1000){
+    rFat <- unlist(simulate(cmodr))
+    nmod <- lmer(rFat ~ 1 + (1|Lab) + (1|Lab:Technician), data=eggs)
+    amod <- lmer(rFat ~ 1 + (1|Lab) + (1|Lab:Technician) +
+    (1|Lab:Technician:Sample), data=eggs)
+    lrstat[i] <- 2*(logLik(amod)-logLik(nmod))
+   }
> tt2 <- Sys.time()
> tt2-tt1
Time difference of 1.054307 mins
> 
> mean(lrstat > 2*(logLik(cmod)-logLik(cmodr)))
[1] 0.092
> 2*(logLik(cmod)-logLik(cmodr))
'log Lik.' 1.603423 (df=5)
> 
> #############
> # Bootstrap p-value
> #############
> 
> library(RLRsim)
> cmods <- lmer(Fat ~ 1 + (1|Lab:Technician:Sample), data=eggs)
> VarCorr(cmodr)
 Groups         Name        Std.Dev.
 Lab:Technician (Intercept) 0.089452
 Lab            (Intercept) 0.076941
 Residual                   0.096119
> confint(cmod, method="boot")
Computing bootstrap confidence intervals ...
                 2.5 %     97.5 %
.sig01      0.00000000 0.09274127
.sig02      0.00000000 0.14236671
.sig03      0.00000000 0.15090252
.sigma      0.06076966 0.10462162
(Intercept) 0.30284233 0.46802929
> 
> ###########
> # Also, the bootstrap method for the test.
> ###########
> 
> ##########
> # Section 9: Not nested model.
> ##########
> 
> library(faraway)
> data(abrasion)
> abrasion
   run position material wear
1    1        1        C  235
2    1        2        D  236
3    1        3        B  218
4    1        4        A  268
5    2        1        A  251
6    2        2        B  241
7    2        3        D  227
8    2        4        C  229
9    3        1        D  234
10   3        2        C  273
11   3        3        A  274
12   3        4        B  226
13   4        1        B  195
14   4        2        A  270
15   4        3        C  230
16   4        4        D  225
> 
> matrix(abrasion$material,4,4)
     [,1] [,2] [,3] [,4]
[1,] "C"  "A"  "D"  "B" 
[2,] "D"  "B"  "C"  "A" 
[3,] "B"  "D"  "A"  "C" 
[4,] "A"  "C"  "B"  "D" 
> library(ggplot2)
> ggplot(abrasion,aes(x=material, y=wear, shape=run, color=position))+geom_point(position = position_jitter(width=0.1, height=0.0))+scale_color_grey()
There were 16 warnings (use warnings() to see them)

> ###############
> 
> lmod <- aov(wear ~ material + run + position, abrasion)
> summary(lmod)
            Df Sum Sq Mean Sq F value  Pr(>F)    
material     3   4622  1540.5  25.151 0.00085 ***
run          3    987   328.8   5.369 0.03901 *  
position     3   1469   489.5   7.992 0.01617 *  
Residuals    6    367    61.2                    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> 
> mmod <- lmer(wear ~ material + (1|run) + (1|position), abrasion)
> 
> 
> 
> ############
> 
> mmod <- lmer(wear ~ material + (1|run) + (1|position), abrasion)
> sumary(mmod)
Fixed Effects:
            coef.est coef.se
(Intercept) 239.50     6.88 
material1    26.25     3.39 
material2   -19.50     3.39 
material3     2.25     3.39 

Random Effects:
 Groups   Name        Std.Dev.
 run      (Intercept)  8.18   
 position (Intercept) 10.35   
 Residual              7.83   
---
number of obs: 16, groups: run, 4; position, 4
AIC = 117, DIC = 137.6
deviance = 120.3 
> 
> #############
> 
> library(RLRsim)
Warning message:
package ‘RLRsim’ was built under R version 3.4.4 
> mmodp <- lmer(wear ~ material + (1|position), abrasion)
> mmodr <- lmer(wear ~ material + (1|run), abrasion)
> exactRLRT(mmodp, mmod, mmodr)

        simulated finite sample distribution of RLRT.
        
        (p-value based on 10000 simulated values)

data:  
RLRT = 4.5931, p-value = 0.0123

Warning message:
In deviance.merMod(object, ...) :
  deviance() is deprecated for REML fits; use REMLcrit for the REML criterion or deviance(.,REML=FALSE) for deviance calculated at the REML fit
> exactRLRT(mmodr, mmod, mmodp)

        simulated finite sample distribution of RLRT.
        
        (p-value based on 10000 simulated values)

data:  
RLRT = 3.0459, p-value = 0.0332

Warning message:
In deviance.merMod(object, ...) :
  deviance() is deprecated for REML fits; use REMLcrit for the REML criterion or deviance(.,REML=FALSE) for deviance calculated at the REML fit
> 
> ################
> 
> library(pbkrtest)
> mmod <- lmer(wear ~ material + (1|run) + (1|position), abrasion,REML=FALSE)
> nmod <- lmer(wear ~ 1+ (1|run) + (1|position), abrasion,REML=FALSE)
> KRmodcomp(mmod, nmod)
F-test with Kenward-Roger approximation; computing time: 0.06 sec.
large : wear ~ material + (1 | run) + (1 | position)
small : wear ~ 1 + (1 | run) + (1 | position)
         stat     ndf     ddf F.scaling p.value
Ftest -54.567   3.000   2.409         1       1
Warning message:
In deviance.merMod(object, ...) :
  deviance() is deprecated for REML fits; use REMLcrit for the REML criterion or deviance(.,REML=FALSE) for deviance calculated at the REML fit