載入套件
library(pacman)
## Warning: 套件 'pacman' 是用 R 版本 4.2.3 來建造的
p_load(lme4,lmerTest,misty,multilevel,ggplot2,lattice)
資料前處理
讀進資料
並將 schID 視為類別變項(factor)
將 chem 以群體平均置中
dta <- read.csv('Chemistry100c.csv')
head(dta)
## schID stuID chem gender sciscore
## 1 1 1 5 M 6.875
## 2 1 2 3 M 5.875
## 3 1 3 5 M 8.000
## 4 1 4 5 F 6.625
## 5 1 5 4 M 6.125
## 6 1 6 5 M 7.250
dta$schID <- as.factor(dta$schID)
dta$chem_wc <- center(dta$chem, type = "CWC", cluster = dta$schID)
形成學校層次的資料
dta_m <- aggregate(chem~ schID,data = dta, mean)
names(dta_m) <- c('schID', 'G.chem')
head(dta_m)
## schID G.chem
## 1 1 3.478261
## 2 2 2.708333
## 3 3 3.360000
## 4 4 4.243243
## 5 5 3.285714
## 6 6 3.109091
dta <- merge(dta,dta_m, by="schID")
head(dta)
## schID stuID chem gender sciscore chem_wc G.chem
## 1 1 1 5 M 6.875 1.5217391 3.478261
## 2 1 2 3 M 5.875 -0.4782609 3.478261
## 3 1 3 5 M 8.000 1.5217391 3.478261
## 4 1 4 5 F 6.625 1.5217391 3.478261
## 5 1 5 4 M 6.125 0.5217391 3.478261
## 6 1 6 5 M 7.250 1.5217391 3.478261
計算化學平均分數的 ICC1, ICC2, Rwg
rsttemp <- aov(chem ~ schID,data=dta)
c(ICC1=ICC1(rsttemp),ICC2=ICC2(rsttemp))
## ICC1 ICC2
## 0.1191869 0.8335176
npoints <- 6
RwgOUT<-rwg(dta$chem,dta$schID,ranvar=(npoints*npoints-1)/12)
summary(RwgOUT)
## grpid rwg gsize
## Length:100 Min. :0.0000 Min. : 21.00
## Class :character 1st Qu.:0.1327 1st Qu.: 24.00
## Mode :character Median :0.2197 Median : 30.00
## Mean :0.2417 Mean : 37.00
## 3rd Qu.:0.3702 3rd Qu.: 40.25
## Max. :0.6253 Max. :188.00
第一部份:探索資料
畫圖
ggplot(data = dta, aes(x = chem_wc, y = sciscore, group = schID))+
stat_smooth(method = 'lm', se = F, color = 'lightgray') +
geom_point(size = 1) +
stat_smooth(aes(group = 1), method= 'lm', se = F, color = 'black') +
labs(x = 'chem', y = 'sciscore', title = 'schID')
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
分群體畫圖
ggplot(data = dta, aes(x = chem_wc, y = sciscore))+
geom_point(size = 1) +
stat_smooth(method = 'lm', se = F) +
facet_wrap( ~ schID)
## `geom_smooth()` using formula = 'y ~ x'
第二部份 正式分析
先只放階層一變項
m1 <- lmer(sciscore ~ chem_wc + ( 1 + chem_wc| schID ), data = dta )
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: sciscore ~ chem_wc + (1 + chem_wc | schID)
## Data: dta
##
## REML criterion at convergence: 7125.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8877 -0.5793 0.0685 0.6524 3.4552
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## schID (Intercept) 0.11874 0.3446
## chem_wc 0.00198 0.0445 0.07
## Residual 0.37140 0.6094
## Number of obs: 3700, groups: schID, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.188178 0.036160 98.997764 171.13 <2e-16 ***
## chem_wc 0.340003 0.008392 74.520847 40.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## chem_wc 0.033
再放階層二變項
m2 <- lmer(sciscore ~ chem_wc + G.chem + chem_wc:G.chem+ ( 1 + chem_wc| schID ), data = dta )
summary(m2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: sciscore ~ chem_wc + G.chem + chem_wc:G.chem + (1 + chem_wc |
## schID)
## Data: dta
##
## REML criterion at convergence: 7089.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8706 -0.5774 0.0788 0.6608 3.5011
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## schID (Intercept) 0.076605 0.27678
## chem_wc 0.001321 0.03634 -0.31
## Residual 0.371915 0.60985
## Number of obs: 3700, groups: schID, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.20881 0.14423 98.72614 36.114 < 2e-16 ***
## chem_wc 0.24122 0.03988 65.69326 6.048 7.76e-08 ***
## G.chem 0.33636 0.04846 98.62462 6.941 4.16e-10 ***
## chem_wc:G.chem 0.03420 0.01352 65.68885 2.529 0.0139 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) chm_wc G.chem
## chem_wc -0.127
## G.chem -0.978 0.125
## chm_wc:G.ch 0.123 -0.980 -0.126
試試看去除效果
m2.1 <- lmer(sciscore ~ chem_wc + G.chem + chem_wc:G.chem+ ( 1 | schID ), data = dta )
anova(m2.1,m2)
## refitting model(s) with ML (instead of REML)
## Data: dta
## Models:
## m2.1: sciscore ~ chem_wc + G.chem + chem_wc:G.chem + (1 | schID)
## m2: sciscore ~ chem_wc + G.chem + chem_wc:G.chem + (1 + chem_wc | schID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m2.1 6 7081.1 7118.4 -3534.5 7069.1
## m2 8 7081.4 7131.1 -3532.7 7065.4 3.6355 2 0.1624
summary(m2.1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: sciscore ~ chem_wc + G.chem + chem_wc:G.chem + (1 | schID)
## Data: dta
##
## REML criterion at convergence: 7093.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8871 -0.5816 0.0773 0.6557 3.7315
##
## Random effects:
## Groups Name Variance Std.Dev.
## schID (Intercept) 0.07647 0.2765
## Residual 0.37464 0.6121
## Number of obs: 3700, groups: schID, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.210e+00 1.442e-01 9.875e+01 36.133 < 2e-16 ***
## chem_wc 2.305e-01 3.468e-02 3.598e+03 6.646 3.46e-11 ***
## G.chem 3.358e-01 4.845e-02 9.865e+01 6.931 4.36e-10 ***
## chem_wc:G.chem 3.823e-02 1.174e-02 3.598e+03 3.256 0.00114 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) chm_wc G.chem
## chem_wc 0.000
## G.chem -0.978 0.000
## chm_wc:G.ch 0.000 -0.981 0.000
試試看去除效果
m2.2 <- lmer(sciscore ~ chem_wc + G.chem + ( 1 | schID ), data = dta )
anova(m2.2,m2.1)
## refitting model(s) with ML (instead of REML)
## Data: dta
## Models:
## m2.2: sciscore ~ chem_wc + G.chem + (1 | schID)
## m2.1: sciscore ~ chem_wc + G.chem + chem_wc:G.chem + (1 | schID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m2.2 5 7089.6 7120.7 -3539.8 7079.6
## m2.1 6 7081.1 7118.4 -3534.5 7069.1 10.592 1 0.001136 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m2.2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: sciscore ~ chem_wc + G.chem + (1 | schID)
## Data: dta
##
## REML criterion at convergence: 7097.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8986 -0.5827 0.0724 0.6550 3.5175
##
## Random effects:
## Groups Name Variance Std.Dev.
## schID (Intercept) 0.07644 0.2765
## Residual 0.37564 0.6129
## Number of obs: 3700, groups: schID, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.210e+00 1.442e-01 9.875e+01 36.133 < 2e-16 ***
## chem_wc 3.412e-01 6.818e-03 3.599e+03 50.048 < 2e-16 ***
## G.chem 3.358e-01 4.845e-02 9.865e+01 6.931 4.36e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) chm_wc
## chem_wc 0.000
## G.chem -0.978 0.000
底下是第三部份,診斷(檢查假設)
學校隨機效果QQ圖,檢查隨機效果是否呈常態
qqmath(~ ranef(m2.2, condVar = T)$schID, type = c('p', 'g', 'r'), pch = '.',xlab = '常態分位數', ylab = '隨機效果分位數')
殘差的QQ圖,檢驗殘差是否呈常態
qqmath(~ resid(m2.2, scale = T), type = c('p', 'g', 'r'), pch = '.',xlab = '標準化常態分位數', ylab = '標準化殘差')
預測值對殘差圖,檢查殘差與獨變項無關的假設
m2.2_f <- fortify.merMod(m2.2)
ggplot(data = m2.2_f, aes(x = .fitted, y = .scresid)) +
geom_point(pch = '.') +
stat_smooth(method = 'loess', se = F) +
labs( x = '預測值', y = '標準化殘差')
## `geom_smooth()` using formula = 'y ~ x'