Author:
Yunbin Peng
Data set:
Audiometry & Demographic from NHANES 2005-2006 survey
https://wwwn.cdc.gov/Nchs/Nhanes/2005-2006/DEMO_D.XPT
https://wwwn.cdc.gov/Nchs/Nhanes/2005-2006/AUX_D.XPT
Description:
This script uses linear regression (`lm`) and the lme4 package (for mixed models) in R to investigate how age, age group, and gender affect hearing, using data from the NHANES survey.

library(sas7bdat)
library(lme4)
## Loading required package: Matrix
# Read the SAS data file into a data frame.
data <- read.sas7bdat("datalong.sas7bdat")

# Recode the grouping columns as factors so that summary() and the model
# formulas below treat them as categorical rather than numeric.
categorical <- c("GENDER", "AGEGROUP", "FREQUENCY", "RIGHTEAR")
data[categorical] <- lapply(data[categorical], factor)

# Per-column overview: level counts for factors, quantiles for numerics.
summary(data)
##       SEQN       GENDER         AGE        AGEGROUP  RIGHTEAR  FREQUENCY  
##  Min.   :31129   0:18662   Min.   :12.00   0:27986   0:18634   500 :5324  
##  1st Qu.:33673   1:18606   1st Qu.:14.00   1: 9282   1:18634   1000:5324  
##  Median :36210             Median :17.00                       2000:5324  
##  Mean   :36226             Mean   :30.92                       3000:5324  
##  3rd Qu.:38781             3rd Qu.:19.00                       4000:5324  
##  Max.   :41469             Max.   :85.00                       6000:5324  
##                                                                8000:5324  
##      RESULT      
##  Min.   :-10.00  
##  1st Qu.:  5.00  
##  Median : 10.00  
##  Mean   : 16.71  
##  3rd Qu.: 20.00  
##  Max.   :120.00  
## 

c

i

# Keep only the right-ear measurements taken at 1000 Hz.
R1K <- subset(data, RIGHTEAR == 1 & FREQUENCY == 1000)

# Linear model: gender, age group, and their interaction as predictors of
# RESULT.
model1 <- lm(formula = RESULT ~ GENDER*AGEGROUP, data = R1K)
summary(model1)
## 
## Call:
## lm(formula = RESULT ~ GENDER * AGEGROUP, data = R1K)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.758  -5.398  -0.398   4.602  84.602 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         5.3982     0.3372  16.011  < 2e-16 ***
## GENDER1            -0.1499     0.4750  -0.316 0.752326    
## AGEGROUP1          20.2177     0.6666  30.329  < 2e-16 ***
## GENDER1:AGEGROUP1   3.2919     0.9521   3.457 0.000554 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.62 on 2658 degrees of freedom
## Multiple R-squared:  0.4433, Adjusted R-squared:  0.4427 
## F-statistic: 705.5 on 3 and 2658 DF,  p-value: < 2.2e-16

Since the coefficient for the interaction term (GENDER1:AGEGROUP1) is significant (p = 0.000554), there is a significant interaction between age group and gender in determining how well an individual hears.

ii

# Linear model with additive effects of continuous age, age group, and gender
# (no interaction terms), fitted to the right-ear 1000 Hz subset.
model2 <- lm(
  formula = RESULT ~ AGE + AGEGROUP + GENDER,
  data = R1K
)
summary(model2)
## 
## Call:
## lm(formula = RESULT ~ AGE + AGEGROUP + GENDER, data = R1K)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.110  -5.957  -0.957   4.236  86.017 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -5.94926    1.01388  -5.868 4.96e-09 ***
## AGE           0.70947    0.06266  11.322  < 2e-16 ***
## AGEGROUP1   -22.11708    3.90941  -5.657 1.70e-08 ***
## GENDER1       0.55491    0.40313   1.376    0.169    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.4 on 2658 degrees of freedom
## Multiple R-squared:  0.4665, Adjusted R-squared:  0.4659 
## F-statistic: 774.8 on 3 and 2658 DF,  p-value: < 2.2e-16

Even after controlling for age group and gender, the coefficient of AGE (0.709, p < 2e-16) is still significant.

iii

# Linear model in which the slope of AGE is allowed to differ by age group.
# AGE*AGEGROUP already expands to both main effects plus the interaction, so
# the extra AGE and AGEGROUP terms are redundant; the formula is kept as
# written so the printed call is unchanged.
model3 <- lm(
  formula = RESULT ~ AGE + AGE*AGEGROUP + AGEGROUP,
  data = R1K
)
summary(model3)
## 
## Call:
## lm(formula = RESULT ~ AGE + AGE * AGEGROUP + AGEGROUP, data = R1K)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -31.968  -5.394  -0.394   4.606  84.889 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     3.1295     1.5821   1.978    0.048 *  
## AGE             0.1416     0.1010   1.401    0.161    
## AGEGROUP1     -57.8639     6.3066  -9.175  < 2e-16 ***
## AGE:AGEGROUP1   0.9158     0.1280   7.152  1.1e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.3 on 2658 degrees of freedom
## Multiple R-squared:  0.4762, Adjusted R-squared:  0.4756 
## F-statistic: 805.6 on 3 and 2658 DF,  p-value: < 2.2e-16

Since the coefficient of the AGE:AGEGROUP1 interaction (0.9158) is statistically significant (p = 1.1e-12), the effect of age differs between the age groups.

d

part i

# Mixed model on the full data set: fixed effects for gender, age group, and
# their interaction; random intercepts for FREQUENCY and RIGHTEAR.
# NOTE(review): SEQN (presumably the subject identifier) is not part of the
# model, so repeated measurements on the same subject are treated as
# independent -- confirm this is intended.
mixmodel1 <- lmer(
  formula = RESULT ~ GENDER*AGEGROUP + (1|FREQUENCY) + (1|RIGHTEAR),
  data = data
)
summary(mixmodel1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: RESULT ~ GENDER * AGEGROUP + (1 | FREQUENCY) + (1 | RIGHTEAR)
##    Data: data
## 
## REML criterion at convergence: 302935.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0705 -0.5764 -0.0207  0.4704  6.5333 
## 
## Random effects:
##  Groups    Name        Variance  Std.Dev.
##  FREQUENCY (Intercept)  28.16479  5.3071 
##  RIGHTEAR  (Intercept)   0.07585  0.2754 
##  Residual              198.23711 14.0797 
## Number of obs: 37268, groups:  FREQUENCY, 7; RIGHTEAR, 2
## 
## Fixed effects:
##                   Estimate Std. Error t value
## (Intercept)         7.5461     2.0188    3.74
## GENDER1            -0.8728     0.1683   -5.18
## AGEGROUP1          41.3207     0.2362  174.93
## GENDER1:AGEGROUP1  -5.6848     0.3374  -16.85
## 
## Correlation of Fixed Effects:
##             (Intr) GENDER1 AGEGRO
## GENDER1     -0.042               
## AGEGROUP1   -0.030  0.359        
## GENDER1:AGE  0.021 -0.499  -0.700

The coefficient of GENDER1:AGEGROUP1 is significant (t = -16.85), indicating that there is a significant interaction between age group and gender.

part ii

# Mixed model on the full data set: additive fixed effects for continuous age,
# age group, and gender; random intercepts for FREQUENCY and RIGHTEAR.
mixmodel2 <- lmer(
  formula = RESULT ~ AGE + AGEGROUP + GENDER + (1|FREQUENCY) + (1|RIGHTEAR),
  data = data
)
summary(mixmodel2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## RESULT ~ AGE + AGEGROUP + GENDER + (1 | FREQUENCY) + (1 | RIGHTEAR)
##    Data: data
## 
## REML criterion at convergence: 302157.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0516 -0.5685 -0.0335  0.4842  6.4031 
## 
## Random effects:
##  Groups    Name        Variance  Std.Dev.
##  FREQUENCY (Intercept)  28.16584  5.3071 
##  RIGHTEAR  (Intercept)   0.07607  0.2758 
##  Residual              194.11650 13.9326 
## Number of obs: 37268, groups:  FREQUENCY, 7; RIGHTEAR, 2
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) -3.11404    2.04783   -1.52
## AGE          0.73791    0.02245   32.88
## AGEGROUP1   -7.17565    1.40038   -5.12
## GENDER1     -2.40699    0.14441  -16.67
## 
## Correlation of Fixed Effects:
##           (Intr) AGE    AGEGRO
## AGE       -0.169              
## AGEGROUP1  0.165 -0.993       
## GENDER1   -0.031 -0.025  0.027

The coefficient of AGE is significant (t = 32.88); hence, after controlling for age group and gender, age remains an important predictor.

part iii

# Mixed model on the full data set: the slope of AGE is allowed to differ by
# age group; random intercepts for FREQUENCY and RIGHTEAR.
# AGE*AGEGROUP already expands to both main effects plus the interaction; the
# formula is kept as written so the printed formula is unchanged.
mixmodel3 <- lmer(
  formula = RESULT ~ AGE + AGE*AGEGROUP + AGEGROUP + (1|FREQUENCY) + (1|RIGHTEAR),
  data = data
)
summary(mixmodel3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: RESULT ~ AGE + AGE * AGEGROUP + AGEGROUP + (1 | FREQUENCY) +  
##     (1 | RIGHTEAR)
##    Data: data
## 
## REML criterion at convergence: 301978.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.1109 -0.5832 -0.0369  0.4631  6.6221 
## 
## Random effects:
##  Groups    Name        Variance  Std.Dev.
##  FREQUENCY (Intercept)  28.16599  5.3072 
##  RIGHTEAR  (Intercept)   0.07612  0.2759 
##  Residual              193.17187 13.8986 
## Number of obs: 37268, groups:  FREQUENCY, 7; RIGHTEAR, 2
## 
## Fixed effects:
##                Estimate Std. Error t value
## (Intercept)     5.38514    2.09459   2.571
## AGE             0.11109    0.03643   3.049
## AGEGROUP1     -45.10954    2.27434 -19.834
## AGE:AGEGROUP1   0.99184    0.04618  21.480
## 
## Correlation of Fixed Effects:
##             (Intr) AGE    AGEGRO
## AGE         -0.269              
## AGEGROUP1   -0.068  0.248       
## AGE:AGEGROU  0.213 -0.789 -0.789

The coefficient of AGE:AGEGROUP1 is significant (t = 21.48), so the effect of age differs between the age groups.