Load the dataset: everyone in NHANES 2007-2008 and 2009-2010 with no missing BMI or CRP (C-reactive protein, a marker of inflammation), older than 17 years, and not pregnant.
load(file = "../data/nhanes_cr_new.rda")
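For orientation, the inclusion criteria above amount to a filter of roughly the following form (a sketch only; raw.pooled is a hypothetical name for the merged raw NHANES file, and the preprocessing that actually produced nhanes_cr_new.rda is not shown here):
## data.pooled <- subset(raw.pooled, !is.na(bmi) & !is.na(crp) & age > 17 & !pregnant)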
We exclude the alcohol and marijuana variables since they are not in the original study and have many missing values. We also restrict to people at least 21 years old.
data.pooled[, c("alcohol.ever", "alcohol.days", "alcohol.per.day", "alcohol.total", "marijuana")] <- NULL
data.pooled <- subset(data.pooled, age >= 21)
summary(data.pooled)
##        id          gender           age          income                 race                    marital
##  Min.   :41475   Male  :4749   Min.   :21.0   Min.   :0.000   Mexican American  :1657   Married            :5189
##  1st Qu.:46634   Female:4928   1st Qu.:36.0   1st Qu.:1.110   Other Hispanic    : 958   Widowed            : 839
##  Median :51903                 Median :50.0   Median :2.060   Non-Hispanic White:4845   Divorced           :1113
##  Mean   :51867                 Mean   :50.4   Mean   :2.498   Non-Hispanic Black:1757   Separated          : 335
##  3rd Qu.:57091                 3rd Qu.:64.0   3rd Qu.:4.070   Other Race        : 460   Never Married      :1494
##  Max.   :62158                 Max.   :80.0   Max.   :5.000                             Living with partner: 707
##          education        vigorous.work   vigorous.recreation smoking.ever      smoking.now           crp
##  < 9th Grade      :1185   Mode :logical   Mode :logical       Mode :logical   Min.   :   0.00   Min.   : 0.0100
##  9--11th Grade    :1605   FALSE:7810      FALSE:7848          FALSE:5071      1st Qu.:   0.00   1st Qu.: 0.0800
##  High School      :2278   TRUE :1867      TRUE :1829          TRUE :4606      Median :   0.00   Median : 0.1900
##  Some College     :2615   NA's :0         NA's :0             NA's :0         Mean   :  87.48   Mean   : 0.4241
##  College Graduates:1994                                                       3rd Qu.:   0.00   3rd Qu.: 0.4600
##                                                                               Max.   :2850.00   Max.   :20.0000
##       bmi          pregnant        estrogen       bronchitis.now   asthma.now     emphysema.ever  thyroid.ever
##  Min.   :13.18   Mode :logical   Mode :logical   Mode :logical   Mode :logical   Mode :logical   Mode :logical
##  1st Qu.:24.45   FALSE:9677      FALSE:9400      FALSE:9393      FALSE:8931      FALSE:9438      FALSE:8965
##  Median :28.13   NA's :0         TRUE :277       TRUE :284       TRUE :746       TRUE :239       TRUE :712
##  Mean   :29.14                   NA's :0         NA's :0         NA's :0         NA's :0         NA's :0
##  3rd Qu.:32.53
##  Max.   :84.87
##  arthritis.ever    heart.attack.ever   stroke.ever     liver.now       gout.ever       obesity
##  Mode :logical     Mode :logical       Mode :logical   Mode :logical   Mode :logical   Mode :logical
##  FALSE:6905        FALSE:9243          FALSE:9312      FALSE:9516      FALSE:9217      FALSE:2709
##  TRUE :2772        TRUE :434           TRUE :365       TRUE :161       TRUE :460       TRUE :6968
##  NA's :0           NA's :0             NA's :0         NA's :0         NA's :0         NA's :0
##
##
nrow(data.pooled)
## [1] 9677
Create a binary indicator for obesity, defined here as BMI > 25.
data.pooled$obesity <- data.pooled$bmi > 25
Clean up the variable coding: relabel the factor levels and recode the logical variables.
levels(data.pooled$gender) <- c(" Male", " Female")
levels(data.pooled$race) <- c(" Mexican American", " Other Hispanic", " Non-Hispanic White", " Non-Hispanic Black", " Other Race")
levels(data.pooled$marital) <- c(" Married", " Widowed", " Divorced", " Separated", " Never Married", " Living with partner")
data.pooled$education <- factor(data.pooled$education)
levels(data.pooled$education) <- c(" < 9th Grade", " 9--11th Grade", " High School", " Some College", " College Graduates")
data.pooled$vigorous.recreation <- data.pooled$vigorous.recreation == 1
data.pooled$vigorous.work <- data.pooled$vigorous.work == 1
# recode all logical columns as 0/1
for (j in 1:ncol(data.pooled)) {
  if (is.logical(data.pooled[, j])) {
    data.pooled[, j] <- as.numeric(data.pooled[, j])
  }
}
range(data.pooled$crp)
## [1] 0.01 20.00
hist(log2(data.pooled$crp))
y <- log2(data.pooled$crp)
t <- data.pooled$obesity
x <- model.matrix(~ ., data.pooled[, -c(1, 12, 13, 14, 25)])[, -1] # drop id, crp, bmi, pregnant and obesity from the design
x <- scale(x, scale = FALSE) # center the regressors
Look for significant interactions between obesity and the covariates in a naive regression that includes all obesity-by-covariate interaction terms.
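For reference, with $T$ the obesity indicator and $X$ the centered covariates, the naive model fitted below is

$$\log_2(\mathrm{CRP}) = \alpha + \tau T + X^\top \beta + T \cdot X^\top \gamma + \varepsilon,$$

and the t:x rows of the summary are the interaction coefficients $\gamma$, i.e. the candidate effect modifiers.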
naive.model <- lm(y ~ t * x)
## naive.model$coef[c("obesityTRUE:gender2", "obesityTRUE:age")]
## confint(naive.model)[c("obesityTRUE:gender2", "obesityTRUE:age"), ]
summary(naive.model)
##
## Call:
## lm(formula = y ~ t * x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.5585 -1.1302 -0.0101 1.1025 7.1359
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.2069285 0.0344418 -93.111 < 2e-16 ***
## t 1.1660550 0.0398856 29.235 < 2e-16 ***
## xgender Female -0.1594840 0.0719433 -2.217 0.026660 *
## xage 0.0221459 0.0024951 8.876 < 2e-16 ***
## xincome -0.0332816 0.0230009 -1.447 0.147938
## xrace Other Hispanic -0.2302499 0.1463562 -1.573 0.115702
## xrace Non-Hispanic White -0.4193533 0.1130653 -3.709 0.000209 ***
## xrace Non-Hispanic Black -0.2972465 0.1291921 -2.301 0.021424 *
## xrace Other Race -0.5548231 0.1502063 -3.694 0.000222 ***
## xmarital Widowed 0.0241838 0.1350212 0.179 0.857854
## xmarital Divorced -0.0183663 0.1077111 -0.171 0.864609
## xmarital Separated 0.2911611 0.1822670 1.597 0.110200
## xmarital Never Married -0.1187449 0.0980970 -1.210 0.226123
## xmarital Living with partner 0.0278301 0.1279432 0.218 0.827808
## xeducation 9--11th Grade -0.0989241 0.1372364 -0.721 0.471032
## xeducation High School -0.0884296 0.1301812 -0.679 0.496976
## xeducation Some College -0.1052955 0.1319269 -0.798 0.424812
## xeducation College Graduates -0.2972596 0.1402019 -2.120 0.034013 *
## xvigorous.work -0.0826367 0.0861205 -0.960 0.337307
## xvigorous.recreation -0.0791500 0.0822328 -0.963 0.335817
## xsmoking.ever 0.1645905 0.0758886 2.169 0.030119 *
## xsmoking.now 0.0005353 0.0001444 3.706 0.000212 ***
## xestrogen 1.8006306 0.1576798 11.420 < 2e-16 ***
## xbronchitis.now 0.3748326 0.2308772 1.624 0.104512
## xasthma.now 0.0732103 0.1418510 0.516 0.605792
## xemphysema.ever 0.3150096 0.2188275 1.440 0.150032
## xthyroid.ever 0.0310385 0.1377609 0.225 0.821745
## xarthritis.ever 0.2696296 0.0861890 3.128 0.001763 **
## xheart.attack.ever 0.3095199 0.1841733 1.681 0.092875 .
## xstroke.ever 0.5017384 0.1875725 2.675 0.007488 **
## xliver.now -0.0398560 0.2908722 -0.137 0.891016
## xgout.ever 0.8018952 0.2146335 3.736 0.000188 ***
## t:xgender Female 0.6543354 0.0848445 7.712 1.36e-14 ***
## t:xage -0.0239064 0.0029864 -8.005 1.33e-15 ***
## t:xincome -0.0186943 0.0273937 -0.682 0.494982
## t:xrace Other Hispanic 0.0522829 0.1654135 0.316 0.751953
## t:xrace Non-Hispanic White 0.1662468 0.1285371 1.293 0.195912
## t:xrace Non-Hispanic Black 0.3762288 0.1466133 2.566 0.010299 *
## t:xrace Other Race 0.0381840 0.1915752 0.199 0.842020
## t:xmarital Widowed -0.0834606 0.1561236 -0.535 0.592953
## t:xmarital Divorced 0.1611143 0.1262494 1.276 0.201930
## t:xmarital Separated -0.2349084 0.2138957 -1.098 0.272128
## t:xmarital Never Married 0.1167643 0.1174308 0.994 0.320090
## t:xmarital Living with partner -0.0497249 0.1528283 -0.325 0.744912
## t:xeducation 9--11th Grade 0.2585835 0.1569847 1.647 0.099552 .
## t:xeducation High School 0.3068091 0.1499309 2.046 0.040750 *
## t:xeducation Some College 0.2956213 0.1519158 1.946 0.051689 .
## t:xeducation College Graduates 0.3162175 0.1641243 1.927 0.054047 .
## t:xvigorous.work -0.0185723 0.1008948 -0.184 0.853958
## t:xvigorous.recreation -0.3225230 0.1010018 -3.193 0.001411 **
## t:xsmoking.ever -0.0668789 0.0878709 -0.761 0.446613
## t:xsmoking.now -0.0001360 0.0001749 -0.778 0.436692
## t:xestrogen -0.6452366 0.2129379 -3.030 0.002451 **
## t:xbronchitis.now -0.0918470 0.2609946 -0.352 0.724913
## t:xasthma.now 0.1934760 0.1610496 1.201 0.229647
## t:xemphysema.ever 0.0452139 0.2595954 0.174 0.861735
## t:xthyroid.ever 0.1223261 0.1577078 0.776 0.437975
## t:xarthritis.ever -0.0458100 0.0989863 -0.463 0.643524
## t:xheart.attack.ever -0.1777707 0.2080552 -0.854 0.392882
## t:xstroke.ever -0.3643378 0.2148165 -1.696 0.089910 .
## t:xliver.now -0.3316552 0.3270870 -1.014 0.310624
## t:xgout.ever -0.5844787 0.2325832 -2.513 0.011988 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.657 on 9615 degrees of freedom
## Multiple R-squared: 0.1963, Adjusted R-squared: 0.1912
## F-statistic: 38.51 on 61 and 9615 DF, p-value: < 2.2e-16
df.naive <- cbind(summary(naive.model)$coef[, c(1, 4)], confint(naive.model))[33:62, ] # keep only the t:x interaction rows
First step: regress the outcome and the treatment on the covariates (random forests here) and form the residuals.
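A sketch of the rationale: this is the usual residual-on-residual (partialling-out) construction. Writing $Y = \log_2(\mathrm{CRP})$, $T$ for the obesity indicator, and $\mu_y(x) = E[Y \mid X = x]$, $\mu_t(x) = E[T \mid X = x]$ for the conditional means (estimated below by random forests), we form

$$\tilde{Y} = Y - \hat{\mu}_y(X), \qquad \tilde{T} = T - \hat{\mu}_t(X),$$

so that under a partially linear model $Y = \tau T + g(X) + \varepsilon$ we have $\tilde{Y} \approx \tau \tilde{T} + \varepsilon$, and regressing $\tilde{Y}$ on $\tilde{T}$ estimates the overall effect $\tau$.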
set.seed(20170606)
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
mu.y <- randomForest(x, y, nodesize = 20, ntree = 2000)
mu.t <- randomForest(x, factor(t), nodesize = 20, ntree = 2000)
## mu.y <- lm(y ~ ., x)
## mu.t <- glm(t ~ ., data = x, family = "binomial")
y.tilde <- y - predict(mu.y, x)
t.tilde <- t - predict(mu.t, x, "prob")[, 2]
## t.tilde <- t - predict(mu.t, x)
Overall treatment effect: regress the residualized outcome on the residualized treatment.
summary(lm(y.tilde ~ t.tilde))
##
## Call:
## lm(formula = y.tilde ~ t.tilde)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4710 -0.9689 -0.0098 0.9403 5.9230
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.11799 0.01481 7.968 1.8e-15 ***
## t.tilde 1.14554 0.03819 29.996 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.405 on 9675 degrees of freedom
## Multiple R-squared: 0.08508, Adjusted R-squared: 0.08499
## F-statistic: 899.7 on 1 and 9675 DF, p-value: < 2.2e-16
Second step: transform the response and the predictors so that effect modification becomes a linear regression.
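A sketch of the idea: approximate the treatment effect as linear in the centered covariates, $\tau(X) \approx \tau_0 + (X - \bar{X})^\top \beta$. Then

$$\tilde{Y} - \tau_0 \tilde{T} \approx \big\{\tilde{T}\,(X - \bar{X})\big\}^\top \beta + \varepsilon,$$

so x.tilde below is the centered covariate matrix multiplied by $\tilde{T}$, y.tilde is replaced by the residual from regressing it on $\tilde{T}$, and the coefficients on x.tilde are the effect-modification coefficients $\beta$.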
## x$age.group <- cut(x$age, breaks = c(20, 39, 59, Inf))
## x.tilde <- model.matrix(~ (.-age)^2, x)[, -1]
# x.tilde <- model.matrix(~ .^2, data.frame(x))[, -1] ## no intercept, all first order interactions
x.tilde <- x
## x.tilde <- model.matrix(~ ., x)[, -1] ## no intercept
x.tilde <- x.tilde[, apply(x.tilde, 2, sd) > 0] ## drop constant columns
x.tilde <- x.tilde[, !duplicated(t(x.tilde))] ## drop duplicated columns
x.tilde <- scale(x.tilde, scale = FALSE) * t.tilde ## center the columns, then multiply by the treatment residual
y.tilde <- lm(y.tilde ~ t.tilde)$residuals
full.model <- lm(y.tilde ~ x.tilde - 1)
## summary(full.model)
df.full <- cbind(summary(full.model)$coef[, c(1, 4)], confint(full.model))
df.univariate <- matrix(0, ncol(x.tilde), 4)
for (j in 1:ncol(x.tilde)) {
  fit.j <- lm(y.tilde ~ x.tilde[, j] - 1) # one candidate effect modifier at a time
  df.univariate[j, 1] <- coef(fit.j)
  df.univariate[j, 2] <- summary(fit.j)$coef[1, 4]
  df.univariate[j, 3:4] <- confint(fit.j)
}
rownames(df.univariate) <- colnames(x.tilde)
colnames(df.univariate) <- c("estimate", "p.value", "CI.low", "CI.up")
## df.univariate
## first scale x.tilde, as is commonly recommended for high-dimensional regression
x.tilde.scale <- apply(x.tilde, 2, sd)
x.tilde <- scale(x.tilde, center = FALSE, scale = x.tilde.scale)
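## (the scaling is undone further below, so reported estimates and confidence intervals are on the original scale)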
library(selectiveInference)
## Loading required package: glmnet
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-13
## Loading required package: intervals
##
## Attaching package: 'intervals'
## The following object is masked from 'package:Matrix':
##
## expand
## Loading required package: survival
gfit <- glmnet(x.tilde, y.tilde, standardize = FALSE, intercept = FALSE)
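## choose lambda as roughly 1.1 times E[ max_j |x.tilde[, j]' eps| ] for Gaussian noise eps at the
## estimated residual scale, a common theoretically motivated choice of the lasso penalty level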
eps <- matrix(rnorm(nrow(x.tilde) * 500, sd = sd(lm(y.tilde ~ x.tilde)$residuals)), ncol = 500)
lambda <- 1.1 * mean(apply(abs(t(x.tilde) %*% eps), 2, max))
beta <- coef(gfit, s = lambda/nrow(x.tilde), exact=TRUE, x = x.tilde, y = y.tilde)[-1]
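## fixedLassoInf gives p-values and confidence intervals for the lasso-selected coefficients
## that are valid conditional on the selection event at this fixed lambda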
out <- fixedLassoInf(x.tilde, y.tilde, beta, lambda, intercept = FALSE, alpha = 0.05)
out$ci <- t(scale(t(out$ci), FALSE, x.tilde.scale[out$vars]))
out$coef0 <- out$coef0 / x.tilde.scale[out$vars]
out.pretty <- round(data.frame(cbind(out$vars, out$coef0, out$ci, out$pv)), 4)
out.pretty <- cbind(out.pretty, symnum(out$pv, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", " ")))
colnames(out.pretty) <- c("var", "estimate", "ci.low", "ci.up", "p.value", "")
## out.pretty
df.selective <- out.pretty[, c(2, 5, 3, 4)]
We get the same significant coefficients… Good or bad?
Ashkan wants the “wrong” inference that ignores selection. Here it is:
x.tilde <- scale(x.tilde, scale = 1/x.tilde.scale)
snooping.model <- lm(y.tilde ~ x.tilde[,out$vars] - 1)
df.snooping <- cbind(summary(snooping.model)$coef[, c(1, 4)], confint(snooping.model))
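## repeat the analysis with all main effects and pairwise interactions of the covariates as candidate effect modifiers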
x.tilde2 <- model.matrix(~ .^2, data.frame(x))[, -1]
x.tilde2 <- x.tilde2[, apply(x.tilde2, 2, sd) > 0] ## drop constant columns
x.tilde2 <- x.tilde2[, !duplicated(t(x.tilde2))] ## drop duplicated columns
x.tilde2 <- scale(x.tilde2, scale = FALSE) * t.tilde ## center the columns, then multiply by the treatment residual
y.tilde <- lm(y.tilde ~ t.tilde)$residuals
x.tilde2.scale <- apply(x.tilde2, 2, sd)
x.tilde2 <- scale(x.tilde2, center = FALSE, scale = x.tilde2.scale)
library(selectiveInference)
gfit <- glmnet(x.tilde2, y.tilde, standardize = FALSE, intercept = FALSE)
eps <- matrix(rnorm(nrow(x.tilde2) * 500, sd = sd(lm(y.tilde ~ x.tilde2)$residuals)), ncol = 500)
lambda <- 1.1 * mean(apply(abs(t(x.tilde2) %*% eps), 2, max))
beta <- coef(gfit, s = lambda/nrow(x.tilde2), exact=TRUE, x = x.tilde2, y = y.tilde)[-1]
out2 <- fixedLassoInf(x.tilde2, y.tilde, beta, lambda, intercept = FALSE, alpha = 0.05)
## Warning in lsfit(x, y, intercept = oo): 'X' matrix was collinear
out2$ci <- t(scale(t(out2$ci), FALSE, x.tilde2.scale[out2$vars]))
out2$coef0 <- out2$coef0 / x.tilde2.scale[out2$vars]
out2.pretty <- round(data.frame(cbind(out2$vars, out2$coef0, out2$ci, out2$pv)), 4)
out2.pretty <- cbind(out2.pretty, symnum(out2$pv, corr = FALSE, na = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***", "**", "*", ".", " ")))
colnames(out2.pretty) <- c("var", "estimate", "ci.low", "ci.up", "p.value", "")
## out2.pretty
df.selective.inter <- out2.pretty[, c(2, 5, 3, 4)]
x.tilde2 <- scale(x.tilde2, scale = 1/x.tilde2.scale)
snooping.model <- lm(y.tilde ~ x.tilde2[,out2$vars] - 1)
df.snooping.inter <- cbind(summary(snooping.model)$coef[, c(1, 4)], confint(snooping.model))
After cleaning up the results (see the source code; not included in the HTML), we get:
df.naive
## estimate p.value ci.low ci.up
## gender Female 0.654 0.000 0.488 0.821 ***
## age -0.024 0.000 -0.030 -0.018 ***
## income -0.019 0.495 -0.072 0.035
## race Other Hispanic 0.052 0.752 -0.272 0.377
## race Non-Hispanic White 0.166 0.196 -0.086 0.418
## race Non-Hispanic Black 0.376 0.010 0.089 0.664 **
## race Other Race 0.038 0.842 -0.337 0.414
## marital Widowed -0.083 0.593 -0.389 0.223
## marital Divorced 0.161 0.202 -0.086 0.409
## marital Separated -0.235 0.272 -0.654 0.184
## marital Never Married 0.117 0.320 -0.113 0.347
## marital Living with partner -0.050 0.745 -0.349 0.250
## education 9--11th Grade 0.259 0.100 -0.049 0.566 .
## education High School 0.307 0.041 0.013 0.601 *
## education Some College 0.296 0.052 -0.002 0.593 .
## education College Graduates 0.316 0.054 -0.006 0.638 .
## vigorous.work -0.019 0.854 -0.216 0.179
## vigorous.recreation -0.323 0.001 -0.521 -0.125 ***
## smoking.ever -0.067 0.447 -0.239 0.105
## smoking.now 0.000 0.437 0.000 0.000
## estrogen -0.645 0.002 -1.063 -0.228 **
## bronchitis.now -0.092 0.725 -0.603 0.420
## asthma.now 0.193 0.230 -0.122 0.509
## emphysema.ever 0.045 0.862 -0.464 0.554
## thyroid.ever 0.122 0.438 -0.187 0.431
## arthritis.ever -0.046 0.644 -0.240 0.148
## heart.attack.ever -0.178 0.393 -0.586 0.230
## stroke.ever -0.364 0.090 -0.785 0.057 .
## liver.now -0.332 0.311 -0.973 0.310
## gout.ever -0.584 0.012 -1.040 -0.129 *
df.full
## estimate p.value ci.low ci.up
## gender Female 0.467 0.000 0.307 0.628 ***
## age -0.021 0.000 -0.027 -0.016 ***
## income 0.000 0.986 -0.053 0.054
## race Other Hispanic 0.209 0.183 -0.099 0.517
## race Non-Hispanic White 0.266 0.028 0.028 0.504 *
## race Non-Hispanic Black 0.384 0.006 0.112 0.656 **
## race Other Race 0.136 0.482 -0.244 0.516
## marital Widowed -0.103 0.496 -0.401 0.194
## marital Divorced 0.152 0.219 -0.090 0.394
## marital Separated -0.157 0.501 -0.613 0.299
## marital Never Married 0.116 0.316 -0.111 0.344
## marital Living with partner -0.168 0.271 -0.468 0.131
## education 9--11th Grade 0.036 0.812 -0.262 0.334
## education High School 0.073 0.616 -0.213 0.359
## education Some College 0.007 0.962 -0.282 0.296
## education College Graduates 0.074 0.647 -0.243 0.392
## vigorous.work 0.012 0.904 -0.185 0.209
## vigorous.recreation -0.314 0.002 -0.514 -0.113 **
## smoking.ever 0.005 0.957 -0.162 0.171
## smoking.now 0.000 0.565 0.000 0.000
## estrogen -0.608 0.006 -1.045 -0.171 **
## bronchitis.now -0.144 0.609 -0.695 0.407
## asthma.now 0.197 0.242 -0.133 0.527
## emphysema.ever -0.006 0.984 -0.541 0.530
## thyroid.ever 0.134 0.380 -0.165 0.433
## arthritis.ever -0.076 0.427 -0.264 0.112
## heart.attack.ever -0.214 0.321 -0.636 0.208
## stroke.ever -0.492 0.023 -0.917 -0.067 *
## liver.now -0.190 0.603 -0.908 0.527
## gout.ever -0.489 0.034 -0.940 -0.037 *
df.univariate
## estimate p.value ci.low ci.up
## gender Female 0.494 0.000 0.350 0.639 ***
## age -0.021 0.000 -0.025 -0.017 ***
## income -0.013 0.582 -0.057 0.032
## race Other Hispanic 0.108 0.399 -0.143 0.359
## race Non-Hispanic White -0.060 0.416 -0.204 0.084
## race Non-Hispanic Black 0.175 0.076 -0.018 0.368 .
## race Other Race -0.069 0.683 -0.399 0.261
## marital Widowed -0.529 0.000 -0.790 -0.268 ***
## marital Divorced 0.088 0.455 -0.143 0.320
## marital Separated -0.135 0.554 -0.583 0.313
## marital Never Married 0.454 0.000 0.257 0.651 ***
## marital Living with partner 0.073 0.609 -0.206 0.351
## education 9--11th Grade -0.005 0.964 -0.200 0.191
## education High School 0.068 0.433 -0.102 0.238
## education Some College 0.090 0.278 -0.073 0.254
## education College Graduates 0.005 0.953 -0.165 0.175
## vigorous.work 0.043 0.654 -0.145 0.230
## vigorous.recreation -0.048 0.602 -0.229 0.133
## smoking.ever -0.138 0.062 -0.282 0.007 .
## smoking.now 0.000 0.651 0.000 0.000
## estrogen 0.070 0.746 -0.352 0.491
## bronchitis.now -0.194 0.457 -0.704 0.317
## asthma.now 0.229 0.155 -0.086 0.544
## emphysema.ever -0.453 0.067 -0.937 0.032 .
## thyroid.ever -0.007 0.962 -0.296 0.282
## arthritis.ever -0.410 0.000 -0.576 -0.244 ***
## heart.attack.ever -0.730 0.000 -1.137 -0.324 ***
## stroke.ever -0.919 0.000 -1.332 -0.506 ***
## liver.now -0.526 0.149 -1.240 0.189
## gout.ever -0.987 0.000 -1.428 -0.546 ***
df.selective
## estimate p.value ci.low ci.up
## gender Female 0.476 0.000 0.330 0.624 ***
## age -0.019 0.000 -0.024 -0.015 ***
## stroke.ever -0.515 0.311 -0.899 1.256
## gout.ever -0.475 0.493 -0.852 2.295
df.snooping
## estimate p.value ci.low ci.up
## gender Female 0.476 0.000 0.332 0.620 ***
## age -0.019 0.000 -0.023 -0.015 ***
## stroke.ever -0.514 0.016 -0.933 -0.096 *
## gout.ever -0.473 0.038 -0.919 -0.026 *
df.selective.inter
## estimate p.value ci.low ci.up
## gender.Female 0.471 0.000 0.323 0.618 ***
## age -0.020 0.000 -0.024 -0.016 ***
## age:vigorous.recreation 0.018 0.371 -0.052 0.027
## age:stroke.ever -0.036 0.069 -0.054 0.014 .
df.snooping.inter
## estimate p.value ci.low ci.up
## gender.Female 0.471 0.000 0.327 0.615 ***
## age -0.020 0.000 -0.024 -0.016 ***
## age:vigorous.recreation 0.018 0.001 0.008 0.028 ***
## age:stroke.ever -0.036 0.000 -0.055 -0.017 ***
The problem with the univariate regressions:
round(cor(x.tilde[, rownames(df.univariate)[df.univariate[, 2] <= 0.01]]), 2)
## gender Female age marital Widowed marital Never Married arthritis.ever heart.attack.ever
## gender Female 1.00 0.00 0.16 -0.05 0.06 -0.07
## age 0.00 1.00 0.40 -0.37 0.42 0.17
## marital Widowed 0.16 0.40 1.00 -0.13 0.21 0.09
## marital Never Married -0.05 -0.37 -0.13 1.00 -0.13 -0.04
## arthritis.ever 0.06 0.42 0.21 -0.13 1.00 0.14
## heart.attack.ever -0.07 0.17 0.09 -0.04 0.14 1.00
## stroke.ever -0.01 0.18 0.15 -0.05 0.16 0.14
## gout.ever -0.10 0.16 0.01 -0.05 0.15 0.09
## stroke.ever gout.ever
## gender Female -0.01 -0.10
## age 0.18 0.16
## marital Widowed 0.15 0.01
## marital Never Married -0.05 -0.05
## arthritis.ever 0.16 0.15
## heart.attack.ever 0.14 0.09
## stroke.ever 1.00 0.09
## gout.ever 0.09 1.00
So all of the significant effect modifiers found by the univariate regressions are noticeably correlated with gender or age, and their t-statistics are smaller than those of gender and age.
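As a rough check of that last point, approximate t-statistics can be backed out from df.univariate itself (a sketch, assuming df.univariate keeps the column order estimate, p-value, lower and upper confidence limit used above, and using that the 95% interval half-width is about 1.96 standard errors):
## approximate |t| statistics implied by the univariate fits
approx.t <- df.univariate[, 1] / ((df.univariate[, 4] - df.univariate[, 3]) / (2 * qnorm(0.975)))
round(sort(abs(approx.t), decreasing = TRUE), 1)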