Skip to content

Commit dc609ea

Browse files
authored
estimate_contrasts() problem with p-value adjustments (#593)
* `estimate_contrasts()` problem with p-value adjustments Fixes #592 * apply suggestion * fix * fix * fix * update news * add test * typo * fix * fix
1 parent 7b2aab8 commit dc609ea

File tree

6 files changed

+1021
-254
lines changed

6 files changed

+1021
-254
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: modelbased
33
Title: Estimation of Model-Based Predictions, Contrasts and Means
4-
Version: 0.13.1.1
4+
Version: 0.13.1.2
55
Authors@R:
66
c(person(given = "Dominique",
77
family = "Makowski",

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
* Fixed issue in `estimate_slope()` when `p_adjust = "esarey"`.
66

7+
* Fixed issue in `estimate_contrasts()` when `p_adjust = "tukey"`.
8+
79
# modelbased 0.13.1
810

911
## Changes

R/get_marginalcontrasts.R

Lines changed: 54 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -146,11 +146,6 @@ get_marginalcontrasts <- function(
146146
# `by="Petal.Width=c(1, 2)"`)
147147
out <- .filter_contrasts_average(out, my_args)
148148

149-
# adjust p-values
150-
if (!model_info$is_bayesian) {
151-
out <- .p_adjust(model, out, p_adjust, verbose, ...)
152-
}
153-
154149
# Last step: Save information in attributes --------------------------------
155150
# ---------------------------------------------------------------------------
156151

@@ -169,8 +164,17 @@ get_marginalcontrasts <- function(
169164
)
170165
)
171166

167+
# adjust p-values - we do this here because we need all the information
168+
# from the attributes
169+
if (!model_info$is_bayesian) {
170+
out <- .p_adjust(model, out, p_adjust, verbose, ...)
171+
}
172+
172173
# remove "estimate_means" class attribute
173-
class(out) <- setdiff(unique(c("marginaleffects_contrasts", class(out))), "estimate_means")
174+
class(out) <- setdiff(
175+
unique(c("marginaleffects_contrasts", class(out))),
176+
"estimate_means"
177+
)
174178
out
175179
}
176180

@@ -191,7 +195,8 @@ get_marginalcontrasts <- function(
191195
example_values <- sample(unique(out[[i]]), pmin(3, insight::n_unique(out[[i]])))
192196
# tell user...
193197
insight::format_error(paste0(
194-
"None of the values specified for the predictor `", i,
198+
"None of the values specified for the predictor `",
199+
i,
195200
"` are available in the data. This is required for `estimate=\"average\"`.",
196201
" Either use a different option for the `estimate` argument, or use values that",
197202
" are present in the data, such as ",
@@ -217,11 +222,13 @@ get_marginalcontrasts <- function(
217222
# in the marginaleffects package, and extract the potential filter values used
218223
# in `by` and `contrast` (if any), to "clean" these arguments and save the levels
219224
# or values at which rows should be filtered later...
220-
.get_marginaleffects_hypothesis_argument <- function(comparison,
221-
my_args,
222-
model_data = NULL,
223-
estimate = NULL,
224-
...) {
225+
.get_marginaleffects_hypothesis_argument <- function(
226+
comparison,
227+
my_args,
228+
model_data = NULL,
229+
estimate = NULL,
230+
...
231+
) {
225232
# init
226233
comparison_slopes <- by_filter <- contrast_filter <- by_token <- NULL
227234
joint_test <- FALSE
@@ -235,7 +242,8 @@ get_marginalcontrasts <- function(
235242
# contrasts (but only for `estimate = "average"`!). Furthermore, "clean" `by`
236243
# argument (remove filter), because we need the pure variable name for setting
237244
# up the hypothesis argument, where variables in `by` are used in the formula
238-
if (!is.null(my_args$by) && any(grepl("=", my_args$by, fixed = TRUE))) { # "[^0-9A-Za-z\\._]"
245+
if (!is.null(my_args$by) && any(grepl("=", my_args$by, fixed = TRUE))) {
246+
# "[^0-9A-Za-z\\._]"
239247
# find which element in `by` has a filter
240248
filter_index <- grep("=", my_args$by, fixed = TRUE)
241249
for (f in filter_index) {
@@ -269,7 +277,12 @@ get_marginalcontrasts <- function(
269277
# values for later use. we only need this for `estimate = "average"`, because
270278
# that is the only situation where we do *not* use a data grid, which we else
271279
# could use for filtering, by dropping not-wanted rows from the grid.
272-
if (identical(estimate, "average") && !is.null(my_args$contrast) && any(grepl("=", my_args$contrast, fixed = TRUE))) { # nolint
280+
if (
281+
identical(estimate, "average") &&
282+
!is.null(my_args$contrast) &&
283+
any(grepl("=", my_args$contrast, fixed = TRUE))
284+
) {
285+
# nolint
273286
# find which element in `by` has a filter
274287
filter_index <- grep("=", my_args$contrast, fixed = TRUE)
275288
for (f in filter_index) {
@@ -332,12 +345,7 @@ get_marginalcontrasts <- function(
332345
}
333346
# for some comparisons, we need an empty left-hand side. else, we default
334347
# to "difference".
335-
formula_lhs <- switch(
336-
comparison,
337-
poly = ,
338-
helmert = "",
339-
"difference"
340-
)
348+
formula_lhs <- switch(comparison, poly = , helmert = "", "difference")
341349
formula_rhs <- comparison
342350
}
343351
# we put "by" into the formula. user either provided "by", or we put the
@@ -419,10 +427,22 @@ get_marginalcontrasts <- function(
419427

420428
.valid_hypothesis_strings <- function() {
421429
c(
422-
"pairwise", "reference", "sequential", "meandev", "meanotherdev",
423-
"revpairwise", "revreference", "revsequential", "poly", "helmert",
424-
"trt_vs_ctrl", "joint", "inequality", "inequality_pairwise",
425-
"inequality_ratio", "inequality_ratio_pairwise"
430+
"pairwise",
431+
"reference",
432+
"sequential",
433+
"meandev",
434+
"meanotherdev",
435+
"revpairwise",
436+
"revreference",
437+
"revsequential",
438+
"poly",
439+
"helmert",
440+
"trt_vs_ctrl",
441+
"joint",
442+
"inequality",
443+
"inequality_pairwise",
444+
"inequality_ratio",
445+
"inequality_ratio_pairwise"
426446
)
427447
}
428448

@@ -445,9 +465,12 @@ get_marginalcontrasts <- function(
445465
match_lengths <- attr(matches, "match.length")
446466

447467
# extract all "b" strings, so we have a vector of all "b" used in the comparison
448-
unlist(lapply(seq_along(matches), function(i) {
449-
substr(comparison, matches[i], matches[i] + match_lengths[i] - 1)
450-
}), use.names = FALSE)
468+
unlist(
469+
lapply(seq_along(matches), function(i) {
470+
substr(comparison, matches[i], matches[i] + match_lengths[i] - 1)
471+
}),
472+
use.names = FALSE
473+
)
451474
}
452475

453476

@@ -458,7 +481,10 @@ get_marginalcontrasts <- function(
458481
# this is the row-order we use in modelbased
459482
datagrid$.rowid <- 1:nrow(datagrid)
460483
# this is the row-order in marginaleffects
461-
datagrid <- datawizard::data_arrange(datagrid, colnames(datagrid)[1:(length(datagrid) - 1)])
484+
datagrid <- datawizard::data_arrange(
485+
datagrid,
486+
colnames(datagrid)[1:(length(datagrid) - 1)]
487+
)
462488
# we need to extract all b's and the former parameter numbers
463489
b <- .extract_custom_comparison(comparison)
464490
old_b_numbers <- as.numeric(gsub("b", "", b, fixed = TRUE))

R/p_adjust.R

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,7 @@
88

99
# extract information
1010
datagrid <- attributes(params)$datagrid
11-
focal <- attributes(params)$contrast
12-
# Use .safe to handle cases where no statistic is extracted
13-
statistic <- .safe(insight::get_statistic(model)$Statistic)
11+
focal <- .safe(insight::trim_ws(gsub("=.*", "\\1", attributes(params)$contrast)))
1412
# extract degrees of freedom
1513
dof <- .safe(params$df[1])
1614
if (is.null(dof)) {
@@ -54,25 +52,48 @@
5452
}
5553

5654
# needed for rank adjustment
57-
focal_terms <- datagrid[focal]
58-
rank_adjust <- prod(vapply(focal_terms, insight::n_unique, numeric(1)))
55+
focal_terms <- .safe(datagrid[focal])
56+
if (is.null(focal_terms)) {
57+
rank_adjust <- 1
58+
} else {
59+
rank_adjust <- prod(vapply(focal_terms, insight::n_unique, numeric(1)))
60+
}
5961

6062
if (p_adjust %in% tolower(stats::p.adjust.methods)) {
6163
# base R adjustments
6264
params[["p"]] <- stats::p.adjust(params[["p"]], method = p_adjust)
6365
} else if (p_adjust == "tukey") {
66+
# find first occurence of one of the following columns: "t", "z", or "statistic"
67+
stat_col_name <- Find(
68+
function(col) col %in% colnames(params),
69+
c("t", "z", "statistic")
70+
)
71+
if (!is.null(stat_col_name)) {
72+
statistic <- params[[stat_col_name]]
73+
} else {
74+
statistic <- NULL
75+
}
6476
if (!is.null(statistic)) {
65-
# tukey adjustment
66-
params[["p"]] <- suppressWarnings(stats::ptukey(
67-
sqrt(2) * abs(statistic),
68-
rank_adjust,
69-
dof,
70-
lower.tail = FALSE
71-
))
72-
# for specific contrasts, ptukey might fail, and the tukey-adjustement
73-
# could just be simple p-value calculation
74-
if (all(is.na(params[["p"]]))) {
75-
params[["p"]] <- 2 * stats::pt(abs(statistic), df = dof, lower.tail = FALSE)
77+
if (rank_adjust < 2) {
78+
if (verbose) {
79+
insight::format_alert(
80+
"Tukey adjustment requires at least 2 groups. P-values were not adjusted."
81+
)
82+
}
83+
} else if (!is.null(dof) && is.finite(dof) && dof <= 0) {
84+
if (verbose) {
85+
insight::format_alert(
86+
"Tukey adjustment requires positive degrees of freedom. P-values were not adjusted."
87+
)
88+
}
89+
} else {
90+
# tukey adjustment
91+
params[["p"]] <- stats::ptukey(
92+
sqrt(2) * abs(statistic),
93+
rank_adjust,
94+
dof,
95+
lower.tail = FALSE
96+
)
7697
}
7798
} else if (verbose) {
7899
insight::format_alert("No test-statistic found. P-values were not adjusted.")

0 commit comments

Comments
 (0)