library(gsDesignNB)
library(data.table)
library(ggplot2)
library(DT)
has_plotly <- requireNamespace("plotly", quietly = TRUE)This vignette compares the Wald and score test implementations in
mutze_test() across a factorial grid of negative binomial
trial scenarios. It also gives practical recommendations for sample-size
calculation when the usual Zhu–Lakkis / Friede–Schmidli / Mutze Wald
formula is adequate, when score-test sizing is a useful diagnostic, and
when the score test itself is the more important change for Type I error
control.
The Wald sizing option in
sample_size_nbinom(test_type = "wald") uses the alternative
variance \(V_1\) for both the Type I
and power components. The score sizing option in
sample_size_nbinom(test_type = "score") uses the null
variance \(V_0\) for the Type I
component and the alternative variance \(V_1\) for the power component:
\[ n_1 = \frac{(z_{\alpha/s}\sqrt{V_0} + z_\beta\sqrt{V_1})^2} {(\theta - \theta_0)^2}. \]
This distinction matters most when the planned final analysis uses a score statistic evaluated under the null, or when finite-sample Type I error control is more important than preserving the historical Wald analysis convention. In the superiority scenarios below, the Wald and score sample sizes are close; the traditional Wald sample size paired with the score test often provides a useful practical margin for power while preserving the score test’s Type I error protection.
The full \(2 \times 2\) factorial comparison is:
| Wald-sized trial | Score-sized trial | |
|---|---|---|
| Wald test | Wald / Wald | Score / Wald |
| Score test | Wald / Score | Score / Score |
We assess:
All tables are interactive: use the column filters to narrow results.
Figures are interactive when the optional plotly package is
installed; otherwise they render as static ggplot2
figures.
Results are pre-computed by
data-raw/generate_score_sweep.R and loaded here.
results_file <- system.file("extdata", "score_sweep_results.rds",
package = "gsDesignNB")
if (results_file == "" && file.exists("../inst/extdata/score_sweep_results.rds")) {
results_file <- "../inst/extdata/score_sweep_results.rds"
}
res <- readRDS(results_file)
config <- res$config
scenarios <- as.data.table(res$scenarios)
base_grid <- as.data.table(res$base_grid)cat(sprintf(
"Expanded scenarios: %d | Power sims: %s | Null sims: %s | RR: %.2f | alpha: %.3f\n",
nrow(scenarios),
format(config$n_sims_power, big.mark = ","),
format(config$n_sims_null, big.mark = ","),
config$rr_power,
config$alpha
))
#> Expanded scenarios: 54 | Power sims: 3,500 | Null sims: 20,000 | RR: 0.70 | alpha: 0.025The base scenario grid varies control event rate (\(\lambda_1\)), overdispersion (\(k\)), and minimum inter-event gap. For each base scenario, sample sizes are computed using both the Wald and score variance formulas. In this superiority grid the score-sized trials are equal to or slightly smaller than the Wald-sized trials; score sizing is therefore not a generic “add a few subjects” rule, and the operating characteristics still need to be checked under the planned analysis test.
base_display <- base_grid[, .(
`Control rate` = lambda1,
`Dispersion (k)` = k,
`Event gap (days)` = gap_days,
`N (Wald sizing)` = n_wald,
`N (Score sizing)` = n_score,
`Wald - Score` = n_wald - n_score
)]
datatable(
base_display,
caption = "Base scenario grid with sample sizes by method",
filter = "top",
options = list(pageLength = 15, dom = "ftip"),
rownames = FALSE
) |> formatRound(columns = "Control rate", digits = 2)null_dt <- as.data.table(res$null_summary)
null_long <- melt(
null_dt,
id.vars = c("lambda1", "k", "gap_days", "n_total", "sizing"),
measure.vars = c("rate_wald", "rate_score"),
variable.name = "test",
value.name = "rejection_rate"
)
null_long[, test := fifelse(test == "rate_wald", "Wald", "Score")]
se_long <- melt(
null_dt,
id.vars = c("lambda1", "k", "gap_days", "sizing"),
measure.vars = c("se_wald", "se_score"),
variable.name = "test",
value.name = "se"
)
se_long[, test := fifelse(test == "se_wald", "Wald", "Score")]
null_long <- merge(null_long, se_long,
by = c("lambda1", "k", "gap_days", "sizing", "test"))
null_long[, combo := paste0(sizing, "-sized / ", test, " test")]
null_long[, `:=`(
above_nominal_95 = rejection_rate - 1.96 * se > config$alpha,
below_nominal_95 = rejection_rate + 1.96 * se < config$alpha
)]type1_summary <- null_long[, .(
`Scenarios` = .N,
`Minimum` = min(rejection_rate),
`Mean` = mean(rejection_rate),
`Maximum` = max(rejection_rate),
`Above nominal beyond MC error` = sum(above_nominal_95),
`Below nominal beyond MC error` = sum(below_nominal_95)
), by = .(`Sizing` = sizing, `Test` = test)]
datatable(
type1_summary[order(Sizing, Test)],
caption = htmltools::tags$caption(
style = "caption-side: top; text-align: left; font-weight: bold;",
"Type I error synopsis across the scenario grid"
),
options = list(pageLength = 8, dom = "tip"),
rownames = FALSE
) |>
formatRound(columns = c("Minimum", "Mean", "Maximum"), digits = 4)null_display <- null_long[order(lambda1, k, gap_days, sizing, test),
.(
`Control rate` = lambda1,
Dispersion = k,
`Gap (days)` = gap_days,
Sizing = sizing,
N = n_total,
Test = test,
`Type I error` = round(rejection_rate, 4),
SE = round(se, 4)
)
]
datatable(
null_display,
caption = htmltools::tags$caption(
style = "caption-side: top; text-align: left; font-weight: bold;",
sprintf("Type I error rate — nominal alpha = %.3f, %s null sims/scenario",
config$alpha, format(config$n_sims_null, big.mark = ","))
),
filter = "top",
options = list(pageLength = 20, dom = "ftip"),
rownames = FALSE
)null_long[, scenario := paste0("λ₁=", lambda1, " k=", k)]
p_null <- ggplot(null_long,
aes(x = scenario, y = rejection_rate,
color = combo, shape = test,
text = paste0("Sizing: ", sizing, "\nTest: ", test,
"\nRate: ", round(rejection_rate, 4),
"\nN: ", n_total))) +
geom_point(size = 2.5, position = position_dodge(width = 0.5)) +
geom_errorbar(aes(ymin = rejection_rate - 1.96 * se,
ymax = rejection_rate + 1.96 * se),
width = 0.2, position = position_dodge(width = 0.5)) +
geom_hline(yintercept = config$alpha, linetype = "dashed", color = "grey40") +
facet_wrap(~ paste0("Gap = ", gap_days, "d")) +
labs(
title = "Type I error: sizing method × test type",
x = NULL, y = "Rejection rate",
color = "Sizing / Test", shape = "Test"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
if (has_plotly) {
plotly::ggplotly(p_null, tooltip = "text") |>
plotly::layout(legend = list(orientation = "v", x = 1.02, y = 0.5))
} else {
p_null
}power_dt <- as.data.table(res$power_summary)
power_long <- melt(
power_dt,
id.vars = c("lambda1", "k", "gap_days", "n_total", "sizing"),
measure.vars = c("rate_wald", "rate_score"),
variable.name = "test",
value.name = "power"
)
power_long[, test := fifelse(test == "rate_wald", "Wald", "Score")]
se_power <- melt(
power_dt,
id.vars = c("lambda1", "k", "gap_days", "sizing"),
measure.vars = c("se_wald", "se_score"),
variable.name = "test",
value.name = "se"
)
se_power[, test := fifelse(test == "se_wald", "Wald", "Score")]
power_long <- merge(power_long, se_power,
by = c("lambda1", "k", "gap_days", "sizing", "test"))
power_long[, combo := paste0(sizing, "-sized / ", test, " test")]power_summary <- power_long[, .(
`Scenarios` = .N,
`Minimum` = min(power),
`Mean` = mean(power),
`Maximum` = max(power),
`Below 90%` = sum(power < config$power_target)
), by = .(`Sizing` = sizing, `Test` = test)]
datatable(
power_summary[order(Sizing, Test)],
caption = htmltools::tags$caption(
style = "caption-side: top; text-align: left; font-weight: bold;",
"Power synopsis across the scenario grid"
),
options = list(pageLength = 8, dom = "tip"),
rownames = FALSE
) |>
formatRound(columns = c("Minimum", "Mean", "Maximum"), digits = 4)power_display <- power_long[order(lambda1, k, gap_days, sizing, test),
.(
`Control rate` = lambda1,
Dispersion = k,
`Gap (days)` = gap_days,
Sizing = sizing,
N = n_total,
Test = test,
Power = round(power, 4),
SE = round(se, 4)
)
]
datatable(
power_display,
caption = htmltools::tags$caption(
style = "caption-side: top; text-align: left; font-weight: bold;",
sprintf("Power — RR = %.2f, %s power sims/scenario",
config$rr_power, format(config$n_sims_power, big.mark = ","))
),
filter = "top",
options = list(pageLength = 20, dom = "ftip"),
rownames = FALSE
)power_long[, scenario := paste0("λ₁=", lambda1, " k=", k)]
p_power <- ggplot(power_long,
aes(x = scenario, y = power,
color = combo, shape = test,
text = paste0("Sizing: ", sizing, "\nTest: ", test,
"\nPower: ", round(power, 4),
"\nN: ", n_total))) +
geom_point(size = 2.5, position = position_dodge(width = 0.5)) +
geom_errorbar(aes(ymin = power - 1.96 * se,
ymax = power + 1.96 * se),
width = 0.2, position = position_dodge(width = 0.5)) +
geom_hline(yintercept = config$power_target, linetype = "dashed", color = "grey40") +
facet_wrap(~ paste0("Gap = ", gap_days, "d")) +
labs(
title = "Power: sizing method × test type",
subtitle = sprintf("Target = %.0f%%, RR = %.2f",
100 * config$power_target, config$rr_power),
x = NULL, y = "Power",
color = "Sizing / Test", shape = "Test"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
if (has_plotly) {
plotly::ggplotly(p_power, tooltip = "text") |>
plotly::layout(legend = list(orientation = "v", x = 1.02, y = 0.5))
} else {
p_power
}Under \(H_0\), the Z-statistics should follow \(N(0, 1)\) if the asymptotic approximation holds.
z_null <- as.data.table(res$z_sample_null)
sc_info <- data.table(
scenario_id = seq_len(nrow(scenarios)),
scenarios[, .(lambda1, k, gap_days, sizing)]
)
z_null <- merge(z_null, sc_info, by = "scenario_id")
z_null[, label := sprintf("l1=%.2f k=%.1f gap=%dd (%s)",
lambda1, k, gap_days, sizing)]
z_null_long <- melt(
z_null,
id.vars = c("scenario_id", "label", "sizing"),
measure.vars = c("z_wald", "z_score"),
variable.name = "test",
value.name = "z"
)
z_null_long[, test := fifelse(test == "z_wald", "Wald", "Score")]
z_null_long <- z_null_long[is.finite(z)]
p_z <- ggplot(z_null_long, aes(x = z, color = test)) +
geom_density(linewidth = 0.6) +
stat_function(fun = dnorm, color = "black", linetype = "dashed", linewidth = 0.4) +
facet_wrap(~ label, scales = "free_y") +
labs(
title = "Null Z-score densities: Wald vs Score",
subtitle = "Dashed line = N(0,1) reference",
x = "Z-statistic", y = "Density",
color = "Test"
) +
theme_minimal() +
coord_cartesian(xlim = c(-4, 4))
if (has_plotly) {
plotly::ggplotly(p_z) |>
plotly::layout(legend = list(orientation = "h", x = 0.3, y = -0.05))
} else {
p_z
}