# --------------------------------------------------------------
# packages
# --------------------------------------------------------------
<- c("data.table", "etm", "survival", "mvna", "knitr")
packs for (i in 1:length(packs)){library(packs[i], character.only = TRUE)}
SAVVY: estimation of AE probabilities
Background
The assessment of safety is an important aspect of the evaluation of new therapies in clinical trials, with analyses of adverse events being an essential part of this. Standard methods for the analysis of adverse events such as the incidence proportion, i.e. the number of patients with a specific adverse event out of all patients in the treatment groups, do not account for both varying follow-up times and competing risks. Alternative approaches such as the Aalen-Johansen estimator of the cumulative incidence function have been suggested. Theoretical arguments and numerical evaluations support the application of these more advanced methodology, but as yet there is to our knowledge only insufficient empirical evidence whether these methods would lead to different conclusions in safety evaluations. The Survival analysis for AdVerse events with VarYing follow-up times (SAVVY) project strives to close this gap in evidence by conducting a meta-analytical study to assess the impact of the methodology on the conclusion of the safety assessment empirically. Three papers are currently under review summarizing the rationale and results of the project:
- Regina Stegherr et al. (2021): Statistical analysis plan, presenting the rationale and statistical concept of the empirical study conducted as part of the SAVVY project. The statistical methods are presented in unified notation and examples of their implementation in R and SAS are provided. arxiv | publication
- R. Stegherr, Schmoor, Beyersmann, et al. (2021): 1-sample case, compares estimators of AE risk in one treatment arm. arxiv | publication
- Rufibach et al. (2022): 2-sample case, compares relative risk estimators comparing two treatment arms based on (1) AE probabilities and (2) hazards. arxiv | publication
Purpose of this document
This R markdown file provides easy accessible code to compute all the estimators for AE probabilities that are compared to each other in these papers. The github repository where this document is hosted is available here.
SAS code
The original SAS macros that were used by each sponsor to generate the summary data for the meta-analysis is available as supplement to Regina Stegherr et al. (2021), see here, or direct download link for zip-file.
Setup
Packages
Functions
Below all functions are defined.
data_generation_constant_cens
: This function generates the dataset denoted by \(S1\) in Table~4 of R. Stegherr, Schmoor, Lübbert, et al. (2021), i.e. we assume constant hazards for the AE hazard, the hazard for the competing event of death, and the hazard for the “soft” competing events. Censoring is uniform.incidenceProportion
: Compute incidence proportion.probTransIncidenceDensity
: Compute probability transform incidence density ignoring competing events.probTransIncidenceDensity
: Compute probability transform incidence density ignoring competing events.oneMinusKaplanMeier
: Compute 1 - Kaplan-Meier estimator.AJE
: Compute Aalen-Johansen estimator.
The argument CE
codifies the type of competing event: CE = 2
refers to a competing event of death only whereas CE = 3
refers to all competing events.
# --------------------------------------------------------------
# functions
# --------------------------------------------------------------
# function to generate dataset with constant hazards for AE, death, and soft competing events
<- function(N, min.cens, max.cens, haz.AE, haz.death,
data_generation_constant_cens seed = 57 * i + 5){
haz.soft,
# status, 1 for AE, 2 for death 3 for soft competing event
set.seed(seed)
<- haz.AE + haz.death + haz.soft
haz.all
<- data.table(time_to_event = rep(0, N), type_of_event = rep(0, N))
my.data $time_to_event<- rexp(n = N, rate = haz.all) # event time
my.data$type_of_event <- rbinom(n = N, size = 2,
my.dataprob = c(haz.AE / haz.all, haz.death / haz.all, haz.soft / haz.all)) + 1
# status, 1 for AE, 2 for death 3 for soft competing event
$cens <- runif(n = N, min = min.cens, max = max.cens)
my.data$type_of_event <- as.numeric(my.data$time_to_event <= my.data$cens) * my.data$type_of_event
my.data$time_to_event <- pmin(my.data$time_to_event, my.data$cens)
my.data$id <- 1:N
my.data
# reorder columns
<- my.data[, c("id", "time_to_event", "type_of_event", "cens")]
my.data return(my.data)
}
# compute incidence proportion
<- function(data, tau){
incidenceProportion
<- nrow(data[type_of_event == 1 & time_to_event <= tau]) / nrow(data)
ae <- ae * (1 - ae) / nrow(data)
ae_prob_var
<- c("ae_prob" = ae, "ae_prob_var" = ae_prob_var)
res return(res)
}
# compute probability transform incidence density
<- function(data, tau){
probTransIncidenceDensity
<- data$time_to_event
time <- nrow(data[type_of_event == 1 & time_to_event <= tau]) /
incidence.dens sum(ifelse(time <= tau, time, tau))
<- 1 - exp(-incidence.dens * tau)
ae
<- nrow(data[type_of_event == 1 & time_to_event <= tau]) /
var_A_var sum(ifelse(time <= tau, time, tau)) ^ 2
<- exp(-incidence.dens * tau) ^ 2 * var_A_var * tau ^ 2
ae_var
<- c("ae_prob" = ae, "ae_prob_var" = ae_var)
res return(res)
}
# compute probability transform incidence density accounting for CE
<- function(data, CE, tau){
probTransIncidprobTransIncidenceDensityCompEvents
<- data[, type_of_event2 := ifelse(CE == 2 & data$type_of_event == 3, 0,
data2 ifelse(CE == 3 & data$type_of_event == 3, 2, type_of_event))]
<- ifelse(data2$time_to_event <= tau, data2$time_to_event, tau)
time2 <- sum(time2)
s2
<- sum(dim(data2[type_of_event2 == 1 & time_to_event <= tau])[1]) / s2
id <- sum(dim(data2[type_of_event2 == 2 & time_to_event <= tau])[1]) / s2
id_ce <- id + id_ce
tmp <- exp(-tau * tmp)
ett
<- id / tmp * (1 - exp(- tau*tmp))
ae_prob <- (((ett * (id_ce * (1 / ett - 1) + tau * id * tmp)) / tmp ^ 2) ^ 2 * id / s2 +
ae_prob_var * id * (tau * tmp - 1 / ett + 1)) / tmp ^ 2) ^ 2 * id_ce / s2)
((ett
<- c("ae_prob" = ae_prob, "ae_prob_var" = ae_prob_var)
res return(res)
}
# compute 1 - Kaplan-Meier
<- function(data, tau){
oneMinusKaplanMeier
if(nrow(data[type_of_event == 1]) == 0){
<- 0
ae_prob <- 0
ae_prob_var
}
if(nrow(data[type_of_event == 1]) > 0){
<- data.frame(id = data$id)
help $from <- 0
help$to <- ifelse(data$type_of_event != 1, "cens", data$type_of_event)
help$time <-ifelse(data$time_to_event == 0, 0.001, data$time_to_event)
help
<- matrix(FALSE, 2, 2)
tra 1, 2] <- TRUE
tra[<-as.character(0:1)
state.names <-etm(help, state.names, tra, "cens", s = 0)
etmmm
<- summary(etmmm)[[2]][sum(summary(etmmm)[[2]]$time <= tau),]$P
ae_prob <- summary(etmmm)[[2]][sum(summary(etmmm)[[2]]$time <= tau),]$var
ae_prob_var
}
<- c("ae_prob" = ae_prob, "ae_prob_var" = ae_prob_var)
res return(res)
}
# compute Aalen-Johansen estimator
<- function(data, CE, tau){
AJE
:= ifelse(CE == 2 & data$type_of_event == 3, 0,
data[, type_of_event2 ifelse(CE == 3 & data$type_of_event == 3, 2, type_of_event))]
<- data$time_to_event
time <- data$type_of_event2
type2
# conditions
<- nrow(data[type_of_event2 == 1])
c1 <- nrow(data[type_of_event2 == 2])
c2
if(c1 == 0){
<- 0
ae_prob <- 0
ae_prob_var
}
if(c2 == 0){
<- 0
ce_prob <- 0
ce_prob_var
}
# define auxiliary objects
<- data.frame(id = data$id)
help $from <- 0
help$time <-ifelse(time == 0, 0.001, time)
help<- matrix(FALSE, 2, 2)
tra 1, 2] <- TRUE
tra[<- as.character(0:1)
state.names
if(c1 == 0 & c2 != 0){
$to <- ifelse(type2 != 2, "cens", type2 - 1)
help<- etm(help, state.names, tra, "cens", s = 0)
etmmm <- summary(etmmm)[[2]]
setmm <- setmm[sum(setmm$time <= tau),]$P
ce_prob <- setmm[sum(setmm$time <= tau),]$var
ce_prob_var
}
if(c1 != 0 & c2 == 0){
$to <- ifelse(type2 != 1, "cens", type2)
help<- etm(help, state.names, tra, "cens", s = 0)
etmmm <- summary(etmmm)[[2]]
setmm
<- setmm[sum(setmm$time <= tau),]$P
ae_prob <- setmm[sum(setmm$time <= tau),]$var
ae_prob_var
}
if(c1 != 0 & c2 != 0){
$to <- ifelse(!(type2 %in% c(1, 2)),"cens", type2)
help
<- matrix(FALSE, 3, 3)
tra 1, 2:3] <- TRUE
tra[<- as.character(0:2)
state.names <- etm(help, state.names, tra, "cens", s = 0)
etmmm <- summary(etmmm)
setmm
<- setmm[[2]][sum(setmm[[2]]$time <= tau),]$P
ae_prob <- setmm[[2]][sum(setmm[[2]]$time <= tau),]$var
ae_prob_var
<- setmm[[3]][sum(setmm[[3]]$time <= tau),]$P
ce_prob <- setmm[[3]][sum(setmm[[3]]$time <= tau),]$var
ce_prob_var
}
<- c("ae_prob" = ae_prob, "ae_prob_var" = ae_prob_var)
res_ae <- c("ce_prob" = ce_prob, "ce_prob_var" = ce_prob_var)
res_ce
<- rbind(res_ae, res_ce)
res return(res)
}
Estimation of AE probabilities
We generate the dataset \(S1\) in Regina Stegherr et al. (2021) using the parameter values for Arm A:
# sample size
<- 200
N
# support of uniform censoring distribution
<- 0
min.cens <- 1000
max.cens
# hazards for the three event types
<- 0.00265
haz.AE <- 0.00151
haz.death <- 0.00227
haz.soft
# generate dataset
<- data_generation_constant_cens(N, min.cens, max.cens, haz.AE, haz.death, haz.soft, seed = 2020)
dat1
# compute tau
<- max(dat1[, "time_to_event"]) tau
The structure of the dataset looks as follows:
kable(head(dat1, 10), align = c("crcr"))
id | time_to_event | type_of_event | cens |
---|---|---|---|
1 | 45.692951 | 1 | 424.518663 |
2 | 197.519473 | 1 | 266.013197 |
3 | 36.859040 | 2 | 650.432466 |
4 | 115.062905 | 2 | 164.620580 |
5 | 220.764432 | 1 | 994.178471 |
6 | 8.499869 | 0 | 8.499869 |
7 | 833.042670 | 0 | 833.042670 |
8 | 37.389099 | 1 | 987.244922 |
9 | 82.243862 | 2 | 556.460553 |
10 | 75.843159 | 1 | 813.761866 |
For this dataset we then compute all the estimators that enter the comparison in our papers:
# compute each estimator
<- incidenceProportion(dat1, tau)
IP <- probTransIncidenceDensity(dat1, tau)
ID <- oneMinusKaplanMeier(dat1, tau)
KM
# competing event "death only"
<- probTransIncidprobTransIncidenceDensityCompEvents(dat1, CE = 2, tau)
IDCE2 <- AJE(dat1, CE = 2, tau)
AJ2
# account for all competing events
<- probTransIncidprobTransIncidenceDensityCompEvents(dat1, CE = 3, tau)
IDCE3 <- AJE(dat1, CE = 3, tau)
AJ3
# display
<- rbind(IP, ID, KM, IDCE2, AJ2[1, ], IDCE3, AJ3[1, ])
tab colnames(tab) <- c("estimated AE probability", "variance of estimation")
rownames(tab) <- c("incidence proportion", "probability transform incidence density ignoring competing event",
"1 - Kaplan-Meier", "probability transform incidence density (death only)",
"Aalen-Johansen (death only), AE risk", "probability transform incidence density (all CEs)",
"Aalen-Johansen (all CEs), AE risk")
# probability of AE
kable(tab, digits = c(3, 5))
estimated AE probability | variance of estimation | |
---|---|---|
incidence proportion | 0.400 | 0.00120 |
probability transform incidence density ignoring competing event | 0.891 | 0.00073 |
1 - Kaplan-Meier | 0.832 | 0.00303 |
probability transform incidence density (death only) | 0.509 | 0.00157 |
Aalen-Johansen (death only), AE risk | 0.509 | 0.00164 |
probability transform incidence density (all CEs) | 0.472 | 0.00146 |
Aalen-Johansen (all CEs), AE risk | 0.471 | 0.00149 |
Finally, the estimated probabilities of competing events based on the Aalen-Johansen estimators:
# display
<- rbind(AJ2[2, ], AJ3[2, ])
tab colnames(tab) <- c("estimated probability", "variance of estimation")
rownames(tab) <- c("Aalen-Johansen (death only), CE risk",
"Aalen-Johansen (all CEs), CE risk")
# probability of AE
kable(tab, digits = c(3, 5))
estimated probability | variance of estimation | |
---|---|---|
Aalen-Johansen (death only), CE risk | 0.466 | 0.00165 |
Aalen-Johansen (all CEs), CE risk | 0.507 | 0.00151 |