Title: | Estimation and Validation Methods for Subgroup Identification and Personalized Medicine |
---|---|
Description: | Provides functions for fitting and validation of models for subgroup identification and personalized medicine / precision medicine under the general subgroup identification framework of Chen et al. (2017) <doi:10.1111/biom.12676>. This package is intended for use for both randomized controlled trials and observational studies and is described in detail in Huling and Yu (2021) <doi:10.18637/jss.v098.i05>. |
Authors: | Jared Huling [aut, cre] , Aaron Potvien [ctb], Alexandros Karatzoglou [cph], Alex Smola [cph] |
Maintainer: | Jared Huling <[email protected]> |
License: | GPL-2 |
Version: | 0.2.8 |
Built: | 2024-11-24 03:40:51 UTC |
Source: | https://github.com/jaredhuling/personalized |
Results in a plot to check whether the propensity score has adequate overlap between treatment groups
check.overlap( x, trt, propensity.func, type = c("histogram", "density", "both"), bins = 50L, alpha = ifelse(type == "both", 0.35, 0.5) )
check.overlap( x, trt, propensity.func, type = c("histogram", "density", "both"), bins = 50L, alpha = ifelse(type == "both", 0.35, 0.5) )
x |
The design matrix (not including intercept term) |
trt |
treatment vector with each element equal to a 0 or a 1, with 1 indicating treatment status is active. |
propensity.func |
function that inputs the design matrix x and the treatment vector trt and outputs
the propensity score, ie Pr(trt = 1 | X = x). Function should take two arguments 1) x and 2) trt. See example below.
For a randomized controlled trial this can simply be a function that returns a constant equal to the proportion
of patients assigned to the treatment group, i.e.:
|
type |
Type of plot to create. Options are either a histogram ( |
bins |
integer number of bins for histograms when |
alpha |
value between 0 and 1 indicating transparency level (1 for solid, 0 for fully transparent) |
library(personalized) set.seed(123) n.obs <- 250 n.vars <- 15 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.25 + 0.5 * x[,11] - 0.5 * x[,12] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) # create function for fitting propensity score model prop.func <- function(x, trt) { # fit propensity score model propens.model <- cv.glmnet(y = trt, x = x, family = "binomial") pi.x <- predict(propens.model, s = "lambda.min", newx = x, type = "response")[,1] pi.x } check.overlap(x = x, trt = trt01, propensity.func = prop.func) # now add density plot with histogram check.overlap(x = x, trt = trt01, type = "both", propensity.func = prop.func) # simulated non-randomized treatment with multiple levels xbetat_1 <- 0.15 + 0.5 * x[,9] - 0.25 * x[,12] xbetat_2 <- 0.15 - 0.5 * x[,11] + 0.25 * x[,15] trt.1.prob <- exp(xbetat_1) / (1 + exp(xbetat_1) + exp(xbetat_2)) trt.2.prob <- exp(xbetat_2) / (1 + exp(xbetat_1) + exp(xbetat_2)) trt.3.prob <- 1 - (trt.1.prob + trt.2.prob) prob.mat <- cbind(trt.1.prob, trt.2.prob, trt.3.prob) trt <- apply(prob.mat, 1, function(rr) rmultinom(1, 1, prob = rr)) trt <- apply(trt, 2, function(rr) which(rr == 1)) # use multinomial logistic regression model with lasso penalty for propensity propensity.multinom.lasso <- function(x, trt) { if (!is.factor(trt)) trt <- as.factor(trt) gfit <- cv.glmnet(y = trt, x = x, family = "multinomial") # predict returns a matrix of probabilities: # one column for each treatment level propens <- drop(predict(gfit, newx = x, type = "response", s = "lambda.min", nfolds = 5, alpha = 0)) # return the probability corresponding to the # treatment that was observed probs <- propens[,match(levels(trt), colnames(propens))] probs } check.overlap(x = x, trt = trt, type = "histogram", propensity.func = propensity.multinom.lasso)
library(personalized) set.seed(123) n.obs <- 250 n.vars <- 15 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.25 + 0.5 * x[,11] - 0.5 * x[,12] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) # create function for fitting propensity score model prop.func <- function(x, trt) { # fit propensity score model propens.model <- cv.glmnet(y = trt, x = x, family = "binomial") pi.x <- predict(propens.model, s = "lambda.min", newx = x, type = "response")[,1] pi.x } check.overlap(x = x, trt = trt01, propensity.func = prop.func) # now add density plot with histogram check.overlap(x = x, trt = trt01, type = "both", propensity.func = prop.func) # simulated non-randomized treatment with multiple levels xbetat_1 <- 0.15 + 0.5 * x[,9] - 0.25 * x[,12] xbetat_2 <- 0.15 - 0.5 * x[,11] + 0.25 * x[,15] trt.1.prob <- exp(xbetat_1) / (1 + exp(xbetat_1) + exp(xbetat_2)) trt.2.prob <- exp(xbetat_2) / (1 + exp(xbetat_1) + exp(xbetat_2)) trt.3.prob <- 1 - (trt.1.prob + trt.2.prob) prob.mat <- cbind(trt.1.prob, trt.2.prob, trt.3.prob) trt <- apply(prob.mat, 1, function(rr) rmultinom(1, 1, prob = rr)) trt <- apply(trt, 2, function(rr) which(rr == 1)) # use multinomial logistic regression model with lasso penalty for propensity propensity.multinom.lasso <- function(x, trt) { if (!is.factor(trt)) trt <- as.factor(trt) gfit <- cv.glmnet(y = trt, x = x, family = "multinomial") # predict returns a matrix of probabilities: # one column for each treatment level propens <- drop(predict(gfit, newx = x, type = "response", s = "lambda.min", nfolds = 5, alpha = 0)) # return the probability corresponding to the # treatment that was observed probs <- propens[,match(levels(trt), colnames(propens))] probs } check.overlap(x = x, trt = trt, type = "histogram", propensity.func = propensity.multinom.lasso)
Creates an augmentation function that optionally utilizes cross-fitting
create.augmentation.function( family, crossfit = TRUE, nfolds.crossfit = 10, cv.glmnet.args = NULL )
create.augmentation.function( family, crossfit = TRUE, nfolds.crossfit = 10, cv.glmnet.args = NULL )
family |
The response type (see options in |
crossfit |
A logical value indicating whether to use cross-fitting ( |
nfolds.crossfit |
An integer specifying the number of folds to use for cross-fitting. Must be greater than 1 |
cv.glmnet.args |
A list of NAMED arguments to pass to the |
A function which can be passed to the augment.func
argument of the fit.subgroup
function.
Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W., & Robins, J. (2018). Double/debiased machine learning for treatment and structural parameters https://arxiv.org/abs/1608.00060
fit.subgroup
for estimating ITRs and create.propensity.function
for creation of propensity functions
library(personalized) set.seed(123) n.obs <- 500 n.vars <- 15 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.5 + 0.5 * x[,7] - 0.5 * x[,9] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) trt <- 2 * trt01 - 1 # simulate response # delta below drives treatment effect heterogeneity delta <- 2 * (0.5 + x[,2] - x[,3] - x[,11] + x[,1] * x[,12] ) xbeta <- x[,1] + x[,11] - 2 * x[,12]^2 + x[,13] + 0.5 * x[,15] ^ 2 xbeta <- xbeta + delta * trt # continuous outcomes y <- drop(xbeta) + rnorm(n.obs, sd = 2) aug.func <- create.augmentation.function(family = "gaussian", crossfit = TRUE, nfolds.crossfit = 10, cv.glmnet.args = list(type.measure = "mae", nfolds = 5)) prop.func <- create.propensity.function(crossfit = TRUE, nfolds.crossfit = 10, cv.glmnet.args = list(type.measure = "auc", nfolds = 5)) ## Not run: subgrp.model <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, augment.func = aug.func, loss = "sq_loss_lasso", nfolds = 10) # option for cv.glmnet (for ITR estimation) summary(subgrp.model) ## End(Not run)
library(personalized) set.seed(123) n.obs <- 500 n.vars <- 15 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.5 + 0.5 * x[,7] - 0.5 * x[,9] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) trt <- 2 * trt01 - 1 # simulate response # delta below drives treatment effect heterogeneity delta <- 2 * (0.5 + x[,2] - x[,3] - x[,11] + x[,1] * x[,12] ) xbeta <- x[,1] + x[,11] - 2 * x[,12]^2 + x[,13] + 0.5 * x[,15] ^ 2 xbeta <- xbeta + delta * trt # continuous outcomes y <- drop(xbeta) + rnorm(n.obs, sd = 2) aug.func <- create.augmentation.function(family = "gaussian", crossfit = TRUE, nfolds.crossfit = 10, cv.glmnet.args = list(type.measure = "mae", nfolds = 5)) prop.func <- create.propensity.function(crossfit = TRUE, nfolds.crossfit = 10, cv.glmnet.args = list(type.measure = "auc", nfolds = 5)) ## Not run: subgrp.model <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, augment.func = aug.func, loss = "sq_loss_lasso", nfolds = 10) # option for cv.glmnet (for ITR estimation) summary(subgrp.model) ## End(Not run)
Creates an propensity function that optionally utilizes cross-fitting
create.propensity.function( crossfit = TRUE, nfolds.crossfit = 10, cv.glmnet.args = NULL )
create.propensity.function( crossfit = TRUE, nfolds.crossfit = 10, cv.glmnet.args = NULL )
crossfit |
A logical value indicating whether to use cross-fitting ( |
nfolds.crossfit |
An integer specifying the number of folds to use for cross-fitting. Must be greater than 1 |
cv.glmnet.args |
A list of NAMED arguments to pass to the |
A function which can be passed to the augment.func
argument of the fit.subgroup
function.
Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W., & Robins, J. (2018). Double/debiased machine learning for treatment and structural parameters https://arxiv.org/abs/1608.00060
fit.subgroup
for estimating ITRs and create.propensity.function
for creation of propensity functions
library(personalized) set.seed(123) n.obs <- 500 n.vars <- 15 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.5 + 0.5 * x[,7] - 0.5 * x[,9] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) trt <- 2 * trt01 - 1 # simulate response # delta below drives treatment effect heterogeneity delta <- 2 * (0.5 + x[,2] - x[,3] - x[,11] + x[,1] * x[,12] ) xbeta <- x[,1] + x[,11] - 2 * x[,12]^2 + x[,13] + 0.5 * x[,15] ^ 2 xbeta <- xbeta + delta * trt # continuous outcomes y <- drop(xbeta) + rnorm(n.obs, sd = 2) aug.func <- create.augmentation.function(family = "gaussian", crossfit = TRUE, nfolds.crossfit = 10, cv.glmnet.args = list(type.measure = "mae", nfolds = 5)) prop.func <- create.propensity.function(crossfit = TRUE, nfolds.crossfit = 10, cv.glmnet.args = list(type.measure = "mae", nfolds = 5)) subgrp.model <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, augment.func = aug.func, loss = "sq_loss_lasso", nfolds = 10) # option for cv.glmnet (for ITR estimation) summary(subgrp.model)
library(personalized) set.seed(123) n.obs <- 500 n.vars <- 15 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.5 + 0.5 * x[,7] - 0.5 * x[,9] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) trt <- 2 * trt01 - 1 # simulate response # delta below drives treatment effect heterogeneity delta <- 2 * (0.5 + x[,2] - x[,3] - x[,11] + x[,1] * x[,12] ) xbeta <- x[,1] + x[,11] - 2 * x[,12]^2 + x[,13] + 0.5 * x[,15] ^ 2 xbeta <- xbeta + delta * trt # continuous outcomes y <- drop(xbeta) + rnorm(n.obs, sd = 2) aug.func <- create.augmentation.function(family = "gaussian", crossfit = TRUE, nfolds.crossfit = 10, cv.glmnet.args = list(type.measure = "mae", nfolds = 5)) prop.func <- create.propensity.function(crossfit = TRUE, nfolds.crossfit = 10, cv.glmnet.args = list(type.measure = "mae", nfolds = 5)) subgrp.model <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, augment.func = aug.func, loss = "sq_loss_lasso", nfolds = 10) # option for cv.glmnet (for ITR estimation) summary(subgrp.model)
Fits subgroup identification model class of Chen, et al (2017)
fit.subgroup( x, y, trt, propensity.func = NULL, loss = c("sq_loss_lasso", "logistic_loss_lasso", "poisson_loss_lasso", "cox_loss_lasso", "owl_logistic_loss_lasso", "owl_logistic_flip_loss_lasso", "owl_hinge_loss", "owl_hinge_flip_loss", "sq_loss_lasso_gam", "poisson_loss_lasso_gam", "logistic_loss_lasso_gam", "sq_loss_gam", "poisson_loss_gam", "logistic_loss_gam", "owl_logistic_loss_gam", "owl_logistic_flip_loss_gam", "owl_logistic_loss_lasso_gam", "owl_logistic_flip_loss_lasso_gam", "sq_loss_xgboost", "custom"), method = c("weighting", "a_learning"), match.id = NULL, augment.func = NULL, fit.custom.loss = NULL, cutpoint = 0, larger.outcome.better = TRUE, reference.trt = NULL, retcall = TRUE, ... )
fit.subgroup( x, y, trt, propensity.func = NULL, loss = c("sq_loss_lasso", "logistic_loss_lasso", "poisson_loss_lasso", "cox_loss_lasso", "owl_logistic_loss_lasso", "owl_logistic_flip_loss_lasso", "owl_hinge_loss", "owl_hinge_flip_loss", "sq_loss_lasso_gam", "poisson_loss_lasso_gam", "logistic_loss_lasso_gam", "sq_loss_gam", "poisson_loss_gam", "logistic_loss_gam", "owl_logistic_loss_gam", "owl_logistic_flip_loss_gam", "owl_logistic_loss_lasso_gam", "owl_logistic_flip_loss_lasso_gam", "sq_loss_xgboost", "custom"), method = c("weighting", "a_learning"), match.id = NULL, augment.func = NULL, fit.custom.loss = NULL, cutpoint = 0, larger.outcome.better = TRUE, reference.trt = NULL, retcall = TRUE, ... )
x |
The design matrix (not including intercept term) |
y |
The response vector |
trt |
treatment vector with each element equal to a 0 or a 1, with 1 indicating treatment status is active. |
propensity.func |
function that inputs the design matrix x and the treatment vector trt and outputs
the propensity score, ie Pr(trt = 1 | X = x). Function should take two arguments 1) x and 2) trt. See example below.
For a randomized controlled trial this can simply be a function that returns a constant equal to the proportion
of patients assigned to the treatment group, i.e.:
|
loss |
choice of both the M function from Chen, et al (2017) and potentially the penalty used for variable selection.
All
|
method |
subgroup ID model type. Either the weighting or A-learning method of Chen et al, (2017) |
match.id |
a (character, factor, or integer) vector with length equal to the number of observations in |
augment.func |
function which inputs the response Example 1: Example 2: augment.func <- function(x, y, trt) { data <- data.frame(x, y, trt) lmod <- lm(y ~ x * trt) ## get predictions when trt = 1 data$trt <- 1 preds_1 <- predict(lmod, data) ## get predictions when trt = -1 data$trt <- -1 preds_n1 <- predict(lmod, data) ## return predictions averaged over trt return(0.5 * (preds_1 + preds_n1)) } For binary and time-to-event outcomes, make sure that predictions are returned on the scale of the predictors Example 3: augment.func <- function(x, y) { bmod <- glm(y ~ x, family = binomial()) return(predict(bmod, type = "link")) } |
fit.custom.loss |
A function which minimizes a user-specified
custom loss function M(y,v) to be used in model fitting.
If provided, The loss function
An example of a valid loss function is The provided function MUST return a list with the following elements:
The provided function MUST be a function with the following arguments:
The provided function can also optionally take the following arguments:
Example 1: Here we minimize fit.custom.loss <- function(x, y, weights, ...) { df <- data.frame(y = y, x) # minimize squared error loss with NO lasso penalty lmf <- lm(y ~ x - 1, weights = weights, data = df, ...) # save coefficients cfs = coef(lmf) # create prediction function. Notice # how a column of 1's is appended # to ensure treatment main effects are included # in predictions prd = function(x, type = "response") { dfte <- cbind(1, x) colnames(dfte) <- names(cfs) predict(lmf, data.frame(dfte)) } # return lost of required components list(predict = prd, model = lmf, coefficients = cfs) } Example 2: fit.expo.loss <- function(x, y, weights, ...) { ## define loss function to be minimized expo.loss <- function(beta, x, y, weights) { sum(weights * y * exp(-drop(tcrossprod(x, t(beta) ))) } # use optim() to minimize loss function opt <- optim(rep(0, NCOL(x)), fn = expo.loss, x = x, y = y, weights = weights) coefs <- opt$par pred <- function(x, type = "response") { tcrossprod(cbind(1, x), t(coefs)) } # return list of required components list(predict = pred, model = opt, coefficients = coefs) } |
cutpoint |
numeric value for patients with benefit scores above which
(or below which if |
larger.outcome.better |
boolean value of whether a larger outcome is better/preferable. Set to |
reference.trt |
which treatment should be treated as the reference treatment. Defaults to the first level of |
retcall |
boolean value. if |
... |
options to be passed to underlying fitting function. For all For all |
An object of class "subgroup_fitted"
.
predict |
A function that returns predictions of the covariate-conditional treatment effects |
model |
An object returned by the underlying fitting function used. For example, if the lasso use used to fit
the underlying subgroup identification model, this will be an object returned by |
coefficients |
If the underlying subgroup identification model is parametric, |
call |
The call that produced the returned object. If |
family |
The family corresponding to the outcome provided |
loss |
The loss function used |
method |
The method used (either weighting or A-learning) |
propensity.func |
The propensity score function used |
larger.outcome.better |
If larger outcomes are preferred for this model |
cutpoint |
Benefit score cutoff value used for determining subgroups |
var.names |
The names of all variables used |
n.trts |
The number of treatment levels |
comparison.trts |
All treatment levels other than the reference level |
reference.trt |
The reference level for the treatment. This should usually be the control group/level |
trts |
All treatment levels |
trt.received |
The vector of treatment assignments |
pi.x |
A vector of propensity scores |
y |
A vector of outcomes |
benefit.scores |
A vector of conditional treatment effects, i.e. benefit scores |
recommended.trts |
A vector of treatment recommendations (i.e. for each patient, which treatment results in the best expected potential outcomes) |
subgroup.trt.effects |
(Biased) estimates of the conditional treatment effects and conditional outcomes. These are essentially just empirical averages within different combinations of treatment assignments and treatment recommendations |
individual.trt.effects |
estimates of the individual treatment effects as returned by
|
Huling. J.D. and Yu, M. (2021), Subgroup Identification Using the personalized Package. Journal of Statistical Software 98(5), 1-60. doi:10.18637/jss.v098.i05
Chen, S., Tian, L., Cai, T. and Yu, M. (2017), A general statistical framework for subgroup identification and comparative treatment scoring. Biometrics. doi:10.1111/biom.12676 doi:10.1111/biom.12676
Xu, Y., Yu, M., Zhao, Y. Q., Li, Q., Wang, S., & Shao, J. (2015), Regularized outcome weighted subgroup identification for differential treatment effects. Biometrics, 71(3), 645-653. doi: 10.1111/biom.12322 doi:10.1111/biom.12322
Zhao, Y., Zeng, D., Rush, A. J., & Kosorok, M. R. (2012), Estimating individualized treatment rules using outcome weighted learning. Journal of the American Statistical Association, 107(499), 1106-1118. doi: 10.1080/01621459.2012.695674
validate.subgroup
for function which creates validation results for subgroup
identification models, predict.subgroup_fitted
for a prediction function for fitted models
from fit.subgroup
, plot.subgroup_fitted
for a function which plots
results from fitted models, and print.subgroup_fitted
for arguments for printing options for fit.subgroup()
.
from fit.subgroup
.
library(personalized) set.seed(123) n.obs <- 500 n.vars <- 15 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.5 + 0.5 * x[,7] - 0.5 * x[,9] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) trt <- 2 * trt01 - 1 # simulate response # delta below drives treatment effect heterogeneity delta <- 2 * (0.5 + x[,2] - x[,3] - x[,11] + x[,1] * x[,12] ) xbeta <- x[,1] + x[,11] - 2 * x[,12]^2 + x[,13] + 0.5 * x[,15] ^ 2 xbeta <- xbeta + delta * trt # continuous outcomes y <- drop(xbeta) + rnorm(n.obs, sd = 2) # binary outcomes y.binary <- 1 * (xbeta + rnorm(n.obs, sd = 2) > 0 ) # count outcomes y.count <- round(abs(xbeta + rnorm(n.obs, sd = 2))) # time-to-event outcomes surv.time <- exp(-20 - xbeta + rnorm(n.obs, sd = 1)) cens.time <- exp(rnorm(n.obs, sd = 3)) y.time.to.event <- pmin(surv.time, cens.time) status <- 1 * (surv.time <= cens.time) # create function for fitting propensity score model prop.func <- function(x, trt) { # fit propensity score model propens.model <- cv.glmnet(y = trt, x = x, family = "binomial") pi.x <- predict(propens.model, s = "lambda.min", newx = x, type = "response")[,1] pi.x } #################### Continuous outcomes ################################ subgrp.model <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, loss = "sq_loss_lasso", # option for cv.glmnet, # better to use 'nfolds=10' nfolds = 3) summary(subgrp.model) # estimates of the individual-specific # treatment effect estimates: subgrp.model$individual.trt.effects # fit lasso + gam model with REML option for gam subgrp.modelg <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, loss = "sq_loss_lasso_gam", method.gam = "REML", # option for gam nfolds = 5) # option for cv.glmnet subgrp.modelg #################### Using an augmentation function ##################### ## augmentation funcions involve modeling the conditional mean E[Y|T, X] ## and returning predictions that are averaged over the treatment values ## return <- 1/2 * (hat{E}[Y|T=1, X] + hat{E}[Y|T=-1, X]) ########################################################################## augment.func <- function(x, y, trt) { data <- data.frame(x, y, trt) xm <- model.matrix(y~trt*x-1, data = data) lmod <- cv.glmnet(y = y, x = xm) ## get predictions when trt = 1 data$trt <- 1 xm <- model.matrix(y~trt*x-1, data = data) preds_1 <- predict(lmod, xm, s = "lambda.min") ## get predictions when trt = -1 data$trt <- -1 xm <- model.matrix(y~trt*x-1, data = data) preds_n1 <- predict(lmod, xm, s = "lambda.min") ## return predictions averaged over trt return(0.5 * (preds_1 + preds_n1)) } subgrp.model.aug <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, augment.func = augment.func, loss = "sq_loss_lasso", # option for cv.glmnet, # better to use 'nfolds=10' nfolds = 3) # option for cv.glmnet summary(subgrp.model.aug) #################### Binary outcomes #################################### # use logistic loss for binary outcomes subgrp.model.bin <- fit.subgroup(x = x, y = y.binary, trt = trt01, propensity.func = prop.func, loss = "logistic_loss_lasso", type.measure = "auc", # option for cv.glmnet nfolds = 3) # option for cv.glmnet subgrp.model.bin #################### Count outcomes ##################################### # use poisson loss for count/poisson outcomes subgrp.model.poisson <- fit.subgroup(x = x, y = y.count, trt = trt01, propensity.func = prop.func, loss = "poisson_loss_lasso", type.measure = "mse", # option for cv.glmnet nfolds = 3) # option for cv.glmnet subgrp.model.poisson #################### Time-to-event outcomes ############################# library(survival) subgrp.model.cox <- fit.subgroup(x = x, y = Surv(y.time.to.event, status), trt = trt01, propensity.func = prop.func, loss = "cox_loss_lasso", nfolds = 3) # option for cv.glmnet subgrp.model.cox #################### Using custom loss functions ######################## ## Use custom loss function for binary outcomes fit.custom.loss.bin <- function(x, y, weights, offset, ...) { df <- data.frame(y = y, x) # minimize logistic loss with NO lasso penalty # with allowance for efficiency augmentation glmf <- glm(y ~ x - 1, weights = weights, offset = offset, # offset term allows for efficiency augmentation family = binomial(), ...) # save coefficients cfs = coef(glmf) # create prediction function. prd = function(x, type = "response") { dfte <- cbind(1, x) colnames(dfte) <- names(cfs) ## predictions must be returned on the scale ## of the linear predictor predict(glmf, data.frame(dfte), type = "link") } # return lost of required components list(predict = prd, model = glmf, coefficients = cfs) } subgrp.model.bin.cust <- fit.subgroup(x = x, y = y.binary, trt = trt01, propensity.func = prop.func, fit.custom.loss = fit.custom.loss.bin) subgrp.model.bin.cust ## try exponential loss for ## positive outcomes fit.expo.loss <- function(x, y, weights, ...) { expo.loss <- function(beta, x, y, weights) { sum(weights * y * exp(-drop(x %*% beta))) } # use optim() to minimize loss function opt <- optim(rep(0, NCOL(x)), fn = expo.loss, x = x, y = y, weights = weights) coefs <- opt$par pred <- function(x, type = "response") { tcrossprod(cbind(1, x), t(coefs)) } # return list of required components list(predict = pred, model = opt, coefficients = coefs) } # use exponential loss for positive outcomes subgrp.model.expo <- fit.subgroup(x = x, y = y.count, trt = trt01, propensity.func = prop.func, fit.custom.loss = fit.expo.loss) subgrp.model.expo
library(personalized) set.seed(123) n.obs <- 500 n.vars <- 15 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.5 + 0.5 * x[,7] - 0.5 * x[,9] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) trt <- 2 * trt01 - 1 # simulate response # delta below drives treatment effect heterogeneity delta <- 2 * (0.5 + x[,2] - x[,3] - x[,11] + x[,1] * x[,12] ) xbeta <- x[,1] + x[,11] - 2 * x[,12]^2 + x[,13] + 0.5 * x[,15] ^ 2 xbeta <- xbeta + delta * trt # continuous outcomes y <- drop(xbeta) + rnorm(n.obs, sd = 2) # binary outcomes y.binary <- 1 * (xbeta + rnorm(n.obs, sd = 2) > 0 ) # count outcomes y.count <- round(abs(xbeta + rnorm(n.obs, sd = 2))) # time-to-event outcomes surv.time <- exp(-20 - xbeta + rnorm(n.obs, sd = 1)) cens.time <- exp(rnorm(n.obs, sd = 3)) y.time.to.event <- pmin(surv.time, cens.time) status <- 1 * (surv.time <= cens.time) # create function for fitting propensity score model prop.func <- function(x, trt) { # fit propensity score model propens.model <- cv.glmnet(y = trt, x = x, family = "binomial") pi.x <- predict(propens.model, s = "lambda.min", newx = x, type = "response")[,1] pi.x } #################### Continuous outcomes ################################ subgrp.model <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, loss = "sq_loss_lasso", # option for cv.glmnet, # better to use 'nfolds=10' nfolds = 3) summary(subgrp.model) # estimates of the individual-specific # treatment effect estimates: subgrp.model$individual.trt.effects # fit lasso + gam model with REML option for gam subgrp.modelg <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, loss = "sq_loss_lasso_gam", method.gam = "REML", # option for gam nfolds = 5) # option for cv.glmnet subgrp.modelg #################### Using an augmentation function ##################### ## augmentation funcions involve modeling the conditional mean E[Y|T, X] ## and returning predictions that are averaged over the treatment values ## return <- 1/2 * (hat{E}[Y|T=1, X] + hat{E}[Y|T=-1, X]) ########################################################################## augment.func <- function(x, y, trt) { data <- data.frame(x, y, trt) xm <- model.matrix(y~trt*x-1, data = data) lmod <- cv.glmnet(y = y, x = xm) ## get predictions when trt = 1 data$trt <- 1 xm <- model.matrix(y~trt*x-1, data = data) preds_1 <- predict(lmod, xm, s = "lambda.min") ## get predictions when trt = -1 data$trt <- -1 xm <- model.matrix(y~trt*x-1, data = data) preds_n1 <- predict(lmod, xm, s = "lambda.min") ## return predictions averaged over trt return(0.5 * (preds_1 + preds_n1)) } subgrp.model.aug <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, augment.func = augment.func, loss = "sq_loss_lasso", # option for cv.glmnet, # better to use 'nfolds=10' nfolds = 3) # option for cv.glmnet summary(subgrp.model.aug) #################### Binary outcomes #################################### # use logistic loss for binary outcomes subgrp.model.bin <- fit.subgroup(x = x, y = y.binary, trt = trt01, propensity.func = prop.func, loss = "logistic_loss_lasso", type.measure = "auc", # option for cv.glmnet nfolds = 3) # option for cv.glmnet subgrp.model.bin #################### Count outcomes ##################################### # use poisson loss for count/poisson outcomes subgrp.model.poisson <- fit.subgroup(x = x, y = y.count, trt = trt01, propensity.func = prop.func, loss = "poisson_loss_lasso", type.measure = "mse", # option for cv.glmnet nfolds = 3) # option for cv.glmnet subgrp.model.poisson #################### Time-to-event outcomes ############################# library(survival) subgrp.model.cox <- fit.subgroup(x = x, y = Surv(y.time.to.event, status), trt = trt01, propensity.func = prop.func, loss = "cox_loss_lasso", nfolds = 3) # option for cv.glmnet subgrp.model.cox #################### Using custom loss functions ######################## ## Use custom loss function for binary outcomes fit.custom.loss.bin <- function(x, y, weights, offset, ...) { df <- data.frame(y = y, x) # minimize logistic loss with NO lasso penalty # with allowance for efficiency augmentation glmf <- glm(y ~ x - 1, weights = weights, offset = offset, # offset term allows for efficiency augmentation family = binomial(), ...) # save coefficients cfs = coef(glmf) # create prediction function. prd = function(x, type = "response") { dfte <- cbind(1, x) colnames(dfte) <- names(cfs) ## predictions must be returned on the scale ## of the linear predictor predict(glmf, data.frame(dfte), type = "link") } # return lost of required components list(predict = prd, model = glmf, coefficients = cfs) } subgrp.model.bin.cust <- fit.subgroup(x = x, y = y.binary, trt = trt01, propensity.func = prop.func, fit.custom.loss = fit.custom.loss.bin) subgrp.model.bin.cust ## try exponential loss for ## positive outcomes fit.expo.loss <- function(x, y, weights, ...) { expo.loss <- function(beta, x, y, weights) { sum(weights * y * exp(-drop(x %*% beta))) } # use optim() to minimize loss function opt <- optim(rep(0, NCOL(x)), fn = expo.loss, x = x, y = y, weights = weights) coefs <- opt$par pred <- function(x, type = "response") { tcrossprod(cbind(1, x), t(coefs)) } # return list of required components list(predict = pred, model = opt, coefficients = coefs) } # use exponential loss for positive outcomes subgrp.model.expo <- fit.subgroup(x = x, y = y.count, trt = trt01, propensity.func = prop.func, fit.custom.loss = fit.expo.loss) subgrp.model.expo
The LaLonde dataset comes from the National Supported Work Study, which sought to evaluate the effectiveness of an employment trainining program on wage increases.
LaLonde
LaLonde
A data frame with 722 observations and 12 variables:
whether earnings in 1978 are larger than in 1975; 1 for yes, 0 for no
whether the individual received the treatment; "Yes" or "No"
age in years
education in years
black or not; factor with levels "Yes" or "No"
hispanic or not; factor with levels "Yes" or "No"
white or not; factor with levels "Yes" or "No"
married or not; factor with levels "Yes" or "No"
No high school degree; factor with levels "Yes" (for no HS degree) or "No"
log of earnings in 1975
unemployed in 1975; factor with levels "Yes" or "No"
extrapolation weights to the 1978 Panel Study for Income Dynamics dataset
The National Supported Work Study.
LaLonde, R.J. 1986. "Evaluating the econometric evaulations of training programs with experimental data." American Economic Review, Vol.76, No.4, pp. 604-620.
Egami N, Ratkovic M, Imai K (2017). "FindIt: Finding Heterogeneous Treatment Effects." R
package version 1.1.2, https://CRAN.R-project.org/package=FindIt.
data(LaLonde) y <- LaLonde$outcome trt <- LaLonde$treat x.varnames <- c("age", "educ", "black", "hisp", "white", "marr", "nodegr", "log.re75", "u75") # covariates data.x <- LaLonde[, x.varnames] # construct design matrix (with no intercept) x <- model.matrix(~ -1 + ., data = data.x) const.propens <- function(x, trt) { mean.trt <- mean(trt == "Trt") rep(mean.trt, length(trt)) } subgrp_fit_w <- fit.subgroup(x = x, y = y, trt = trt, loss = "logistic_loss_lasso", propensity.func = const.propens, cutpoint = 0, type.measure = "auc", nfolds = 10) summary(subgrp_fit_w)
data(LaLonde) y <- LaLonde$outcome trt <- LaLonde$treat x.varnames <- c("age", "educ", "black", "hisp", "white", "marr", "nodegr", "log.re75", "u75") # covariates data.x <- LaLonde[, x.varnames] # construct design matrix (with no intercept) x <- model.matrix(~ -1 + ., data = data.x) const.propens <- function(x, trt) { mean.trt <- mean(trt == "Trt") rep(mean.trt, length(trt)) } subgrp_fit_w <- fit.subgroup(x = x, y = y, trt = trt, loss = "logistic_loss_lasso", propensity.func = const.propens, cutpoint = 0, type.measure = "auc", nfolds = 10) summary(subgrp_fit_w)
Plots results for estimated subgroup treatment effects
Plots validation results for estimated subgroup treatment effects
## S3 method for class 'subgroup_fitted' plot( x, type = c("boxplot", "density", "interaction", "conditional"), avg.line = TRUE, ... ) ## S3 method for class 'subgroup_validated' plot( x, type = c("boxplot", "density", "interaction", "conditional", "stability"), avg.line = TRUE, ... )
## S3 method for class 'subgroup_fitted' plot( x, type = c("boxplot", "density", "interaction", "conditional"), avg.line = TRUE, ... ) ## S3 method for class 'subgroup_validated' plot( x, type = c("boxplot", "density", "interaction", "conditional", "stability"), avg.line = TRUE, ... )
x |
fitted object returned by |
type |
type of plot. |
avg.line |
boolean value of whether or not to plot a line for the average
value in addition to the density (only valid for |
... |
not used |
fit.subgroup
for function which fits subgroup identification models.
validate.subgroup
for function which creates validation results
and fit.subgroup
for function which fits subgroup identification models.
library(personalized) set.seed(123) n.obs <- 250 n.vars <- 15 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.5 + 0.5 * x[,11] - 0.5 * x[,13] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) trt <- 2 * trt01 - 1 # simulate response delta <- 2 * (0.5 + x[,2] - x[,3] - x[,11] + x[,1] * x[,12]) xbeta <- x[,1] + x[,11] - 2 * x[,12]^2 + x[,13] xbeta <- xbeta + delta * trt # continuous outcomes y <- drop(xbeta) + rnorm(n.obs, sd = 2) # create function for fitting propensity score model prop.func <- function(x, trt) { # fit propensity score model propens.model <- cv.glmnet(y = trt, x = x, family = "binomial") pi.x <- predict(propens.model, s = "lambda.min", newx = x, type = "response")[,1] pi.x } subgrp.model <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, loss = "sq_loss_lasso", # option for cv.glmnet, # better to use 'nfolds=10' nfolds = 3) # option for cv.glmnet subgrp.model$subgroup.trt.effects plot(subgrp.model) plot(subgrp.model, type = "boxplot") plot(subgrp.model, type = "interaction") plot(subgrp.model, type = "conditional") valmod <- validate.subgroup(subgrp.model, B = 3, method = "training_test", benefit.score.quantiles = c(0.25, 0.5, 0.75), train.fraction = 0.75) plot(valmod) plot(valmod, type = "interaction") # see how summary statistics of subgroups change # when the subgroups are defined based on different cutoffs # (25th quantile of bene score, 50th, and 75th) plot(valmod, type = "conditional") # visualize the frequency of particular variables # of being selected across the resampling iterations with # 'type = "stability"' # not run: # plot(valmod, type = "stability")
library(personalized) set.seed(123) n.obs <- 250 n.vars <- 15 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.5 + 0.5 * x[,11] - 0.5 * x[,13] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) trt <- 2 * trt01 - 1 # simulate response delta <- 2 * (0.5 + x[,2] - x[,3] - x[,11] + x[,1] * x[,12]) xbeta <- x[,1] + x[,11] - 2 * x[,12]^2 + x[,13] xbeta <- xbeta + delta * trt # continuous outcomes y <- drop(xbeta) + rnorm(n.obs, sd = 2) # create function for fitting propensity score model prop.func <- function(x, trt) { # fit propensity score model propens.model <- cv.glmnet(y = trt, x = x, family = "binomial") pi.x <- predict(propens.model, s = "lambda.min", newx = x, type = "response")[,1] pi.x } subgrp.model <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, loss = "sq_loss_lasso", # option for cv.glmnet, # better to use 'nfolds=10' nfolds = 3) # option for cv.glmnet subgrp.model$subgroup.trt.effects plot(subgrp.model) plot(subgrp.model, type = "boxplot") plot(subgrp.model, type = "interaction") plot(subgrp.model, type = "conditional") valmod <- validate.subgroup(subgrp.model, B = 3, method = "training_test", benefit.score.quantiles = c(0.25, 0.5, 0.75), train.fraction = 0.75) plot(valmod) plot(valmod, type = "interaction") # see how summary statistics of subgroups change # when the subgroups are defined based on different cutoffs # (25th quantile of bene score, 50th, and 75th) plot(valmod, type = "conditional") # visualize the frequency of particular variables # of being selected across the resampling iterations with # 'type = "stability"' # not run: # plot(valmod, type = "stability")
Plots comparison of results for estimated subgroup treatment effects
plotCompare( ..., type = c("boxplot", "density", "interaction", "conditional"), avg.line = TRUE )
plotCompare( ..., type = c("boxplot", "density", "interaction", "conditional"), avg.line = TRUE )
... |
the fitted (model or validation) objects to be plotted. Must be either
objects returned from |
type |
type of plot. |
avg.line |
boolean value of whether or not to plot a line for the average
value in addition to the density (only valid for |
fit.subgroup
for function which fits subgroup identification models and
validate.subgroup
for function which creates validation results.
library(personalized) set.seed(123) n.obs <- 100 n.vars <- 15 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.5 + 0.5 * x[,1] - 0.5 * x[,4] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) trt <- 2 * trt01 - 1 # simulate response delta <- 2 * (0.5 + x[,2] - x[,3] - x[,11] + x[,1] * x[,12]) xbeta <- x[,1] + x[,11] - 2 * x[,12]^2 + x[,13] xbeta <- xbeta + delta * trt # continuous outcomes y <- drop(xbeta) + rnorm(n.obs, sd = 2) # create function for fitting propensity score model prop.func <- function(x, trt) { # fit propensity score model propens.model <- cv.glmnet(y = trt, x = x, family = "binomial") pi.x <- predict(propens.model, s = "lambda.min", newx = x, type = "response")[,1] pi.x } subgrp.model <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, loss = "sq_loss_lasso", # option for cv.glmnet, # better to use 'nfolds=10' nfolds = 3) # option for cv.glmnet subgrp.model.o <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, # option for cv.glmnet, # better to use 'nfolds=10' loss = "owl_logistic_flip_loss_lasso", nfolds = 3) plotCompare(subgrp.model, subgrp.model.o)
library(personalized) set.seed(123) n.obs <- 100 n.vars <- 15 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.5 + 0.5 * x[,1] - 0.5 * x[,4] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) trt <- 2 * trt01 - 1 # simulate response delta <- 2 * (0.5 + x[,2] - x[,3] - x[,11] + x[,1] * x[,12]) xbeta <- x[,1] + x[,11] - 2 * x[,12]^2 + x[,13] xbeta <- xbeta + delta * trt # continuous outcomes y <- drop(xbeta) + rnorm(n.obs, sd = 2) # create function for fitting propensity score model prop.func <- function(x, trt) { # fit propensity score model propens.model <- cv.glmnet(y = trt, x = x, family = "binomial") pi.x <- predict(propens.model, s = "lambda.min", newx = x, type = "response")[,1] pi.x } subgrp.model <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, loss = "sq_loss_lasso", # option for cv.glmnet, # better to use 'nfolds=10' nfolds = 3) # option for cv.glmnet subgrp.model.o <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, # option for cv.glmnet, # better to use 'nfolds=10' loss = "owl_logistic_flip_loss_lasso", nfolds = 3) plotCompare(subgrp.model, subgrp.model.o)
Predicts benefit scores or treatment recommendations based on a fitted subgroup identification model
Function to obtain predictions for weighted ksvm objects
## S3 method for class 'subgroup_fitted' predict( object, newx, type = c("benefit.score", "trt.group"), cutpoint = 0, ... ) ## S3 method for class 'wksvm' predict(object, newx, type = c("class", "linear.predictor"), ...)
## S3 method for class 'subgroup_fitted' predict( object, newx, type = c("benefit.score", "trt.group"), cutpoint = 0, ... ) ## S3 method for class 'wksvm' predict(object, newx, type = c("class", "linear.predictor"), ...)
object |
fitted object returned by For |
newx |
new design matrix for which predictions will be made |
type |
type of prediction. For |
cutpoint |
numeric value for patients with benefit scores above which
(or below which if |
... |
not used |
fit.subgroup
for function which fits subgroup identification models.
weighted.ksvm
for fitting weighted.ksvm
objects
library(personalized) set.seed(123) n.obs <- 500 n.vars <- 15 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.5 + 0.5 * x[,11] - 0.5 * x[,3] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) trt <- 2 * trt01 - 1 # simulate response delta <- 2 * (0.5 + x[,2] - x[,3] - x[,11] + x[,1] * x[,12]) xbeta <- x[,1] + x[,11] - 2 * x[,12]^2 + x[,13] xbeta <- xbeta + delta * trt # continuous outcomes y <- drop(xbeta) + rnorm(n.obs, sd = 2) # create function for fitting propensity score model prop.func <- function(x, trt) { # fit propensity score model propens.model <- cv.glmnet(y = trt, x = x, family = "binomial") pi.x <- predict(propens.model, s = "lambda.min", newx = x, type = "response")[,1] pi.x } subgrp.model <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, loss = "sq_loss_lasso", nfolds = 3) # option for cv.glmnet subgrp.model$subgroup.trt.effects benefit.scores <- predict(subgrp.model, newx = x, type = "benefit.score") rec.trt.grp <- predict(subgrp.model, newx = x, type = "trt.group")
library(personalized) set.seed(123) n.obs <- 500 n.vars <- 15 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.5 + 0.5 * x[,11] - 0.5 * x[,3] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) trt <- 2 * trt01 - 1 # simulate response delta <- 2 * (0.5 + x[,2] - x[,3] - x[,11] + x[,1] * x[,12]) xbeta <- x[,1] + x[,11] - 2 * x[,12]^2 + x[,13] xbeta <- xbeta + delta * trt # continuous outcomes y <- drop(xbeta) + rnorm(n.obs, sd = 2) # create function for fitting propensity score model prop.func <- function(x, trt) { # fit propensity score model propens.model <- cv.glmnet(y = trt, x = x, family = "binomial") pi.x <- predict(propens.model, s = "lambda.min", newx = x, type = "response")[,1] pi.x } subgrp.model <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, loss = "sq_loss_lasso", nfolds = 3) # option for cv.glmnet subgrp.model$subgroup.trt.effects benefit.scores <- predict(subgrp.model, newx = x, type = "benefit.score") rec.trt.grp <- predict(subgrp.model, newx = x, type = "trt.group")
Prints results for estimated subgroup treatment effects
## S3 method for class 'individual_treatment_effects' print(x, digits = max(getOption("digits") - 3, 3), ...)
## S3 method for class 'individual_treatment_effects' print(x, digits = max(getOption("digits") - 3, 3), ...)
x |
a fitted object from either |
digits |
minimal number of significant digits to print. |
... |
further arguments passed to or from |
Prints results for estimated subgroup treatment effects
Prints summary results for estimated subgroup treatment effects
## S3 method for class 'subgroup_fitted' print(x, digits = max(getOption("digits") - 3, 3), ...) ## S3 method for class 'subgroup_validated' print( x, digits = max(getOption("digits") - 3, 3), sample.pct = FALSE, which.quant = NULL, ... ) ## S3 method for class 'subgroup_summary' print(x, p.value = 0.001, digits = max(getOption("digits") - 3, 3), ...)
## S3 method for class 'subgroup_fitted' print(x, digits = max(getOption("digits") - 3, 3), ...) ## S3 method for class 'subgroup_validated' print( x, digits = max(getOption("digits") - 3, 3), sample.pct = FALSE, which.quant = NULL, ... ) ## S3 method for class 'subgroup_summary' print(x, p.value = 0.001, digits = max(getOption("digits") - 3, 3), ...)
x |
a fitted object from either |
digits |
minimal number of significant digits to print. |
... |
further arguments passed to or from |
sample.pct |
boolean variable of whether to print the percent of the test sample within each subgroup. If false the sample size itself, not the percent is printed. This may not be informative if the test sample size is much different from the total sample size |
which.quant |
when |
p.value |
a p-value threshold for mean differences below which covariates will be displayed. P-values are adjusted for
multiple comparisons by the Hommel approach. For example,
setting |
validate.subgroup
for function which creates validation results
and fit.subgroup
for function which fits subgroup identification models.
summarize.subgroups
for function which summarizes subgroup covariate values
Computes treatment effects within various subgroups to estimate subgroup treatment effects
subgroup.effects( benefit.scores, y, trt, pi.x, cutpoint = 0, larger.outcome.better = TRUE, reference.trt = NULL )
subgroup.effects( benefit.scores, y, trt, pi.x, cutpoint = 0, larger.outcome.better = TRUE, reference.trt = NULL )
benefit.scores |
vector of estimated benefit scores |
y |
The response vector |
trt |
treatment vector with each element equal to a 0 or a 1, with 1 indicating treatment status is active. |
pi.x |
The propensity score for each observation |
cutpoint |
numeric value for patients with benefit scores above which
(or below which if |
larger.outcome.better |
boolean value of whether a larger outcome is better. Set to |
reference.trt |
index of which treatment is the reference (in the case of multiple treatments).
This should be known already, as for a |
fit.subgroup
for function which fits subgroup identification models which generate
benefit scores.
Summarizes covariate values within the estimated subgroups
summarize.subgroups(x, ...) ## Default S3 method: summarize.subgroups(x, subgroup, ...) ## S3 method for class 'subgroup_fitted' summarize.subgroups(x, ...)
summarize.subgroups(x, ...) ## Default S3 method: summarize.subgroups(x, subgroup, ...) ## S3 method for class 'subgroup_fitted' summarize.subgroups(x, ...)
x |
a fitted object from |
... |
optional arguments to |
subgroup |
vector of indicators of same length as the number of rows in x if x is a matrix.
A value of 1 in the ith position of |
The p-values shown are raw p-values and are not adjusted for multiple comparisons.
fit.subgroup
for function which fits subgroup identification models and
print.subgroup_summary
for arguments for printing options for summarize.subgroups()
.
library(personalized) set.seed(123) n.obs <- 1000 n.vars <- 50 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.5 + 0.5 * x[,21] - 0.5 * x[,41] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) trt <- 2 * trt01 - 1 # simulate response delta <- 2 * (0.5 + x[,2] - x[,3] - x[,11] + x[,1] * x[,12]) xbeta <- x[,1] + x[,11] - 2 * x[,12]^2 + x[,13] xbeta <- xbeta + delta * trt # continuous outcomes y <- drop(xbeta) + rnorm(n.obs, sd = 2) # create function for fitting propensity score model prop.func <- function(x, trt) { # fit propensity score model propens.model <- cv.glmnet(y = trt, x = x, family = "binomial") pi.x <- predict(propens.model, s = "lambda.min", newx = x, type = "response")[,1] pi.x } subgrp.model <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, loss = "sq_loss_lasso", nfolds = 5) # option for cv.glmnet comp <- summarize.subgroups(subgrp.model) print(comp, p.value = 0.01) # or we can simply supply the matrix x and the subgroups comp2 <- summarize.subgroups(x, subgroup = 1 * (subgrp.model$benefit.scores > 0)) print(comp2, p.value = 0.01)
library(personalized) set.seed(123) n.obs <- 1000 n.vars <- 50 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.5 + 0.5 * x[,21] - 0.5 * x[,41] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) trt <- 2 * trt01 - 1 # simulate response delta <- 2 * (0.5 + x[,2] - x[,3] - x[,11] + x[,1] * x[,12]) xbeta <- x[,1] + x[,11] - 2 * x[,12]^2 + x[,13] xbeta <- xbeta + delta * trt # continuous outcomes y <- drop(xbeta) + rnorm(n.obs, sd = 2) # create function for fitting propensity score model prop.func <- function(x, trt) { # fit propensity score model propens.model <- cv.glmnet(y = trt, x = x, family = "binomial") pi.x <- predict(propens.model, s = "lambda.min", newx = x, type = "response")[,1] pi.x } subgrp.model <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, loss = "sq_loss_lasso", nfolds = 5) # option for cv.glmnet comp <- summarize.subgroups(subgrp.model) print(comp, p.value = 0.01) # or we can simply supply the matrix x and the subgroups comp2 <- summarize.subgroups(x, subgroup = 1 * (subgrp.model$benefit.scores > 0)) print(comp2, p.value = 0.01)
Prints summary of results for estimated subgroup treatment effects
Prints summary of results for estimated weighted ksvm
## S3 method for class 'subgroup_fitted' summary(object, digits = max(getOption("digits") - 3, 3), ...) ## S3 method for class 'wksvm' summary(object, digits = max(getOption("digits") - 3, 3), ...)
## S3 method for class 'subgroup_fitted' summary(object, digits = max(getOption("digits") - 3, 3), ...) ## S3 method for class 'wksvm' summary(object, digits = max(getOption("digits") - 3, 3), ...)
object |
a fitted object from either |
digits |
minimal number of significant digits to print. |
... |
further arguments passed to or from |
validate.subgroup
for function which creates validation results
and fit.subgroup
for function which fits subgroup identification models.
Calculates covariate conditional treatment effects using estimated benefit scores
treatment.effects(x, ...) ## Default S3 method: treatment.effects(x, ...) treat.effects( benefit.scores, loss = c("sq_loss_lasso", "logistic_loss_lasso", "poisson_loss_lasso", "cox_loss_lasso", "owl_logistic_loss_lasso", "owl_logistic_flip_loss_lasso", "owl_hinge_loss", "owl_hinge_flip_loss", "sq_loss_lasso_gam", "poisson_loss_lasso_gam", "logistic_loss_lasso_gam", "sq_loss_gam", "poisson_loss_gam", "logistic_loss_gam", "owl_logistic_loss_gam", "owl_logistic_flip_loss_gam", "owl_logistic_loss_lasso_gam", "owl_logistic_flip_loss_lasso_gam", "sq_loss_xgboost", "custom"), method = c("weighting", "a_learning"), pi.x = NULL, ... ) ## S3 method for class 'subgroup_fitted' treatment.effects(x, ...)
treatment.effects(x, ...) ## Default S3 method: treatment.effects(x, ...) treat.effects( benefit.scores, loss = c("sq_loss_lasso", "logistic_loss_lasso", "poisson_loss_lasso", "cox_loss_lasso", "owl_logistic_loss_lasso", "owl_logistic_flip_loss_lasso", "owl_hinge_loss", "owl_hinge_flip_loss", "sq_loss_lasso_gam", "poisson_loss_lasso_gam", "logistic_loss_lasso_gam", "sq_loss_gam", "poisson_loss_gam", "logistic_loss_gam", "owl_logistic_loss_gam", "owl_logistic_flip_loss_gam", "owl_logistic_loss_lasso_gam", "owl_logistic_flip_loss_lasso_gam", "sq_loss_xgboost", "custom"), method = c("weighting", "a_learning"), pi.x = NULL, ... ) ## S3 method for class 'subgroup_fitted' treatment.effects(x, ...)
x |
a fitted object from |
... |
not used |
benefit.scores |
vector of estimated benefit scores |
loss |
loss choice USED TO CALCULATE |
method |
method choice USED TO CALCULATE |
pi.x |
The propensity score for each observation |
A List with elements delta
(if the treatment effects are a difference/contrast,
i.e. ) and
gamma
(if the treatment effects are a ratio,
i.e. )
fit.subgroup
for function which fits subgroup identification models.
print.individual_treatment_effects
for printing of objects returned by
treat.effects
or treatment.effects
library(personalized) set.seed(123) n.obs <- 500 n.vars <- 25 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.5 + 0.5 * x[,21] - 0.5 * x[,11] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) trt <- 2 * trt01 - 1 # simulate response delta <- 2 * (0.5 + x[,2] - x[,3] - x[,11] + x[,1] * x[,12]) xbeta <- x[,1] + x[,11] - 2 * x[,12]^2 + x[,13] xbeta <- xbeta + delta * trt # continuous outcomes y <- drop(xbeta) + rnorm(n.obs, sd = 2) # time-to-event outcomes surv.time <- exp(-20 - xbeta + rnorm(n.obs, sd = 1)) cens.time <- exp(rnorm(n.obs, sd = 3)) y.time.to.event <- pmin(surv.time, cens.time) status <- 1 * (surv.time <= cens.time) # create function for fitting propensity score model prop.func <- function(x, trt) { # fit propensity score model propens.model <- cv.glmnet(y = trt, x = x, family = "binomial") pi.x <- predict(propens.model, s = "lambda.min", newx = x, type = "response")[,1] pi.x } subgrp.model <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, loss = "sq_loss_lasso", nfolds = 3) # option for cv.glmnet trt_eff <- treatment.effects(subgrp.model) str(trt_eff) trt_eff library(survival) subgrp.model.cox <- fit.subgroup(x = x, y = Surv(y.time.to.event, status), trt = trt01, propensity.func = prop.func, loss = "cox_loss_lasso", nfolds = 3) # option for cv.glmnet trt_eff_c <- treatment.effects(subgrp.model.cox) str(trt_eff_c) trt_eff_c
library(personalized) set.seed(123) n.obs <- 500 n.vars <- 25 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.5 + 0.5 * x[,21] - 0.5 * x[,11] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) trt <- 2 * trt01 - 1 # simulate response delta <- 2 * (0.5 + x[,2] - x[,3] - x[,11] + x[,1] * x[,12]) xbeta <- x[,1] + x[,11] - 2 * x[,12]^2 + x[,13] xbeta <- xbeta + delta * trt # continuous outcomes y <- drop(xbeta) + rnorm(n.obs, sd = 2) # time-to-event outcomes surv.time <- exp(-20 - xbeta + rnorm(n.obs, sd = 1)) cens.time <- exp(rnorm(n.obs, sd = 3)) y.time.to.event <- pmin(surv.time, cens.time) status <- 1 * (surv.time <= cens.time) # create function for fitting propensity score model prop.func <- function(x, trt) { # fit propensity score model propens.model <- cv.glmnet(y = trt, x = x, family = "binomial") pi.x <- predict(propens.model, s = "lambda.min", newx = x, type = "response")[,1] pi.x } subgrp.model <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, loss = "sq_loss_lasso", nfolds = 3) # option for cv.glmnet trt_eff <- treatment.effects(subgrp.model) str(trt_eff) trt_eff library(survival) subgrp.model.cox <- fit.subgroup(x = x, y = Surv(y.time.to.event, status), trt = trt01, propensity.func = prop.func, loss = "cox_loss_lasso", nfolds = 3) # option for cv.glmnet trt_eff_c <- treatment.effects(subgrp.model.cox) str(trt_eff_c) trt_eff_c
Validates subgroup treatment effects for fitted subgroup identification model class of Chen, et al (2017)
validate.subgroup( model, B = 50L, method = c("training_test_replication", "boot_bias_correction"), train.fraction = 0.75, benefit.score.quantiles = c(0.1666667, 0.3333333, 0.5, 0.6666667, 0.8333333), parallel = FALSE )
validate.subgroup( model, B = 50L, method = c("training_test_replication", "boot_bias_correction"), train.fraction = 0.75, benefit.score.quantiles = c(0.1666667, 0.3333333, 0.5, 0.6666667, 0.8333333), parallel = FALSE )
model |
fitted model object returned by |
B |
integer. number of bootstrap replications or refitting replications. |
method |
validation method. |
train.fraction |
fraction (between 0 and 1) of samples to be used for training in
training/test replication. Only used for |
benefit.score.quantiles |
a vector of quantiles (between 0 and 1) of the benefit score values for which to return bootstrapped information about the subgroups. ie if one of the quantile values is 0.5, the median value of the benefit scores will be used as a cutoff to determine subgroups and summary statistics will be returned about these subgroups |
parallel |
Should the loop over replications be parallelized? If |
Estimates of various quantities conditional on subgroups and treatment statuses are provided and displayed
via the print.subgroup_validated
function:
"Conditional expected outcomes" The first results shown when printing
a subgroup_validated
object are estimates of the expected outcomes conditional on
the estimated subgroups (i.e. which subgroup is 'recommended' by the model) and conditional
on treatment/intervention status. If there are two total treatment options, this results in a 2x2 table
of expected conditional outcomes.
"Treatment effects conditional on subgroups" The second results shown when printing
a subgroup_validated
object are estimates of the expected outcomes conditional on
the estimated subgroups. If the treatment takes levels , a total of
conditional treatment effects will be shown. For example, of the outcome is continuous, the
th conditional treatment effect is defined as
,
where
if treatment
is recommended, i.e. treatment
results in the largest/best
expected potential outcomes given the fitted model.
"Overall treatment effect conditional on subgroups " The third quantity displayed shows the overall improvement in outcomes resulting from all treatment recommendations. This is essentially an average over all of the conditional treatment effects weighted by the proportion of the population recommended each respective treatment level.
An object of class "subgroup_validated"
avg.results |
Estimates of average conditional treatment effects when
subgroups are determined based on the provided cutoff value for the benefit score. For example,
if |
se.results |
Standard errors of the estimates from |
boot.results |
Contains the individual results for each replication. |
avg.quantile.results |
Estimates of average conditional treatment effects when
subgroups are determined based on different quntile cutoff values for the benefit score. For example,
if |
se.quantile.results |
Standard errors corresponding to |
boot.results.quantiles |
Contains the individual results for each replication. |
family |
Family of the outcome. For example, |
method |
Method used for subgroup identification model. Weighting or A-learning |
n.trts |
The number of treatment levels |
comparison.trts |
All treatment levels other than the reference level |
reference.trt |
The reference level for the treatment. This should usually be the control group/level |
larger.outcome.better |
If larger outcomes are preferred for this model |
cutpoint |
Benefit score cutoff value used for determining subgroups |
val.method |
Method used for validation |
iterations |
Number of replications used in the validation process |
nobs |
Number of observations in |
nvars |
Number of variables in |
Chen, S., Tian, L., Cai, T. and Yu, M. (2017), A general statistical framework for subgroup identification and comparative treatment scoring. Biometrics. doi:10.1111/biom.12676
Harrell, F. E., Lee, K. L., and Mark, D. B. (1996). Tutorial in biostatistics multivariable prognostic models: issues in developing models, evaluating assumptions and adequacy, and measuring and reducing errors. Statistics in medicine, 15, 361-387. doi:10.1002/(SICI)1097-0258(19960229)15:4<361::AID-SIM168>3.0.CO;2-4
Huling. J.D. and Yu, M. (2021), Subgroup Identification Using the personalized Package. Journal of Statistical Software 98(5), 1-60. doi:10.18637/jss.v098.i05
fit.subgroup
for function which fits subgroup identification models,
plot.subgroup_validated
for plotting of validation results, and
print.subgroup_validated
for arguments for printing options for validate.subgroup()
.
library(personalized) set.seed(123) n.obs <- 500 n.vars <- 20 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.5 + 0.5 * x[,11] - 0.5 * x[,13] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) trt <- 2 * trt01 - 1 # simulate response delta <- 2 * (0.5 + x[,2] - x[,3] - x[,11] + x[,1] * x[,12]) xbeta <- x[,1] + x[,11] - 2 * x[,12]^2 + x[,13] xbeta <- xbeta + delta * trt # continuous outcomes y <- drop(xbeta) + rnorm(n.obs, sd = 2) # create function for fitting propensity score model prop.func <- function(x, trt) { # fit propensity score model propens.model <- cv.glmnet(y = trt, x = x, family = "binomial") pi.x <- predict(propens.model, s = "lambda.min", newx = x, type = "response")[,1] pi.x } subgrp.model <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, loss = "sq_loss_lasso", # option for cv.glmnet, # better to use 'nfolds=10' nfolds = 3) x.test <- matrix(rnorm(10 * n.obs * n.vars, sd = 3), 10 * n.obs, n.vars) # simulate non-randomized treatment xbetat.test <- 0.5 + 0.5 * x.test[,11] - 0.5 * x.test[,13] trt.prob.test <- exp(xbetat.test) / (1 + exp(xbetat.test)) trt01.test <- rbinom(10 * n.obs, 1, prob = trt.prob.test) trt.test <- 2 * trt01.test - 1 # simulate response delta.test <- 2 * (0.5 + x.test[,2] - x.test[,3] - x.test[,11] + x.test[,1] * x.test[,12]) xbeta.test <- x.test[,1] + x.test[,11] - 2 * x.test[,12]^2 + x.test[,13] xbeta.test <- xbeta.test + delta.test * trt.test y.test <- drop(xbeta.test) + rnorm(10 * n.obs, sd = 2) valmod <- validate.subgroup(subgrp.model, B = 2, method = "training_test", train.fraction = 0.75) valmod print(valmod, which.quant = c(4, 5))
library(personalized) set.seed(123) n.obs <- 500 n.vars <- 20 x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) # simulate non-randomized treatment xbetat <- 0.5 + 0.5 * x[,11] - 0.5 * x[,13] trt.prob <- exp(xbetat) / (1 + exp(xbetat)) trt01 <- rbinom(n.obs, 1, prob = trt.prob) trt <- 2 * trt01 - 1 # simulate response delta <- 2 * (0.5 + x[,2] - x[,3] - x[,11] + x[,1] * x[,12]) xbeta <- x[,1] + x[,11] - 2 * x[,12]^2 + x[,13] xbeta <- xbeta + delta * trt # continuous outcomes y <- drop(xbeta) + rnorm(n.obs, sd = 2) # create function for fitting propensity score model prop.func <- function(x, trt) { # fit propensity score model propens.model <- cv.glmnet(y = trt, x = x, family = "binomial") pi.x <- predict(propens.model, s = "lambda.min", newx = x, type = "response")[,1] pi.x } subgrp.model <- fit.subgroup(x = x, y = y, trt = trt01, propensity.func = prop.func, loss = "sq_loss_lasso", # option for cv.glmnet, # better to use 'nfolds=10' nfolds = 3) x.test <- matrix(rnorm(10 * n.obs * n.vars, sd = 3), 10 * n.obs, n.vars) # simulate non-randomized treatment xbetat.test <- 0.5 + 0.5 * x.test[,11] - 0.5 * x.test[,13] trt.prob.test <- exp(xbetat.test) / (1 + exp(xbetat.test)) trt01.test <- rbinom(10 * n.obs, 1, prob = trt.prob.test) trt.test <- 2 * trt01.test - 1 # simulate response delta.test <- 2 * (0.5 + x.test[,2] - x.test[,3] - x.test[,11] + x.test[,1] * x.test[,12]) xbeta.test <- x.test[,1] + x.test[,11] - 2 * x.test[,12]^2 + x.test[,13] xbeta.test <- xbeta.test + delta.test * trt.test y.test <- drop(xbeta.test) + rnorm(10 * n.obs, sd = 2) valmod <- validate.subgroup(subgrp.model, B = 2, method = "training_test", train.fraction = 0.75) valmod print(valmod, which.quant = c(4, 5))
Fits weighted kernel SVM. To be used for OWL with hinge loss (but can be used more generally)
weighted.ksvm( y, x, weights, C = c(0.1, 0.5, 1, 2, 10), kernel = "rbfdot", kpar = "automatic", nfolds = 10, foldid = NULL, eps = 1e-08, ... )
weighted.ksvm( y, x, weights, C = c(0.1, 0.5, 1, 2, 10), kernel = "rbfdot", kpar = "automatic", nfolds = 10, foldid = NULL, eps = 1e-08, ... )
y |
The response vector (either a character vector, factor vector, or numeric vector with values in -1, 1) |
x |
The design matrix (not including intercept term) |
weights |
vector of sample weights for weighted SVM |
C |
cost of constraints violation, see |
kernel |
kernel function used for training and prediction. See |
kpar |
list of hyperparameters for the kernel function. See |
nfolds |
number of cross validation folds for selecting value of C |
foldid |
optional vector of values between 1 and nfolds specifying which fold each observation is in. If specified, it will
override the |
eps |
penalty nugget parameter. Defaults to |
... |
extra arguments to be passed to |
predict.wksvm
for predicting from fitted weighted.ksvm
objects
library(kernlab) x <- matrix(rnorm(200 * 2), ncol = 2) y <- 2 * (sin(x[,2]) ^ 2 * exp(-x[,2]) - 0.2 > rnorm(200, sd = 0.1)) - 1 weights <- runif(100, max = 1.5, min = 0.5) wk <- weighted.ksvm(x = x[1:100,], y = y[1:100], C = c(0.1, 0.5, 1, 2), nfolds = 5, weights = weights[1:100]) pr <- predict(wk, newx = x[101:200,]) mean(pr == y[101:200])
library(kernlab) x <- matrix(rnorm(200 * 2), ncol = 2) y <- 2 * (sin(x[,2]) ^ 2 * exp(-x[,2]) - 0.2 > rnorm(200, sd = 0.1)) - 1 weights <- runif(100, max = 1.5, min = 0.5) wk <- weighted.ksvm(x = x[1:100,], y = y[1:100], C = c(0.1, 0.5, 1, 2), nfolds = 5, weights = weights[1:100]) pr <- predict(wk, newx = x[101:200,]) mean(pr == y[101:200])