| Title: | Discrete Choice Models for Economic Applications |
|---|---|
| Description: | Fast estimation of discrete-choice models for applied economics. Likelihoods, analytical gradients and Hessians are implemented in C++ with 'OpenMP' parallelism, scaling efficiently to specifications with many alternative-specific constants. Post-estimation routines return predicted shares, own- and cross-price elasticities, and diversion ratios. Supports multinomial logit ('MNL'), mixed logit ('MXL'), and nested logit ('NL'). |
| Authors: | Fernando Cordeiro [aut, cre, cph] |
| Maintainer: | Fernando Cordeiro <[email protected]> |
| License: | LGPL (>= 3) |
| Version: | 0.1.0.9000 |
| Built: | 2026-06-10 11:40:48 UTC |
| Source: | https://github.com/fpcordeiro/choicer |
Finds the ASC (delta) parameters such that predicted market shares match target shares, using the contraction mapping of Berry, Levinsohn, and Pakes (1995) doi:10.2307/2171802.
blp(object, target_shares, ...)blp(object, target_shares, ...)
object |
A fitted model object. |
target_shares |
Numeric vector of target market shares (length J). |
... |
Additional arguments passed to methods. |
Converged delta (ASC) vector.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) blp(fit, target_shares = rep(1/J, J))library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) blp(fit, target_shares = rep(1/J, J))
BLP95 contraction mapping to find delta given target shares
blp_contraction( delta, target_shares, X, beta, alt_idx, M, weights, include_outside_option = FALSE, tol = 1e-08, max_iter = 1000L )blp_contraction( delta, target_shares, X, beta, alt_idx, M, weights, include_outside_option = FALSE, tol = 1e-08, max_iter = 1000L )
delta |
J x 1 vector with initial guess for deltas (ASCs) |
target_shares |
J x 1 vector with target shares for each alternative |
X |
sum(M) x K design matrix with covariates. M[i] x K matrix for individual i |
beta |
K x 1 vector with model parameters |
alt_idx |
sum(M) x 1 vector with indices of alternatives within each choice set; 1-based indexing |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
include_outside_option |
whether to include outside option normalized to 0 (if so, the outside option is not included in the data) |
tol |
convergence tolerance |
max_iter |
maximum number of iterations |
vector with contraction's delta (ASCs) output
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) beta <- coef(fit)[fit$param_map$beta] delta <- blp_contraction(rep(0, J), rep(1/J, J), fit$data$X, beta, fit$data$alt_idx, fit$data$M, fit$data$weights) deltalibrary(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) beta <- coef(fit)[fit$param_map$beta] delta <- blp_contraction(rep(0, J), rep(1/J, J), fit$data$X, beta, fit$data$alt_idx, fit$data$M, fit$data$weights) delta
BLP contraction mapping for multinomial logit model
## S3 method for class 'choicer_mnl' blp( object, target_shares, delta_init = NULL, tol = 1e-08, max_iter = 1000, ... )## S3 method for class 'choicer_mnl' blp( object, target_shares, delta_init = NULL, tol = 1e-08, max_iter = 1000, ... )
object |
A |
target_shares |
Numeric vector of target market shares.
Length |
delta_init |
Initial guess for delta (ASC) values. If |
tol |
Convergence tolerance (default 1e-8). |
max_iter |
Maximum iterations (default 1000). |
... |
Additional arguments (ignored). |
Converged delta (ASC) vector.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) blp(fit, target_shares = rep(1/J, J))library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) blp(fit, target_shares = rep(1/J, J))
BLP contraction mapping for mixed logit model
## S3 method for class 'choicer_mxl' blp( object, target_shares, delta_init = NULL, tol = 1e-08, max_iter = 1000, ... )## S3 method for class 'choicer_mxl' blp( object, target_shares, delta_init = NULL, tol = 1e-08, max_iter = 1000, ... )
object |
A |
target_shares |
Numeric vector of target market shares.
Length |
delta_init |
Initial guess for delta (ASC) values. If |
tol |
Convergence tolerance (default 1e-8). |
max_iter |
Maximum iterations (default 1000). |
... |
Additional arguments (ignored). |
Converged delta (ASC) vector.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) blp(fit, target_shares = rep(1/J, J))library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) blp(fit, target_shares = rep(1/J, J))
BLP contraction mapping for nested logit model
## S3 method for class 'choicer_nl' blp( object, target_shares, delta_init = NULL, damping = 1, tol = 1e-08, max_iter = 1000, ... )## S3 method for class 'choicer_nl' blp( object, target_shares, delta_init = NULL, damping = 1, tol = 1e-08, max_iter = 1000, ... )
object |
A |
target_shares |
Numeric vector of target market shares.
Length |
delta_init |
Initial guess for delta (ASC) values. If |
damping |
Contraction damping factor in (0, 1] (default 1). |
tol |
Convergence tolerance (default 1e-8). |
max_iter |
Maximum iterations (default 1000). |
... |
Additional arguments (ignored). |
Converged delta (ASC) vector.
library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, nest := rep(c(1L, 1L, 2L, 2L), N)] dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit(dt, "id", "alt", "choice", c("x1", "x2"), "nest") blp(fit, target_shares = rep(1/J, J))library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, nest := rep(c(1L, 1L, 2L, 2L), N)] dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit(dt, "id", "alt", "choice", c("x1", "x2"), "nest") blp(fit, target_shares = rep(1/J, J))
Reconstruct variance matrix L from L_params
build_var_mat(L_params, K_w, rc_correlation)build_var_mat(L_params, K_w, rc_correlation)
L_params |
flattened choleski decomposition version of the random coefficient parameters matrix |
K_w |
dimension of the random coefficient parameter (symmetric) matrix |
rc_correlation |
whether random coefficients are correlated |
matrix equal to LL', where L is the choleski decomposition of random coefficient matrix
L_params <- c(log(1.0), 0.3, log(0.5)) Sigma <- build_var_mat(L_params, K_w = 2, rc_correlation = TRUE) Sigma # 2x2 covariance matrixL_params <- c(log(1.0), 0.3, log(0.5)) Sigma <- build_var_mat(L_params, K_w = 2, rc_correlation = TRUE) Sigma # 2x2 covariance matrix
Extract coefficients from a choicer_fit object
## S3 method for class 'choicer_fit' coef(object, ...)## S3 method for class 'choicer_fit' coef(object, ...)
object |
A choicer_fit object. |
... |
Additional arguments (ignored). |
Named numeric vector of estimated coefficients.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) coef(fit)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) coef(fit)
Computes the expected consumer surplus per choice situation (Train 2009, Ch. 3):
where is the expected maximum utility (see
logsum) and is the (fixed) price coefficient,
so that is the marginal utility of income. The formula
assumes no income effects: utility is linear in price, and the
marginal utility of income is constant across the price changes considered.
consumer_surplus( object, price_var, newdata = NULL, level = 0.95, weights = NULL, ... ) ## S3 method for class 'choicer_mnl' consumer_surplus( object, price_var, newdata = NULL, level = 0.95, weights = NULL, ... ) ## S3 method for class 'choicer_mxl' consumer_surplus( object, price_var, newdata = NULL, level = 0.95, weights = NULL, ... ) ## S3 method for class 'choicer_nl' consumer_surplus( object, price_var, newdata = NULL, level = 0.95, weights = NULL, ... )consumer_surplus( object, price_var, newdata = NULL, level = 0.95, weights = NULL, ... ) ## S3 method for class 'choicer_mnl' consumer_surplus( object, price_var, newdata = NULL, level = 0.95, weights = NULL, ... ) ## S3 method for class 'choicer_mxl' consumer_surplus( object, price_var, newdata = NULL, level = 0.95, weights = NULL, ... ) ## S3 method for class 'choicer_nl' consumer_surplus( object, price_var, newdata = NULL, level = 0.95, weights = NULL, ... )
object |
A fitted model object ( |
price_var |
Name of the price variable. Must be a fixed-coefficient
variable (a column of the design matrix |
newdata |
Optional counterfactual data (data.frame or list), as in
|
level |
Confidence level for the normal-approximation interval around the mean CS (MNL only). Default 0.95. |
weights |
Optional numeric vector with one weight per choice situation,
used for the mean CS (and its SE), as in |
... |
Additional arguments passed to methods. |
Consumer surplus levels inherit the additive utility normalization
(in particular the ASC normalization), so the level is only defined up to a
constant; differences in CS between scenarios — e.g.
consumer_surplus(fit, "price", newdata = scenario) minus the baseline
— are the economically meaningful quantity.
For MNL fits, a delta-method standard error of the weighted mean CS is
reported (weights are the stored fit weights, or the resolved
newdata weights). For MXL and NL fits only point estimates are
returned (se_mean_cs = NA): the delta method for the simulated MXL
logsum and the nested logsum is deferred; simulation-based intervals
(Krinsky-Robb: resample coefficients from their asymptotic distribution
and recompute the mean CS) are a practical alternative.
The price variable must have a fixed coefficient. For mixed logit a
random price coefficient is rejected (as in wtp): with a
random denominator generally has no finite moments.
A choicer_cs object: a list with cs (per-choice-
situation surplus, length N), mean_cs (weighted mean),
se_mean_cs (delta-method SE; NA for MXL/NL or when the
variance-covariance matrix is unavailable), ci (confidence
interval for the mean), price_var, level, and n.
Train, K. (2009). Discrete Choice Methods with Simulation, 2nd ed., Ch. 3. Cambridge University Press.
library(data.table) sim <- simulate_mnl_data(N = 1000, J = 3, beta = c(0.8, -0.6), seed = 123, outside_option = FALSE, vary_choice_set = FALSE) fit <- run_mnlogit(sim$data, "id", "alt", "choice", c("x1", "x2")) # treat x2 as the price variable cs0 <- consumer_surplus(fit, price_var = "x2") cs0 # Change in consumer surplus from a price increase on alternative 2: # levels depend on the ASC normalization, differences do not. dt_cf <- copy(sim$data)[alt == 2, x2 := x2 + 0.5] cs1 <- consumer_surplus(fit, price_var = "x2", newdata = dt_cf) delta_cs <- cs1$mean_cs - cs0$mean_cs delta_cs # negative: the price increase lowers expected surpluslibrary(data.table) sim <- simulate_mnl_data(N = 1000, J = 3, beta = c(0.8, -0.6), seed = 123, outside_option = FALSE, vary_choice_set = FALSE) fit <- run_mnlogit(sim$data, "id", "alt", "choice", c("x1", "x2")) # treat x2 as the price variable cs0 <- consumer_surplus(fit, price_var = "x2") cs0 # Change in consumer surplus from a price increase on alternative 2: # levels depend on the ASC normalization, differences do not. dt_cf <- copy(sim$data)[alt == 2, x2 := x2 + 0.5] cs1 <- consumer_surplus(fit, price_var = "x2", newdata = dt_cf) delta_cs <- cs1$mean_cs - cs0$mean_cs delta_cs # negative: the price increase lowers expected surplus
Computes a J x J matrix of diversion ratios. Entry (i, j) is the fraction of demand lost by alternative j that is captured by alternative i when alternative j becomes less attractive.
diversion_ratios(object, ...)diversion_ratios(object, ...)
object |
A fitted model object. |
... |
Additional arguments passed to methods. |
A J x J diversion ratio matrix with alternative labels.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) diversion_ratios(fit)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) diversion_ratios(fit)
Diversion ratios for multinomial logit model
## S3 method for class 'choicer_mnl' diversion_ratios(object, ...)## S3 method for class 'choicer_mnl' diversion_ratios(object, ...)
object |
A |
... |
Additional arguments (ignored). |
A J x J diversion ratio matrix with alternative labels.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) diversion_ratios(fit)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) diversion_ratios(fit)
Computes the attribute-based diversion ratio matrix. Entry (k, j) is the
fraction of demand lost by alternative j that is captured by alternative k
when a marginal change in alternative j's wrt_var attribute reduces
s_j.
## S3 method for class 'choicer_mxl' diversion_ratios(object, wrt_var, is_random_coef = FALSE, ...)## S3 method for class 'choicer_mxl' diversion_ratios(object, wrt_var, is_random_coef = FALSE, ...)
object |
A |
wrt_var |
Variable used to perturb alternative j's utility: a column
name (character) or 1-based index. Indexes into X columns for fixed
coefficients, or W columns for random coefficients (when
|
is_random_coef |
Logical. |
... |
Additional arguments (ignored). |
Unlike MNL, the MXL diversion ratio depends on which variable is perturbed:
the realised coefficient varies across individuals and
draws and does not cancel in the ratio. For a variable with a fixed
coefficient the result is independent of the variable ( cancels);
for a random-coefficient variable it is not.
A J x J diversion ratio matrix with alternative labels. Cross-products are averaged across simulation draws inside the integration to avoid Jensen-style bias.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) diversion_ratios(fit, "x1") diversion_ratios(fit, "w1", is_random_coef = TRUE)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) diversion_ratios(fit, "x1") diversion_ratios(fit, "w1", is_random_coef = TRUE)
Diversion ratios for nested logit model
## S3 method for class 'choicer_nl' diversion_ratios(object, ...)## S3 method for class 'choicer_nl' diversion_ratios(object, ...)
object |
A |
... |
Additional arguments (ignored). |
A J x J diversion ratio matrix with alternative labels.
library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, nest := rep(c(1L, 1L, 2L, 2L), N)] dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit(dt, "id", "alt", "choice", c("x1", "x2"), "nest") diversion_ratios(fit)library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, nest := rep(c(1L, 1L, 2L, 2L), N)] dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit(dt, "id", "alt", "choice", c("x1", "x2"), "nest") diversion_ratios(fit)
Computes a J x J matrix of aggregate elasticities. Entry (i, j) is the percentage change in the probability of choosing alternative i when the attribute of alternative j changes by 1\
elasticities(object, ...)elasticities(object, ...)
object |
A fitted model object. |
... |
Additional arguments passed to methods. |
A J x J elasticity matrix with alternative labels.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) elasticities(fit, "x1")library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) elasticities(fit, "x1")
Elasticities for multinomial logit model
## S3 method for class 'choicer_mnl' elasticities(object, elast_var, ...)## S3 method for class 'choicer_mnl' elasticities(object, elast_var, ...)
object |
A |
elast_var |
Variable for elasticity computation: a column name (character) or 1-based index into the design matrix X. |
... |
Additional arguments (ignored). |
A J x J elasticity matrix with alternative labels.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) elasticities(fit, "x1")library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) elasticities(fit, "x1")
Elasticities for mixed logit model
## S3 method for class 'choicer_mxl' elasticities(object, elast_var, is_random_coef = FALSE, ...)## S3 method for class 'choicer_mxl' elasticities(object, elast_var, is_random_coef = FALSE, ...)
object |
A |
elast_var |
Variable for elasticity computation: a column name (character)
or 1-based index. Indexes into X columns for fixed coefficients, or W columns
for random coefficients (when |
is_random_coef |
Logical. |
... |
Additional arguments (ignored). |
A J x J elasticity matrix with alternative labels.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) elasticities(fit, "x1") elasticities(fit, "w1", is_random_coef = TRUE)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) elasticities(fit, "x1") elasticities(fit, "w1", is_random_coef = TRUE)
Elasticities for nested logit model
## S3 method for class 'choicer_nl' elasticities(object, elast_var, ...)## S3 method for class 'choicer_nl' elasticities(object, elast_var, ...)
object |
A |
elast_var |
Variable for elasticity computation: a column name (character) or 1-based index into the design matrix X. |
... |
Additional arguments (ignored). |
A J x J elasticity matrix with alternative labels.
library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, nest := rep(c(1L, 1L, 2L, 2L), N)] dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit(dt, "id", "alt", "choice", c("x1", "x2"), "nest") elasticities(fit, "x1")library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, nest := rep(c(1L, 1L, 2L, 2L), N)] dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit(dt, "id", "alt", "choice", c("x1", "x2"), "nest") elasticities(fit, "x1")
Create halton normal draws in appropriate format for mixed logit estimation
get_halton_normals(S, N, K_w)get_halton_normals(S, N, K_w)
S |
Number of draws for each choice situation |
N |
number of choice situations |
K_w |
dimension of random coefficients (number of columns in W matrix) |
K_w x S x N array with halton standard normal draws
draws <- get_halton_normals(S = 50, N = 10, K_w = 2) dim(draws) # 2 x 50 x 10draws <- get_halton_normals(S = 50, N = 10, K_w = 2) dim(draws) # 2 x 50 x 10
Computes McFadden's pseudo R-squared (plain and adjusted) and the in-sample hit rate for a fitted model.
gof(object, null = c("equal_shares", "market_shares"), ...) ## S3 method for class 'choicer_fit' gof(object, null = c("equal_shares", "market_shares"), ...)gof(object, null = c("equal_shares", "market_shares"), ...) ## S3 method for class 'choicer_fit' gof(object, null = c("equal_shares", "market_shares"), ...)
object |
A fitted model object ( |
null |
Null model for the pseudo R-squared: |
... |
Additional arguments passed to methods. |
Two null models are available for the pseudo R-squared
(adjusted:
with the number of estimated
parameters):
"equal_shares" (default): every alternative in individual
's choice set is equally likely, so
. This is exact for
unbalanced choice sets and arbitrary weights.
"market_shares": the maximized log-likelihood of an
ASC-only model, with the
choice counts and the observed market shares (including the
outside option when present). This closed form is valid only for
balanced choice sets and uniform weights; otherwise an error suggests
refitting an ASC-only model.
The hit rate is the weighted share of individuals whose observed choice has
the highest predicted probability. When the model includes an outside
option, the outside good competes for the predicted maximum (its
probability is ), and an individual predicted to
choose the outside good is a hit when they actually did.
Both the null log-likelihood and the hit rate require the stored estimation
data; models fitted with keep_data = FALSE return NA fields with a
message.
A choicer_gof object: a list with loglik,
loglik_null, null, mcfadden_r2,
mcfadden_r2_adj, hit_rate, nobs, and
n_params.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) gof(fit) gof(fit, null = "market_shares")library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) gof(fit) gof(fit, null = "market_shares")
Utility to compute analytical Jacobian of random coefficient matrix transformed by vech (dVech(Sigma) / dTheta)
jacobian_vech_Sigma(L_params, K_w, rc_correlation = TRUE)jacobian_vech_Sigma(L_params, K_w, rc_correlation = TRUE)
L_params |
flattened choleski decomposition version of the random coefficient parameters matrix |
K_w |
dimension of the random coefficient parameter (symmetric) matrix |
rc_correlation |
whether random coefficients are correlated |
Jacobian (dVech(Sigma) / dTheta)
L_params <- c(log(0.8), 0.2, log(0.6)) J_mat <- jacobian_vech_Sigma(L_params, K_w = 2, rc_correlation = TRUE) dim(J_mat) # 3 x 3 for K_w=2 correlatedL_params <- c(log(0.8), 0.2, log(0.6)) J_mat <- jacobian_vech_Sigma(L_params, K_w = 2, rc_correlation = TRUE) dim(J_mat) # 3 x 3 for K_w=2 correlated
Returns a logLik object, which enables AIC() and BIC() automatically.
## S3 method for class 'choicer_fit' logLik(object, ...)## S3 method for class 'choicer_fit' logLik(object, ...)
object |
A choicer_fit object. |
... |
Additional arguments (ignored). |
A logLik object with df and nobs attributes.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) logLik(fit) AIC(fit) BIC(fit)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) logLik(fit) AIC(fit) BIC(fit)
Computes the expected maximum utility ("logsum" or inclusive value) for each choice situation, up to the additive constant of the extreme-value error:
MNL: .
MXL: , simulated by
averaging the log-sum-exp across the deterministic Halton draws
(regenerated from object$draws_info). Taking the log-sum-exp of
draw-averaged utilities would understate the expectation (Jensen's
inequality), so a dedicated kernel (mxl_logsum) is used.
NL: with the nest inclusive
value
(singleton nests have ).
When the model includes an outside option, its normalized utility
contributes an term to the sum.
logsum(object, newdata = NULL, ...) ## S3 method for class 'choicer_mnl' logsum(object, newdata = NULL, ...) ## S3 method for class 'choicer_mxl' logsum(object, newdata = NULL, ...) ## S3 method for class 'choicer_nl' logsum(object, newdata = NULL, ...)logsum(object, newdata = NULL, ...) ## S3 method for class 'choicer_mnl' logsum(object, newdata = NULL, ...) ## S3 method for class 'choicer_mxl' logsum(object, newdata = NULL, ...) ## S3 method for class 'choicer_nl' logsum(object, newdata = NULL, ...)
object |
A fitted model object ( |
newdata |
Optional counterfactual data: a data.frame in the fit-time
long format or a list with |
... |
Additional arguments passed to methods. |
Logsum levels depend on the ASC normalization (and, more generally,
on any additive utility normalization), so only logsum differences
between scenarios (e.g. via newdata) are meaningful.
Numeric vector with one logsum per choice situation. With a
data.frame newdata, choice situations are ordered by id (as in
predict()).
library(data.table) sim <- simulate_mnl_data(N = 500, J = 3, beta = c(0.8, -0.6), seed = 1, outside_option = FALSE, vary_choice_set = FALSE) fit <- run_mnlogit(sim$data, "id", "alt", "choice", c("x1", "x2")) head(logsum(fit))library(data.table) sim <- simulate_mnl_data(N = 500, J = 3, beta = c(0.8, -0.6), seed = 1, outside_option = FALSE, vary_choice_set = FALSE) fit <- run_mnlogit(sim$data, "id", "alt", "choice", c("x1", "x2")) head(logsum(fit))
Consumes a choicer_mc object and returns per-parameter asymptotic
diagnostics: Monte Carlo bias (with MC standard error), empirical SD of
the estimates, mean of the reported standard errors, SE-to-SD ratio
(information-matrix-equality check), Wald coverage at nominal 90 / 95 /
99 percent with Wilson confidence bands, moments of the studentized
statistic z = (theta_hat - theta_0) / se, and four normality tests on
z (Shapiro-Wilk, Anderson-Darling via goftest::ad.test, a hand-coded
Jarque-Bera statistic, and a one-sample Kolmogorov-Smirnov test against
N(0, 1)).
mc_asymptotics( mc, level = 0.95, se_col = "se", conv_threshold = 0.99, se_ratio_threshold_floor = 0.1 )mc_asymptotics( mc, level = 0.95, se_col = "se", conv_threshold = 0.99, se_ratio_threshold_floor = 0.1 )
mc |
A |
level |
Confidence level for the Wilson bands on coverage rates.
Defaults to |
se_col |
Name of the column in |
conv_threshold |
Numeric in |
se_ratio_threshold_floor |
Numeric scalar. Minimum half-width for
the |
Six logical pass / fail flags are attached to every parameter row:
pass_bias requires |bias_mc_se| < 3; pass_se_ratio requires
|se_ratio - 1| to lie within max(se_ratio_threshold_floor, 3 * 1.4 / sqrt(R_used)) (a noise-aware band that widens at small
R_used and tightens to the floor at large R_used); pass_cov95
requires the nominal 95 percent level to lie in the Wilson band for
empirical coverage; pass_skew requires |skew_z| < 0.3; pass_kurt
requires excess kurtosis of z in [-0.5, 1.0]; pass_convergence
requires the per-parameter convergence rate (R_used / R_total) to
meet conv_threshold.
Non-converged replications are excluded per parameter (reported in
R_excluded). Winsorized (5 percent / 95 percent) versions of bias,
sd_emp, and mean_se are reported in parallel columns
(bias_w, sd_emp_w, mean_se_w) so silent outlier exclusion is
transparent to the reader. Two robust SE-to-SD ratios accompany the
Hessian-mean-based se_ratio: se_ratio_med (median SE divided by
the empirical SD) and se_ratio_w (winsorized mean SE divided by the
winsorized empirical SD); both stay near 1 when 1-2 replications
produce near-singular Hessians that inflate mean_se. The companion
se_med column reports the median per-replication SE used by
se_ratio_med. Neither robust ratio drives a pass_* flag — they
are purely informational.
Winsorized z-moment counterparts (mean_z_w, sd_z_w, skew_z_w,
kurt_excess_z_w) are reported alongside the raw z-moments and feed an
additional pass_z_w flag (Winsorized skew within the same band as
pass_skew AND Winsorized excess kurtosis within the same band as
pass_kurt). A companion pass_cov95_w flag is TRUE when either
pass_cov95 is TRUE OR the per-rep Winsorized z-CI (the empirical
2.5 / 97.5 percentiles of the Winsorized z) covers truth-zero. These
two flags are designed for boundary scenarios (e.g., near-zero variance
components) where a small number of reps with vanishing SE inflate the
raw z-moments without indicating an estimator defect.
An object of class choicer_mc_asymptotics — a data.table
with one row per unique parameter and columns documented above — with
meta attached as an attribute (attr(x, "meta")).
sim_fun <- function(seed) simulate_mnl_data(N = 1000, J = 3, seed = seed) fit_fun <- function(sim) run_mnlogit( data = sim$data, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), outside_opt_label = 0L, include_outside_option = FALSE, use_asc = TRUE, control = list(print_level = 0L) ) mc <- monte_carlo(sim_fun, fit_fun, R = 50L, seed = 1L, progress = FALSE) mc_asymptotics(mc)sim_fun <- function(seed) simulate_mnl_data(N = 1000, J = 3, seed = seed) fit_fun <- function(sim) run_mnlogit( data = sim$data, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), outside_opt_label = 0L, include_outside_option = FALSE, use_asc = TRUE, control = list(print_level = 0L) ) mc <- monte_carlo(sim_fun, fit_fun, R = 50L, seed = 1L, progress = FALSE) mc_asymptotics(mc)
Computes the diversion ratio matrix DR(j->k), which measures the fraction of demand lost by alternative j that is captured by alternative k. For MNL: DR(j->k) = sum_n(w_n * P_nj * P_nk) / sum_n(w_n * P_nj * (1 - P_nj))
mnl_diversion_ratios_parallel( theta, X, alt_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE )mnl_diversion_ratios_parallel( theta, X, alt_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE )
theta |
K + J - 1 or K + J vector with model parameters |
X |
sum(M) x K design matrix with covariates. |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
use_asc |
whether to use alternative-specific constants |
include_outside_option |
whether to include outside option |
J x J matrix where entry (k, j) = DR(j->k). Diagonal is 0.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) dr <- mnl_diversion_ratios_parallel(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$M, fit$data$weights) drlibrary(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) dr <- mnl_diversion_ratios_parallel(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$M, fit$data$weights) dr
Computes the aggregate elasticity matrix (weighted average of individual elasticities) for the Multinomial Logit model.
mnl_elasticities_parallel( theta, X, alt_idx, choice_idx, M, weights, elast_var_idx, use_asc = TRUE, include_outside_option = FALSE )mnl_elasticities_parallel( theta, X, alt_idx, choice_idx, M, weights, elast_var_idx, use_asc = TRUE, include_outside_option = FALSE )
theta |
K + J - 1 or K + J vector with model parameters |
X |
sum(M) x K design matrix with covariates. |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing |
choice_idx |
N x 1 vector (kept for API consistency, but not used) |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
elast_var_idx |
1-based index of the column in X for which to compute the elasticity |
use_asc |
whether to use alternative-specific constants |
include_outside_option |
whether to include outside option |
J x J matrix of aggregate elasticities
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) elas <- mnl_elasticities_parallel(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$choice_idx, fit$data$M, fit$data$weights, elast_var_idx = 1L) elaslibrary(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) elas <- mnl_elasticities_parallel(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$choice_idx, fit$data$M, fit$data$weights, elast_var_idx = 1L) elas
Computes the log-likelihood and its gradient for the Multinomial Logit model using OpenMP for parallelization. Allows for inclusion of alternative-specific constants, outside option, and observation weights.
mnl_loglik_gradient_parallel( theta, X, alt_idx, choice_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE )mnl_loglik_gradient_parallel( theta, X, alt_idx, choice_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE )
theta |
K + J - 1 or K + J vector with model parameters |
X |
sum(M) x K design matrix with covariates. Stacks M[i] x K matrices for individual i. |
alt_idx |
sum(M) x 1 vector with indices of alternatives within each choice set; 1-based indexing |
choice_idx |
N x 1 vector with indices of chosen alternatives; 1-based indexing relative to X; 0 is used if include_outside_option=True |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
use_asc |
whether to use alternative-specific constants |
include_outside_option |
whether to include outside option normalized to 0 (if so, the outside option is not included in the data) |
List with loglikelihood and gradient evaluated at input arguments
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mnl_data(dt, "id", "alt", "choice", c("x1", "x2")) theta <- rep(0, ncol(d$X) + nrow(d$alt_mapping) - 1) result <- mnl_loglik_gradient_parallel(theta, d$X, d$alt_idx, d$choice_idx, d$M, d$weights) result$objective # negative log-likelihoodlibrary(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mnl_data(dt, "id", "alt", "choice", c("x1", "x2")) theta <- rep(0, ncol(d$X) + nrow(d$alt_mapping) - 1) result <- mnl_loglik_gradient_parallel(theta, d$X, d$alt_idx, d$choice_idx, d$M, d$weights) result$objective # negative log-likelihood
Hessian matrix for multinomial logit model
mnl_loglik_hessian_parallel( theta, X, alt_idx, choice_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE )mnl_loglik_hessian_parallel( theta, X, alt_idx, choice_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE )
theta |
K + J - 1 or K + J vector with model parameters |
X |
sum(M) x K design matrix with covariates. Stacks M[i] x K matrices for individual i. |
alt_idx |
sum(M) x 1 vector with indices of alternatives within each choice set; 1-based indexing |
choice_idx |
N x 1 vector with indices of chosen alternatives; 1-based indexing relative to X; 0 is used if include_outside_option=True |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
use_asc |
whether to use alternative-specific constants |
include_outside_option |
whether to include outside option normalized to 0 (if so, the outside option is not included in the data) |
Hessian matrix of the negative log-likelihood
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) H <- mnl_loglik_hessian_parallel(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$choice_idx, fit$data$M, fit$data$weights) dim(H)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) H <- mnl_loglik_hessian_parallel(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$choice_idx, fit$data$M, fit$data$weights) dim(H)
Prediction of choice probabilities and utilities based on fitted model
mnl_predict( theta, X, alt_idx, M, use_asc = TRUE, include_outside_option = FALSE )mnl_predict( theta, X, alt_idx, M, use_asc = TRUE, include_outside_option = FALSE )
theta |
K + J - 1 or K + J vector with model parameters |
X |
sum(M) x K design matrix with covariates. Stacks M[i] x K matrices for individual i. |
alt_idx |
sum(M) x 1 vector with indices of alternatives within each choice set; 1-based indexing |
M |
N x 1 vector with number of alternatives for each individual |
use_asc |
whether to use alternative-specific constants |
include_outside_option |
whether to include outside option normalized to 0 (if so, the outside option is not included in the data) |
List with choice probability and utility for each choice situation evaluated at input arguments
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) pred <- mnl_predict(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$M, use_asc = TRUE) head(pred$choice_prob)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) pred <- mnl_predict(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$M, use_asc = TRUE) head(pred$choice_prob)
Replicates a (DGP -> fit) cycle R times with independent seeds and
collects per-parameter estimates, standard errors, bias, and coverage.
Returns a choicer_mc object; call summary() for aggregated statistics
(mean estimate, bias, RMSE, coverage rate, convergence rate).
monte_carlo( sim_fun, fit_fun, R = 100, seed = 1L, parallel = FALSE, progress = TRUE, ... )monte_carlo( sim_fun, fit_fun, R = 100, seed = 1L, parallel = FALSE, progress = TRUE, ... )
sim_fun |
Function of |
fit_fun |
Function of a |
R |
Number of replications. |
seed |
Base integer seed. Replication |
parallel |
Logical; if |
progress |
Logical; print a one-line progress update per iteration in
serial mode. Ignored when |
... |
Unused. |
Each iteration calls sim_fun(seed = seed + r - 1L), then fit_fun(sim).
Write sim_fun as a closure that captures N, J, and other DGP settings
and forwards seed. Write fit_fun as a closure that takes a
choicer_sim and returns a fitted choicer_fit object, wrapping any
data-preparation, draws, or optimizer-control setup.
A choicer_mc object: a list with elements replications (a long
data.table with one row per estimated parameter per replication) and
meta (run metadata).
sim_fun <- function(seed) simulate_mnl_data(N = 1000, J = 4, seed = seed) fit_fun <- function(sim) run_mnlogit( data = sim$data, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), outside_opt_label = 0L, include_outside_option = FALSE, use_asc = TRUE, control = list(print_level = 0L) ) mc <- monte_carlo(sim_fun, fit_fun, R = 5, seed = 1L, progress = FALSE) summary(mc)sim_fun <- function(seed) simulate_mnl_data(N = 1000, J = 4, seed = seed) fit_fun <- function(sim) run_mnlogit( data = sim$data, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), outside_opt_label = 0L, include_outside_option = FALSE, use_asc = TRUE, control = list(print_level = 0L) ) mc <- monte_carlo(sim_fun, fit_fun, R = 5, seed = 1L, progress = FALSE) summary(mc)
Computes the BHHH approximation to the observed information matrix for the
Mixed Logit model: , where
is the per-individual score (gradient of ).
This outer product of gradients (OPG) estimator provides an alternative to
the analytical Hessian for standard error computation that scales to large
problems where the analytical Hessian is infeasible (e.g., many alternatives
or simulation draws).
mxl_bhhh_parallel( theta, X, W, alt_idx, choice_idx, M, weights, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )mxl_bhhh_parallel( theta, X, W, alt_idx, choice_idx, M, weights, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )
theta |
vector collecting model parameters (beta, mu, L, delta (ASCs)) |
X |
design matrix for covariates with fixed coefficients; sum(M_i) x K_x |
W |
design matrix for covariates with random coefficients; sum(M_i) x K_w or J x K_w |
alt_idx |
sum(M) x 1 vector with indices of alternatives within each choice set; 1-based indexing |
choice_idx |
N x 1 vector with indices of chosen alternatives; 1-based indexing relative to X; 0 is used if include_outside_option=True |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
eta_draws |
Array with choice situation draws; K_w x S x N |
rc_dist |
K_w x 1 integer vector indicating distribution of random coefficients: 0 = normal, 1 = log-normal |
rc_correlation |
whether random coefficients should be correlated |
rc_mean |
whether to estimate means for random coefficients. |
use_asc |
whether to use alternative-specific constants. |
include_outside_option |
whether to include outside option normalized to 0 (if so, the outside option is not included in the data) |
n_params x n_params PSD matrix representing the observed information
matrix estimated by the outer product of gradients (same sign convention
as the negated Hessian returned by mxl_hessian_parallel, so it can
be inverted directly to obtain vcov).
The BHHH/OPG estimator is only asymptotically equivalent to the Hessian-based information matrix at the true MLE. In finite samples it can underestimate standard errors, particularly when the model is mis-specified or away from the optimum.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) theta <- rep(0, ncol(d$X) + ncol(d$W) + nrow(d$alt_mapping) - 1) H <- mxl_bhhh_parallel(theta, d$X, d$W, d$alt_idx, d$choice_idx, d$M, d$weights, eta, rc_dist = rep(0L, ncol(d$W)), rc_correlation = FALSE, rc_mean = FALSE) dim(H)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) theta <- rep(0, ncol(d$X) + ncol(d$W) + nrow(d$alt_mapping) - 1) H <- mxl_bhhh_parallel(theta, d$X, d$W, d$alt_idx, d$choice_idx, d$M, d$weights, eta, rc_dist = rep(0L, ncol(d$W)), rc_correlation = FALSE, rc_mean = FALSE) dim(H)
Finds the ASC (delta) parameters such that predicted market shares match target shares, using the contraction mapping of Berry, Levinsohn, and Pakes (1995).
mxl_blp_contraction( delta, target_shares, X, W, beta, mu, L_params, alt_idx, M, weights, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, include_outside_option = FALSE, tol = 1e-08, max_iter = 1000L )mxl_blp_contraction( delta, target_shares, X, W, beta, mu, L_params, alt_idx, M, weights, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, include_outside_option = FALSE, tol = 1e-08, max_iter = 1000L )
delta |
J-1 or J vector with initial guess for deltas (ASCs) |
target_shares |
J vector with target market shares |
X |
design matrix for fixed coefficients; sum(M_i) x K_x |
W |
design matrix for random coefficients; sum(M_i) x K_w or J x K_w |
beta |
K_x vector with fixed coefficients |
mu |
K_w vector with mean parameters (raw, will be transformed if log-normal) |
L_params |
Cholesky parameters vector |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
eta_draws |
Array with draws; K_w x S x N |
rc_dist |
K_w vector indicating distribution (0=normal, 1=log-normal) |
rc_correlation |
whether random coefficients are correlated |
rc_mean |
whether mu parameters represent means (TRUE) or are zero (FALSE) |
include_outside_option |
whether outside option is included |
tol |
convergence tolerance (default 1e-8) |
max_iter |
maximum iterations (default 1000) |
vector with converged delta (ASC) values
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) fit <- run_mxlogit(input_data = d, eta_draws = eta) pm <- fit$param_map delta <- mxl_blp_contraction(rep(0, J), rep(1/J, J), d$X, d$W, coef(fit)[pm$beta], rep(0, ncol(d$W)), coef(fit)[pm$sigma], d$alt_idx, d$M, d$weights, eta, rc_dist = rep(0L, ncol(d$W)), rc_correlation = FALSE, rc_mean = FALSE) deltalibrary(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) fit <- run_mxlogit(input_data = d, eta_draws = eta) pm <- fit$param_map delta <- mxl_blp_contraction(rep(0, J), rep(1/J, J), d$X, d$W, coef(fit)[pm$beta], rep(0, ncol(d$W)), coef(fit)[pm$sigma], d$alt_idx, d$M, d$weights, eta, rc_dist = rep(0L, ncol(d$W)), rc_correlation = FALSE, rc_mean = FALSE) delta
Computes the matrix of attribute-based diversion ratios for a fitted
Mixed Logit model. DR(k, j) is the fraction of demand lost by alternative
j that is captured by alternative k when a marginal change in
alternative j's elast_var attribute reduces s_j.
mxl_diversion_ratios_parallel( theta, X, W, alt_idx, M, weights, eta_draws, rc_dist, elast_var_idx, is_random_coef, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )mxl_diversion_ratios_parallel( theta, X, W, alt_idx, M, weights, eta_draws, rc_dist, elast_var_idx, is_random_coef, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )
theta |
parameter vector (beta, [mu], L, delta) |
X |
design matrix for fixed coefficients; sum(M_i) x K_x |
W |
design matrix for random coefficients; sum(M_i) x K_w or J x K_w |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
eta_draws |
Array with draws; K_w x S x N |
rc_dist |
K_w vector indicating distribution (0=normal, 1=log-normal) |
elast_var_idx |
1-based index of the perturbed variable |
is_random_coef |
TRUE if the variable is in W (random coef), FALSE if in X (fixed) |
rc_correlation |
whether random coefficients are correlated |
rc_mean |
whether mu parameters are estimated |
use_asc |
whether ASCs are included |
include_outside_option |
whether outside option is included |
In MNL the per-draw realized coefficient is a constant, so it cancels in
the ratio and the result is independent of the variable chosen. In MXL,
the realized coefficient varies across individuals
and draws, so the diversion ratio depends on which attribute is perturbed.
For a variable with a fixed coefficient the dependence again vanishes
(the constant cancels); for a random-coefficient variable it does not.
J x J (or (J+1) x (J+1)) matrix of diversion ratios with zero diagonal.
Computes the aggregate elasticity matrix (weighted average of individual elasticities) for the Mixed Logit model. The elasticity E(i,j) represents the percentage change in the probability of choosing alternative i when the attribute of alternative j changes by 1%.
mxl_elasticities_parallel( theta, X, W, alt_idx, choice_idx, M, weights, eta_draws, rc_dist, elast_var_idx, is_random_coef, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )mxl_elasticities_parallel( theta, X, W, alt_idx, choice_idx, M, weights, eta_draws, rc_dist, elast_var_idx, is_random_coef, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )
theta |
parameter vector (beta, [mu], L, delta) |
X |
design matrix for fixed coefficients; sum(M_i) x K_x |
W |
design matrix for random coefficients; sum(M_i) x K_w or J x K_w |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing |
choice_idx |
N x 1 vector (kept for API consistency, not used) |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
eta_draws |
Array with draws; K_w x S x N |
rc_dist |
K_w vector indicating distribution (0=normal, 1=log-normal) |
elast_var_idx |
1-based index of the variable for elasticity computation |
is_random_coef |
TRUE if variable is in W (random coef), FALSE if in X (fixed coef) |
rc_correlation |
whether random coefficients are correlated |
rc_mean |
whether mu parameters are estimated |
use_asc |
whether ASCs are included |
include_outside_option |
whether outside option is included |
J x J matrix of aggregate elasticities
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) fit <- run_mxlogit(input_data = d, eta_draws = eta) elas <- mxl_elasticities_parallel(coef(fit), d$X, d$W, d$alt_idx, d$choice_idx, d$M, d$weights, eta, rc_dist = rep(0L, ncol(d$W)), elast_var_idx = 1L, is_random_coef = FALSE, rc_correlation = FALSE, rc_mean = FALSE) elaslibrary(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) fit <- run_mxlogit(input_data = d, eta_draws = eta) elas <- mxl_elasticities_parallel(coef(fit), d$X, d$W, d$alt_idx, d$choice_idx, d$M, d$weights, eta, rc_dist = rep(0L, ncol(d$W)), elast_var_idx = 1L, is_random_coef = FALSE, rc_correlation = FALSE, rc_mean = FALSE) elas
Computes the Hessian of the log-likelihood for the Mixed Logit model using OpenMP for parallelization. Mirrors the parameters of mxl_loglik_gradient_parallel.
mxl_hessian_parallel( theta, X, W, alt_idx, choice_idx, M, weights, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )mxl_hessian_parallel( theta, X, W, alt_idx, choice_idx, M, weights, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )
theta |
vector collecting model parameters (beta, mu, L, delta (ASCs)) |
X |
design matrix for covariates with fixed coefficients; sum(M_i) x K_x |
W |
design matrix for covariates with random coefficients; sum(M_i) x K_w or J x K_w |
alt_idx |
sum(M) x 1 vector with indices of alternatives within each choice set; 1-based indexing |
choice_idx |
N x 1 vector with indices of chosen alternatives; 1-based indexing relative to X; 0 is used if include_outside_option=True |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
eta_draws |
Array with choice situation draws; K_w x S x N |
rc_dist |
K_w x 1 integer vector indicating distribution of random coefficients: 0 = normal, 1 = log-normal |
rc_correlation |
whether random coefficients should be correlated |
rc_mean |
whether to estimate means for random coefficients. |
use_asc |
whether to use alternative-specific constants. |
include_outside_option |
whether to include outside option normalized to 0 (if so, the outside option is not included in the data) |
Hessian evaluated at input arguments
For log-normal random coefficients (rc_dist=1) with rc_mean=TRUE, the distribution is a shifted log-normal: beta_k = exp(mu_k) + exp(L_k * eta), where exp(mu_k) shifts the location and exp(L_k * eta) ~ LogNormal(0, sigma_k^2). This differs from the textbook parameterization exp(mu_k + L_k * eta).
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) theta <- rep(0, ncol(d$X) + ncol(d$W) + nrow(d$alt_mapping) - 1) H <- mxl_hessian_parallel(theta, d$X, d$W, d$alt_idx, d$choice_idx, d$M, d$weights, eta, rc_dist = rep(0L, ncol(d$W)), rc_correlation = FALSE, rc_mean = FALSE) dim(H)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) theta <- rep(0, ncol(d$X) + ncol(d$W) + nrow(d$alt_mapping) - 1) H <- mxl_hessian_parallel(theta, d$X, d$W, d$alt_idx, d$choice_idx, d$M, d$weights, eta, rc_dist = rep(0L, ncol(d$W)), rc_correlation = FALSE, rc_mean = FALSE) dim(H)
Computes the log-likelihood and its gradient for the Mixed Logit model using OpenMP for parallelization. Allows for inclusion of alternative-specific constants, outside option, observation weights, correlated random coefficients.
mxl_loglik_gradient_parallel( theta, X, W, alt_idx, choice_idx, M, weights, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )mxl_loglik_gradient_parallel( theta, X, W, alt_idx, choice_idx, M, weights, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )
theta |
vector collecting model parameters (beta, mu, L, delta (ASCs)) |
X |
design matrix for covariates with fixed coefficients; sum(M_i) x K_x |
W |
design matrix for covariates with random coefficients; sum(M_i) x K_w or J x K_w |
alt_idx |
sum(M) x 1 vector with indices of alternatives within each choice set; 1-based indexing |
choice_idx |
N x 1 vector with indices of chosen alternatives; 1-based indexing relative to X; 0 is used if include_outside_option=True |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
eta_draws |
Array with choice situation draws; K_w x S x N |
rc_dist |
K_w x 1 integer vector indicating distribution of random coefficients: 0 = normal, 1 = log-normal |
rc_correlation |
whether random coefficients should be correlated |
rc_mean |
whether to estimate means for random coefficients. If so, mean parameters (mu) should be included in theta after beta parameters. |
use_asc |
whether to use alternative-specific constants. If so, parameters should be included in theta after beta and L (and mu, if applicable). |
include_outside_option |
whether to include outside option normalized to 0 (if so, the outside option is not included in the data) |
List with loglikelihood and gradient evaluated at input arguments
For log-normal random coefficients (rc_dist=1) with rc_mean=TRUE, the distribution is a shifted log-normal: beta_k = exp(mu_k) + exp(L_k * eta), where exp(mu_k) shifts the location and exp(L_k * eta) ~ LogNormal(0, sigma_k^2). This differs from the textbook parameterization exp(mu_k + L_k * eta).
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) K_x <- ncol(d$X); K_w <- ncol(d$W); J <- nrow(d$alt_mapping) theta <- rep(0, K_x + K_w + J - 1) result <- mxl_loglik_gradient_parallel(theta, d$X, d$W, d$alt_idx, d$choice_idx, d$M, d$weights, eta, rc_dist = rep(0L, K_w), rc_correlation = FALSE, rc_mean = FALSE) result$objectivelibrary(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) K_x <- ncol(d$X); K_w <- ncol(d$W); J <- nrow(d$alt_mapping) theta <- rep(0, K_x + K_w + J - 1) result <- mxl_loglik_gradient_parallel(theta, d$X, d$W, d$alt_idx, d$choice_idx, d$M, d$weights, eta, rc_dist = rep(0L, K_w), rc_correlation = FALSE, rc_mean = FALSE) result$objective
Computes the simulated expected logsum (expected maximum utility, up to an additive constant) for each choice situation:
where the inner sum runs over individual i's alternatives and includes the
outside option's term when include_outside_option = TRUE.
The log-sum-exp must be averaged across draws: applying log-sum-exp to
the draw-averaged utilities returned by mxl_predict understates the
expectation because log-sum-exp is convex (Jensen's inequality).
mxl_logsum( theta, X, W, alt_idx, M, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )mxl_logsum( theta, X, W, alt_idx, M, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )
theta |
parameter vector (beta, [mu], L, delta) |
X |
design matrix for fixed coefficients; sum(M_i) x K_x |
W |
design matrix for random coefficients; sum(M_i) x K_w or J x K_w |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing |
M |
N x 1 vector with number of alternatives for each individual |
eta_draws |
Array with draws; K_w x S x N |
rc_dist |
K_w vector indicating distribution (0=normal, 1=log-normal) |
rc_correlation |
whether random coefficients are correlated |
rc_mean |
whether mu parameters are estimated |
use_asc |
whether ASCs are included |
include_outside_option |
whether the outside option is present |
Vector of length N with the simulated expected logsum per choice situation.
For log-normal random coefficients (rc_dist=1) with rc_mean=TRUE, the distribution is a shifted log-normal: beta_k = exp(mu_k) + exp(L_k * eta), where exp(mu_k) shifts the location and exp(L_k * eta) ~ LogNormal(0, sigma_k^2). This differs from the textbook parameterization exp(mu_k + L_k * eta).
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) fit <- run_mxlogit(input_data = d, eta_draws = eta) ls <- mxl_logsum(coef(fit), d$X, d$W, d$alt_idx, d$M, eta, rc_dist = rep(0L, ncol(d$W)), rc_correlation = FALSE, rc_mean = FALSE) head(ls)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) fit <- run_mxlogit(input_data = d, eta_draws = eta) ls <- mxl_logsum(coef(fit), d$X, d$W, d$alt_idx, d$M, eta, rc_dist = rep(0L, ncol(d$W)), rc_correlation = FALSE, rc_mean = FALSE) head(ls)
Returns the simulated choice probability for each (individual, alternative)
row of X, averaged over the supplied Halton draws. Mirrors mnl_predict.
mxl_predict( theta, X, W, alt_idx, M, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )mxl_predict( theta, X, W, alt_idx, M, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )
theta |
parameter vector (beta, [mu], L, delta) |
X |
design matrix for fixed coefficients; sum(M_i) x K_x |
W |
design matrix for random coefficients; sum(M_i) x K_w or J x K_w |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing |
M |
N x 1 vector with number of alternatives for each individual |
eta_draws |
Array with draws; K_w x S x N |
rc_dist |
K_w vector indicating distribution (0=normal, 1=log-normal) |
rc_correlation |
whether random coefficients are correlated |
rc_mean |
whether mu parameters are estimated |
use_asc |
whether ASCs are included |
include_outside_option |
whether the outside option is present |
List with choice_prob (length sum(M)), utility (length sum(M),
simulated mean of the deterministic + W*gamma component), and, when
include_outside_option = TRUE, choice_prob_outside (length N).
choicer_sim objectWraps simulated data, true parameter values, and DGP settings into a
classed list. Returned by simulate_mnl_data(), simulate_mxl_data(),
and simulate_nl_data(), and consumed by recovery_table().
new_choicer_sim(data, true_params, settings, model)new_choicer_sim(data, true_params, settings, model)
data |
A |
true_params |
Named list of true DGP parameters
(e.g. |
settings |
Named list of DGP settings (e.g. |
model |
Character scalar: |
A list of class choicer_sim.
Damped iterative fixed point recovering delta given target shares, using the
NL probability structure. damping = 1 reproduces the plain BLP update.
nl_blp_contraction( delta, target_shares, X, beta, lambda, alt_idx, nest_idx, M, weights, include_outside_option = FALSE, damping = 1, tol = 1e-08, max_iter = 1000L )nl_blp_contraction( delta, target_shares, X, beta, lambda, alt_idx, nest_idx, M, weights, include_outside_option = FALSE, damping = 1, tol = 1e-08, max_iter = 1000L )
delta |
J x 1 vector with initial guess for deltas (ASCs). |
target_shares |
vector with target shares (outside-option share first when present). |
X |
sum(M) x K design matrix with covariates. |
beta |
K x 1 vector with fixed coefficients. |
lambda |
full nest dissimilarity vector of length n_nests (singletons = 1). |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing. |
nest_idx |
J x 1 vector with nest indices for each alternative; 1-based indexing. |
M |
N x 1 vector with number of alternatives for each individual. |
weights |
N x 1 vector with weights for each observation. |
include_outside_option |
whether to include outside option normalized to V=0, lambda=1. |
damping |
damping factor for the update (default 1.0 = plain BLP). |
tol |
convergence tolerance. |
max_iter |
maximum number of iterations. |
vector with contraction's delta (ASCs) output.
library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit(dt, "id", "alt", "choice", c("x1", "x2"), "nest") beta <- coef(fit)[fit$param_map$beta] lambda <- rep(1, length(unique(fit$data$nest_idx))) lambda[as.integer(names(which(table(fit$data$nest_idx) > 1)))] <- coef(fit)[fit$param_map$lambda] delta <- nl_blp_contraction(rep(0, J), rep(1/J, J), fit$data$X, beta, lambda, fit$data$alt_idx, fit$data$nest_idx, fit$data$M, fit$data$weights) deltalibrary(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit(dt, "id", "alt", "choice", c("x1", "x2"), "nest") beta <- coef(fit)[fit$param_map$beta] lambda <- rep(1, length(unique(fit$data$nest_idx))) lambda[as.integer(names(which(table(fit$data$nest_idx) > 1)))] <- coef(fit)[fit$param_map$lambda] delta <- nl_blp_contraction(rep(0, J), rep(1/J, J), fit$data$X, beta, lambda, fit$data$alt_idx, fit$data$nest_idx, fit$data$M, fit$data$weights) delta
Computes the diversion ratio matrix DR(j->k) for the Nested Logit model. Entry (k, j) = fraction of demand lost by alternative j captured by k. Reduces to the MNL diversion ratios when all lambda = 1.
nl_diversion_ratios_parallel( theta, X, alt_idx, nest_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE )nl_diversion_ratios_parallel( theta, X, alt_idx, nest_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE )
theta |
(K + n_non_singleton_nests + n_delta) vector with model
parameters. Order: |
X |
sum(M) x K design matrix with covariates. |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing. |
nest_idx |
J x 1 vector with nest indices for each alternative; 1-based indexing. |
M |
N x 1 vector with number of alternatives for each individual. |
weights |
N x 1 vector with weights for each observation. |
use_asc |
whether to use alternative-specific constants. |
include_outside_option |
whether to include outside option normalized to V=0, lambda=1. |
J x J matrix where entry (k, j) = DR(j->k). Diagonal is 0.
library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit(dt, "id", "alt", "choice", c("x1", "x2"), "nest") dr <- nl_diversion_ratios_parallel(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$nest_idx, fit$data$M, fit$data$weights) drlibrary(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit(dt, "id", "alt", "choice", c("x1", "x2"), "nest") dr <- nl_diversion_ratios_parallel(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$nest_idx, fit$data$M, fit$data$weights) dr
Computes the aggregate (weighted-average) elasticity matrix for the Nested Logit model. Reduces to the MNL elasticities when all lambda = 1.
nl_elasticities_parallel( theta, X, alt_idx, choice_idx, nest_idx, M, weights, elast_var_idx, use_asc = TRUE, include_outside_option = FALSE )nl_elasticities_parallel( theta, X, alt_idx, choice_idx, nest_idx, M, weights, elast_var_idx, use_asc = TRUE, include_outside_option = FALSE )
theta |
(K + n_non_singleton_nests + n_delta) vector with model
parameters. Order: |
X |
sum(M) x K design matrix with covariates. |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing. |
choice_idx |
N x 1 vector (kept for API consistency, not used). |
nest_idx |
J x 1 vector with nest indices for each alternative; 1-based indexing. |
M |
N x 1 vector with number of alternatives for each individual. |
weights |
N x 1 vector with weights for each observation. |
elast_var_idx |
1-based index of the column in X for which to compute the elasticity. |
use_asc |
whether to use alternative-specific constants. |
include_outside_option |
whether to include outside option normalized to V=0, lambda=1. |
J x J matrix of aggregate elasticities (row = responding alt, col = perturbed alt).
library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit(dt, "id", "alt", "choice", c("x1", "x2"), "nest") elas <- nl_elasticities_parallel(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$choice_idx, fit$data$nest_idx, fit$data$M, fit$data$weights, elast_var_idx = 1L) elaslibrary(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit(dt, "id", "alt", "choice", c("x1", "x2"), "nest") elas <- nl_elasticities_parallel(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$choice_idx, fit$data$nest_idx, fit$data$M, fit$data$weights, elast_var_idx = 1L) elas
Computes the log-likelihood and its gradient for the Nested Logit model using OpenMP for parallelization. Especially handles singleton nests by fixing their lambda parameters to 1. Only non-singleton nests have a inclusive value coefficient estimated in theta.
nl_loglik_gradient_parallel( theta, X, alt_idx, choice_idx, nest_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE )nl_loglik_gradient_parallel( theta, X, alt_idx, choice_idx, nest_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE )
theta |
(K + n_non_singleton_nests + n_delta) vector with model parameters.
Order: |
X |
sum(M) x K design matrix with covariates. |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing. |
choice_idx |
N x 1 vector with indices of chosen alternatives; 0 for outside option, 1-based index relative to rows in X_i otherwise. |
nest_idx |
J x 1 vector with indices of nests for each alternative; 1-based indexing (1 to n_nests). |
M |
N x 1 vector with number of alternatives for each individual. |
weights |
N x 1 vector with weights for each observation. |
use_asc |
whether to use alternative-specific constants. |
include_outside_option |
whether to include outside option normalized to V=0, lambda=1. |
List with loglikelihood and gradient evaluated at input arguments
library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_nl_data(dt, "id", "alt", "choice", c("x1", "x2"), "nest") K_x <- ncol(d$X); K_l <- length(unique(d$nest_idx)) theta <- c(rep(0, K_x), rep(0.5, K_l), rep(0, J - 1)) result <- nl_loglik_gradient_parallel(theta, d$X, d$alt_idx, d$choice_idx, d$nest_idx, d$M, d$weights) result$objectivelibrary(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_nl_data(dt, "id", "alt", "choice", c("x1", "x2"), "nest") K_x <- ncol(d$X); K_l <- length(unique(d$nest_idx)) theta <- c(rep(0, K_x), rep(0.5, K_l), rep(0, J - 1)) result <- nl_loglik_gradient_parallel(theta, d$X, d$alt_idx, d$choice_idx, d$nest_idx, d$M, d$weights) result$objective
Numerical Hessian of the log-likelihood via finite differences
nl_loglik_numeric_hessian( theta, X, alt_idx, choice_idx, nest_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE, eps = 1e-06 )nl_loglik_numeric_hessian( theta, X, alt_idx, choice_idx, nest_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE, eps = 1e-06 )
theta |
(K + n_delta + n_nests) vector with model parameters.
Order: |
X |
sum(M) x K design matrix with covariates. |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing. |
choice_idx |
N x 1 vector with indices of chosen alternatives; 0 for outside option, 1-based index relative to rows in X_i otherwise. |
nest_idx |
J x 1 vector with indices of nests for each alternative; 1-based indexing (1 to n_nests). |
M |
N x 1 vector with number of alternatives for each individual. |
weights |
N x 1 vector with weights for each observation. |
use_asc |
whether to use alternative-specific constants. |
include_outside_option |
whether to include outside option normalized to V=0, lambda=1. |
eps |
finite difference step size |
Hessian evaluated at input arguments
library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_nl_data(dt, "id", "alt", "choice", c("x1", "x2"), "nest") K_x <- ncol(d$X); K_l <- length(unique(d$nest_idx)) theta <- c(rep(0, K_x), rep(0.5, K_l), rep(0, J - 1)) H <- nl_loglik_numeric_hessian(theta, d$X, d$alt_idx, d$choice_idx, d$nest_idx, d$M, d$weights) dim(H)library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_nl_data(dt, "id", "alt", "choice", c("x1", "x2"), "nest") K_x <- ncol(d$X); K_l <- length(unique(d$nest_idx)) theta <- c(rep(0, K_x), rep(0.5, K_l), rep(0, J - 1)) H <- nl_loglik_numeric_hessian(theta, d$X, d$alt_idx, d$choice_idx, d$nest_idx, d$M, d$weights) dim(H)
Prediction of choice probabilities and utilities for the Nested Logit model
nl_predict( theta, X, alt_idx, M, nest_idx, use_asc = TRUE, include_outside_option = FALSE )nl_predict( theta, X, alt_idx, M, nest_idx, use_asc = TRUE, include_outside_option = FALSE )
theta |
(K + n_non_singleton_nests + n_delta) vector with model
parameters. Order: |
X |
sum(M) x K design matrix with covariates. |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing. |
M |
N x 1 vector with number of alternatives for each individual. |
nest_idx |
J x 1 vector with nest indices for each alternative; 1-based indexing. |
use_asc |
whether to use alternative-specific constants. |
include_outside_option |
whether to include outside option normalized to V=0, lambda=1. |
List with choice_prob (joint P_ij per stacked row) and utility (V_ij).
library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit(dt, "id", "alt", "choice", c("x1", "x2"), "nest") pred <- nl_predict(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$M, fit$data$nest_idx, use_asc = TRUE) head(pred$choice_prob)library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit(dt, "id", "alt", "choice", c("x1", "x2"), "nest") pred <- nl_predict(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$M, fit$data$nest_idx, use_asc = TRUE) head(pred$choice_prob)
Extract number of observations from a choicer_fit object
## S3 method for class 'choicer_fit' nobs(object, ...)## S3 method for class 'choicer_fit' nobs(object, ...)
object |
A choicer_fit object. |
... |
Additional arguments (ignored). |
Integer number of choice situations.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) nobs(fit)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) nobs(fit)
Computes choice probabilities or aggregate market shares, either for the
data used at fit time (default) or for counterfactual newdata.
## S3 method for class 'choicer_mnl' predict( object, type = c("probabilities", "shares"), newdata = NULL, weights = NULL, ... )## S3 method for class 'choicer_mnl' predict( object, type = c("probabilities", "shares"), newdata = NULL, weights = NULL, ... )
object |
A choicer_mnl object. |
type |
One of "probabilities" (individual-level choice probabilities) or "shares" (aggregate market shares). |
newdata |
Optional data for counterfactual prediction. Either:
When |
weights |
Optional numeric vector with one weight per choice situation,
used for |
... |
Additional arguments (ignored). |
For "probabilities": a list with choice_prob and utility vectors.
For "shares": a named numeric vector of market shares per alternative.
With a data.frame newdata, rows are ordered by id, then by fit-time
alternative code (alt_int in object$alt_mapping).
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) predict(fit, type = "shares") predict(fit, type = "probabilities") # Counterfactual: increase x1 for alternative 2 dt_cf <- copy(dt)[alt == 2, x1 := x1 + 1] predict(fit, type = "shares", newdata = dt_cf)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) predict(fit, type = "shares") predict(fit, type = "probabilities") # Counterfactual: increase x1 for alternative 2 dt_cf <- copy(dt)[alt == 2, x1 := x1 + 1] predict(fit, type = "shares", newdata = dt_cf)
Computes simulated choice probabilities or aggregate market shares using
deterministic Halton draws, either for the data used at fit time (default)
or for counterfactual newdata.
## S3 method for class 'choicer_mxl' predict( object, type = c("probabilities", "shares"), newdata = NULL, weights = NULL, ... )## S3 method for class 'choicer_mxl' predict( object, type = c("probabilities", "shares"), newdata = NULL, weights = NULL, ... )
object |
A choicer_mxl object. |
type |
Either "probabilities" (per-observation simulated choice probabilities) or "shares" (aggregate simulated market shares). |
newdata |
Optional data for counterfactual prediction. Either:
When |
weights |
Optional numeric vector with one weight per choice situation,
used for |
... |
Additional arguments (ignored). |
For "probabilities": a list with choice_prob and utility
vectors averaged across simulation draws. For "shares": a named numeric
vector of simulated market shares per alternative. With a data.frame
newdata, rows are ordered by id, then by fit-time alternative code
(alt_int in object$alt_mapping).
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) predict(fit, type = "shares") predict(fit, type = "probabilities")library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) predict(fit, type = "shares") predict(fit, type = "probabilities")
Computes choice probabilities or aggregate market shares, either for the
data used at fit time (default) or for counterfactual newdata.
## S3 method for class 'choicer_nl' predict( object, type = c("probabilities", "shares"), newdata = NULL, weights = NULL, ... )## S3 method for class 'choicer_nl' predict( object, type = c("probabilities", "shares"), newdata = NULL, weights = NULL, ... )
object |
A choicer_nl object. |
type |
One of "probabilities" (individual-level choice probabilities) or "shares" (aggregate market shares). |
newdata |
Optional data for counterfactual prediction. Either:
When |
weights |
Optional numeric vector with one weight per choice situation,
used for |
... |
Additional arguments (ignored). |
For "probabilities": a list with choice_prob and utility vectors.
For "shares": a named numeric vector of market shares per alternative.
With a data.frame newdata, rows are ordered by id, then by fit-time
alternative code (alt_int in object$alt_mapping).
library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, nest := rep(c(1L, 1L, 2L, 2L), N)] dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit(dt, "id", "alt", "choice", c("x1", "x2"), "nest") predict(fit, type = "shares") predict(fit, type = "probabilities")library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, nest := rep(c(1L, 1L, 2L, 2L), N)] dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit(dt, "id", "alt", "choice", c("x1", "x2"), "nest") predict(fit, type = "shares") predict(fit, type = "probabilities")
mnl_loglik_gradient_parallel()
Prepares and validates inputs for multinomial logit estimation routine.
prepare_mnl_data( data, id_col, alt_col, choice_col, covariate_cols, weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE )prepare_mnl_data( data, id_col, alt_col, choice_col, covariate_cols, weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE )
data |
Data frame containing choice data. |
id_col |
Name of the column identifying choice situations (individuals). |
alt_col |
Name of the column identifying alternatives. |
choice_col |
Name of the column indicating chosen alternative (1 = chosen, 0 = not chosen). |
covariate_cols |
Vector of names of columns to be used as covariates. |
weights |
Optional vector of weights for each choice situation. If |
outside_opt_label |
Label for the outside option (if any). If |
include_outside_option |
Logical indicating whether to include an outside option in the model. |
A list containing:
X: Design matrix (sum(M) x K).
alt_idx: Integer vector of alternative indices.
choice_idx: Integer vector of chosen alternative indices.
M: Integer vector with number of alternatives per choice situation.
N: Number of choice situations.
weights: Vector of weights.
include_outside_option: Logical flag.
alt_mapping: Data.table mapping alternatives to summary statistics.
dropped_cols: Names of columns dropped due to collinearity, if any.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] input <- prepare_mnl_data(dt, "id", "alt", "choice", c("x1", "x2")) str(input$X) input$alt_mappinglibrary(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] input <- prepare_mnl_data(dt, "id", "alt", "choice", c("x1", "x2")) str(input$X) input$alt_mapping
mxl_loglik_gradient_parallel()
Prepares and validates inputs for mixed logit estimation routine.
prepare_mxl_data( data, id_col, alt_col, choice_col, covariate_cols, random_var_cols, weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE, rc_correlation = FALSE, weights_col = NULL )prepare_mxl_data( data, id_col, alt_col, choice_col, covariate_cols, random_var_cols, weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE, rc_correlation = FALSE, weights_col = NULL )
data |
Data frame containing choice data |
id_col |
Name of the column identifying choice situations (individuals) |
alt_col |
Name of the column identifying alternatives |
choice_col |
Name of the column indicating chosen alternative (1 = chosen, 0 = not chosen) |
covariate_cols |
Vector of names of columns to be used as covariates |
random_var_cols |
Vector of names of columns to be used as random variables |
weights |
Optional vector of weights for each choice situation. If NULL, equal weights are used. |
outside_opt_label |
Label for the outside option (if any). If NULL, no outside option is assumed. |
include_outside_option |
Logical indicating whether to include an outside option in the model. |
rc_correlation |
Logical indicating whether random coefficients are correlated. Default is FALSE. |
weights_col |
Optional name of a column in |
A choicer_data_mxl object (list) containing:
X: Fixed-coefficient design matrix (sum(M) x K_x).
W: Random-coefficient design matrix (sum(M) x K_w).
alt_idx: Integer vector of alternative indices.
choice_idx: Integer vector of chosen alternative indices.
M: Integer vector with number of alternatives per choice situation.
N: Number of choice situations.
weights: Vector of weights.
include_outside_option: Logical flag.
rc_correlation: Logical flag.
alt_mapping: data.table mapping alternatives to summary statistics.
dropped_cols: Names of columns dropped due to collinearity, if any.
data_spec: List with column-name metadata.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N), w2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] input <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", c("w1", "w2")) str(input$X) str(input$W)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N), w2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] input <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", c("w1", "w2")) str(input$X) str(input$W)
Validates inputs, builds design matrices, and constructs nest structure
for nested logit estimation. Calls prepare_mnl_data internally
for base data preparation, then adds nest-specific fields.
prepare_nl_data( data, id_col, alt_col, choice_col, covariate_cols, nest_col, weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE )prepare_nl_data( data, id_col, alt_col, choice_col, covariate_cols, nest_col, weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE )
data |
Data frame containing choice data. |
id_col |
Name of the column identifying choice situations (individuals). |
alt_col |
Name of the column identifying alternatives. |
choice_col |
Name of the column indicating chosen alternative (1 = chosen, 0 = not chosen). |
covariate_cols |
Vector of names of columns to be used as covariates. |
nest_col |
Name of the column mapping each alternative to its nest. Every alternative must belong to exactly one nest. |
weights |
Optional vector of weights for each choice situation. If |
outside_opt_label |
Label for the outside option (if any). If |
include_outside_option |
Logical indicating whether to include an outside option in the model. |
A choicer_data_nl object (list) containing:
All fields from prepare_mnl_data (X, alt_idx,
choice_idx, M, N, weights, include_outside_option,
alt_mapping, dropped_cols).
nest_idx: Integer vector of length J mapping each alternative
(in alt_mapping row order) to its nest.
data_spec: List with column name metadata including nest_col.
library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] input <- prepare_nl_data(dt, "id", "alt", "choice", c("x1", "x2"), "nest") input$nest_idx input$alt_mappinglibrary(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] input <- prepare_nl_data(dt, "id", "alt", "choice", c("x1", "x2"), "nest") input$nest_idx input$alt_mapping
Print a consumer surplus summary
## S3 method for class 'choicer_cs' print(x, digits = 4, ...)## S3 method for class 'choicer_cs' print(x, digits = 4, ...)
x |
A |
digits |
Number of significant digits to print. |
... |
Additional arguments (ignored). |
The object invisibly.
Prints a brief summary of the fitted model.
## S3 method for class 'choicer_fit' print(x, ...)## S3 method for class 'choicer_fit' print(x, ...)
x |
A choicer_fit object. |
... |
Additional arguments (ignored). |
The object invisibly.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) print(fit)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) print(fit)
Print goodness-of-fit measures
## S3 method for class 'choicer_gof' print(x, ...)## S3 method for class 'choicer_gof' print(x, ...)
x |
A |
... |
Additional arguments (ignored). |
The object invisibly.
Print a WTP table
## S3 method for class 'choicer_wtp' print(x, digits = 4, ...)## S3 method for class 'choicer_wtp' print(x, digits = 4, ...)
x |
A |
digits |
Number of significant digits to print. |
... |
Additional arguments passed to |
The object invisibly.
Print summary for multinomial logit model
## S3 method for class 'summary.choicer_mnl' print(x, ...)## S3 method for class 'summary.choicer_mnl' print(x, ...)
x |
A summary.choicer_mnl object. |
... |
Additional arguments (ignored). |
The object invisibly.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) print(summary(fit))library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) print(summary(fit))
Print summary for mixed logit model
## S3 method for class 'summary.choicer_mxl' print(x, ...)## S3 method for class 'summary.choicer_mxl' print(x, ...)
x |
A summary.choicer_mxl object. |
... |
Additional arguments (ignored). |
The object invisibly.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) print(summary(fit))library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) print(summary(fit))
Print summary for nested logit model
## S3 method for class 'summary.choicer_nl' print(x, ...)## S3 method for class 'summary.choicer_nl' print(x, ...)
x |
A summary.choicer_nl object. |
... |
Additional arguments (ignored). |
The object invisibly.
library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), nest_col = "nest" ) print(summary(fit))library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), nest_col = "nest" ) print(summary(fit))
Compares fitted coefficients to a set of true parameter values on the same scale as the estimator's internal parameterization. Returns one row per estimated parameter with true value, estimate, standard error, bias, relative bias (%), z-score against the truth, Wald CI, and a coverage indicator.
recovery_table(object, truth = NULL, level = 0.95, ...) ## S3 method for class 'choicer_fit' recovery_table(object, truth = NULL, level = 0.95, ...) ## S3 method for class 'choicer_mc' recovery_table(object, truth = NULL, level = 0.95, ...)recovery_table(object, truth = NULL, level = 0.95, ...) ## S3 method for class 'choicer_fit' recovery_table(object, truth = NULL, level = 0.95, ...) ## S3 method for class 'choicer_mc' recovery_table(object, truth = NULL, level = 0.95, ...)
object |
A |
truth |
Either a |
level |
Confidence level for the Wald CI and coverage indicator.
Default |
... |
Unused. |
For MXL fits the sigma block compares the raw Cholesky parameters
(L_params), not the reconstructed covariance matrix. For log-normal
random-coefficient means the raw mu estimate is compared directly; callers
who want recovery on the DGP scale (exp(mu)) should transform both sides
before calling.
When the estimator has normalized the first inside alternative's ASC to
zero (which happens for MNL/MXL with include_outside_option = FALSE and
no outside option baked into the fit), the first entry of truth$delta is
dropped before the comparison so lengths match.
See class-specific methods.
recovery_table(choicer_fit): Returns a choicer_recovery object (a
data.table) with columns parameter, group, true, estimate,
se, bias, rel_bias_pct, z_vs_true, lower_ci, upper_ci,
covers.
recovery_table(choicer_mc): For a choicer_mc object, delegates to
summary(object, level) and returns a choicer_mc_summary. Inspect
object$replications directly for per-rep detail.
sim <- simulate_mnl_data(N = 2000, J = 4, seed = 123) fit <- run_mnlogit( data = sim$data, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), outside_opt_label = 0L, include_outside_option = FALSE, use_asc = TRUE ) recovery_table(fit, sim)sim <- simulate_mnl_data(N = 2000, J = 4, seed = 123) fit <- run_mnlogit( data = sim$data, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), outside_opt_label = 0L, include_outside_option = FALSE, use_asc = TRUE ) recovery_table(fit, sim)
Estimates a multinomial logit model via maximum likelihood.
run_mnlogit( data = NULL, id_col = NULL, alt_col = NULL, choice_col = NULL, covariate_cols = NULL, input_data = NULL, optimizer = NULL, control = list(), weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE, use_asc = TRUE, keep_data = TRUE, scale_vars = c("none", "sd", "mad", "iqr"), nloptr_opts = NULL )run_mnlogit( data = NULL, id_col = NULL, alt_col = NULL, choice_col = NULL, covariate_cols = NULL, input_data = NULL, optimizer = NULL, control = list(), weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE, use_asc = TRUE, keep_data = TRUE, scale_vars = c("none", "sd", "mad", "iqr"), nloptr_opts = NULL )
data |
Data frame containing choice data (convenience workflow).
Mutually exclusive with |
id_col |
Name of the column identifying choice situations (individuals). |
alt_col |
Name of the column identifying alternatives. |
choice_col |
Name of the column indicating chosen alternative (1 = chosen, 0 = not chosen). |
covariate_cols |
Vector of names of columns to be used as covariates. |
input_data |
List output from |
optimizer |
Optimizer to use: |
control |
List of optimizer-specific control parameters passed to the
chosen optimizer (e.g., |
weights |
Optional vector of weights for each choice situation. If |
outside_opt_label |
Label for the outside option (if any). If |
include_outside_option |
Logical indicating whether to include an outside option in the model. |
use_asc |
Logical indicating whether to include alternative-specific constants (ASCs) in the model. |
keep_data |
Logical. If |
scale_vars |
Pre-estimation column scaling for the design matrix. One of
|
nloptr_opts |
Deprecated. Use |
Two workflows are supported:
Supply data and column names. Data
preparation (prepare_mnl_data) is handled automatically.
Call prepare_mnl_data yourself and pass the
result via input_data.
A choicer_mnl object (inherits from choicer_fit).
Standard S3 methods available: summary(), coef(), vcov(),
logLik(), AIC(), BIC(), nobs(), predict().
library(data.table) set.seed(42) N <- 100; J <- 3; beta_true <- c(1.0, -0.5) dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, V := drop(as.matrix(.SD) %*% beta_true), .SDcols = c("x1","x2")] dt[, prob := exp(V) / sum(exp(V)), by = id] dt[, choice := as.integer(alt == sample(alt, 1, prob = prob)), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) summary(fit) coef(fit) AIC(fit) predict(fit, type = "shares")library(data.table) set.seed(42) N <- 100; J <- 3; beta_true <- c(1.0, -0.5) dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, V := drop(as.matrix(.SD) %*% beta_true), .SDcols = c("x1","x2")] dt[, prob := exp(V) / sum(exp(V)), by = id] dt[, choice := as.integer(alt == sample(alt, 1, prob = prob)), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) summary(fit) coef(fit) AIC(fit) predict(fit, type = "shares")
Estimates a mixed logit model via simulated maximum likelihood.
run_mxlogit( data = NULL, id_col = NULL, alt_col = NULL, choice_col = NULL, covariate_cols = NULL, random_var_cols = NULL, input_data = NULL, eta_draws = NULL, S = 100L, rc_dist = NULL, rc_mean = FALSE, rc_correlation = FALSE, use_asc = TRUE, theta_init = NULL, lower = NULL, upper = NULL, optimizer = NULL, control = list(), se_method = c("hessian", "bhhh", "sandwich"), scale_vars = c("none", "sd", "mad", "iqr"), weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE, keep_data = TRUE, nloptr_opts = NULL, weights_col = NULL )run_mxlogit( data = NULL, id_col = NULL, alt_col = NULL, choice_col = NULL, covariate_cols = NULL, random_var_cols = NULL, input_data = NULL, eta_draws = NULL, S = 100L, rc_dist = NULL, rc_mean = FALSE, rc_correlation = FALSE, use_asc = TRUE, theta_init = NULL, lower = NULL, upper = NULL, optimizer = NULL, control = list(), se_method = c("hessian", "bhhh", "sandwich"), scale_vars = c("none", "sd", "mad", "iqr"), weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE, keep_data = TRUE, nloptr_opts = NULL, weights_col = NULL )
data |
Data frame containing choice data (convenience workflow).
Mutually exclusive with |
id_col |
Name of the column identifying choice situations. |
alt_col |
Name of the column identifying alternatives. |
choice_col |
Name of the column indicating chosen alternative (1/0). |
covariate_cols |
Vector of column names for fixed covariates. |
random_var_cols |
Vector of column names for random coefficients. |
input_data |
List output from |
eta_draws |
Array of shape K_w x S x N with standard normal draws.
Required for the advanced workflow; auto-generated from |
S |
Integer number of Halton draws per individual (convenience workflow only). Default 100. |
rc_dist |
Integer vector indicating distribution of random coefficients (0 = normal, 1 = log-normal). Default: all normal. |
rc_mean |
Logical indicating whether to estimate means for random coefficients. |
rc_correlation |
Logical indicating whether random coefficients are
correlated (convenience workflow). Ignored when |
use_asc |
Logical indicating whether to include alternative-specific constants. |
theta_init |
Initial parameter vector in natural-scale units. If
|
lower, upper
|
Optional parameter bounds for the optimizer, in
natural-scale units (forward-transformed internally to scaled space when
|
optimizer |
Optimizer to use: |
control |
List of optimizer-specific control parameters. |
se_method |
Method for computing standard errors. One of
|
scale_vars |
Pre-estimation column scaling for design matrices. One of
|
weights |
Optional weight vector (convenience workflow). If |
outside_opt_label |
Label for the outside option (convenience workflow). |
include_outside_option |
Logical whether to include an outside option (convenience workflow). |
keep_data |
Logical. If |
nloptr_opts |
Deprecated. Use |
weights_col |
Optional name of a column in |
Two workflows are supported:
Supply data and column names. Data preparation
(prepare_mxl_data) and Halton draw generation
(get_halton_normals) are handled automatically.
Call prepare_mxl_data and
get_halton_normals yourself, then pass the results via
input_data and eta_draws.
A choicer_mxl object (inherits from choicer_fit).
Standard S3 methods available: summary(), coef(),
vcov(), logLik(), AIC(), BIC(),
nobs().
library(data.table) set.seed(42) N <- 100; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N), w2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = c("w1", "w2"), S = 50L ) summary(fit)library(data.table) set.seed(42) N <- 100; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N), w2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = c("w1", "w2"), S = 50L ) summary(fit)
Estimates a nested logit model via maximum likelihood.
run_nestlogit( data = NULL, id_col = NULL, alt_col = NULL, choice_col = NULL, covariate_cols = NULL, nest_col = NULL, input_data = NULL, use_asc = TRUE, theta_init = NULL, param_names = NULL, optimizer = NULL, control = list(), weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE, keep_data = TRUE, nloptr_opts = NULL )run_nestlogit( data = NULL, id_col = NULL, alt_col = NULL, choice_col = NULL, covariate_cols = NULL, nest_col = NULL, input_data = NULL, use_asc = TRUE, theta_init = NULL, param_names = NULL, optimizer = NULL, control = list(), weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE, keep_data = TRUE, nloptr_opts = NULL )
data |
Data frame containing choice data (convenience workflow).
Mutually exclusive with |
id_col |
Name of the column identifying choice situations. |
alt_col |
Name of the column identifying alternatives. |
choice_col |
Name of the column indicating chosen alternative (1/0). |
covariate_cols |
Vector of column names for covariates. |
nest_col |
Name of the column mapping each alternative to its nest (convenience workflow). |
input_data |
List containing prepared input data for estimation
(advanced workflow). Mutually exclusive with |
use_asc |
Logical indicating whether to include alternative specific constants (ASCs). |
theta_init |
Optional initial parameter vector. If |
param_names |
Optional vector of parameter names. If |
optimizer |
Optimizer to use: |
control |
List of optimizer-specific control parameters. |
weights |
Optional weight vector (convenience workflow). If |
outside_opt_label |
Label for the outside option (convenience workflow). |
include_outside_option |
Logical whether to include an outside option (convenience workflow). |
keep_data |
Logical. If |
nloptr_opts |
Deprecated. Use |
Two workflows are supported:
Supply data and column names (including
nest_col). Data preparation (prepare_nl_data) is
handled automatically.
Call prepare_nl_data (or build the input
list manually) and pass it via input_data.
A choicer_nl object (inherits from choicer_fit).
Standard S3 methods available: summary(), coef(),
vcov(), logLik(), AIC(), BIC(),
nobs().
library(data.table) set.seed(42) N <- 100; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), nest_col = "nest" ) summary(fit)library(data.table) set.seed(42) N <- 100; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), nest_col = "nest" ) summary(fit)
Subsamples whole choice situations from a population data set according to
fixed per-stratum quotas, where strata are defined by the chosen
alternative. The input data set is treated as the population, so the
population shares are known exactly; the returned sample carries a
ready-to-use WESML weight column (see wesml_weights).
sample_by_choice( data, id_col, alt_col, choice_col, n_per_alt = NULL, frac_per_alt = NULL, seed = NULL, weight_name = ".wesml_weight", outside_opt_label = NULL, include_outside_option = FALSE )sample_by_choice( data, id_col, alt_col, choice_col, n_per_alt = NULL, frac_per_alt = NULL, seed = NULL, weight_name = ".wesml_weight", outside_opt_label = NULL, include_outside_option = FALSE )
data, id_col, alt_col, choice_col
|
As in |
n_per_alt |
Either a single integer applied to every stratum, or a named
integer vector of per-stratum counts (names matched to
|
frac_per_alt |
Either a single fraction in |
seed |
Optional integer seed for reproducible sampling. |
weight_name |
Name of the attached weight column (default
|
outside_opt_label, include_outside_option
|
As in
|
Sampling is by choice situation (id), never by row: all alternative-rows of a sampled situation are kept together. Sampling is without replacement.
A data.table subsample with the weight column appended and
"Q", "H", and "choice_sampling" attributes (the last
records the scheme, shares, quotas, and meat = "robust").
Manski, C. F. and Lerman, S. R. (1977). Econometrica 45(8), 1977-1988.
library(data.table) set.seed(1) N <- 600L; J <- 3L pop <- data.table(id = rep(seq_len(N), each = J), alt = rep(1:J, N)) pop[, x1 := rnorm(.N)] pop[, w1 := rnorm(.N)] pop[, choice := as.integer(seq_len(.N) == sample.int(.N, 1L)), by = id] s <- sample_by_choice(pop, "id", "alt", "choice", n_per_alt = 50L, seed = 1L) attr(s, "choice_sampling")$H # realized sample shares head(s[[".wesml_weight"]])library(data.table) set.seed(1) N <- 600L; J <- 3L pop <- data.table(id = rep(seq_len(N), each = J), alt = rep(1:J, N)) pop[, x1 := rnorm(.N)] pop[, w1 := rnorm(.N)] pop[, choice := as.integer(seq_len(.N) == sample.int(.N, 1L)), by = id] s <- sample_by_choice(pop, "id", "alt", "choice", n_per_alt = 50L, seed = 1L) attr(s, "choice_sampling")$H # realized sample shares head(s[[".wesml_weight"]])
Generates synthetic choice data with i.i.d. Gumbel errors, optionally with varying choice-set sizes and an outside option (alt = 0). Choices are determined by argmax of utility; covariates are drawn as Uniform(-1, 1).
simulate_mnl_data( N = 5000, J = 5, beta = c(0.8, -0.6), delta = NULL, seed = 123, outside_option = TRUE, vary_choice_set = TRUE )simulate_mnl_data( N = 5000, J = 5, beta = c(0.8, -0.6), delta = NULL, seed = 123, outside_option = TRUE, vary_choice_set = TRUE )
N |
Number of choice situations. |
J |
Number of inside alternatives. |
beta |
Fixed coefficients for |
delta |
Alternative-specific constants for inside alternatives
(length |
seed |
Random seed. Pass |
outside_option |
Logical; if |
vary_choice_set |
Logical; if |
A choicer_sim object.
sim <- simulate_mnl_data(N = 1000, J = 5, seed = 123) print(sim)sim <- simulate_mnl_data(N = 1000, J = 5, seed = 123) print(sim)
Generates synthetic choice data with random coefficients drawn from a
multivariate normal (optionally log-normal per dimension) and an additional
mean shifter mu. Random coefficients are parameterized via the lower
Cholesky factor of Sigma. Covariates are Uniform(-1, 1) by default;
columns named in price_cols are drawn as -Uniform(0.1, 3) to mimic
strictly-negative price variables.
simulate_mxl_data( N = 5000, J = 4, beta = c(0.8, -0.6), delta = NULL, mu = NULL, Sigma = matrix(c(1, 0.5, 0.5, 1.5), nrow = 2), rc_dist = NULL, rc_correlation = NULL, price_cols = NULL, seed = 123, outside_option = TRUE, vary_choice_set = TRUE )simulate_mxl_data( N = 5000, J = 4, beta = c(0.8, -0.6), delta = NULL, mu = NULL, Sigma = matrix(c(1, 0.5, 0.5, 1.5), nrow = 2), rc_dist = NULL, rc_correlation = NULL, price_cols = NULL, seed = 123, outside_option = TRUE, vary_choice_set = TRUE )
N |
Number of choice situations. |
J |
Number of inside alternatives. |
beta |
Fixed coefficients for |
delta |
ASCs for inside alternatives (length |
mu |
Mean shifter for random coefficients (length |
Sigma |
Covariance matrix of random coefficients (square, |
rc_dist |
Integer vector (length |
rc_correlation |
Logical; if |
price_cols |
Character vector of |
seed |
Random seed ( |
outside_option |
Logical; include outside option with |
vary_choice_set |
Logical; if |
Random coefficients are constructed to match the estimator's
parameterization in src/mxlogit.cpp. For every dimension the raw draw
is L %*% eta where eta ~ N(0, I). A normal random coefficient
(rc_dist = 0) is then gamma_k = mu_k + (L %*% eta)_k. A log-normal
random coefficient (rc_dist = 1) follows the shifted log-normal
beta_k = exp(mu_k) + exp((L %*% eta)_k) – not the textbook
exp(mu_k + sigma_k * eta) – so mu_k in true_params$mu is on the
same scale the estimator recovers and recovery_table() can compare
like-for-like.
A choicer_sim object. true_params includes beta, delta,
Sigma, L_params (packed Cholesky parameters), mu, rc_dist,
rc_correlation.
sim <- simulate_mxl_data(N = 1000, J = 4, seed = 123) print(sim)sim <- simulate_mxl_data(N = 1000, J = 4, seed = 123) print(sim)
Generates synthetic choice data with nested logit probabilities computed
analytically (log-sum-exp over inclusive values), then samples choices from
the implied multinomial. The outside option (j = 0) sits in a singleton
nest with lambda = 1.
simulate_nl_data( N = 10000, beta = c(1.5, -0.8), delta = c(`1` = 0.5, `2` = 0.3, `3` = -0.2, `4` = -0.5, `5` = 0.4), nests = list(c(1, 2), c(3, 4, 5)), lambdas = c(0.8, 0.2), seed = 123 )simulate_nl_data( N = 10000, beta = c(1.5, -0.8), delta = c(`1` = 0.5, `2` = 0.3, `3` = -0.2, `4` = -0.5, `5` = 0.4), nests = list(c(1, 2), c(3, 4, 5)), lambdas = c(0.8, 0.2), seed = 123 )
N |
Number of choice situations. |
beta |
Fixed coefficients for covariates |
delta |
Named numeric vector of ASCs for inside alternatives. |
nests |
List of integer vectors defining nest membership for inside alternatives. |
lambdas |
Numeric vector of dissimilarity parameters, one per nest. |
seed |
Random seed ( |
A choicer_sim object. true_params includes beta, delta,
lambdas; settings includes the nest_structure. The returned
data retains a nest column (integer, with 0L for the outside
option) for convenient use with run_nestlogit().
Unlike simulate_mnl_data() and simulate_mxl_data(), this
function does not expose outside_option or vary_choice_set flags.
The outside option (j = 0) is always present as a singleton nest with
lambda = 1, and every individual faces the full set of inside
alternatives. Add these flags if downstream use cases need them.
sim <- simulate_nl_data(N = 2000, seed = 123) print(sim)sim <- simulate_nl_data(N = 2000, seed = 123) print(sim)
Computes and returns a coefficient summary table with standard errors, z-values, p-values, and significance codes. Triggers lazy Hessian computation if standard errors have not been computed yet.
## S3 method for class 'choicer_mnl' summary(object, gof = TRUE, ...)## S3 method for class 'choicer_mnl' summary(object, gof = TRUE, ...)
object |
A choicer_mnl object. |
gof |
Logical; compute goodness-of-fit measures (McFadden R-squared, hit rate) for the summary footer. Involves an in-sample prediction pass (for mixed logit, a full simulation over draws); set to FALSE to skip. |
... |
Additional arguments (ignored). |
A summary.choicer_mnl object (list with coefficients table and
metadata, including a gof element with goodness-of-fit measures from
gof; its fields are NA when the model was fitted with
keep_data = FALSE).
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) summary(fit)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) summary(fit)
Computes coefficient summary with delta-method transformation for variance parameters (Cholesky to covariance scale) and log-normal mean parameters. Triggers lazy Hessian computation if standard errors have not been computed yet.
## S3 method for class 'choicer_mxl' summary(object, gof = TRUE, ...)## S3 method for class 'choicer_mxl' summary(object, gof = TRUE, ...)
object |
A choicer_mxl object. |
gof |
Logical; compute goodness-of-fit measures (McFadden R-squared, hit rate) for the summary footer. Involves an in-sample prediction pass (for mixed logit, a full simulation over draws); set to FALSE to skip. |
... |
Additional arguments (ignored). |
A summary.choicer_mxl object (includes a gof element with
goodness-of-fit measures from gof; its fields are NA when
the model was fitted with keep_data = FALSE).
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) summary(fit)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) summary(fit)
Triggers lazy Hessian computation if standard errors have not been computed yet.
## S3 method for class 'choicer_nl' summary(object, gof = TRUE, ...)## S3 method for class 'choicer_nl' summary(object, gof = TRUE, ...)
object |
A choicer_nl object. |
gof |
Logical; compute goodness-of-fit measures (McFadden R-squared, hit rate) for the summary footer. Involves an in-sample prediction pass (for mixed logit, a full simulation over draws); set to FALSE to skip. |
... |
Additional arguments (ignored). |
A summary.choicer_nl object (includes a gof element with
goodness-of-fit measures from gof; its fields are NA when
the model was fitted with keep_data = FALSE).
library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), nest_col = "nest" ) summary(fit)library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), nest_col = "nest" ) summary(fit)
Triggers lazy Hessian computation if vcov has not been computed yet.
## S3 method for class 'choicer_fit' vcov(object, ...)## S3 method for class 'choicer_fit' vcov(object, ...)
object |
A choicer_fit object. |
... |
Additional arguments (ignored). |
Named variance-covariance matrix, or NULL if unavailable.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) vcov(fit)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) vcov(fit)
Recomputes the robust Huber-White sandwich variance
for a fitted mixed logit, where the bread
is the weighted negated Hessian and the meat
is the weight-squared outer product of the
per-individual scores. This is the appropriate variance under choice-based
(endogenous stratified) / WESML weighting, where the inverse-Hessian and the
ordinary BHHH variance are invalid. It can be called on any fitted model
(e.g. one estimated with se_method = "hessian") to obtain robust
standard errors post hoc, without refitting.
wesml_vcov(object, ...) ## S3 method for class 'choicer_mxl' wesml_vcov(object, type = c("vcov", "se"), ...)wesml_vcov(object, ...) ## S3 method for class 'choicer_mxl' wesml_vcov(object, type = c("vcov", "se"), ...)
object |
A fitted |
... |
Unused. |
type |
Either |
If the stored weights are uniform (all equal), a warning is emitted: the returned variance is then the ordinary robust (Huber-White) variance, not a WESML-weighted variance. Refit with WESML weights for a choice-based-sampling correction.
A variance-covariance matrix (type = "vcov") or a named
numeric vector of standard errors (type = "se"), in the raw
parameter space.
wesml_weights, sample_by_choice,
run_mxlogit
library(data.table) set.seed(1) N <- 200L; J <- 3L dt <- data.table(id = rep(seq_len(N), each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := as.integer(seq_len(.N) == sample.int(.N, 1L)), by = id] fit <- run_mxlogit(dt, "id", "alt", "choice", "x1", "w1", S = 50L) wesml_vcov(fit, "se")library(data.table) set.seed(1) N <- 200L; J <- 3L dt <- data.table(id = rep(seq_len(N), each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := as.integer(seq_len(.N) == sample.int(.N, 1L)), by = id] fit <- run_mxlogit(dt, "id", "alt", "choice", "x1", "w1", S = 50L) wesml_vcov(fit, "se")
Computes Manski-Lerman (1977) Weighted Exogenous Sample Maximum Likelihood
(WESML) weights for a choice-based sample. The weight for a choice situation
whose chosen alternative is is , where
is the population share of alternative and its
sample share among choosers. Using these weights in
run_mxlogit restores consistency under choice-based sampling;
pair them with se_method = "sandwich" for valid (robust) standard
errors (the plain inverse-Hessian is invalid under weighting).
wesml_weights( data, id_col, alt_col, choice_col, Q, H = NULL, normalize = TRUE, attach = FALSE, weight_name = ".wesml_weight", outside_opt_label = NULL, include_outside_option = FALSE )wesml_weights( data, id_col, alt_col, choice_col, Q, H = NULL, normalize = TRUE, attach = FALSE, weight_name = ".wesml_weight", outside_opt_label = NULL, include_outside_option = FALSE )
data |
A long-format choice data set (data.frame or data.table), one row per alternative per choice situation. |
id_col, alt_col, choice_col
|
Column names identifying the choice situation, the alternative, and the 0/1 chosen indicator. |
Q |
Named numeric vector of population shares, one entry per chosen
stratum (names matched to |
H |
Optional named numeric vector of sample shares. If |
normalize |
If |
attach |
If |
weight_name |
Name of the weight column (default |
outside_opt_label, include_outside_option
|
Set
|
Strata are defined by the chosen alternative and keyed by
as.character(alt) so numeric and character alternative codes match
supplied share names unambiguously.
Either an id-keyed data.table with columns id_col and
weight_name (default), or, when attach = TRUE, a copy of
data with the weight column appended. The result carries "Q",
"H", and "choice_sampling" attributes recording provenance.
Manski, C. F. and Lerman, S. R. (1977). The Estimation of Choice Probabilities from Choice Based Samples. Econometrica 45(8), 1977-1988. Train, K. E. (2009). Discrete Choice Methods with Simulation, Section 3.7. Cambridge University Press.
sample_by_choice, run_mxlogit,
wesml_vcov
library(data.table) set.seed(1) N <- 300L; J <- 3L pop <- data.table(id = rep(seq_len(N), each = J), alt = rep(1:J, N)) pop[, x1 := rnorm(.N)] pop[, w1 := rnorm(.N)] pop[, choice := as.integer(seq_len(.N) == sample.int(.N, 1L)), by = id] # Population shares of the chosen alternative Q <- prop.table(table(pop[choice == 1, alt])) wt <- wesml_weights(pop, "id", "alt", "choice", Q = Q) head(wt)library(data.table) set.seed(1) N <- 300L; J <- 3L pop <- data.table(id = rep(seq_len(N), each = J), alt = rep(1:J, N)) pop[, x1 := rnorm(.N)] pop[, w1 := rnorm(.N)] pop[, choice := as.integer(seq_len(.N) == sample.int(.N, 1L)), by = id] # Population shares of the chosen alternative Q <- prop.table(table(pop[choice == 1, alt])) wt <- wesml_weights(pop, "id", "alt", "choice", Q = Q) head(wt)
Computes willingness-to-pay (WTP) ratios with delta-method standard errors
from a fitted choice model. For an attribute coefficient
and a price coefficient , the WTP is
the marginal rate of substitution between the attribute and price. Standard
errors use the delta method with analytic gradients
and
, applied to the
corresponding 2x2 block of vcov(object).
wtp(object, price_var, attr_vars = NULL, level = 0.95, ...) ## S3 method for class 'choicer_fit' wtp(object, price_var, attr_vars = NULL, level = 0.95, ...) ## S3 method for class 'choicer_mxl' wtp(object, price_var, attr_vars = NULL, level = 0.95, ...)wtp(object, price_var, attr_vars = NULL, level = 0.95, ...) ## S3 method for class 'choicer_fit' wtp(object, price_var, attr_vars = NULL, level = 0.95, ...) ## S3 method for class 'choicer_mxl' wtp(object, price_var, attr_vars = NULL, level = 0.95, ...)
object |
A fitted model object ( |
price_var |
Name of the price variable. Must be a fixed-coefficient
variable (a column of the design matrix |
attr_vars |
Character vector of attributes to report. Defaults to all
fixed-coefficient variables other than |
level |
Confidence level for the normal-approximation interval
|
... |
Additional arguments passed to methods. |
For mixed logit models, random coefficients are included via their
estimated location parameters. The package's log-normal random coefficient
is the shifted log-normal
(see
run_mxlogit()), so:
Normal random coefficient (rc_mean = TRUE): mean WTP
, labeled Mu_x.
Log-normal random coefficient (rc_mean = TRUE):
median WTP , since the
median of is 1. (The mean,
, is highly sensitive to the
estimated variance; the median is the more robust summary.) These rows
are labeled by the attribute name and flagged as medians when printed.
Log-normal random coefficient with rc_mean = FALSE:
has median 1, so the median WTP is
with uncertainty driven solely by .
Normal random coefficients with rc_mean = FALSE have mean 0 by
construction and are excluded from the table.
The price variable must have a fixed coefficient. A random price coefficient is rejected: the ratio of two random coefficients generally has no finite moments (the denominator has positive density at 0), so mean or median WTP computed from location parameters would be meaningless. Use a fixed price coefficient, or estimate the model in WTP space.
A data.frame of class choicer_wtp with one row per
attribute and columns Estimate, Std_Error, z_value,
CI_lower, CI_upper. Attributes price_var and
level record the inputs; median_rows lists rows that are
median (rather than mean) WTP. Standard errors are NA when the
variance-covariance matrix is unavailable.
library(data.table) sim <- simulate_mnl_data(N = 1000, J = 4, beta = c(0.8, -0.6), seed = 123, outside_option = FALSE, vary_choice_set = FALSE) fit <- run_mnlogit(sim$data, "id", "alt", "choice", c("x1", "x2")) # treat x2 as the price variable wtp(fit, price_var = "x2") wtp(fit, price_var = "x2", attr_vars = c("x1", "ASC_2"), level = 0.90)library(data.table) sim <- simulate_mnl_data(N = 1000, J = 4, beta = c(0.8, -0.6), seed = 123, outside_option = FALSE, vary_choice_set = FALSE) fit <- run_mnlogit(sim$data, "id", "alt", "choice", c("x1", "x2")) # treat x2 as the price variable wtp(fit, price_var = "x2") wtp(fit, price_var = "x2", attr_vars = c("x1", "ASC_2"), level = 0.90)