1 Purpose

This R Markdown file provides a reusable analysis pipeline for student course-evaluation engagement. It is designed so an institution can use either:

  1. the pseudo data generated by 01_generate_pseudo_course_eval_data.R, or
  2. its own student-term course evaluation file.

The unit of analysis is one row per student per term.

The core questions are:

2 Expected data structure

At minimum, the input data should contain:

Column Description
UserId Student identifier
Term_Semester Numeric or character term code
Number_Invited_Evals Number of evaluations assigned to the student in that term
Number_Completed_Evals Number completed
Number_Opted_Out_Evals Number opted out; can be reconstructed from Opt_Out_Rate if unavailable
Completion_Rate Completion percentage; can be calculated if unavailable
Opt_Out_Rate Opt-out percentage; can be calculated if unavailable
CUR_GPA Current-term GPA
CUM_GPA Cumulative GPA
Acad_Level or academic-level dummies Freshman, sophomore, junior, senior, postbac, master’s, doctoral

Freshmen are used as the reference category in models that include academic level.

3 Load data

if (!file.exists(params$data_path) && params$generate_pseudo_if_missing) {
  if (!file.exists(params$pseudo_generator_script)) {
    stop("Data file and pseudo generator script are both missing.")
  }
  source(params$pseudo_generator_script)
}

eval_raw <- read_csv(params$data_path, show_col_types = FALSE)

dim(eval_raw)
## [1] 145617     17
head(eval_raw)

4 Data preparation

This section standardizes variable names, creates missing rates/counts, generates lagged predictors, and builds the behavioral typology outcomes.

make_academic_dummies <- function(df) {
  if ("Acad_Level" %in% names(df)) {
    df <- df %>%
      mutate(
        Acad_Level = toupper(as.character(Acad_Level)),
        IS_FRESHMEN  = as.integer(Acad_Level %in% c("FRESHMEN", "FRESHMAN", "FIRST YEAR", "FIRST-YEAR")),
        IS_SOPHOMORE = as.integer(Acad_Level == "SOPHOMORE"),
        IS_JUNIOR    = as.integer(Acad_Level == "JUNIOR"),
        IS_SENIOR    = as.integer(Acad_Level == "SENIOR"),
        IS_POSTBAC   = as.integer(Acad_Level %in% c("POSTBAC", "POST-BAC", "POSTBACCALAUREATE")),
        IS_MASTER    = as.integer(Acad_Level %in% c("MASTER", "MASTERS", "MASTER'S")),
        IS_DOCTORAL  = as.integer(Acad_Level %in% c("DOCTORAL", "PHD", "DOCTORATE"))
      )
  }

  required_dummies <- c(
    "IS_FRESHMEN", "IS_SOPHOMORE", "IS_JUNIOR", "IS_SENIOR",
    "IS_POSTBAC", "IS_MASTER", "IS_DOCTORAL"
  )

  for (v in required_dummies) {
    if (!v %in% names(df)) {
      df[[v]] <- 0L
    }
  }

  df
}

prepare_course_eval_data <- function(df, lms_cutoff_term = 2238) {
  df <- df %>%
    make_academic_dummies() %>%
    mutate(
      UserId = as.character(UserId),
      Term_Semester_num = as.integer(Term_Semester),
      Term_Semester = as.integer(Term_Semester)
    )

  if (!"Number_Opted_Out_Evals" %in% names(df)) {
    if (!"Opt_Out_Rate" %in% names(df)) {
      stop("Need Number_Opted_Out_Evals or Opt_Out_Rate to estimate opt-out models.")
    }
    df <- df %>%
      mutate(Number_Opted_Out_Evals = round(Opt_Out_Rate / 100 * Number_Invited_Evals))
  }

  if (!"Completion_Rate" %in% names(df)) {
    df <- df %>%
      mutate(Completion_Rate = 100 * Number_Completed_Evals / Number_Invited_Evals)
  }

  if (!"Opt_Out_Rate" %in% names(df)) {
    df <- df %>%
      mutate(Opt_Out_Rate = 100 * Number_Opted_Out_Evals / Number_Invited_Evals)
  }

  df <- df %>%
    mutate(
      Number_Acted_Evals = Number_Completed_Evals + Number_Opted_Out_Evals,
      Number_No_Action_Evals = Number_Invited_Evals - Number_Acted_Evals,
      Number_No_Action_Evals = pmax(Number_No_Action_Evals, 0),
      no_action_flag = as.integer(Number_Completed_Evals == 0 & Number_Opted_Out_Evals == 0),
      completed_only_flag = as.integer(Number_Completed_Evals > 0 & Number_Opted_Out_Evals == 0),
      any_optout_flag = as.integer(Number_Opted_Out_Evals > 0),
      action_type_3b = case_when(
        Number_Completed_Evals == 0 & Number_Opted_Out_Evals == 0 ~ "no_action",
        Number_Opted_Out_Evals > 0 ~ "any_optout",
        Number_Completed_Evals > 0 ~ "completed_only",
        TRUE ~ NA_character_
      ),
      action_type_4 = case_when(
        Number_Completed_Evals > 0 & Number_Opted_Out_Evals > 0 ~ "mixed_complete_optout",
        Number_Completed_Evals > 0 ~ "completed_only",
        Number_Opted_Out_Evals > 0 ~ "opted_out_only",
        TRUE ~ "no_action"
      ),
      post_lms = Term_Semester_num >= lms_cutoff_term
    ) %>%
    arrange(UserId, Term_Semester_num) %>%
    group_by(UserId) %>%
    mutate(
      L_Term_Semester = lag(Term_Semester_num),
      L_Number_Invited_Evals = lag(Number_Invited_Evals),
      L_Number_Completed_Evals = lag(Number_Completed_Evals),
      L_Number_Opted_Out_Evals = lag(Number_Opted_Out_Evals),
      L_Completion_Rate = lag(Completion_Rate),
      L_Opt_Out_Rate = lag(Opt_Out_Rate),
      L_CUR_GPA = lag(CUR_GPA),
      L_CUM_GPA = lag(CUM_GPA),
      L_L_Completion_Rate = lag(Completion_Rate, 2),
      L_L_Opt_Out_Rate = lag(Opt_Out_Rate, 2),
      prior_terms = row_number() - 1,
      cum_prior_completion_rate = lag(cummean(Completion_Rate)),
      cum_prior_optout_rate = lag(cummean(Opt_Out_Rate)),
      ever_completed_prior = as.integer(lag(cumsum(Number_Completed_Evals > 0), default = 0) > 0),
      ever_opted_out_prior = as.integer(lag(cumsum(Number_Opted_Out_Evals > 0), default = 0) > 0)
    ) %>%
    ungroup()

  df
}

my_data3 <- prepare_course_eval_data(eval_raw, lms_cutoff_term = params$lms_cutoff_term)

dim(my_data3)
## [1] 145617     41

5 Descriptive checks

data_diagnostics <- tibble(
  metric = c(
    "student-term rows",
    "unique students",
    "terms",
    "mean invitations",
    "median invitations",
    "mean completion rate",
    "mean opt-out rate",
    "missing prior completion"
  ),
  value = c(
    nrow(my_data3),
    n_distinct(my_data3$UserId),
    n_distinct(my_data3$Term_Semester),
    round(mean(my_data3$Number_Invited_Evals, na.rm = TRUE), 2),
    median(my_data3$Number_Invited_Evals, na.rm = TRUE),
    round(mean(my_data3$Completion_Rate, na.rm = TRUE), 2),
    round(mean(my_data3$Opt_Out_Rate, na.rm = TRUE), 2),
    sum(is.na(my_data3$L_Completion_Rate))
  )
)

data_diagnostics

5.1 Action-type distribution

action_distribution <- my_data3 %>%
  count(action_type_3b, name = "n") %>%
  mutate(percent = 100 * n / sum(n)) %>%
  arrange(desc(n))

action_distribution
ggplot(action_distribution, aes(x = reorder(action_type_3b, percent), y = percent)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Student-term action distribution",
    x = NULL,
    y = "Percent of student-terms"
  )

5.2 Term-level response patterns

term_summary <- my_data3 %>%
  group_by(Term_Semester_num) %>%
  summarize(
    n = n(),
    completion_rate = mean(Completion_Rate, na.rm = TRUE),
    optout_rate = mean(Opt_Out_Rate, na.rm = TRUE),
    no_action_rate = mean(no_action_flag, na.rm = TRUE) * 100,
    any_optout_rate = mean(any_optout_flag, na.rm = TRUE) * 100,
    .groups = "drop"
  )

term_summary
ggplot(term_summary, aes(x = Term_Semester_num, y = no_action_rate)) +
  geom_line() +
  geom_point() +
  geom_vline(xintercept = lms_cutoff_term, linetype = "dashed") +
  labs(
    title = "No-action rate by term",
    subtitle = "Dashed line marks LMS integration cutoff",
    x = "Term",
    y = "No-action rate (%)"
  )

6 Utility functions for model interpretation

The lagged response variables are percentages. For those terms, a 10 percentage-point shift is often easier to interpret than a one-point shift.

term_scale <- function(term) {
  case_when(
    str_detect(term, "Completion_Rate|Opt_Out_Rate") ~ 10,
    TRUE ~ 1
  )
}

tidy_or <- function(model, model_label = NULL) {
  broom::tidy(model) %>%
    mutate(
      model = model_label,
      scale = term_scale(term),
      odds_ratio = exp(estimate * scale),
      odds_ratio_lower = exp((estimate - 1.96 * std.error) * scale),
      odds_ratio_upper = exp((estimate + 1.96 * std.error) * scale)
    ) %>%
    select(model, everything())
}

tidy_multinom_or <- function(model, model_label = NULL) {
  broom::tidy(model) %>%
    mutate(
      model = model_label,
      scale = term_scale(term),
      odds_ratio = exp(estimate * scale),
      odds_ratio_lower = exp((estimate - 1.96 * std.error) * scale),
      odds_ratio_upper = exp((estimate + 1.96 * std.error) * scale)
    ) %>%
    select(model, y.level, everything())
}

simple_fit_stats <- function(model, model_label) {
  out <- tryCatch(
    broom::glance(model),
    error = function(e) tibble()
  )
  if (nrow(out) == 0) {
    out <- tibble(model = model_label)
  } else {
    out <- out %>% mutate(model = model_label, .before = 1)
  }
  out
}

7 Model 1: Pooled OLS rate models

7.1 Formal model

For student \(i\) in term \(t\):

\[ C_{it} = \alpha + \beta_1 C_{i,t-1} + \beta_2 O_{i,t-1} + \gamma X_{it} + \varepsilon_{it} \]

\[ O_{it} = \alpha + \beta_1 C_{i,t-1} + \beta_2 O_{i,t-1} + \gamma X_{it} + u_{it} \]

where:

  • \(C_{it}\) is completion rate,
  • \(O_{it}\) is opt-out rate,
  • \(X_{it}\) includes academic level and GPA features.

These models are descriptive baselines. They are easy to explain but should not be treated as causal habit-formation models.

m_complete_ols <- lm(
  Completion_Rate ~
    IS_SOPHOMORE + IS_JUNIOR + IS_SENIOR + IS_POSTBAC +
    IS_MASTER + IS_DOCTORAL +
    CUR_GPA + CUM_GPA +
    L_Completion_Rate + L_Opt_Out_Rate +
    L_CUR_GPA + L_CUM_GPA,
  data = my_data3
)

m_optout_ols <- lm(
  Opt_Out_Rate ~
    IS_SOPHOMORE + IS_JUNIOR + IS_SENIOR + IS_POSTBAC +
    IS_MASTER + IS_DOCTORAL +
    CUR_GPA + CUM_GPA +
    L_Completion_Rate + L_Opt_Out_Rate +
    L_CUR_GPA + L_CUM_GPA,
  data = my_data3
)

modelsummary(
  list(
    "Completion rate OLS" = m_complete_ols,
    "Opt-out rate OLS" = m_optout_ols
  ),
  stars = TRUE
)
Completion rate OLS Opt-out rate OLS
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 2.536*** 0.346***
(0.547) (0.079)
IS_SOPHOMORE 4.012*** 0.344***
(0.367) (0.053)
IS_JUNIOR 4.495*** 0.351***
(0.357) (0.051)
IS_SENIOR 4.646*** 0.435***
(0.333) (0.048)
IS_POSTBAC 4.063*** 0.289**
(0.756) (0.109)
IS_MASTER 4.249*** 0.190**
(0.407) (0.058)
IS_DOCTORAL 3.868*** 0.205**
(0.546) (0.078)
CUR_GPA 3.417*** 0.074
(0.393) (0.056)
CUM_GPA 6.326*** -0.133
(1.125) (0.162)
L_Completion_Rate 0.206*** 0.000
(0.003) (0.000)
L_Opt_Out_Rate 0.239*** 0.020***
(0.022) (0.003)
L_CUR_GPA 0.645+ 0.142**
(0.340) (0.049)
L_CUM_GPA 5.090*** -0.133
(0.882) (0.127)
Num.Obs. 127617 127617
R2 0.154 0.001
R2 Adj. 0.154 0.001
AIC 1248815.1 753636.6
BIC 1248951.7 753773.2
Log.Lik. -624393.559 -376804.294
F 1935.478 13.248
RMSE 32.26 4.64
bind_rows(
  simple_fit_stats(m_complete_ols, "Completion rate OLS"),
  simple_fit_stats(m_optout_ols, "Opt-out rate OLS")
)

7.2 Pooled OLS interpretation aid

pooled_effects <- bind_rows(
  broom::tidy(m_complete_ols) %>% mutate(model = "Completion rate OLS"),
  broom::tidy(m_optout_ols) %>% mutate(model = "Opt-out rate OLS")
) %>%
  filter(term %in% c("L_Completion_Rate", "L_Opt_Out_Rate")) %>%
  mutate(
    ten_point_effect = estimate * 10,
    interpretation = paste0(
      "A 10pp increase in ", term,
      " is associated with a ",
      round(ten_point_effect, 2),
      " percentage-point change in the outcome."
    )
  )

pooled_effects

8 Model 2: Quasi-binomial count models

8.1 Formal model

The quasi-binomial model treats each student-term as a set of assigned evaluation opportunities.

For completion:

\[ Y^C_{it} \sim \text{quasi-binomial}(N_{it}, p^C_{it}) \]

\[ \log \left(\frac{p^C_{it}}{1-p^C_{it}}\right) = \alpha + \beta_1 C_{i,t-1} + \beta_2 O_{i,t-1} + \gamma X_{it} + \theta N_{it} + \lambda_t \]

For opt-out:

\[ Y^O_{it} \sim \text{quasi-binomial}(N_{it}, p^O_{it}) \]

This specification is useful because students receive different numbers of evaluation invitations.

m_complete_qbinom <- glm(
  cbind(Number_Completed_Evals,
        Number_Invited_Evals - Number_Completed_Evals) ~
    IS_SOPHOMORE + IS_JUNIOR + IS_SENIOR + IS_POSTBAC +
    IS_MASTER + IS_DOCTORAL +
    L_Completion_Rate + L_Opt_Out_Rate +
    L_CUR_GPA + L_CUM_GPA +
    Number_Invited_Evals +
    factor(Term_Semester),
  family = quasibinomial(),
  data = my_data3
)

m_optout_qbinom <- glm(
  cbind(Number_Opted_Out_Evals,
        Number_Invited_Evals - Number_Opted_Out_Evals) ~
    IS_SOPHOMORE + IS_JUNIOR + IS_SENIOR + IS_POSTBAC +
    IS_MASTER + IS_DOCTORAL +
    L_Completion_Rate + L_Opt_Out_Rate +
    L_CUR_GPA + L_CUM_GPA +
    Number_Invited_Evals +
    factor(Term_Semester),
  family = quasibinomial(),
  data = my_data3
)

modelsummary(
  list(
    "Completion count" = m_complete_qbinom,
    "Opt-out count" = m_optout_qbinom
  ),
  stars = TRUE
)
Completion count Opt-out count
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) -2.442*** -7.616***
(0.030) (0.291)
IS_SOPHOMORE 0.067*** 0.189
(0.017) (0.123)
IS_JUNIOR 0.094*** 0.193
(0.017) (0.121)
IS_SENIOR 0.076*** 0.227+
(0.016) (0.117)
IS_POSTBAC 0.085* 0.021
(0.036) (0.211)
IS_MASTER 0.080*** -0.090
(0.019) (0.135)
IS_DOCTORAL 0.053* -0.106
(0.026) (0.169)
L_Completion_Rate 0.009*** -0.003***
(0.000) (0.001)
L_Opt_Out_Rate 0.007*** 0.003
(0.001) (0.004)
L_CUR_GPA -0.008 -0.043
(0.016) (0.075)
L_CUM_GPA 0.728*** -0.007
(0.017) (0.083)
Number_Invited_Evals 0.037*** 0.028***
(0.002) (0.008)
factor(Term_Semester)2222 -0.023 -0.046
(0.018) (0.333)
factor(Term_Semester)2228 -0.035+ 0.008
(0.018) (0.324)
factor(Term_Semester)2232 -0.064*** 0.085
(0.018) (0.316)
factor(Term_Semester)2238 0.466*** 2.954***
(0.019) (0.253)
factor(Term_Semester)2242 0.316*** 2.924***
(0.018) (0.253)
factor(Term_Semester)2248 0.294*** 2.982***
(0.018) (0.253)
factor(Term_Semester)2252 0.308*** 3.056***
(0.018) (0.253)
Num.Obs. 127617 127617
F 1315.947 43.692
RMSE 0.32 0.05

8.2 Quasi-binomial odds ratios

For percentage-rate predictors, odds ratios below are scaled to a 10 percentage-point increase.

qbinom_or <- bind_rows(
  tidy_or(m_complete_qbinom, "Completion count"),
  tidy_or(m_optout_qbinom, "Opt-out count")
) %>%
  filter(term %in% c("L_Completion_Rate", "L_Opt_Out_Rate", "Number_Invited_Evals", "L_CUR_GPA", "L_CUM_GPA")) %>%
  select(model, term, scale, estimate, std.error, odds_ratio, odds_ratio_lower, odds_ratio_upper)

qbinom_or

8.3 Quasi-binomial fit diagnostics

qbinom_fit <- tibble(
  model = c("Completion count", "Opt-out count"),
  n = c(nobs(m_complete_qbinom), nobs(m_optout_qbinom)),
  null_deviance = c(m_complete_qbinom$null.deviance, m_optout_qbinom$null.deviance),
  residual_deviance = c(m_complete_qbinom$deviance, m_optout_qbinom$deviance),
  dispersion = c(summary(m_complete_qbinom)$dispersion, summary(m_optout_qbinom)$dispersion),
  fisher_iterations = c(m_complete_qbinom$iter, m_optout_qbinom$iter)
)

qbinom_fit

9 Model 3: Student and term fixed-effects models

9.1 Formal model

\[ C_{it} = \beta_1 C_{i,t-1} + \beta_2 O_{i,t-1} + \gamma X_{it} + \alpha_i + \lambda_t + \varepsilon_{it} \]

where \(\alpha_i\) is a student fixed effect and \(\lambda_t\) is a term fixed effect.

This model asks whether changes within the same student over time predict later evaluation behavior.

m_complete_fe <- feols(
  Completion_Rate ~
    L_Completion_Rate + L_Opt_Out_Rate +
    L_CUR_GPA + L_CUM_GPA +
    Number_Invited_Evals |
    UserId + Term_Semester,
  cluster = ~ UserId,
  data = my_data3
)

m_optout_fe <- feols(
  Opt_Out_Rate ~
    L_Completion_Rate + L_Opt_Out_Rate +
    L_CUR_GPA + L_CUM_GPA +
    Number_Invited_Evals |
    UserId + Term_Semester,
  cluster = ~ UserId,
  data = my_data3
)

etable(m_complete_fe, m_optout_fe)

10 Model 4: Cumulative and two-lag habit checks

These models test whether the prior-term effect is really a longer-run behavioral history.

m_cumulative <- feols(
  Completion_Rate ~
    L_Completion_Rate +
    cum_prior_completion_rate +
    ever_completed_prior +
    L_Opt_Out_Rate +
    cum_prior_optout_rate +
    Number_Invited_Evals +
    L_CUR_GPA + L_CUM_GPA |
    UserId + Term_Semester,
  cluster = ~ UserId,
  data = my_data3
)

m_two_lag <- feols(
  Completion_Rate ~
    L_Completion_Rate + L_L_Completion_Rate +
    L_Opt_Out_Rate + L_L_Opt_Out_Rate +
    Number_Invited_Evals +
    L_CUR_GPA + L_CUM_GPA |
    UserId + Term_Semester,
  cluster = ~ UserId,
  data = my_data3
)

etable(m_cumulative, m_two_lag)

11 Model 5: Behavioral typology and multinomial action model

11.1 Three action types

The collapsed typology uses three categories:

Category Definition
completed_only Completed at least one evaluation and opted out of none
any_optout Opted out of at least one evaluation, whether or not the student also completed another
no_action Completed zero and opted out of zero

11.2 Formal model

\[ \log \left[ \frac{P(A_{it}=k)}{P(A_{it}=\text{completed only})} \right] = \alpha_k + \beta_{1k}C_{i,t-1} + \beta_{2k}O_{i,t-1} + \gamma_k X_{it} + \lambda_{kt} \]

where:

\[ k \in \{\text{any opt-out}, \text{no action}\} \]

action3b_model_data <- my_data3 %>%
  filter(complete.cases(
    action_type_3b,
    L_Completion_Rate,
    L_Opt_Out_Rate,
    L_CUR_GPA,
    L_CUM_GPA,
    Number_Invited_Evals,
    IS_SOPHOMORE,
    IS_JUNIOR,
    IS_SENIOR,
    IS_POSTBAC,
    IS_MASTER,
    IS_DOCTORAL,
    Term_Semester
  ))

m_action_type_3b <- nnet::multinom(
  relevel(factor(action_type_3b), ref = "completed_only") ~
    L_Completion_Rate + L_Opt_Out_Rate +
    L_CUR_GPA + L_CUM_GPA +
    Number_Invited_Evals +
    IS_SOPHOMORE + IS_JUNIOR + IS_SENIOR +
    IS_POSTBAC + IS_MASTER + IS_DOCTORAL +
    factor(Term_Semester),
  data = action3b_model_data,
  maxit = 500
)
## # weights:  60 (38 variable)
## initial  value 140201.604443 
## iter  10 value 59662.746602
## iter  20 value 58511.344931
## iter  30 value 57009.984073
## iter  40 value 56496.498135
## iter  50 value 56103.108167
## final  value 56100.506887 
## converged
summary(m_action_type_3b)
## Call:
## nnet::multinom(formula = relevel(factor(action_type_3b), ref = "completed_only") ~ 
##     L_Completion_Rate + L_Opt_Out_Rate + L_CUR_GPA + L_CUM_GPA + 
##         Number_Invited_Evals + IS_SOPHOMORE + IS_JUNIOR + IS_SENIOR + 
##         IS_POSTBAC + IS_MASTER + IS_DOCTORAL + factor(Term_Semester), 
##     data = action3b_model_data, maxit = 500)
## 
## Coefficients:
##            (Intercept) L_Completion_Rate L_Opt_Out_Rate    L_CUR_GPA  L_CUM_GPA Number_Invited_Evals IS_SOPHOMORE  IS_JUNIOR  IS_SENIOR IS_POSTBAC   IS_MASTER IS_DOCTORAL
## any_optout   -6.247202      -0.003254398    0.006483555 0.0382108949 -0.1783375           0.07737074    0.2811011  0.2982899  0.3124598  0.1298199  0.03284481 -0.01990022
## no_action     1.975907      -0.007121529   -0.006700380 0.0004228962 -0.6277945          -0.14948485   -0.1389099 -0.1679252 -0.1418444 -0.1397281 -0.16056670 -0.14920457
##            factor(Term_Semester)2222 factor(Term_Semester)2228 factor(Term_Semester)2232 factor(Term_Semester)2238 factor(Term_Semester)2242 factor(Term_Semester)2248
## any_optout              -0.003628562                0.22964387                0.10824567                  2.854160                  2.803591                  2.837096
## no_action                0.025667897                0.05031032                0.09230354                 -1.578582                 -1.447108                 -1.394935
##            factor(Term_Semester)2252
## any_optout                  2.924782
## no_action                  -1.462941
## 
## Std. Errors:
##            (Intercept) L_Completion_Rate L_Opt_Out_Rate  L_CUR_GPA  L_CUM_GPA Number_Invited_Evals IS_SOPHOMORE  IS_JUNIOR IS_SENIOR IS_POSTBAC IS_MASTER IS_DOCTORAL
## any_optout  0.28505449      0.0006642658    0.003287223 0.07108924 0.07901573          0.007617155   0.12158769 0.11986562 0.1158860 0.20261637 0.1321895  0.16376156
## no_action   0.05299834      0.0002278193    0.002827485 0.03365594 0.03626051          0.003668550   0.03142715 0.03051603 0.0283768 0.06988381 0.0357047  0.04959069
##            factor(Term_Semester)2222 factor(Term_Semester)2228 factor(Term_Semester)2232 factor(Term_Semester)2238 factor(Term_Semester)2242 factor(Term_Semester)2248
## any_optout                0.32330370                0.30397967                0.30801033                0.24706635                0.24756179                0.24745532
## no_action                 0.03048469                0.02996225                0.02973846                0.03985562                0.03959836                0.03896206
##            factor(Term_Semester)2252
## any_optout                0.24711963
## no_action                 0.03974962
## 
## Residual Deviance: 112201 
## AIC: 112277
multinom_or <- tidy_multinom_or(m_action_type_3b, "3-category action model") %>%
  filter(term %in% c("L_Completion_Rate", "L_Opt_Out_Rate", "L_CUR_GPA", "L_CUM_GPA", "Number_Invited_Evals")) %>%
  select(model, y.level, term, scale, estimate, std.error, odds_ratio, odds_ratio_lower, odds_ratio_upper)

multinom_or

11.3 Predicted probabilities from the action model

pred_probs <- predict(m_action_type_3b, newdata = action3b_model_data, type = "probs")

pred_prob_summary <- action3b_model_data %>%
  bind_cols(as.data.frame(pred_probs) %>% rename_with(~ paste0("p_", .x))) %>%
  group_by(action_type_3b) %>%
  summarize(
    mean_p_completed_only = mean(p_completed_only, na.rm = TRUE),
    mean_p_any_optout = mean(p_any_optout, na.rm = TRUE),
    mean_p_no_action = mean(p_no_action, na.rm = TRUE),
    .groups = "drop"
  )

pred_prob_summary

11.4 Diagnostic four-category action model

This model is diagnostic. It separates mixed completion/opt-out from opt-out-only behavior.

action4_model_data <- my_data3 %>%
  mutate(
    action_type_4 = factor(
      action_type_4,
      levels = c("completed_only", "mixed_complete_optout", "opted_out_only", "no_action")
    )
  ) %>%
  filter(complete.cases(
    action_type_4,
    L_Completion_Rate,
    L_Opt_Out_Rate,
    L_CUR_GPA,
    L_CUM_GPA,
    Number_Invited_Evals,
    IS_SOPHOMORE,
    IS_JUNIOR,
    IS_SENIOR,
    IS_POSTBAC,
    IS_MASTER,
    IS_DOCTORAL,
    Term_Semester
  ))

action4_distribution <- my_data3 %>%
  count(action_type_4, name = "n") %>%
  mutate(percent = 100 * n / sum(n))

action4_distribution
m_action_type_4 <- nnet::multinom(
  action_type_4 ~
    L_Completion_Rate + L_Opt_Out_Rate +
    L_CUR_GPA + L_CUM_GPA +
    Number_Invited_Evals +
    IS_SOPHOMORE + IS_JUNIOR + IS_SENIOR +
    IS_POSTBAC + IS_MASTER + IS_DOCTORAL +
    factor(Term_Semester),
  data = action4_model_data,
  maxit = 500
)
## # weights:  80 (57 variable)
## initial  value 176914.727483 
## iter  10 value 81569.508411
## iter  20 value 77154.467816
## iter  30 value 73046.047043
## iter  40 value 68914.415292
## iter  50 value 66197.149967
## iter  60 value 64582.847035
## iter  70 value 59393.129843
## iter  80 value 57422.749021
## iter  90 value 57057.108879
## iter 100 value 56977.059622
## iter 110 value 56953.853765
## iter 120 value 56952.145312
## iter 130 value 56951.716048
## iter 140 value 56951.312632
## iter 150 value 56949.477236
## iter 160 value 56944.542654
## iter 170 value 56941.803642
## final  value 56939.340516 
## converged
summary(m_action_type_4)
## Call:
## nnet::multinom(formula = action_type_4 ~ L_Completion_Rate + 
##     L_Opt_Out_Rate + L_CUR_GPA + L_CUM_GPA + Number_Invited_Evals + 
##     IS_SOPHOMORE + IS_JUNIOR + IS_SENIOR + IS_POSTBAC + IS_MASTER + 
##     IS_DOCTORAL + factor(Term_Semester), data = action4_model_data, 
##     maxit = 500)
## 
## Coefficients:
##                       (Intercept) L_Completion_Rate L_Opt_Out_Rate     L_CUR_GPA   L_CUM_GPA Number_Invited_Evals IS_SOPHOMORE  IS_JUNIOR  IS_SENIOR IS_POSTBAC   IS_MASTER
## mixed_complete_optout   -6.898435      -0.002821951    0.006952224 -0.0257643706 -0.05010776            0.1230620   0.32211692  0.3157589  0.3227678  0.1344359  0.09246626
## opted_out_only          -5.328592      -0.005573557    0.004219520  0.3849667155 -0.86848688           -0.2941367   0.05291916  0.2132726  0.2693455  0.1211721 -0.31849033
## no_action                1.978140      -0.007122714   -0.006695677  0.0008347724 -0.62850781           -0.1497491  -0.13900400 -0.1679147 -0.1417739 -0.1397616 -0.16073516
##                         IS_DOCTORAL factor(Term_Semester)2222 factor(Term_Semester)2228 factor(Term_Semester)2232 factor(Term_Semester)2238 factor(Term_Semester)2242
## mixed_complete_optout  0.0003234837               -0.13784187                 0.3206592              -0.009241662                  2.812921                  2.785167
## opted_out_only        -0.1086571107                0.66004129                -1.0405911               0.710951790                  3.107446                  2.930800
## no_action             -0.1492583586                0.02571192                 0.0501787               0.092355413                 -1.578034                 -1.447048
##                       factor(Term_Semester)2248 factor(Term_Semester)2252
## mixed_complete_optout                  2.795356                  2.892566
## opted_out_only                         3.090035                  3.133236
## no_action                             -1.394517                 -1.462577
## 
## Std. Errors:
##                       (Intercept) L_Completion_Rate L_Opt_Out_Rate  L_CUR_GPA  L_CUM_GPA Number_Invited_Evals IS_SOPHOMORE  IS_JUNIOR  IS_SENIOR IS_POSTBAC  IS_MASTER IS_DOCTORAL
## mixed_complete_optout  0.30639739      0.0007257536    0.003537138 0.07735078 0.08612597          0.007911546   0.13262704 0.13100379 0.12676691 0.22154304 0.14349831  0.17847652
## opted_out_only         0.36965436      0.0016266515    0.008572523 0.17634336 0.19509179          0.027144632   0.30042732 0.29233657 0.28136242 0.49082771 0.33873977  0.40535430
## no_action              0.05301066      0.0002278261    0.002827039 0.03365710 0.03626279          0.003670696   0.03142828 0.03051718 0.02837785 0.06988676 0.03570583  0.04959228
##                       factor(Term_Semester)2222 factor(Term_Semester)2228 factor(Term_Semester)2232 factor(Term_Semester)2238 factor(Term_Semester)2242 factor(Term_Semester)2248
## mixed_complete_optout                0.35464908                0.31853677                0.33542195                0.26325002                0.26366911                0.26365288
## opted_out_only                       0.42667242                0.83171272                0.39735820                0.19893439                0.20593447                0.20195012
## no_action                            0.03048529                0.02996278                0.02973899                0.03985404                0.03959773                0.03896045
##                       factor(Term_Semester)2252
## mixed_complete_optout                0.26323970
## opted_out_only                       0.20111412
## no_action                            0.03974824
## 
## Residual Deviance: 113878.7 
## AIC: 113992.7

12 Model 6: Student-level rule-based typology

The student-level typology is a descriptive segmentation.

student_types <- my_data3 %>%
  group_by(UserId) %>%
  summarize(
    mean_completion = mean(Completion_Rate, na.rm = TRUE),
    mean_optout = mean(Opt_Out_Rate, na.rm = TRUE),
    mean_action = mean((Number_Completed_Evals + Number_Opted_Out_Evals) /
                         Number_Invited_Evals * 100, na.rm = TRUE),
    mean_gpa = mean(CUM_GPA, na.rm = TRUE),
    terms_observed = n(),
    .groups = "drop"
  ) %>%
  mutate(
    student_type = case_when(
      mean_completion >= 75 ~ "Conscientious responder",
      mean_optout >= 10 ~ "Active opt-outer",
      mean_action < 25 ~ "Disengaged non-actor",
      TRUE ~ "Mixed responder"
    )
  )

student_type_distribution <- student_types %>%
  count(student_type) %>%
  mutate(percent = 100 * n / sum(n))

student_type_distribution
m_student_type <- nnet::multinom(
  relevel(factor(student_type), ref = "Active opt-outer") ~
    mean_gpa + terms_observed,
  data = student_types,
  maxit = 500
)
## # weights:  16 (9 variable)
## initial  value 24953.298500 
## iter  10 value 12803.360411
## iter  20 value 12363.589476
## iter  30 value 12363.455957
## final  value 12363.455190 
## converged
summary(m_student_type)
## Call:
## nnet::multinom(formula = relevel(factor(student_type), ref = "Active opt-outer") ~ 
##     mean_gpa + terms_observed, data = student_types, maxit = 500)
## 
## Coefficients:
##                         (Intercept)    mean_gpa terms_observed
## Conscientious responder   -7.161541  1.75471015      0.7791317
## Disengaged non-actor       5.444283 -1.83110844      0.1687729
## Mixed responder            1.280374 -0.04308683      0.4864251
## 
## Std. Errors:
##                         (Intercept)  mean_gpa terms_observed
## Conscientious responder   0.9525475 0.2050302      0.1065737
## Disengaged non-actor      0.9915067 0.2168376      0.1122019
## Mixed responder           0.9378346 0.2031819      0.1055058
## 
## Residual Deviance: 24726.91 
## AIC: 24744.91
tidy_multinom_or(m_student_type, "Student-level typology") %>%
  select(model, y.level, term, estimate, std.error, odds_ratio, odds_ratio_lower, odds_ratio_upper)

13 Model 7: LMS integration and no-action behavior

This section estimates whether the LMS integration period is associated with reduced no-action behavior.

13.1 Descriptive pre/post comparison

pre_post_action <- my_data3 %>%
  group_by(post_lms) %>%
  summarize(
    n = n(),
    no_action_rate = mean(no_action_flag, na.rm = TRUE) * 100,
    completed_only_rate = mean(completed_only_flag, na.rm = TRUE) * 100,
    any_optout_rate = mean(any_optout_flag, na.rm = TRUE) * 100,
    .groups = "drop"
  )

pre_post_action
pre_post_long <- pre_post_action %>%
  pivot_longer(
    cols = c(no_action_rate, completed_only_rate, any_optout_rate),
    names_to = "action",
    values_to = "percent"
  ) %>%
  mutate(
    action = recode(
      action,
      no_action_rate = "No action",
      completed_only_rate = "Completed only",
      any_optout_rate = "Any opt-out"
    ),
    period = if_else(post_lms, "Post-LMS", "Pre-LMS")
  )

ggplot(pre_post_long, aes(x = period, y = percent, fill = action)) +
  geom_col(position = "stack") +
  labs(
    title = "Action distribution before and after LMS integration",
    x = NULL,
    y = "Percent of student-terms",
    fill = "Action type"
  )

13.2 Adjusted no-action logit

policy_data <- my_data3 %>%
  mutate(
    time_index = dense_rank(Term_Semester_num),
    fall_term = Term_Semester_num %% 10 == 8
  ) %>%
  filter(
    complete.cases(
      no_action_flag,
      post_lms,
      time_index,
      fall_term,
      L_Completion_Rate,
      L_Opt_Out_Rate,
      L_CUR_GPA,
      L_CUM_GPA,
      Number_Invited_Evals,
      IS_SOPHOMORE,
      IS_JUNIOR,
      IS_SENIOR,
      IS_POSTBAC,
      IS_MASTER,
      IS_DOCTORAL
    )
  )

m_no_action_policy <- feglm(
  no_action_flag ~
    post_lms +
    time_index +
    fall_term +
    L_Completion_Rate +
    L_Opt_Out_Rate +
    L_CUR_GPA +
    L_CUM_GPA +
    Number_Invited_Evals +
    IS_SOPHOMORE +
    IS_JUNIOR +
    IS_SENIOR +
    IS_POSTBAC +
    IS_MASTER +
    IS_DOCTORAL,
  family = binomial(),
  cluster = ~ UserId,
  data = policy_data
)

summary(m_no_action_policy)
## GLM estimation, family = binomial, Dep. Var.: no_action_flag
## Observations: 127,617
## Standard-errors: Clustered (UserId) 
##                       Estimate Std. Error    z value   Pr(>|z|)    
## (Intercept)           1.898544   0.060291  31.489620  < 2.2e-16 ***
## post_lmsTRUE         -1.676760   0.037494 -44.720702  < 2.2e-16 ***
## time_index            0.032881   0.008431   3.899933 9.6219e-05 ***
## fall_termTRUE        -0.000787   0.019554  -0.040232 9.6791e-01    
## L_Completion_Rate    -0.007038   0.000234 -30.057669  < 2.2e-16 ***
## L_Opt_Out_Rate       -0.006595   0.002788  -2.365274 1.8017e-02 *  
## L_CUR_GPA             0.000223   0.033458   0.006660 9.9469e-01    
## L_CUM_GPA            -0.626790   0.036207 -17.311449  < 2.2e-16 ***
## Number_Invited_Evals -0.150365   0.003691 -40.736493  < 2.2e-16 ***
## IS_SOPHOMORE         -0.139817   0.031723  -4.407428 1.0461e-05 ***
## IS_JUNIOR            -0.169066   0.030290  -5.581634 2.3827e-08 ***
## IS_SENIOR            -0.143514   0.028190  -5.090940 3.5629e-07 ***
## IS_POSTBAC           -0.138610   0.068948  -2.010346 4.4395e-02 *  
## IS_MASTER            -0.158897   0.035779  -4.441085 8.9506e-06 ***
## IS_DOCTORAL          -0.147360   0.049763  -2.961206 3.0644e-03 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Log-Likelihood: -45,817.6   Adj. Pseudo R2: 0.138949
##            BIC:  91,811.6     Squared Cor.: 0.12363
policy_logit_or <- tidy_or(m_no_action_policy, "No-action policy logit") %>%
  filter(term %in% c("post_lmsTRUE", "L_Completion_Rate", "L_Opt_Out_Rate",
                     "L_CUR_GPA", "L_CUM_GPA", "Number_Invited_Evals")) %>%
  select(model, term, scale, estimate, std.error, odds_ratio, odds_ratio_lower, odds_ratio_upper)

policy_logit_or

13.3 Student fixed-effects linear probability model

m_no_action_policy_fe <- feols(
  no_action_flag ~
    post_lms +
    time_index +
    fall_term +
    L_Completion_Rate +
    L_Opt_Out_Rate +
    L_CUR_GPA +
    L_CUM_GPA +
    Number_Invited_Evals |
    UserId,
  cluster = ~ UserId,
  data = policy_data
)

summary(m_no_action_policy_fe)
## OLS estimation, Dep. Var.: no_action_flag
## Observations: 127,617
## Fixed-effects: UserId: 18,000
## Standard-errors: Clustered (UserId) 
##                       Estimate Std. Error    t value   Pr(>|t|)    
## post_lmsTRUE         -0.189962   0.004080 -46.563987  < 2.2e-16 ***
## time_index           -0.001837   0.000920  -1.996062 4.5942e-02 *  
## fall_termTRUE         0.002789   0.001938   1.438727 1.5025e-01    
## L_Completion_Rate     0.001173   0.000032  36.256795  < 2.2e-16 ***
## L_Opt_Out_Rate        0.001058   0.000199   5.305188 1.1389e-07 ***
## L_CUR_GPA            -0.003461   0.003664  -0.944660 3.4484e-01    
## L_CUM_GPA            -0.019171   0.009919  -1.932748 5.3283e-02 .  
## Number_Invited_Evals -0.015164   0.000366 -41.386950  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.30206     Adj. R2: 0.151775
##                 Within R2: 0.106684

13.4 Event-study model

# fixest formulas are parsed differently than ordinary R expressions.
# In particular, R Markdown parameters such as params$policy_reference_term
# can be interpreted as variables inside the model formula. To avoid that,
# we build the event-study formula as a string after evaluating the parameter.
policy_reference_term <- as.integer(params$policy_reference_term)
lms_cutoff_term <- as.integer(params$lms_cutoff_term)

event_formula <- as.formula(paste0(
  "no_action_flag ~ ",
  "i(Term_Semester_num, ref = ", policy_reference_term, ") + ",
  "L_Completion_Rate + ",
  "L_Opt_Out_Rate + ",
  "L_CUR_GPA + ",
  "L_CUM_GPA + ",
  "Number_Invited_Evals + ",
  "IS_SOPHOMORE + ",
  "IS_JUNIOR + ",
  "IS_SENIOR + ",
  "IS_POSTBAC + ",
  "IS_MASTER + ",
  "IS_DOCTORAL | ",
  "UserId"
))

m_no_action_event <- feols(
  event_formula,
  cluster = ~ UserId,
  data = policy_data
)

summary(m_no_action_event)
## OLS estimation, Dep. Var.: no_action_flag
## Observations: 127,617
## Fixed-effects: UserId: 18,000
## Standard-errors: Clustered (UserId) 
##                          Estimate Std. Error    t value   Pr(>|t|)    
## Term_Semester_num::2218 -0.007620   0.005014  -1.519638 1.2862e-01    
## Term_Semester_num::2222 -0.007155   0.004643  -1.540996 1.2334e-01    
## Term_Semester_num::2228 -0.004838   0.004414  -1.096070 2.7306e-01    
## Term_Semester_num::2238 -0.187747   0.003517 -53.375073  < 2.2e-16 ***
## Term_Semester_num::2242 -0.202158   0.003787 -53.379730  < 2.2e-16 ***
## Term_Semester_num::2248 -0.201623   0.003816 -52.830523  < 2.2e-16 ***
## Term_Semester_num::2252 -0.207042   0.003782 -54.747610  < 2.2e-16 ***
## L_Completion_Rate        0.001190   0.000033  36.484163  < 2.2e-16 ***
## L_Opt_Out_Rate           0.001140   0.000200   5.694301 1.2581e-08 ***
## L_CUR_GPA               -0.003412   0.003665  -0.931089 3.5182e-01    
## L_CUM_GPA               -0.019498   0.009923  -1.964937 4.9437e-02 *  
## Number_Invited_Evals    -0.015171   0.000366 -41.395054  < 2.2e-16 ***
## IS_SOPHOMORE            -0.015522   0.004451  -3.486921 4.8978e-04 ***
## IS_JUNIOR               -0.020822   0.004296  -4.846355 1.2680e-06 ***
## IS_SENIOR               -0.017073   0.004102  -4.162154 3.1672e-05 ***
## IS_POSTBAC              -0.019727   0.008413  -2.344838 1.9046e-02 *  
## IS_MASTER               -0.022170   0.004800  -4.618545 3.8911e-06 ***
## IS_DOCTORAL             -0.021850   0.006247  -3.497842 4.7017e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.301975     Adj. R2: 0.152176
##                  Within R2: 0.107187
event_df <- broom::tidy(m_no_action_event, conf.int = TRUE) %>%
  filter(str_detect(term, "Term_Semester_num::")) %>%
  mutate(
    term_num = as.integer(str_extract(term, "\\d+"))
  ) %>%
  select(term_num, estimate, conf.low, conf.high)

ref_row <- tibble(
  term_num = policy_reference_term,
  estimate = 0,
  conf.low = 0,
  conf.high = 0
)

event_plot_data <- bind_rows(event_df, ref_row) %>%
  arrange(term_num)

ggplot(event_plot_data, aes(x = term_num, y = estimate)) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = lms_cutoff_term, linetype = "dashed") +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
  labs(
    title = "Adjusted change in no-action behavior by term",
    subtitle = paste0("Reference term = ", policy_reference_term, "; dashed line marks LMS integration"),
    x = "Term",
    y = "Difference in no-action probability"
  )

13.5 Policy version of the 3-category action model

m_action_3b_policy <- nnet::multinom(
  relevel(factor(action_type_3b), ref = "completed_only") ~
    post_lms +
    time_index +
    fall_term +
    L_Completion_Rate +
    L_Opt_Out_Rate +
    L_CUR_GPA +
    L_CUM_GPA +
    Number_Invited_Evals +
    IS_SOPHOMORE +
    IS_JUNIOR +
    IS_SENIOR +
    IS_POSTBAC +
    IS_MASTER +
    IS_DOCTORAL,
  data = policy_data,
  maxit = 500
)
## # weights:  48 (30 variable)
## initial  value 140201.604443 
## iter  10 value 63110.507893
## iter  20 value 60850.602658
## iter  30 value 57656.581017
## iter  40 value 56112.549647
## final  value 56107.580221 
## converged
summary(m_action_3b_policy)
## Call:
## nnet::multinom(formula = relevel(factor(action_type_3b), ref = "completed_only") ~ 
##     post_lms + time_index + fall_term + L_Completion_Rate + L_Opt_Out_Rate + 
##         L_CUR_GPA + L_CUM_GPA + Number_Invited_Evals + IS_SOPHOMORE + 
##         IS_JUNIOR + IS_SENIOR + IS_POSTBAC + IS_MASTER + IS_DOCTORAL, 
##     data = policy_data, maxit = 500)
## 
## Coefficients:
##            (Intercept) post_lmsTRUE time_index fall_termTRUE L_Completion_Rate L_Opt_Out_Rate    L_CUR_GPA  L_CUM_GPA Number_Invited_Evals IS_SOPHOMORE  IS_JUNIOR  IS_SENIOR
## any_optout   -6.255625     2.649290 0.02886177  0.0114809595      -0.003377110    0.006128218 0.0381010707 -0.1758997           0.07730697    0.2755434  0.2924677  0.3063151
## no_action     1.901130    -1.644841 0.03306477 -0.0005783408      -0.007069802   -0.006280986 0.0008458476 -0.6290895          -0.14946844   -0.1382942 -0.1673364 -0.1414722
##            IS_POSTBAC   IS_MASTER IS_DOCTORAL
## any_optout  0.1248333  0.02634462 -0.02469086
## no_action  -0.1387586 -0.15999680 -0.14900119
## 
## Std. Errors:
##            (Intercept) post_lmsTRUE  time_index fall_termTRUE L_Completion_Rate L_Opt_Out_Rate  L_CUR_GPA  L_CUM_GPA Number_Invited_Evals IS_SOPHOMORE  IS_JUNIOR  IS_SENIOR IS_POSTBAC
## any_optout  0.20288055   0.13277316 0.021315687    0.04724778      0.0006608437    0.003288141 0.07108982 0.07900350          0.007616186   0.12149642 0.11976784 0.11576671 0.20249369
## no_action   0.06005611   0.03811995 0.008565245    0.01874402      0.0002265900    0.002816161 0.03364795 0.03624848          0.003668287   0.03140568 0.03048941 0.02832671 0.06987489
##             IS_MASTER IS_DOCTORAL
## any_optout 0.13208849  0.16368924
## no_action  0.03567964  0.04957375
## 
## Residual Deviance: 112215.2 
## AIC: 112275.2
policy_multinom_or <- tidy_multinom_or(m_action_3b_policy, "Policy 3-category action model") %>%
  filter(term %in% c("post_lmsTRUE", "L_Completion_Rate", "L_Opt_Out_Rate",
                     "L_CUR_GPA", "L_CUM_GPA", "Number_Invited_Evals")) %>%
  select(model, y.level, term, scale, estimate, std.error, odds_ratio, odds_ratio_lower, odds_ratio_upper)

policy_multinom_or

13.6 Predicted action probabilities before and after LMS integration

policy_pred <- predict(m_action_3b_policy, newdata = policy_data, type = "probs")

policy_pred_summary <- policy_data %>%
  bind_cols(as.data.frame(policy_pred) %>% rename_with(~ paste0("p_", .x))) %>%
  group_by(post_lms) %>%
  summarize(
    p_completed_only = mean(p_completed_only, na.rm = TRUE),
    p_any_optout = mean(p_any_optout, na.rm = TRUE),
    p_no_action = mean(p_no_action, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  pivot_longer(
    starts_with("p_"),
    names_to = "outcome",
    values_to = "probability"
  ) %>%
  mutate(
    period = if_else(post_lms, "Post-LMS", "Pre-LMS"),
    outcome = str_remove(outcome, "^p_")
  )

policy_pred_summary
ggplot(policy_pred_summary, aes(x = period, y = probability, fill = outcome)) +
  geom_col(position = "dodge") +
  labs(
    title = "Predicted action probabilities by LMS period",
    x = NULL,
    y = "Mean predicted probability",
    fill = "Outcome"
  )

15 Export model artifacts

dir.create("course_eval_replication_outputs", showWarnings = FALSE)

write_csv(data_diagnostics, "course_eval_replication_outputs/data_diagnostics.csv")
write_csv(term_summary, "course_eval_replication_outputs/term_summary.csv")
write_csv(action_distribution, "course_eval_replication_outputs/action_distribution_3category.csv")
write_csv(action4_distribution, "course_eval_replication_outputs/action_distribution_4category.csv")
write_csv(qbinom_or, "course_eval_replication_outputs/quasibinomial_odds_ratios.csv")
write_csv(multinom_or, "course_eval_replication_outputs/multinomial_action_odds_ratios.csv")
write_csv(policy_logit_or, "course_eval_replication_outputs/policy_logit_odds_ratios.csv")
write_csv(policy_multinom_or, "course_eval_replication_outputs/policy_multinomial_odds_ratios.csv")
write_csv(pre_post_action, "course_eval_replication_outputs/pre_post_action_summary.csv")

saveRDS(
  list(
    m_complete_ols = m_complete_ols,
    m_optout_ols = m_optout_ols,
    m_complete_qbinom = m_complete_qbinom,
    m_optout_qbinom = m_optout_qbinom,
    m_complete_fe = m_complete_fe,
    m_optout_fe = m_optout_fe,
    m_cumulative = m_cumulative,
    m_two_lag = m_two_lag,
    m_action_type_3b = m_action_type_3b,
    m_action_type_4 = m_action_type_4,
    m_student_type = m_student_type,
    m_no_action_policy = m_no_action_policy,
    m_no_action_policy_fe = m_no_action_policy_fe,
    m_no_action_event = m_no_action_event,
    m_action_3b_policy = m_action_3b_policy
  ),
  "course_eval_replication_outputs/models_all.rds"
)