Generate Detailed Descriptive Statistics
Usage
summarize_descriptives(
data,
patient_id_col = "patient_id",
setting_col = "care_setting",
cohort_col = "cohort",
encounter_id_col = "encounter_id",
cost_col = "cost_usd",
los_col = "length_of_stay",
readmission_col = "readmission",
time_window_col = "time_window"
)
Arguments
- data
A dataframe with variables to summarize.
- patient_id_col
A character specifying the name of patient identifier column
- setting_col
A character specifying the name of HCRU setting column
- cohort_col
A character specifying the name of cohort column
- encounter_id_col
A character specifying the name of encounter/claim column
- cost_col
A character specifying the name of cost column
- los_col
A character specifying the name of length of stay column
- readmission_col
A character specifying the name of readmission column
- time_window_col
A character specifying the name of time window column
Examples
# Run the example only when the optional packages are installed.
if (requireNamespace("dplyr", quietly = TRUE) &&
  requireNamespace("checkmate", quietly = TRUE)) {
  # Simulated encounter-level data: 10 patients with 2 encounters each,
  # alternating between cohort A/B and care setting IP/OP.
  hcru_sample_data <- data.frame(
    patient_id = rep(1:10, each = 2),
    cohort = rep(c("A", "B"), 10),
    care_setting = rep(c("IP", "OP"), 10),
    admission_date = Sys.Date() - sample(1:100, 20, replace = TRUE),
    discharge_date = Sys.Date() - sample(1:90, 20, replace = TRUE),
    index_date = Sys.Date() - 50,
    visit_date = Sys.Date() - sample(1:100, 20, replace = TRUE),
    encounter_id = 1:20,
    cost_usd = runif(20, 100, 1000)
  )

  # Preprocess with the default column names, then summarize.
  preprocessed <- preproc_hcru_fun(data = hcru_sample_data)
  summary_df <- summarize_descriptives(data = preprocessed)

  # For demonstration only: fill LOS and Readmission with random values on
  # inpatient ("IP") rows and NA elsewhere, and label every row as "Pre".
  is_inpatient <- summary_df$care_setting == "IP"
  summary_df$LOS <- ifelse(
    is_inpatient,
    sample(1:10, nrow(summary_df), replace = TRUE),
    NA
  )
  summary_df$Readmission <- ifelse(
    is_inpatient,
    sample(0:1, nrow(summary_df), replace = TRUE),
    NA
  )
  summary_df$time_window <- "Pre"

  # Value of the `if` expression; auto-printed at top level.
  summary_df
}
#> # A tibble: 20 × 15
#> patient_id cohort care_setting time_window Days Month Year Visits Cost
#> <int> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 A IP Pre 7 0.230 0.0192 1 136.
#> 2 1 B OP Pre 5 0.164 0.0137 1 950.
#> 3 2 A IP Pre 36 1.18 0.0985 1 320.
#> 4 2 B OP Pre 9 0.296 0.0246 1 803.
#> 5 3 A IP Pre 22 0.723 0.0602 1 359.
#> 6 3 B OP Pre 25 0.822 0.0684 1 888.
#> 7 4 A IP Pre 18 0.592 0.0492 1 366.
#> 8 4 B OP Pre 51 1.68 0.140 1 985.
#> 9 5 A IP Pre 37 1.22 0.101 1 631.
#> 10 5 B OP Pre 17 0.559 0.0465 1 783.
#> 11 6 A IP Pre 20 0.658 0.0547 1 852.
#> 12 6 B OP Pre 33 1.08 0.0903 1 787.
#> 13 7 A IP Pre 14 0.460 0.0383 1 476.
#> 14 7 B OP Pre 47 1.55 0.129 1 224.
#> 15 8 A IP Pre 34 1.12 0.0930 1 173.
#> 16 8 B OP Pre 7 0.230 0.0192 1 690.
#> 17 9 A IP Pre 3 0.0986 0.00821 1 642.
#> 18 9 B OP Pre 8 0.263 0.0219 1 691.
#> 19 10 A IP Pre 34 1.12 0.0930 1 396.
#> 20 10 B OP Pre 38 1.25 0.104 1 982.
#> # ℹ 6 more variables: LOS <int>, Readmission <int>, Visit_PPPM <dbl>,
#> # Visit_PPPY <dbl>, Cost_PPPM <dbl>, Cost_PPPY <dbl>