Skip to contents

Generate Detailed Descriptive Statistics

Usage

summarize_descriptives(
  data,
  patient_id_col = "patient_id",
  setting_col = "care_setting",
  cohort_col = "cohort",
  encounter_id_col = "encounter_id",
  cost_col = "cost_usd",
  los_col = "length_of_stay",
  readmission_col = "readmission",
  time_window_col = "time_window"
)

Arguments

data

A data frame containing the variables to summarize.

patient_id_col

A character string specifying the name of the patient identifier column (default `"patient_id"`).

setting_col

A character string specifying the name of the HCRU care setting column (default `"care_setting"`).

cohort_col

A character string specifying the name of the cohort column (default `"cohort"`).

encounter_id_col

A character string specifying the name of the encounter/claim identifier column (default `"encounter_id"`).

cost_col

A character string specifying the name of the cost column (default `"cost_usd"`).

los_col

A character string specifying the name of the length-of-stay column (default `"length_of_stay"`).

readmission_col

A character string specifying the name of the readmission column (default `"readmission"`).

time_window_col

A character string specifying the name of the time window column (default `"time_window"`).

Value

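A table object (a tibble) holding the summarized descriptive statistics; see the example output below for its columns.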

Examples

# Example: build a small simulated dataset, preprocess it, and summarize it.
# The body runs only when the optional packages dplyr and checkmate are
# installed.
if (requireNamespace("dplyr", quietly = TRUE) &&
    requireNamespace("checkmate", quietly = TRUE)) {
  # Simulated input: 10 patients with 2 rows each (20 rows in total),
  # alternating between cohorts "A"/"B" and care settings "IP"/"OP".
  # Dates and costs are drawn at random and no seed is set, so the values
  # differ from run to run.
  hcru_sample_data <- data.frame(
    patient_id = rep(1:10, each = 2),
    cohort = rep(c("A", "B"), 10),
    care_setting = rep(c("IP", "OP"), 10),
    admission_date = Sys.Date() - sample(1:100, 20, TRUE),
    discharge_date = Sys.Date() - sample(1:90, 20, TRUE),
    index_date = Sys.Date() - 50,
    visit_date = Sys.Date() - sample(1:100, 20, TRUE),
    encounter_id = 1:20,
    cost_usd = runif(20, 100, 1000)
  )
  # preproc_hcru_fun() is defined elsewhere in the package; presumably it
  # derives the columns that summarize_descriptives() expects -- see its own
  # help page.
  df <- preproc_hcru_fun(data = hcru_sample_data)
  summary_df <- summarize_descriptives(data = df)
  # For demonstration only: set LOS and Readmission to random placeholder
  # values on "IP" rows (NA on all other rows) and label every row "Pre".
  summary_df$LOS <- ifelse(summary_df$care_setting == "IP",
    sample(1:10, nrow(summary_df), TRUE), NA)
  summary_df$Readmission <- ifelse(summary_df$care_setting == "IP",
    sample(0:1, nrow(summary_df), TRUE), NA)
  summary_df$time_window <- "Pre"
  # Print the resulting table.
  summary_df
}
#> # A tibble: 20 × 15
#>    patient_id cohort care_setting time_window  Days  Month    Year Visits  Cost
#>         <int> <chr>  <chr>        <chr>       <dbl>  <dbl>   <dbl>  <dbl> <dbl>
#>  1          1 A      IP           Pre             7 0.230  0.0192       1  136.
#>  2          1 B      OP           Pre             5 0.164  0.0137       1  950.
#>  3          2 A      IP           Pre            36 1.18   0.0985       1  320.
#>  4          2 B      OP           Pre             9 0.296  0.0246       1  803.
#>  5          3 A      IP           Pre            22 0.723  0.0602       1  359.
#>  6          3 B      OP           Pre            25 0.822  0.0684       1  888.
#>  7          4 A      IP           Pre            18 0.592  0.0492       1  366.
#>  8          4 B      OP           Pre            51 1.68   0.140        1  985.
#>  9          5 A      IP           Pre            37 1.22   0.101        1  631.
#> 10          5 B      OP           Pre            17 0.559  0.0465       1  783.
#> 11          6 A      IP           Pre            20 0.658  0.0547       1  852.
#> 12          6 B      OP           Pre            33 1.08   0.0903       1  787.
#> 13          7 A      IP           Pre            14 0.460  0.0383       1  476.
#> 14          7 B      OP           Pre            47 1.55   0.129        1  224.
#> 15          8 A      IP           Pre            34 1.12   0.0930       1  173.
#> 16          8 B      OP           Pre             7 0.230  0.0192       1  690.
#> 17          9 A      IP           Pre             3 0.0986 0.00821      1  642.
#> 18          9 B      OP           Pre             8 0.263  0.0219       1  691.
#> 19         10 A      IP           Pre            34 1.12   0.0930       1  396.
#> 20         10 B      OP           Pre            38 1.25   0.104        1  982.
#> # ℹ 6 more variables: LOS <int>, Readmission <int>, Visit_PPPM <dbl>,
#> #   Visit_PPPY <dbl>, Cost_PPPM <dbl>, Cost_PPPY <dbl>