Skip to contents

Load the library and the data

# Attach the hcruR package
library(hcruR)

# Load the example dataset and preview its first six rows
data("hcru_sample_data")
head(hcru_sample_data, n = 6L)
#> # A tibble: 6 × 11
#>   patient_id cohort  encounter_id care_setting visit_date admission_date
#>   <chr>      <chr>   <chr>        <chr>        <date>     <date>        
#> 1 P_1001     control E_645327     IP           2023-08-23 2023-08-23    
#> 2 P_1001     control E_591096     ED           2023-08-27 NA            
#> 3 P_1001     control E_864419     OP           2023-09-25 NA            
#> 4 P_1001     control E_379584     IP           2023-10-03 2023-10-03    
#> 5 P_1001     control E_617025     ED           2024-02-07 NA            
#> 6 P_1001     control E_379047     ED           2024-03-01 NA            
#> # ℹ 5 more variables: discharge_date <date>, encounter_date <date>,
#> #   index_date <date>, period <chr>, cost_usd <dbl>

Generate the HCRU summary using dplyr (this output can be used to create HCRU plots)

# Step 1: Estimate HCRU and keep the dplyr-style summary (gt_output = FALSE).
# Every argument is passed by name, so the grouping below is purely cosmetic.
hcru_summary <- estimate_hcru(
  data = hcru_sample_data,
  # Identifier and grouping columns
  patient_id_col = "patient_id",
  encounter_id_col = "encounter_id",
  cohort_col = "cohort",
  group_var = "cohort",
  setting_col = "care_setting",
  # Date columns
  index_col = "index_date",
  visit_col = "visit_date",
  admit_col = "admission_date",
  discharge_col = "discharge_date",
  # Cost and period columns
  cost_col = "cost_usd",
  time_window_col = "period",
  # NOTE(review): "readmission" and "length_of_stay" are not among the 11
  # columns of hcru_sample_data; presumably estimate_hcru() derives them --
  # confirm against ?estimate_hcru.
  readmission_col = "readmission",
  los_col = "length_of_stay",
  # Window lengths and readmission rule
  pre_days = 180,
  post_days = 365,
  readmission_days_rule = 30,
  # No custom variables, no statistical test, no gtsummary table
  custom_var_list = NULL,
  test = NULL,
  gt_output = FALSE
)

# Print the returned object
hcru_summary
#> $`Summary by settings using dplyr`
#> # A tibble: 12 × 10
#>    cohort    care_setting period Patients Visits     Cost Avg_visits_per_patient
#>    <chr>     <chr>        <chr>     <int>  <int>    <dbl>                  <dbl>
#>  1 control   ED           post         65    101  262128.                   1.55
#>  2 control   ED           pre          50     57  150620.                   1.14
#>  3 control   IP           post         78    127 1877868.                   1.63
#>  4 control   IP           pre          53     66 1066097.                   1.25
#>  5 control   OP           post         67    111  116931.                   1.66
#>  6 control   OP           pre          50     61   62427.                   1.22
#>  7 treatment ED           post         78    126  335850.                   1.62
#>  8 treatment ED           pre          51     63  153831.                   1.24
#>  9 treatment IP           post         68    100 1523437.                   1.47
#> 10 treatment IP           pre          65     80 1333135.                   1.23
#> 11 treatment OP           post         76    126  133056.                   1.66
#> 12 treatment OP           pre          56     71   67378.                   1.27
#> # ℹ 3 more variables: Avg_cost_per_patient <dbl>, Avg_LOS <dbl>,
#> #   Readmit_30d_Rate <dbl>

Generate the HCRU summary using gtsummary (publication-ready output)

# Estimate HCRU again with the same settings, this time requesting the
# gtsummary table (gt_output = TRUE). Every argument is passed by name, so
# the grouping below is purely cosmetic.
hcru_summary_gt <- estimate_hcru(
  data = hcru_sample_data,
  # Identifier and grouping columns
  patient_id_col = "patient_id",
  encounter_id_col = "encounter_id",
  cohort_col = "cohort",
  group_var = "cohort",
  setting_col = "care_setting",
  # Date columns
  index_col = "index_date",
  visit_col = "visit_date",
  admit_col = "admission_date",
  discharge_col = "discharge_date",
  # Cost and period columns
  cost_col = "cost_usd",
  time_window_col = "period",
  # NOTE(review): "readmission" and "length_of_stay" are not among the 11
  # columns of hcru_sample_data; presumably estimate_hcru() derives them --
  # confirm against ?estimate_hcru.
  readmission_col = "readmission",
  los_col = "length_of_stay",
  # Window lengths and readmission rule
  pre_days = 180,
  post_days = 365,
  readmission_days_rule = 30,
  # No custom variables and no explicit test choice
  custom_var_list = NULL,
  test = NULL,
  gt_output = TRUE
)

# Show the gtsummary element of the returned list
hcru_summary_gt[["Summary by settings using gtsummary"]]
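Group (spanning header over the Overall, control and treatment columns)

| Variable            | N     | Overall, N = 1,089 ¹ | control, N = 523 ¹   | treatment, N = 566 ¹ | p-value ² |
|---------------------|-------|----------------------|----------------------|----------------------|-----------|
| cost_usd            | 1,089 |                      |                      |                      | 0.2       |
|     Mean (SD)       |       | 6,504 (8,063)        | 6,761 (8,149)        | 6,266 (7,982)        |           |
|     Median (Q1, Q3) |       | 2,522 (1,127, 9,433) | 2,775 (1,122, 9,649) | 2,276 (1,127, 8,534) |           |
|     Q1              |       | 1,127                | 1,122                | 1,127                |           |
|     Q3              |       | 9,433                | 9,649                | 8,534                |           |
|     Min - Max       |       | 104 - 29,990         | 111 - 29,779         | 104 - 29,990         |           |
| length_of_stay      | 373   |                      |                      |                      | 0.7       |
|     Mean (SD)       |       | 6.42 (2.91)          | 6.36 (2.88)          | 6.49 (2.96)          |           |
|     Median (Q1, Q3) |       | 6.00 (4.00, 9.00)    | 6.00 (4.00, 9.00)    | 6.50 (4.00, 9.00)    |           |
|     Q1              |       | 4.00                 | 4.00                 | 4.00                 |           |
|     Q3              |       | 9.00                 | 9.00                 | 9.00                 |           |
|     Min - Max       |       | 2.00 - 11.00         | 2.00 - 11.00         | 2.00 - 11.00         |           |
|     Unknown         |       | 716                  | 330                  | 386                  |           |
| readmission         | 1,089 | 47 (4.3%)            | 30 (5.7%)            | 17 (3.0%)            | 0.027     |

¹ n (%)
² Wilcoxon rank sum test; Pearson’s Chi-squared test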

Generate the HCRU plot for average visits per patient by cohort and time period

# Plot average visits per patient from the dplyr summary table.
# x_var is "period" and care_setting is the facet variable, so the x-axis
# label names the period rather than the healthcare setting.
# NOTE(review): assumes plot_hcru() maps x_var to the x-axis and uses x_lable
# as that axis title -- confirm against ?plot_hcru.
# The argument names x_lable / y_lable / fill_lable are spelled as the
# function is called here; do not "correct" them.
p_avg_visit <- plot_hcru(
  summary_df = hcru_summary[["Summary by settings using dplyr"]],
  x_var = "period",
  y_var = "Avg_visits_per_patient",
  cohort_col = "cohort",
  facet_var = "care_setting",
  facet_var_n = 3,
  title = "Per patient average visits by domain and cohort",
  x_lable = "Period (pre/post index date)",
  y_lable = "Average visits",
  fill_lable = "Cohort"
)

# Display the plot
p_avg_visit

Generate the HCRU plot for average costs per patient by cohort and time period

# Plot average cost per patient from the dplyr summary table.
# x_var is "period" and care_setting is the facet variable, so the x-axis
# label names the period rather than the healthcare setting.
# NOTE(review): assumes plot_hcru() maps x_var to the x-axis and uses x_lable
# as that axis title -- confirm against ?plot_hcru.
# The argument names x_lable / y_lable / fill_lable are spelled as the
# function is called here; do not "correct" them.
p_avg_cost <- plot_hcru(
  summary_df = hcru_summary[["Summary by settings using dplyr"]],
  x_var = "period",
  y_var = "Avg_cost_per_patient",
  cohort_col = "cohort",
  facet_var = "care_setting",
  facet_var_n = 3,
  title = "Per patient average cost by domain and cohort",
  x_lable = "Period (pre/post index date)",
  y_lable = "Average costs",
  fill_lable = "Cohort"
)

# Display the plot
p_avg_cost