By Christian McDonald, Assistant Professor of Practice
School of Journalism and Media, Moody College of Communication
University of Texas at Austin


Purpose of the notebook

Each of the other analysis notebooks in the project exports two types of summaries for each measure: statewide rates and averages of hospital rates. This notebook simply brings them all together into a single data file for export.

There are some additional data combinations for interactives.

library(fs)
library(tidyverse)
library(jsonlite)
library(janitor)

# Turn off dplyr's summarise() regrouping notice (an informational message
# rather than a warning) so it does not clutter the notebook output.
options(dplyr.summarise.inform = FALSE)

Summary imports

# Folder that the other analysis notebooks write their outputs into.
data_dir <- "data-processed"
# Find the summary files. Every match is later handed to read_rds(), so the
# pattern is anchored to paths ending in "_summary.rds" rather than matching
# anything that merely contains "_summary" (a folder, a CSV, a backup copy).
summary_files <- dir_ls(data_dir, recurse = TRUE, regexp = "_summary\\.rds$")
# peek at them
summary_files
## data-processed/ahrq_pcsec_rate_hosp_yr_summary.rds
## data-processed/ahrq_pcsec_rate_tx_yr_summary.rds
## data-processed/ahrq_vbac_rate_hosp_yr_summary.rds
## data-processed/ahrq_vbac_rate_tx_yr_summary.rds
## data-processed/csec_rate_tx_yr_summary.rds
## data-processed/epicsec_rate_hosp_yr_summary.rds
## data-processed/epicsec_rate_tx_yr_summary.rds
## data-processed/lf_epi_rate_hosp_yr_summary.rds
## data-processed/lf_epi_rate_tx_yr_summary.rds
## data-processed/medi_rate_tx_yr_summary.rds
# Load every summary file, stack the results into one data frame and sort it.
summary_data <- summary_files %>%
  map(read_rds) %>%
  bind_rows() %>%
  arrange(SUMMARY, CATEGORY, YR)

# the combined result
summary_data

Summary exports

# Write the combined summaries out as CSV.
write_csv(summary_data, "exports/summary_data.csv")

Interactives exports

Import pcsec, epi and vbac rates, providers

# Provider details first, then the three hospital-by-year rate tables.
providers_full <- read_rds(file = "data-processed/providers_full.rds")
pcsec <- read_rds(file = "data-processed/ahrq_pcsec_rate_hosp_yr.rds")
epi <- read_rds(file = "data-processed/lf_epi_rate_hosp_yr.rds")
vbac <- read_rds(file = "data-processed/ahrq_vbac_rate_hosp_yr.rds")

Hospitals table

Filters and assembles 2019 data for hospitals table.

# 2019 slice of each measure: drop any grouping, keep only 2019 rows, and
# reduce to the hospital id plus that measure's rate column.
pcsec_2019 <- pcsec %>%
  ungroup() %>%
  filter(YR == 2019) %>%
  select(THCIC_ID, PCRATE)

epi_2019 <- epi %>%
  ungroup() %>%
  filter(YR == 2019) %>%
  select(THCIC_ID, EPIRATE)

vbac_2019 <- vbac %>%
  ungroup() %>%
  filter(YR == 2019) %>%
  select(THCIC_ID, VBACRATE)

Assemble table for output

We are starting with providers_full here, but some hospitals were filtered out of the rate data earlier because they didn’t have data (or enough of it) in 2019, so they might not have a rate.

# Build the 2019 hospitals table: provider details plus each hospital's
# primary-cesarean (PCRATE) and episiotomy (EPIRATE) rate.
# NOTE(review): vbac_2019 is prepared earlier but never joined here; confirm
# whether VBACRATE was meant to be part of this table.
table_2019 <- providers_full %>%
  filter(!is.na(PROVIDER_CITY)) %>%
  rename(PROVIDER_NAME = PROVIDER_NAME_CLEANED) %>%
  left_join(pcsec_2019, by = "THCIC_ID") %>%
  left_join(epi_2019, by = "THCIC_ID") %>%
  # keep a hospital when at least one of the two rates is present
  filter(!is.na(PCRATE) | !is.na(EPIRATE))

# export
write_csv(table_2019, "exports/table_2019.csv")

# inspect
table_2019

Data export: charts by year

Assemble Texas averages

# Statewide ("TX") yearly rates, reshaped to one row per year and given
# provider-style columns so they can be bound to the hospital rows later.
chart_texas <- summary_data %>%
  filter(SUMMARY == "TX") %>%
  filter(
    CATEGORY %in% c("EPISIOTOMY", "PRIMARY_CESAREAN", "VAGINAL_BIRTH_AFTER_CESAREAN")
  ) %>%
  select(-MEASUREMENT) %>%
  pivot_wider(names_from = CATEGORY, values_from = VALUE) %>%
  mutate(
    PROVIDER_NAME = "Texas",
    PROVIDER_CITY = "",
    PROVIDER_ADDRESS = ""
  ) %>%
  # pick, order and rename the columns in a single step
  select(
    YR,
    THCIC_ID = SUMMARY,
    PROVIDER_NAME,
    PROVIDER_CITY,
    PROVIDER_ADDRESS,
    PCRATE = PRIMARY_CESAREAN,
    EPIRATE = EPISIOTOMY,
    VBACRATE = VAGINAL_BIRTH_AFTER_CESAREAN
  ) %>%
  arrange(YR)

# inspect
chart_texas

Assemble charts by year

Again, some hospitals won’t have rates if they didn’t have enough deliveries for that year.

# build table from data

# Combine the three yearly rate tables first, keeping every hospital-year that
# appears in any of them. Chaining left joins off the episiotomy table would
# only keep years that exist in that table, silently dropping hospital-years
# that have a primary-cesarean rate but no episiotomy rate, which the
# "either measure" filter below is meant to keep.
rates_by_year <- epi %>%
  ungroup() %>%
  select(YR, THCIC_ID, EPIRATE) %>%
  full_join(
    pcsec %>% ungroup() %>% select(YR, THCIC_ID, PCRATE),
    by = c("THCIC_ID", "YR")
  ) %>%
  full_join(
    vbac %>% ungroup() %>% select(YR, THCIC_ID, VBACRATE),
    by = c("THCIC_ID", "YR")
  ) %>%
  # full_join appends unmatched rows at the end, so put each hospital's
  # years back in order for the by-year charts
  arrange(THCIC_ID, YR)

# Attach the yearly rates to the provider details. Join keys are explicit, so
# the result cannot change if the tables ever gain another shared column, and
# dplyr no longer prints a "Joining, by = ..." message.
chart_rates <- providers_full %>%
  rename(PROVIDER_NAME = PROVIDER_NAME_CLEANED) %>%
  ungroup() %>%
  left_join(rates_by_year, by = "THCIC_ID") %>%
  filter(
    # filters closed/old hospitals
    !is.na(PROVIDER_CITY),
    # drop hospital-years that have neither a primary-cesarean nor an
    # episiotomy rate
    !(
      is.na(PCRATE) &
      is.na(EPIRATE)
    )
  )

# peek
chart_rates

Bind Texas to rates

And write to JSON.

# Statewide rows go first, followed by the hospital rows.
chart_data <- bind_rows(chart_texas, chart_rates)

# export as JSON
write_json(chart_data, "exports/chart_data.json")

# first dozen rows
head(chart_data, 12)

Chart data for print

Right now there are no special needs for print beyond format, but updates can go here if they arise.

# The print version currently starts as a straight copy of the chart data.
chart_data_print <- chart_data

# export as CSV
write_csv(chart_data_print, "exports/chart_data_print.csv")

# first twenty rows
head(chart_data_print, 20)

Alt bind method for json

This adds the Texas values for a given year to each hospital’s yearly data.

# Put the statewide rate for the same year next to every hospital-year row.
# The Texas columns get a TX prefix so they do not clash with hospital rates.
chart_data_v2 <- left_join(
  chart_rates,
  select(
    chart_texas,
    YR,
    TXPCRATE = PCRATE,
    TXEPIRATE = EPIRATE,
    TXVBACRATE = VBACRATE
  ),
  by = "YR"
)

# export as JSON
write_json(chart_data_v2, "exports/chart_data_v2.json")

# inspect
chart_data_v2

Closing

# Play a sound once every chunk above has finished running.
beepr::beep(sound = 4)