By Christian McDonald, Assistant Professor of Practice
School of Journalism and Media, Moody College of Communication
University of Texas at Austin
Each of the other analysis notebooks in the project exports two types of summaries for each measure: statewide rates and averages of hospital rates. This notebook simply brings them all together into a single data file for export.
There are some additional data combinations for interactives.
library(fs)
library(tidyverse)
library(jsonlite)
library(janitor)
# Turn off dplyr's informational message about summarise() grouping.
options(dplyr.summarise.inform = FALSE)

# Folder that is searched for the summary files.
data_dir <- "data-processed"

# Collect every path under data_dir (searched recursively) that matches
# "_summary".
summary_files <- dir_ls(
  path = data_dir,
  regexp = "_summary",
  recurse = TRUE
)

# Show what was found.
summary_files
## data-processed/ahrq_pcsec_rate_hosp_yr_summary.rds
## data-processed/ahrq_pcsec_rate_tx_yr_summary.rds
## data-processed/ahrq_vbac_rate_hosp_yr_summary.rds
## data-processed/ahrq_vbac_rate_tx_yr_summary.rds
## data-processed/csec_rate_tx_yr_summary.rds
## data-processed/epicsec_rate_hosp_yr_summary.rds
## data-processed/epicsec_rate_tx_yr_summary.rds
## data-processed/lf_epi_rate_hosp_yr_summary.rds
## data-processed/lf_epi_rate_tx_yr_summary.rds
## data-processed/medi_rate_tx_yr_summary.rds
# Read every summary file and stack the rows into a single data frame,
# ordered by SUMMARY, then CATEGORY, then YR.
summary_data <- summary_files %>%
  map_dfr(read_rds) %>%
  arrange(SUMMARY, CATEGORY, YR)

# finished data
summary_data

# Make sure the output folder exists before the first export of the script;
# dir_create() leaves an already-existing folder untouched.
dir_create("exports")

# Export the combined summaries.
summary_data %>%
  write_csv("exports/summary_data.csv")
# Load the provider list and the three rate tables used below.
# Each rate table supplies THCIC_ID, YR and its own rate column
# (PCRATE, EPIRATE, VBACRATE) to the later steps.
providers_full <- "data-processed/providers_full.rds" %>% read_rds()
pcsec <- "data-processed/ahrq_pcsec_rate_hosp_yr.rds" %>% read_rds()
epi <- "data-processed/lf_epi_rate_hosp_yr.rds" %>% read_rds()
vbac <- "data-processed/ahrq_vbac_rate_hosp_yr.rds" %>% read_rds()
Filters and assembles 2019 data for hospitals table.
## 2019 subsets: one slim table per measure holding only the hospital id
## (THCIC_ID) and that measure's rate. Grouping is dropped first so no
## grouping column is carried along by select().
pcsec_2019 <- pcsec %>%
  ungroup() %>%
  filter(YR == 2019) %>%
  select(THCIC_ID, PCRATE)

epi_2019 <- epi %>%
  ungroup() %>%
  filter(YR == 2019) %>%
  select(THCIC_ID, EPIRATE)

# NOTE(review): vbac_2019 is built here but is not used by any later step
# visible in this notebook -- confirm whether it should feed table_2019.
vbac_2019 <- vbac %>%
  ungroup() %>%
  filter(YR == 2019) %>%
  select(THCIC_ID, VBACRATE)
We start with providers_full here, but some of our hospitals were filtered out earlier because they had no data (or not enough) in 2019, so they might not have a rate.
# Build the 2019 hospitals table: keep providers that have a city, attach the
# 2019 PCRATE and EPIRATE by hospital id, then drop hospitals with neither.
# NOTE(review): vbac_2019 is not joined here -- confirm that is intended.
table_2019 <- providers_full %>%
  filter(!is.na(PROVIDER_CITY)) %>%
  rename(PROVIDER_NAME = PROVIDER_NAME_CLEANED) %>%
  left_join(pcsec_2019, by = "THCIC_ID") %>%
  left_join(epi_2019, by = "THCIC_ID") %>%
  # keep a hospital when at least one of the two rates is present
  filter(!is.na(PCRATE) | !is.na(EPIRATE))

# export
write_csv(table_2019, "exports/table_2019.csv")

# peek
table_2019
# Statewide ("TX") rows for the three charted measures, reshaped so each
# measure becomes its own rate column, with placeholder provider fields so
# the rows can later be combined with the hospital rows.
chart_texas <- summary_data %>%
  filter(SUMMARY == "TX") %>%
  filter(
    CATEGORY %in% c("EPISIOTOMY", "PRIMARY_CESAREAN", "VAGINAL_BIRTH_AFTER_CESAREAN")
  ) %>%
  select(-MEASUREMENT) %>%
  pivot_wider(names_from = CATEGORY, values_from = VALUE) %>%
  mutate(
    PROVIDER_NAME = "Texas",
    PROVIDER_CITY = "",
    PROVIDER_ADDRESS = ""
  ) %>%
  # rename and order the columns in one step
  select(
    YR,
    THCIC_ID = SUMMARY,
    PROVIDER_NAME,
    PROVIDER_CITY,
    PROVIDER_ADDRESS,
    PCRATE = PRIMARY_CESAREAN,
    EPIRATE = EPISIOTOMY,
    VBACRATE = VAGINAL_BIRTH_AFTER_CESAREAN
  ) %>%
  arrange(YR)

# peek
chart_texas
Again, some hospitals won’t have rates if they didn’t have enough deliveries for that year.
# Build the hospital-by-year rate table from the provider list.
# Join keys are spelled out so the result does not depend on which column
# names the tables happen to share; they are the same keys dplyr previously
# chose on its own, and the "Joining, by" messages are no longer printed.
# NOTE(review): YR enters only through `epi`, so a hospital-year present in
# `pcsec` or `vbac` but absent from `epi` never gets a row -- confirm that
# is intended.
chart_rates <- providers_full %>%
  rename(PROVIDER_NAME = PROVIDER_NAME_CLEANED) %>%
  ungroup() %>%
  left_join(
    epi %>% ungroup() %>% select(YR, THCIC_ID, EPIRATE),
    by = "THCIC_ID"
  ) %>%
  left_join(
    pcsec %>% ungroup() %>% select(YR, THCIC_ID, PCRATE),
    by = c("THCIC_ID", "YR")
  ) %>%
  left_join(
    vbac %>% ungroup() %>% select(YR, THCIC_ID, VBACRATE),
    by = c("THCIC_ID", "YR")
  ) %>%
  filter(
    # filters closed/old hospitals
    !is.na(PROVIDER_CITY),
    # drop rows that have neither a primary-cesarean nor an episiotomy rate
    !(
      is.na(PCRATE) &
        is.na(EPIRATE)
    )
  )

# peek
chart_rates
And write to JSON.
# Stack the statewide rows on top of the hospital rows.
chart_data <- bind_rows(chart_texas, chart_rates)

# Export for the interactive chart.
write_json(chart_data, "exports/chart_data.json")

# peek
head(chart_data, 12)
Right now there are no special needs for print beyond format, but updates can go here if they arise.
# The print version is currently a plain copy of the chart data.
chart_data_print <- chart_data

# Export as CSV.
write_csv(chart_data_print, "exports/chart_data_print.csv")

# Show the first 20 rows.
head(chart_data_print, 20)
This adds the Texas values for a given year to each hospital’s yearly data.
# Attach the statewide rates for the matching year to every hospital row.
# The Texas columns get a TX prefix so they sit beside the hospital's own.
chart_data_v2 <- left_join(
  chart_rates,
  chart_texas %>%
    select(
      YR,
      TXPCRATE = PCRATE,
      TXEPIRATE = EPIRATE,
      TXVBACRATE = VBACRATE
    ),
  by = "YR"
)

# Export for the interactive chart.
write_json(chart_data_v2, "exports/chart_data_v2.json")

## peek
chart_data_v2
# A klaxon to indicate the processing is complete.
# The sound is optional: skip it when beepr is not installed rather than
# ending an otherwise finished run with an error.
if (requireNamespace("beepr", quietly = TRUE)) {
  beepr::beep(4)
}