By Christian McDonald, Assistant Professor of Practice
School of Journalism and Media, Moody College of Communication
University of Texas at Austin
This analysis looks at episiotomy rates using the Texas Health Care Information Collection’s Texas Inpatient Public Use Data File. The definition to calculate the rates comes from the 2020 Leapfrog Hospital Survey p118. I start with data filtered from the THCIC data using the Leapfrog specs. That process is explained in “01-process-lf-epi-loop”.
library(tidyverse)
library(janitor)
library(DT)
library(tigris)
# suppresses grouping warning
options(dplyr.summarise.inform = FALSE)
test_flag <- F
### test data
path_test <- "data-test/lf_del_vag_loop_test.rds"
### production data
path_prod <- "data-processed/lf_del_cleaned.rds"
### import based on flag
if (test_flag == T) del_vag_excl <- read_rds(path_test) else del_vag_excl <- read_rds(path_prod)
del_vag_excl %>% nrow()
## [1] 930101
This creates a column for cases where episiotomy is true. We have to look through each of the surgical procedure columns to check for the code.
I’d like to refactor this method, but at least it is easy to see and understand.
# list of codes for episiotomy, which is really one: 0W8NXZZ
epi_list <- read_rds("procedures-lists/lf_epi.rds") %>% .$epi
epi_list
## [1] "0W8NXZZ"
lf_epi <- del_vag_excl %>%
mutate(
EPI = case_when(
PRINC_SURG_PROC_CODE %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_1 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_2 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_3 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_4 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_5 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_6 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_7 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_8 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_9 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_10 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_11 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_12 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_13 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_14 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_15 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_16 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_17 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_18 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_19 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_20 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_21 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_22 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_23 %in% epi_list ~ TRUE,
OTH_SURG_PROC_CODE_24 %in% epi_list ~ TRUE,
TRUE ~ FALSE
)
)
This completes the processing of the data.
Hospitals with fewer than 120 deliveries excluded.
lf_epi_rate_hosp <- lf_epi %>%
group_by(THCIC_ID, PROVIDER_NAME, EPI) %>%
summarise(
EPI_CNT = n()
) %>%
pivot_wider(names_from = EPI, values_from = EPI_CNT) %>%
rename(
EPIF = "FALSE",
EPIT = "TRUE"
) %>%
mutate(
TOTAL = EPIF + EPIT,
EPIRATE = round_half_up((EPIT / TOTAL) * 100,1)
) %>%
select(THCIC_ID, PROVIDER_NAME, TOTAL, EPIF, EPIT, EPIRATE) %>%
filter(
TOTAL >= 120
) %>%
arrange(EPIRATE %>% desc())
lf_epi_rate_hosp %>%
datatable()
A look at the episiotomy rates for hospitals by year. We create the rate using the EPI column we created above, then pivot to do the math. We pivot again to see the years nicely in a table. Excludes hospitals with fewer than 30 deliveries in a given year.
# pivot to crate the rate based on logical EPI column
lf_epi_rate_hosp_yr <- lf_epi %>%
group_by(YR, THCIC_ID, PROVIDER_NAME, EPI) %>%
summarise(
EPI_CNT = n()
) %>%
pivot_wider(names_from = EPI, values_from = EPI_CNT) %>%
rename(
EPIF = "FALSE",
EPIT = "TRUE"
) %>%
mutate(
TOTAL = EPIF + EPIT,
EPIRATE = round_half_up((EPIT / TOTAL) * 100,1)
)
# select, pivot to see years.
lf_epi_rate_hosp_yr_table <- lf_epi_rate_hosp_yr %>%
filter(TOTAL >= 30) %>%
select(YR, THCIC_ID, PROVIDER_NAME, EPIRATE) %>%
pivot_wider(names_from = YR, values_from = EPIRATE) %>%
arrange(`2019` %>% desc())
lf_epi_rate_hosp_yr_table %>% datatable()
Filtering above sets for Laredo hospitals.
From combined years of the data.
lf_epi_rate_hosp %>%
filter(str_detect(PROVIDER_NAME, "Laredo"))
The table above, filtered for Laredo.
# filter for Laredo
lf_epi_rate_hosp_yr_table %>%
filter(str_detect(PROVIDER_NAME, "Laredo"))
lf_epi_rate_hosp_yr %>%
filter(str_detect(PROVIDER_NAME, "Laredo")) %>%
ggplot(aes(YR, EPIRATE)) +
geom_line(aes(group = PROVIDER_NAME, color = PROVIDER_NAME)) +
expand_limits(y = c(0,40)) +
theme(legend.position="bottom", legend.box = "vertical") +
labs(title = "Episiotomy rate by year, Laredo hospitals", x = "YEAR", y = "Episiotomy rate")
lf_epi_rate_hosp_qr_laredo_plot <- lf_epi %>%
filter(str_detect(PROVIDER_NAME, "Laredo")) %>%
group_by(DISCHARGE, PROVIDER_NAME, EPI) %>%
summarise(
EPI_CNT = n()
) %>%
pivot_wider(names_from = EPI, values_from = EPI_CNT) %>%
rename(
EPIF = "FALSE",
EPIT = "TRUE"
) %>%
mutate(
TOTAL = EPIF + EPIT,
EPIRATE = round_half_up((EPIT / TOTAL) * 100,1)
)
lf_epi_rate_hosp_qr_laredo_plot %>%
ggplot(aes(DISCHARGE, EPIRATE)) +
geom_line(aes(group = PROVIDER_NAME, color = PROVIDER_NAME)) +
expand_limits(y = c(0,40)) +
theme(legend.position="bottom", legend.box = "vertical",axis.text.x=element_text(angle = -45, hjust = 0)) +
labs(title = "Episiotomy rate by quarter, Laredo hospitals", x = "QUARTER", y = "Episiotomy rate")
Get FIPS codes lookup for county names.
tx_fips <- fips_codes %>%
filter(state == "TX") %>%
select(county_code, county)
# peek
tx_fips %>% head(2)
This looks at rates by the patient’s county (the hospital could be elsewhere.). Currently not filtered for any minimum caseload since it is a 3+ year period.
lf_epi_rate_county <- lf_epi %>%
group_by(PAT_COUNTY, EPI) %>%
summarise(
EPI_CNT = n()
) %>%
# join for county names
left_join(tx_fips, by=c( "PAT_COUNTY" = "county_code")) %>%
ungroup() %>%
rename(COUNTY = county) %>%
# pivot for mutates
pivot_wider(names_from = EPI, values_from = EPI_CNT) %>%
rename(
EPIF = "FALSE",
EPIT = "TRUE"
) %>%
mutate(
TOTAL = EPIF + EPIT,
EPIRATE = round_half_up((EPIT / TOTAL) * 100,1)
) %>%
select(PAT_COUNTY, COUNTY, TOTAL, EPIF, EPIT, EPIRATE) %>%
# filter by total
# filter(
# TOTAL >= 300
# ) %>%
arrange(EPIRATE %>% desc())
lf_epi_rate_county %>%
datatable()
Group by patient county. This table excludes fewer than 30 cases in a county in a given year.
# pivot to crate the rate based on logical EPI column
lf_epi_rate_county_yr <- lf_epi %>%
group_by(YR, PAT_COUNTY, EPI) %>%
summarise(
EPI_CNT = n()
) %>%
# Join to add county names
left_join(tx_fips, by=c( "PAT_COUNTY" = "county_code")) %>%
ungroup() %>%
rename(COUNTY = county) %>%
pivot_wider(names_from = EPI, values_from = EPI_CNT) %>%
rename(
EPIF = "FALSE",
EPIT = "TRUE"
) %>%
mutate(
TOTAL = EPIF + EPIT,
EPIRATE = round_half_up((EPIT / TOTAL) * 100,1)) %>%
# filter caseload
filter(TOTAL >= 30)
# select, pivot to see years.
lf_epi_rate_county_yr_table <- lf_epi_rate_county_yr %>%
select(YR, COUNTY, EPIRATE) %>%
pivot_wider(names_from = YR, values_from = EPIRATE) %>%
arrange(`2019` %>% desc())
lf_epi_rate_county_yr_table %>% datatable()
We combine all years of data to compare the percentage of episiotomy cases. The TRUE value is the percentage of cases where an episiotomy is performed.
lf_epi %>%
tabyl(EPI) %>%
rename(count = n) %>%
adorn_pct_formatting()
Creating a summary table in case we want it.
lf_epi_rate_tx_summary <- lf_epi %>%
group_by(EPI) %>%
summarize(CNT = n()) %>%
pivot_wider(names_from = EPI, values_from = CNT) %>%
rename(
EPIF = "FALSE",
EPIT = "TRUE"
) %>%
mutate(
SUMMARY = "TX",
CATEGORY = "EPISIOTOMY",
MEASUREMENT = "RATE",
TOTAL = EPIF + EPIT,
VALUE = round_half_up((EPIT / TOTAL) * 100,1) # EPIRATE
) %>%
select(SUMMARY, CATEGORY, MEASUREMENT, VALUE)
lf_epi_rate_tx_summary
lf_epi_rate_tx_yr_summary <- lf_epi %>%
group_by(YR, EPI) %>%
summarise(
EPI_CNT = n()
) %>%
pivot_wider(names_from = EPI, values_from = EPI_CNT) %>%
rename(
EPIF = "FALSE",
EPIT = "TRUE"
) %>%
mutate(
SUMMARY = "TX",
CATEGORY = "EPISIOTOMY",
MEASUREMENT = "RATE",
TOTAL = EPIF + EPIT,
VALUE = round_half_up((EPIT / TOTAL) * 100,1) # EPIRATE
) %>%
select(
YR, SUMMARY, CATEGORY, MEASUREMENT, VALUE
)
lf_epi_rate_tx_yr_summary
lf_epi_rate_hosp_yr_summary <- lf_epi_rate_hosp_yr %>%
ungroup() %>%
group_by(YR) %>%
summarize(
SUMMARY = "HOSPITAL",
CATEGORY = "EPISIOTOMY",
MEASUREMENT = "MEAN_OF_RATE",
# MEDIAN = median(EPIRATE),
VALUE = round_half_up(mean(EPIRATE, na.rm = TRUE),1) #MEAN
)
lf_epi_rate_hosp_yr_summary
Writing out aggregate files. Here is a list of CSVs exports:
Plus some data outputs for 03-analysis-summaries.
# lf_epi %>% write_rds("data-processed/epi.rds")
if (test_flag == F) lf_epi_rate_tx_yr_summary %>%
write_rds("data-processed/lf_epi_rate_tx_yr_summary.rds")
if (test_flag == F) lf_epi_rate_hosp_yr_summary %>%
write_rds("data-processed/lf_epi_rate_hosp_yr_summary.rds")
if (test_flag == F) lf_epi_rate_hosp_yr %>%
write_rds("data-processed/lf_epi_rate_hosp_yr.rds")
if (test_flag == F) lf_epi_rate_county_yr %>%
write_csv("exports/lf_epi_rate_county_yr.csv")
beepr::beep(4)