WHYY project data cleanup. Problem was inconsistent data in multiple sheets that needed to be combined into single data frame.
library(tidyverse)
library(readxl)
library(purrr)
library(janitor)
This comes from read-xl workflows.
path <- "data-raw/insurance-births-year-hospital-philadelphia-correct-names-edit.xlsx"
sheets <- excel_sheets(path)
births <-
lapply(excel_sheets(path), read_excel, path = path)
births <- lapply(seq_along(sheets), function(i) {
data.frame(sheet = I(sheets[i]), births[[i]])
})
births <- do.call(rbind, births)
births <- births %>% clean_names()
births %>% glimpse()
## Rows: 154
## Columns: 8
## $ sheet <I<chr>> 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019,…
## $ year <dbl> 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 20…
## $ hospital <chr> "Albert Einstein", "Chestnut Hill", "Children's Ho…
## $ private <chr> "308", "*", "323", "*", "133", "1685", "3562", "16…
## $ medicaid <chr> "2440", "*", "66", "*", "381", "2391", "1586", "21…
## $ self_pay_uninsured <chr> "95", "*", "*", "*", "74", "26", "*", "86", "21", …
## $ other <chr> "*", "*", "79", "*", "54", "*", "*", "39", "219", …
## $ unknown <chr> "*", "*", "*", "*", "154", "155", "*", "27", "288"…
births <- births %>%
mutate(
hospital = hospital %>% str_to_upper(),
private = private %>% as.numeric(),
medicaid = medicaid %>% as.numeric(),
self_pay_uninsured = self_pay_uninsured %>% as.numeric(),
other = other %>% as.numeric(),
unknown = unknown %>% as.numeric(),
)
## Warning in private %>% as.numeric(): NAs introduced by coercion
## Warning in medicaid %>% as.numeric(): NAs introduced by coercion
## Warning in self_pay_uninsured %>% as.numeric(): NAs introduced by coercion
## Warning in other %>% as.numeric(): NAs introduced by coercion
## Warning in unknown %>% as.numeric(): NAs introduced by coercion
births %>% glimpse()
## Rows: 154
## Columns: 8
## $ sheet <I<chr>> 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019,…
## $ year <dbl> 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 20…
## $ hospital <chr> "ALBERT EINSTEIN", "CHESTNUT HILL", "CHILDREN'S HO…
## $ private <dbl> 308, NA, 323, NA, 133, 1685, 3562, 169, 988, 356, …
## $ medicaid <dbl> 2440, NA, 66, NA, 381, 2391, 1586, 2114, 936, 2353…
## $ self_pay_uninsured <dbl> 95, NA, NA, NA, 74, 26, NA, 86, 21, 59, NA, NA, 18…
## $ other <dbl> NA, NA, 79, NA, 54, NA, NA, 39, 219, 10, 81, NA, 1…
## $ unknown <dbl> NA, NA, NA, NA, 154, 155, NA, 27, 288, NA, NA, NA,…
births %>% write_excel_csv("data-processed/phi-ins-births.csv")