library(tidyverse)
library(janitor)
library(lubridate)
# Import ----
# Read the raw Riverside test records from CSV and standardise the column
# names with janitor::clean_names(). ZIPCODE is explicitly read as character
# instead of being type-guessed; the remaining columns use readr's guesses.
data <- clean_names(
  read_csv(
    "data-raw/Riverside_Covid_tests_zip.csv",
    col_types = cols(ZIPCODE = col_character())
  )
)

# Print a compact overview of the imported columns.
glimpse(data)
## Rows: 1,967,201
## Columns: 4
## $ lab_result <chr> "Negative", "Negative", "Positive", "Negative", "Negativ…
## $ zipcode <chr> "92536", "92536", "92536", "92592", "92592", "92592", "9…
## $ name <chr> "AGUANGA", "AGUANGA", "AGUANGA", "TEMECULA", "TEMECULA",…
## $ new_lab_date <chr> "4/5/2020", "4/5/2020", "7/26/2020", "11/27/2020", "1/4/…
# Cleaning ----
# Build the cleaned table:
#   * parse the month/day/year strings in `new_lab_date` into a Date column,
#   * keep lab_date, place (renamed from `name`), zipcode and lab_result,
#     in that order,
#   * sort rows by ascending lab_date.
data_clean <- data %>%
  mutate(lab_date = mdy(new_lab_date)) %>%
  select(lab_date, place = name, zipcode, lab_result) %>%
  arrange(lab_date)

# Print a compact overview of the cleaned columns.
glimpse(data_clean)
## Rows: 1,967,201
## Columns: 4
## $ lab_date <date> 2020-01-04, 2020-01-04, 2020-01-04, 2020-01-04, 2020-01-0…
## $ place <chr> "TEMECULA", "MURRIETA", "INDIO", "INDIO", "HEMET", "RIVERS…
## $ zipcode <chr> "92591", "92563", "92203", "92203", "92543", "92503", "928…
## $ lab_result <chr> "Negative", "Negative", "Negative", "Positive", "Negative"…
# Export ----
# Write the cleaned table to disk as an RDS file.
write_rds(data_clean, "data-processed/riverside.rds")