library(tidyverse)
library(janitor)
library(lubridate)

# Import ----

# Load the raw test-level CSV. ZIPCODE is read as character explicitly instead
# of being left to column-type guessing; the remaining columns are guessed.
# clean_names() then standardises the column names (see the output below).
data <- clean_names(
  read_csv(
    file = "data-raw/Riverside_Covid_tests_zip.csv",
    col_types = cols(ZIPCODE = col_character())
  )
)

# Quick structural check of the imported table; recorded output follows.
glimpse(data)
## Rows: 1,967,201
## Columns: 4
## $ lab_result   <chr> "Negative", "Negative", "Positive", "Negative", "Negativ…
## $ zipcode      <chr> "92536", "92536", "92536", "92592", "92592", "92592", "9…
## $ name         <chr> "AGUANGA", "AGUANGA", "AGUANGA", "TEMECULA", "TEMECULA",…
## $ new_lab_date <chr> "4/5/2020", "4/5/2020", "7/26/2020", "11/27/2020", "1/4/…

# Cleaning ----

# Build the analysis table:
#   * parse the month/day/year strings in `new_lab_date` into a Date column,
#   * keep four columns in a fixed order, renaming `name` to `place`,
#   * sort rows chronologically by test date.
data_clean <- data %>%
  mutate(lab_date = mdy(new_lab_date)) %>%
  select(lab_date, place = name, zipcode, lab_result) %>%
  arrange(lab_date)

# Quick structural check of the cleaned table; recorded output follows.
glimpse(data_clean)
## Rows: 1,967,201
## Columns: 4
## $ lab_date   <date> 2020-01-04, 2020-01-04, 2020-01-04, 2020-01-04, 2020-01-0…
## $ place      <chr> "TEMECULA", "MURRIETA", "INDIO", "INDIO", "HEMET", "RIVERS…
## $ zipcode    <chr> "92591", "92563", "92203", "92203", "92543", "92503", "928…
## $ lab_result <chr> "Negative", "Negative", "Negative", "Positive", "Negative"…

# Export ----

# Persist the cleaned table as an RDS file for downstream scripts.
# write_rds() does not create missing directories, so make sure the output
# folder exists first; dir.create() is a no-op (warning suppressed) when the
# folder is already there.
dir.create("data-processed", showWarnings = FALSE, recursive = TRUE)

data_clean %>%
  write_rds("data-processed/riverside.rds")