The goal of this notebook is to build a categorized list of police agencies in Texas. See the README for information about where the data comes from.
In this notebook we will:
In another notebook we might do further categorization to note agencies from Central Texas if we can find a programattic way to do that.
library(tidyverse)
library(readxl)
library(janitor)
library(DT)
We deal with each data file because they have different import strategies
Import the data
city <- read_excel("data-raw/Table_78_Full_time_Law_Enforcement_Employees_Texas_by_Cities_2020.xlsx", skip = 5) %>% clean_names()
city %>% glimpse()
## Rows: 605
## Columns: 5
## $ city <chr> "Abernathy", "Abilene", "Addison", "Al…
## $ population <dbl> 2694, 124061, 16778, 20078, 8807, 550,…
## $ total_law_enforcement_employees <dbl> 5, 275, 71, 46, 34, 2, 36, 207, 20, 28…
## $ total_officers <dbl> 5, 211, 60, 34, 21, 2, 27, 144, 12, 22…
## $ total_civilians <dbl> 0, 64, 11, 12, 13, 0, 9, 63, 8, 6, 2, …
city_cleaned <- city %>%
mutate(type = "City") %>%
select(
type,
agency = city,
everything()
)
city_cleaned %>% head()
city_pared <- city_cleaned %>%
select(type, agency, total_officers)
city_pared %>% glimpse()
## Rows: 605
## Columns: 3
## $ type <chr> "City", "City", "City", "City", "City", "City", "City",…
## $ agency <chr> "Abernathy", "Abilene", "Addison", "Alamo", "Alamo Heig…
## $ total_officers <dbl> 5, 211, 60, 34, 21, 2, 27, 144, 12, 22, 17, 50, 354, 18…
county <- read_excel("data-raw/Table_80_Full_time_Law_Enforcement_Employees_Texas_by_Metropolitan_and_Nonmetropolitan_Counties_2020.xlsx", skip = 5) %>% clean_names()
county %>% head
county_cleaned <- county %>%
fill(metropolitan_nonmetropolitan) %>%
mutate(
metropolitan_nonmetropolitan = str_remove(metropolitan_nonmetropolitan, " Counties"),
type = "County"
) %>%
select(
type,
metro = metropolitan_nonmetropolitan,
agency = county,
everything()
)
county_cleaned %>% head()
county_pared <- county_cleaned %>%
select(type, agency, total_officers)
county_pared %>% glimpse()
## Rows: 219
## Columns: 3
## $ type <chr> "County", "County", "County", "County", "County", "Coun…
## $ agency <chr> "Armstrong", "Austin", "Bandera", "Bastrop", "Bell", "B…
## $ total_officers <dbl> 2, 35, 32, 82, 101, 580, 37, 157, 103, 22, 36, 6, 105, …
Two notes from the file:
1 The student enrollment figures provided by the United States Department of Education are for the 2019 school year, the most recent available. The enrollment figures include full-time and part-time students. 2 Student enrollment figures were not available. (values are blank - crit)
school <- read_excel(
"data-raw/Table_79_Full_time_Law_Enforcement_Employees_Texas_by_University_and_College_2020.xlsx",
skip = 5,
n_max = 69
) %>% clean_names()
school
school_cleaned <- school %>% fill(universities_college) %>%
mutate(
campus = str_remove(campus, "\\d"),
universities_college = str_remove(universities_college, "[:\\d]") %>% trimws(),
type = "School"
) %>%
select(
type,
agency = universities_college,
description = campus,
student_enrollment = student_enrollment1,
everything()
)
school_cleaned
school_pared <- school_cleaned %>%
select(type, agency, description, total_officers)
school_pared %>% glimpse()
## Rows: 69
## Columns: 4
## $ type <chr> "School", "School", "School", "School", "School", "Scho…
## $ agency <chr> "Abilene Christian University", "Alvin Community Colleg…
## $ description <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ total_officers <dbl> 14, 12, 10, 15, 8, 82, 53, 9, 8, 1, 7, 4, 129, 35, 7, 5…
state <- read_excel("data-raw/Table_81_Full_time_Law_Enforcement_Employees_Texas_by_State_Tribal_and_Other_Agencies_2020.xlsx", skip = 4) %>% clean_names()
state %>% head(10)
:
from compound namesstate_cleaned <- state %>%
fill(state_tribal_other, agency) %>%
mutate(
agency = str_remove(agency, ":") %>% trimws()
) %>%
select(
type = state_tribal_other,
agency,
description = unit_office,
everything()
)
state_cleaned %>% head()
state_pared <- state_cleaned %>%
select(type, agency, description, total_officers)
state_pared %>% glimpse()
## Rows: 67
## Columns: 4
## $ type <chr> "Tribal Agencies", "Other Agencies", "Other Agencies", …
## $ agency <chr> "Ysleta del Sur Pueblo Tribal", "Amarillo International…
## $ description <chr> NA, NA, NA, NA, "Dallas County", "Tarrant County", NA, …
## $ total_officers <dbl> 17, 13, 208, 16, 110, 46, 200, 46, 33, 11, 3, 81, 6, 10…
agencies_combined <- school_pared %>%
bind_rows(state_pared) %>%
bind_rows(county_pared) %>%
bind_rows(city_pared)
agencies_combined %>% glimpse()
## Rows: 960
## Columns: 4
## $ type <chr> "School", "School", "School", "School", "School", "Scho…
## $ agency <chr> "Abilene Christian University", "Alvin Community Colleg…
## $ description <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ total_officers <dbl> 14, 12, 10, 15, 8, 82, 53, 9, 8, 1, 7, 4, 129, 35, 7, 5…
agencies_combined %>%
datatable()
agencies_combined %>% write_rds("data-processed/ped_texas_2020.rds")
agencies_combined %>% write_csv("data-processed/ped_texas_2020.csv")