The goal of this notebook is to build a categorized list of police agencies in Texas. See the README for information about where the data comes from.

In this notebook we will:

In another notebook we might do further categorization to note agencies from Central Texas if we can find a programattic way to do that.

library(tidyverse)
library(readxl)
library(janitor)
library(DT)

We deal with each data file because they have different import strategies

City agencies

Import the data

city <- read_excel("data-raw/Table_78_Full_time_Law_Enforcement_Employees_Texas_by_Cities_2020.xlsx", skip = 5) %>% clean_names()

city %>% glimpse()
## Rows: 605
## Columns: 5
## $ city                            <chr> "Abernathy", "Abilene", "Addison", "Al…
## $ population                      <dbl> 2694, 124061, 16778, 20078, 8807, 550,…
## $ total_law_enforcement_employees <dbl> 5, 275, 71, 46, 34, 2, 36, 207, 20, 28…
## $ total_officers                  <dbl> 5, 211, 60, 34, 21, 2, 27, 144, 12, 22…
## $ total_civilians                 <dbl> 0, 64, 11, 12, 13, 0, 9, 63, 8, 6, 2, …

Clean cities

  • add a type
  • rename columns
city_cleaned <- city %>% 
  mutate(type = "City") %>% 
  select(
    type,
    agency = city,
    everything()
  )

city_cleaned %>% head()

Pare city

city_pared <- city_cleaned %>% 
  select(type, agency, total_officers)

city_pared %>% glimpse()
## Rows: 605
## Columns: 3
## $ type           <chr> "City", "City", "City", "City", "City", "City", "City",…
## $ agency         <chr> "Abernathy", "Abilene", "Addison", "Alamo", "Alamo Heig…
## $ total_officers <dbl> 5, 211, 60, 34, 21, 2, 27, 144, 12, 22, 17, 50, 354, 18…

County agencies

county <- read_excel("data-raw/Table_80_Full_time_Law_Enforcement_Employees_Texas_by_Metropolitan_and_Nonmetropolitan_Counties_2020.xlsx", skip = 5) %>% clean_names()

county %>% head

Clean counties

  • Fill down the metro/non-metro columns
  • create type column
  • rename for consistency
county_cleaned <- county %>%
  fill(metropolitan_nonmetropolitan) %>%
  mutate(
    metropolitan_nonmetropolitan = str_remove(metropolitan_nonmetropolitan, " Counties"),
    type = "County"
  ) %>% 
  select(
    type,
    metro = metropolitan_nonmetropolitan,
    agency = county,
    everything()
  )

county_cleaned %>% head()

Pare counties

county_pared <- county_cleaned %>% 
  select(type, agency, total_officers)

county_pared %>% glimpse()
## Rows: 219
## Columns: 3
## $ type           <chr> "County", "County", "County", "County", "County", "Coun…
## $ agency         <chr> "Armstrong", "Austin", "Bandera", "Bastrop", "Bell", "B…
## $ total_officers <dbl> 2, 35, 32, 82, 101, 580, 37, 157, 103, 22, 36, 6, 105, …

Universities and Colleges

Two notes from the file:

1 The student enrollment figures provided by the United States Department of Education are for the 2019 school year, the most recent available. The enrollment figures include full-time and part-time students. 2 Student enrollment figures were not available. (values are blank - crit)

school <- read_excel(
    "data-raw/Table_79_Full_time_Law_Enforcement_Employees_Texas_by_University_and_College_2020.xlsx",
    skip = 5,
    n_max = 69
  ) %>% clean_names()

school

Schools clean

  • remove some note numbers from school and campus columns
  • add the type
  • reorder/rename columns
school_cleaned <- school %>% fill(universities_college) %>% 
  mutate(
    campus = str_remove(campus, "\\d"),
    universities_college = str_remove(universities_college, "[:\\d]") %>% trimws(),
    type = "School"
  ) %>% 
  select(
    type,
    agency = universities_college,
    description = campus,
    student_enrollment = student_enrollment1,
    everything()
  )

school_cleaned

Pare schools

school_pared <- school_cleaned %>% 
  select(type, agency, description, total_officers)

school_pared %>% glimpse()
## Rows: 69
## Columns: 4
## $ type           <chr> "School", "School", "School", "School", "School", "Scho…
## $ agency         <chr> "Abilene Christian University", "Alvin Community Colleg…
## $ description    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ total_officers <dbl> 14, 12, 10, 15, 8, 82, 53, 9, 8, 1, 7, 4, 129, 35, 7, 5…

State agencies

state <- read_excel("data-raw/Table_81_Full_time_Law_Enforcement_Employees_Texas_by_State_Tribal_and_Other_Agencies_2020.xlsx", skip = 4) %>% clean_names()

state %>% head(10)

Clean state agencies

  • Fill columns
  • remove : from compound names
  • rename/reorder columns
state_cleaned <- state %>% 
  fill(state_tribal_other, agency) %>% 
  mutate(
    agency = str_remove(agency, ":") %>% trimws()
  ) %>% 
  select(
    type = state_tribal_other,
    agency,
    description = unit_office,
    everything()
  )

state_cleaned %>% head()

Pare state agencies

state_pared <- state_cleaned %>% 
  select(type, agency, description, total_officers)

state_pared %>% glimpse()
## Rows: 67
## Columns: 4
## $ type           <chr> "Tribal Agencies", "Other Agencies", "Other Agencies", …
## $ agency         <chr> "Ysleta del Sur Pueblo Tribal", "Amarillo International…
## $ description    <chr> NA, NA, NA, NA, "Dallas County", "Tarrant County", NA, …
## $ total_officers <dbl> 17, 13, 208, 16, 110, 46, 200, 46, 33, 11, 3, 81, 6, 10…

Combining

agencies_combined <- school_pared %>% 
  bind_rows(state_pared) %>% 
  bind_rows(county_pared) %>% 
  bind_rows(city_pared)

agencies_combined %>% glimpse()
## Rows: 960
## Columns: 4
## $ type           <chr> "School", "School", "School", "School", "School", "Scho…
## $ agency         <chr> "Abilene Christian University", "Alvin Community Colleg…
## $ description    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ total_officers <dbl> 14, 12, 10, 15, 8, 82, 53, 9, 8, 1, 7, 4, 129, 35, 7, 5…

Searchable table

agencies_combined %>% 
  datatable()

Export

agencies_combined %>% write_rds("data-processed/ped_texas_2020.rds")
agencies_combined %>% write_csv("data-processed/ped_texas_2020.csv")