Expand this to see code
library(tidyverse)
library(janitor)
# general <- read_rds("data-processed/01-general-results-rep.rds")
# primary <- read_rds("data-processed/01-primary-results-rep.rds")
reps <- read_rds("data-processed/01-house-totals.rds")
The goal is to find which Texas House district races were decided within a 5-point or 10-point margin in both the general and primary elections for each year. Our data spans 2012 to 2024.
library(tidyverse)
library(janitor)
# general <- read_rds("data-processed/01-general-results-rep.rds")
# primary <- read_rds("data-processed/01-primary-results-rep.rds")
reps <- read_rds("data-processed/01-house-totals.rds")This summarise method was developed with the help of chatGPT. It’s explained here.
# Collapse the candidate-level results into one row per race
# (year + election + district) describing its top two finishers.
reps_calcs <- reps |>
  group_by(year, election, district) |>
  # Each candidate's share, in percent, of the votes cast in their own race.
  mutate(
    total_votes = sum(candvotes),
    pct = round(candvotes / total_votes * 100, 2)
  ) |>
  # Rank candidates within each race so rows 1 and 2 are the top two.
  arrange(desc(candvotes), .by_group = TRUE) |>
  # nth() returns NA when a race has no second candidate.
  summarise(
    first_place = nth(name, 1),
    first_place_party = nth(party, 1),
    first_place_inc = nth(incumbent, 1),
    first_place_pct = nth(pct, 1),
    second_place = nth(name, 2),
    second_place_party = nth(party, 2),
    second_place_inc = nth(incumbent, 2),
    second_place_pct = nth(pct, 2),
    .groups = "drop"
  ) |>
  mutate(
    # Gap between the top two shares, in percentage points.
    vote_margin = round_half_up(first_place_pct - second_place_pct, 2),
    # A leader at 50% or below is treated as heading to a runoff ...
    runoff = first_place_pct <= 50,
    # ... and in that case no winner is recorded for the race.
    winner = if_else(runoff, NA_character_, first_place)
  ) |>
  arrange(year, election, district)
reps_calcs |> head()
Here we add flags marking whether a race was competitive, meaning the margin was within 5 points or within 10 points.
The peek below shows just the margin and flag columns as a check.
# Flag competitive races: flg_5 marks a vote margin of 5 points or less,
# flg_10 a margin of 10 points or less. The comparison already yields a
# logical vector, so no if_else() wrapper (or T/F shorthand) is needed; a
# missing vote_margin still produces NA in both flags.
reps_flags <- reps_calcs |>
  mutate(
    flg_5 = vote_margin <= 5,
    flg_10 = vote_margin <= 10
  )
# peek at flags
reps_flags |>
head(20) |>
select(vote_margin:flg_10)Doing some manual calculations to compare with prepared data.
reps |>
filter(year == "2012", election == "General", district == 1) |>
adorn_totals()reps_flags |> filter(year == "2012", election == "General", district == 1)A test for 3+ candidate race
reps |>
filter(year == "2019", election == "House District 148", district == 148) |>
adorn_totals()reps_flags |> filter(year == "2019", election == "House District 148", district == 148)reps_flags |>
count(election)Glimpse
reps_flags |> glimpse()Rows: 1,278
Columns: 16
$ year <chr> "2012", "2012", "2012", "2012", "2012", "2012", "20…
$ election <chr> "Democratic Primary", "Democratic Primary", "Democr…
$ district <dbl> 35, 37, 39, 40, 43, 74, 75, 77, 80, 90, 95, 101, 11…
$ first_place <chr> "Longoria", "Oliveira", "Martinez", "Canales", "Gon…
$ first_place_party <chr> "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "…
$ first_place_inc <chr> "N", "Y", "Y", "N", "N", "N", "N", "Y", "Y", "Y", "…
$ first_place_pct <dbl> 55.15, 62.62, 83.88, 30.46, 70.59, 53.82, 52.07, 62…
$ second_place <chr> "Ruiz", "Dominguez", "Campos", "Hernandez", "Zamora…
$ second_place_party <chr> "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "…
$ second_place_inc <chr> "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "…
$ second_place_pct <dbl> 44.85, 37.38, 16.12, 26.21, 29.41, 28.28, 36.05, 37…
$ vote_margin <dbl> 10.30, 25.24, 67.76, 4.25, 41.18, 25.54, 16.02, 24.…
$ runoff <lgl> FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FAL…
$ winner <chr> "Longoria", "Oliveira", "Martinez", NA, "Gonzalez T…
$ flg_5 <lgl> FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FAL…
$ flg_10 <lgl> FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FAL…
reps_flags |> slice_sample(n = 10)
# reps_flags |> names() |> clipr::write_clip()
This sends the R data files (.rds) to data-processed and the .csv export to data-export.
# Final margins table; this copy does not carry the competitiveness flags.
reps_margins <- reps_calcs

# R-native files for analysis: margins only, then margins with flags.
write_rds(reps_margins, "data-processed/02-house-margins.rds")
write_rds(reps_flags, "data-processed/02-house-margins-flags.rds")

# CSV copy of the margins for use in other projects.
write_csv(reps_margins, "data-export/02-house-margins.csv")