library(rvest)
library(dplyr)
library(janitor)
CFB Stats Generic Scraper
Generic Scraper Function
Pass any URL path from cfbstats.com/2025/national/index.html along with a year. The path is the portion of the URL after the year segment.
For example, for this URL:
https://cfbstats.com/2025/leader/national/team/offense/split01/category09/sort01.html
The path argument would be:
/leader/national/team/offense/split01/category09/sort01.html
#' Scrape any stat leaderboard from cfbstats.com
#'
#' Builds `https://cfbstats.com/<year>/<path>`, downloads the page, and parses
#' the `<table class="leaders">` element into a data frame.
#'
#' @param path Character scalar. The URL path after the year segment. A
#'   leading "/" is optional.
#'   Example: "/leader/national/team/offense/split01/category09/sort01.html"
#' @param year Single whole number. Season year (e.g., 2025). Available
#'   years: 2016–2025.
#' @return A data frame of the stats table with snake_case column names (via
#'   `janitor::clean_names()`) and a `year` column inserted as the first
#'   column.
scrape_cfbstats <- function(path, year) {
  # Validate up front so bad arguments fail with a clear message instead of an
  # obscure sprintf() or download error further down.
  if (!is.character(path) || length(path) != 1L || is.na(path) ||
        !nzchar(path)) {
    stop("`path` must be a single non-empty character string.", call. = FALSE)
  }
  if (!is.numeric(year) || length(year) != 1L || !is.finite(year) ||
        year != trunc(year)) {
    stop("`year` must be a single whole number.", call. = FALSE)
  }

  # Strip leading slashes so the URL doesn't get "//" after the year segment.
  path <- sub("^/+", "", path)
  url <- sprintf("https://cfbstats.com/%d/%s", year, path)

  # Re-raise download/parse failures with the offending URL attached.
  page <- tryCatch(
    read_html(url),
    error = function(e) {
      stop("Failed to fetch: ", url, "\n", conditionMessage(e), call. = FALSE)
    }
  )

  tbl_node <- html_element(page, "table.leaders")
  # html_element() yields an `xml_missing` object when the selector matches
  # nothing; test for that class explicitly.
  if (inherits(tbl_node, "xml_missing")) {
    stop("No table with class 'leaders' found at: ", url, call. = FALSE)
  }

  tbl_node |>
    html_table(header = TRUE) |>
    clean_names() |> # snake_case column names via janitor
    mutate(year = year, .before = 1)
}
# Example Usage
Single page + year
# Fetch a single leaderboard page for one season (2025).
scoring_offense <- scrape_cfbstats(
  year = 2025,
  path = "/leader/national/team/offense/split01/category09/sort01.html"
)
head(scoring_offense)
# A tibble: 6 × 11
year x name g td fg x1xp x2xp safety points points_g
<dbl> <int> <chr> <int> <int> <int> <int> <int> <int> <int> <dbl>
1 2025 1 North Texas 14 85 12 77 4 0 631 45.1
2 2025 2 Notre Dame 12 70 5 63 2 1 504 42
3 2025 3 Indiana 16 87 19 87 0 0 666 41.6
4 2025 4 Utah 13 72 11 68 1 1 537 41.3
5 2025 5 USF 13 67 18 62 2 2 526 40.5
6 2025 6 Tennessee 13 68 14 67 0 0 517 39.8
Loop over multiple years
# Scrape the same leaderboard for each season 2022-2025, pausing before every
# request, then stack the per-season tables into one data frame.
rushing_defense_multi <- bind_rows(
  lapply(2022:2025, function(season) {
    Sys.sleep(0.5) # be polite to the server
    scrape_cfbstats(
      year = season,
      path = "/leader/national/team/defense/split01/category01/sort01.html"
    )
  })
)
glimpse(rushing_defense_multi)
Rows: 534
Columns: 10
$ year <int> 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022, 20…
$ x <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,…
$ name <chr> "Georgia", "James Madison", "Navy", "Marshall", "South Alabama…
$ g <int> 15, 11, 12, 13, 13, 13, 14, 13, 13, 13, 13, 13, 14, 12, 13, 13…
$ att <int> 401, 356, 355, 419, 367, 366, 421, 384, 443, 395, 390, 465, 44…
$ yards <int> 1155, 882, 1067, 1209, 1221, 1239, 1371, 1274, 1290, 1297, 130…
$ avg <dbl> 2.88, 2.48, 3.01, 2.89, 3.33, 3.39, 3.26, 3.32, 2.91, 3.28, 3.…
$ td <int> 7, 8, 7, 10, 8, 10, 10, 13, 14, 6, 13, 4, 12, 8, 17, 10, 14, 1…
$ att_g <dbl> 26.73, 32.36, 29.58, 32.23, 28.23, 28.15, 30.07, 29.54, 34.08,…
$ yards_g <dbl> 77.00, 80.18, 88.92, 93.00, 93.92, 95.31, 97.93, 98.00, 99.23,…
Player stats example
# Player-level leaderboard: same function, just a "player" path.
passing_leaders <- scrape_cfbstats(
  year = 2025,
  path = "/leader/national/player/split01/category02/sort01.html"
)
head(passing_leaders)
# A tibble: 6 × 17
year x name team yr pos g att comp pct yards yards_att
<dbl> <int> <chr> <chr> <chr> <chr> <int> <int> <int> <dbl> <int> <dbl>
1 2025 1 Drew Me… N Te… FR QB 14 463 319 68.9 4379 9.5
2 2025 2 Sawyer … Bayl… SR QB 12 504 304 60.3 3681 7.3
3 2025 3 Caden V… FAU JR QB 12 515 345 67 3641 7.1
4 2025 4 Josh Ho… TCU JR QB 12 413 272 65.9 3472 8.4
5 2025 5 Joe Fag… UConn SR QB 12 413 285 69 3448 8.3
6 2025 6 Jayden … USC JR QB 13 403 265 65.8 3711 9.2
# ℹ 5 more variables: td <int>, int <int>, rating <dbl>, att_g <dbl>,
# yards_g <dbl>
Try another
I tried using Copilot to complete this call; it didn’t get the URL right, so I fixed it manually.
# The path here has no leading "/"; scrape_cfbstats() accepts either form.
total_offense <- scrape_cfbstats(
  year = 2025,
  path = "leader/national/team/offense/split01/category10/sort01.html"
)
# show the result
head(total_offense)
# A tibble: 6 × 10
year x name g rush_yards pass_yards plays total_yards yards_play
<dbl> <int> <chr> <int> <int> <int> <int> <int> <dbl>
1 2025 1 North Te… 14 2722 4452 992 7174 7.23
2 2025 2 Ole Miss 15 2645 4700 1099 7345 6.68
3 2025 3 USF 13 2757 3596 911 6353 6.97
4 2025 4 Utah 13 3462 2816 948 6278 6.62
5 2025 5 Texas St… 13 2887 3260 920 6147 6.68
6 2025 6 Florida … 12 2624 3041 845 5665 6.7
# ℹ 1 more variable: yards_g <dbl>