CFB Stats Generic Scraper

library(rvest)
library(dplyr)
library(janitor)

Generic Scraper Function

Pass the path of any leaderboard page linked from cfbstats.com/2025/national/index.html, along with a year. The path is the portion of the URL that follows the year segment.

For example, for this URL:

https://cfbstats.com/2025/leader/national/team/offense/split01/category09/sort01.html

The path argument would be:

/leader/national/team/offense/split01/category09/sort01.html
#' Scrape any stat leaderboard from cfbstats.com
#'
#' Builds the URL `https://cfbstats.com/<year>/<path>`, downloads the page,
#' and converts its `table.leaders` element into a data frame.
#'
#' @param path Character scalar. The URL path after the year. A single leading
#'   "/" is optional; it is stripped before the URL is assembled.
#'   Example: "/leader/national/team/offense/split01/category09/sort01.html"
#' @param year Whole-number scalar. Season year (e.g., 2025).
#'   Available years: 2016–2025.
#' @return A data frame of the stats table with snake_case column names and
#'   a `year` column holding the `year` argument, placed as the first column.
scrape_cfbstats <- function(path, year) {
  # Fail early with a readable message instead of an opaque sprintf/fetch error
  stopifnot(
    "`path` must be a single non-missing string" =
      is.character(path) && length(path) == 1L && !is.na(path),
    "`year` must be a single whole number" =
      is.numeric(year) && length(year) == 1L && !is.na(year) &&
        year == round(year)
  )

  # Strip leading slash if present so sprintf doesn't double up
  path <- sub("^/", "", path)

  url <- sprintf("https://cfbstats.com/%d/%s", year, path)

  # Re-signal download/parse failures with the offending URL attached
  page <- tryCatch(
    read_html(url),
    error = function(e) {
      stop("Failed to fetch: ", url, "\n", conditionMessage(e), call. = FALSE)
    }
  )

  tbl_node <- html_element(page, "table.leaders")

  # No match for the selector: report it rather than failing in html_table()
  if (is.na(tbl_node)) {
    stop("No table with class 'leaders' found at: ", url, call. = FALSE)
  }

  tbl_node |>
    html_table(header = TRUE) |>
    clean_names() |>            # snake_case column names via janitor
    # `.env$year` forces the function argument; a bare `year` would resolve to
    # a scraped column of the same name if the table happened to have one.
    mutate(year = .env$year, .before = 1)
}

Example Usage

Single page + year

# Fetch one team-offense leaderboard page for the 2025 season
scoring_offense <- scrape_cfbstats(
  "/leader/national/team/offense/split01/category09/sort01.html",
  2025
)

# Preview the first rows
scoring_offense |> head()
# A tibble: 6 × 11
   year     x name            g    td    fg  x1xp  x2xp safety points points_g
  <dbl> <int> <chr>       <int> <int> <int> <int> <int>  <int>  <int>    <dbl>
1  2025     1 North Texas    14    85    12    77     4      0    631     45.1
2  2025     2 Notre Dame     12    70     5    63     2      1    504     42  
3  2025     3 Indiana        16    87    19    87     0      0    666     41.6
4  2025     4 Utah           13    72    11    68     1      1    537     41.3
5  2025     5 USF            13    67    18    62     2      2    526     40.5
6  2025     6 Tennessee      13    68    14    67     0      0    517     39.8

Loop over multiple years

# Scrape the same team-defense page for each season 2022-2025, then stack
# the per-season tables into one data frame.
rushing_defense_multi <- bind_rows(
  lapply(2022:2025, \(season) {
    # be polite to the server: pause before every request
    Sys.sleep(0.5)
    scrape_cfbstats(
      "/leader/national/team/defense/split01/category01/sort01.html",
      year = season
    )
  })
)

# Column-wise overview of the combined result
rushing_defense_multi |> glimpse()
Rows: 534
Columns: 10
$ year    <int> 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022, 20…
$ x       <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,…
$ name    <chr> "Georgia", "James Madison", "Navy", "Marshall", "South Alabama…
$ g       <int> 15, 11, 12, 13, 13, 13, 14, 13, 13, 13, 13, 13, 14, 12, 13, 13…
$ att     <int> 401, 356, 355, 419, 367, 366, 421, 384, 443, 395, 390, 465, 44…
$ yards   <int> 1155, 882, 1067, 1209, 1221, 1239, 1371, 1274, 1290, 1297, 130…
$ avg     <dbl> 2.88, 2.48, 3.01, 2.89, 3.33, 3.39, 3.26, 3.32, 2.91, 3.28, 3.…
$ td      <int> 7, 8, 7, 10, 8, 10, 10, 13, 14, 6, 13, 4, 12, 8, 17, 10, 14, 1…
$ att_g   <dbl> 26.73, 32.36, 29.58, 32.23, 28.23, 28.15, 30.07, 29.54, 34.08,…
$ yards_g <dbl> 77.00, 80.18, 88.92, 93.00, 93.92, 95.31, 97.93, 98.00, 99.23,…

Player stats example

# Fetch one player leaderboard page for the 2025 season
passing_leaders <- scrape_cfbstats(
  "/leader/national/player/split01/category02/sort01.html",
  2025
)

# Preview the first rows
passing_leaders |> head()
# A tibble: 6 × 17
   year     x name     team  yr    pos       g   att  comp   pct yards yards_att
  <dbl> <int> <chr>    <chr> <chr> <chr> <int> <int> <int> <dbl> <int>     <dbl>
1  2025     1 Drew Me… N Te… FR    QB       14   463   319  68.9  4379       9.5
2  2025     2 Sawyer … Bayl… SR    QB       12   504   304  60.3  3681       7.3
3  2025     3 Caden V… FAU   JR    QB       12   515   345  67    3641       7.1
4  2025     4 Josh Ho… TCU   JR    QB       12   413   272  65.9  3472       8.4
5  2025     5 Joe Fag… UConn SR    QB       12   413   285  69    3448       8.3
6  2025     6 Jayden … USC   JR    QB       13   403   265  65.8  3711       9.2
# ℹ 5 more variables: td <int>, int <int>, rating <dbl>, att_g <dbl>,
#   yards_g <dbl>

Try another

I tried to use Copilot to complete this call, but it didn’t get the URL right, so I fixed it manually.

# Fetch another team-offense leaderboard page (category10) for 2025.
# The path starts with "/" to match the documented `path` format; the
# scraper strips one leading slash, so the resulting URL is unchanged.
total_offense <- scrape_cfbstats(
  path = "/leader/national/team/offense/split01/category10/sort01.html",
  year = 2025
)

# show the result
head(total_offense)
# A tibble: 6 × 10
   year     x name          g rush_yards pass_yards plays total_yards yards_play
  <dbl> <int> <chr>     <int>      <int>      <int> <int>       <int>      <dbl>
1  2025     1 North Te…    14       2722       4452   992        7174       7.23
2  2025     2 Ole Miss     15       2645       4700  1099        7345       6.68
3  2025     3 USF          13       2757       3596   911        6353       6.97
4  2025     4 Utah         13       3462       2816   948        6278       6.62
5  2025     5 Texas St…    13       2887       3260   920        6147       6.68
6  2025     6 Florida …    12       2624       3041   845        5665       6.7 
# ℹ 1 more variable: yards_g <dbl>