# install.packages("devtools")
# devtools::install_github("JaseZiv/worldfootballR")
FBRef team stats
Exploring the worldfootballR package where you can get soccer stats from FBref.com and more.
Here we are trying to get goals by team for a season, which are in the standard season ending team stats.
Install the packages
If you don’t already have the devtools and worldfootballR packages, uncomment the code below to install them. HOWEVER, comment it out again after you’ve done so. You only have to install the packages ONE TIME.
Setup
library(tidyverse)
library(janitor)
library(worldfootballR)
Basic use
An attempt to learn how to do certain things.
This gets the URL for specific leagues and years.
The function is described here: fb_league_urls()
# Look up the FBref competition URL for the men's English first tier,
# season ending in 2021.
fb_league_urls(
  country = "ENG",
  gender = "M",
  season_end_year = 2021,
  tier = "1st"
)
[1] "https://fbref.com/en/comps/9/2020-2021/2020-2021-Premier-League-Stats"
Which then lets you get team urls:
This function is described here: fb_teams_urls()
# Get the team page URLs for a league page. This league URL has no season
# segment in its path -- presumably it resolves to the current season; confirm.
fb_teams_urls("https://fbref.com/en/comps/9/Premier-League-Stats")
[1] "https://fbref.com/en/squads/b8fd03ef/Manchester-City-Stats"
[2] "https://fbref.com/en/squads/822bd0ba/Liverpool-Stats"
[3] "https://fbref.com/en/squads/18bb7c10/Arsenal-Stats"
[4] "https://fbref.com/en/squads/8602292d/Aston-Villa-Stats"
[5] "https://fbref.com/en/squads/cff3d9bb/Chelsea-Stats"
[6] "https://fbref.com/en/squads/d07537b9/Brighton-and-Hove-Albion-Stats"
[7] "https://fbref.com/en/squads/e4a775cb/Nottingham-Forest-Stats"
[8] "https://fbref.com/en/squads/361ca564/Tottenham-Hotspur-Stats"
[9] "https://fbref.com/en/squads/cd051869/Brentford-Stats"
[10] "https://fbref.com/en/squads/fd962109/Fulham-Stats"
[11] "https://fbref.com/en/squads/4ba7cbea/Bournemouth-Stats"
[12] "https://fbref.com/en/squads/b2b47a98/Newcastle-United-Stats"
[13] "https://fbref.com/en/squads/7c21e445/West-Ham-United-Stats"
[14] "https://fbref.com/en/squads/19538871/Manchester-United-Stats"
[15] "https://fbref.com/en/squads/a2d435b3/Leicester-City-Stats"
[16] "https://fbref.com/en/squads/d3fd31cc/Everton-Stats"
[17] "https://fbref.com/en/squads/47c64c55/Crystal-Palace-Stats"
[18] "https://fbref.com/en/squads/b74092de/Ipswich-Town-Stats"
[19] "https://fbref.com/en/squads/8cec06e1/Wolverhampton-Wanderers-Stats"
[20] "https://fbref.com/en/squads/33c895d4/Southampton-Stats"
Looking for goals
A goal for a student was to find how many goals teams scored across seasons, to see whether there are generally more over time.
Big 5 Leagues
Exploring functions that will get goals by team each season.
This gets “standard” team stats from all big 5 European leagues using fb_big5_advanced_season_stats()
# Scrape the "standard" team-level stats for the Big 5 leagues for the season
# ending in 2021, then convert the column names to snake_case.
big5_stand_21 <- fb_big5_advanced_season_stats(
  season_end_year = 2021,
  stat_type = "standard",
  team_or_player = "team",
  time_pause = 3
) |>
  clean_names()

# Preview every column and its type.
big5_stand_21 |> glimpse()
Rows: 196
Columns: 36
$ season_end_year <int> 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2021, 20…
$ squad <chr> "Alavés", "Alavés", "Angers", "Angers", "Arminia",…
$ comp <chr> "La Liga", "La Liga", "Ligue 1", "Ligue 1", "Bunde…
$ team_or_opponent <chr> "team", "opponent", "team", "opponent", "team", "o…
$ num_players <dbl> 30, 30, 31, 31, 26, 26, 29, 29, 24, 24, 30, 30, 27…
$ age <dbl> 28.7, 27.5, 27.9, 26.0, 26.0, 25.9, 25.9, 26.7, 25…
$ poss <dbl> 42.8, 57.3, 46.4, 53.8, 42.3, 57.8, 53.5, 46.2, 48…
$ mp_playing <dbl> 38, 38, 38, 38, 34, 34, 38, 38, 38, 38, 38, 38, 38…
$ starts_playing <dbl> 418, 418, 418, 418, 374, 374, 418, 418, 418, 418, …
$ min_playing <dbl> 3420, 3420, 3420, 3420, 3060, 3060, 3420, 3420, 34…
$ mins_per_90_playing <dbl> 38, 38, 38, 38, 34, 34, 38, 38, 38, 38, 38, 38, 38…
$ gls <dbl> 35, 57, 40, 57, 23, 51, 53, 35, 52, 45, 90, 46, 43…
$ ast <dbl> 21, 44, 23, 41, 16, 36, 38, 25, 38, 30, 65, 36, 31…
$ g_a <dbl> 56, 101, 63, 98, 39, 87, 91, 60, 90, 75, 155, 82, …
$ g_minus_pk <dbl> 30, 54, 35, 50, 22, 46, 47, 33, 47, 40, 84, 41, 39…
$ pk <dbl> 5, 3, 5, 7, 1, 5, 6, 2, 5, 5, 6, 5, 4, 5, 5, 1, 2,…
$ p_katt <dbl> 8, 3, 6, 8, 2, 6, 6, 3, 6, 6, 7, 8, 5, 6, 7, 4, 5,…
$ crd_y <dbl> 96, 80, 65, 73, 52, 63, 49, 74, 71, 73, 67, 87, 82…
$ crd_r <dbl> 8, 3, 2, 3, 1, 2, 5, 2, 4, 7, 3, 4, 3, 5, 0, 3, 4,…
$ x_g_expected <dbl> 42.6, 49.9, 41.6, 52.3, 32.9, 56.8, 51.7, 43.0, 52…
$ npx_g_expected <dbl> 36.4, 47.5, 36.9, 46.6, 31.3, 52.1, 47.0, 40.6, 47…
$ x_ag_expected <dbl> 27.4, 37.1, 26.4, 36.8, 23.9, 40.6, 35.1, 31.8, 37…
$ npx_g_x_ag_expected <dbl> 63.7, 84.7, 63.3, 83.4, 55.2, 92.7, 82.1, 72.4, 85…
$ prg_c_progression <dbl> 404, 631, 614, 731, 358, 597, 758, 604, 629, 726, …
$ prg_p_progression <dbl> 938, 1477, 1435, 1639, 880, 1706, 1760, 1175, 1420…
$ gls_per <dbl> 0.92, 1.50, 1.05, 1.50, 0.68, 1.50, 1.39, 0.92, 1.…
$ ast_per <dbl> 0.55, 1.16, 0.61, 1.08, 0.47, 1.06, 1.00, 0.66, 1.…
$ g_a_per <dbl> 1.47, 2.66, 1.66, 2.58, 1.15, 2.56, 2.39, 1.58, 2.…
$ g_minus_pk_per <dbl> 0.79, 1.42, 0.92, 1.32, 0.65, 1.35, 1.24, 0.87, 1.…
$ g_a_minus_pk_per <dbl> 1.34, 2.58, 1.53, 2.39, 1.12, 2.41, 2.24, 1.53, 2.…
$ x_g_per <dbl> 1.12, 1.31, 1.10, 1.38, 0.97, 1.67, 1.36, 1.13, 1.…
$ x_ag_per <dbl> 0.72, 0.98, 0.70, 0.97, 0.70, 1.19, 0.92, 0.84, 0.…
$ x_g_x_ag_per <dbl> 1.84, 2.29, 1.79, 2.34, 1.67, 2.86, 2.29, 1.97, 2.…
$ npx_g_per <dbl> 0.96, 1.25, 0.97, 1.23, 0.92, 1.53, 1.24, 1.07, 1.…
$ npx_g_x_ag_per <dbl> 1.68, 2.23, 1.67, 2.19, 1.62, 2.73, 2.16, 1.91, 2.…
$ url <chr> "https://fbref.com/en/squads/8d6fd021/2020-2021/Al…
# Cache the scraped table on disk so later sessions can read it back
# instead of scraping the website again.
big5_stand_21 |>
  write_rds("data-raw/fbref-team-stats/big5_stand_2021.rds")
My suggestion would be to save the above table as a .rds file on your computer so you don’t have to hit the website every time you work with it.
That big5 table could then be filtered to a specific league.
# Keep only Premier League rows, and only each team's own stats
# (dropping the "opponent" rows).
big5_stand_21 |>
  filter(comp == "Premier League") |>
  filter(team_or_opponent == "team")
Functionalize this
The idea is that I could take what is above, turn it into a function, and swap out the years.
# You can change the start and end years here.
# I would not do more than maybe 5 years at a time.
yrs <- 2020:2022
# Scrape the "standard" team-level stats for the Big 5 leagues.
#
# yr: season end year, passed straight through as `season_end_year`.
#
# Returns the scraped table with its column names run through clean_names().
scrape_big5 <- function(yr) {
  fb_big5_advanced_season_stats(
    season_end_year = yr,
    stat_type = "standard",
    team_or_player = "team",
    time_pause = 3
  ) |>
    clean_names()
}
# Scrape each season in `yrs` and save it to its own .rds file, named by
# the season end year.
for (i in yrs) {
  big5_stand <- scrape_big5(i)
  big5_stand |>
    write_rds(paste0("data-raw/fbref-team-stats/big5_stand_", i, ".rds"))
}
Specific league
This is the same as the Big 5 example, but for a specific league. Here we get the Premier League using fb_season_team_stats().
# Standard team stats for the men's English first tier, season ending in 2021.
fb_season_team_stats(
  country = "ENG",
  gender = "M",
  season_end_year = 2021,
  tier = "1st",
  stat_type = "standard",
  time_pause = 3
)
Again, this could be turned into a function and sent through a range of years if you just wanted one league. Just like the one above.
Goal logs
Interesting information about goals. This function is described here: fb_team_goal_logs(). What I don’t know is how to get different years.
# Team pages whose goal logs we want.
team_urls <- c(
  "https://fbref.com/en/squads/b8fd03ef/Manchester-City-Stats",
  "https://fbref.com/en/squads/822bd0ba/Liverpool-Stats"
)

# Request the goal logs for goals scored "for" each of these teams.
fb_team_goal_logs(team_urls = team_urls, for_or_against = "for", time_pause = 3)