ipeds

An R package to interface with the Integrated Postsecondary Education Data System.
Github: https://github.com/jbryer/ipeds
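The download script below only requires the xml2 and rvest packages. To install the ipeds package itself, one option (a sketch, assuming the remotes package is available) is to install it directly from the GitHub repository:

# Install the ipeds package from GitHub (assumes the remotes package is installed).
# install.packages('remotes')
remotes::install_github('jbryer/ipeds')
library(ipeds)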
The following R script will download all available IPEDS data files.

library(xml2)
library(rvest)

out_dir <- '~/Downloads/IPEDS/' # Location to put the downloaded files
years <- 2023:1980
ipeds_base <- 'https://nces.ed.gov/ipeds/datacenter/'
ipeds_url <- 'https://nces.ed.gov/ipeds/datacenter/DataFiles.aspx?year='

error_links <- c() # Save any links that could not be downloaded.

for(year in years) {
    cat(paste0('Downloading year ', year, '...\n'))
    dir.create(paste0(out_dir, '/', year), showWarnings = FALSE, recursive = TRUE)
    page <- read_html(paste0(ipeds_url, year))
    tables <- page |> html_nodes("table") |> html_table(convert = FALSE)
    # Guessing the one with the most rows is the one we want to keep as the index
    tab_index <- lapply(tables, nrow) |> unlist() |> which.max()
    write.csv(tables[[tab_index]],
              file = paste0(out_dir, '/', year, '/_TOC_', year, '.csv'),
              row.names = FALSE)
    links <- html_attr(html_nodes(page, "a"), "href")
    zip_files <- links[grep("\\.zip", links)]
    for(i in zip_files) {
        dest <- paste0(out_dir, '/', year, '/', basename(i))
        if(!file.exists(dest)) {
            cat(paste0('Downloading ', basename(i), '...\n'))
            tryCatch({
                # Binary mode so the zip files are not corrupted on Windows.
                download.file(url = paste0(ipeds_base, i), destfile = dest, mode = 'wb')
            }, error = function(e) {
                error_links <<- c(error_links, paste0(ipeds_base, i))
                print(e)
            })
        }
    }
}

# Print any links that could not be downloaded
error_links
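The script saves the zip archives without extracting them. As a possible next step (a sketch, not part of the script above; read_year is an illustrative helper name), one year's archives can be unzipped and the extracted CSV files read into a named list using base R:

# Sketch: unzip one year's archives and read the extracted CSV files into a named list.
read_year <- function(year, dir = out_dir) {
    year_dir <- paste0(dir, '/', year)
    for(z in list.files(year_dir, pattern = '\\.zip$', full.names = TRUE)) {
        unzip(z, exdir = year_dir) # extract next to the zip file
    }
    csvs <- list.files(year_dir, pattern = '\\.csv$', full.names = TRUE)
    csvs <- csvs[!grepl('_TOC_', csvs)] # skip the table-of-contents file written above
    setNames(lapply(csvs, read.csv), basename(csvs))
}
# Example usage:
# data_2023 <- read_year(2023)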
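If error_links is not empty after the run, the failed downloads can be retried. A minimal sketch (the retry folder name is arbitrary):

# Sketch: retry any downloads that failed during the main loop.
retry_dir <- paste0(out_dir, '/retry')
dir.create(retry_dir, showWarnings = FALSE, recursive = TRUE)
for(link in error_links) {
    tryCatch(
        download.file(url = link, destfile = paste0(retry_dir, '/', basename(link)), mode = 'wb'),
        error = function(e) print(e)
    )
}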