@@ -28,47 +28,22 @@ github_dependents <- function(owner,
2828 max_pages = 1000 ,
2929 verbose = TRUE ) {
3030
31- # devoptera::args2vars(github_insights )
31+ # devoptera::args2vars(github_dependents )
3232
33- requireNamespace(" rvest" )
34- owner_repo <- NULL ;
3533 messager(" Searching for dependents of:" ,paste(owner ,repo ,sep = " /" ),
3634 v = verbose )
37- url <- paste0(" https://github.com/" , owner ," /" ,repo , " /network/dependents" )
38- # ### Loop over the specified number of pages ####
39- all_dat <- list ()
40- for (i in seq_len(max_pages )) {
41- # Print a message indicating the URL being scraped
42- messager(paste0(" + Scraping page " ,i ," ." ),v = verbose )
43- # Retrieve the HTML content of the page
44- page <- rvest :: read_html(url )
45- box_rows <- rvest :: html_elements(page ," .Box-row" )
46- dt <- (rvest :: html_text2(box_rows )) | >
47- stringr :: str_split(" / |\n |[ ]" , simplify = TRUE ) | >
48- data.table :: data.table() | >
49- `colnames<-`(c(" owner" ," repo" ," stargazers_count" ," forks_count" ))
50- dt [,owner_repo : = paste(owner ,repo ,sep = " /" )]
51- dt <- cbind(target = paste(owner ,repo ,sep = " /" ),dt )
52- all_dat [[i ]] <- dt
53- # ### Find the button for the next page ####
54- buttons <- page | > rvest :: html_nodes(" .paginate-container .btn" )
55- next_buttons <- buttons [rvest :: html_text(buttons )== " Next" ]
56- # ### Check if the button is disabled ####
57- is_disabled <- any(sapply(next_buttons , function (btn ) {
58- btn_attr <- rvest :: html_attr(btn , " disabled" )
59- ! is.na(btn_attr ) && btn_attr == " disabled"
60- }))
61- # ### If the button isn't disabled, update the URL to scrape ####
62- if (isFALSE(is_disabled )) {
63- url <- next_buttons | > rvest :: html_attr(" href" )
64- # ### Otherwise, break the loop ####
65- } else {
66- break
67- }
68- }
69- # ### Bind data from all pages ####
70- all_dat <- data.table :: rbindlist(all_dat ,
71- use.names = TRUE , idcol = " page" )
35+
36+ # ### Method 1: JSON file ####
37+ # URL <- paste0("https://github.com/", owner,"/",repo, "/dependency-graph/sbom")
38+ # j <- jsonlite::fromJSON("~/Downloads/rworkflows_neurogenomics_b017b7a1aeda0026dda330b01cb798ddb5f1d264.json")
39+ # j$packages
40+
41+ # ### Method 2: Webscraping ####
42+ all_dat <- github_dependents_scrape(owner = owner ,
43+ repo = repo ,
44+ token = token ,
45+ max_pages = max_pages ,
46+ verbose = verbose )
7247 # ### Report ####
7348 messager(" Found" ,formatC(nrow(all_dat ),big.mark = " ," ),
7449 " dependents." ,v = verbose )
0 commit comments