Skip to content

Commit bf26890

Browse files
bschilder authored and claude committed
Standardize rworkflows CI, update code, and add new functions
- Replace old rworkflows.yml with canonical template (docker on ghcr.io, GITHUB_TOKEN, write-all permissions, devel/RELEASE branches) - Add github_dependents_scrape.R and sourcegraph_code.R - Remove deprecated get_github_url.R - Refactor github_dependents, update github_files_download - Sync documentation with devtools::document() Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent a4e4b26 commit bf26890

15 files changed

Lines changed: 158 additions & 101 deletions

.github/workflows/rworkflows.yml

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,17 @@ name: rworkflows
44
branches:
55
- master
66
- main
7+
- devel
8+
- RELEASE_**
79
pull_request:
810
branches:
911
- master
1012
- main
13+
- devel
14+
- RELEASE_**
1115
jobs:
1216
rworkflows:
17+
permissions: write-all
1318
runs-on: ${{ matrix.config.os }}
1419
name: ${{ matrix.config.os }} (${{ matrix.config.r }})
1520
container: ${{ matrix.config.cont }}
@@ -18,30 +23,32 @@ jobs:
1823
matrix:
1924
config:
2025
- os: ubuntu-latest
21-
r: devel
2226
bioc: devel
23-
cont: bioconductor/bioconductor_docker:devel
24-
rspm: https://packagemanager.rstudio.com/cran/__linux__/focal/release
27+
r: auto
28+
cont: ghcr.io/bioconductor/bioconductor_docker:devel
29+
rspm: ~
2530
- os: macOS-latest
26-
r: latest
2731
bioc: release
32+
r: auto
33+
cont: ~
34+
rspm: ~
2835
- os: windows-latest
29-
r: latest
3036
bioc: release
37+
r: auto
38+
cont: ~
39+
rspm: ~
3140
steps:
3241
- uses: neurogenomics/rworkflows@master
3342
with:
3443
run_bioccheck: ${{ false }}
35-
run_crancheck: ${{ true }}
44+
run_rcmdcheck: ${{ true }}
45+
as_cran: ${{ true }}
3646
run_vignettes: ${{ true }}
3747
has_testthat: ${{ true }}
3848
run_covr: ${{ true }}
3949
run_pkgdown: ${{ true }}
4050
has_runit: ${{ false }}
41-
GITHUB_TOKEN: ${{ secrets.PAT_GITHUB }}
42-
run_docker: ${{ false }}
43-
DOCKER_USERNAME: bschilder
44-
DOCKER_ORG: bschilder
45-
DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
46-
runner_os: ${{ runner.os }}
47-
cache_version: cache-v1
51+
has_latex: ${{ false }}
52+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
53+
run_docker: ${{ true }}
54+
docker_registry: ghcr.io

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ Suggests:
6161
Remotes:
6262
github::neurogenomics/cranlogs,
6363
github::neurogenomics/rworkflows
64-
RoxygenNote: 7.3.2
64+
RoxygenNote: 7.3.3
6565
VignetteBuilder: knitr
6666
License: GPL-3
6767
Config/testthat/edition: 3

NEWS.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55
* `github_branches`
66
- Handle repos with more than 100 branches.
7-
7+
* `github_files_download`
8+
- Can now download files in private repos.
89

910
# echogithub 0.99.2
1011

R/description_extract_i.R

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ description_extract_i <- function(desc_file = NULL,
99
verbose = TRUE){
1010
#### Find or read DESCRIPTION file ####
1111

12+
get_github_url_desc <- utils::getFromNamespace("get_github_url_desc",
13+
ns = "rworkflows")
14+
1215
if(is.null(desc_file)) {
1316
messager("desc_file is required for description_extract.",
1417
"Returning NULL.",v=verbose)
@@ -45,10 +48,10 @@ description_extract_i <- function(desc_file = NULL,
4548
} else if(desc_file$has_fields(f)){
4649
return(desc_file$get_field(f))
4750
} else if(f=="github_url"){
48-
gh_url <- get_github_url(desc_file = desc_file)
51+
gh_url <- get_github_url_desc(desc_file = desc_file)
4952
return(gh_url)
5053
} else if(f=="owner"){
51-
gh_url <- get_github_url(desc_file = desc_file)
54+
gh_url <- get_github_url_desc(desc_file = desc_file)
5255
if(is.null(gh_url)) {
5356
return(NULL)
5457
} else {
@@ -57,7 +60,7 @@ description_extract_i <- function(desc_file = NULL,
5760
)
5861
}
5962
} else if(f=="repo"){
60-
gh_url <- get_github_url(desc_file = desc_file)
63+
gh_url <- get_github_url_desc(desc_file = desc_file)
6164
if(is.null(gh_url)) {
6265
return(NULL)
6366
} else {

R/get_github_url.R

Lines changed: 0 additions & 28 deletions
This file was deleted.

R/github_code.R

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,22 @@
1414
#' @examples
1515
#' \dontrun{
1616
#' ## easily exceeds API limit
17-
#' repos <- github_code(query="Package path:DESCRIPTION", .limit=5)
17+
#' repos <- github_code(query="Package: path:DESCRIPTION", .limit=5)
1818
#' }
1919
github_code <- function(query,
2020
token = gh::gh_token(),
2121
.limit = Inf,
2222
verbose = TRUE){
23+
# devoptera::args2vars(github_code)
2324
owner_repo <- repo <- NULL;
2425

25-
endpoint <- "https://api.github.com/search/code"
26+
endpoint <- "https://api.github.com/search/code"
2627
res <- gh::gh(endpoint,
2728
.token = token,
2829
.limit = .limit,
2930
q = query,
3031
#### only beta version supports full-on regex ####
31-
# q = "/(?-i)Package/ path:/(?-i)^DESCRIPTION$/",
32+
# q = "/(?-i)Package:/ path:/(?-i)^DESCRIPTION$/",
3233
per_page = 100)
3334
dt <- gh_to_dt(gh_response = res$items,
3435
verbose = verbose)

R/github_dependents.R

Lines changed: 13 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -28,47 +28,22 @@ github_dependents <- function(owner,
2828
max_pages = 1000,
2929
verbose = TRUE) {
3030

31-
#devoptera::args2vars(github_insights)
31+
# devoptera::args2vars(github_dependents)
3232

33-
requireNamespace("rvest")
34-
owner_repo <- NULL;
3533
messager("Searching for dependents of:",paste(owner,repo,sep="/"),
3634
v=verbose)
37-
url <- paste0("https://github.com/", owner,"/",repo, "/network/dependents")
38-
#### Loop over the specified number of pages ####
39-
all_dat <- list()
40-
for (i in seq_len(max_pages)) {
41-
# Print a message indicating the URL being scraped
42-
messager(paste0("+ Scraping page ",i,"."),v=verbose)
43-
# Retrieve the HTML content of the page
44-
page <- rvest::read_html(url)
45-
box_rows <- rvest::html_elements(page,".Box-row")
46-
dt <- (rvest::html_text2(box_rows)) |>
47-
stringr::str_split(" / |\n|[ ]", simplify = TRUE) |>
48-
data.table::data.table() |>
49-
`colnames<-`(c("owner","repo","stargazers_count","forks_count"))
50-
dt[,owner_repo:=paste(owner,repo,sep="/")]
51-
dt <- cbind(target=paste(owner,repo,sep="/"),dt)
52-
all_dat[[i]] <- dt
53-
#### Find the button for the next page ####
54-
buttons <- page |> rvest::html_nodes(".paginate-container .btn")
55-
next_buttons <- buttons[rvest::html_text(buttons)=="Next"]
56-
#### Check if the button is disabled ####
57-
is_disabled <- any(sapply(next_buttons, function(btn) {
58-
btn_attr <- rvest::html_attr(btn, "disabled")
59-
!is.na(btn_attr) && btn_attr == "disabled"
60-
}))
61-
#### If the button isn't disable, update the URL to scrape ####
62-
if (isFALSE(is_disabled)) {
63-
url <- next_buttons |> rvest::html_attr("href")
64-
#### Otherwise, break the loop ####
65-
} else {
66-
break
67-
}
68-
}
69-
#### Bind data from all pages ####
70-
all_dat <- data.table::rbindlist(all_dat,
71-
use.names = TRUE, idcol = "page")
35+
36+
#### Method 1: JSON file ####
37+
# URL <- paste0("https://github.com/", owner,"/",repo, "/dependency-graph/sbom")
38+
# j <- jsonlite::fromJSON("~/Downloads/rworkflows_neurogenomics_b017b7a1aeda0026dda330b01cb798ddb5f1d264.json")
39+
# j$packages
40+
41+
#### Method 2: Webscraping ####
42+
all_dat <- github_dependents_scrape(owner = owner,
43+
repo = repo,
44+
token = token,
45+
max_pages = max_pages,
46+
verbose = verbose)
7247
#### Report ####
7348
messager("Found",formatC(nrow(all_dat),big.mark = ","),
7449
"dependents.",v=verbose)

R/github_dependents_scrape.R

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
github_dependents_scrape <- function(owner,
2+
repo,
3+
token = gh::gh_token(),
4+
max_pages = 1000,
5+
verbose = TRUE){
6+
7+
requireNamespace("rvest")
8+
owner_repo <- NULL;
9+
url <- paste0("https://github.com/", owner,"/",repo, "/network/dependents")
10+
#### Loop over the specified number of pages ####
11+
all_dat <- list()
12+
for (i in seq_len(max_pages)) {
13+
# Print a message indicating the URL being scraped
14+
messager(paste0("+ Scraping page ",i,"."),v=verbose)
15+
# Retrieve the HTML content of the page
16+
page <- rvest::read_html(url)
17+
box_rows <- rvest::html_elements(page,".Box-row")
18+
dt <- (rvest::html_text2(box_rows)) |>
19+
stringr::str_split(" / |\n|[ ]", simplify = TRUE) |>
20+
data.table::data.table() |>
21+
`colnames<-`(c("owner","repo","stargazers_count","forks_count"))
22+
dt[,owner_repo:=paste(owner,repo,sep="/")]
23+
dt <- cbind(target=paste(owner,repo,sep="/"),dt)
24+
all_dat[[i]] <- dt
25+
#### Find the button for the next page ####
26+
buttons <- page |> rvest::html_nodes(".paginate-container .btn")
27+
next_buttons <- buttons[rvest::html_text(buttons)=="Next"]
28+
#### Check if the button is disabled ####
29+
is_disabled <- any(sapply(next_buttons, function(btn) {
30+
btn_attr <- rvest::html_attr(btn, "disabled")
31+
!is.na(btn_attr) && btn_attr == "disabled"
32+
}))
33+
#### If the button isn't disabled, update the URL to scrape ####
34+
if (isFALSE(is_disabled)) {
35+
url <- next_buttons |> rvest::html_attr("href")
36+
#### Otherwise, break the loop ####
37+
} else {
38+
break
39+
}
40+
}
41+
### Bind data from all pages ####
42+
all_dat <- data.table::rbindlist(all_dat,
43+
use.names = TRUE, idcol = "page")
44+
return(all_dat)
45+
}

R/github_files.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,9 @@ github_files <- function(owner,
8686
if(is.null(dt)) return(NULL)
8787
#### Add download link ####
8888
dt[,link_raw:=paste(
89-
"https://github.com", owner, repo, "raw",
90-
branch, path, sep="/"
91-
)]
89+
"https://raw.githubusercontent.com",
90+
owner,repo,branch,path, sep="/")
91+
]
9292
#### Unlist cols ####
9393
unlist_dt(dt = dt,
9494
exclude = "size",

R/github_files_download.R

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,15 @@
1515
#' query = ".md$")
1616
#' filelist_local <- github_files_download(filelist = dt$link_raw)
1717
github_files_download <- function(filelist,
18+
token = gh::gh_token(),
1819
download_dir = tempdir(),
1920
overwrite = FALSE,
2021
timeout = 5*60,
2122
nThread = 1,
2223
verbose = TRUE) {
24+
# devoptera::args2vars(github_files_download)
2325

26+
options(timeout = timeout)
2427
messager("+ Downloading", length(filelist), "files.", v = verbose)
2528
local_files <- parallel::mclapply(stats::setNames(filelist,
2629
filelist),
@@ -40,9 +43,21 @@ github_files_download <- function(filelist,
4043
)
4144
if (!file.exists(destfile) &
4245
isFALSE(overwrite)) {
43-
messager(paste("Downloading:", x),v=verbose)
44-
options(timeout = timeout)
45-
utils::download.file(url = x,
46+
messager("Downloading:",x,v=verbose)
47+
#### Add token to header ####
48+
extra <- getOption("download.file.extra")
49+
if(!is.null(token)) {
50+
extra <- c(extra, "--fail", "-L")
51+
headers <- c(Authorization = paste("token", token))
52+
qh <- shQuote(paste0(names(headers), ": ", headers))
53+
extra <- c(extra, paste("-H", qh))
54+
}
55+
#### Download ####
56+
utils::download.file(url = x,
57+
method = "curl",
58+
quiet = verbose<2,
59+
mode = "wb",
60+
extra = extra,
4661
destfile = destfile)
4762
} else {
4863
messager("Returning pre-existing file:",x,v=verbose)

0 commit comments

Comments (0)