From 61d8ca7bb072fba1a3547c5b90a46367357414b4 Mon Sep 17 00:00:00 2001
From: Vlad Pronsky
Date: Mon, 21 Oct 2024 20:28:33 +0300
Subject: [PATCH] feat: private repos support #17

---
 Cargo.lock       |   8 +--
 Cargo.toml       |   4 +-
 readme.md        |   4 +-
 src/gh_client.rs |   5 +-
 src/helpers.rs   | 157 ++++++++++++++++++++++++-----------------------
 src/main.rs      |  50 +++------------
 src/state.rs     |  52 ++++++++++++++++
 7 files changed, 152 insertions(+), 128 deletions(-)
 create mode 100644 src/state.rs

diff --git a/Cargo.lock b/Cargo.lock
index 21cf938..d12e770 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -52,9 +52,9 @@ dependencies = [
 
 [[package]]
 name = "anyhow"
-version = "1.0.89"
+version = "1.0.90"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6"
+checksum = "37bf3594c4c988a53154954629820791dde498571819ae4ca50ca811e060cc95"
 
 [[package]]
 name = "async-trait"
@@ -1424,9 +1424,9 @@ dependencies = [
 
 [[package]]
 name = "serde_json"
-version = "1.0.128"
+version = "1.0.132"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8"
+checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03"
 dependencies = [
  "itoa",
  "memchr",
diff --git a/Cargo.toml b/Cargo.toml
index 7a537bb..e08abc6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,14 +4,14 @@ version = "0.5.0"
 edition = "2021"
 
 [dependencies]
-anyhow = "1.0.89"
+anyhow = "1.0.90"
 axum = "0.7.7"
 chrono = { version = "0.4.38", features = ["serde"] }
 dotenvy = "0.15.7"
 maud = { version = "0.26.0", features = ["axum"] }
 reqwest = { version = "0.12.8", features = ["json", "rustls-tls"], default-features = false }
 serde = { version = "1.0.210", features = ["serde_derive"] }
-serde_json = "1.0.128"
+serde_json = "1.0.132"
 serde_variant = "0.1.3"
 sqlx = { version = "0.8.2", features = ["runtime-tokio", "sqlite"] }
 thousands = "0.2.0"
diff --git a/readme.md b/readme.md
index 78de01c..b9d396f 100644
--- a/readme.md
+++ b/readme.md
@@ -51,7 +51,9 @@ services:
 2. Generate new token > Generate new token (classic)
 3. Enter name, e.g.: `ghstats`. Scopes: `public_repo`
 4. Click genereate token & copy it
-5. Save token to `.env` file with name `GITHUB_TOKEN=???`
+5. Save token to `.env` file with name `GITHUB_TOKEN=ghp_XXX`
+
+Note: If you want to access private repos too, choose the full `repo` scope and set `GHS_INCLUDE_PRIVATE=true` in the environment.
 
 ## How it works?
 
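For reference, the `.env` described in the readme steps above would end up looking roughly like this (the token value is a placeholder; `GHS_INCLUDE_PRIVATE` defaults to false when unset, per the `env_bool` helper added below):

```
# classic token with `public_repo` scope, or full `repo` scope for private repos
GITHUB_TOKEN=ghp_XXX

# opt in to also fetching private repositories (introduced by this patch)
GHS_INCLUDE_PRIVATE=true
```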
diff --git a/src/gh_client.rs b/src/gh_client.rs
index 58991b1..dfb8c21 100644
--- a/src/gh_client.rs
+++ b/src/gh_client.rs
@@ -128,8 +128,9 @@ impl GhClient {
   }
 
   // https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repositories-for-the-authenticated-user
-  pub async fn get_repos(&self) -> Res<Vec<Repo>> {
-    let url = format!("{}/user/repos?visibility=public", self.base_url);
+  pub async fn get_repos(&self, include_private: bool) -> Res<Vec<Repo>> {
+    let visibility = if include_private { "all" } else { "public" };
+    let url = format!("{}/user/repos?visibility={}", self.base_url, visibility);
     let req = self.client.get(url);
     let dat: Vec<Repo> = self.with_pagination(req).await?;
     Ok(dat)
diff --git a/src/helpers.rs b/src/helpers.rs
index 3d811cf..f7cac51 100644
--- a/src/helpers.rs
+++ b/src/helpers.rs
@@ -1,10 +1,11 @@
-use std::collections::HashMap;
+use std::{collections::HashMap, sync::Arc};
 
 use axum::extract::Request;
 
 use crate::{
   db_client::DbClient,
   gh_client::{GhClient, Repo},
+  state::AppState,
   types::Res,
 };
 
@@ -24,22 +25,22 @@ async fn check_hidden_repos(db: &DbClient, repos: &Vec<Repo>) -> Res {
   Ok(())
 }
 
-pub async fn update_metrics(db: &DbClient, gh: &GhClient, filter: &GhsFilter) -> Res {
+pub async fn update_metrics(state: Arc<AppState>) -> Res {
   let stime = std::time::Instant::now();
 
   let date = chrono::Utc::now().to_utc().to_rfc3339();
   let date = date.split("T").next().unwrap().to_owned() + "T00:00:00Z";
 
-  let repos = gh.get_repos().await?;
-  let _ = check_hidden_repos(db, &repos).await?;
+  let repos = state.gh.get_repos(state.include_private).await?;
+  let _ = check_hidden_repos(&state.db, &repos).await?;
 
   let repos = repos //
-    .into_iter()
-    .filter(|r| filter.is_included(&r.full_name, r.fork, r.archived))
+    .iter()
+    .filter(|r| state.filter.is_included(&r.full_name, r.fork, r.archived))
     .collect::<Vec<_>>();
 
   for repo in &repos {
-    match update_repo_metrics(db, gh, &repo, &date).await {
+    match update_repo_metrics(&state.db, &state.gh, &repo, &date).await {
       Err(e) => {
         tracing::warn!("failed to update metrics for {}: {:?}", repo.full_name, e);
         continue;
@@ -50,8 +51,8 @@
   }
 
   tracing::info!("update_metrics took {:?} for {} repos", stime.elapsed(), repos.len());
-  db.update_deltas().await?;
-  sync_stars(db, gh).await?;
+  state.db.update_deltas().await?;
+  sync_stars(&state.db, &state.gh).await?;
 
   Ok(())
 }
@@ -137,6 +138,7 @@ pub async fn sync_stars(db: &DbClient, gh: &GhClient) -> Res {
   Ok(())
 }
 
+#[derive(Debug)]
 pub struct GhsFilter {
   pub include_repos: Vec<String>,
   pub exclude_repos: Vec<String>,
@@ -148,8 +150,8 @@ pub struct GhsFilter {
 impl GhsFilter {
   pub fn new(rules: &str) -> Self {
     let mut default_all = false;
-    let mut exclude_forks = true;
-    let mut exclude_archs = true;
+    let mut exclude_forks = false;
+    let mut exclude_archs = false;
 
     let mut include_repos: Vec<&str> = Vec::new();
     let mut exclude_repos: Vec<&str> = Vec::new();
@@ -216,7 +218,7 @@ impl GhsFilter {
       }
 
       // skip wildcards for forks / archived
-      if is_fork || is_arch {
+      if (self.exclude_forks && is_fork) || (self.exclude_archs && is_arch) {
        continue;
      }
 
@@ -246,66 +248,65 @@ mod tests {
   use super::*;
 
   #[test]
-  fn test_included_with_empty_env() {
+  fn test_empty_filter() {
     let r = &GhsFilter::new("");
+
     assert!(r.is_included("foo/bar", false, false));
     assert!(r.is_included("foo/baz", false, false));
     assert!(r.is_included("abc/123", false, false));
     assert!(r.is_included("abc/xyz-123", false, false));
-    // negative tests – non repo patterns
+
+    // exclude invalid names
     assert!(!r.is_included("foo/", false, false));
     assert!(!r.is_included("/bar", false, false));
     assert!(!r.is_included("foo", false, false));
     assert!(!r.is_included("foo/bar/baz", false, false));
+
+    // include forks / archived
+    assert!(r.is_included("foo/bar", true, false));
+    assert!(r.is_included("foo/bar", false, true));
+    assert!(r.is_included("foo/bar", true, true));
   }
 
   #[test]
-  fn test_included_with_env() {
+  fn test_filter_names() {
     let r = &GhsFilter::new("foo/*,abc/xyz");
+
     assert!(r.is_included("foo/bar", false, false));
-    assert!(r.is_included("foo/abc", false, false));
-    assert!(r.is_included("foo/abc-123", false, false));
+    assert!(r.is_included("foo/123", false, false));
     assert!(r.is_included("abc/xyz", false, false));
-    assert!(!r.is_included("abc/123", false, false));
-    assert!(!r.is_included("foo/bar/baz", false, false));
 
-    // check case sensitivity
-    assert!(r.is_included("FOO/BAR", false, false));
-    assert!(r.is_included("Foo/Bar", false, false));
+    assert!(!r.is_included("foo/bar/baz", false, false));
+    assert!(!r.is_included("abc/123", false, false));
 
-    let r = &GhsFilter::new("FOO/*,Abc/XYZ");
-    assert!(r.is_included("foo/bar", false, false));
-    assert!(r.is_included("foo/abc", false, false));
-    assert!(r.is_included("foo/abc-123", false, false));
-    assert!(r.is_included("abc/xyz", false, false));
+    // include forks / archived
+    assert!(r.is_included("foo/bar", true, false));
+    assert!(r.is_included("foo/bar", false, true));
 
+    // exact org/user match
     let r = &GhsFilter::new("foo/*");
     assert!(!r.is_included("fooo/bar", false, false));
   }
 
   #[test]
-  fn test_include_with_exclude_rule() {
-    let r = &GhsFilter::new("foo/*,!foo/bar");
-    assert!(!r.is_included("foo/bar", false, false));
-    assert!(!r.is_included("FOO/Bar", false, false));
-
-    assert!(r.is_included("foo/abc", false, false));
-    assert!(r.is_included("foo/abc-123", false, false));
-    assert!(!r.is_included("abc/xyz", false, false));
+  fn test_filter_names_case() {
+    let r = &GhsFilter::new("foo/*,abc/xyz");
+    assert!(r.is_included("FOO/BAR", false, false));
+    assert!(r.is_included("Foo/Bar", false, false));
 
-    let r = &GhsFilter::new("foo/*,!foo/bar,!foo/baz,abc/xyz");
-    assert!(!r.is_included("foo/bar", false, false));
-    assert!(!r.is_included("foo/baz", false, false));
+    let r = &GhsFilter::new("FOO/*,Abc/XYZ");
+    assert!(r.is_included("foo/bar", false, false));
+    assert!(r.is_included("foo/baz", false, false));
     assert!(r.is_included("abc/xyz", false, false));
-    assert!(r.is_included("foo/123", false, false));
-    assert!(!r.is_included("abc/123", false, false)); // not in rules, so excluded
   }
 
   #[test]
-  fn test_include_all_expect() {
+  fn test_filter_all_except() {
     let r = &GhsFilter::new("*");
     assert!(r.is_included("foo/bar", false, false));
     assert!(r.is_included("abc/123", false, false));
+    assert!(r.is_included("abc/123", true, false));
+    assert!(r.is_included("abc/123", true, true));
 
     let r = &GhsFilter::new("-*"); // single rule invalid, include all
     assert!(r.is_included("foo/bar", false, false));
@@ -325,55 +326,59 @@ mod tests {
   }
 
   #[test]
-  fn test_exclude_forks() {
-    let r = &GhsFilter::new("*,!fork");
-    assert!(r.is_included("foo/bar", false, false));
-    assert!(!r.is_included("abc/123", true, false));
+  fn test_filter_names_only() {
+    let r = &GhsFilter::new("foo/*,!foo/bar");
+    assert!(!r.is_included("abc/xyz", false, false));
+    assert!(!r.is_included("foo/bar", false, false));
+    assert!(!r.is_included("FOO/Bar", false, false));
 
-    let r = &GhsFilter::new("!fork");
-    assert!(r.is_included("foo/bar", false, false));
-    assert!(!r.is_included("abc/123", true, false));
+    assert!(r.is_included("foo/abc", false, false));
+    assert!(r.is_included("foo/abc", true, false));
+    assert!(r.is_included("foo/abc", true, true));
+
+    let r = &GhsFilter::new("foo/*,!foo/bar,!foo/baz,abc/xyz");
+    assert!(!r.is_included("foo/bar", false, false));
+    assert!(!r.is_included("foo/baz", false, false));
+    assert!(!r.is_included("abc/123", false, false));
 
-    let r = &GhsFilter::new("!fork,abc/123");
-    assert!(r.is_included("abc/123", true, false)); // explicitly added
-    assert!(!r.is_included("abc/xyz", true, false));
+    assert!(r.is_included("foo/123", false, false));
+    assert!(r.is_included("foo/123", true, false));
+    assert!(r.is_included("foo/123", false, true));
 
-    let r = &GhsFilter::new("!fork,abc/*,abc/xyz");
-    assert!(!r.is_included("abc/123", true, false)); // no wildcard for forks
-    assert!(r.is_included("abc/xyz", true, false)); // explicitly added
+    assert!(r.is_included("abc/xyz", false, false));
+    assert!(r.is_included("abc/xyz", true, false));
+    assert!(r.is_included("abc/xyz", false, true));
   }
 
   #[test]
-  fn test_exclude_archived() {
-    let r = &GhsFilter::new("*,!archived");
+  fn test_filter_meta() {
+    let r = &GhsFilter::new("*,!fork,!archived,foo/baz");
+    assert!(r.exclude_forks);
     assert!(r.exclude_archs);
-    assert!(r.is_included("foo/bar", false, false));
-    assert!(!r.is_included("abc/123", false, true));
+    assert!(r.default_all);
 
-    let r = &GhsFilter::new("!archived");
     assert!(r.is_included("foo/bar", false, false));
-    assert!(!r.is_included("abc/123", false, true));
+    assert!(!r.is_included("foo/bar", true, false));
+    assert!(!r.is_included("foo/bar", false, true));
 
-    let r = &GhsFilter::new("!archived,abc/123");
-    assert!(r.is_included("abc/123", false, true)); // explicitly added
-    assert!(!r.is_included("abc/xyz", false, true));
+    assert!(r.is_included("abc/123", false, false));
+    assert!(!r.is_included("abc/123", true, false));
+    assert!(!r.is_included("abc/123", false, true));
 
-    let r = &GhsFilter::new("!archived,abc/*,abc/xyz");
-    assert!(!r.is_included("abc/123", false, true)); // no wildcard for archived
-    assert!(r.is_included("abc/xyz", false, true)); // explicitly added
+    // explicitly added
+    assert!(r.is_included("foo/baz", false, false));
+    assert!(r.is_included("foo/baz", true, false));
+    assert!(r.is_included("foo/baz", false, true));
   }
 
   #[test]
-  fn test_exclude_meta() {
-    let r = &GhsFilter::new("*,!fork,!archived,abc/xyz");
-    assert!(r.exclude_forks);
-    assert!(r.exclude_archs);
-
-    assert!(r.is_included("abc/123", false, false));
-    assert!(!r.is_included("abc/123", true, true));
-    assert!(r.is_included("abc/xyz", true, true));
+  fn test_filter_meta_wildcard() {
+    let r = &GhsFilter::new("!fork,abc/*,abc/xyz");
+    assert!(!r.is_included("abc/123", true, false)); // no wildcard for forks
+    assert!(r.is_included("abc/xyz", true, false)); // explicitly added
 
-    let r = &GhsFilter::new("*,abc/xyz,!fork,!archived");
-    assert!(r.is_included("abc/xyz", true, true));
+    let r = &GhsFilter::new("!archived,abc/*,abc/xyz");
+    assert!(!r.is_included("abc/123", false, true)); // no wildcard for archived
+    assert!(r.is_included("abc/xyz", false, true)); // explicitly added
   }
 }
diff --git a/src/main.rs b/src/main.rs
index ec6a540..eda3077 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,4 +1,4 @@
-use std::sync::{Arc, Mutex};
+use std::sync::Arc;
 
 use axum::routing::get;
 use axum::{http::StatusCode, response::IntoResponse, Router};
@@ -7,49 +7,13 @@ mod db_client;
 mod gh_client;
 mod helpers;
 mod routes;
+mod state;
 mod types;
 
-use db_client::{DbClient, RepoFilter, RepoTotals};
-use gh_client::GhClient;
-use helpers::GhsFilter;
+use db_client::RepoFilter;
+use state::AppState;
 use types::Res;
 
-struct AppState {
-  db: DbClient,
-  gh: GhClient,
-  filter: GhsFilter,
-  last_release: Mutex<String>,
-}
-
-impl AppState {
-  async fn new() -> Res<Self> {
-    let gh_token = std::env::var("GITHUB_TOKEN").unwrap_or_default();
-    if gh_token.is_empty() {
-      tracing::error!("missing GITHUB_TOKEN");
-      std::process::exit(1);
-    }
-
-    let db_path = std::env::var("DB_PATH").unwrap_or("./data/ghstats.db".to_string());
-    tracing::info!("db_path: {}", db_path);
-
-    let db = DbClient::new(&db_path).await?;
-    let gh = GhClient::new(gh_token)?;
-
-    let filter = std::env::var("GHS_FILTER").unwrap_or_default();
-    let filter = GhsFilter::new(&filter);
-
-    let last_release = Mutex::new(env!("CARGO_PKG_VERSION").to_string());
-    Ok(Self { db, gh, filter, last_release })
-  }
-
-  async fn get_repos_filtered(&self, qs: &RepoFilter) -> Res<Vec<RepoTotals>> {
-    let repos = self.db.get_repos(&qs).await?;
-    let repos = repos.into_iter().filter(|x| self.filter.is_included(&x.name, x.fork, x.archived));
-    let repos = repos.collect::<Vec<_>>();
-    Ok(repos)
-  }
-}
-
 async fn check_new_release(state: Arc<AppState>) -> Res {
   let tag = state.gh.get_latest_release_ver("vladkens/ghstats").await?;
   let mut last_tag = state.last_release.lock().unwrap();
@@ -65,13 +29,13 @@ async fn start_cron(state: Arc<AppState>) -> Res {
   use tokio_cron_scheduler::{Job, JobScheduler};
 
   // note: for development, uncomment to update metrics on start
-  // helpers::update_metrics(&state.db, &state.gh, &state.filter).await?;
+  // helpers::update_metrics(state.clone()).await?;
 
   // if new db, update metrics immediately
   let repos = state.db.get_repos(&RepoFilter::default()).await?;
   if repos.len() == 0 {
     tracing::info!("no repos found, load initial metrics");
-    match helpers::update_metrics(&state.db, &state.gh, &state.filter).await {
+    match helpers::update_metrics(state.clone()).await {
       Err(e) => tracing::error!("failed to update metrics: {:?}", e),
       Ok(_) => {}
     }
@@ -91,7 +55,7 @@ async fn start_cron(state: Arc<AppState>) -> Res {
     Box::pin(async move {
       let _ = check_new_release(state.clone()).await;
 
-      match helpers::update_metrics(&state.db, &state.gh, &state.filter).await {
+      match helpers::update_metrics(state.clone()).await {
         Err(e) => tracing::error!("failed to update metrics: {:?}", e),
         Ok(_) => {}
       }
diff --git a/src/state.rs b/src/state.rs
new file mode 100644
index 0000000..5941372
--- /dev/null
+++ b/src/state.rs
@@ -0,0 +1,52 @@
+use std::sync::Mutex;
+
+use crate::{
+  db_client::{DbClient, RepoFilter, RepoTotals},
+  gh_client::GhClient,
+  helpers::GhsFilter,
+  types::Res,
+};
+
+fn env_bool(key: &str) -> bool {
+  let val = std::env::var(key).unwrap_or_else(|_| "false".to_string()).to_lowercase();
+  return val == "true" || val == "1";
+}
+
+pub struct AppState {
+  pub db: DbClient,
+  pub gh: GhClient,
+  pub filter: GhsFilter,
+  pub include_private: bool,
+  pub last_release: Mutex<String>,
+}
+
+impl AppState {
+  pub async fn new() -> Res<Self> {
+    let gh_token = std::env::var("GITHUB_TOKEN").unwrap_or_default();
+    if gh_token.is_empty() {
+      tracing::error!("missing GITHUB_TOKEN");
+      std::process::exit(1);
+    }
+
+    let db_path = std::env::var("DB_PATH").unwrap_or("./data/ghstats.db".to_string());
+    tracing::info!("db_path: {}", db_path);
+
+    let db = DbClient::new(&db_path).await?;
+    let gh = GhClient::new(gh_token)?;
+
+    let filter = std::env::var("GHS_FILTER").unwrap_or_default();
+    let filter = GhsFilter::new(&filter);
+
+    let include_private = env_bool("GHS_INCLUDE_PRIVATE");
+
+    let last_release = Mutex::new(env!("CARGO_PKG_VERSION").to_string());
+    Ok(Self { db, gh, filter, include_private, last_release })
+  }
+
+  pub async fn get_repos_filtered(&self, qs: &RepoFilter) -> Res<Vec<RepoTotals>> {
+    let repos = self.db.get_repos(&qs).await?;
+    let repos = repos.into_iter().filter(|x| self.filter.is_included(&x.name, x.fork, x.archived));
+    let repos = repos.collect::<Vec<_>>();
+    Ok(repos)
+  }
+}
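Not part of the patch itself: a minimal sketch of a test module that could be appended to `src/state.rs` to pin down which values the `env_bool` helper above treats as true. It assumes only the `env_bool` shown in this diff; `std::env::set_var` is safe to call directly on the 2021 edition declared in Cargo.toml.

```rust
#[cfg(test)]
mod tests {
  use super::*;

  #[test]
  fn test_env_bool() {
    // "true" / "1" (case-insensitive) are truthy
    std::env::set_var("GHS_INCLUDE_PRIVATE", "true");
    assert!(env_bool("GHS_INCLUDE_PRIVATE"));

    std::env::set_var("GHS_INCLUDE_PRIVATE", "TRUE");
    assert!(env_bool("GHS_INCLUDE_PRIVATE"));

    std::env::set_var("GHS_INCLUDE_PRIVATE", "1");
    assert!(env_bool("GHS_INCLUDE_PRIVATE"));

    // anything else, including an unset variable, is false
    std::env::set_var("GHS_INCLUDE_PRIVATE", "yes");
    assert!(!env_bool("GHS_INCLUDE_PRIVATE"));

    std::env::remove_var("GHS_INCLUDE_PRIVATE");
    assert!(!env_bool("GHS_INCLUDE_PRIVATE"));
  }
}
```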