From b1ce99a12ae512ddea9b251d9ff29b949b100767 Mon Sep 17 00:00:00 2001 From: "Dr. Gernot Starke" Date: Mon, 29 Jan 2024 22:38:44 +0100 Subject: [PATCH] added caching of results, expiration time 100sec --- docs/_pages/statsTable.md | 22 +-------- documentation/adrs/0015-caching-of-results.md | 32 +++++++++++++ go-app/cmd/cache/tryCaching.go | 47 ++++++++++++++++++ go-app/cmd/githubGraphQL/main.go | 2 +- go-app/go.mod | 2 + go-app/go.sum | 4 ++ go-app/internal/api/apiGateway.go | 12 ++--- go-app/internal/domain/domain.go | 48 ++++++++++++++++--- go-app/internal/github/issuesAndBugs.go | 2 +- go-app/internal/plausible/vpvStatistics.go | 2 +- go-app/internal/types/types.go | 12 ++--- go-app/main.go | 17 +++++-- 12 files changed, 155 insertions(+), 47 deletions(-) create mode 100644 documentation/adrs/0015-caching-of-results.md create mode 100644 go-app/cmd/cache/tryCaching.go diff --git a/docs/_pages/statsTable.md b/docs/_pages/statsTable.md index a1bddeb..30ed2d7 100644 --- a/docs/_pages/statsTable.md +++ b/docs/_pages/statsTable.md @@ -32,26 +32,6 @@ header:
- - - - - - - - - - - - - - - - - - - - -
7 Days30 Days12 MonthIssues
VisitorsPageViewsVisitorsPageViewsVisitorsPageViews
collecting data...
+
diff --git a/documentation/adrs/0015-caching-of-results.md b/documentation/adrs/0015-caching-of-results.md new file mode 100644 index 0000000..858ac25 --- /dev/null +++ b/documentation/adrs/0015-caching-of-results.md @@ -0,0 +1,32 @@ +# 15. caching of results + +Date: 2024-01-28 + +## Status + +Accepted + +## Context + +Using the external APIs from Plausible and GitHub is resource intensive, and their results don't change too often. + +## Decision + +Introduce caching, related to [ADR 0011 (rate limit)](./0011-rate-limiter-with-persistently-stored-last-query-time.md) + +For Golang, a few caching libraries/packages exist, most targeting large volumes of data and/or high-throughput applications. + +We tested the simple packages +* [go-cache](https://github.com/patrickmn/go-cache) and +* [zcache](https://github.com/arp242/zcache) + +as both have global and entry-specific expiration times and are simple to use. + +`zcache` is an updated fork of `go-cache`, and go-cache is no longer actively maintained. Therefore we use `zcache`. + +A small example can be found in go-app/cmd/cache/tryCaching.go. 
+ +## Consequences + +* cache needs to be typed +* expiration needs to be set when pushing data into the cache diff --git a/go-app/cmd/cache/tryCaching.go b/go-app/cmd/cache/tryCaching.go new file mode 100644 index 0000000..cab36a5 --- /dev/null +++ b/go-app/cmd/cache/tryCaching.go @@ -0,0 +1,47 @@ +package main + +import ( + "fmt" + "time" + "zgo.at/zcache/v2" + + // go-cache is not actively maintained any longer + "github.com/patrickmn/go-cache" +) + +// small test of the caching package "/go-cache v2.1.0+incompatible" +// source inspired by https://github.com/patrickmn/go-cache + +func main() { + // Create a cache with a default expiration time of 5 minutes, and which + // purges expired items every 10 minutes + c := cache.New(5*time.Minute, 10*time.Minute) + + // in zcache we need to use the function `SetWithExpire` + z := zcache.New[string, any](zcache.NoExpiration, zcache.NoExpiration) + z.SetWithExpire("foo", "bar", zcache.DefaultExpiration) + + // Set the value of the key "foo" to "bar", with the default expiration time + c.Set("foo", "bar", cache.DefaultExpiration) + + // set "exp" to expire after 5 Milliseconds + c.Set("exp", "42", 3000*time.Millisecond) + + c.Set("baz", 42, cache.NoExpiration) + + // wait for 1 second + time.Sleep(time.Second) + + // Get the string associated with the key "foo" from the cache + foo, found := c.Get("foo") + if found { + fmt.Println(foo) + } + + exp, found := c.Get("exp") + if found { + fmt.Println(exp) + } else { + fmt.Println("exp not found in cache, expired?") + } +} diff --git a/go-app/cmd/githubGraphQL/main.go b/go-app/cmd/githubGraphQL/main.go index 04668a8..6bc210c 100644 --- a/go-app/cmd/githubGraphQL/main.go +++ b/go-app/cmd/githubGraphQL/main.go @@ -7,7 +7,7 @@ import ( func main() { - var stats4Repos = make([]types.RepoStats, len(types.Arc42sites)) + var stats4Repos = make([]types.RepoStatsType, len(types.Arc42sites)) github.StatsForRepo("faq.arc42.org-site", &stats4Repos[0]) 
github.StatsForRepo("arc42.org-site", &stats4Repos[1]) diff --git a/go-app/go.mod b/go-app/go.mod index eb4c5ff..72860ba 100644 --- a/go-app/go.mod +++ b/go-app/go.mod @@ -21,6 +21,7 @@ require ( github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-sqlite3 v1.14.19 // indirect + github.com/patrickmn/go-cache v2.1.0+incompatible // indirect github.com/shurcooL/graphql v0.0.0-20230722043721-ed46e5a46466 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/fasthttp v1.50.0 // indirect @@ -31,4 +32,5 @@ require ( google.golang.org/appengine v1.6.7 // indirect google.golang.org/protobuf v1.31.0 // indirect nhooyr.io/websocket v1.8.7 // indirect + zgo.at/zcache/v2 v2.1.0 // indirect ) diff --git a/go-app/go.sum b/go-app/go.sum index f5bf3dd..cd71b61 100644 --- a/go-app/go.sum +++ b/go-app/go.sum @@ -67,6 +67,8 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLDQ0W1YjYsBW+p8U2u7vzgW2SQVmlNazg= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= +github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= @@ -143,3 +145,5 @@ gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 
nhooyr.io/websocket v1.8.7 h1:usjR2uOr/zjjkVMy0lW+PPohFok7PCow5sDjLgX4P4g= nhooyr.io/websocket v1.8.7/go.mod h1:B70DZP8IakI65RVQ51MsWP/8jndNma26DVA/nFSCgW0= +zgo.at/zcache/v2 v2.1.0 h1:USo+ubK+R4vtjw4viGzTe/zjXyPw6R7SK/RL3epBBxs= +zgo.at/zcache/v2 v2.1.0/go.mod h1:gyCeoLVo01QjDZynjime8xUGHHMbsLiPyUTBpDGd4Gk= diff --git a/go-app/internal/api/apiGateway.go b/go-app/internal/api/apiGateway.go index 6690fd5..b9714f0 100644 --- a/go-app/internal/api/apiGateway.go +++ b/go-app/internal/api/apiGateway.go @@ -29,14 +29,14 @@ func init() { // embed templates into compiled binary, so we don't need to read from file system // embeds the templates folder into variable embeddedTemplatesFolder -// === DON'T REMOVE THE COMMENT BELOW +// === KEEP THE COMMENT BELOW // //go:embed *.gohtml var embeddedTemplatesFolder embed.FS // statsHTMLTableHandler returns the usage statistics as html table // 1. sets required http headers needed for CORS -// 2a. for the preflight OPTIONS request, just return the CORS header and OK. +// 2a for the preflight OPTIONS request, return the CORS header and OK. // otherwise: // 2b. start timer // 3. update ArcStats @@ -58,8 +58,8 @@ func statsHTMLTableHandler(w http.ResponseWriter, r *http.Request) { // 2b. set timer var startOfProcessing = time.Now() - // 3. update ArcStats - domain.ArcStats = domain.LoadStats4AllSites() + // 3. 
get ArcStats (hopefully from cache) + domain.ArcStats = domain.Stats4AllSites() // remember how long it took to update statistics domain.ArcStats.HowLongDidItTake = strconv.FormatInt(time.Since(startOfProcessing).Milliseconds(), 10) @@ -91,7 +91,7 @@ func pingHandler(w http.ResponseWriter, r *http.Request) { executeTemplate(w, filepath.Join(TemplatesDir, PingTmpl), r) } -// setCORSHeaders sets specific headers +// SetCORSHeaders sets specific headers // * calls from the "official" URL status.arc42.org are allowed // * calls from localhost or "null" are also allowed func SetCORSHeaders(w *http.ResponseWriter, r *http.Request) { @@ -161,7 +161,7 @@ func LogServerDetails(appVersion string) { log.Info().Msgf("Server region is%s %s", region, location) } -// StartAPIServer creates an http ServeMux with a few predefined routes. +// StartAPIServer creates http ServeMux with a few predefined routes. func StartAPIServer() { mux := http.NewServeMux() diff --git a/go-app/internal/domain/domain.go b/go-app/internal/domain/domain.go index eebad88..6c37c5f 100644 --- a/go-app/internal/domain/domain.go +++ b/go-app/internal/domain/domain.go @@ -9,6 +9,7 @@ import ( "golang.org/x/text/message" "sync" "time" + "zgo.at/zcache/v2" ) var AppVersion string @@ -16,13 +17,28 @@ var AppVersion string // ArcStats collects all data var ArcStats types.Arc42Statistics +// cache expiration should be 5 or 10 minutes +// for testing, set expiration to a few seconds only +const cacheExpirationTime = time.Second * 100 + +// cacheStatsKey is the key under which the results are stored in the cache +const cacheStatsKey = "arc42Stats" + +// create a cache with a default expiration time of 5 minutes, which +// purges expired items every 5 minutes +var cache = zcache.New[string, types.Arc42Statistics](cacheExpirationTime, cacheExpirationTime) + func SetAppVersion(appVersion string) { AppVersion = appVersion log.Debug().Msg("App version set to " + appVersion) } +func GetAppVersion() string { + return 
AppVersion +} + func setServerMetaInfo(a42s *types.Arc42Statistics) { - a42s.AppVersion = AppVersion + a42s.AppVersion = GetAppVersion() location, _ := time.LoadLocation("Europe/Berlin") @@ -33,6 +49,26 @@ func setServerMetaInfo(a42s *types.Arc42Statistics) { a42s.LastUpdatedString = bielefeldTime.Format("2. January 2006, 15:04:03h") } +// Stats4AllSites tries to return the value from the cache instead of calling +// the external APIs. +// If the value is expired, then new data is loaded. +// If it is still available, the existing value is returned +func Stats4AllSites() types.Arc42Statistics { + + var a42s, found = cache.Get(cacheStatsKey) + + // if not found, LoadStats4AllSites() again + if !found { + log.Info().Msg("cache miss, data expired") + a42s = LoadStats4AllSites() + cache.Set(cacheStatsKey, a42s) + } else { + log.Info().Msg("cache hit, data still valid") + a42s.HowLongDidItTake = "0 msec (cached result)" + } + return a42s +} + // LoadStats4AllSites retrieves the statistics for all sites from plausible.io and GitHub repositories. func LoadStats4AllSites() types.Arc42Statistics { @@ -41,8 +77,8 @@ func LoadStats4AllSites() types.Arc42Statistics { var a42s = types.Arc42Statistics{} - var Stats4Sites = make([]types.SiteStats, len(types.Arc42sites)) - var Stats4Repos = make([]types.RepoStats, len(types.Arc42sites)) + var Stats4Sites = make([]types.SiteStatsType, len(types.Arc42sites)) + var Stats4Repos = make([]types.RepoStatsType, len(types.Arc42sites)) // 1.) 
set meta info setServerMetaInfo(&a42s) @@ -81,7 +117,7 @@ func LoadStats4AllSites() types.Arc42Statistics { return a42s } -func calculateTotals(stats [len(types.Arc42sites)]types.SiteStats) types.TotalsForAllSites { +func calculateTotals(stats [len(types.Arc42sites)]types.SiteStatsType) types.TotalsForAllSites { var totals types.TotalsForAllSites for index := range types.Arc42sites { @@ -118,7 +154,7 @@ func calculateTotals(stats [len(types.Arc42sites)]types.SiteStats) types.TotalsF // getUsageStatisticsForSite retrieves the statistics for a single site from plausible.io. // This func is called as Goroutine. -func getUsageStatisticsForSite(site string, thisSiteStats *types.SiteStats, wg *sync.WaitGroup) { +func getUsageStatisticsForSite(site string, thisSiteStats *types.SiteStatsType, wg *sync.WaitGroup) { defer wg.Done() // to avoid repeating the expression, introduce local var @@ -129,7 +165,7 @@ func getUsageStatisticsForSite(site string, thisSiteStats *types.SiteStats, wg * } -func getRepoStatisticsForSite(site string, thisRepoStats *types.RepoStats, wg *sync.WaitGroup) { +func getRepoStatisticsForSite(site string, thisRepoStats *types.RepoStatsType, wg *sync.WaitGroup) { defer wg.Done() thisRepoStats.Site = site diff --git a/go-app/internal/github/issuesAndBugs.go b/go-app/internal/github/issuesAndBugs.go index 1891e1d..9492648 100644 --- a/go-app/internal/github/issuesAndBugs.go +++ b/go-app/internal/github/issuesAndBugs.go @@ -52,7 +52,7 @@ func initGitHubGraphQLClient() *githubv4.Client { } -func StatsForRepo(thisSite string, stats *types.RepoStats) { +func StatsForRepo(thisSite string, stats *types.RepoStatsType) { // Initialize GitHub GraphQL client client := initGitHubGraphQLClient() diff --git a/go-app/internal/plausible/vpvStatistics.go b/go-app/internal/plausible/vpvStatistics.go index 6ebf2a5..465bee0 100644 --- a/go-app/internal/plausible/vpvStatistics.go +++ b/go-app/internal/plausible/vpvStatistics.go @@ -62,7 +62,7 @@ func initPlausibleHandler() 
*plausible.Client { // StatsForSite collects all relevant statistics for a given site // (currently 7D, 30D and 12M) -func StatsForSite(thisSite string, stats *types.SiteStats) { +func StatsForSite(thisSite string, stats *types.SiteStatsType) { // init the required handler // the function ensures it's initialized only once. diff --git a/go-app/internal/types/types.go b/go-app/internal/types/types.go index 6b14cf1..3b0c212 100644 --- a/go-app/internal/types/types.go +++ b/go-app/internal/types/types.go @@ -14,8 +14,8 @@ var Arc42sites = [7]string{ "status.arc42.org", } -// SiteStats contains visitor and pageviews statistics for a single arc42 site or subdomain. -type SiteStats struct { +// SiteStatsType contains visitor and pageviews statistics for a single arc42 site or subdomain. +type SiteStatsType struct { Site string // site name Visitors7d string Visitors7dNr int @@ -37,8 +37,8 @@ type SiteStats struct { NrOfOpenPRs int } -// RepoStats contains information about the repository underlying the site -type RepoStats struct { +// RepoStatsType contains information about the repository underlying the site +type RepoStatsType struct { Site string // site name Repo string // the URL of the GitHub repository NrOfOpenBugs int // the number of open bugs in that repo @@ -74,7 +74,7 @@ type Arc42Statistics struct { AppVersion string // LastUpdated contains the time.Time when the stats have - // been updated. Can help to avoid flooding plausible.io with requests. + // been updated. 
LastUpdated time.Time LastUpdatedString string // as we cannot directly use Golang functions from templates @@ -89,7 +89,7 @@ type Arc42Statistics struct { // Stats4Site contains the statistics per site or subdomain // it also contains Repo stats, like issues and bugs - Stats4Site [len(Arc42sites)]SiteStats + Stats4Site [len(Arc42sites)]SiteStatsType // Totals: sum of all the statistics over all sites Totals TotalsForAllSites diff --git a/go-app/main.go b/go-app/main.go index 25fa60b..d3b4aeb 100644 --- a/go-app/main.go +++ b/go-app/main.go @@ -11,12 +11,14 @@ import ( "time" ) -const AppVersion = "0.5.3" +const appVersion = "0.5.5" // version history // 0.5.x rate limit: limit amount of queries to external APIs // 0.5.2: distinct env package, distinct DB for DEV, handle OPTIONS request // 0.5.3: BUG and BUGS are both recognized +// 0.5.4: start with empty table on homepage +// 0.5.5: caching with zcache // 0.4.7 replace most inline styles by css // 0.4.6 sortable table (a: initial, b...e: fix layout issues), f: fix #94 // 0.4.5 fix missing separators in large numbers @@ -76,15 +78,20 @@ func init() { func main() { // As the main package cannot be imported, constants defined here // cannot directly be used in internal/* packages. - // Therefore, we set the AppVersion via a func. - domain.SetAppVersion(AppVersion) + // Therefore, we set the appVersion via a func. + domain.SetAppVersion(appVersion) // Save the startup metadata persistently, see ADR-0012 - database.SaveStartupTime(time.Now(), AppVersion, env.GetEnv()) + database.SaveStartupTime(time.Now(), appVersion, env.GetEnv()) + + // log the server details + api.LogServerDetails(appVersion) + + // load statistics and add results to cache // Start a server which runs in the background, and waits for http requests // to arrive at predefined routes. // THIS IS A BLOCKING CALL, therefore server details are printed prior to starting the server - api.LogServerDetails(AppVersion) + api.StartAPIServer() }