Skip to content

Commit

Permalink
scylla blockstore
Browse files Browse the repository at this point in the history
also toy sqlite blockstore
  • Loading branch information
brianolson committed Jan 21, 2025
1 parent e1d4639 commit 5cdb0fe
Show file tree
Hide file tree
Showing 21 changed files with 1,776 additions and 305 deletions.
41 changes: 34 additions & 7 deletions bgs/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -356,13 +356,35 @@ func (bgs *BGS) handleAdminUnbanDomain(c echo.Context) error {
})
}

// PDSRates groups the per-PDS rate and size limits that can be supplied in
// admin JSON request bodies. Every field is tagged omitempty, and FromSlurper
// treats a zero field as "not set" and replaces it with the Slurper default.
type PDSRates struct {
// PerSecond is copied into the PDS record's RateLimit by SubscribeToPds.
PerSecond int64 `json:"per_second,omitempty"`
// PerHour is copied into the PDS record's HourlyEventLimit by SubscribeToPds.
PerHour int64 `json:"per_hour,omitempty"`
// PerDay is copied into the PDS record's DailyEventLimit by SubscribeToPds.
PerDay int64 `json:"per_day,omitempty"`
// CrawlRate is copied into the PDS record's CrawlRateLimit by SubscribeToPds.
CrawlRate int64 `json:"crawl_rate,omitempty"`
// RepoLimit is copied into the PDS record's RepoLimit by SubscribeToPds.
// NOTE(review): presumably a cap on the number of repos per PDS — confirm.
RepoLimit int64 `json:"repo_limit,omitempty"`
}

// FromSlurper fills every unset (zero) field of pr with the corresponding
// default limit configured on s. Fields that already hold a non-zero value are
// left untouched, so explicitly supplied overrides win over the defaults.
func (pr *PDSRates) FromSlurper(s *Slurper) {
	if pr.PerSecond == 0 {
		// The per-second default must land in PerSecond. Writing it to PerHour
		// would leave PerSecond at 0 and clobber a caller-supplied PerHour.
		pr.PerSecond = s.DefaultPerSecondLimit
	}
	if pr.PerHour == 0 {
		pr.PerHour = s.DefaultPerHourLimit
	}
	if pr.PerDay == 0 {
		pr.PerDay = s.DefaultPerDayLimit
	}
	if pr.CrawlRate == 0 {
		// DefaultCrawlLimit is not an int64 on the Slurper, hence the conversion.
		pr.CrawlRate = int64(s.DefaultCrawlLimit)
	}
	if pr.RepoLimit == 0 {
		pr.RepoLimit = s.DefaultRepoLimit
	}
}

// RateLimitChangeRequest is a JSON request body naming a host together with
// the rate limits to apply to it.
// NOTE(review): presumably decoded by handleAdminChangePDSRateLimits — confirm.
// NOTE(review): this listing shows both the flat limit fields and the embedded
// PDSRates (plus two Host fields); it reads like the before and after of one
// change rendered together. Only one set can exist in compiling code.
type RateLimitChangeRequest struct {
Host string `json:"host"`
PerSecond int64 `json:"per_second"`
PerHour int64 `json:"per_hour"`
PerDay int64 `json:"per_day"`
CrawlRate int64 `json:"crawl_rate"`
RepoLimit int64 `json:"repo_limit"`
Host string `json:"host"`
// Embedded so the limit fields are decoded from the top level of the JSON object.
PDSRates
}

func (bgs *BGS) handleAdminChangePDSRateLimits(e echo.Context) error {
Expand Down Expand Up @@ -595,6 +617,9 @@ func (bgs *BGS) handleAdminAddTrustedDomain(e echo.Context) error {

// AdminRequestCrawlRequest is a JSON request body naming the host to crawl and,
// optionally, per-host rate limit overrides.
// NOTE(review): presumably the body decoded by handleAdminRequestCrawl — confirm.
type AdminRequestCrawlRequest struct {
// Hostname is read from the "hostname" JSON key.
Hostname string `json:"hostname"`

// optional: rate limit overrides, embedded so they are read from the top
// level of the JSON object. handleAdminRequestCrawl passes them through
// FromSlurper, which replaces zero fields with the Slurper defaults.
PDSRates
}

func (bgs *BGS) handleAdminRequestCrawl(e echo.Context) error {
Expand Down Expand Up @@ -647,6 +672,8 @@ func (bgs *BGS) handleAdminRequestCrawl(e echo.Context) error {
}

// Skip checking if the server is online for now
rateOverrides := body.PDSRates
rateOverrides.FromSlurper(bgs.slurper)

return bgs.slurper.SubscribeToPds(ctx, host, true, true) // Override Trusted Domain Check
return bgs.slurper.SubscribeToPds(ctx, host, true, true, &rateOverrides) // Override Trusted Domain Check
}
9 changes: 8 additions & 1 deletion bgs/fedmgr.go
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ func (s *Slurper) canSlurpHost(host string) bool {
return !s.newSubsDisabled
}

func (s *Slurper) SubscribeToPds(ctx context.Context, host string, reg bool, adminOverride bool) error {
func (s *Slurper) SubscribeToPds(ctx context.Context, host string, reg bool, adminOverride bool, rateOverrides *PDSRates) error {
// TODO: for performance, lock on the hostname instead of global
s.lk.Lock()
defer s.lk.Unlock()
Expand Down Expand Up @@ -397,6 +397,13 @@ func (s *Slurper) SubscribeToPds(ctx context.Context, host string, reg bool, adm
CrawlRateLimit: float64(s.DefaultCrawlLimit),
RepoLimit: s.DefaultRepoLimit,
}
if rateOverrides != nil {
npds.RateLimit = float64(rateOverrides.PerSecond)
npds.HourlyEventLimit = rateOverrides.PerHour
npds.DailyEventLimit = rateOverrides.PerDay
npds.CrawlRateLimit = float64(rateOverrides.CrawlRate)
npds.RepoLimit = rateOverrides.RepoLimit
}
if err := s.db.Create(&npds).Error; err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion bgs/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ func (s *BGS) handleComAtprotoSyncRequestCrawl(ctx context.Context, body *comatp
}
}

return s.slurper.SubscribeToPds(ctx, host, true, false)
return s.slurper.SubscribeToPds(ctx, host, true, false, nil)
}

func (s *BGS) handleComAtprotoSyncNotifyOfUpdate(ctx context.Context, body *comatprototypes.SyncNotifyOfUpdate_Input) error {
Expand Down
41 changes: 41 additions & 0 deletions carstore/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Carstore

Stores PDS-like repos for a zillion users, with a more limited set of operations (mainly: firehose in, firehose out).

## [ScyllaStore](scylla.go)

Blocks stored in ScyllaDB.
User and PDS metadata stored in gorm (PostgreSQL or sqlite3).

## [FileCarStore](bs.go)

Store 'car slices' from PDS source subscribeRepo firehose streams to filesystem.
Store metadata to gorm postgresql (or sqlite3).
Periodic compaction of car slices into fewer larger car slices.
User and PDS metadata stored in gorm (PostgreSQL or sqlite3).
FileCarStore was the first production carstore and was used through at least 2024-11.

## [SQLiteStore](sqlite_store.go)

Experimental/demo.
Blocks stored in trivial local sqlite3 schema.
Minimal reference implementation from which fancy scalable/performant implementations may be derived.

```sql
CREATE TABLE IF NOT EXISTS blocks (uid int, cid blob, rev varchar, root blob, block blob, PRIMARY KEY(uid,cid))
CREATE INDEX IF NOT EXISTS blocx_by_rev ON blocks (uid, rev DESC)

INSERT INTO blocks (uid, cid, rev, root, block) VALUES (?, ?, ?, ?, ?) ON CONFLICT (uid,cid) DO UPDATE SET rev=excluded.rev, root=excluded.root, block=excluded.block

SELECT rev, root FROM blocks WHERE uid = ? ORDER BY rev DESC LIMIT 1

SELECT cid,rev,root,block FROM blocks WHERE uid = ? AND rev > ? ORDER BY rev DESC

DELETE FROM blocks WHERE uid = ?

SELECT rev, root FROM blocks WHERE uid = ? AND cid = ? LIMIT 1

SELECT block FROM blocks WHERE uid = ? AND cid = ? LIMIT 1

SELECT length(block) FROM blocks WHERE uid = ? AND cid = ? LIMIT 1
```
Loading

0 comments on commit 5cdb0fe

Please sign in to comment.