Skip to content

Commit

Permalink
feat(V2 Clients): Client refactor (#1470)
Browse files Browse the repository at this point in the history
Second step of the client refactor after #1464 

Changes:
- vulnerability client interface as described in the design doc; this no
longer performs paging by itself.
- "Matchers" as implementations of the vulnerability interface
- osvmatcher: Matcher that takes in an osv.dev client and gets/hydrates
the vulns. This also performs paging.
  - localmatcher: Replacement/migrated version of the local package.

Commented out Local client in resolve package to be completed in the
followup.

Followup: Decide and implement the VulnClients in resolution.
  • Loading branch information
another-rex authored Jan 6, 2025
1 parent a3ce0d0 commit b5983bd
Show file tree
Hide file tree
Showing 25 changed files with 737 additions and 371 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ require (
github.com/go-git/go-git/v5 v5.12.0
github.com/google/go-cmp v0.6.0
github.com/google/go-containerregistry v0.20.2
github.com/google/osv-scalibr v0.1.6-0.20241219225011-fd6877f0b783
github.com/google/osv-scalibr v0.1.6-0.20250105222824-56e5c3bfb149
github.com/ianlancetaylor/demangle v0.0.0-20240912202439-0a2b6291aafd
github.com/jedib0t/go-pretty/v6 v6.6.5
github.com/muesli/reflow v0.3.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-containerregistry v0.20.2 h1:B1wPJ1SN/S7pB+ZAimcciVD+r+yV/l/DSArMxlbwseo=
github.com/google/go-containerregistry v0.20.2/go.mod h1:z38EKdKh4h7IP2gSfUUqEvalZBqs6AoLeWfUy34nQC8=
github.com/google/osv-scalibr v0.1.6-0.20241219225011-fd6877f0b783 h1:YzLIdmgxXdnYO0oGnS+i0s7kC3uwlVBZe53YIfvtrh4=
github.com/google/osv-scalibr v0.1.6-0.20241219225011-fd6877f0b783/go.mod h1:S8mrRjoWESAOOTq25lJqzxiKR6tbWSFYG8SVb5EFLHk=
github.com/google/osv-scalibr v0.1.6-0.20250105222824-56e5c3bfb149 h1:NR/j8m7lWb1V/izQi7oJlCZ5U/Z6GqM8hkoHghABdTQ=
github.com/google/osv-scalibr v0.1.6-0.20250105222824-56e5c3bfb149/go.mod h1:S8mrRjoWESAOOTq25lJqzxiKR6tbWSFYG8SVb5EFLHk=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
Expand Down
File renamed without changes.
File renamed without changes.
164 changes: 164 additions & 0 deletions internal/clients/clientimpl/localmatcher/localmatcher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
package localmatcher

import (
"context"
"errors"
"fmt"
"os"
"path"
"slices"
"strings"

"github.com/google/osv-scalibr/extractor"
"github.com/google/osv-scanner/internal/imodels"
"github.com/google/osv-scanner/internal/imodels/ecosystem"
"github.com/google/osv-scanner/pkg/models"
"github.com/google/osv-scanner/pkg/reporter"
"github.com/ossf/osv-schema/bindings/go/osvschema"
)

// zippedDBRemoteHost is the base URL of the OSV database export bucket;
// per-ecosystem archives are fetched from <host>/<ecosystem>/all.zip
// (see loadDBFromCache).
const zippedDBRemoteHost = "https://osv-vulnerabilities.storage.googleapis.com"

// envKeyLocalDBCacheDirectory overrides where downloaded databases are cached
// when no explicit path is passed to NewLocalMatcher (see setupLocalDBDirectory).
const envKeyLocalDBCacheDirectory = "OSV_SCANNER_LOCAL_DB_CACHE_DIRECTORY"

// LocalMatcher implements the VulnerabilityMatcher interface by downloading the osv export zip files,
// and performing the matching locally.
type LocalMatcher struct {
	// dbBasePath is the directory the per-ecosystem database archives are cached in.
	dbBasePath string
	// dbs caches loaded databases keyed by ecosystem, so each archive is loaded at most once.
	dbs map[osvschema.Ecosystem]*ZipDB
	// downloadDB, when false, restricts matching to archives already cached on disk
	// (it is passed inverted as the "offline" flag to NewZippedDB).
	downloadDB bool
	// TODO(v2 logging): Remove this reporter
	r reporter.Reporter
}

// NewLocalMatcher returns a LocalMatcher that caches its databases under
// localDBPath (or, when that is empty, a directory derived from the
// OSV_SCANNER_LOCAL_DB_CACHE_DIRECTORY environment variable / user cache
// directory — see setupLocalDBDirectory). downloadDB controls whether
// missing database archives may be downloaded.
func NewLocalMatcher(r reporter.Reporter, localDBPath string, downloadDB bool) (*LocalMatcher, error) {
	dbBasePath, err := setupLocalDBDirectory(localDBPath)
	if err != nil {
		// setupLocalDBDirectory returns an empty path on failure, so the error
		// must reference the requested path, not the (empty) result.
		return nil, fmt.Errorf("could not create local db directory (requested path %q): %w", localDBPath, err)
	}

	return &LocalMatcher{
		dbBasePath: dbBasePath,
		dbs:        make(map[osvschema.Ecosystem]*ZipDB),
		downloadDB: downloadDB,
		r:          r,
	}, nil
}

// Match queries the local databases for vulnerabilities affecting each of the
// given inventories, returning one result slice per inventory, in order.
//
// Commit-only packages and packages whose ecosystem database cannot be loaded
// yield an empty result slice rather than failing the whole match; only an
// empty-ecosystem/no-commit package (which should have been filtered out
// upstream) or context cancellation aborts with an error.
func (matcher *LocalMatcher) Match(ctx context.Context, invs []*extractor.Inventory) ([][]*models.Vulnerability, error) {
	results := make([][]*models.Vulnerability, 0, len(invs))

	// slice to track ecosystems that did not have an offline database available
	var missingDBs []string

	for _, inv := range invs {
		if ctx.Err() != nil {
			return nil, ctx.Err()
		}

		pkg := imodels.FromInventory(inv)
		if pkg.Ecosystem.IsEmpty() {
			if pkg.Commit == "" {
				// This should never happen, as those results will be filtered out before matching
				return nil, errors.New("ecosystem is empty and there is no commit hash")
			}

			// Is a commit based query, skip local scanning
			results = append(results, []*models.Vulnerability{})
			// TODO (V2 logging):
			matcher.r.Infof("Skipping commit scanning for: %s\n", pkg.Commit)

			continue
		}

		db, err := matcher.loadDBFromCache(ctx, pkg.Ecosystem)

		if err != nil {
			if errors.Is(err, ErrOfflineDatabaseNotFound) {
				missingDBs = append(missingDBs, string(pkg.Ecosystem.Ecosystem))
			} else {
				// TODO(V2 logging):
				// the most likely error at this point is that the PURL could not be parsed
				matcher.r.Errorf("could not load db for %s ecosystem: %v\n", pkg.Ecosystem, err)
			}

			results = append(results, []*models.Vulnerability{})

			continue
		}

		results = append(results, db.VulnerabilitiesAffectingPackage(pkg))
	}

	if len(missingDBs) > 0 {
		// Sort BEFORE compacting: slices.Compact only removes *consecutive*
		// duplicates, so compacting the unsorted slice would leave repeated
		// ecosystems in the message.
		slices.Sort(missingDBs)
		missingDBs = slices.Compact(missingDBs)

		// TODO(v2 logging):
		matcher.r.Errorf("could not find local databases for ecosystems: %s\n", strings.Join(missingDBs, ", "))
	}

	return results, nil
}

// loadDBFromCache returns the ZipDB for the given ecosystem, loading it via
// NewZippedDB on first use and memoizing it in matcher.dbs for later calls.
func (matcher *LocalMatcher) loadDBFromCache(ctx context.Context, ecosystem ecosystem.Parsed) (*ZipDB, error) {
	if cached, ok := matcher.dbs[ecosystem.Ecosystem]; ok {
		return cached, nil
	}

	archiveURL := fmt.Sprintf("%s/%s/all.zip", zippedDBRemoteHost, ecosystem.Ecosystem)
	db, err := NewZippedDB(ctx, matcher.dbBasePath, string(ecosystem.Ecosystem), archiveURL, !matcher.downloadDB)
	if err != nil {
		return nil, err
	}

	// TODO(v2 logging): Replace with slog / another logger
	matcher.r.Infof("Loaded %s local db from %s\n", db.Name, db.StoredAt)

	matcher.dbs[ecosystem.Ecosystem] = db

	return db, nil
}

// setupLocalDBDirectory attempts to set up the directory the scanner should
// use to store local databases.
//
// if a local path is explicitly provided either by the localDBPath parameter
// or via the envKeyLocalDBCacheDirectory environment variable, the scanner will
// attempt to use the user cache directory if possible or otherwise the temp directory
//
// if an error occurs at any point when a local path is not explicitly provided,
// the scanner will fall back to the temp directory first before finally erroring
func setupLocalDBDirectory(localDBPath string) (string, error) {
	// fallback to the env variable if a local database path has not been provided
	if localDBPath == "" {
		localDBPath = os.Getenv(envKeyLocalDBCacheDirectory)
	}

	pathWasImplicit := localDBPath == ""

	// when picking a path implicitly, prefer the user cache directory,
	// falling back to the temp directory if it cannot be determined
	if pathWasImplicit {
		if cacheDir, err := os.UserCacheDir(); err == nil {
			localDBPath = cacheDir
		} else {
			localDBPath = os.TempDir()
		}
	}

	// NOTE(review): path.Join is slash-only; filepath.Join would be the
	// OS-correct choice here — confirm Windows support expectations.
	dbDir := path.Join(localDBPath, "osv-scanner")
	if err := os.MkdirAll(dbDir, 0750); err != nil {
		// if we're implicitly picking a path, try the temp directory before giving up
		if pathWasImplicit && localDBPath != os.TempDir() {
			return setupLocalDBDirectory(os.TempDir())
		}

		return "", err
	}

	return dbDir, nil
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package local
package localmatcher

import (
"archive/zip"
Expand All @@ -16,6 +16,7 @@ import (
"path"
"strings"

"github.com/google/osv-scanner/internal/imodels"
"github.com/google/osv-scanner/internal/utility/vulns"
"github.com/google/osv-scanner/pkg/lockfile"
"github.com/google/osv-scanner/pkg/models"
Expand All @@ -37,8 +38,8 @@ type ZipDB struct {

var ErrOfflineDatabaseNotFound = errors.New("no offline version of the OSV database is available")

func fetchRemoteArchiveCRC32CHash(url string) (uint32, error) {
req, err := http.NewRequestWithContext(context.Background(), http.MethodHead, url, nil)
func fetchRemoteArchiveCRC32CHash(ctx context.Context, url string) (uint32, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodHead, url, nil)

if err != nil {
return 0, err
Expand Down Expand Up @@ -75,7 +76,7 @@ func fetchLocalArchiveCRC32CHash(data []byte) uint32 {
return crc32.Checksum(data, crc32.MakeTable(crc32.Castagnoli))
}

func (db *ZipDB) fetchZip() ([]byte, error) {
func (db *ZipDB) fetchZip(ctx context.Context) ([]byte, error) {
cache, err := os.ReadFile(db.StoredAt)

if db.Offline {
Expand All @@ -87,7 +88,7 @@ func (db *ZipDB) fetchZip() ([]byte, error) {
}

if err == nil {
remoteHash, err := fetchRemoteArchiveCRC32CHash(db.ArchiveURL)
remoteHash, err := fetchRemoteArchiveCRC32CHash(ctx, db.ArchiveURL)

if err != nil {
return nil, err
Expand All @@ -98,7 +99,7 @@ func (db *ZipDB) fetchZip() ([]byte, error) {
}
}

req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, db.ArchiveURL, nil)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, db.ArchiveURL, nil)

if err != nil {
return nil, fmt.Errorf("could not retrieve OSV database archive: %w", err)
Expand Down Expand Up @@ -176,10 +177,10 @@ func (db *ZipDB) loadZipFile(zipFile *zip.File) {
// Internally, the archive is cached along with the date that it was fetched
// so that a new version of the archive is only downloaded if it has been
// modified, per HTTP caching standards.
func (db *ZipDB) load() error {
func (db *ZipDB) load(ctx context.Context) error {
db.vulnerabilities = []models.Vulnerability{}

body, err := db.fetchZip()
body, err := db.fetchZip(ctx)

if err != nil {
return err
Expand All @@ -202,14 +203,14 @@ func (db *ZipDB) load() error {
return nil
}

func NewZippedDB(dbBasePath, name, url string, offline bool) (*ZipDB, error) {
func NewZippedDB(ctx context.Context, dbBasePath, name, url string, offline bool) (*ZipDB, error) {
db := &ZipDB{
Name: name,
ArchiveURL: url,
Offline: offline,
StoredAt: path.Join(dbBasePath, name, "all.zip"),
}
if err := db.load(); err != nil {
if err := db.load(ctx); err != nil {
return nil, fmt.Errorf("unable to fetch OSV database: %w", err)
}

Expand All @@ -232,20 +233,30 @@ func (db *ZipDB) Vulnerabilities(includeWithdrawn bool) []models.Vulnerability {
return vulnerabilities
}

func (db *ZipDB) VulnerabilitiesAffectingPackage(pkg lockfile.PackageDetails) models.Vulnerabilities {
var vulnerabilities models.Vulnerabilities
func (db *ZipDB) VulnerabilitiesAffectingPackage(pkg imodels.PackageInfo) []*models.Vulnerability {
var vulnerabilities []*models.Vulnerability

// TODO (V2 Models): remove this once PackageDetails has been migrated
mappedPackageDetails := lockfile.PackageDetails{
Name: pkg.Name,
Version: pkg.Version,
Commit: pkg.Commit,
Ecosystem: lockfile.Ecosystem(pkg.Ecosystem.String()),
CompareAs: lockfile.Ecosystem(pkg.Ecosystem.String()),
DepGroups: pkg.DepGroups,
}

for _, vulnerability := range db.Vulnerabilities(false) {
if vulns.IsAffected(vulnerability, pkg) && !vulns.Include(vulnerabilities, vulnerability) {
vulnerabilities = append(vulnerabilities, vulnerability)
if vulns.IsAffected(vulnerability, mappedPackageDetails) && !vulns.Include(vulnerabilities, vulnerability) {
vulnerabilities = append(vulnerabilities, &vulnerability)
}
}

return vulnerabilities
}

func (db *ZipDB) Check(pkgs []lockfile.PackageDetails) (models.Vulnerabilities, error) {
vulnerabilities := make(models.Vulnerabilities, 0, len(pkgs))
func (db *ZipDB) Check(pkgs []imodels.PackageInfo) ([]*models.Vulnerability, error) {
vulnerabilities := make([]*models.Vulnerability, 0, len(pkgs))

for _, pkg := range pkgs {
vulnerabilities = append(vulnerabilities, db.VulnerabilitiesAffectingPackage(pkg)...)
Expand Down
Loading

0 comments on commit b5983bd

Please sign in to comment.