Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(info): track gpud process self resource usage (file descriptors, RSS, start time, db size) #296

Merged
merged 1 commit into from
Jan 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 68 additions & 1 deletion components/info/component.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,38 @@ package info

import (
"context"
"database/sql"
"encoding/json"
"fmt"
"net"
"os"
"time"

"github.com/leptonai/gpud/components"
info_id "github.com/leptonai/gpud/components/info/id"
"github.com/leptonai/gpud/components/state"
"github.com/leptonai/gpud/log"
"github.com/leptonai/gpud/manager"
"github.com/leptonai/gpud/pkg/file"
"github.com/leptonai/gpud/pkg/memory"
"github.com/leptonai/gpud/pkg/uptime"
"github.com/leptonai/gpud/version"

"github.com/dustin/go-humanize"
)

func New(annotations map[string]string) components.Component {
func New(annotations map[string]string, dbRO *sql.DB) components.Component {
return &component{
annotations: annotations,
dbRO: dbRO,
}
}

var _ components.Component = (*component)(nil)

// component is the info component; the assertion above checks that
// *component satisfies components.Component.
type component struct {
// annotations holds the annotation map handed to New.
annotations map[string]string
// dbRO is the database handle handed to New. It may be nil, in which case
// States leaves the reported DB size at its zero value.
dbRO *sql.DB
}

// Name returns info_id.Name, the name under which this component is known.
func (c *component) Name() string {
	return info_id.Name
}
Expand All @@ -36,6 +46,19 @@ const (
StateKeyMacAddress = "mac_address"
StateKeyPackages = "packages"

StateKeyGPUdPID = "gpud_pid"

StateKeyGPUdUsageFileDescriptors = "gpud_usage_file_descriptors"

StateKeyGPUdUsageMemoryInBytes = "gpud_usage_memory_in_bytes"
StateKeyGPUdUsageMemoryHumanized = "gpud_usage_memory_humanized"

StateKeyGPUdUsageDBInBytes = "gpud_usage_db_in_bytes"
StateKeyGPUdUsageDBHumanized = "gpud_usage_db_humanized"

StateKeyGPUdStartTimeInUnixTime = "gpud_start_time_in_unix_time"
StateKeyGPUdStartTimeHumanized = "gpud_start_time_humanized"

StateNameAnnotations = "annotations"
)

Expand Down Expand Up @@ -63,6 +86,37 @@ func (c *component) States(ctx context.Context) ([]components.State, error) {
rawPayload, _ := json.Marshal(&packageStatus)
managedPackages = string(rawPayload)
}

pid := os.Getpid()
gpudUsageFileDescriptors, err := file.GetCurrentProcessUsage()
if err != nil {
return nil, err
}

gpudUsageMemoryInBytes, err := memory.GetCurrentProcessRSSInBytes()
if err != nil {
return nil, err
}
gpudUsageMemoryHumanized := humanize.Bytes(gpudUsageMemoryInBytes)

var (
dbSize uint64
dbSizeHumanized string
)
if c.dbRO != nil {
dbSize, err = state.ReadDBSize(ctx, c.dbRO)
if err != nil {
return nil, err
}
dbSizeHumanized = humanize.Bytes(dbSize)
}

gpudStartTimeInUnixTime, err := uptime.GetCurrentProcessStartTimeInUnixTime()
if err != nil {
return nil, err
}
gpudStartTimeHumanized := humanize.Time(time.Unix(int64(gpudStartTimeInUnixTime), 0))

return []components.State{
{
Name: StateNameDaemon,
Expand All @@ -72,6 +126,19 @@ func (c *component) States(ctx context.Context) ([]components.State, error) {
StateKeyDaemonVersion: version.Version,
StateKeyMacAddress: mac,
StateKeyPackages: managedPackages,

StateKeyGPUdPID: fmt.Sprintf("%d", pid),

StateKeyGPUdUsageFileDescriptors: fmt.Sprintf("%d", gpudUsageFileDescriptors),

StateKeyGPUdUsageMemoryInBytes: fmt.Sprintf("%d", gpudUsageMemoryInBytes),
StateKeyGPUdUsageMemoryHumanized: gpudUsageMemoryHumanized,

StateKeyGPUdUsageDBInBytes: fmt.Sprintf("%d", dbSize),
StateKeyGPUdUsageDBHumanized: dbSizeHumanized,

StateKeyGPUdStartTimeInUnixTime: fmt.Sprintf("%d", gpudStartTimeInUnixTime),
StateKeyGPUdStartTimeHumanized: gpudStartTimeHumanized,
},
},
{
Expand Down
2 changes: 1 addition & 1 deletion components/info/component_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
func TestComponent(t *testing.T) {
t.Parallel()

component := New(map[string]string{"a": "b"})
component := New(map[string]string{"a": "b"}, nil)

ctx, cancel := context.WithCancel(context.Background())
defer cancel()
Expand Down
32 changes: 14 additions & 18 deletions components/state/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,14 +166,6 @@ UPDATE %s SET %s = '%s' WHERE %s = '%s';
}

var (
currentPages = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: "gpud",
Subsystem: "state_sqlite",
Name: "current_pages",
Help: "current number of pages",
},
)
currentSize = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: "gpud",
Expand All @@ -185,36 +177,40 @@ var (
)

func Register(reg *prometheus.Registry) error {
if err := reg.Register(currentPages); err != nil {
return err
}
if err := reg.Register(currentSize); err != nil {
return err
}
return nil
}

// Requires read-write db instance.
func RecordMetrics(ctx context.Context, db *sql.DB) error {
func ReadDBSize(ctx context.Context, db *sql.DB) (uint64, error) {
var pageCount uint64
err := db.QueryRowContext(ctx, "PRAGMA page_count").Scan(&pageCount)
if err == sql.ErrNoRows {
return errors.New("no page count")
return 0, errors.New("no page count")
}
if err != nil {
return err
return 0, err
}
currentPages.Set(float64(pageCount))

var pageSize uint64
err = db.QueryRowContext(ctx, "PRAGMA page_size").Scan(&pageSize)
if err == sql.ErrNoRows {
return errors.New("no page size")
return 0, errors.New("no page size")
}
if err != nil {
return 0, err
}

return pageCount * pageSize, nil
}

func RecordMetrics(ctx context.Context, db *sql.DB) error {
dbSize, err := ReadDBSize(ctx, db)
if err != nil {
return err
}
currentSize.Set(float64(pageCount * pageSize))
currentSize.Set(float64(dbSize))

return nil
}
Expand Down
34 changes: 28 additions & 6 deletions components/state/state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import (
"time"

"github.com/leptonai/gpud/pkg/sqlite"

"github.com/prometheus/client_golang/prometheus"
)

func TestOpenMemory(t *testing.T) {
Expand Down Expand Up @@ -46,16 +48,36 @@ func TestOpenMemory(t *testing.T) {
func TestRecordMetrics(t *testing.T) {
t.Parallel()

dbRW, dbRO, close := sqlite.OpenTestDB(t)
defer close()
reg := prometheus.NewRegistry()
if err := Register(reg); err != nil {
t.Fatal("failed to register metrics:", err)
}

dbRW, dbRO, cleanup := sqlite.OpenTestDB(t)
defer cleanup()

ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()

if err := RecordMetrics(ctx, dbRO); err == nil {
t.Fatal("expected error but got nil")
if err := CreateTableMachineMetadata(ctx, dbRW); err != nil {
t.Fatal("failed to create table:", err)
}
id, err := CreateMachineIDIfNotExist(ctx, dbRW, dbRW, "")
if err != nil {
t.Fatal("failed to create machine id:", err)
}
if err := RecordMetrics(ctx, dbRW); err != nil {
t.Log(id)

if err := RecordMetrics(ctx, dbRO); err != nil {
t.Fatal("failed to record metrics:", err)
}
if err := Compact(ctx, dbRW); err != nil {
t.Fatal("failed to compact database:", err)
}

size, err := ReadDBSize(ctx, dbRO)
if err != nil {
t.Fatal("failed to read db size:", err)
}
t.Log(size)
}
2 changes: 1 addition & 1 deletion internal/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,7 @@ func New(ctx context.Context, config *lepconfig.Config, endpoint string, cliUID
}

case info_id.Name:
allComponents = append(allComponents, info.New(config.Annotations))
allComponents = append(allComponents, info.New(config.Annotations, dbRO))

case memory_id.Name:
cfg := memory.Config{Query: defaultQueryCfg}
Expand Down
6 changes: 6 additions & 0 deletions pkg/file/descriptors_darwin.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,9 @@ func GetFileHandles() (uint64, uint64, error) {
func GetUsage() (uint64, error) {
// Fixed placeholder result: nothing is measured here, so the count is
// always 0 and the error is always nil.
return 0, nil
}

// GetCurrentProcessUsage returns the number of allocated file handles for the
// current process.
// Not implemented on this platform: it always returns 0 and a nil error.
func GetCurrentProcessUsage() (uint64, error) {
return 0, nil
}
13 changes: 13 additions & 0 deletions pkg/file/descriptors_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,16 @@ func GetUsage() (uint64, error) {
}
return total, nil
}

// GetCurrentProcessUsage reports how many file descriptors the current
// process holds, using the procfs handle obtained from procfs.Self().
// Errors from procfs are returned unchanged together with a zero count.
func GetCurrentProcessUsage() (uint64, error) {
	self, err := procfs.Self()
	if err != nil {
		return 0, err
	}

	var count int
	if count, err = self.FileDescriptorsLen(); err != nil {
		return 0, err
	}
	return uint64(count), nil
}
8 changes: 8 additions & 0 deletions pkg/file/descriptors_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,11 @@ func Test_getFileHandles(t *testing.T) {
t.Fatalf("unused is not 0: %v", unused)
}
}

// TestGetCurrentProcessUsage checks that GetCurrentProcessUsage succeeds and
// logs the count it reports; the value itself is not asserted.
func TestGetCurrentProcessUsage(t *testing.T) {
	fds, err := GetCurrentProcessUsage()
	if err != nil {
		t.Fatalf("failed to get current process usage: %v", err)
	}

	t.Logf("usage: %v", fds)
}
6 changes: 6 additions & 0 deletions pkg/file/descriptors_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,9 @@ func GetFileHandles() (uint64, uint64, error) {
func GetUsage() (uint64, error) {
return 0, nil
}

// GetCurrentProcessUsage returns the number of allocated file handles for the
// current process.
// Not implemented on this platform: it always returns 0 and a nil error.
func GetCurrentProcessUsage() (uint64, error) {
return 0, nil
}
2 changes: 2 additions & 0 deletions pkg/memory/memory.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// Package memory provides utilities for memory usage.
package memory
8 changes: 8 additions & 0 deletions pkg/memory/memory_darwin.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
//go:build darwin
// +build darwin

package memory

// GetCurrentProcessRSSInBytes is the stub compiled under the darwin build
// constraint of this file. It performs no measurement and always returns 0
// and a nil error.
func GetCurrentProcessRSSInBytes() (uint64, error) {
return 0, nil
}
34 changes: 34 additions & 0 deletions pkg/memory/memory_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright 2019 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build linux
// +build linux

package memory

import "github.com/prometheus/procfs"

// GetCurrentProcessRSSInBytes reports the resident memory (RSS) of the
// current process in bytes, taken from the stat data that procfs exposes for
// the process returned by procfs.Self(). Errors from procfs are returned
// unchanged together with a zero size.
func GetCurrentProcessRSSInBytes() (uint64, error) {
	self, err := procfs.Self()
	if err != nil {
		return 0, err
	}

	procStat, err := self.Stat()
	if err != nil {
		return 0, err
	}

	rss := procStat.ResidentMemory()
	return uint64(rss), nil
}
13 changes: 13 additions & 0 deletions pkg/memory/memory_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package memory

import (
"testing"
)

// TestGetCurrentProcessRSSInBytes checks that GetCurrentProcessRSSInBytes
// succeeds and logs the size it reports; the value itself is not asserted.
func TestGetCurrentProcessRSSInBytes(t *testing.T) {
	rss, err := GetCurrentProcessRSSInBytes()
	if err != nil {
		t.Fatalf("failed to get bytes: %v", err)
	}

	t.Logf("bytes: %v", rss)
}
8 changes: 8 additions & 0 deletions pkg/memory/memory_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
//go:build windows
// +build windows

package memory

// GetCurrentProcessRSSInBytes is the stub compiled under the windows build
// constraint of this file. It performs no measurement and always returns 0
// and a nil error.
func GetCurrentProcessRSSInBytes() (uint64, error) {
return 0, nil
}
2 changes: 2 additions & 0 deletions pkg/uptime/uptime.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// Package uptime provides utilities for uptime.
package uptime
8 changes: 8 additions & 0 deletions pkg/uptime/uptime_darwin.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
//go:build darwin
// +build darwin

package uptime

// GetCurrentProcessStartTimeInUnixTime is the stub compiled under the darwin
// build constraint of this file. It does not look up the process start time
// and always returns 0 and a nil error, so a caller converting the result
// with time.Unix gets the Unix epoch.
func GetCurrentProcessStartTimeInUnixTime() (uint64, error) {
return 0, nil
}
Loading
Loading