From 7ac602dbb4e66bd957b5e8d99e30fc333cbbe5fd Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Sat, 18 Jan 2025 13:23:33 +0800 Subject: [PATCH] feat(info): track gpud process self resource usage (file descriptors, RSS, start time, db size) Signed-off-by: Gyuho Lee --- components/info/component.go | 69 ++++++++++++++++++++++++++++++- components/info/component_test.go | 2 +- components/state/state.go | 32 +++++++------- components/state/state_test.go | 34 ++++++++++++--- internal/server/server.go | 2 +- pkg/file/descriptors_darwin.go | 6 +++ pkg/file/descriptors_linux.go | 13 ++++++ pkg/file/descriptors_test.go | 8 ++++ pkg/file/descriptors_windows.go | 6 +++ pkg/memory/memory.go | 2 + pkg/memory/memory_darwin.go | 8 ++++ pkg/memory/memory_linux.go | 34 +++++++++++++++ pkg/memory/memory_test.go | 13 ++++++ pkg/memory/memory_windows.go | 8 ++++ pkg/uptime/uptime.go | 2 + pkg/uptime/uptime_darwin.go | 8 ++++ pkg/uptime/uptime_linux.go | 39 +++++++++++++++++ pkg/uptime/uptime_windows.go | 8 ++++ 18 files changed, 267 insertions(+), 27 deletions(-) create mode 100644 pkg/memory/memory.go create mode 100644 pkg/memory/memory_darwin.go create mode 100644 pkg/memory/memory_linux.go create mode 100644 pkg/memory/memory_test.go create mode 100644 pkg/memory/memory_windows.go create mode 100644 pkg/uptime/uptime.go create mode 100644 pkg/uptime/uptime_darwin.go create mode 100644 pkg/uptime/uptime_linux.go create mode 100644 pkg/uptime/uptime_windows.go diff --git a/components/info/component.go b/components/info/component.go index b51516d3..56a92980 100644 --- a/components/info/component.go +++ b/components/info/component.go @@ -3,21 +3,30 @@ package info import ( "context" + "database/sql" "encoding/json" "fmt" "net" + "os" "time" "github.com/leptonai/gpud/components" info_id "github.com/leptonai/gpud/components/info/id" + "github.com/leptonai/gpud/components/state" "github.com/leptonai/gpud/log" "github.com/leptonai/gpud/manager" + "github.com/leptonai/gpud/pkg/file" + 
"github.com/leptonai/gpud/pkg/memory" + "github.com/leptonai/gpud/pkg/uptime" "github.com/leptonai/gpud/version" + + "github.com/dustin/go-humanize" ) -func New(annotations map[string]string) components.Component { +func New(annotations map[string]string, dbRO *sql.DB) components.Component { return &component{ annotations: annotations, + dbRO: dbRO, } } @@ -25,6 +34,7 @@ var _ components.Component = (*component)(nil) type component struct { annotations map[string]string + dbRO *sql.DB } func (c *component) Name() string { return info_id.Name } @@ -36,6 +46,19 @@ const ( StateKeyMacAddress = "mac_address" StateKeyPackages = "packages" + StateKeyGPUdPID = "gpud_pid" + + StateKeyGPUdUsageFileDescriptors = "gpud_usage_file_descriptors" + + StateKeyGPUdUsageMemoryInBytes = "gpud_usage_memory_in_bytes" + StateKeyGPUdUsageMemoryHumanized = "gpud_usage_memory_humanized" + + StateKeyGPUdUsageDBInBytes = "gpud_usage_db_in_bytes" + StateKeyGPUdUsageDBHumanized = "gpud_usage_db_humanized" + + StateKeyGPUdStartTimeInUnixTime = "gpud_start_time_in_unix_time" + StateKeyGPUdStartTimeHumanized = "gpud_start_time_humanized" + StateNameAnnotations = "annotations" ) @@ -63,6 +86,37 @@ func (c *component) States(ctx context.Context) ([]components.State, error) { rawPayload, _ := json.Marshal(&packageStatus) managedPackages = string(rawPayload) } + + pid := os.Getpid() + gpudUsageFileDescriptors, err := file.GetCurrentProcessUsage() + if err != nil { + return nil, err + } + + gpudUsageMemoryInBytes, err := memory.GetCurrentProcessRSSInBytes() + if err != nil { + return nil, err + } + gpudUsageMemoryHumanized := humanize.Bytes(gpudUsageMemoryInBytes) + + var ( + dbSize uint64 + dbSizeHumanized string + ) + if c.dbRO != nil { + dbSize, err = state.ReadDBSize(ctx, c.dbRO) + if err != nil { + return nil, err + } + dbSizeHumanized = humanize.Bytes(dbSize) + } + + gpudStartTimeInUnixTime, err := uptime.GetCurrentProcessStartTimeInUnixTime() + if err != nil { + return nil, err + } + 
gpudStartTimeHumanized := humanize.Time(time.Unix(int64(gpudStartTimeInUnixTime), 0)) + return []components.State{ { Name: StateNameDaemon, @@ -72,6 +126,19 @@ func (c *component) States(ctx context.Context) ([]components.State, error) { StateKeyDaemonVersion: version.Version, StateKeyMacAddress: mac, StateKeyPackages: managedPackages, + + StateKeyGPUdPID: fmt.Sprintf("%d", pid), + + StateKeyGPUdUsageFileDescriptors: fmt.Sprintf("%d", gpudUsageFileDescriptors), + + StateKeyGPUdUsageMemoryInBytes: fmt.Sprintf("%d", gpudUsageMemoryInBytes), + StateKeyGPUdUsageMemoryHumanized: gpudUsageMemoryHumanized, + + StateKeyGPUdUsageDBInBytes: fmt.Sprintf("%d", dbSize), + StateKeyGPUdUsageDBHumanized: dbSizeHumanized, + + StateKeyGPUdStartTimeInUnixTime: fmt.Sprintf("%d", gpudStartTimeInUnixTime), + StateKeyGPUdStartTimeHumanized: gpudStartTimeHumanized, }, }, { diff --git a/components/info/component_test.go b/components/info/component_test.go index 2f019a23..252621b1 100644 --- a/components/info/component_test.go +++ b/components/info/component_test.go @@ -8,7 +8,7 @@ import ( func TestComponent(t *testing.T) { t.Parallel() - component := New(map[string]string{"a": "b"}) + component := New(map[string]string{"a": "b"}, nil) ctx, cancel := context.WithCancel(context.Background()) defer cancel() diff --git a/components/state/state.go b/components/state/state.go index c689090a..75cada61 100644 --- a/components/state/state.go +++ b/components/state/state.go @@ -166,14 +166,6 @@ UPDATE %s SET %s = '%s' WHERE %s = '%s'; } var ( - currentPages = prometheus.NewGauge( - prometheus.GaugeOpts{ - Namespace: "gpud", - Subsystem: "state_sqlite", - Name: "current_pages", - Help: "current number of pages", - }, - ) currentSize = prometheus.NewGauge( prometheus.GaugeOpts{ Namespace: "gpud", @@ -185,36 +177,40 @@ var ( ) func Register(reg *prometheus.Registry) error { - if err := reg.Register(currentPages); err != nil { - return err - } if err := reg.Register(currentSize); err != nil { return 
err } return nil } -// Requires read-write db instance. -func RecordMetrics(ctx context.Context, db *sql.DB) error { +func ReadDBSize(ctx context.Context, db *sql.DB) (uint64, error) { var pageCount uint64 err := db.QueryRowContext(ctx, "PRAGMA page_count").Scan(&pageCount) if err == sql.ErrNoRows { - return errors.New("no page count") + return 0, errors.New("no page count") } if err != nil { - return err + return 0, err } - currentPages.Set(float64(pageCount)) var pageSize uint64 err = db.QueryRowContext(ctx, "PRAGMA page_size").Scan(&pageSize) if err == sql.ErrNoRows { - return errors.New("no page size") + return 0, errors.New("no page size") + } + if err != nil { + return 0, err } + + return pageCount * pageSize, nil +} + +func RecordMetrics(ctx context.Context, db *sql.DB) error { + dbSize, err := ReadDBSize(ctx, db) if err != nil { return err } - currentSize.Set(float64(pageCount * pageSize)) + currentSize.Set(float64(dbSize)) return nil } diff --git a/components/state/state_test.go b/components/state/state_test.go index ab576e9b..5306f81b 100644 --- a/components/state/state_test.go +++ b/components/state/state_test.go @@ -6,6 +6,8 @@ import ( "time" "github.com/leptonai/gpud/pkg/sqlite" + + "github.com/prometheus/client_golang/prometheus" ) func TestOpenMemory(t *testing.T) { @@ -46,16 +48,36 @@ func TestOpenMemory(t *testing.T) { func TestRecordMetrics(t *testing.T) { t.Parallel() - dbRW, dbRO, close := sqlite.OpenTestDB(t) - defer close() + reg := prometheus.NewRegistry() + if err := Register(reg); err != nil { + t.Fatal("failed to register metrics:", err) + } + + dbRW, dbRO, cleanup := sqlite.OpenTestDB(t) + defer cleanup() - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() - if err := RecordMetrics(ctx, dbRO); err == nil { - t.Fatal("expected error but got nil") + if err := CreateTableMachineMetadata(ctx, dbRW); err != nil { + t.Fatal("failed 
to create table:", err) + } + id, err := CreateMachineIDIfNotExist(ctx, dbRW, dbRW, "") + if err != nil { + t.Fatal("failed to create machine id:", err) } - if err := RecordMetrics(ctx, dbRW); err != nil { + t.Log(id) + + if err := RecordMetrics(ctx, dbRO); err != nil { t.Fatal("failed to record metrics:", err) } + if err := Compact(ctx, dbRW); err != nil { + t.Fatal("failed to compact database:", err) + } + + size, err := ReadDBSize(ctx, dbRO) + if err != nil { + t.Fatal("failed to read db size:", err) + } + t.Log(size) } diff --git a/internal/server/server.go b/internal/server/server.go index 16820137..63c3b75f 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -649,7 +649,7 @@ func New(ctx context.Context, config *lepconfig.Config, endpoint string, cliUID } case info_id.Name: - allComponents = append(allComponents, info.New(config.Annotations)) + allComponents = append(allComponents, info.New(config.Annotations, dbRO)) case memory_id.Name: cfg := memory.Config{Query: defaultQueryCfg} diff --git a/pkg/file/descriptors_darwin.go b/pkg/file/descriptors_darwin.go index 97c93a88..f97d17bf 100644 --- a/pkg/file/descriptors_darwin.go +++ b/pkg/file/descriptors_darwin.go @@ -28,3 +28,9 @@ func GetFileHandles() (uint64, uint64, error) { func GetUsage() (uint64, error) { return 0, nil } + +// Returns the number of allocated file handles for the current process. +// Not implemented for this architecture. +func GetCurrentProcessUsage() (uint64, error) { + return 0, nil +} diff --git a/pkg/file/descriptors_linux.go b/pkg/file/descriptors_linux.go index 777300d5..6d6139ca 100644 --- a/pkg/file/descriptors_linux.go +++ b/pkg/file/descriptors_linux.go @@ -79,3 +79,16 @@ func GetUsage() (uint64, error) { } return total, nil } + +// Returns the number of allocated file handles for the current process. 
+func GetCurrentProcessUsage() (uint64, error) { + proc, err := procfs.Self() + if err != nil { + return 0, err + } + fdLen, err := proc.FileDescriptorsLen() + if err != nil { + return 0, err + } + return uint64(fdLen), nil +} diff --git a/pkg/file/descriptors_test.go b/pkg/file/descriptors_test.go index 663aa1bb..f03ed576 100644 --- a/pkg/file/descriptors_test.go +++ b/pkg/file/descriptors_test.go @@ -37,3 +37,11 @@ func Test_getFileHandles(t *testing.T) { t.Fatalf("unused is not 0: %v", unused) } } + +func TestGetCurrentProcessUsage(t *testing.T) { + usage, err := GetCurrentProcessUsage() + if err != nil { + t.Fatalf("failed to get current process usage: %v", err) + } + t.Logf("usage: %v", usage) +} diff --git a/pkg/file/descriptors_windows.go b/pkg/file/descriptors_windows.go index d14228e3..5b1d1886 100644 --- a/pkg/file/descriptors_windows.go +++ b/pkg/file/descriptors_windows.go @@ -22,3 +22,9 @@ func GetFileHandles() (uint64, uint64, error) { func GetUsage() (uint64, error) { return 0, nil } + +// Returns the number of allocated file handles for the current process. +// Not implemented for this architecture. +func GetCurrentProcessUsage() (uint64, error) { + return 0, nil +} diff --git a/pkg/memory/memory.go b/pkg/memory/memory.go new file mode 100644 index 00000000..20437841 --- /dev/null +++ b/pkg/memory/memory.go @@ -0,0 +1,2 @@ +// Package memory provides utilities for memory usage. 
+package memory diff --git a/pkg/memory/memory_darwin.go b/pkg/memory/memory_darwin.go new file mode 100644 index 00000000..f0c33651 --- /dev/null +++ b/pkg/memory/memory_darwin.go @@ -0,0 +1,8 @@ +//go:build darwin +// +build darwin + +package memory + +func GetCurrentProcessRSSInBytes() (uint64, error) { + return 0, nil +} diff --git a/pkg/memory/memory_linux.go b/pkg/memory/memory_linux.go new file mode 100644 index 00000000..25dfac90 --- /dev/null +++ b/pkg/memory/memory_linux.go @@ -0,0 +1,34 @@ +// Copyright 2019 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build linux +// +build linux + +package memory + +import "github.com/prometheus/procfs" + +// Returns the resident set size (RSS) memory usage of the current process, in bytes. 
+func GetCurrentProcessRSSInBytes() (uint64, error) { + proc, err := procfs.Self() + if err != nil { + return 0, err + } + + stat, err := proc.Stat() + if err != nil { + return 0, err + } + + return uint64(stat.ResidentMemory()), nil +} diff --git a/pkg/memory/memory_test.go b/pkg/memory/memory_test.go new file mode 100644 index 00000000..03adbc64 --- /dev/null +++ b/pkg/memory/memory_test.go @@ -0,0 +1,13 @@ +package memory + +import ( + "testing" +) + +func TestGetCurrentProcessRSSInBytes(t *testing.T) { + bytes, err := GetCurrentProcessRSSInBytes() + if err != nil { + t.Fatalf("failed to get bytes: %v", err) + } + t.Logf("bytes: %v", bytes) +} diff --git a/pkg/memory/memory_windows.go b/pkg/memory/memory_windows.go new file mode 100644 index 00000000..a217d511 --- /dev/null +++ b/pkg/memory/memory_windows.go @@ -0,0 +1,8 @@ +//go:build windows +// +build windows + +package memory + +func GetCurrentProcessRSSInBytes() (uint64, error) { + return 0, nil +} diff --git a/pkg/uptime/uptime.go b/pkg/uptime/uptime.go new file mode 100644 index 00000000..2101e037 --- /dev/null +++ b/pkg/uptime/uptime.go @@ -0,0 +1,2 @@ +// Package uptime provides utilities for uptime. +package uptime diff --git a/pkg/uptime/uptime_darwin.go b/pkg/uptime/uptime_darwin.go new file mode 100644 index 00000000..c4754e62 --- /dev/null +++ b/pkg/uptime/uptime_darwin.go @@ -0,0 +1,8 @@ +//go:build darwin +// +build darwin + +package uptime + +func GetCurrentProcessStartTimeInUnixTime() (uint64, error) { + return 0, nil +} diff --git a/pkg/uptime/uptime_linux.go b/pkg/uptime/uptime_linux.go new file mode 100644 index 00000000..bff29c81 --- /dev/null +++ b/pkg/uptime/uptime_linux.go @@ -0,0 +1,39 @@ +// Copyright 2019 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build linux +// +build linux + +package uptime + +import "github.com/prometheus/procfs" + +// Returns the current process start time in unix time. +func GetCurrentProcessStartTimeInUnixTime() (uint64, error) { + proc, err := procfs.Self() + if err != nil { + return 0, err + } + + stat, err := proc.Stat() + if err != nil { + return 0, err + } + + startTime, err := stat.StartTime() + if err != nil { + return 0, err + } + + return uint64(startTime), nil +} diff --git a/pkg/uptime/uptime_windows.go b/pkg/uptime/uptime_windows.go new file mode 100644 index 00000000..0f84c681 --- /dev/null +++ b/pkg/uptime/uptime_windows.go @@ -0,0 +1,8 @@ +//go:build windows +// +build windows + +package uptime + +func GetCurrentProcessStartTimeInUnixTime() (uint64, error) { + return 0, nil +}