Skip to content

Commit

Permalink
fix(info): correctly compute GPUd sqlite metrics delta (#349)
Browse files Browse the repository at this point in the history
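Fix the inflated average QPS values in the following output (QPS was computed from the cumulative totals instead of the delta between two consecutive samples):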

```json
        "name": "daemon",
        "healthy": true,
        "reason": "daemon version: v0.3.7, mac address: c4:cb:e1:d2:4d:4e",
        "extra_info": {
          "daemon_version": "v0.3.7",
          "gpud_pid": "1300055",
          "gpud_start_time_humanized": "2 days ago",
          "gpud_start_time_in_unix_time": "1738302894",
          "gpud_usage_db_humanized": "39 MB",
          "gpud_usage_db_in_bytes": "39202816",
          "gpud_usage_delete_avg_latency_in_seconds": "0.1172523",
          "gpud_usage_delete_avg_qps": "26.769",
          "gpud_usage_delete_total": "39",
          "gpud_usage_file_descriptors": "72",
          "gpud_usage_insert_update_avg_latency_in_seconds": "0.0000957",
          "gpud_usage_insert_update_avg_qps": "610163.653",
          "gpud_usage_insert_update_total": "888951",
          "gpud_usage_memory_humanized": "330 MB",
          "gpud_usage_memory_in_bytes": "329768960",
          "gpud_usage_select_avg_latency_in_seconds": "0.0052156",
          "gpud_usage_select_avg_qps": "575505.957",
          "gpud_usage_select_total": "838458",
```

---------

Signed-off-by: Gyuho Lee <[email protected]>
  • Loading branch information
gyuho authored Feb 4, 2025
1 parent 76eed9b commit e71bd8b
Show file tree
Hide file tree
Showing 3 changed files with 203 additions and 25 deletions.
34 changes: 9 additions & 25 deletions components/info/component.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,32 +135,16 @@ func (c *component) States(ctx context.Context) ([]components.State, error) {
dbSizeHumanized = humanize.Bytes(dbSize)
}

currMetrics, err := sqlite.ReadMetrics(c.gatherer)
curMetrics, err := sqlite.ReadMetrics(c.gatherer)
if err != nil {
return nil, err
}

var prev sqlite.Metrics
lastSQLiteMetricsMu.Lock()
if !lastSQLiteMetrics.IsZero() {
prev = lastSQLiteMetrics
}
if !currMetrics.IsZero() {
lastSQLiteMetrics = currMetrics
}
insertUpdateAvgQPS, deleteAvgQPS, selectAvgQPS := lastSQLiteMetrics.QPS(curMetrics)
lastSQLiteMetrics = curMetrics
lastSQLiteMetricsMu.Unlock()

elapsedSeconds := currMetrics.Time.Sub(prev.Time).Seconds()

insertUpdateAvgQPS := float64(0)
deleteAvgQPS := float64(0)
selectAvgQPS := float64(0)
if !prev.IsZero() && !currMetrics.IsZero() {
insertUpdateAvgQPS = float64(currMetrics.InsertUpdateTotal) / elapsedSeconds
deleteAvgQPS = float64(currMetrics.DeleteTotal) / elapsedSeconds
selectAvgQPS = float64(currMetrics.SelectTotal) / elapsedSeconds
}

gpudStartTimeInUnixTime, err := uptime.GetCurrentProcessStartTimeInUnixTime()
if err != nil {
return nil, err
Expand All @@ -187,17 +171,17 @@ func (c *component) States(ctx context.Context) ([]components.State, error) {
StateKeyGPUdUsageDBInBytes: fmt.Sprintf("%d", dbSize),
StateKeyGPUdUsageDBHumanized: dbSizeHumanized,

StateKeyGPUdUsageInsertUpdateTotal: fmt.Sprintf("%d", currMetrics.InsertUpdateTotal),
StateKeyGPUdUsageInsertUpdateTotal: fmt.Sprintf("%d", curMetrics.InsertUpdateTotal),
StateKeyGPUdUsageInsertUpdateAvgQPS: fmt.Sprintf("%.3f", insertUpdateAvgQPS),
StateKeyGPUdUsageInsertUpdateAvgLatencyInSeconds: fmt.Sprintf("%.7f", currMetrics.InsertUpdateSecondsAvg),
StateKeyGPUdUsageInsertUpdateAvgLatencyInSeconds: fmt.Sprintf("%.7f", curMetrics.InsertUpdateSecondsAvg),

StateKeyGPUdUsageDeleteTotal: fmt.Sprintf("%d", currMetrics.DeleteTotal),
StateKeyGPUdUsageDeleteTotal: fmt.Sprintf("%d", curMetrics.DeleteTotal),
StateKeyGPUdUsageDeleteAvgQPS: fmt.Sprintf("%.3f", deleteAvgQPS),
StateKeyGPUdUsageDeleteAvgLatencyInSeconds: fmt.Sprintf("%.7f", currMetrics.DeleteSecondsAvg),
StateKeyGPUdUsageDeleteAvgLatencyInSeconds: fmt.Sprintf("%.7f", curMetrics.DeleteSecondsAvg),

StateKeyGPUdUsageSelectTotal: fmt.Sprintf("%d", currMetrics.SelectTotal),
StateKeyGPUdUsageSelectTotal: fmt.Sprintf("%d", curMetrics.SelectTotal),
StateKeyGPUdUsageSelectAvgQPS: fmt.Sprintf("%.3f", selectAvgQPS),
StateKeyGPUdUsageSelectAvgLatencyInSeconds: fmt.Sprintf("%.7f", currMetrics.SelectSecondsAvg),
StateKeyGPUdUsageSelectAvgLatencyInSeconds: fmt.Sprintf("%.7f", curMetrics.SelectSecondsAvg),

StateKeyGPUdStartTimeInUnixTime: fmt.Sprintf("%d", gpudStartTimeInUnixTime),
StateKeyGPUdStartTimeHumanized: gpudStartTimeHumanized,
Expand Down
19 changes: 19 additions & 0 deletions pkg/sqlite/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,14 +102,17 @@ func RecordSelect(tookSeconds float64) {
type Metrics struct {
Time time.Time

// The total number of inserts and updates in cumulative count.
InsertUpdateTotal int64
InsertUpdateSecondsTotal float64
InsertUpdateSecondsAvg float64

// The total number of deletes in cumulative count.
DeleteTotal int64
DeleteSecondsTotal float64
DeleteSecondsAvg float64

// The total number of selects in cumulative count.
SelectTotal int64
SelectSecondsTotal float64
SelectSecondsAvg float64
Expand Down Expand Up @@ -185,3 +188,19 @@ func ReadMetrics(gatherer prometheus.Gatherer) (Metrics, error) {

return mtr, nil
}

// QPS returns the average per-second rates of insert/update, delete, and
// select operations between two snapshots: the receiver is the earlier
// snapshot and cur is the later one.
//
// Each rate is the growth of the corresponding cumulative counter divided by
// the seconds elapsed between prev.Time and cur.Time. All three rates are 0
// when IsZero reports true for either snapshot, or when cur.Time is not
// strictly after prev.Time (which also avoids dividing by zero).
func (prev Metrics) QPS(cur Metrics) (insertUpdateAvgQPS float64, deleteAvgQPS float64, selectAvgQPS float64) {
	// Without two usable snapshots there is no interval to measure.
	if prev.IsZero() || cur.IsZero() {
		return 0, 0, 0
	}

	secs := cur.Time.Sub(prev.Time).Seconds()
	if secs <= 0 {
		return 0, 0, 0
	}

	// The totals are cumulative, so only the difference belongs to this interval.
	insertUpdateDelta := cur.InsertUpdateTotal - prev.InsertUpdateTotal
	deleteDelta := cur.DeleteTotal - prev.DeleteTotal
	selectDelta := cur.SelectTotal - prev.SelectTotal

	return float64(insertUpdateDelta) / secs, float64(deleteDelta) / secs, float64(selectDelta) / secs
}
175 changes: 175 additions & 0 deletions pkg/sqlite/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package sqlite
import (
"math"
"testing"
"time"

"github.com/prometheus/client_golang/prometheus"
)
Expand Down Expand Up @@ -123,3 +124,177 @@ func TestMetrics(t *testing.T) {
// floatEquals reports whether a and b differ by strictly less than 0.0005.
func floatEquals(a, b float64) bool {
	const tolerance = 0.0005

	diff := math.Abs(a - b)
	return diff < tolerance
}

// TestCalculateQPS is a table-driven test for Metrics.QPS. Each case supplies
// a previous and a current snapshot and the per-second rates expected for
// inserts/updates, deletes, and selects over the interval between them.
func TestCalculateQPS(t *testing.T) {
	t.Parallel()

	tests := []struct {
		name                string
		lastMetrics         Metrics
		currMetrics         Metrics
		wantInsertUpdateQPS float64
		wantDeleteQPS       float64
		wantSelectQPS       float64
	}{
		{
			name:                "both metrics zero",
			lastMetrics:         Metrics{},
			currMetrics:         Metrics{},
			wantInsertUpdateQPS: 0,
			wantDeleteQPS:       0,
			wantSelectQPS:       0,
		},
		{
			name: "normal case with 10 second interval",
			lastMetrics: Metrics{
				Time:              time.Unix(1000, 0),
				InsertUpdateTotal: 100,
				DeleteTotal:       50,
				SelectTotal:       200,
			},
			currMetrics: Metrics{
				Time:              time.Unix(1010, 0),
				InsertUpdateTotal: 200,
				DeleteTotal:       70,
				SelectTotal:       400,
			},
			wantInsertUpdateQPS: 10, // (200-100)/10
			wantDeleteQPS:       2,  // (70-50)/10
			wantSelectQPS:       20, // (400-200)/10
		},
		{
			name:        "last metrics zero",
			lastMetrics: Metrics{},
			currMetrics: Metrics{
				Time:              time.Unix(1000, 0),
				InsertUpdateTotal: 100,
				DeleteTotal:       50,
				SelectTotal:       200,
			},
			wantInsertUpdateQPS: 0,
			wantDeleteQPS:       0,
			wantSelectQPS:       0,
		},
		{
			name: "current metrics zero",
			lastMetrics: Metrics{
				Time:              time.Unix(1000, 0),
				InsertUpdateTotal: 100,
				DeleteTotal:       50,
				SelectTotal:       200,
			},
			currMetrics:         Metrics{},
			wantInsertUpdateQPS: 0,
			wantDeleteQPS:       0,
			wantSelectQPS:       0,
		},
		{
			name: "sub-second interval",
			lastMetrics: Metrics{
				Time:              time.Unix(1000, 0),
				InsertUpdateTotal: 100,
				DeleteTotal:       50,
				SelectTotal:       200,
			},
			currMetrics: Metrics{
				Time:              time.Unix(1000, 500000000), // 500ms later
				InsertUpdateTotal: 150,
				DeleteTotal:       75,
				SelectTotal:       300,
			},
			wantInsertUpdateQPS: 100, // (150-100)/0.5
			wantDeleteQPS:       50,  // (75-50)/0.5
			wantSelectQPS:       200, // (300-200)/0.5
		},
		{
			name: "no changes in counts",
			lastMetrics: Metrics{
				Time:              time.Unix(1000, 0),
				InsertUpdateTotal: 100,
				DeleteTotal:       50,
				SelectTotal:       200,
			},
			currMetrics: Metrics{
				Time:              time.Unix(1010, 0),
				InsertUpdateTotal: 100,
				DeleteTotal:       50,
				SelectTotal:       200,
			},
			wantInsertUpdateQPS: 0,
			wantDeleteQPS:       0,
			wantSelectQPS:       0,
		},
		{
			name: "very short time interval",
			lastMetrics: Metrics{
				Time:              time.Unix(1000, 0),
				InsertUpdateTotal: 100,
				DeleteTotal:       50,
				SelectTotal:       200,
			},
			currMetrics: Metrics{
				Time:              time.Unix(1000, 1000000), // 1ms later
				InsertUpdateTotal: 101,
				DeleteTotal:       51,
				SelectTotal:       201,
			},
			wantInsertUpdateQPS: 1000, // (101-100)/0.001
			wantDeleteQPS:       1000, // (51-50)/0.001
			wantSelectQPS:       1000, // (201-200)/0.001
		},
		{
			name: "mixed activity - some metrics changing, others not",
			lastMetrics: Metrics{
				Time:              time.Unix(1000, 0),
				InsertUpdateTotal: 100,
				DeleteTotal:       50,
				SelectTotal:       200,
			},
			currMetrics: Metrics{
				Time:              time.Unix(1010, 0),
				InsertUpdateTotal: 100, // no change
				DeleteTotal:       70,  // changed
				SelectTotal:       400, // changed
			},
			wantInsertUpdateQPS: 0,  // (100-100)/10
			wantDeleteQPS:       2,  // (70-50)/10
			wantSelectQPS:       20, // (400-200)/10
		},
		{
			name: "zero elapsed time - identical timestamps",
			lastMetrics: Metrics{
				Time:              time.Unix(1000, 0),
				InsertUpdateTotal: 100,
				DeleteTotal:       50,
				SelectTotal:       200,
			},
			currMetrics: Metrics{
				Time:              time.Unix(1000, 0), // same timestamp
				InsertUpdateTotal: 150,
				DeleteTotal:       75,
				SelectTotal:       300,
			},
			wantInsertUpdateQPS: 0, // should return 0 when elapsed time is 0
			wantDeleteQPS:       0,
			wantSelectQPS:       0,
		},
		{
			name: "negative elapsed time - current timestamp before last",
			lastMetrics: Metrics{
				Time:              time.Unix(1010, 0),
				InsertUpdateTotal: 100,
				DeleteTotal:       50,
				SelectTotal:       200,
			},
			currMetrics: Metrics{
				Time:              time.Unix(1000, 0), // 10s earlier than last
				InsertUpdateTotal: 150,
				DeleteTotal:       75,
				SelectTotal:       300,
			},
			wantInsertUpdateQPS: 0, // should return 0 when elapsed time is negative
			wantDeleteQPS:       0,
			wantSelectQPS:       0,
		},
	}

	for _, tt := range tests {
		tt := tt // capture the range variable for the parallel subtest (needed before Go 1.22)
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()
			gotInsertUpdateQPS, gotDeleteQPS, gotSelectQPS := tt.lastMetrics.QPS(tt.currMetrics)

			if gotInsertUpdateQPS != tt.wantInsertUpdateQPS {
				t.Errorf("QPS() gotInsertUpdateQPS = %v, want %v", gotInsertUpdateQPS, tt.wantInsertUpdateQPS)
			}
			if gotDeleteQPS != tt.wantDeleteQPS {
				t.Errorf("QPS() gotDeleteQPS = %v, want %v", gotDeleteQPS, tt.wantDeleteQPS)
			}
			if gotSelectQPS != tt.wantSelectQPS {
				t.Errorf("QPS() gotSelectQPS = %v, want %v", gotSelectQPS, tt.wantSelectQPS)
			}
		})
	}
}

0 comments on commit e71bd8b

Please sign in to comment.