metrics: measure idle time during builds

This measures the amount of time spent idle during the build. This is
done by collecting the set of task times, determining which sections
contain gaps where no task is running, and aggregating that duration
into a metric.

Signed-off-by: Jonathan A. Sternberg <jonathan.sternberg@docker.com>
This commit is contained in:
Jonathan A. Sternberg 2024-03-07 14:52:26 -06:00
parent c0c3a55fca
commit a4a8846e46
No known key found for this signature in database
GPG Key ID: 6603D4B96394F6B1
2 changed files with 139 additions and 0 deletions

View File

@ -3,7 +3,9 @@ package progress
import ( import (
"context" "context"
"regexp" "regexp"
"sort"
"strings" "strings"
"sync"
"time" "time"
"github.com/docker/buildx/util/metricutil" "github.com/docker/buildx/util/metricutil"
@ -26,6 +28,7 @@ func newMetrics(mp metric.MeterProvider, attrs attribute.Set) *metricWriter {
newImageSourceTransferMetricRecorder(meter, attrs), newImageSourceTransferMetricRecorder(meter, attrs),
newExecMetricRecorder(meter, attrs), newExecMetricRecorder(meter, attrs),
newExportImageMetricRecorder(meter, attrs), newExportImageMetricRecorder(meter, attrs),
newIdleMetricRecorder(meter, attrs),
}, },
attrs: attrs, attrs: attrs,
} }
@ -333,3 +336,93 @@ func detectExportImageType(vertexName string) string {
} }
return format return format
} }
type idleMetricRecorder struct {
// Attributes holds the set of base attributes for all metrics produced.
Attributes attribute.Set
// Duration tracks the amount of time spent idle during this build.
Duration metric.Float64ObservableGauge
// Started stores the set of times when tasks were started.
Started []time.Time
// Completed stores the set of times when tasks were completed.
Completed []time.Time
mu sync.Mutex
}
func newIdleMetricRecorder(meter metric.Meter, attrs attribute.Set) *idleMetricRecorder {
mr := &idleMetricRecorder{
Attributes: attrs,
}
mr.Duration, _ = meter.Float64ObservableGauge("builder.idle.time",
metric.WithDescription("Measures the length of time the builder spends idle."),
metric.WithUnit("ms"),
metric.WithFloat64Callback(mr.calculateIdleTime))
return mr
}
func (mr *idleMetricRecorder) Record(ss *client.SolveStatus) {
mr.mu.Lock()
defer mr.mu.Unlock()
for _, v := range ss.Vertexes {
if v.Started == nil || v.Completed == nil {
continue
}
mr.Started = append(mr.Started, *v.Started)
mr.Completed = append(mr.Completed, *v.Completed)
}
}
// calculateIdleTime will use the recorded vertices that have been completed to determine the
// amount of time spent idle.
//
// This calculation isn't accurate until the build itself is completed. At the moment,
// metrics are only ever sent when a build is completed. If that changes, this calculation
// will likely be inaccurate.
func (mr *idleMetricRecorder) calculateIdleTime(_ context.Context, o metric.Float64Observer) error {
mr.mu.Lock()
defer mr.mu.Unlock()
dur := calculateIdleTime(mr.Started, mr.Completed)
o.Observe(float64(dur)/float64(time.Millisecond), metric.WithAttributeSet(mr.Attributes))
return nil
}
func calculateIdleTime(started, completed []time.Time) time.Duration {
sort.Slice(started, func(i, j int) bool {
return started[i].Before(started[j])
})
sort.Slice(completed, func(i, j int) bool {
return completed[i].Before(completed[j])
})
if len(started) == 0 {
return 0
}
var (
idleStart time.Time
elapsed time.Duration
)
for active := 0; len(started) > 0 && len(completed) > 0; {
if started[0].Before(completed[0]) {
if active == 0 && !idleStart.IsZero() {
elapsed += started[0].Sub(idleStart)
}
active++
started = started[1:]
continue
}
active--
if active == 0 {
idleStart = completed[0]
}
completed = completed[1:]
}
return elapsed
}

View File

@ -0,0 +1,46 @@
package progress
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
)
func TestCalculateIdleTime(t *testing.T) {
for _, tt := range []struct {
started []int64
completed []int64
ms int64
}{
{
started: []int64{0, 1, 3},
completed: []int64{2, 10, 5},
ms: 0,
},
{
started: []int64{0, 3},
completed: []int64{2, 5},
ms: 1,
},
{
started: []int64{3, 0, 7},
completed: []int64{5, 2, 10},
ms: 3,
},
} {
started := unixMillis(tt.started...)
completed := unixMillis(tt.completed...)
actual := int64(calculateIdleTime(started, completed) / time.Millisecond)
assert.Equal(t, tt.ms, actual)
}
}
func unixMillis(ts ...int64) []time.Time {
times := make([]time.Time, len(ts))
for i, ms := range ts {
times[i] = time.UnixMilli(ms)
}
return times
}