otel: include service instance id attribute to resource and move to metricutil package

Add the service instance id to the resource attributes to prevent
downstream OTEL processors and exporters from thinking that the CLI
invocations are a single process that keeps restarting. The unique id
can be removed through downstream aggregation to prevent cardinality
issues, but we need some way to tell OTEL that it shouldn't reset the
counters.

Move the check for the experimental flag to its own package and then use
that invocation to prevent creating exporters so metrics are disabled
completely. This makes it so we don't have to check for the experimental
flag in every place we add metrics until we decide to make metrics
stable in general.

This also moves the OTEL initialization to a `util/metricutil` package
to be more consistent with the existing util naming and to differentiate
it from the upstream `metric` name. Using both `metrics` and `metric` as
import names was confusing since `metric` was an upstream dependency and
`metrics` was a local utility. `metricutil` matches with the existing
utilities and makes clear that it isn't a spelling mistake.

The record version metric has been removed since we weren't planning on
keeping that metric anyway and most of the information is now included
in the instrumentation library name and version. That function is
included as a utility in the `otel/sdk/metric` package to retrieve the
appropriate meter from the meter provider.

Signed-off-by: Jonathan A. Sternberg <jonathan.sternberg@docker.com>
This commit is contained in:
Jonathan A. Sternberg 2024-01-30 13:23:30 -06:00
parent 2c3d7dab3f
commit c65b7ed24f
No known key found for this signature in database
GPG Key ID: 6603D4B96394F6B1
7 changed files with 151 additions and 103 deletions

View File

@ -19,7 +19,6 @@ import (
"github.com/docker/buildx/util/confutil" "github.com/docker/buildx/util/confutil"
"github.com/docker/buildx/util/desktop" "github.com/docker/buildx/util/desktop"
"github.com/docker/buildx/util/dockerutil" "github.com/docker/buildx/util/dockerutil"
"github.com/docker/buildx/util/metrics"
"github.com/docker/buildx/util/progress" "github.com/docker/buildx/util/progress"
"github.com/docker/buildx/util/tracing" "github.com/docker/buildx/util/tracing"
"github.com/docker/cli/cli/command" "github.com/docker/cli/cli/command"
@ -43,14 +42,6 @@ type bakeOptions struct {
} }
func runBake(ctx context.Context, dockerCli command.Cli, targets []string, in bakeOptions, cFlags commonFlags) (err error) { func runBake(ctx context.Context, dockerCli command.Cli, targets []string, in bakeOptions, cFlags commonFlags) (err error) {
mp, report, err := metrics.MeterProvider(dockerCli)
if err != nil {
return err
}
defer report()
recordVersionInfo(mp, "bake")
ctx, end, err := tracing.TraceCurrentCommand(ctx, "bake") ctx, end, err := tracing.TraceCurrentCommand(ctx, "bake")
if err != nil { if err != nil {
return err return err

View File

@ -30,10 +30,9 @@ import (
"github.com/docker/buildx/util/cobrautil" "github.com/docker/buildx/util/cobrautil"
"github.com/docker/buildx/util/desktop" "github.com/docker/buildx/util/desktop"
"github.com/docker/buildx/util/ioset" "github.com/docker/buildx/util/ioset"
"github.com/docker/buildx/util/metrics" "github.com/docker/buildx/util/metricutil"
"github.com/docker/buildx/util/progress" "github.com/docker/buildx/util/progress"
"github.com/docker/buildx/util/tracing" "github.com/docker/buildx/util/tracing"
"github.com/docker/buildx/version"
"github.com/docker/cli-docs-tool/annotation" "github.com/docker/cli-docs-tool/annotation"
"github.com/docker/cli/cli" "github.com/docker/cli/cli"
"github.com/docker/cli/cli/command" "github.com/docker/cli/cli/command"
@ -53,9 +52,6 @@ import (
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"github.com/spf13/cobra" "github.com/spf13/cobra"
"github.com/spf13/pflag" "github.com/spf13/pflag"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"google.golang.org/grpc/codes" "google.golang.org/grpc/codes"
) )
@ -216,13 +212,11 @@ func (o *buildOptions) toDisplayMode() (progressui.DisplayMode, error) {
} }
func runBuild(ctx context.Context, dockerCli command.Cli, options buildOptions) (err error) { func runBuild(ctx context.Context, dockerCli command.Cli, options buildOptions) (err error) {
mp, report, err := metrics.MeterProvider(dockerCli) mp, err := metricutil.NewMeterProvider(ctx, dockerCli)
if err != nil { if err != nil {
return err return err
} }
defer report() defer mp.Report(context.Background())
recordVersionInfo(mp, "build")
ctx, end, err := tracing.TraceCurrentCommand(ctx, "build") ctx, end, err := tracing.TraceCurrentCommand(ctx, "build")
if err != nil { if err != nil {
@ -936,30 +930,3 @@ func maybeJSONArray(v string) []string {
} }
return []string{v} return []string{v}
} }
func recordVersionInfo(mp metric.MeterProvider, command string) {
// Still in the process of testing/stabilizing these counters.
if !isExperimental() {
return
}
meter := mp.Meter("github.com/docker/buildx",
metric.WithInstrumentationVersion(version.Version),
)
counter, err := meter.Int64Counter("docker.cli.count",
metric.WithDescription("Number of invocations of the docker buildx command."),
)
if err != nil {
otel.Handle(err)
}
counter.Add(context.Background(), 1,
metric.WithAttributes(
attribute.String("command", command),
attribute.String("package", version.Package),
attribute.String("version", version.Version),
attribute.String("revision", version.Revision),
),
)
}

2
go.mod
View File

@ -42,6 +42,7 @@ require (
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0
go.opentelemetry.io/otel/metric v1.19.0 go.opentelemetry.io/otel/metric v1.19.0
go.opentelemetry.io/otel/sdk v1.19.0
go.opentelemetry.io/otel/sdk/metric v1.19.0 go.opentelemetry.io/otel/sdk/metric v1.19.0
go.opentelemetry.io/otel/trace v1.19.0 go.opentelemetry.io/otel/trace v1.19.0
golang.org/x/mod v0.13.0 golang.org/x/mod v0.13.0
@ -147,7 +148,6 @@ require (
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/prometheus v0.42.0 // indirect go.opentelemetry.io/otel/exporters/prometheus v0.42.0 // indirect
go.opentelemetry.io/otel/sdk v1.19.0 // indirect
go.opentelemetry.io/proto/otlp v1.0.0 // indirect go.opentelemetry.io/proto/otlp v1.0.0 // indirect
golang.org/x/crypto v0.17.0 // indirect golang.org/x/crypto v0.17.0 // indirect
golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 // indirect golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 // indirect

15
util/confutil/exp.go Normal file
View File

@ -0,0 +1,15 @@
package confutil
import (
"os"
"strconv"
)
// IsExperimental checks if the experimental flag has been configured.
func IsExperimental() bool {
if v, ok := os.LookupEnv("BUILDX_EXPERIMENTAL"); ok {
vv, _ := strconv.ParseBool(v)
return vv
}
return false
}

View File

@ -1,4 +1,4 @@
package metrics package metricutil
import ( import (
"context" "context"
@ -7,8 +7,9 @@ import (
"path" "path"
"time" "time"
"github.com/docker/buildx/util/confutil"
"github.com/docker/buildx/version"
"github.com/docker/cli/cli/command" "github.com/docker/cli/cli/command"
"github.com/moby/buildkit/util/tracing/detect"
"github.com/pkg/errors" "github.com/pkg/errors"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/metric"
@ -20,75 +21,86 @@ import (
const ( const (
otelConfigFieldName = "otel" otelConfigFieldName = "otel"
shutdownTimeout = 2 * time.Second reportTimeout = 2 * time.Second
) )
// ReportFunc is invoked to signal the metrics should be sent to the // MeterProvider holds a MeterProvider for metric generation and the configured
// desired endpoint. It should be invoked on application shutdown. // exporters for reporting metrics from the CLI.
type ReportFunc func() type MeterProvider struct {
metric.MeterProvider
reader *sdkmetric.ManualReader
exporters []sdkmetric.Exporter
}
// MeterProvider returns a MeterProvider suitable for CLI usage. // NewMeterProvider configures a MeterProvider from the CLI context.
// The primary difference between this metric reader and a more typical func NewMeterProvider(ctx context.Context, cli command.Cli) (*MeterProvider, error) {
// usage is that metric reporting only happens once when ReportFunc
// is invoked.
func MeterProvider(cli command.Cli) (metric.MeterProvider, ReportFunc, error) {
var exps []sdkmetric.Exporter var exps []sdkmetric.Exporter
if exp, err := dockerOtelExporter(cli); err != nil { // Only metric exporters if the experimental flag is set.
return nil, nil, err if confutil.IsExperimental() {
} else if exp != nil { if exp, err := dockerOtelExporter(cli); err != nil {
exps = append(exps, exp) return nil, err
} } else if exp != nil {
exps = append(exps, exp)
}
if exp, err := detectOtlpExporter(context.Background()); err != nil { if exp, err := detectOtlpExporter(ctx); err != nil {
return nil, nil, err return nil, err
} else if exp != nil { } else if exp != nil {
exps = append(exps, exp) exps = append(exps, exp)
}
} }
if len(exps) == 0 { if len(exps) == 0 {
// No exporters are configured so use a noop provider. // No exporters are configured so use a noop provider.
return noop.NewMeterProvider(), func() {}, nil return &MeterProvider{
MeterProvider: noop.NewMeterProvider(),
}, nil
} }
// Use delta temporality because, since this is a CLI program, we can never
// know the cumulative value.
reader := sdkmetric.NewManualReader( reader := sdkmetric.NewManualReader(
sdkmetric.WithTemporalitySelector(deltaTemporality), sdkmetric.WithTemporalitySelector(deltaTemporality),
) )
mp := sdkmetric.NewMeterProvider( mp := sdkmetric.NewMeterProvider(
sdkmetric.WithResource(detect.Resource()), sdkmetric.WithResource(Resource()),
sdkmetric.WithReader(reader), sdkmetric.WithReader(reader),
) )
return mp, reportFunc(reader, exps), nil return &MeterProvider{
MeterProvider: mp,
reader: reader,
exporters: exps,
}, nil
} }
// reportFunc returns a ReportFunc for collecting ResourceMetrics and then // Report exports metrics to the configured exporter. This should be done before the CLI
// exporting them to the configured Exporter. // exits.
func reportFunc(reader sdkmetric.Reader, exps []sdkmetric.Exporter) ReportFunc { func (m *MeterProvider) Report(ctx context.Context) {
return func() { if m.reader == nil {
ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout) // Not configured.
defer cancel() return
var rm metricdata.ResourceMetrics
if err := reader.Collect(ctx, &rm); err != nil {
// Error when collecting metrics. Do not send any.
return
}
var eg errgroup.Group
for _, exp := range exps {
exp := exp
eg.Go(func() error {
_ = exp.Export(ctx, &rm)
_ = exp.Shutdown(ctx)
return nil
})
}
// Can't report an error because we don't allow it to.
_ = eg.Wait()
} }
ctx, cancel := context.WithTimeout(ctx, reportTimeout)
defer cancel()
var rm metricdata.ResourceMetrics
if err := m.reader.Collect(ctx, &rm); err != nil {
// Error when collecting metrics. Do not send any.
return
}
var eg errgroup.Group
for _, exp := range m.exporters {
exp := exp
eg.Go(func() error {
_ = exp.Export(ctx, &rm)
_ = exp.Shutdown(ctx)
return nil
})
}
// Can't report an error because we don't allow it to.
_ = eg.Wait()
} }
// dockerOtelExporter reads the CLI metadata to determine an OTLP exporter // dockerOtelExporter reads the CLI metadata to determine an OTLP exporter
@ -184,6 +196,20 @@ func otelExporterOtlpEndpoint(cli command.Cli) (string, error) {
} }
// deltaTemporality sets the Temporality of every instrument to delta. // deltaTemporality sets the Temporality of every instrument to delta.
//
// This isn't really needed since we create a unique resource on each invocation,
// but it can help with cardinality concerns for downstream processors since they can
// perform aggregation for a time interval and then discard the data once that time
// period has passed. Cumulative temporality would imply to the downstream processor
// that they might receive a successive point and they may unnecessarily keep state
// they really shouldn't.
func deltaTemporality(_ sdkmetric.InstrumentKind) metricdata.Temporality { func deltaTemporality(_ sdkmetric.InstrumentKind) metricdata.Temporality {
return metricdata.DeltaTemporality return metricdata.DeltaTemporality
} }
// Meter returns a Meter from the MetricProvider that indicates the measurement
// comes from buildx with the appropriate version.
func Meter(mp metric.MeterProvider) metric.Meter {
return mp.Meter(version.Package,
metric.WithInstrumentationVersion(version.Version))
}

View File

@ -1,4 +1,4 @@
package metrics package metricutil
import ( import (
"context" "context"
@ -35,13 +35,9 @@ func detectOtlpExporter(ctx context.Context) (sdkmetric.Exporter, error) {
switch proto { switch proto {
case "grpc": case "grpc":
return otlpmetricgrpc.New(ctx, return otlpmetricgrpc.New(ctx)
otlpmetricgrpc.WithTemporalitySelector(deltaTemporality),
)
case "http/protobuf": case "http/protobuf":
return otlpmetrichttp.New(ctx, return otlpmetrichttp.New(ctx)
otlpmetrichttp.WithTemporalitySelector(deltaTemporality),
)
// case "http/json": // unsupported by library // case "http/json": // unsupported by library
default: default:
return nil, errors.Errorf("unsupported otlp protocol %v", proto) return nil, errors.Errorf("unsupported otlp protocol %v", proto)

View File

@ -0,0 +1,53 @@
package metricutil
import (
"context"
"os"
"path/filepath"
"sync"
"github.com/google/uuid"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/sdk/resource"
semconv "go.opentelemetry.io/otel/semconv/v1.21.0"
)
var (
res *resource.Resource
resOnce sync.Once
)
// Resource retrieves the OTEL resource for the buildx CLI.
func Resource() *resource.Resource {
resOnce.Do(func() {
var err error
res, err = resource.New(context.Background(),
resource.WithDetectors(serviceNameDetector{}),
resource.WithAttributes(
// Use a unique instance id so OTEL knows that each invocation
// of the CLI is its own instance. Without this, downstream
// OTEL processors may think the same process is restarting
// continuously and reset the metric counters.
semconv.ServiceInstanceID(uuid.New().String()),
),
resource.WithFromEnv(),
resource.WithTelemetrySDK(),
)
if err != nil {
otel.Handle(err)
}
})
return res
}
type serviceNameDetector struct{}
func (serviceNameDetector) Detect(ctx context.Context) (*resource.Resource, error) {
return resource.StringDetector(
semconv.SchemaURL,
semconv.ServiceNameKey,
func() (string, error) {
return filepath.Base(os.Args[0]), nil
},
).Detect(ctx)
}