diff --git a/commands/bake.go b/commands/bake.go index e17a8ebc..565a72b5 100644 --- a/commands/bake.go +++ b/commands/bake.go @@ -19,7 +19,6 @@ import ( "github.com/docker/buildx/util/confutil" "github.com/docker/buildx/util/desktop" "github.com/docker/buildx/util/dockerutil" - "github.com/docker/buildx/util/metrics" "github.com/docker/buildx/util/progress" "github.com/docker/buildx/util/tracing" "github.com/docker/cli/cli/command" @@ -43,14 +42,6 @@ type bakeOptions struct { } func runBake(ctx context.Context, dockerCli command.Cli, targets []string, in bakeOptions, cFlags commonFlags) (err error) { - mp, report, err := metrics.MeterProvider(dockerCli) - if err != nil { - return err - } - defer report() - - recordVersionInfo(mp, "bake") - ctx, end, err := tracing.TraceCurrentCommand(ctx, "bake") if err != nil { return err diff --git a/commands/build.go b/commands/build.go index e142befd..843c6a07 100644 --- a/commands/build.go +++ b/commands/build.go @@ -30,10 +30,9 @@ import ( "github.com/docker/buildx/util/cobrautil" "github.com/docker/buildx/util/desktop" "github.com/docker/buildx/util/ioset" - "github.com/docker/buildx/util/metrics" + "github.com/docker/buildx/util/metricutil" "github.com/docker/buildx/util/progress" "github.com/docker/buildx/util/tracing" - "github.com/docker/buildx/version" "github.com/docker/cli-docs-tool/annotation" "github.com/docker/cli/cli" "github.com/docker/cli/cli/command" @@ -53,9 +52,6 @@ import ( "github.com/sirupsen/logrus" "github.com/spf13/cobra" "github.com/spf13/pflag" - "go.opentelemetry.io/otel" - "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/metric" "google.golang.org/grpc/codes" ) @@ -216,13 +212,11 @@ func (o *buildOptions) toDisplayMode() (progressui.DisplayMode, error) { } func runBuild(ctx context.Context, dockerCli command.Cli, options buildOptions) (err error) { - mp, report, err := metrics.MeterProvider(dockerCli) + mp, err := metricutil.NewMeterProvider(ctx, dockerCli) if err != nil { return err } - defer report() - - recordVersionInfo(mp, "build") + defer mp.Report(context.Background()) ctx, end, err := tracing.TraceCurrentCommand(ctx, "build") if err != nil { @@ -936,30 +930,3 @@ func maybeJSONArray(v string) []string { } return []string{v} } - -func recordVersionInfo(mp metric.MeterProvider, command string) { - // Still in the process of testing/stabilizing these counters. - if !isExperimental() { - return - } - - meter := mp.Meter("github.com/docker/buildx", - metric.WithInstrumentationVersion(version.Version), - ) - - counter, err := meter.Int64Counter("docker.cli.count", - metric.WithDescription("Number of invocations of the docker buildx command."), - ) - if err != nil { - otel.Handle(err) - } - - counter.Add(context.Background(), 1, - metric.WithAttributes( - attribute.String("command", command), - attribute.String("package", version.Package), - attribute.String("version", version.Version), - attribute.String("revision", version.Revision), - ), - ) -} diff --git a/go.mod b/go.mod index 19049f90..98f14fed 100644 --- a/go.mod +++ b/go.mod @@ -42,6 +42,7 @@ require ( go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0 go.opentelemetry.io/otel/metric v1.19.0 + go.opentelemetry.io/otel/sdk v1.19.0 go.opentelemetry.io/otel/sdk/metric v1.19.0 go.opentelemetry.io/otel/trace v1.19.0 golang.org/x/mod v0.13.0 @@ -147,7 +148,6 @@ require ( go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0 // indirect go.opentelemetry.io/otel/exporters/prometheus v0.42.0 // indirect - go.opentelemetry.io/otel/sdk v1.19.0 // indirect go.opentelemetry.io/proto/otlp v1.0.0 // indirect golang.org/x/crypto v0.17.0 // indirect golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 // indirect diff --git a/util/confutil/exp.go b/util/confutil/exp.go new file mode 100644 index 00000000..03c6b97a --- /dev/null +++ b/util/confutil/exp.go @@ -0,0 +1,15 @@ +package confutil + +import ( + "os" + "strconv" +) + +// IsExperimental checks if the experimental flag has been configured. +func IsExperimental() bool { + if v, ok := os.LookupEnv("BUILDX_EXPERIMENTAL"); ok { + vv, _ := strconv.ParseBool(v) + return vv + } + return false +} diff --git a/util/metrics/metrics.go b/util/metricutil/metric.go similarity index 60% rename from util/metrics/metrics.go rename to util/metricutil/metric.go index 36e1cc62..ee8cbaa2 100644 --- a/util/metrics/metrics.go +++ b/util/metricutil/metric.go @@ -1,4 +1,4 @@ -package metrics +package metricutil import ( "context" @@ -7,8 +7,9 @@ import ( "path" "time" + "github.com/docker/buildx/util/confutil" + "github.com/docker/buildx/version" "github.com/docker/cli/cli/command" - "github.com/moby/buildkit/util/tracing/detect" "github.com/pkg/errors" "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" "go.opentelemetry.io/otel/metric" @@ -20,75 +21,86 @@ import ( const ( otelConfigFieldName = "otel" - shutdownTimeout = 2 * time.Second + reportTimeout = 2 * time.Second ) -// ReportFunc is invoked to signal the metrics should be sent to the -// desired endpoint. It should be invoked on application shutdown. -type ReportFunc func() +// MeterProvider holds a MeterProvider for metric generation and the configured +// exporters for reporting metrics from the CLI. +type MeterProvider struct { + metric.MeterProvider + reader *sdkmetric.ManualReader + exporters []sdkmetric.Exporter +} -// MeterProvider returns a MeterProvider suitable for CLI usage. -// The primary difference between this metric reader and a more typical -// usage is that metric reporting only happens once when ReportFunc -// is invoked. -func MeterProvider(cli command.Cli) (metric.MeterProvider, ReportFunc, error) { +// NewMeterProvider configures a MeterProvider from the CLI context. +func NewMeterProvider(ctx context.Context, cli command.Cli) (*MeterProvider, error) { var exps []sdkmetric.Exporter - if exp, err := dockerOtelExporter(cli); err != nil { - return nil, nil, err - } else if exp != nil { - exps = append(exps, exp) - } + // Only metric exporters if the experimental flag is set. + if confutil.IsExperimental() { + if exp, err := dockerOtelExporter(cli); err != nil { + return nil, err + } else if exp != nil { + exps = append(exps, exp) + } - if exp, err := detectOtlpExporter(context.Background()); err != nil { - return nil, nil, err - } else if exp != nil { - exps = append(exps, exp) + if exp, err := detectOtlpExporter(ctx); err != nil { + return nil, err + } else if exp != nil { + exps = append(exps, exp) + } } if len(exps) == 0 { // No exporters are configured so use a noop provider. - return noop.NewMeterProvider(), func() {}, nil + return &MeterProvider{ + MeterProvider: noop.NewMeterProvider(), + }, nil } - // Use delta temporality because, since this is a CLI program, we can never - // know the cumulative value. reader := sdkmetric.NewManualReader( sdkmetric.WithTemporalitySelector(deltaTemporality), ) mp := sdkmetric.NewMeterProvider( - sdkmetric.WithResource(detect.Resource()), + sdkmetric.WithResource(Resource()), sdkmetric.WithReader(reader), ) - return mp, reportFunc(reader, exps), nil + return &MeterProvider{ + MeterProvider: mp, + reader: reader, + exporters: exps, + }, nil } -// reportFunc returns a ReportFunc for collecting ResourceMetrics and then -// exporting them to the configured Exporter. -func reportFunc(reader sdkmetric.Reader, exps []sdkmetric.Exporter) ReportFunc { - return func() { - ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout) - defer cancel() - - var rm metricdata.ResourceMetrics - if err := reader.Collect(ctx, &rm); err != nil { - // Error when collecting metrics. Do not send any. - return - } - - var eg errgroup.Group - for _, exp := range exps { - exp := exp - eg.Go(func() error { - _ = exp.Export(ctx, &rm) - _ = exp.Shutdown(ctx) - return nil - }) - } - - // Can't report an error because we don't allow it to. - _ = eg.Wait() +// Report exports metrics to the configured exporter. This should be done before the CLI +// exits. +func (m *MeterProvider) Report(ctx context.Context) { + if m.reader == nil { + // Not configured. + return } + + ctx, cancel := context.WithTimeout(ctx, reportTimeout) + defer cancel() + + var rm metricdata.ResourceMetrics + if err := m.reader.Collect(ctx, &rm); err != nil { + // Error when collecting metrics. Do not send any. + return + } + + var eg errgroup.Group + for _, exp := range m.exporters { + exp := exp + eg.Go(func() error { + _ = exp.Export(ctx, &rm) + _ = exp.Shutdown(ctx) + return nil + }) + } + + // Can't report an error because we don't allow it to. + _ = eg.Wait() } // dockerOtelExporter reads the CLI metadata to determine an OTLP exporter @@ -184,6 +196,20 @@ func otelExporterOtlpEndpoint(cli command.Cli) (string, error) { } // deltaTemporality sets the Temporality of every instrument to delta. +// +// This isn't really needed since we create a unique resource on each invocation, +// but it can help with cardinality concerns for downstream processors since they can +// perform aggregation for a time interval and then discard the data once that time +// period has passed. Cumulative temporality would imply to the downstream processor +// that they might receive a successive point and they may unnecessarily keep state +// they really shouldn't. func deltaTemporality(_ sdkmetric.InstrumentKind) metricdata.Temporality { return metricdata.DeltaTemporality } + +// Meter returns a Meter from the MetricProvider that indicates the measurement +// comes from buildx with the appropriate version. +func Meter(mp metric.MeterProvider) metric.Meter { + return mp.Meter(version.Package, + metric.WithInstrumentationVersion(version.Version)) +} diff --git a/util/metrics/otlp.go b/util/metricutil/otlp.go similarity index 86% rename from util/metrics/otlp.go rename to util/metricutil/otlp.go index b121ac3c..c07d244e 100644 --- a/util/metrics/otlp.go +++ b/util/metricutil/otlp.go @@ -1,4 +1,4 @@ -package metrics +package metricutil import ( "context" @@ -35,13 +35,9 @@ func detectOtlpExporter(ctx context.Context) (sdkmetric.Exporter, error) { switch proto { case "grpc": - return otlpmetricgrpc.New(ctx, - otlpmetricgrpc.WithTemporalitySelector(deltaTemporality), - ) + return otlpmetricgrpc.New(ctx) case "http/protobuf": - return otlpmetrichttp.New(ctx, - otlpmetrichttp.WithTemporalitySelector(deltaTemporality), - ) + return otlpmetrichttp.New(ctx) // case "http/json": // unsupported by library default: return nil, errors.Errorf("unsupported otlp protocol %v", proto) diff --git a/util/metricutil/resource.go b/util/metricutil/resource.go new file mode 100644 index 00000000..65e95683 --- /dev/null +++ b/util/metricutil/resource.go @@ -0,0 +1,53 @@ +package metricutil + +import ( + "context" + "os" + "path/filepath" + "sync" + + "github.com/google/uuid" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/sdk/resource" + semconv "go.opentelemetry.io/otel/semconv/v1.21.0" +) + +var ( + res *resource.Resource + resOnce sync.Once +) + +// Resource retrieves the OTEL resource for the buildx CLI. +func Resource() *resource.Resource { + resOnce.Do(func() { + var err error + res, err = resource.New(context.Background(), + resource.WithDetectors(serviceNameDetector{}), + resource.WithAttributes( + // Use a unique instance id so OTEL knows that each invocation + // of the CLI is its own instance. Without this, downstream + // OTEL processors may think the same process is restarting + // continuously and reset the metric counters. + semconv.ServiceInstanceID(uuid.New().String()), + ), + resource.WithFromEnv(), + resource.WithTelemetrySDK(), + ) + if err != nil { + otel.Handle(err) + } + }) + return res +} + +type serviceNameDetector struct{} + +func (serviceNameDetector) Detect(ctx context.Context) (*resource.Resource, error) { + return resource.StringDetector( + semconv.SchemaURL, + semconv.ServiceNameKey, + func() (string, error) { + return filepath.Base(os.Args[0]), nil + }, + ).Detect(ctx) +}