OpenTelemetry Runtime Metrics

Most OpenTelemetry SDKs (e.g., the Go, Java, and Python SDKs) support automatically collecting and exporting runtime metrics (e.g., CPU, memory, threads) as OpenTelemetry metrics. When used in conjunction with APM, Kloudfuse shows the runtime metrics of each APM service in the Runtime tab of the corresponding service detail page.

Screenshot 2024-07-07 at 11.24.15 PM.png

Node.js

Currently, the OpenTelemetry Node.js SDK does not natively support collecting and exposing runtime metrics. However, by using the Node.js prom-client library (https://github.com/siimon/prom-client), runtime metrics can still be integrated with the Kloudfuse APM service detail page. Refer to the prom-client documentation for details on enabling default metrics.

 

TypeScript Example


The following example shows how to use prom-client in conjunction with the Node.js OpenTelemetry SDK. It defines a RunTimeInstrumentation class that instantiates and collects the runtime metrics using prom-client. The RunTimeInstrumentation class can then be added to the OpenTelemetry NodeSDK list of instrumentations.

 

...
import {
  MetricReader,
  PeriodicExportingMetricReader,
  PushMetricExporter,
  AggregationTemporality,
} from '@opentelemetry/sdk-metrics'
// Runtime-metrics instrumentation; its source is listed under runTimeMetric.class.ts.
import { RunTimeInstrumentation } from './opentelemetry/runTimeMetric.class'
...

// OTLP metrics exporter. The endpoint URL and the `service` header value are
// read from the environment; delta temporality is requested for exported data.
// (The imports of OTLPMetricExporter and NodeSDK are elided by the `...` above.)
const metricsExporter: PushMetricExporter = new OTLPMetricExporter({
  url: process.env.OTEL_EXPORTER_OTLP_METRICS_ENDPOINT,
  headers: { service: process.env.OTEL_SERVICE_NAME },
  keepAlive: true,
  temporalityPreference: AggregationTemporality.DELTA,
})

// Periodic reader that hands collected metrics to the exporter above,
// configured with a 5000 ms export interval.
const metricReader: MetricReader = new PeriodicExportingMetricReader({
  exporter: metricsExporter,
  exportIntervalMillis: 5000,
})

// Register RunTimeInstrumentation alongside the application's other
// instrumentations and attach the metric reader to the SDK.
const sdk = new NodeSDK({
  ...
  instrumentations: [
    ...
    new RunTimeInstrumentation(),
  ],
  metricReader: metricReader,
  ...
})

sdk.start()

runTimeMetric.class.ts

import {
  BatchObservableResult,
  Histogram,
  Observable,
  ObservableCounter,
  ObservableGauge,
} from '@opentelemetry/api'
import * as Prometheus from 'prom-client'
import { PerformanceEntry, PerformanceObserver, constants } from 'perf_hooks'
import { InstrumentationBase } from '@opentelemetry/instrumentation';
import type { InstrumentationConfig } from '@opentelemetry/instrumentation';

/** Name of the GC duration histogram; recorded natively instead of via prom-client. */
const NODEJS_GC_DURATION_SECONDS = 'nodejs_gc_duration_seconds'

/**
 * OpenTelemetry instrumentation that bridges prom-client's default Node.js
 * runtime metrics into OpenTelemetry instruments.
 *
 * Every counter and gauge registered by `Prometheus.collectDefaultMetrics` is
 * mirrored as an observable OpenTelemetry instrument (prefixed with `nodejs_`
 * when not already so named). Garbage-collection durations are recorded into
 * an OpenTelemetry histogram from a `perf_hooks` PerformanceObserver.
 *
 * No Node.js module is patched; the class only uses the instrumentation
 * lifecycle (`_updateMetricInstruments`, `enable`, `disable`) and `this.meter`.
 */
export class RunTimeInstrumentation extends InstrumentationBase {
  static instance: RunTimeInstrumentation

  // These fields are assigned from `_updateMetricInstruments()` / `enable()`.
  // NOTE(review): InstrumentationBase is expected to invoke both from its own
  // constructor (confirm against the installed @opentelemetry/instrumentation
  // version). `declare` guarantees no field initializer is emitted, so values
  // written during `super()` are not reset to `undefined` when compiling with
  // `useDefineForClassFields` (the default for ES2022+ targets).
  declare registry: Prometheus.Registry
  private declare metricMap: Map<string, Observable>
  private declare enabled: boolean
  /** GC observer created by the most recent `collectGC()` call, if any. */
  private declare gcObserver?: PerformanceObserver

  /**
   * @param config Standard instrumentation configuration, forwarded to the base class.
   */
  constructor(config: InstrumentationConfig = {}) {
    super('@opentelemetry/instrumentation-node-run-time', '1.0', config);
  }

  init() {
    // Not instrumenting or patching a Node.js module
  }

  /**
   * (Re)builds the prom-client registry and the OpenTelemetry instruments
   * created from it, using the current `this.meter`.
   */
  override _updateMetricInstruments() {
    this.metricMap = new Map<string, Observable>()
    this.registry = new Prometheus.Registry()
    this.registry.setContentType(
      // eslint-disable-next-line @typescript-eslint/ban-ts-comment
      // @ts-ignore
      Prometheus.openMetricsContentType,
    )
    Prometheus.collectDefaultMetrics({ register: this.registry })
    // GC duration is recorded by collectGC() instead, so drop prom-client's copy.
    this.registry.removeSingleMetric(NODEJS_GC_DURATION_SECONDS)
    this.createOtelObservers()
  }

  /** Allows the observable callback and the GC observer to report values. */
  override enable() {
    this.enabled = true
  }

  /** Makes the observable callback and the GC observer skip reporting. */
  override disable() {
    this.enabled = false
  }

  /**
   * Creates one observable instrument per prom-client counter/gauge, starts
   * GC collection, and registers a single batch callback for all instruments.
   */
  private createOtelObservers() {
    const metrics: Prometheus.MetricObject[] = this.registry.getMetricsAsArray()
    for (const metric of metrics) {
      switch (metric?.type?.toString()) {
        case 'counter':
          this.handleCounter(metric)
          break
        case 'gauge':
          this.handleGuage(metric)
          break
        default:
          // eslint-disable-next-line no-console
          console.log(`Not supported name: ${metric.name} type: ${metric?.type?.toString()}`)
      }
    }
    this.collectGC()
    this.meter.addBatchObservableCallback(
      async (observableResult: BatchObservableResult) => {
        await this.batchObservableCallback(observableResult)
      },
      [...this.metricMap.values()],
    )
  }

  /**
   * Reads the current prom-client values and reports them on the matching
   * observable instruments. Does nothing while the instrumentation is disabled.
   *
   * @param observableResult Batch result supplied by the OpenTelemetry SDK.
   */
  async batchObservableCallback(observableResult: BatchObservableResult): Promise<void> {
    if (!this.enabled) {
      return
    }
    const metrics: Prometheus.MetricObjectWithValues<Prometheus.MetricValue<string>>[] =
      await this.registry.getMetricsAsJSON()
    // NOTE(review): the registry is reset after every read, so prom-client
    // counters restart from zero each cycle while being reported through
    // ObservableCounter instruments — confirm this is the intended semantics.
    this.registry.resetMetrics()

    // Index once by name instead of scanning the array for every instrument.
    const metricsByName = new Map(metrics.map((metric) => [metric.name, metric] as const))
    for (const [metricName, observableMetric] of this.metricMap.entries()) {
      // A metric that is missing from the snapshot is skipped rather than
      // dereferenced (previously a TypeError on `undefined.values`).
      const metric = metricsByName.get(metricName)
      for (const metricValue of metric?.values ?? []) {
        const { value, labels = {} } = metricValue
        observableResult.observe(observableMetric, value, labels)
      }
    }
  }

  /**
   * Mirrors a prom-client counter as an OpenTelemetry observable counter.
   *
   * @param metric prom-client metric whose name and help text are reused.
   */
  handleCounter(metric: Prometheus.MetricObject) {
    const counter: ObservableCounter = this.meter.createObservableCounter(this.getMetricName(metric.name), {
      description: metric.help,
    })
    this.metricMap.set(metric.name, counter)
  }

  /**
   * Mirrors a prom-client gauge as an OpenTelemetry observable gauge.
   * (The method name keeps its original spelling for backward compatibility.)
   *
   * @param metric prom-client metric whose name and help text are reused.
   */
  handleGuage(metric: Prometheus.MetricObject) {
    const gauge: ObservableGauge = this.meter.createObservableGauge(this.getMetricName(metric.name), {
      description: metric.help,
    })
    this.metricMap.set(metric.name, gauge)
  }

  /**
   * Starts recording garbage-collection durations (in seconds) into an
   * OpenTelemetry histogram, attributed by GC kind.
   *
   * Any observer created by an earlier call is disconnected first so repeated
   * calls do not accumulate PerformanceObservers.
   */
  collectGC() {
    this.gcObserver?.disconnect()

    const histogram: Histogram = this.meter.createHistogram(NODEJS_GC_DURATION_SECONDS, {
      description: 'Garbage collection duration by kind, one of major, minor, incremental or weakcb.',
    })
    const kinds: Record<number, { kind: string }> = {
      [constants.NODE_PERFORMANCE_GC_MAJOR]: { kind: 'major' },
      [constants.NODE_PERFORMANCE_GC_MINOR]: { kind: 'minor' },
      [constants.NODE_PERFORMANCE_GC_INCREMENTAL]: { kind: 'incremental' },
      [constants.NODE_PERFORMANCE_GC_WEAKCB]: { kind: 'weakcb' },
    }

    const obs = new PerformanceObserver((list) => {
      if (!this.enabled) {
        return
      }
      // Record every entry in the batch, not only the first one.
      for (const entry of list.getEntries()) {
        // The GC kind is read from `entry.detail.kind` when `detail` is
        // present and from `entry.kind` otherwise.
        const gcEntry = entry as PerformanceEntry & { detail?: { kind?: number }; kind?: number }
        const kind = gcEntry.detail ? gcEntry.detail.kind : gcEntry.kind
        // Convert duration from milliseconds to seconds
        histogram.record(entry.duration / 1000, kind === undefined ? undefined : kinds[kind])
      }
    })
    obs.observe({ entryTypes: ['gc'] })
    this.gcObserver = obs
  }

  /**
   * Returns the OpenTelemetry instrument name for a prom-client metric name.
   *
   * @param metricName prom-client metric name.
   * @returns `metricName` unchanged if it already starts with `nodejs_`,
   *          otherwise `metricName` prefixed with `nodejs_`.
   */
  private getMetricName(metricName: string) {
    if (metricName.startsWith('nodejs_')) {
      return metricName
    }
    return `nodejs_${metricName}`
  }
}

 

JavaScript Example

 

The following example shows how to use prom-client in conjunction with the Node.js OpenTelemetry SDK. It defines a RunTimeInstrumentation class that instantiates and collects the runtime metrics using prom-client. The RunTimeInstrumentation class can then be added to the OpenTelemetry NodeSDK list of instrumentations.

 

// Runtime-metrics instrumentation; its source is listed under runTimeMetric.class.js.
const { RunTimeInstrumentation } = require('./runTimeMetric.class');
const {PeriodicExportingMetricReader} = require('@opentelemetry/sdk-metrics');

// Register RunTimeInstrumentation alongside the application's other
// instrumentations and attach a periodic metric reader backed by an OTLP exporter.
// (`opentelemetry` and `OTLPMetricExporter` are not defined in this snippet;
// they must be required elsewhere in the file.)
const sdk = new opentelemetry.NodeSDK({
  ...
  instrumentations: [
    ...
    new RunTimeInstrumentation()
  ],
  metricReader: new PeriodicExportingMetricReader({
    exporter: new OTLPMetricExporter(),
  }),
  ...
});

runTimeMetric.class.js