Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,10 @@ AZURE_TENANT_ID=
AZURE_SUBSCRIPTION_ID=
AZURE_STORAGE_CONNECTION_STRING=

# OpenTelemetry trace export (OTLP/HTTP). Point this at an OTLP collector to
# enable distributed tracing; leave empty to disable. e.g. http://localhost:4318
OTEL_EXPORTER_OTLP_ENDPOINT=

FIXER_BASE_URL=
FIXER_API_KEY=

Expand Down
2,636 changes: 1,780 additions & 856 deletions package-lock.json

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@
"@nestjs/typeorm": "^9.0.1",
"@nestjs/websockets": "^9.4.3",
"@noble/curves": "^1.9.7",
"@opentelemetry/api": "^1.9.1",
"@opentelemetry/auto-instrumentations-node": "^0.76.0",
"@opentelemetry/exporter-trace-otlp-http": "^0.218.0",
"@opentelemetry/sdk-node": "^0.218.0",
"@opentelemetry/sdk-trace-base": "^2.7.1",
"@railgun-community/engine": "^9.4.0",
"@scure/bip32": "^1.6.2",
"@scure/bip39": "^1.5.4",
Expand All @@ -72,7 +77,6 @@
"@uniswap/v3-sdk": "^3.25.2",
"@zano-project/zano-utils-js": "^0.0.3",
"alchemy-sdk": "^3.6.5",
"applicationinsights": "^2.9.8",
"arweave": "^1.15.7",
"axios": "^1.15.0",
"basic-ftp": "^5.0.5",
Expand Down
91 changes: 91 additions & 0 deletions src/__tests__/tracing.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
const mockStart = jest.fn();

jest.mock('@opentelemetry/sdk-node', () => ({
NodeSDK: jest.fn().mockImplementation(() => ({ start: mockStart })),
}));
jest.mock('@opentelemetry/auto-instrumentations-node', () => ({
getNodeAutoInstrumentations: jest.fn(() => []),
}));
jest.mock('@opentelemetry/exporter-trace-otlp-http', () => ({
OTLPTraceExporter: jest.fn(),
}));

import { SpanKind, SpanStatusCode } from '@opentelemetry/api';
import { ReadableSpan } from '@opentelemetry/sdk-trace-base';
import { ClientErrorSpanProcessor, isClientError, startTracing } from '../tracing';

function fakeSpan(kind: SpanKind, statusCode: SpanStatusCode, httpStatus?: number): ReadableSpan {
return {
kind,
status: { code: statusCode },
attributes: httpStatus == null ? {} : { 'http.response.status_code': httpStatus },
} as unknown as ReadableSpan;
}

describe('isClientError', () => {
it.each([
[400, true],
[404, true],
[499, true],
[500, false],
[503, false],
[399, false],
[200, false],
])('classifies %i as client error = %s', (code, expected) => {
expect(isClientError(code)).toBe(expected);
});

it('treats a missing status code as not a client error', () => {
expect(isClientError(undefined)).toBe(false);
});
});

describe('ClientErrorSpanProcessor', () => {
const processor = new ClientErrorSpanProcessor();

it('resets a 4xx SERVER span that was marked ERROR back to UNSET', () => {
const span = fakeSpan(SpanKind.SERVER, SpanStatusCode.ERROR, 404);
processor.onEnd(span);
expect(span.status.code).toBe(SpanStatusCode.UNSET);
});

it('keeps 5xx SERVER spans as ERROR', () => {
const span = fakeSpan(SpanKind.SERVER, SpanStatusCode.ERROR, 500);
processor.onEnd(span);
expect(span.status.code).toBe(SpanStatusCode.ERROR);
});

it('does not touch CLIENT spans (outbound dependency 4xx stay visible)', () => {
const span = fakeSpan(SpanKind.CLIENT, SpanStatusCode.ERROR, 404);
processor.onEnd(span);
expect(span.status.code).toBe(SpanStatusCode.ERROR);
});

it('leaves an already non-error 4xx span untouched', () => {
const span = fakeSpan(SpanKind.SERVER, SpanStatusCode.UNSET, 404);
processor.onEnd(span);
expect(span.status.code).toBe(SpanStatusCode.UNSET);
});
});

describe('startTracing', () => {
const original = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;

afterEach(() => {
mockStart.mockClear();
if (original === undefined) delete process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
else process.env.OTEL_EXPORTER_OTLP_ENDPOINT = original;
});

it('is disabled (returns undefined) without OTEL_EXPORTER_OTLP_ENDPOINT', () => {
delete process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
expect(startTracing()).toBeUndefined();
});

it('starts the SDK when an endpoint is configured', () => {
process.env.OTEL_EXPORTER_OTLP_ENDPOINT = 'http://localhost:4318';
const sdk = startTracing();
expect(sdk).toBeDefined();
expect(mockStart).toHaveBeenCalledTimes(1);
});
});
25 changes: 6 additions & 19 deletions src/main.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
// Must be imported first: starts the OpenTelemetry SDK before any instrumented
// library is loaded (see src/tracing.ts).
import './tracing';
import { ValidationPipe, VersioningType } from '@nestjs/common';
import { NestFactory } from '@nestjs/core';
import { WsAdapter } from '@nestjs/platform-ws';
import { DocumentBuilder, SwaggerModule } from '@nestjs/swagger';
import * as AppInsights from 'applicationinsights';
import { spawnSync } from 'child_process';
import { useContainer } from 'class-validator';
import cors from 'cors';
Expand Down Expand Up @@ -40,24 +42,9 @@ process.on('uncaughtException', (error) => {
});

async function bootstrap() {
if (process.env.APPINSIGHTS_INSTRUMENTATIONKEY) {
AppInsights.setup().setAutoDependencyCorrelation(true).setAutoCollectConsole(true, true);
AppInsights.defaultClient.context.tags[AppInsights.defaultClient.context.keys.cloudRole] = 'dfx-api';

// Don't mark 4xx client errors as failures - only 5xx are real server errors
AppInsights.defaultClient.addTelemetryProcessor((envelope) => {
const data = envelope.data as { baseType?: string; baseData?: { responseCode?: string; success?: boolean } };
if (data.baseType === 'RequestData' && data.baseData?.responseCode) {
const responseCode = parseInt(data.baseData.responseCode, 10);
if (responseCode >= 400 && responseCode < 500) {
data.baseData.success = true;
}
}
return true;
});

AppInsights.start();
}
// Observability is initialized in src/tracing.ts (imported above): the
// OpenTelemetry SDK auto-instruments HTTP/DB/NestJS and exports traces via
// OTLP. 4xx-not-a-failure is handled there in the HTTP response hook.

const app = await NestFactory.create(AppModule, { bodyParser: false });

Expand Down
68 changes: 68 additions & 0 deletions src/shared/services/__tests__/dfx-logger.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import { Logger } from '@nestjs/common';
import { Span, SpanStatusCode, trace } from '@opentelemetry/api';
import { DfxLogger } from '../dfx-logger';

function fakeSpan(traceId = 'a'.repeat(32), spanId = 'b'.repeat(16)): jest.Mocked<Span> {
return {
recordException: jest.fn(),
addEvent: jest.fn(),
setStatus: jest.fn(),
spanContext: () => ({ traceId, spanId, traceFlags: 1 }),
} as unknown as jest.Mocked<Span>;
}

describe('DfxLogger', () => {
afterEach(() => jest.restoreAllMocks());

describe('without an active span', () => {
beforeEach(() => jest.spyOn(trace, 'getActiveSpan').mockReturnValue(undefined));

it('never throws and does not append a trace id', () => {
const logSpy = jest.spyOn(Logger.prototype, 'log').mockImplementation();
const errSpy = jest.spyOn(Logger.prototype, 'error').mockImplementation();

const logger = new DfxLogger('Test');
expect(() => logger.error('boom', new Error('x'))).not.toThrow();
expect(() => logger.info('hi')).not.toThrow();

expect(logSpy).toHaveBeenCalledWith('hi');
expect(errSpy).toHaveBeenCalledWith(expect.not.stringContaining('trace_id='));
});
});

describe('with an active span', () => {
it('records the exception and marks the span as error on error()', () => {
const span = fakeSpan();
jest.spyOn(trace, 'getActiveSpan').mockReturnValue(span);
jest.spyOn(Logger.prototype, 'error').mockImplementation();

const error = new Error('db down');
new DfxLogger('Test').error('failure', error);

expect(span.recordException).toHaveBeenCalledWith(error);
expect(span.setStatus).toHaveBeenCalledWith({ code: SpanStatusCode.ERROR, message: 'failure' });
});

it('adds an event but does not mark an error status on warn()', () => {
const span = fakeSpan();
jest.spyOn(trace, 'getActiveSpan').mockReturnValue(span);
jest.spyOn(Logger.prototype, 'warn').mockImplementation();

new DfxLogger('Test').warn('be careful');

expect(span.addEvent).toHaveBeenCalledWith('be careful');
expect(span.setStatus).not.toHaveBeenCalled();
});

it('appends the trace id to the log line so Loki can link to Tempo', () => {
const traceId = 'c'.repeat(32);
const span = fakeSpan(traceId);
jest.spyOn(trace, 'getActiveSpan').mockReturnValue(span);
const logSpy = jest.spyOn(Logger.prototype, 'log').mockImplementation();

new DfxLogger('Test').info('processing');

expect(logSpy).toHaveBeenCalledWith(expect.stringContaining(`trace_id=${traceId}`));
});
});
});
42 changes: 28 additions & 14 deletions src/shared/services/dfx-logger.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import { Logger } from '@nestjs/common';
import { TelemetryClient } from 'applicationinsights';
import { SeverityLevel } from 'applicationinsights/out/Declarations/Contracts';
import * as AppInsights from 'applicationinsights';
import { isSpanContextValid, SpanStatusCode, trace } from '@opentelemetry/api';

export enum LogLevel {
CRITICAL = 'Critical',
Expand Down Expand Up @@ -45,42 +43,58 @@ export class DfxLogger {
}

critical(message: string, error?: Error) {
this.trace(SeverityLevel.Critical, message, error);
this.recordOnSpan(message, error, true);
this.logger.error(this.format(message, error));
}

error(message: string, error?: Error) {
this.trace(SeverityLevel.Error, message, error);
this.recordOnSpan(message, error, true);
this.logger.error(this.format(message, error));
}

warn(message: string, error?: Error) {
this.trace(SeverityLevel.Warning, message, error);
this.recordOnSpan(message, error, false);
this.logger.warn(this.format(message, error));
}

info(message: string, error?: Error) {
this.trace(SeverityLevel.Information, message, error);
this.logger.log(this.format(message, error));
}

verbose(message: string, error?: Error) {
this.trace(SeverityLevel.Verbose, message, error);
this.logger.verbose(this.format(message, error));
}

// --- HELPER METHODS --- //

private trace(severity: SeverityLevel, message: string, error?: Error) {
const trace = (this.context ? `[${this.context}] ` : '') + this.format(message, error);
this.client?.trackTrace({ severity, message: trace });
// Attach the log to the active trace span so warnings/errors surface on the
// request's trace and exceptions are correlated to it — the OpenTelemetry
// equivalent of App Insights' exception/trace tracking. No-op when there is
// no active span (background jobs, startup).
private recordOnSpan(message: string, error: Error | undefined, isError: boolean) {
const span = trace.getActiveSpan();
if (!span) return;

if (error) {
span.recordException(error);
} else {
span.addEvent(message);
}

if (isError) span.setStatus({ code: SpanStatusCode.ERROR, message });
}

private format(message: string, error?: Error): string {
return message + (error ? ` ${error?.stack}` : '');
return message + (error ? ` ${error?.stack}` : '') + this.traceContext();
}

private get client(): TelemetryClient | undefined {
return AppInsights.defaultClient;
// Append the active trace id so Grafana can link a Loki log line straight to
// its Tempo trace (the Loki data source matches `trace_id=<hex>`).
private traceContext(): string {
const span = trace.getActiveSpan();
if (!span) return '';

const ctx = span.spanContext();
return isSpanContextValid(ctx) ? ` trace_id=${ctx.traceId} span_id=${ctx.spanId}` : '';
}
}
Loading