From 98723a5f867a5874e8ebe2535517673300048414 Mon Sep 17 00:00:00 2001 From: Eric Doughty-Papassideris Date: Sun, 8 Dec 2024 22:49:10 +0100 Subject: [PATCH 01/15] =?UTF-8?q?=F0=9F=9A=A7=20back:=20WIP=20Initial=20di?= =?UTF-8?q?agnostic=20endpoint=20(#762)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../config/custom-environment-variables.json | 8 + tdrive/backend/node/config/default.json | 7 + .../platform/framework/api/diagnostics.ts | 218 ++++++++++++++++++ .../core/platform/services/database/api.ts | 3 +- .../services/database/services/index.ts | 13 ++ .../orm/connectors/abstract-connector.ts | 5 + .../database/services/orm/connectors/index.ts | 3 +- .../orm/connectors/mongodb/mongodb.ts | 22 ++ .../orm/connectors/postgres/postgres.ts | 81 ++++--- .../platform/services/diagnostics/index.ts | 35 +++ .../services/diagnostics/providers/db-ping.ts | 9 + .../services/diagnostics/providers/index.ts | 9 + .../diagnostics/providers/platform-started.ts | 14 ++ .../services/diagnostics/providers/process.ts | 16 ++ .../services/diagnostics/service-provider.ts | 5 + .../platform/services/diagnostics/service.ts | 5 + .../platform/services/diagnostics/utils.ts | 40 ++++ .../services/diagnostics/web/heap-routes.ts | 86 +++++++ .../services/diagnostics/web/index.ts | 11 + .../services/diagnostics/web/probe-routes.ts | 25 ++ .../node/src/services/av/service/index.ts | 4 +- .../node/src/services/global-resolver.ts | 9 +- 22 files changed, 596 insertions(+), 32 deletions(-) create mode 100644 tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts create mode 100644 tdrive/backend/node/src/core/platform/services/diagnostics/index.ts create mode 100644 tdrive/backend/node/src/core/platform/services/diagnostics/providers/db-ping.ts create mode 100644 tdrive/backend/node/src/core/platform/services/diagnostics/providers/index.ts create mode 100644 
tdrive/backend/node/src/core/platform/services/diagnostics/providers/platform-started.ts create mode 100644 tdrive/backend/node/src/core/platform/services/diagnostics/providers/process.ts create mode 100644 tdrive/backend/node/src/core/platform/services/diagnostics/service-provider.ts create mode 100644 tdrive/backend/node/src/core/platform/services/diagnostics/service.ts create mode 100644 tdrive/backend/node/src/core/platform/services/diagnostics/utils.ts create mode 100644 tdrive/backend/node/src/core/platform/services/diagnostics/web/heap-routes.ts create mode 100644 tdrive/backend/node/src/core/platform/services/diagnostics/web/index.ts create mode 100644 tdrive/backend/node/src/core/platform/services/diagnostics/web/probe-routes.ts diff --git a/tdrive/backend/node/config/custom-environment-variables.json b/tdrive/backend/node/config/custom-environment-variables.json index 3e337b868..97880a2b6 100644 --- a/tdrive/backend/node/config/custom-environment-variables.json +++ b/tdrive/backend/node/config/custom-environment-variables.json @@ -16,6 +16,14 @@ "logger": { "level": "LOG_LEVEL" }, + "diagnostics": { + "skipKeys": { + "__name": "DIAG_SKIP_KEYS", + "__format": "json" + }, + "probeSecret": "DIAG_PROBE_SECRET", + "secret": "DIAG_SECRET" + }, "webserver": { "host": "TWAKE_DRIVE_HOST", "logger": { diff --git a/tdrive/backend/node/config/default.json b/tdrive/backend/node/config/default.json index 30da3a6f7..d59e76643 100644 --- a/tdrive/backend/node/config/default.json +++ b/tdrive/backend/node/config/default.json @@ -35,6 +35,12 @@ "logger": { "level": "debug" }, + "diagnostics": { + "skipKeys": [], + "probeSecret": "", + "secret": "", + "secretChallengeRefreshS": 15 + }, "tracker": { "type": "segment", "segment": { @@ -226,6 +232,7 @@ }, "services": [ "auth", + "diagnostics", "push", "storage", "webserver", diff --git a/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts b/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts new 
file mode 100644 index 000000000..d26010e15 --- /dev/null +++ b/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts @@ -0,0 +1,218 @@ +import assert from "node:assert"; +import config from "../../../config"; + +/** + * Values that can match a set of diagnostic providers. + * + * `startup`, `ready` and `live` are meant to match the meanings of the corresponding + * kubernetes probes: + * https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/ + */ +export type TDiagnosticTag = + | "*" // Special value that includes everything, and is always included + | "startup" // Tests that are absolutely required (and light) to even begin the other tests + | "ready" // Tests required before traffic can be sent our way + | "live" // Tests required to avoid being restarted + | "stats"; // Expensive diagnostics that should not be run often + +/** Detail requested from platform service self-diagnostics */ +export enum TServiceDiagnosticDepth { + /** Minimal cost information that tests functioning service */ + critical = 0, + /** Statistics with an impact small enough for regular tracking in a time series */ + tracked_statistics = 1, + /** Statistics possibly expensive and large to calculate, for occasional debug operations */ + deep_statistics = 2, +} + +interface IDiagnosticsConfig { + // Diagnostic keys that should be considered ok without evaluation + skipKeys?: string[]; + // This secret must be provided to the diagnostic endpoints as a query param + // therefore it's likely to leak, through logs for example, and should not be + // relied on for security besides disabling diagnostics. At worst this + // provides access to the DB statistics. + probeSecret?: string; + // This secret is required to sign more dangerous diagnostic endpoints, such + // as the heap snapshot. It should never be sent over the network. 
+ secret?: string; + // Maximum time to keep the same challenge token for diagnostic endpoints, in seconds. + // Must be large enough to have a reasonable chance of running the token request + // then the action on the same backend instance. + secretChallengeRefreshS: number; +} + +export const getConfig = (): IDiagnosticsConfig => { + let configSection = config.get("diagnostics") as IDiagnosticsConfig; + if (typeof configSection.skipKeys === "string") + configSection = { + ...configSection, + skipKeys: (configSection.skipKeys as string) + .trim() + .split(/[,\s]+/g) + .filter(x => !!x), + }; + if (typeof configSection.secretChallengeRefreshS === "string") + configSection = { + ...configSection, + secretChallengeRefreshS: parseInt(configSection.secretChallengeRefreshS, 10), + }; + return configSection; +}; + +/** Code-wide unique key for each provider */ +export type TDiagnosticKey = string; + +/** Each provider should return an object of this format. The key of the provider defines the schema. */ +export type TDiagnosticResult = { ok: boolean; warn?: string } & { [key: string]: unknown }; + +/** Implemented by objects that want to provide data to the diagnostic check */ +export interface IDiagnosticProvider { + /** Code-wide unique key underwhich the result of `get` will be included */ + key: TDiagnosticKey; + + /** This result is present in any included request tag */ + tags: TDiagnosticTag[] | "*"; + + /** + * If set, this provider will be polled at that interval. + * If `undefined`, this provider will be ran at each request. + */ + pollPeriodMs?: number; + + /** + * Returns an object as presented to a diagnostic requester. + * Warning: this could be public and readable to the internet. + * @param completeButSlow If `true`, perform additional operations for a + * more informative + */ + get(): Promise; +} + +/** + * Platform services that can provide generic diagnostic implementations may use this interface. 
+ * + * Matching from {@link TDiagnosticTag} to {@link TServiceDiagnosticDepth} is expected to + * be done by intermediary providers. This is because not all services are equally critical, + * or have the same tolerable down times. + */ +export interface IServiceDiagnosticProvider { + /** The return format is specific to each service, but should include a `{ok: boolean}` field. */ + getDiagnostics(depth: TServiceDiagnosticDepth): Promise; +} + +const isProviderIncludedInTag = ( + tag: TDiagnosticTag, + provider: IDiagnosticProvider, + config: IDiagnosticsConfig, +) => + (provider.tags === "*" || provider.tags.indexOf(tag) >= 0 || provider.tags.indexOf("*") >= 0) && + (!config.skipKeys?.length || !config.skipKeys.includes(provider.key)); + +// registered providers with `pollPeriodMs === undefined` +const immediateDiagnosticProviders: IDiagnosticProvider[] = []; +// registered providers with `pollPeriodMs !== undefined` +const periodicDiagnosticProviders: IDiagnosticProvider[] = []; + +const now = () => Math.round(process.uptime() * 1000); + +const isKeyAlreadyRegistered = (key: TDiagnosticKey) => + immediateDiagnosticProviders.some(provider => key == provider.key) || + periodicDiagnosticProviders.some(provider => key == provider.key); + +// stores results of all the `pollPeriodMs` truthy providers +const latestPeriodicDiagnostics: { [key: TDiagnosticKey]: object } = {}; +const recordDiagnostic = (startMs: number, key: TDiagnosticKey, data?: object, error?: object) => + (latestPeriodicDiagnostics[key] = { + durationMs: Math.round(now() - startMs), + ...(error ? 
{ ok: false, error } : { ...data }), + }); + +const runProvider = async provider => { + const startMs = now(); + try { + const result = await provider.get(); + if (!result.ok) + logger.error( + { provider: provider.key, result }, + "Got diagnostic provider result with ok=false", + ); + else if (result.warn) + logger.warn( + { provider: provider.key, result }, + "Got diagnostic provider result with ok=true but a warning", + ); + return recordDiagnostic(startMs, provider.key, result); + } catch (error) { + return recordDiagnostic(startMs, provider.key, undefined, error); + } +}; + +const pendingTimeouts: number[] = []; // Pending return values from `setTimeout` calls +const forgetPendingTimeout = (timeoutId: number) => { + const index = pendingTimeouts.indexOf(timeoutId); + assert(index >= 0); + pendingTimeouts.splice(index, 1); +}; + +let hasShutdown = false; +const ensureHasntShutdown = () => { + if (hasShutdown) throw new Error("Diagnostics service already shutdown"); +}; + +export default { + /** Add a provider to be included in diagnostics output */ + registerProviders(...providers: IDiagnosticProvider[]) { + ensureHasntShutdown(); + providers.forEach(provider => { + if (isKeyAlreadyRegistered(provider.key)) throw new Error("Provider with duplicate key"); + if (provider.pollPeriodMs) { + periodicDiagnosticProviders.push(provider); + } else { + immediateDiagnosticProviders.push(provider); + return; + } + let triggerUpdate: () => void = () => undefined; // The empty function is for the linter. 
I love you linter <3 + const updateProvider = (timeoutId: number) => async () => { + forgetPendingTimeout(timeoutId); + await runProvider(provider); + triggerUpdate(); + }; + triggerUpdate = () => { const id = setTimeout(() => void updateProvider(id)(), provider.pollPeriodMs); pendingTimeouts.push(id); }; + triggerUpdate(); + }); + }, + + /** Cancel all pending diagnostic updates */ + shutdown() { + ensureHasntShutdown(); + pendingTimeouts.forEach(timeout => clearTimeout(timeout)); + hasShutdown = true; + }, + + /** Return the values of all providers which include the provided tag */ + async get( + tag: TDiagnosticTag, + ): Promise<{ ok: boolean } | { [key: TDiagnosticKey]: TDiagnosticResult }> { + const config = getConfig(); + const result = { ok: true }; + let atLeastOneCheck = false; + periodicDiagnosticProviders.forEach(provider => { + if (!isProviderIncludedInTag(tag, provider, config)) return; + atLeastOneCheck = true; + result[provider.key] = latestPeriodicDiagnostics[provider.key]; + if (!result[provider.key]?.ok) result.ok = false; // a provider that has not reported yet counts as failing + }); + await Promise.all( + immediateDiagnosticProviders.map(async provider => { + if (!isProviderIncludedInTag(tag, provider, config)) return; + atLeastOneCheck = true; + const providerResult = await runProvider(provider); + if (!providerResult.ok) result.ok = false; + return (result[provider.key] = providerResult); + }), + ); + if (!atLeastOneCheck) result.ok = false; + return result; + }, +}; diff --git a/tdrive/backend/node/src/core/platform/services/database/api.ts b/tdrive/backend/node/src/core/platform/services/database/api.ts index e9a3ab96c..259fc38f7 100644 --- a/tdrive/backend/node/src/core/platform/services/database/api.ts +++ b/tdrive/backend/node/src/core/platform/services/database/api.ts @@ -1,10 +1,11 @@ import { TdriveServiceProvider } from "../../framework"; +import type { IServiceDiagnosticProvider } from "../../framework/api/diagnostics"; import { Connector } from "./services/orm/connectors"; import Manager from "./services/orm/manager"; import 
Repository from "./services/orm/repository/repository"; import { EntityTarget } from "./services/orm/types"; -export interface DatabaseServiceAPI extends TdriveServiceProvider { +export interface DatabaseServiceAPI extends TdriveServiceProvider, IServiceDiagnosticProvider { /** * Get the database connector */ diff --git a/tdrive/backend/node/src/core/platform/services/database/services/index.ts b/tdrive/backend/node/src/core/platform/services/database/services/index.ts index d389d621a..5d0df0158 100644 --- a/tdrive/backend/node/src/core/platform/services/database/services/index.ts +++ b/tdrive/backend/node/src/core/platform/services/database/services/index.ts @@ -7,6 +7,10 @@ import { MongoConnectionOptions } from "./orm/connectors/mongodb/mongodb"; import { EntityTarget } from "./orm/types"; import { RepositoryManager } from "./orm/repository/manager"; import { PostgresConnectionOptions } from "./orm/connectors/postgres/postgres"; +import type { + TDiagnosticResult, + TServiceDiagnosticDepth, +} from "../../../framework/api/diagnostics"; export default class DatabaseService implements DatabaseServiceAPI { version = "1"; @@ -38,6 +42,15 @@ export default class DatabaseService implements DatabaseServiceAPI { } } + async getDiagnostics(depth: TServiceDiagnosticDepth): Promise { + const connector = this.getConnector(); + const result = await connector.getDiagnostics(depth); + return { + type: connector.getType(), + ...result, + }; + } + getManager(): Manager { return new Manager(this.connector); } diff --git a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/abstract-connector.ts b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/abstract-connector.ts index daaf58102..50db7c98a 100644 --- a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/abstract-connector.ts +++ b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/abstract-connector.ts @@ -4,12 
+4,17 @@ import { ConnectionOptions, DatabaseType } from "../.."; import { FindOptions } from "../repository/repository"; import { ColumnDefinition, EntityDefinition } from "../types"; import { ListResult, Paginable, Pagination } from "../../../../../framework/api/crud-service"; +import type { + TDiagnosticResult, + TServiceDiagnosticDepth, +} from "../../../../../framework/api/diagnostics"; export abstract class AbstractConnector implements Connector { constructor(protected type: DatabaseType, protected options: T, protected secret: string) {} abstract connect(): Promise; abstract disconnect(): Promise; + abstract getDiagnostics(depth: TServiceDiagnosticDepth): Promise; abstract drop(): Promise; diff --git a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/index.ts b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/index.ts index 6e1f50d64..55e9538dd 100644 --- a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/index.ts +++ b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/index.ts @@ -5,6 +5,7 @@ import { ColumnDefinition, EntityDefinition } from "../types"; import { FindOptions } from "../repository/repository"; import { ListResult, Paginable, Pagination } from "../../../../../framework/api/crud-service"; import { PostgresConnectionOptions } from "./postgres/postgres"; +import type { IServiceDiagnosticProvider } from "../../../../../framework/api/diagnostics"; export * from "./mongodb/mongodb"; @@ -14,7 +15,7 @@ export type UpsertOptions = { export type RemoveOptions = any; -export interface Connector extends Initializable { +export interface Connector extends Initializable, IServiceDiagnosticProvider { /** * Connect to the database */ diff --git a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/mongodb/mongodb.ts 
b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/mongodb/mongodb.ts index 55c7c2bfd..395fffc4c 100644 --- a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/mongodb/mongodb.ts +++ b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/mongodb/mongodb.ts @@ -8,6 +8,10 @@ import { AbstractConnector } from "../abstract-connector"; import { buildSelectQuery } from "./query-builder"; import { transformValueFromDbString, transformValueToDbString } from "./typeTransforms"; import { logger } from "../../../../../../framework"; +import { + TDiagnosticResult, + TServiceDiagnosticDepth, +} from "../../../../../../framework/api/diagnostics"; export interface MongoConnectionOptions { // TODO: More options @@ -42,6 +46,24 @@ export class MongoConnector extends AbstractConnector { return this; } + private async ping(): Promise { + const wasConnected = !!this.client; + await (await this.getDatabase()).admin().ping(); + return !wasConnected; + } + + async getDiagnostics(depth: TServiceDiagnosticDepth): Promise { + switch (depth) { + case TServiceDiagnosticDepth.critical: + return { ok: true, didConnect: await this.ping() }; + case TServiceDiagnosticDepth.deep_statistics: + case TServiceDiagnosticDepth.tracked_statistics: + return { ok: true, warn: "unsupported_depth" }; + + default: + throw new Error(`Unexpected TServiceDiagnosticDepth: ${JSON.stringify(depth)}`); + } + } getClient(): mongo.MongoClient { return this.client; } diff --git a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts index 696e88998..0fb00784c 100644 --- a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts +++ b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts @@ -8,6 
+8,10 @@ import { getEntityDefinition } from "../../../orm/utils"; import { UpsertOptions } from "src/core/platform/services/database/services/orm/connectors"; import { PostgresDataTransformer, TypeMappings } from "./postgres-data-transform"; import { PostgresQueryBuilder, Query } from "./postgres-query-builder"; +import { + TDiagnosticResult, + TServiceDiagnosticDepth, +} from "../../../../../../framework/api/diagnostics"; export interface PostgresConnectionOptions { database: string; @@ -25,6 +29,7 @@ export interface PostgresConnectionOptions { export class PostgresConnector extends AbstractConnector { private logger = getLogger("PostgresConnector"); private client: Client = new Client(this.options); + private connected = false; private dataTransformer = new PostgresDataTransformer({ secret: this.secret }); private queryBuilder = new PostgresQueryBuilder(this.secret); @@ -40,10 +45,12 @@ export class PostgresConnector extends AbstractConnector { - this.logger.warn(err, "PostgreSQL connection error"); + this.logger.error(err, "PostgreSQL connection error"); }); + this.client.on("end", () => (this.connected = false)); await this.client.connect(); + this.connected = true; this.logger.info("Connection pool created"); await this.healthcheck(); } @@ -56,6 +63,26 @@ export class PostgresConnector extends AbstractConnector { + const wasConnected = this.connected; + if (wasConnected) await this.healthcheck(); + else await this.connect(); + return !wasConnected; + } + + async getDiagnostics(depth: TServiceDiagnosticDepth): Promise { + switch (depth) { + case TServiceDiagnosticDepth.critical: + return { ok: true, didConnect: await this.ping() }; + // Statistics depths are not implemented for this connector: report ok with a warning + case TServiceDiagnosticDepth.tracked_statistics: + case TServiceDiagnosticDepth.deep_statistics: + return { ok: true, warn: "unsupported_depth" }; + default: + throw new Error(`Unexpected TServiceDiagnosticDepth: ${JSON.stringify(depth)}`); + } + } + async init(): Promise { if (!this.client) { 
await this.connect(); @@ -111,7 +138,7 @@ export class PostgresConnector extends AbstractConnector { const query = ` - DO $$ - DECLARE + DO $$ + DECLARE tablename text; - BEGIN - FOR tablename IN (SELECT table_name FROM information_schema.tables WHERE table_schema = 'public') - LOOP - EXECUTE 'DELETE FROM "' || tablename || '" CASCADE'; - END LOOP; + BEGIN + FOR tablename IN (SELECT table_name FROM information_schema.tables WHERE table_schema = 'public') + LOOP + EXECUTE 'DELETE FROM "' || tablename || '" CASCADE'; + END LOOP; END $$;`; logger.debug(`service.database.orm.postgres.drop - Query: "${query}"`); await this.client.query(query); @@ -188,14 +215,14 @@ export class PostgresConnector extends AbstractConnector { const query = ` - DO $$ - DECLARE + DO $$ + DECLARE tablename text; - BEGIN - FOR tablename IN (SELECT table_name FROM information_schema.tables WHERE table_schema = 'public') - LOOP - EXECUTE 'DROP TABLE IF EXISTS "' || tablename || '" CASCADE'; - END LOOP; + BEGIN + FOR tablename IN (SELECT table_name FROM information_schema.tables WHERE table_schema = 'public') + LOOP + EXECUTE 'DROP TABLE IF EXISTS "' || tablename || '" CASCADE'; + END LOOP; END $$;`; logger.debug(`service.database.orm.postgres.dropTables - Query: "${query}"`); await this.client.query(query); @@ -343,13 +370,13 @@ export class PostgresConnector extends AbstractConnector { try { - const query = `SELECT - table_name, - column_name, - data_type - FROM + const query = `SELECT + table_name, + column_name, + data_type + FROM information_schema.columns - WHERE + WHERE table_name = $1`; const dbResult: QueryResult = await this.client.query(query, [name]); return dbResult.rows.map(row => row.column_name); diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/index.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/index.ts new file mode 100644 index 000000000..6d71d25ed --- /dev/null +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/index.ts @@ 
-0,0 +1,35 @@ +import { TdriveService, Consumes, Prefix, ServiceName } from "../../framework"; +import web from "./web"; +import DiagnosticsServiceAPI from "./service-provider"; +import DiagnosticsServiceImpl from "./service"; +import WebServerAPI from "../webserver/provider"; +import registerBasicProviders from "./providers"; + +/** + * The diagnostics service exposes endpoint that are of use for operational reasons. + * + */ +@Prefix("/api/diagnostics") +@Consumes(["webserver"]) +@ServiceName("diagnostics") +export default class DiagnosticsService extends TdriveService { + name = "diagnostics"; + service: DiagnosticsServiceAPI; + + api(): DiagnosticsServiceAPI { + return this.service; + } + + public async doInit(): Promise { + this.service = new DiagnosticsServiceImpl(); + const fastify = this.context.getProvider("webserver").getServer(); + registerBasicProviders(); + + fastify.register((instance, _opts, next) => { + web(instance, { prefix: this.prefix }); + next(); + }); + + return this; + } +} diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/providers/db-ping.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/db-ping.ts new file mode 100644 index 000000000..eef99c7c9 --- /dev/null +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/db-ping.ts @@ -0,0 +1,9 @@ +import diagnostics, { TServiceDiagnosticDepth } from "../../../framework/api/diagnostics"; +import globalResolver from "../../../../../services/global-resolver"; + +export default () => + diagnostics.registerProviders({ + key: "db-ping", + tags: ["startup", "live", "ready"], + get: () => globalResolver.database.getDiagnostics(TServiceDiagnosticDepth.critical), + }); diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/providers/index.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/index.ts new file mode 100644 index 000000000..10e24bd62 --- /dev/null +++ 
b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/index.ts @@ -0,0 +1,9 @@ +import registerDBPingProvider from "./db-ping"; +import registerPlatformProvider from "./platform-started"; +import registerProcessProvider from "./process"; + +export default () => { + registerDBPingProvider(); + registerPlatformProvider(); + registerProcessProvider(); +}; diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/providers/platform-started.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/platform-started.ts new file mode 100644 index 000000000..d3eff017d --- /dev/null +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/platform-started.ts @@ -0,0 +1,14 @@ +import diagnostics from "../../../framework/api/diagnostics"; +import globalResolver from "../../../../../services/global-resolver"; + +export default () => + diagnostics.registerProviders({ + key: "platform", + tags: "*", + get: async () => { + return { + ok: globalResolver.isPlatformStarted(), + uptimeMs: Math.floor(process.uptime() * 1000), + }; + }, + }); diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/providers/process.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/process.ts new file mode 100644 index 000000000..4abe1b186 --- /dev/null +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/process.ts @@ -0,0 +1,16 @@ +import diagnostics from "../../../framework/api/diagnostics"; + +export default () => + diagnostics.registerProviders({ + key: "process", + tags: ["live", "ready"], + async get() { + return { + ok: true, + gc: !!global.gc, + mem: process.memoryUsage(), + pid: process.pid, + res: process.resourceUsage(), + }; + }, + }); diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/service-provider.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/service-provider.ts new file mode 100644 index 
000000000..00b727865 --- /dev/null +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/service-provider.ts @@ -0,0 +1,5 @@ +import type { TdriveServiceProvider } from "../../framework"; + +type DiagnosticsServiceAPI = TdriveServiceProvider; + +export default DiagnosticsServiceAPI; diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/service.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/service.ts new file mode 100644 index 000000000..614e10c63 --- /dev/null +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/service.ts @@ -0,0 +1,5 @@ +import DiagnosticsServiceAPI from "./service-provider"; + +export default class DiagnosticsServiceImpl implements DiagnosticsServiceAPI { + version = "1"; +} diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/utils.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/utils.ts new file mode 100644 index 000000000..375a1ebeb --- /dev/null +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/utils.ts @@ -0,0 +1,40 @@ +import { logger } from "../../framework"; +import { Readable, Transform } from "node:stream"; +import { Session } from "node:inspector"; + +/** Time the execution of the callback, and return its duration in ms with the result */ +export async function timeCallback<T>( + cb: () => Promise<T>, +): Promise<{ durationMs: number; result: T }> { + const startMs = new Date().getTime(); + const result = await cb(); + return { durationMs: new Date().getTime() - startMs, result }; +} + +/** Use `node:inspector` to create a snapshot of the heap, and + * synchronously (because of session.post) pipe it to a stream + * readable from the first argument of the callback. 
+ */ +export function getHeapSnapshotSync(cb: (readable: Readable) => void) { + logger.info({ gcExposed: !!global.gc }, "Beginning heap snapshot"); + if (global.gc) global.gc(); + const session = new Session(); + session.connect(); + try { + const transform = new Transform(); + try { + let size = 0; + session.on("HeapProfiler.addHeapSnapshotChunk", message => { + size += message.params.chunk.length; + transform.push(message.params.chunk); + }); + cb(transform); + session.post("HeapProfiler.takeHeapSnapshot", null); + logger.info({ size }, "Heap snapshot sent"); + } finally { + transform.end(); + } + } finally { + session.disconnect(); + } +} diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/web/heap-routes.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/web/heap-routes.ts new file mode 100644 index 000000000..72f6385d2 --- /dev/null +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/web/heap-routes.ts @@ -0,0 +1,86 @@ +import type { FastifyInstance, FastifyPluginCallback } from "fastify"; +import { getConfig as getDiagnosticsGetConfig } from "../../../framework/api/diagnostics"; +import { getHeapSnapshotSync } from "../utils"; +import { createHash, randomUUID } from "node:crypto"; + +const routes: FastifyPluginCallback = (fastify: FastifyInstance, _opts, next) => { + const diagnosticsConfig = getDiagnosticsGetConfig(); + + if (diagnosticsConfig?.probeSecret?.length && diagnosticsConfig?.secret?.length) { + const getRunningToken = (() => { + const newToken = () => [randomUUID(), randomUUID()].join("*"); + let token = newToken(); + let updatedS = process.uptime(); + return () => { + const nowS = process.uptime(); + if (nowS - updatedS > diagnosticsConfig.secretChallengeRefreshS) { + token = newToken(); + updatedS = nowS; + } + return token; + }; + })(); + const hashToken = (token: string = getRunningToken()) => + createHash("sha512").update(`+${token}+${diagnosticsConfig.secret}+`).digest("hex"); + + /* + Example 
flow: + Configuration: + { secret: 'secretValue', probeSecret: 'probeSecretValue' } + GET current token using the probeSecret: + $ curl "$SERVER/api/diagnostics/heap?secret=probeSecretValue" + {"token":"d5888f91-d929-4775-b9dc-de6e2fb4d7cd*b2a57b32-7029-4957-82e9-c7c25794727f"} + Hash `+${token}+${secret}+` in SHA-512: + Shell example: + $ echo -n '+d5888f91-d929-4775-b9dc-de6e2fb4d7cd*b2a57b32-7029-4957-82e9-c7c25794727f+secretValue+' | sha512sum | cut -f1 -d' ' + 320070a508da218baf0f3363e837080fdb902bd0d986bd0f33b806b1230608c0868accdd10bd25261ca91b57c0459edf76218deb26571c72f6b93b077846abe3 + JS example: + require('crypto').createHash('sha512').update(`+${token}+${secret}+`).digest('hex') + POST to download snapshot: + $ curl "$SERVER/api/diagnostics/heap" + + Shell script example to download heap snapshot (requires curl, node and jq): + set -euo pipefail + SERVER="http://localhost:4000" + DIAG_PROBE_SECRET="probeSecretValue" + DIAG_SECRET="secretValue" + + urlencode() { node -e 'console.log(encodeURIComponent(process.argv[1]))' "$@" ; } + DIAG_PROBE_SECRET_URLENCODED="$(urlencode "$DIAG_PROBE_SECRET")" + TOKEN="$(curl --fail-with-body "$SERVER/api/diagnostics/heap?secret=$DIAG_PROBE_SECRET_URLENCODED" | jq -r .token)" + HASHED="$(echo -n "+$TOKEN+$DIAG_SECRET+" | sha512sum | cut -f1 -d' ')" + curl --fail-with-body --remote-name --remote-header-name \ + --header "Content-Type: application/json" \ + --data '{"hash":"'"$HASHED"'"}' \ + "$SERVER/api/diagnostics/heap" + # The file should be downloaded as `twake-drive-snap-$date.heapsnapshot` + ls twake-drive-snap-*.heapsnapshot + */ + fastify.get("/heap", async (request, reply) => { + if ((request.query as { secret: string }).secret !== diagnosticsConfig.probeSecret) + return reply.status(403).send(); + return reply.send({ token: getRunningToken() }); + }); + + fastify.post("/heap", async (request, reply) => { + const hashExpected = hashToken(); + const hashProvided = (request.body as { hash: string })?.hash; + if 
(hashProvided !== hashExpected) return reply.status(403).send(); + const filenameTimestamp = new Date() + .toISOString() + .replace(/(\.\d\d\d)?Z$/, "") + .replace(/\D/g, "-"); + reply.header( + "Content-Disposition", + `attachment; filename="twake-drive-snap-${filenameTimestamp}.heapsnapshot"`, + ); + let replyResult; + getHeapSnapshotSync(readable => (replyResult = reply.send(readable))); + return replyResult; + }); + } + + next(); +}; + +export default routes; diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/web/index.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/web/index.ts new file mode 100644 index 000000000..3091541c6 --- /dev/null +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/web/index.ts @@ -0,0 +1,11 @@ +import type { FastifyInstance, FastifyRegisterOptions } from "fastify"; +import probeRoutes from "./probe-routes"; +import heapRoutes from "./heap-routes"; + +export default ( + fastify: FastifyInstance, + opts: FastifyRegisterOptions<{ prefix: string }>, +): void => { + fastify.register(probeRoutes, opts); + fastify.register(heapRoutes, opts); +}; diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/web/probe-routes.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/web/probe-routes.ts new file mode 100644 index 000000000..054fcabc4 --- /dev/null +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/web/probe-routes.ts @@ -0,0 +1,25 @@ +import type { FastifyInstance, FastifyPluginCallback } from "fastify"; +import diagnostics, { + getConfig as getDiagnosticsGetConfig, +} from "../../../framework/api/diagnostics"; + +const routes: FastifyPluginCallback = (fastify: FastifyInstance, _opts, next) => { + const diagnosticsConfig = getDiagnosticsGetConfig(); + if (diagnosticsConfig?.probeSecret?.length) { + const tagParamName = "diagnosticTags"; + fastify.get(`/t/:${tagParamName}`, async (request, reply) => { + const tag = request.params[tagParamName]; + if ( 
+ tag === "*" || + (request.query as { secret: string }).secret !== diagnosticsConfig.probeSecret + ) + return reply.status(403).send(); + const results = await diagnostics.get(tag); + if (!results.ok) reply.status(503); + return reply.send(results); + }); + } + next(); +}; + +export default routes; diff --git a/tdrive/backend/node/src/services/av/service/index.ts b/tdrive/backend/node/src/services/av/service/index.ts index 8bcf44fbf..867f54f56 100644 --- a/tdrive/backend/node/src/services/av/service/index.ts +++ b/tdrive/backend/node/src/services/av/service/index.ts @@ -32,8 +32,8 @@ export class AVServiceImpl implements TdriveServiceProvider, Initializable { }, }); } - } catch (error) { - logger.error({ error: `${error}` }, "Error while initializing Antivirus Service"); + } catch (err) { + logger.error({ err }, "Error while initializing Antivirus Service"); throw AVException.initializationFailed("Failed to initialize Antivirus service"); } return this; diff --git a/tdrive/backend/node/src/services/global-resolver.ts b/tdrive/backend/node/src/services/global-resolver.ts index 3e68328b4..42088f6e9 100644 --- a/tdrive/backend/node/src/services/global-resolver.ts +++ b/tdrive/backend/node/src/services/global-resolver.ts @@ -15,7 +15,7 @@ import TrackerAPI from "../core/platform/services/tracker/provider"; import WebServerAPI from "../core/platform/services/webserver/provider"; import assert from "assert"; -import { logger } from "../core/platform/framework"; +import { logger, TdriveServiceState } from "../core/platform/framework"; import { ApplicationServiceImpl } from "./applications/services/applications"; import { CompanyApplicationServiceImpl } from "./applications/services/company-applications"; import { ApplicationHooksService } from "./applications/services/hooks"; @@ -78,6 +78,7 @@ class GlobalResolver { public services: TdriveServices; public platformServices: PlatformServices; public database: DatabaseServiceAPI; + private platform?: TdrivePlatform; public 
fastify: FastifyInstance; @@ -87,6 +88,7 @@ class GlobalResolver { if (this.alreadyInitialized) { return; } + this.platform = platform; this.database = platform.getProvider("database"); this.platformServices = { @@ -152,6 +154,11 @@ class GlobalResolver { logger.info("Global resolver finished initializing services"); this.alreadyInitialized = true; } + + /** `true` if all components are in the started state. This means we should respond to http at least (for kubernetes Startup probe). */ + isPlatformStarted(): boolean { + return this.alreadyInitialized && this.platform?.state.getValue() == TdriveServiceState.Started; + } } export default new GlobalResolver(); From 6f8ff34051a168477dfe768032e564823b942862 Mon Sep 17 00:00:00 2001 From: Eric Doughty-Papassideris Date: Sun, 8 Dec 2024 22:51:58 +0100 Subject: [PATCH 02/15] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20back:=20separate=20a?= =?UTF-8?q?nd=20generic=20service=20level=20diagnostics=20(#762)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../platform/framework/api/diagnostics.ts | 45 ++++++++++++++++--- .../orm/connectors/mongodb/mongodb.ts | 35 +++++++++++++-- .../orm/connectors/postgres/postgres.ts | 3 +- .../diagnostics/providers/database-service.ts | 4 ++ .../services/diagnostics/providers/db-ping.ts | 9 ---- .../services/diagnostics/providers/index.ts | 2 +- .../services/diagnostics/providers/process.ts | 2 +- 7 files changed, 79 insertions(+), 21 deletions(-) create mode 100644 tdrive/backend/node/src/core/platform/services/diagnostics/providers/database-service.ts delete mode 100644 tdrive/backend/node/src/core/platform/services/diagnostics/providers/db-ping.ts diff --git a/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts b/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts index d26010e15..a4e31d806 100644 --- a/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts +++ 
b/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts @@ -13,18 +13,28 @@ export type TDiagnosticTag = | "startup" // Tests that are absolutely required (and light) to even begin the other tests | "ready" // Tests required before traffic can be sent our way | "live" // Tests required to prevent from being restarted - | "stats"; // Expensive diagnostics that should not often be ran + | "stats" // Expensive diagnostics that should not often be ran, but can be system wide + | "stats-full"; // Expensive diagnostics that should not often be ran, and then probably for a single key /** Detail requested from platform service self-diagnostics */ export enum TServiceDiagnosticDepth { /** Minimal cost information that tests functioning service */ - critical = 0, - /** Statistics that have a little impact enough for regular tracking into a time series */ - tracked_statistics = 1, + alive = "alive", + /** Statistics that have a little impact enough for periodic tracking into a time series */ + stats_track = "stats_track", + /** Statistics that should be included when looking specifically at general statistics */ + stats_basic = "stats_basic", /** Statistics possibly expensive and large to calculate, for occasional debug operations */ - deep_statistics = 2, + stats_deep = "stats_deep", } +const serviceDiagnosticDepthToTags: { [depth in TServiceDiagnosticDepth]: TDiagnosticTag[] } = { + [TServiceDiagnosticDepth.alive]: ["startup", "live", "ready"], + [TServiceDiagnosticDepth.stats_track]: ["ready", "stats"], + [TServiceDiagnosticDepth.stats_basic]: ["stats", "stats-full"], + [TServiceDiagnosticDepth.stats_deep]: ["stats-full"], +}; + interface IDiagnosticsConfig { // Diagnostic keys that should be considered ok without evaluation skipKeys?: string[]; @@ -183,6 +193,31 @@ export default { }); }, + /** Create providers to match from {@link IServiceDiagnosticProvider} to multiple {@link IDiagnosticProvider}s */ + registerServiceProviders( + name: string, + getService: 
() => IServiceDiagnosticProvider, + overrideTags: Partial< + typeof serviceDiagnosticDepthToTags | { [key in TServiceDiagnosticDepth]: false } + > = {}, + ) { + this.registerProviders( + ...Object.values(TServiceDiagnosticDepth) + .map(depth => { + const defaultTags = serviceDiagnosticDepthToTags[depth]; + if (!defaultTags) throw new Error(`Unknown depth ${JSON.stringify(depth)}`); + const tags = overrideTags[depth] ?? defaultTags; + if (tags === false) return null; + return { + key: `${name}-${depth}`, + tags, + get: () => getService().getDiagnostics(depth), + }; + }) + .filter(x => !!x), + ); + }, + /** Cancel all pending diagnostic updates */ shutdown() { ensureHasntShutdown(); diff --git a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/mongodb/mongodb.ts b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/mongodb/mongodb.ts index 395fffc4c..3890361e8 100644 --- a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/mongodb/mongodb.ts +++ b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/mongodb/mongodb.ts @@ -52,18 +52,45 @@ export class MongoConnector extends AbstractConnector { return !wasConnected; } + private async dbStats(): Promise { + return (await this.getDatabase()).stats(); + } + + private async collectionsStats(deep: boolean): Promise { + const db = await this.getDatabase(); + const result = { collections: {} }; + for (const collection of await db.collections()) { + const stats = await collection.aggregate([ + { + $collStats: { + latencyStats: { histograms: true }, + storageStats: deep ? 
{} : undefined, // Really a lot of keys with 0 occurances + count: {}, + queryExecStats: {}, + }, + }, + ]); + result.collections[collection.collectionName] = await stats.toArray(); + } + return result; + } + async getDiagnostics(depth: TServiceDiagnosticDepth): Promise { switch (depth) { - case TServiceDiagnosticDepth.critical: + case TServiceDiagnosticDepth.alive: return { ok: true, didConnect: await this.ping() }; - case TServiceDiagnosticDepth.deep_statistics: - case TServiceDiagnosticDepth.tracked_statistics: - return { ok: true, warn: "unsupported_depth" }; + case TServiceDiagnosticDepth.stats_track: + return { ok: true, ...(await this.dbStats()) }; + case TServiceDiagnosticDepth.stats_basic: + return { ok: true, ...(await this.collectionsStats(false)) }; + case TServiceDiagnosticDepth.stats_deep: + return { ok: true, ...(await this.collectionsStats(true)) }; default: throw new Error(`Unexpected TServiceDiagnosticDepth: ${JSON.stringify(depth)}`); } } + getClient(): mongo.MongoClient { return this.client; } diff --git a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts index 0fb00784c..f2a06f957 100644 --- a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts +++ b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts @@ -72,12 +72,13 @@ export class PostgresConnector extends AbstractConnector { switch (depth) { - case TServiceDiagnosticDepth.critical: + case TServiceDiagnosticDepth.alive: return { ok: true, didConnect: await this.ping() }; case TServiceDiagnosticDepth.stats_basic: case TServiceDiagnosticDepth.stats_track: case TServiceDiagnosticDepth.stats_deep: return { ok: true, warn: "unsupported_depth" }; + default: throw new Error(`Unexpected TServiceDiagnosticDepth: ${JSON.stringify(depth)}`); } diff --git 
a/tdrive/backend/node/src/core/platform/services/diagnostics/providers/database-service.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/database-service.ts new file mode 100644 index 000000000..ed403b49c --- /dev/null +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/database-service.ts @@ -0,0 +1,4 @@ +import diagnostics from "../../../framework/api/diagnostics"; +import globalResolver from "../../../../../services/global-resolver"; + +export default () => diagnostics.registerServiceProviders("db", () => globalResolver.database, {}); diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/providers/db-ping.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/db-ping.ts deleted file mode 100644 index eef99c7c9..000000000 --- a/tdrive/backend/node/src/core/platform/services/diagnostics/providers/db-ping.ts +++ /dev/null @@ -1,9 +0,0 @@ -import diagnostics, { TServiceDiagnosticDepth } from "../../../framework/api/diagnostics"; -import globalResolver from "../../../../../services/global-resolver"; - -export default () => - diagnostics.registerProviders({ - key: "db-ping", - tags: ["startup", "live", "ready"], - get: () => globalResolver.database.getDiagnostics(TServiceDiagnosticDepth.critical), - }); diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/providers/index.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/index.ts index 10e24bd62..d9350f7fc 100644 --- a/tdrive/backend/node/src/core/platform/services/diagnostics/providers/index.ts +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/index.ts @@ -1,4 +1,4 @@ -import registerDBPingProvider from "./db-ping"; +import registerDBPingProvider from "./database-service"; import registerPlatformProvider from "./platform-started"; import registerProcessProvider from "./process"; diff --git 
a/tdrive/backend/node/src/core/platform/services/diagnostics/providers/process.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/process.ts index 4abe1b186..35d085d4c 100644 --- a/tdrive/backend/node/src/core/platform/services/diagnostics/providers/process.ts +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/process.ts @@ -8,8 +8,8 @@ export default () => return { ok: true, gc: !!global.gc, - mem: process.memoryUsage(), pid: process.pid, + mem: process.memoryUsage(), res: process.resourceUsage(), }; }, From e6593986f1261d0af6dc83a32e2d0ac3af0f8c0d Mon Sep 17 00:00:00 2001 From: Eric Doughty-Papassideris Date: Sun, 8 Dec 2024 22:52:52 +0100 Subject: [PATCH 03/15] =?UTF-8?q?=E2=9C=A8=20back:=20adding=20postgresql?= =?UTF-8?q?=20statistics=20(#762)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../orm/connectors/postgres/postgres.ts | 18 ++++++++++++++++-- .../diagnostics/providers/database-service.ts | 2 +- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts index f2a06f957..12dba80c9 100644 --- a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts +++ b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts @@ -74,10 +74,24 @@ export class PostgresConnector extends AbstractConnector diagnostics.registerServiceProviders("db", () => globalResolver.database, {}); +export default () => diagnostics.registerServiceProviders("db", () => globalResolver.database); From 150dadeab5f624b29c954a1ce2f962b09725e778 Mon Sep 17 00:00:00 2001 From: Eric Doughty-Papassideris Date: Sun, 8 Dec 2024 22:53:39 +0100 Subject: [PATCH 04/15] 
=?UTF-8?q?=F0=9F=A7=91=E2=80=8D=F0=9F=92=BB=20back:?= =?UTF-8?q?=20log=20diagnostic=20tool=20errors=20as=20last=20resort=20(#76?= =?UTF-8?q?2)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../node/src/core/platform/framework/api/diagnostics.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts b/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts index a4e31d806..d49e596de 100644 --- a/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts +++ b/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts @@ -1,5 +1,6 @@ import assert from "node:assert"; import config from "../../../config"; +import { logger } from "../logger"; /** * Values that can match a set of diagnostic providers. @@ -142,7 +143,7 @@ const runProvider = async provider => { const startMs = now(); try { const result = await provider.get(); - if (!result.ok || result.warn) + if (!result.ok) logger.error( { provider: provider.key, result }, "Got diagnostic provider result with ok=false", @@ -153,8 +154,9 @@ const runProvider = async provider => { "Got diagnostic provider result with ok=true but a warning", ); return recordDiagnostic(startMs, provider.key, result); - } catch (error) { - return recordDiagnostic(startMs, provider.key, undefined, error); + } catch (err) { + logger.error({ err, provider: provider.key }, "Failed to read diagnostic provider"); + return recordDiagnostic(startMs, provider.key, undefined, err); } }; From 7c0f3597599a098cae09bdc0e98179b5854d5a2c Mon Sep 17 00:00:00 2001 From: Eric Doughty-Papassideris Date: Sun, 8 Dec 2024 22:53:43 +0100 Subject: [PATCH 05/15] =?UTF-8?q?=E2=9C=A8=20back:=20adding=20storage=20se?= =?UTF-8?q?rvice=20diagnostics=20(#762)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../services/diagnostics/providers/index.ts | 6 ++++-- 
.../diagnostics/providers/storage-service.ts | 9 +++++++++ .../services/storage/connectors/S3/s3-service.ts | 15 +++++++++++++++ .../services/storage/connectors/local/service.ts | 14 ++++++++++++++ .../services/storage/default-storage-strategy.ts | 4 ++++ .../services/storage/oneof-storage-strategy.ts | 12 ++++++++++++ .../core/platform/services/storage/provider.ts | 3 ++- .../platform/services/storage/storage-service.ts | 4 ++++ 8 files changed, 64 insertions(+), 3 deletions(-) create mode 100644 tdrive/backend/node/src/core/platform/services/diagnostics/providers/storage-service.ts diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/providers/index.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/index.ts index d9350f7fc..08f2b5d70 100644 --- a/tdrive/backend/node/src/core/platform/services/diagnostics/providers/index.ts +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/index.ts @@ -1,9 +1,11 @@ -import registerDBPingProvider from "./database-service"; +import registerDBServiceProvider from "./database-service"; +import registerStorageServiceProvider from "./storage-service"; import registerPlatformProvider from "./platform-started"; import registerProcessProvider from "./process"; export default () => { - registerDBPingProvider(); + registerDBServiceProvider(); + registerStorageServiceProvider(); registerPlatformProvider(); registerProcessProvider(); }; diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/providers/storage-service.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/storage-service.ts new file mode 100644 index 000000000..5ad434560 --- /dev/null +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/storage-service.ts @@ -0,0 +1,9 @@ +import diagnostics from "../../../framework/api/diagnostics"; +import globalResolver from "../../../../../services/global-resolver"; + +export default () => + 
diagnostics.registerServiceProviders("storage", () => globalResolver.platformServices.storage, { + stats_basic: false, + stats_track: false, + stats_deep: false, + }); diff --git a/tdrive/backend/node/src/core/platform/services/storage/connectors/S3/s3-service.ts b/tdrive/backend/node/src/core/platform/services/storage/connectors/S3/s3-service.ts index e70cfc4a0..a2c05b086 100644 --- a/tdrive/backend/node/src/core/platform/services/storage/connectors/S3/s3-service.ts +++ b/tdrive/backend/node/src/core/platform/services/storage/connectors/S3/s3-service.ts @@ -4,6 +4,7 @@ import { Readable } from "stream"; import { StorageConnectorAPI, WriteMetadata } from "../../provider"; import { randomUUID } from "crypto"; import _ from "lodash"; +import { TDiagnosticResult, TServiceDiagnosticDepth } from "../../../../framework/api/diagnostics"; export type S3Configuration = { id: string; @@ -43,6 +44,20 @@ export default class S3ConnectorService implements StorageConnectorAPI { return this.id; } + async getDiagnostics(depth: TServiceDiagnosticDepth): Promise { + switch (depth) { + case TServiceDiagnosticDepth.alive: + return { ok: await this.client.bucketExists(this.minioConfiguration.bucket) }; + case TServiceDiagnosticDepth.stats_basic: + case TServiceDiagnosticDepth.stats_track: + case TServiceDiagnosticDepth.stats_deep: + return { ok: true, warn: "s3_statistics_not_implemented" }; + + default: + throw new Error(`Unexpected TServiceDiagnosticDepth: ${JSON.stringify(depth)}`); + } + } + write(path: string, stream: Readable): Promise { return new Promise((resolve, reject) => { let totalSize = 0; diff --git a/tdrive/backend/node/src/core/platform/services/storage/connectors/local/service.ts b/tdrive/backend/node/src/core/platform/services/storage/connectors/local/service.ts index 16cf356f6..ed7fa9afb 100644 --- a/tdrive/backend/node/src/core/platform/services/storage/connectors/local/service.ts +++ 
b/tdrive/backend/node/src/core/platform/services/storage/connectors/local/service.ts @@ -6,6 +6,7 @@ import { StorageConnectorAPI, WriteMetadata } from "../../provider"; import fs from "fs"; import { logger } from "../../../../framework/logger"; import { randomUUID } from "crypto"; +import { TDiagnosticResult, TServiceDiagnosticDepth } from "../../../../framework/api/diagnostics"; export type LocalConfiguration = { id: string; @@ -29,6 +30,19 @@ export default class LocalConnectorService implements StorageConnectorAPI { return this.id; } + async getDiagnostics(depth: TServiceDiagnosticDepth): Promise { + switch (depth) { + case TServiceDiagnosticDepth.alive: + case TServiceDiagnosticDepth.stats_basic: + case TServiceDiagnosticDepth.stats_track: + case TServiceDiagnosticDepth.stats_deep: + return { ok: true, warn: "local_s3_alway_ok" }; + + default: + throw new Error(`Unexpected TServiceDiagnosticDepth: ${JSON.stringify(depth)}`); + } + } + write(relativePath: string, stream: Readable): Promise { const path = this.getFullPath(relativePath); logger.trace(`Writing file ${path}`); diff --git a/tdrive/backend/node/src/core/platform/services/storage/default-storage-strategy.ts b/tdrive/backend/node/src/core/platform/services/storage/default-storage-strategy.ts index c14034cab..8e14112d2 100644 --- a/tdrive/backend/node/src/core/platform/services/storage/default-storage-strategy.ts +++ b/tdrive/backend/node/src/core/platform/services/storage/default-storage-strategy.ts @@ -19,6 +19,10 @@ export class DefaultStorageStrategy implements StorageConnectorAPI { return this.connector.getId(); } + getDiagnostics(depth) { + return this.connector.getDiagnostics(depth); + } + write = ( path: string, stream: Stream, diff --git a/tdrive/backend/node/src/core/platform/services/storage/oneof-storage-strategy.ts b/tdrive/backend/node/src/core/platform/services/storage/oneof-storage-strategy.ts index b71c70268..0223b8a64 100644 --- 
a/tdrive/backend/node/src/core/platform/services/storage/oneof-storage-strategy.ts +++ b/tdrive/backend/node/src/core/platform/services/storage/oneof-storage-strategy.ts @@ -8,6 +8,7 @@ import { } from "../storage/provider"; import { logger } from "../../../platform/framework"; import { FileNotFountException, WriteFileException } from "./exceptions"; +import type { TDiagnosticResult, TServiceDiagnosticDepth } from "../../framework/api/diagnostics"; /** * OneOfStorageStrategy is responsible for managing multiple storage backends. @@ -34,6 +35,17 @@ export class OneOfStorageStrategy implements StorageConnectorAPI { return this.id; } + async getDiagnostics(depth: TServiceDiagnosticDepth): Promise { + const states = await Promise.all( + this.storages.map(async s => ({ id: s.getId(), ...(await s.getDiagnostics(depth)) })), + ); + return { + ...(states.every(s => s.ok) ? {} : { warn: "not_all_storages_ok" }), + ok: states.some(s => s.ok), + states: states, + }; + } + /** * Writes a file to all configured storages. * The write operation is considered successful if one of the storage succeed. 
diff --git a/tdrive/backend/node/src/core/platform/services/storage/provider.ts b/tdrive/backend/node/src/core/platform/services/storage/provider.ts index d46a554c8..fb2588769 100644 --- a/tdrive/backend/node/src/core/platform/services/storage/provider.ts +++ b/tdrive/backend/node/src/core/platform/services/storage/provider.ts @@ -1,6 +1,7 @@ import { Stream, Readable } from "stream"; import { TdriveServiceProvider } from "../../framework"; import { ExecutionContext } from "../../framework/api/crud-service"; +import { IServiceDiagnosticProvider } from "../../framework/api/diagnostics"; export type WriteMetadata = { size: number; @@ -22,7 +23,7 @@ export type DeleteOptions = { totalChunks?: number; }; -export interface StorageConnectorAPI { +export interface StorageConnectorAPI extends IServiceDiagnosticProvider { /** * Returns identifier of a storage that should've been set in configuration. * diff --git a/tdrive/backend/node/src/core/platform/services/storage/storage-service.ts b/tdrive/backend/node/src/core/platform/services/storage/storage-service.ts index 4eaea4ba1..b27632506 100644 --- a/tdrive/backend/node/src/core/platform/services/storage/storage-service.ts +++ b/tdrive/backend/node/src/core/platform/services/storage/storage-service.ts @@ -64,6 +64,10 @@ export default class StorageService extends TdriveService implements return this.connector; } + getDiagnostics(depth) { + return this.getConnector().getDiagnostics(depth); + } + getHomeDir(): string { return this.homeDir; } From dab4d0e3ff838cc380fe7ca167c11c3810d72150 Mon Sep 17 00:00:00 2001 From: Eric Doughty-Papassideris Date: Sun, 8 Dec 2024 22:53:43 +0100 Subject: [PATCH 06/15] =?UTF-8?q?=E2=9C=85=20back:=20very=20basic=20e2e=20?= =?UTF-8?q?for=20diagnostics=20(#762)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test/e2e/diagnostics/diagnostics.spec.ts | 51 +++++++++++++++++++ .../docker-compose.dev.tests.opensearch.yml | 2 + 
tdrive/docker-compose.tests.yml | 2 + 3 files changed, 55 insertions(+) create mode 100644 tdrive/backend/node/test/e2e/diagnostics/diagnostics.spec.ts diff --git a/tdrive/backend/node/test/e2e/diagnostics/diagnostics.spec.ts b/tdrive/backend/node/test/e2e/diagnostics/diagnostics.spec.ts new file mode 100644 index 000000000..685ac62be --- /dev/null +++ b/tdrive/backend/node/test/e2e/diagnostics/diagnostics.spec.ts @@ -0,0 +1,51 @@ +import "reflect-metadata"; +import { afterAll, beforeEach, describe, expect, it, jest } from "@jest/globals"; +import { init, TestPlatform } from "../setup"; +import { getConfig as getDiagnosticsConfig } from "../../../src/core/platform/framework/api/diagnostics"; + +describe("The diagnostics infrastucture", () => { + let platform: TestPlatform; + const diagnosticConfig = getDiagnosticsConfig(); + beforeEach(async () => { + platform = await init({ + services: [ + "webserver", + "database", + "applications", + "search", + "storage", + "diagnostics", + "message-queue", + "user", + "files", + "auth", + "statistics", + "platform-services", + "documents", + ], + }); + }); + + afterAll(async () => { + await platform?.tearDown(); + // @ts-ignore + platform = null; + }); + + const getDiagnosticTags = (tag: string, secret: string) => + platform.app.inject({ + method: "GET", + url: `/api/diagnostics/t/${encodeURIComponent(tag)}?secret=${encodeURIComponent(secret)}`, + }); + + it("should refuse invalid probe secrets", async () => { + const result = await getDiagnosticTags("ready", "ooooh look at me ! 
I'm like totally such an invalid probeSecret value"); + expect(result.statusCode).toBe(403); + }); + + it("should at least report alive", async () => { + const result = await getDiagnosticTags("ready", diagnosticConfig.probeSecret!); + expect(result.statusCode).toBe(200); + expect(result.json().ok).toBe(true); + }); +}); diff --git a/tdrive/docker-compose.dev.tests.opensearch.yml b/tdrive/docker-compose.dev.tests.opensearch.yml index 7c025bd0d..14f82fb60 100644 --- a/tdrive/docker-compose.dev.tests.opensearch.yml +++ b/tdrive/docker-compose.dev.tests.opensearch.yml @@ -68,6 +68,8 @@ services: - SEARCH_OS_PASSWORD=admin - SEARCH_OS_USE_AUTH=true - SEARCH_OS_USERNAME=admin + - DIAG_PROBE_SECRET=super_diagnostic_probe_secret + - DIAG_SECRET=super_diagnostic_secret depends_on: - postgres - opensearch-node1 diff --git a/tdrive/docker-compose.tests.yml b/tdrive/docker-compose.tests.yml index 515b6cc4e..983d74fce 100644 --- a/tdrive/docker-compose.tests.yml +++ b/tdrive/docker-compose.tests.yml @@ -84,6 +84,8 @@ services: - STORAGE_S3_ENDPOINT=minio - STORAGE_S3_ACCESS_KEY=admin - STORAGE_S3_SECRET_KEY=adminminio + - DIAG_PROBE_SECRET=super_diagnostic_probe_secret + - DIAG_SECRET=super_diagnostic_secret depends_on: minio: condition: service_healthy From 7da9062916ff260f651d70f7b79cb04472079628 Mon Sep 17 00:00:00 2001 From: Eric Doughty-Papassideris Date: Thu, 12 Dec 2024 04:06:20 +0100 Subject: [PATCH 07/15] =?UTF-8?q?=F0=9F=90=9B=20back:=20tolerate=20postgre?= =?UTF-8?q?s=20individual=20errors=20in=20statistic=20gathering=20(#762)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../orm/connectors/postgres/postgres.ts | 33 ++++++++++++++----- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts index 
12dba80c9..6534ca1fa 100644 --- a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts +++ b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts @@ -71,26 +71,41 @@ export class PostgresConnector extends AbstractConnector { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const safeRequest = async (query: string, values?: any[]) => { + try { + return (await this.client.query(query, values)).rows; + } catch (err) { + const logId = "pg-diags-error-" + Math.floor(process.uptime() * 1000); + logger.error( + { err, query, values, logId, errCode: err.code }, + `Error running postgresql statistics at ${depth} ( ${logId} ) `, + ); + return { error: true, logId }; + } + }; switch (depth) { + // This is the only required `ok` case TServiceDiagnosticDepth.alive: return { ok: true, didConnect: await this.ping() }; + + // Statistics can silently fail, and do it granularly if there is + // a permission issue only on some of the stats case TServiceDiagnosticDepth.stats_track: return { ok: true, - db: ( - await this.client.query("select * from pg_stat_database where datname = $1", [ - this.options.database, - ]) - ).rows, + db: await safeRequest("select * from pg_stat_database where datname = $1", [ + this.options.database, + ]), }; case TServiceDiagnosticDepth.stats_basic: - return { ok: true, warn: "pgsql_basic_has_nothing_more_than_track" }; + return { ok: true, warn: "pgsql_basic_has_no_basic_level_stats" }; case TServiceDiagnosticDepth.stats_deep: return { ok: true, - databases: (await this.client.query("select * from pg_stat_database")).rows, - tables: (await this.client.query("select * from pg_stat_user_tables")).rows, - indexes: (await this.client.query("select * from pg_stat_user_indexes")).rows, + databases: await safeRequest("select * from pg_stat_database"), + tables: await safeRequest("select * from pg_stat_user_tables"), + indexes: await 
safeRequest("select * from pg_stat_user_indexes"), }; default: From 4da82ecfcc587df3dfde38d6c4e6ef7b79bebf01 Mon Sep 17 00:00:00 2001 From: Eric Doughty-Papassideris Date: Thu, 12 Dec 2024 04:07:39 +0100 Subject: [PATCH 08/15] =?UTF-8?q?=E2=99=BB=EF=B8=8F=F0=9F=90=9B=20back:=20?= =?UTF-8?q?removing=20heap=20roundtrip=20and=20minor=20cleanup=20(#762)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../config/custom-environment-variables.json | 3 +- tdrive/backend/node/config/default.json | 4 +- .../platform/framework/api/diagnostics.ts | 14 ----- .../services/diagnostics/web/heap-routes.ts | 63 +------------------ .../storage/connectors/local/service.ts | 2 +- .../docker-compose.dev.tests.opensearch.yml | 1 - tdrive/docker-compose.tests.yml | 1 - 7 files changed, 5 insertions(+), 83 deletions(-) diff --git a/tdrive/backend/node/config/custom-environment-variables.json b/tdrive/backend/node/config/custom-environment-variables.json index 97880a2b6..d1ba0971b 100644 --- a/tdrive/backend/node/config/custom-environment-variables.json +++ b/tdrive/backend/node/config/custom-environment-variables.json @@ -21,8 +21,7 @@ "__name": "DIAG_SKIP_KEYS", "__format": "json" }, - "probeSecret": "DIAG_PROBE_SECRET", - "secret": "DIAG_SECRET" + "probeSecret": "DIAG_PROBE_SECRET" }, "webserver": { "host": "TWAKE_DRIVE_HOST", diff --git a/tdrive/backend/node/config/default.json b/tdrive/backend/node/config/default.json index d59e76643..de2e98b3c 100644 --- a/tdrive/backend/node/config/default.json +++ b/tdrive/backend/node/config/default.json @@ -37,9 +37,7 @@ }, "diagnostics": { "skipKeys": [], - "probeSecret": "", - "secret": "", - "secretChallengeRefreshS": 15 + "probeSecret": "" }, "tracker": { "type": "segment", diff --git a/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts b/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts index d49e596de..32e4b6c71 100644 --- 
a/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts +++ b/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts @@ -44,13 +44,6 @@ interface IDiagnosticsConfig { // relied on for security because disabling diagnostics. At worst this // provides access to the DB statistics. probeSecret?: string; - // This secret is required to sign more dangerous diagnostic endpoints, such - // the heap snapshot. It should never be sent over the network. - secret?: string; - // Maximum time to keep the same challenge token for diagnostic endpoints, in seconds. - // Must be large enough to have a reasonable chance of running the token request - // then the action on the same backend instance. - secretChallengeRefreshS: number; } export const getConfig = (): IDiagnosticsConfig => { @@ -63,11 +56,6 @@ export const getConfig = (): IDiagnosticsConfig => { .split(/[,\s]+/g) .filter(x => !!x), }; - if (typeof configSection.secretChallengeRefreshS === "string") - configSection = { - ...configSection, - secretChallengeRefreshS: parseInt(configSection.secretChallengeRefreshS, 10), - }; return configSection; }; @@ -94,8 +82,6 @@ export interface IDiagnosticProvider { /** * Returns an object as presented to a diagnostic requester. * Warning: this could be public and readable to the internet. 
- * @param completeButSlow If `true`, perform additional operations for a - * more informative */ get(): Promise; } diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/web/heap-routes.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/web/heap-routes.ts index 72f6385d2..eb294cf83 100644 --- a/tdrive/backend/node/src/core/platform/services/diagnostics/web/heap-routes.ts +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/web/heap-routes.ts @@ -1,71 +1,13 @@ import type { FastifyInstance, FastifyPluginCallback } from "fastify"; import { getConfig as getDiagnosticsGetConfig } from "../../../framework/api/diagnostics"; import { getHeapSnapshotSync } from "../utils"; -import { createHash, randomUUID } from "node:crypto"; const routes: FastifyPluginCallback = (fastify: FastifyInstance, _opts, next) => { const diagnosticsConfig = getDiagnosticsGetConfig(); - - if (diagnosticsConfig?.probeSecret?.length && diagnosticsConfig?.secret?.length) { - const getRunningToken = (() => { - const newToken = () => [randomUUID(), randomUUID()].join("*"); - let token = newToken(); - let updatedS = process.uptime(); - return () => { - const nowS = process.uptime(); - if (nowS - updatedS > diagnosticsConfig.secretChallengeRefreshS) { - token = newToken(); - updatedS = nowS; - } - return token; - }; - })(); - const hashToken = (token: string = getRunningToken()) => - createHash("sha512").update(`+${token}+${diagnosticsConfig.secret}+`).digest("hex"); - - /* - Example flow: - Configuration: - { secret: 'secretValue', probeSecret: 'probeSecretValue' } - GET current token using the probeSecret: - $ curl "$SERVER/api/diagnostics/heap?secret=probeSecretValue" - {"token":"d5888f91-d929-4775-b9dc-de6e2fb4d7cd*b2a57b32-7029-4957-82e9-c7c25794727f"} - Hash `+${token}+${secret}+` in SHA-512: - Shell example: - $ echo -n '+d5888f91-d929-4775-b9dc-de6e2fb4d7cd*b2a57b32-7029-4957-82e9-c7c25794727f+secretValue+' | sha512sum | cut -f1 -d' ' - 
320070a508da218baf0f3363e837080fdb902bd0d986bd0f33b806b1230608c0868accdd10bd25261ca91b57c0459edf76218deb26571c72f6b93b077846abe3 - JS example: - require('crypto').createHash('sha512').update(`+${token}+${secret}+`).digest('hex') - POST to download snapshot: - $ curl "$SERVER/api/diagnostics/heap" - - Shell script example to download heap snapshot (requires curl, node and jq): - set -euo pipefail - SERVER="http://localhost:4000" - DIAG_PROBE_SECRET="probeSecretValue" - DIAG_SECRET="secretValue" - - urlencode() { node -e 'console.log(encodeURIComponent(process.argv[1]))' "$@" ; } - DIAG_PROBE_SECRET_URLENCODED="$(urlencode "$DIAG_PROBE_SECRET")" - TOKEN="$(curl --fail-with-body "$SERVER/api/diagnostics/heap?secret=$DIAG_PROBE_SECRET_URLENCODED" | jq -r .token)" - HASHED="$(echo -n "+$TOKEN+$DIAG_SECRET+" | sha512sum | cut -f1 -d' ')" - curl --fail-with-body --remote-name --remote-header-name \ - --header "Content-Type: application/json" \ - --data '{"hash":"'"$HASHED"'"}' \ - "$SERVER/api/diagnostics/heap" - # The file should be downloaded as `twake-drive-snap-$date.heapsnapshot` - ls twake-drive-snap-*.heapsnapshot - */ - fastify.get("/heap", async (request, reply) => { + if (diagnosticsConfig?.probeSecret?.length) { + fastify.post("/heap", async (request, reply) => { if ((request.query as { secret: string }).secret !== diagnosticsConfig.probeSecret) return reply.status(403).send(); - return reply.send({ token: getRunningToken() }); - }); - - fastify.post("/heap", async (request, reply) => { - const hashExpected = hashToken(); - const hashProvided = (request.body as { hash: string })?.hash; - if (hashProvided !== hashExpected) return reply.status(403).send(); const filenameTimestamp = new Date() .toISOString() .replace(/(\.\d\d\d)?Z$/, "") @@ -79,7 +21,6 @@ const routes: FastifyPluginCallback = (fastify: FastifyInstance, _opts, next) => return replyResult; }); } - next(); }; diff --git 
a/tdrive/backend/node/src/core/platform/services/storage/connectors/local/service.ts b/tdrive/backend/node/src/core/platform/services/storage/connectors/local/service.ts index ed7fa9afb..9828b52fb 100644 --- a/tdrive/backend/node/src/core/platform/services/storage/connectors/local/service.ts +++ b/tdrive/backend/node/src/core/platform/services/storage/connectors/local/service.ts @@ -36,7 +36,7 @@ export default class LocalConnectorService implements StorageConnectorAPI { case TServiceDiagnosticDepth.stats_basic: case TServiceDiagnosticDepth.stats_track: case TServiceDiagnosticDepth.stats_deep: - return { ok: true, warn: "local_s3_alway_ok" }; + return { ok: true, warn: "local_storage_always_ok" }; default: throw new Error(`Unexpected TServiceDiagnosticDepth: ${JSON.stringify(depth)}`); diff --git a/tdrive/docker-compose.dev.tests.opensearch.yml b/tdrive/docker-compose.dev.tests.opensearch.yml index 14f82fb60..69cb853d3 100644 --- a/tdrive/docker-compose.dev.tests.opensearch.yml +++ b/tdrive/docker-compose.dev.tests.opensearch.yml @@ -69,7 +69,6 @@ services: - SEARCH_OS_USE_AUTH=true - SEARCH_OS_USERNAME=admin - DIAG_PROBE_SECRET=super_diagnostic_probe_secret - - DIAG_SECRET=super_diagnostic_secret depends_on: - postgres - opensearch-node1 diff --git a/tdrive/docker-compose.tests.yml b/tdrive/docker-compose.tests.yml index 983d74fce..2b887a955 100644 --- a/tdrive/docker-compose.tests.yml +++ b/tdrive/docker-compose.tests.yml @@ -85,7 +85,6 @@ services: - STORAGE_S3_ACCESS_KEY=admin - STORAGE_S3_SECRET_KEY=adminminio - DIAG_PROBE_SECRET=super_diagnostic_probe_secret - - DIAG_SECRET=super_diagnostic_secret depends_on: minio: condition: service_healthy From e9346711f1005d050ab97784e9272db6b330fea1 Mon Sep 17 00:00:00 2001 From: Eric Doughty-Papassideris Date: Thu, 12 Dec 2024 04:08:47 +0100 Subject: [PATCH 09/15] =?UTF-8?q?=E2=9C=85=20back:=20diagnostics=20e2e=20a?= =?UTF-8?q?dding=20aggregate=20failure=20result=20(#762)?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../services/diagnostics/providers/process.ts | 7 ++++++- .../node/test/e2e/diagnostics/diagnostics.spec.ts | 12 ++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/providers/process.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/process.ts index 35d085d4c..eca74dfcc 100644 --- a/tdrive/backend/node/src/core/platform/services/diagnostics/providers/process.ts +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/providers/process.ts @@ -1,12 +1,17 @@ import diagnostics from "../../../framework/api/diagnostics"; +export const e2eTestOverride = { + // Set to true to force fail to test diagnostic aggregation + forceFail: false, +}; + export default () => diagnostics.registerProviders({ key: "process", tags: ["live", "ready"], async get() { return { - ok: true, + ok: !e2eTestOverride.forceFail, gc: !!global.gc, pid: process.pid, mem: process.memoryUsage(), diff --git a/tdrive/backend/node/test/e2e/diagnostics/diagnostics.spec.ts b/tdrive/backend/node/test/e2e/diagnostics/diagnostics.spec.ts index 685ac62be..b6c0536bd 100644 --- a/tdrive/backend/node/test/e2e/diagnostics/diagnostics.spec.ts +++ b/tdrive/backend/node/test/e2e/diagnostics/diagnostics.spec.ts @@ -2,6 +2,7 @@ import "reflect-metadata"; import { afterAll, beforeEach, describe, expect, it, jest } from "@jest/globals"; import { init, TestPlatform } from "../setup"; import { getConfig as getDiagnosticsConfig } from "../../../src/core/platform/framework/api/diagnostics"; +import { e2eTestOverride as processDiagnosticProviderE2EOverride } from "../../../src/core/platform/services/diagnostics/providers/process"; describe("The diagnostics infrastucture", () => { let platform: TestPlatform; @@ -48,4 +49,15 @@ describe("The diagnostics infrastucture", () => { expect(result.statusCode).toBe(200); expect(result.json().ok).toBe(true); }); + + it("should 
aggregate failure", async () => { + try { + processDiagnosticProviderE2EOverride.forceFail = true; + const result = await getDiagnosticTags("ready", diagnosticConfig.probeSecret!); + expect(result.statusCode).toBe(503); + expect(result.json().ok).toBe(false); + } finally { + processDiagnosticProviderE2EOverride.forceFail = false; + } + }); }); From 633f8ff71b5e3baeb1b27d07849ee6f7ae0aa794 Mon Sep 17 00:00:00 2001 From: Eric Doughty-Papassideris Date: Thu, 12 Dec 2024 04:11:34 +0100 Subject: [PATCH 10/15] =?UTF-8?q?=F0=9F=9A=9A=20back:=20move=20diagnostics?= =?UTF-8?q?=20end=20points=20outside=20of=20exposed=20routes=20(#762)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../node/src/core/platform/services/diagnostics/index.ts | 2 +- tdrive/backend/node/test/e2e/diagnostics/diagnostics.spec.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/index.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/index.ts index 6d71d25ed..40fc932eb 100644 --- a/tdrive/backend/node/src/core/platform/services/diagnostics/index.ts +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/index.ts @@ -9,7 +9,7 @@ import registerBasicProviders from "./providers"; * The diagnostics service exposes endpoint that are of use for operational reasons. 
* */ -@Prefix("/api/diagnostics") +@Prefix("/diagnostics") @Consumes(["webserver"]) @ServiceName("diagnostics") export default class DiagnosticsService extends TdriveService { diff --git a/tdrive/backend/node/test/e2e/diagnostics/diagnostics.spec.ts b/tdrive/backend/node/test/e2e/diagnostics/diagnostics.spec.ts index b6c0536bd..276bd6899 100644 --- a/tdrive/backend/node/test/e2e/diagnostics/diagnostics.spec.ts +++ b/tdrive/backend/node/test/e2e/diagnostics/diagnostics.spec.ts @@ -36,7 +36,7 @@ describe("The diagnostics infrastucture", () => { const getDiagnosticTags = (tag: string, secret: string) => platform.app.inject({ method: "GET", - url: `/api/diagnostics/t/${encodeURIComponent(tag)}?secret=${encodeURIComponent(secret)}`, + url: `/diagnostics/t/${encodeURIComponent(tag)}?secret=${encodeURIComponent(secret)}`, }); it("should refuse invalid probe secrets", async () => { From f6d3072bfeee4bea92ce2787ffbb93cd7fc5a247 Mon Sep 17 00:00:00 2001 From: Eric Doughty-Papassideris Date: Fri, 13 Dec 2024 19:50:37 +0100 Subject: [PATCH 11/15] =?UTF-8?q?=F0=9F=94=8A=20back:=20output=20statistic?= =?UTF-8?q?s=20periodically=20to=20logs=20(#762)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../config/custom-environment-variables.json | 4 +- tdrive/backend/node/config/default.json | 4 +- .../platform/framework/api/diagnostics.ts | 39 +++++++++++++++---- .../platform/services/diagnostics/index.ts | 28 +++++++++++++ .../services/diagnostics/web/probe-routes.ts | 2 +- 5 files changed, 66 insertions(+), 11 deletions(-) diff --git a/tdrive/backend/node/config/custom-environment-variables.json b/tdrive/backend/node/config/custom-environment-variables.json index d1ba0971b..5a0d030e6 100644 --- a/tdrive/backend/node/config/custom-environment-variables.json +++ b/tdrive/backend/node/config/custom-environment-variables.json @@ -21,7 +21,9 @@ "__name": "DIAG_SKIP_KEYS", "__format": "json" }, - "probeSecret": "DIAG_PROBE_SECRET" + 
"probeSecret": "DIAG_PROBE_SECRET", + "statsLogPeriodMs": "DIAG_STATS_PRINT_PERIOD_MS", + "statsFullStatsLogPeriodMs": "DIAG_FULL_STATS_PRINT_PERIOD_MS" }, "webserver": { "host": "TWAKE_DRIVE_HOST", diff --git a/tdrive/backend/node/config/default.json b/tdrive/backend/node/config/default.json index de2e98b3c..22c107d5c 100644 --- a/tdrive/backend/node/config/default.json +++ b/tdrive/backend/node/config/default.json @@ -37,7 +37,9 @@ }, "diagnostics": { "skipKeys": [], - "probeSecret": "" + "probeSecret": "", + "statsLogPeriodMs": 120000, + "statsFullStatsLogPeriodMs": 600000 }, "tracker": { "type": "segment", diff --git a/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts b/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts index 32e4b6c71..2ff877031 100644 --- a/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts +++ b/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts @@ -1,6 +1,8 @@ import assert from "node:assert"; import config from "../../../config"; -import { logger } from "../logger"; +import { getLogger } from "../logger"; + +const logger = getLogger("Diagnostics"); /** * Values that can match a set of diagnostic providers. @@ -44,6 +46,10 @@ interface IDiagnosticsConfig { // relied on for security because disabling diagnostics. At worst this // provides access to the DB statistics. probeSecret?: string; + // Period at which to log TDiagnosticTag `stats`. 0 to disable. + statsLogPeriodMs: number; + // Period at which to log TDiagnosticTag `stats-full`. 0 to disable. + statsFullStatsLogPeriodMs: number; } export const getConfig = (): IDiagnosticsConfig => { @@ -56,7 +62,17 @@ export const getConfig = (): IDiagnosticsConfig => { .split(/[,\s]+/g) .filter(x => !!x), }; - return configSection; + + const getNumberFromConfig = (value): number => { + if (typeof value == "number") return value; + if (typeof value == "string") return parseInt(value, 10) ?? 
0; + return 0; + }; + return { + ...configSection, + statsLogPeriodMs: getNumberFromConfig(configSection.statsLogPeriodMs), + statsFullStatsLogPeriodMs: getNumberFromConfig(configSection.statsFullStatsLogPeriodMs), + }; }; /** Code-wide unique key for each provider */ @@ -125,20 +141,22 @@ const recordDiagnostic = (startMs: number, key: TDiagnosticKey, data?: object, e ...(error ? { ok: false, error } : { ...data }), }); -const runProvider = async provider => { +const runProvider = async (provider, log) => { const startMs = now(); try { const result = await provider.get(); if (!result.ok) logger.error( - { provider: provider.key, result }, + { diagnostic: provider.key, ...result }, "Got diagnostic provider result with ok=false", ); else if (result.warn) logger.warn( - { provider: provider.key, result }, + { diagnostic: provider.key, ...result }, "Got diagnostic provider result with ok=true but a warning", ); + else if (log) + logger.info({ diagnostic: provider.key, ...result }, "Diagnostic provider result"); return recordDiagnostic(startMs, provider.key, result); } catch (err) { logger.error({ err, provider: provider.key }, "Failed to read diagnostic provider"); @@ -173,7 +191,7 @@ export default { let triggerUpdate: () => void = () => undefined; // The empty function is for the linter. I love you linter <3 const updateProvider = (timeoutId: number) => async () => { forgetPendingTimeout(timeoutId); - await runProvider(provider); + await runProvider(provider, false); triggerUpdate(); }; triggerUpdate = () => pendingTimeouts.push(setTimeout(updateProvider, provider.pollPeriodMs)); @@ -213,9 +231,14 @@ export default { hasShutdown = true; }, - /** Return the values of all providers which include the provided tag */ + /** + * Return the values of all providers which include the provided tag. 
+ * + * @param log if `true`, print each individual log output even if successful + */ async get( tag: TDiagnosticTag, + log: boolean, ): Promise<{ ok: boolean } | { [key: TDiagnosticKey]: TDiagnosticResult }> { const config = getConfig(); const result = { ok: true }; @@ -230,7 +253,7 @@ export default { immediateDiagnosticProviders.map(async provider => { if (!isProviderIncludedInTag(tag, provider, config)) return; atLeastOneCheck = true; - const providerResult = await runProvider(provider); + const providerResult = await runProvider(provider, log); if (!providerResult.ok) result.ok = false; return (result[provider.key] = providerResult); }), diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/index.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/index.ts index 40fc932eb..95fe4d594 100644 --- a/tdrive/backend/node/src/core/platform/services/diagnostics/index.ts +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/index.ts @@ -4,6 +4,10 @@ import DiagnosticsServiceAPI from "./service-provider"; import DiagnosticsServiceImpl from "./service"; import WebServerAPI from "../webserver/provider"; import registerBasicProviders from "./providers"; +import diagnostics, { + getConfig as getDiagnosticsGetConfig, + TDiagnosticTag, +} from "../../framework/api/diagnostics"; /** * The diagnostics service exposes endpoint that are of use for operational reasons. @@ -15,6 +19,8 @@ import registerBasicProviders from "./providers"; export default class DiagnosticsService extends TdriveService { name = "diagnostics"; service: DiagnosticsServiceAPI; + private runningIntervalStatsLog?: ReturnType; + private runningIntervalStatsFullLog?: ReturnType; api(): DiagnosticsServiceAPI { return this.service; @@ -30,6 +36,28 @@ export default class DiagnosticsService extends TdriveService () => diagnostics.get(tag, true); + const startLoggingStats = (tag, periodMs) => + periodMs && periodMs > 0 ? 
setInterval(printStatsToLog(tag), periodMs) : undefined; + this.runningIntervalStatsLog = startLoggingStats("stats", config.statsLogPeriodMs); + this.runningIntervalStatsFullLog = startLoggingStats( + "stats-full", + config.statsFullStatsLogPeriodMs, + ); + + return this; + } + + public async doStop(): Promise { + if (this.runningIntervalStatsLog) { + clearInterval(this.runningIntervalStatsLog); + this.runningIntervalStatsLog = null; + } + if (this.runningIntervalStatsFullLog) { + clearInterval(this.runningIntervalStatsFullLog); + this.runningIntervalStatsFullLog = null; + } return this; } } diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/web/probe-routes.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/web/probe-routes.ts index 054fcabc4..2ffc21307 100644 --- a/tdrive/backend/node/src/core/platform/services/diagnostics/web/probe-routes.ts +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/web/probe-routes.ts @@ -14,7 +14,7 @@ const routes: FastifyPluginCallback = (fastify: FastifyInstance, _opts, next) => (request.query as { secret: string }).secret !== diagnosticsConfig.probeSecret ) return reply.status(403).send(); - const results = await diagnostics.get(tag); + const results = await diagnostics.get(tag, false); if (!results.ok) reply.status(503); return reply.send(results); }); From 8d61f6310cf3c05227c0f143bd43c38f7befa636 Mon Sep 17 00:00:00 2001 From: Eric Doughty-Papassideris Date: Fri, 13 Dec 2024 19:52:26 +0100 Subject: [PATCH 12/15] =?UTF-8?q?=F0=9F=94=87=20back:=20silent=20output=20?= =?UTF-8?q?of=20purposefully=20non=20implemented=20diagnostics=20(#762)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../node/src/core/platform/framework/api/diagnostics.ts | 6 ++++-- .../database/services/orm/connectors/postgres/postgres.ts | 2 +- .../platform/services/storage/connectors/S3/s3-service.ts | 3 ++- .../platform/services/storage/connectors/local/service.ts | 
2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts b/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts index 2ff877031..09b2aaa8d 100644 --- a/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts +++ b/tdrive/backend/node/src/core/platform/framework/api/diagnostics.ts @@ -79,7 +79,9 @@ export const getConfig = (): IDiagnosticsConfig => { export type TDiagnosticKey = string; /** Each provider should return an object of this format. The key of the provider defines the schema. */ -export type TDiagnosticResult = { ok: boolean; warn?: string } & { [key: string]: unknown }; +export type TDiagnosticResult = { ok: boolean; warn?: string; empty?: boolean } & { + [key: string]: unknown; +}; /** Implemented by objects that want to provide data to the diagnostic check */ export interface IDiagnosticProvider { @@ -155,7 +157,7 @@ const runProvider = async (provider, log) => { { diagnostic: provider.key, ...result }, "Got diagnostic provider result with ok=true but a warning", ); - else if (log) + else if (log && !result.empty) logger.info({ diagnostic: provider.key, ...result }, "Diagnostic provider result"); return recordDiagnostic(startMs, provider.key, result); } catch (err) { diff --git a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts index 6534ca1fa..209bc206b 100644 --- a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts +++ b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts @@ -99,7 +99,7 @@ export class PostgresConnector extends AbstractConnector Date: Fri, 13 Dec 2024 19:53:45 +0100 Subject: [PATCH 13/15] =?UTF-8?q?=F0=9F=9A=A7=20back:=20preparing=20attemp?= 
=?UTF-8?q?t=20at=20route=20statistics,=20non=20functional=20without=20fas?= =?UTF-8?q?tify=20upgrade=20(#762)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../platform/services/diagnostics/index.ts | 3 +++ .../services/diagnostics/web/provider.ts | 23 +++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 tdrive/backend/node/src/core/platform/services/diagnostics/web/provider.ts diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/index.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/index.ts index 95fe4d594..352179650 100644 --- a/tdrive/backend/node/src/core/platform/services/diagnostics/index.ts +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/index.ts @@ -8,6 +8,7 @@ import diagnostics, { getConfig as getDiagnosticsGetConfig, TDiagnosticTag, } from "../../framework/api/diagnostics"; +import registerFastifyRoutesDiagnosticsProvider from "./web/provider"; /** * The diagnostics service exposes endpoint that are of use for operational reasons. 
@@ -29,7 +30,9 @@ export default class DiagnosticsService extends TdriveService { this.service = new DiagnosticsServiceImpl(); const fastify = this.context.getProvider("webserver").getServer(); + registerBasicProviders(); + registerFastifyRoutesDiagnosticsProvider(fastify); fastify.register((instance, _opts, next) => { web(instance, { prefix: this.prefix }); diff --git a/tdrive/backend/node/src/core/platform/services/diagnostics/web/provider.ts b/tdrive/backend/node/src/core/platform/services/diagnostics/web/provider.ts new file mode 100644 index 000000000..87a543a90 --- /dev/null +++ b/tdrive/backend/node/src/core/platform/services/diagnostics/web/provider.ts @@ -0,0 +1,23 @@ +import diagnostics from "../../../framework/api/diagnostics"; + +export default fastify => { + // TODO: registering @fastify/routes-stats creates errors (performance mark not found) + // on the request performance marks, the hook doesn't seem to be called for every route + // and some of the more important ones like browse start failing... + // Version tested had to be 3.4.0 because 4+ needs Fastify 5. 
+ return; + // Don't require it at all until it's fixed + // eslint-disable-next-line @typescript-eslint/no-var-requires + fastify.register(require("@fastify/routes-stats"), { + printInterval: 4000, // milliseconds + decoratorName: "performanceMarked", // decorator is set to true if a performance.mark was called for the request + }); + diagnostics.registerProviders({ + key: "fastify-routes", + tags: ["stats", "stats-full"], + async get() { + fastify.measurements(); + return { ok: true, ...fastify.stats() }; + }, + }); +}; From 0341321641bdc9e07e7702dbbf1ef614a7e04a1a Mon Sep 17 00:00:00 2001 From: Eric Doughty-Papassideris Date: Fri, 13 Dec 2024 19:54:36 +0100 Subject: [PATCH 14/15] =?UTF-8?q?=F0=9F=A9=B9=20back:=20flatten=20postgres?= =?UTF-8?q?=20diagnostics=20(a=20bit)=20(#762)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../services/orm/connectors/postgres/postgres.ts | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts index 209bc206b..663f06fda 100644 --- a/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts +++ b/tdrive/backend/node/src/core/platform/services/database/services/orm/connectors/postgres/postgres.ts @@ -72,9 +72,10 @@ export class PostgresConnector extends AbstractConnector { // eslint-disable-next-line @typescript-eslint/no-explicit-any - const safeRequest = async (query: string, values?: any[]) => { + const safeRequest = async (query: string, values?: any[], singleRow = false) => { try { - return (await this.client.query(query, values)).rows; + const rows = (await this.client.query(query, values)).rows; + return singleRow && rows?.length === 1 ? 
rows[0] : rows; } catch (err) { const logId = "pg-diags-error-" + Math.floor(process.uptime() * 1000); logger.error( @@ -94,9 +95,11 @@ export class PostgresConnector extends AbstractConnector Date: Tue, 17 Dec 2024 22:06:20 +0100 Subject: [PATCH 15/15] =?UTF-8?q?=F0=9F=94=8A=20back:=20log=20http=20heade?= =?UTF-8?q?rs=20at=20login=20to=20get=20trace=20of=20IP=20and=20user=20age?= =?UTF-8?q?nt=20as=20relates=20(#762)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../node/src/services/console/web/controller.ts | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tdrive/backend/node/src/services/console/web/controller.ts b/tdrive/backend/node/src/services/console/web/controller.ts index 35a3217f4..703149f20 100644 --- a/tdrive/backend/node/src/services/console/web/controller.ts +++ b/tdrive/backend/node/src/services/console/web/controller.ts @@ -37,6 +37,20 @@ export class ConsoleController { } async auth(request: FastifyRequest<{ Body: AuthRequest }>): Promise { + const censoredHeadersLC = { + authorization: true, + cookie: true, + }; + logger.info( + { + http_headers: Object.fromEntries( + Object.entries(request.headers) + .map(([k, v]) => (censoredHeadersLC[k.toLowerCase()] ? null : [k, v])) + .filter(x => !!x), + ), + }, + "auth_attempt", + ); if (request.body.oidc_id_token) { return { access_token: await this.authByToken(request.body.oidc_id_token) }; } else if (request.body.email && request.body.password) {