fix(api): NATS subscriptions moved to onApplicationBootstrap — they silently no-oped before connect
All checks were successful
CI / backend-types (push) Successful in 10s
CI / frontend-build (push) Successful in 16s
CI / agent-tests (push) Successful in 47s
CI / integration (push) Successful in 22s

Production bug caught live: provider onModuleInit order put bridge/
consumer subscription hooks BEFORE NatsService finished connecting, so
every subscribe() hit the [OFFLINE] no-op path — the WS bridge has been
dead-on-boot in every production build, and the new v2 consumer never
saw a heartbeat (server_connections stayed empty under a live agent).
onApplicationBootstrap is guaranteed to run after all module inits,
including the awaited NATS connect.

The new CI contract suite is there to catch exactly this class of bug: it fails when the bug is present.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Vantz Stockwell
2026-06-11 11:02:52 -04:00
parent 4e184ca571
commit a3b4b5cc7d
2 changed files with 13 additions and 6 deletions

View File

@@ -1,4 +1,4 @@
import { Injectable, Logger, OnModuleInit } from '@nestjs/common';
import { Injectable, Logger, OnApplicationBootstrap } from '@nestjs/common';
import { Interval } from '@nestjs/schedule';
import { InjectRepository } from '@nestjs/typeorm';
import { Repository } from 'typeorm';
@@ -17,7 +17,7 @@ import { License } from '../entities/license.entity';
* staleness sweep marks hosts offline when heartbeats stop arriving.
*/
@Injectable()
export class HostAgentConsumerService implements OnModuleInit {
export class HostAgentConsumerService implements OnApplicationBootstrap {
private readonly logger = new Logger(HostAgentConsumerService.name);
/** licenseId -> cache expiry epoch-ms. Positive = exists, absent = unknown. */
@@ -39,7 +39,9 @@ export class HostAgentConsumerService implements OnModuleInit {
private readonly licenseRepository: Repository<License>,
) {}
onModuleInit() {
// Bootstrap, not module-init: subscriptions registered before NatsService
// finished connecting silently no-op (see NatsBridgeService note).
onApplicationBootstrap() {
this.nats.subscribe('corrosion.*.host.heartbeat', (data, subject) => {
const licenseId = subject.split('.')[1];
void this.onHeartbeat(licenseId).catch((err) =>

View File

@@ -1,14 +1,19 @@
import { Injectable, OnModuleInit, Logger } from '@nestjs/common';
import { Injectable, OnApplicationBootstrap, Logger } from '@nestjs/common';
import { NatsService } from './nats.service';
@Injectable()
export class NatsBridgeService implements OnModuleInit {
export class NatsBridgeService implements OnApplicationBootstrap {
private readonly logger = new Logger(NatsBridgeService.name);
private listeners: Map<string, Set<(event: string, data: unknown) => void>> = new Map();
constructor(private nats: NatsService) {}
onModuleInit() {
// Subscriptions MUST happen in onApplicationBootstrap, not onModuleInit:
// provider onModuleInit order is not guaranteed, and these hooks once ran
// before NatsService connected — every subscribe() silently no-oped and the
// WS bridge was dead from boot. Bootstrap runs after ALL module inits
// (including the awaited NATS connect) complete.
onApplicationBootstrap() {
this.nats.subscribe('corrosion.*.companion.heartbeat', (data, subject) => {
const licenseId = subject.split('.')[1];
this.emit(licenseId, 'heartbeat', data);