fix(observability): resolve Flynn status from user-level systemd units

This commit is contained in:
William Valentin
2026-02-22 21:05:51 -08:00
parent 7206a94871
commit 07f4f99187
2 changed files with 147 additions and 25 deletions
@@ -316,4 +316,70 @@ describe('ObservabilityCollector', () => {
}
}
});
// Verifies that when the system-scope Flynn unit is inactive but the
// user-scope unit is active, the collector reports the user-scope unit and
// reads its logs via `journalctl --user`.
it('prefers user-level Flynn unit when it is active', async () => {
  // NOTE(review): setting INVOCATION_ID presumably makes the collector treat
  // this process as running under systemd so the process-runtime fallback is
  // not taken — confirm against isGatewayRunningUnderSystemd.
  const savedInvocationId = process.env.INVOCATION_ID;
  process.env.INVOCATION_ID = 'unit-test';

  // Canned command output, keyed by the full command line the collector runs.
  const cannedReplies = new Map<string, { stdout: string; stderr: string }>([
    [
      // System scope: unit exists but is not running.
      'systemctl show flynn.service --property=LoadState,ActiveState,SubState,Description,ExecMainPID,Result --no-pager',
      {
        stdout: 'LoadState=loaded\nActiveState=inactive\nSubState=dead\nDescription=Flynn daemon\nExecMainPID=0\nResult=success\n',
        stderr: '',
      },
    ],
    [
      // User scope: unit is active and running with a real PID.
      'systemctl --user show flynn.service --property=LoadState,ActiveState,SubState,Description,ExecMainPID,Result --no-pager',
      {
        stdout: 'LoadState=loaded\nActiveState=active\nSubState=running\nDescription=Flynn daemon\nExecMainPID=4242\nResult=success\n',
        stderr: '',
      },
    ],
    [
      'systemctl --user show ollama.service --property=LoadState,ActiveState,SubState,UnitFileState,Description,ExecMainPID,Result --no-pager',
      {
        stdout: 'LoadState=not-found\nActiveState=inactive\nSubState=dead\nDescription=Ollama\nExecMainPID=0\nResult=not-found\n',
        stderr: '',
      },
    ],
    [
      'systemctl --user show llama-server.service --property=LoadState,ActiveState,SubState,UnitFileState,Description,ExecMainPID,Result --no-pager',
      {
        stdout: 'LoadState=not-found\nActiveState=inactive\nSubState=dead\nDescription=llama.cpp\nExecMainPID=0\nResult=not-found\n',
        stderr: '',
      },
    ],
    ['docker compose -f docker-compose.yml config --profiles', { stdout: '', stderr: '' }],
    ['docker compose -f docker-compose.yml config --services', { stdout: 'flynn\n', stderr: '' }],
    ['docker compose -f docker-compose.yml ps --all --format json', { stdout: '[]', stderr: '' }],
    [
      'journalctl --user -u flynn.service --since 900 seconds ago --no-pager --output short-iso-precise -n 200',
      {
        stdout: '2026-02-23 12:10:10.000000+0000 host flynn[4242]: user scope log line\n',
        stderr: '',
      },
    ],
  ]);

  // Fake command runner: any command line not listed above fails the test.
  const runner = async (command: string, args: string[]) => {
    const invocation = `${command} ${args.join(' ')}`;
    const reply = cannedReplies.get(invocation);
    if (reply === undefined) {
      throw new Error(`Unexpected command: ${invocation}`);
    }
    // Hand back a fresh object per call so callers never share state.
    return { ...reply };
  };

  try {
    const collector = new ObservabilityCollector({
      config: createConfig(),
      runner,
    });
    await collector.forceSample();

    const allSources = await collector.listSources();
    const flynnSource = allSources.find((candidate) => candidate.id === 'systemd:flynn');
    expect(flynnSource?.status).toBe('running');
    expect(flynnSource?.runtime).toBe('systemd_user');
    expect(flynnSource?.kind).toBe('systemd_user');
    expect(flynnSource?.logCapable).toBe(true);

    const serviceLogs = await collector.getServiceLogs({ sourceId: 'systemd:flynn' });
    expect(serviceLogs.lines[0]?.text).toContain('user scope log line');
  } finally {
    // Restore the environment exactly as it was before the test.
    if (savedInvocationId !== undefined) {
      process.env.INVOCATION_ID = savedInvocationId;
    } else {
      delete process.env.INVOCATION_ID;
    }
  }
});
});
+81 -25
View File
@@ -383,6 +383,58 @@ interface SystemdStatus {
error?: string;
}
/**
 * Builds a placeholder status for a unit whose state could not be obtained.
 *
 * All state fields are set to `'unknown'`, the PID is `null`, the status text
 * is `'unavailable'`, and the unit identifier doubles as the display name.
 *
 * @param unit - Unit identifier, used for both `unit` and `name`.
 * @param error - The failure that prevented the lookup; passed through
 *   `normalizeError` to populate the `error` field.
 * @returns A `SystemdStatus` describing an unavailable unit.
 */
function unavailableSystemdStatus(unit: string, error: unknown): SystemdStatus {
  const notKnown = 'unknown';
  const placeholder: SystemdStatus = {
    unit,
    name: unit,
    loadState: notKnown,
    activeState: notKnown,
    subState: notKnown,
    statusText: 'unavailable',
    pid: null,
    result: notKnown,
    error: normalizeError(error),
  };
  return placeholder;
}
/**
 * Reports whether a status carries a meaningful load state.
 *
 * @param status - The unit status to inspect.
 * @returns `false` when `loadState` is exactly `'not-found'`, exactly
 *   `'unknown'`, or empty/whitespace-only; `true` for any other value.
 */
function isLoadedSystemdStatus(status: SystemdStatus): boolean {
  const { loadState } = status;
  if (loadState === 'not-found' || loadState === 'unknown') {
    return false;
  }
  // Anything left counts as loaded unless it is blank.
  return loadState.trim() !== '';
}
/**
 * Decides whether the system-scope or user-scope status should represent the
 * Flynn unit.
 *
 * Preference order (first match wins):
 *   1. user scope, if its mapped status is `'running'`;
 *   2. system scope, if its mapped status is `'running'`;
 *   3. user scope, if it is loaded and has no error;
 *   4. system scope, if it is loaded and has no error;
 *   5. user scope, if only the system scope has an error;
 *   6. system scope otherwise.
 *
 * @param systemStatus - Status obtained for the system scope.
 * @param userStatus - Status obtained for the user scope.
 * @returns The chosen scope, its raw status, and the result of
 *   `mapSystemdStatus` for that status.
 */
function chooseFlynnUnitScope(systemStatus: SystemdStatus, userStatus: SystemdStatus): {
  scope: 'system' | 'user';
  status: SystemdStatus;
  mapped: { status: ObservabilitySourceStatus; stateCode: number; healthCode: number };
} {
  const systemCandidate = {
    scope: 'system' as const,
    status: systemStatus,
    mapped: mapSystemdStatus(systemStatus.activeState, systemStatus.error),
  };
  const userCandidate = {
    scope: 'user' as const,
    status: userStatus,
    mapped: mapSystemdStatus(userStatus.activeState, userStatus.error),
  };
  const systemClean = !systemStatus.error;
  const userClean = !userStatus.error;

  // A running unit always wins, with the user scope taking precedence.
  if (userCandidate.mapped.status === 'running') {
    return userCandidate;
  }
  if (systemCandidate.mapped.status === 'running') {
    return systemCandidate;
  }

  // Neither is running: prefer a unit that is loaded and error-free.
  if (userClean && isLoadedSystemdStatus(userStatus)) {
    return userCandidate;
  }
  if (systemClean && isLoadedSystemdStatus(systemStatus)) {
    return systemCandidate;
  }

  // Neither qualifies as loaded: pick the user scope only when it is the sole
  // error-free one; every other combination falls back to the system scope.
  return userClean && !systemClean ? userCandidate : systemCandidate;
}
async function fetchSystemdUnitStatus(
runner: CommandRunner,
opts: { unit: string; name: string; user: boolean },
@@ -626,12 +678,17 @@ export class ObservabilityCollector {
private async collectSample(): Promise<void> {
const sampleTime = this.now();
const [flynnResult, localBackendsResult, dockerDependenciesResult] = await Promise.allSettled([
const [flynnSystemResult, flynnUserResult, localBackendsResult, dockerDependenciesResult] = await Promise.allSettled([
fetchSystemdUnitStatus(this.runner, {
unit: this.flynnSystemdUnit,
name: 'Flynn daemon',
user: false,
}),
fetchSystemdUnitStatus(this.runner, {
unit: this.flynnSystemdUnit,
name: 'Flynn daemon',
user: true,
}),
listLocalBackendStatuses(this.config, async (args: string[]) => {
return this.runner('systemctl', args, {
timeoutMs: DEFAULT_TIMEOUT_MS,
@@ -646,19 +703,12 @@ export class ObservabilityCollector {
}),
]);
const flynnStatus = flynnResult.status === 'fulfilled'
? flynnResult.value
: {
unit: this.flynnSystemdUnit,
name: 'Flynn daemon',
loadState: 'unknown',
activeState: 'unknown',
subState: 'unknown',
statusText: 'unavailable',
pid: null,
result: 'unknown',
error: normalizeError(flynnResult.reason),
};
const flynnSystemStatus = flynnSystemResult.status === 'fulfilled'
? flynnSystemResult.value
: unavailableSystemdStatus(this.flynnSystemdUnit, flynnSystemResult.reason);
const flynnUserStatus = flynnUserResult.status === 'fulfilled'
? flynnUserResult.value
: unavailableSystemdStatus(this.flynnSystemdUnit, flynnUserResult.reason);
const localBackends = localBackendsResult.status === 'fulfilled'
? localBackendsResult.value
: [];
@@ -668,30 +718,36 @@ export class ObservabilityCollector {
const snapshots: SourceSnapshot[] = [];
const flynnMapped = mapSystemdStatus(flynnStatus.activeState, flynnStatus.error);
const fallbackToProcessRuntime = !isGatewayRunningUnderSystemd() && flynnMapped.status !== 'running';
const flynnStatusValue = fallbackToProcessRuntime ? 'running' : flynnMapped.status;
const flynnStateCode = fallbackToProcessRuntime ? STATE_RUNNING : flynnMapped.stateCode;
const flynnHealthCode = fallbackToProcessRuntime ? HEALTH_HEALTHY : flynnMapped.healthCode;
const flynnChosen = chooseFlynnUnitScope(flynnSystemStatus, flynnUserStatus);
const fallbackToProcessRuntime = !isGatewayRunningUnderSystemd() && flynnChosen.mapped.status !== 'running';
const flynnStatusValue = fallbackToProcessRuntime ? 'running' : flynnChosen.mapped.status;
const flynnStateCode = fallbackToProcessRuntime ? STATE_RUNNING : flynnChosen.mapped.stateCode;
const flynnHealthCode = fallbackToProcessRuntime ? HEALTH_HEALTHY : flynnChosen.mapped.healthCode;
const flynnRuntime = fallbackToProcessRuntime
? 'systemd_system'
: (flynnChosen.scope === 'user' ? 'systemd_user' : 'systemd_system');
const flynnKind = fallbackToProcessRuntime
? 'systemd_system'
: (flynnChosen.scope === 'user' ? 'systemd_user' : 'systemd_system');
snapshots.push({
source: {
id: 'systemd:flynn',
name: 'Flynn daemon',
kind: 'systemd_system',
runtime: 'systemd_system',
kind: flynnKind,
runtime: flynnRuntime,
status: flynnStatusValue,
graphCapable: true,
logCapable: !fallbackToProcessRuntime,
metadata: {
unit: this.flynnSystemdUnit,
state: fallbackToProcessRuntime ? 'running' : flynnStatus.activeState,
statusText: fallbackToProcessRuntime ? 'running (gateway process)' : flynnStatus.statusText,
state: fallbackToProcessRuntime ? 'running' : flynnChosen.status.activeState,
statusText: fallbackToProcessRuntime ? 'running (gateway process)' : flynnChosen.status.statusText,
},
},
stateCode: flynnStateCode,
healthCode: flynnHealthCode,
hasError: fallbackToProcessRuntime ? false : Boolean(flynnStatus.error),
fingerprint: flynnStatus.pid ? `pid:${flynnStatus.pid}` : null,
hasError: fallbackToProcessRuntime ? false : Boolean(flynnChosen.status.error),
fingerprint: flynnChosen.status.pid ? `pid:${flynnChosen.status.pid}` : null,
});
for (const backend of localBackends) {