fix(observability): resolve Flynn status from user-level systemd units
This commit is contained in:
@@ -316,4 +316,70 @@ describe('ObservabilityCollector', () => {
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it('prefers user-level Flynn unit when it is active', async () => {
  // Remember the ambient value so the finally block can put it back.
  // NOTE(review): presumably INVOCATION_ID makes the collector believe the
  // gateway runs under systemd, disabling the process-runtime fallback — confirm.
  const savedInvocationId = process.env.INVOCATION_ID;
  process.env.INVOCATION_ID = 'unit-test';

  // Canned stdout for every command line the collector is expected to issue.
  // The system-scope Flynn unit reports inactive while the user-scope unit
  // reports active/running, which is the situation under test.
  const cannedStdout = new Map<string, string>([
    [
      'systemctl show flynn.service --property=LoadState,ActiveState,SubState,Description,ExecMainPID,Result --no-pager',
      'LoadState=loaded\nActiveState=inactive\nSubState=dead\nDescription=Flynn daemon\nExecMainPID=0\nResult=success\n',
    ],
    [
      'systemctl --user show flynn.service --property=LoadState,ActiveState,SubState,Description,ExecMainPID,Result --no-pager',
      'LoadState=loaded\nActiveState=active\nSubState=running\nDescription=Flynn daemon\nExecMainPID=4242\nResult=success\n',
    ],
    [
      'systemctl --user show ollama.service --property=LoadState,ActiveState,SubState,UnitFileState,Description,ExecMainPID,Result --no-pager',
      'LoadState=not-found\nActiveState=inactive\nSubState=dead\nDescription=Ollama\nExecMainPID=0\nResult=not-found\n',
    ],
    [
      'systemctl --user show llama-server.service --property=LoadState,ActiveState,SubState,UnitFileState,Description,ExecMainPID,Result --no-pager',
      'LoadState=not-found\nActiveState=inactive\nSubState=dead\nDescription=llama.cpp\nExecMainPID=0\nResult=not-found\n',
    ],
    ['docker compose -f docker-compose.yml config --profiles', ''],
    ['docker compose -f docker-compose.yml config --services', 'flynn\n'],
    ['docker compose -f docker-compose.yml ps --all --format json', '[]'],
    [
      'journalctl --user -u flynn.service --since 900 seconds ago --no-pager --output short-iso-precise -n 200',
      '2026-02-23 12:10:10.000000+0000 host flynn[4242]: user scope log line\n',
    ],
  ]);

  // Fake command runner: answers known command lines from the table above
  // and rejects anything else so unexpected invocations fail the test loudly.
  const runner = async (command: string, args: string[]) => {
    const key = `${command} ${args.join(' ')}`;
    const stdout = cannedStdout.get(key);
    if (stdout === undefined) {
      throw new Error(`Unexpected command: ${key}`);
    }
    // A fresh object per call, as each original branch returned its own literal.
    return { stdout, stderr: '' };
  };

  try {
    const collector = new ObservabilityCollector({ config: createConfig(), runner });
    await collector.forceSample();

    const sources = await collector.listSources();
    const flynn = sources.find((entry) => entry.id === 'systemd:flynn');

    // The active user-scope unit must win over the inactive system-scope one.
    expect(flynn?.status).toBe('running');
    expect(flynn?.runtime).toBe('systemd_user');
    expect(flynn?.kind).toBe('systemd_user');
    expect(flynn?.logCapable).toBe(true);

    // Logs must come from the `journalctl --user` invocation.
    const logs = await collector.getServiceLogs({ sourceId: 'systemd:flynn' });
    expect(logs.lines[0]?.text).toContain('user scope log line');
  } finally {
    // Restore the environment exactly as it was, including "not set".
    if (savedInvocationId !== undefined) {
      process.env.INVOCATION_ID = savedInvocationId;
    } else {
      delete process.env.INVOCATION_ID;
    }
  }
});
|
||||
});
|
||||
|
||||
@@ -383,6 +383,58 @@ interface SystemdStatus {
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/**
 * Builds the placeholder status reported for a unit whose status could not
 * be obtained.
 *
 * Every state field is set to `'unknown'`, `statusText` is `'unavailable'`,
 * `pid` is `null`, and the unit identifier doubles as the display name.
 *
 * @param unit - systemd unit identifier the status refers to.
 * @param error - the failure to record; passed through `normalizeError`.
 * @returns a `SystemdStatus` describing an unavailable unit.
 */
function unavailableSystemdStatus(unit: string, error: unknown): SystemdStatus {
  const UNKNOWN = 'unknown';
  const placeholder: SystemdStatus = {
    unit,
    name: unit,
    loadState: UNKNOWN,
    activeState: UNKNOWN,
    subState: UNKNOWN,
    statusText: 'unavailable',
    pid: null,
    result: UNKNOWN,
    error: normalizeError(error),
  };
  return placeholder;
}
|
||||
|
||||
/**
 * Tells whether a status describes a unit that systemd actually knows about.
 *
 * A unit counts as loaded unless its `loadState` is exactly `'not-found'`,
 * exactly `'unknown'`, or empty/whitespace-only.
 *
 * @param status - the unit status to inspect.
 * @returns `true` when `loadState` carries a meaningful, non-missing value.
 */
function isLoadedSystemdStatus(status: SystemdStatus): boolean {
  const { loadState } = status;
  // The sentinel comparisons are exact; only the emptiness check trims.
  if (loadState === 'not-found' || loadState === 'unknown') {
    return false;
  }
  return loadState.trim().length > 0;
}
|
||||
|
||||
/**
 * Decides whether the system-scope or the user-scope Flynn unit should
 * represent the daemon, given the status queried from each scope.
 *
 * Precedence, first match wins:
 *  1. the user unit, if it maps to `'running'`;
 *  2. the system unit, if it maps to `'running'`;
 *  3. the user unit, if it is loaded and carries no error;
 *  4. the system unit, if it is loaded and carries no error;
 *  5. the user unit, if only the system query carries an error;
 *  6. otherwise the system unit.
 *
 * @param systemStatus - status of the unit in the system scope.
 * @param userStatus - status of the unit in the user scope.
 * @returns the chosen scope together with its raw status and the result of
 *   `mapSystemdStatus` for that status.
 */
function chooseFlynnUnitScope(systemStatus: SystemdStatus, userStatus: SystemdStatus): {
  scope: 'system' | 'user';
  status: SystemdStatus;
  mapped: { status: ObservabilitySourceStatus; stateCode: number; healthCode: number };
} {
  // Build both candidates up front; the system status is mapped first.
  const systemCandidate = {
    scope: 'system' as const,
    status: systemStatus,
    mapped: mapSystemdStatus(systemStatus.activeState, systemStatus.error),
  };
  const userCandidate = {
    scope: 'user' as const,
    status: userStatus,
    mapped: mapSystemdStatus(userStatus.activeState, userStatus.error),
  };

  // A running unit always wins, user scope ahead of system scope.
  if (userCandidate.mapped.status === 'running') {
    return userCandidate;
  }
  if (systemCandidate.mapped.status === 'running') {
    return systemCandidate;
  }

  const systemClean = !systemStatus.error;
  const userClean = !userStatus.error;

  // Next best: a unit that exists and was queried without error.
  if (isLoadedSystemdStatus(userStatus) && userClean) {
    return userCandidate;
  }
  if (isLoadedSystemdStatus(systemStatus) && systemClean) {
    return systemCandidate;
  }

  // Neither unit is usable: report the user scope only when it is the sole
  // side without an error; every remaining combination reports system scope.
  if (userClean && !systemClean) {
    return userCandidate;
  }
  return systemCandidate;
}
|
||||
|
||||
async function fetchSystemdUnitStatus(
|
||||
runner: CommandRunner,
|
||||
opts: { unit: string; name: string; user: boolean },
|
||||
@@ -626,12 +678,17 @@ export class ObservabilityCollector {
|
||||
private async collectSample(): Promise<void> {
|
||||
const sampleTime = this.now();
|
||||
|
||||
const [flynnResult, localBackendsResult, dockerDependenciesResult] = await Promise.allSettled([
|
||||
const [flynnSystemResult, flynnUserResult, localBackendsResult, dockerDependenciesResult] = await Promise.allSettled([
|
||||
fetchSystemdUnitStatus(this.runner, {
|
||||
unit: this.flynnSystemdUnit,
|
||||
name: 'Flynn daemon',
|
||||
user: false,
|
||||
}),
|
||||
fetchSystemdUnitStatus(this.runner, {
|
||||
unit: this.flynnSystemdUnit,
|
||||
name: 'Flynn daemon',
|
||||
user: true,
|
||||
}),
|
||||
listLocalBackendStatuses(this.config, async (args: string[]) => {
|
||||
return this.runner('systemctl', args, {
|
||||
timeoutMs: DEFAULT_TIMEOUT_MS,
|
||||
@@ -646,19 +703,12 @@ export class ObservabilityCollector {
|
||||
}),
|
||||
]);
|
||||
|
||||
const flynnStatus = flynnResult.status === 'fulfilled'
|
||||
? flynnResult.value
|
||||
: {
|
||||
unit: this.flynnSystemdUnit,
|
||||
name: 'Flynn daemon',
|
||||
loadState: 'unknown',
|
||||
activeState: 'unknown',
|
||||
subState: 'unknown',
|
||||
statusText: 'unavailable',
|
||||
pid: null,
|
||||
result: 'unknown',
|
||||
error: normalizeError(flynnResult.reason),
|
||||
};
|
||||
const flynnSystemStatus = flynnSystemResult.status === 'fulfilled'
|
||||
? flynnSystemResult.value
|
||||
: unavailableSystemdStatus(this.flynnSystemdUnit, flynnSystemResult.reason);
|
||||
const flynnUserStatus = flynnUserResult.status === 'fulfilled'
|
||||
? flynnUserResult.value
|
||||
: unavailableSystemdStatus(this.flynnSystemdUnit, flynnUserResult.reason);
|
||||
const localBackends = localBackendsResult.status === 'fulfilled'
|
||||
? localBackendsResult.value
|
||||
: [];
|
||||
@@ -668,30 +718,36 @@ export class ObservabilityCollector {
|
||||
|
||||
const snapshots: SourceSnapshot[] = [];
|
||||
|
||||
const flynnMapped = mapSystemdStatus(flynnStatus.activeState, flynnStatus.error);
|
||||
const fallbackToProcessRuntime = !isGatewayRunningUnderSystemd() && flynnMapped.status !== 'running';
|
||||
const flynnStatusValue = fallbackToProcessRuntime ? 'running' : flynnMapped.status;
|
||||
const flynnStateCode = fallbackToProcessRuntime ? STATE_RUNNING : flynnMapped.stateCode;
|
||||
const flynnHealthCode = fallbackToProcessRuntime ? HEALTH_HEALTHY : flynnMapped.healthCode;
|
||||
const flynnChosen = chooseFlynnUnitScope(flynnSystemStatus, flynnUserStatus);
|
||||
const fallbackToProcessRuntime = !isGatewayRunningUnderSystemd() && flynnChosen.mapped.status !== 'running';
|
||||
const flynnStatusValue = fallbackToProcessRuntime ? 'running' : flynnChosen.mapped.status;
|
||||
const flynnStateCode = fallbackToProcessRuntime ? STATE_RUNNING : flynnChosen.mapped.stateCode;
|
||||
const flynnHealthCode = fallbackToProcessRuntime ? HEALTH_HEALTHY : flynnChosen.mapped.healthCode;
|
||||
const flynnRuntime = fallbackToProcessRuntime
|
||||
? 'systemd_system'
|
||||
: (flynnChosen.scope === 'user' ? 'systemd_user' : 'systemd_system');
|
||||
const flynnKind = fallbackToProcessRuntime
|
||||
? 'systemd_system'
|
||||
: (flynnChosen.scope === 'user' ? 'systemd_user' : 'systemd_system');
|
||||
snapshots.push({
|
||||
source: {
|
||||
id: 'systemd:flynn',
|
||||
name: 'Flynn daemon',
|
||||
kind: 'systemd_system',
|
||||
runtime: 'systemd_system',
|
||||
kind: flynnKind,
|
||||
runtime: flynnRuntime,
|
||||
status: flynnStatusValue,
|
||||
graphCapable: true,
|
||||
logCapable: !fallbackToProcessRuntime,
|
||||
metadata: {
|
||||
unit: this.flynnSystemdUnit,
|
||||
state: fallbackToProcessRuntime ? 'running' : flynnStatus.activeState,
|
||||
statusText: fallbackToProcessRuntime ? 'running (gateway process)' : flynnStatus.statusText,
|
||||
state: fallbackToProcessRuntime ? 'running' : flynnChosen.status.activeState,
|
||||
statusText: fallbackToProcessRuntime ? 'running (gateway process)' : flynnChosen.status.statusText,
|
||||
},
|
||||
},
|
||||
stateCode: flynnStateCode,
|
||||
healthCode: flynnHealthCode,
|
||||
hasError: fallbackToProcessRuntime ? false : Boolean(flynnStatus.error),
|
||||
fingerprint: flynnStatus.pid ? `pid:${flynnStatus.pid}` : null,
|
||||
hasError: fallbackToProcessRuntime ? false : Boolean(flynnChosen.status.error),
|
||||
fingerprint: flynnChosen.status.pid ? `pid:${flynnChosen.status.pid}` : null,
|
||||
});
|
||||
|
||||
for (const backend of localBackends) {
|
||||
|
||||
Reference in New Issue
Block a user