fix(observability): resolve Flynn status from user-level systemd units
This commit is contained in:
@@ -316,4 +316,70 @@ describe('ObservabilityCollector', () => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('prefers user-level Flynn unit when it is active', async () => {
  // Remember the ambient INVOCATION_ID so the `finally` block can put it back.
  // NOTE(review): presumably this makes the collector believe the gateway runs
  // under systemd, so the process-runtime fallback stays off — confirm.
  const savedInvocationId = process.env.INVOCATION_ID;
  process.env.INVOCATION_ID = 'unit-test';

  // Canned stdout for every command line the collector is expected to issue.
  // The system-scope Flynn unit is loaded but inactive; the user-scope one is running.
  const cannedStdout = new Map<string, string>([
    [
      'systemctl show flynn.service --property=LoadState,ActiveState,SubState,Description,ExecMainPID,Result --no-pager',
      'LoadState=loaded\nActiveState=inactive\nSubState=dead\nDescription=Flynn daemon\nExecMainPID=0\nResult=success\n',
    ],
    [
      'systemctl --user show flynn.service --property=LoadState,ActiveState,SubState,Description,ExecMainPID,Result --no-pager',
      'LoadState=loaded\nActiveState=active\nSubState=running\nDescription=Flynn daemon\nExecMainPID=4242\nResult=success\n',
    ],
    [
      'systemctl --user show ollama.service --property=LoadState,ActiveState,SubState,UnitFileState,Description,ExecMainPID,Result --no-pager',
      'LoadState=not-found\nActiveState=inactive\nSubState=dead\nDescription=Ollama\nExecMainPID=0\nResult=not-found\n',
    ],
    [
      'systemctl --user show llama-server.service --property=LoadState,ActiveState,SubState,UnitFileState,Description,ExecMainPID,Result --no-pager',
      'LoadState=not-found\nActiveState=inactive\nSubState=dead\nDescription=llama.cpp\nExecMainPID=0\nResult=not-found\n',
    ],
    ['docker compose -f docker-compose.yml config --profiles', ''],
    ['docker compose -f docker-compose.yml config --services', 'flynn\n'],
    ['docker compose -f docker-compose.yml ps --all --format json', '[]'],
    [
      'journalctl --user -u flynn.service --since 900 seconds ago --no-pager --output short-iso-precise -n 200',
      '2026-02-23 12:10:10.000000+0000 host flynn[4242]: user scope log line\n',
    ],
  ]);

  // Stub runner: answers from the table above and rejects on anything else,
  // so an unexpected command fails the test loudly.
  const runner = async (command: string, args: string[]) => {
    const key = `${command} ${args.join(' ')}`;
    const stdout = cannedStdout.get(key);
    // Compare against undefined explicitly: an empty string is a valid canned reply.
    if (stdout === undefined) {
      throw new Error(`Unexpected command: ${key}`);
    }
    return { stdout, stderr: '' };
  };

  try {
    const collector = new ObservabilityCollector({
      config: createConfig(),
      runner,
    });
    await collector.forceSample();

    const sources = await collector.listSources();
    const flynn = sources.find((entry) => entry.id === 'systemd:flynn');
    expect(flynn?.status).toBe('running');
    expect(flynn?.runtime).toBe('systemd_user');
    expect(flynn?.kind).toBe('systemd_user');
    expect(flynn?.logCapable).toBe(true);

    // Logs must come from the user-scope journal as well.
    const logs = await collector.getServiceLogs({ sourceId: 'systemd:flynn' });
    expect(logs.lines[0]?.text).toContain('user scope log line');
  } finally {
    // Restore the environment exactly as it was found.
    if (savedInvocationId !== undefined) {
      process.env.INVOCATION_ID = savedInvocationId;
    } else {
      delete process.env.INVOCATION_ID;
    }
  }
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -383,6 +383,58 @@ interface SystemdStatus {
|
|||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the placeholder status used when a systemd unit could not be queried.
 *
 * All state fields are set to `'unknown'`, the PID is cleared, and the failure
 * is stored in `error` after passing through `normalizeError`.
 *
 * @param unit - Name of the unit that was being queried; also used as `name`.
 * @param error - The value the failed lookup rejected with.
 * @returns A `SystemdStatus` describing an unavailable unit.
 */
function unavailableSystemdStatus(unit: string, error: unknown): SystemdStatus {
  const unknownState = 'unknown';
  const placeholder: SystemdStatus = {
    unit,
    name: unit,
    loadState: unknownState,
    activeState: unknownState,
    subState: unknownState,
    statusText: 'unavailable',
    pid: null,
    result: unknownState,
    error: normalizeError(error),
  };
  return placeholder;
}
|
||||||
|
|
||||||
|
/**
 * Reports whether systemd knows about the unit described by `status`.
 *
 * A unit counts as loaded unless its load state is empty, `'not-found'`, or the
 * `'unknown'` placeholder. The load state is trimmed once before every
 * comparison, so whitespace-padded values such as `' not-found\n'` are treated
 * the same as their bare form.
 *
 * @param status - Unit status to inspect; only `loadState` is read.
 * @returns `true` when the load state names a unit systemd actually has.
 */
function isLoadedSystemdStatus(status: SystemdStatus): boolean {
  const loadState = status.loadState.trim();
  if (loadState.length === 0) {
    return false;
  }
  return loadState !== 'not-found' && loadState !== 'unknown';
}
|
||||||
|
|
||||||
|
/**
 * Decides whether the system-level or the user-level Flynn unit should
 * represent the daemon.
 *
 * Precedence, first match wins:
 *  1. a unit whose mapped status is `'running'` (user scope checked first);
 *  2. a unit that is loaded and was queried without error (user scope first);
 *  3. the user unit, when only the system query failed;
 *  4. otherwise the system unit.
 *
 * @param systemStatus - Status obtained for the system-scope unit.
 * @param userStatus - Status obtained for the user-scope unit.
 * @returns The chosen scope, its raw status, and the result of
 *   `mapSystemdStatus` for that status.
 */
function chooseFlynnUnitScope(systemStatus: SystemdStatus, userStatus: SystemdStatus): {
  scope: 'system' | 'user';
  status: SystemdStatus;
  mapped: { status: ObservabilitySourceStatus; stateCode: number; healthCode: number };
} {
  // Build both candidate answers up front; the rules below only pick one.
  const system = {
    scope: 'system' as const,
    status: systemStatus,
    mapped: mapSystemdStatus(systemStatus.activeState, systemStatus.error),
  };
  const user = {
    scope: 'user' as const,
    status: userStatus,
    mapped: mapSystemdStatus(userStatus.activeState, userStatus.error),
  };
  const systemFailed = Boolean(systemStatus.error);
  const userFailed = Boolean(userStatus.error);

  // Rule 1: a running unit always wins, user scope taking priority.
  if (user.mapped.status === 'running') {
    return user;
  }
  if (system.mapped.status === 'running') {
    return system;
  }

  // Rule 2: nothing is running, so fall back to a unit that exists and was
  // queried cleanly.
  if (!userFailed && isLoadedSystemdStatus(userStatus)) {
    return user;
  }
  if (!systemFailed && isLoadedSystemdStatus(systemStatus)) {
    return system;
  }

  // Rules 3 and 4: neither unit is usable. Report the user scope only when it
  // is the sole query that succeeded; every other combination reports system.
  return !userFailed && systemFailed ? user : system;
}
|
||||||
|
|
||||||
async function fetchSystemdUnitStatus(
|
async function fetchSystemdUnitStatus(
|
||||||
runner: CommandRunner,
|
runner: CommandRunner,
|
||||||
opts: { unit: string; name: string; user: boolean },
|
opts: { unit: string; name: string; user: boolean },
|
||||||
@@ -626,12 +678,17 @@ export class ObservabilityCollector {
|
|||||||
private async collectSample(): Promise<void> {
|
private async collectSample(): Promise<void> {
|
||||||
const sampleTime = this.now();
|
const sampleTime = this.now();
|
||||||
|
|
||||||
const [flynnResult, localBackendsResult, dockerDependenciesResult] = await Promise.allSettled([
|
const [flynnSystemResult, flynnUserResult, localBackendsResult, dockerDependenciesResult] = await Promise.allSettled([
|
||||||
fetchSystemdUnitStatus(this.runner, {
|
fetchSystemdUnitStatus(this.runner, {
|
||||||
unit: this.flynnSystemdUnit,
|
unit: this.flynnSystemdUnit,
|
||||||
name: 'Flynn daemon',
|
name: 'Flynn daemon',
|
||||||
user: false,
|
user: false,
|
||||||
}),
|
}),
|
||||||
|
fetchSystemdUnitStatus(this.runner, {
|
||||||
|
unit: this.flynnSystemdUnit,
|
||||||
|
name: 'Flynn daemon',
|
||||||
|
user: true,
|
||||||
|
}),
|
||||||
listLocalBackendStatuses(this.config, async (args: string[]) => {
|
listLocalBackendStatuses(this.config, async (args: string[]) => {
|
||||||
return this.runner('systemctl', args, {
|
return this.runner('systemctl', args, {
|
||||||
timeoutMs: DEFAULT_TIMEOUT_MS,
|
timeoutMs: DEFAULT_TIMEOUT_MS,
|
||||||
@@ -646,19 +703,12 @@ export class ObservabilityCollector {
|
|||||||
}),
|
}),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const flynnStatus = flynnResult.status === 'fulfilled'
|
const flynnSystemStatus = flynnSystemResult.status === 'fulfilled'
|
||||||
? flynnResult.value
|
? flynnSystemResult.value
|
||||||
: {
|
: unavailableSystemdStatus(this.flynnSystemdUnit, flynnSystemResult.reason);
|
||||||
unit: this.flynnSystemdUnit,
|
const flynnUserStatus = flynnUserResult.status === 'fulfilled'
|
||||||
name: 'Flynn daemon',
|
? flynnUserResult.value
|
||||||
loadState: 'unknown',
|
: unavailableSystemdStatus(this.flynnSystemdUnit, flynnUserResult.reason);
|
||||||
activeState: 'unknown',
|
|
||||||
subState: 'unknown',
|
|
||||||
statusText: 'unavailable',
|
|
||||||
pid: null,
|
|
||||||
result: 'unknown',
|
|
||||||
error: normalizeError(flynnResult.reason),
|
|
||||||
};
|
|
||||||
const localBackends = localBackendsResult.status === 'fulfilled'
|
const localBackends = localBackendsResult.status === 'fulfilled'
|
||||||
? localBackendsResult.value
|
? localBackendsResult.value
|
||||||
: [];
|
: [];
|
||||||
@@ -668,30 +718,36 @@ export class ObservabilityCollector {
|
|||||||
|
|
||||||
const snapshots: SourceSnapshot[] = [];
|
const snapshots: SourceSnapshot[] = [];
|
||||||
|
|
||||||
const flynnMapped = mapSystemdStatus(flynnStatus.activeState, flynnStatus.error);
|
const flynnChosen = chooseFlynnUnitScope(flynnSystemStatus, flynnUserStatus);
|
||||||
const fallbackToProcessRuntime = !isGatewayRunningUnderSystemd() && flynnMapped.status !== 'running';
|
const fallbackToProcessRuntime = !isGatewayRunningUnderSystemd() && flynnChosen.mapped.status !== 'running';
|
||||||
const flynnStatusValue = fallbackToProcessRuntime ? 'running' : flynnMapped.status;
|
const flynnStatusValue = fallbackToProcessRuntime ? 'running' : flynnChosen.mapped.status;
|
||||||
const flynnStateCode = fallbackToProcessRuntime ? STATE_RUNNING : flynnMapped.stateCode;
|
const flynnStateCode = fallbackToProcessRuntime ? STATE_RUNNING : flynnChosen.mapped.stateCode;
|
||||||
const flynnHealthCode = fallbackToProcessRuntime ? HEALTH_HEALTHY : flynnMapped.healthCode;
|
const flynnHealthCode = fallbackToProcessRuntime ? HEALTH_HEALTHY : flynnChosen.mapped.healthCode;
|
||||||
|
const flynnRuntime = fallbackToProcessRuntime
|
||||||
|
? 'systemd_system'
|
||||||
|
: (flynnChosen.scope === 'user' ? 'systemd_user' : 'systemd_system');
|
||||||
|
const flynnKind = fallbackToProcessRuntime
|
||||||
|
? 'systemd_system'
|
||||||
|
: (flynnChosen.scope === 'user' ? 'systemd_user' : 'systemd_system');
|
||||||
snapshots.push({
|
snapshots.push({
|
||||||
source: {
|
source: {
|
||||||
id: 'systemd:flynn',
|
id: 'systemd:flynn',
|
||||||
name: 'Flynn daemon',
|
name: 'Flynn daemon',
|
||||||
kind: 'systemd_system',
|
kind: flynnKind,
|
||||||
runtime: 'systemd_system',
|
runtime: flynnRuntime,
|
||||||
status: flynnStatusValue,
|
status: flynnStatusValue,
|
||||||
graphCapable: true,
|
graphCapable: true,
|
||||||
logCapable: !fallbackToProcessRuntime,
|
logCapable: !fallbackToProcessRuntime,
|
||||||
metadata: {
|
metadata: {
|
||||||
unit: this.flynnSystemdUnit,
|
unit: this.flynnSystemdUnit,
|
||||||
state: fallbackToProcessRuntime ? 'running' : flynnStatus.activeState,
|
state: fallbackToProcessRuntime ? 'running' : flynnChosen.status.activeState,
|
||||||
statusText: fallbackToProcessRuntime ? 'running (gateway process)' : flynnStatus.statusText,
|
statusText: fallbackToProcessRuntime ? 'running (gateway process)' : flynnChosen.status.statusText,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
stateCode: flynnStateCode,
|
stateCode: flynnStateCode,
|
||||||
healthCode: flynnHealthCode,
|
healthCode: flynnHealthCode,
|
||||||
hasError: fallbackToProcessRuntime ? false : Boolean(flynnStatus.error),
|
hasError: fallbackToProcessRuntime ? false : Boolean(flynnChosen.status.error),
|
||||||
fingerprint: flynnStatus.pid ? `pid:${flynnStatus.pid}` : null,
|
fingerprint: flynnChosen.status.pid ? `pid:${flynnChosen.status.pid}` : null,
|
||||||
});
|
});
|
||||||
|
|
||||||
for (const backend of localBackends) {
|
for (const backend of localBackends) {
|
||||||
|
|||||||
Reference in New Issue
Block a user