docs: map existing codebase

2026-02-09 19:31:05 -08:00
parent 85b1401440
commit d2d64f3361
7 changed files with 2042 additions and 0 deletions
@@ -0,0 +1,428 @@
+# Testing Patterns
+
+**Analysis Date:** 2026-02-09
+
+## Test Framework
+
+**Runner:**
+- Vitest v3.x
+- Config: No `vitest.config.ts` file — Vitest runs with its default settings; `package.json` sets `"type": "module"`, so tests run as ES modules
+
+**Assertion Library:**
+- Vitest built-in `expect()` API (Jest-compatible, built on Chai)
+
+**Run Commands:**
+```bash
+pnpm test                    # Run all tests in watch mode
+pnpm test:run                # Run all tests once (no watch)
+pnpm test:run src/path/to/file.test.ts  # Run a single test file
+```
+
+## Test File Organization
+
+**Location:**
+- Co-located with source files — test files live next to the code they test
+- `src/models/router.ts` → `src/models/router.test.ts`
+- `src/tools/policy.ts` → `src/tools/policy.test.ts`
+- `src/backends/native/agent.ts` → `src/backends/native/agent.test.ts`
+
+**Naming:**
+- `*.test.ts` suffix (no `.spec.ts` files exist)
+- Test file name matches source file name: `schema.test.ts` tests `schema.ts`
+
+**Statistics:**
+- 88 test files across the codebase
+- ~16,676 total lines of test code
+- 152 source (non-test) `.ts` files
+
+## Test Structure
+
+**Suite Organization:**
+```typescript
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { ClassName } from './source-file.js';
+
+describe('ClassName', () => {
+  it('does something specific', () => {
+    // arrange, act, assert
+  });
+
+  it('handles error case', () => {
+    // ...
+  });
+});
+```
+
+**Nested Describes for Grouping:**
+```typescript
+describe('ToolPolicy', () => {
+  describe('default config (full profile)', () => {
+    it('allows all tools when profile is full', () => { ... });
+  });
+
+  describe('profile filtering', () => {
+    it('minimal profile only allows read-only tools', () => { ... });
+    it('coding profile includes file writes and shell', () => { ... });
+  });
+
+  describe('edge cases', () => {
+    it('handles empty tool list', () => { ... });
+  });
+});
+```
+
+**Naming convention for `it()` blocks:**
+- Start with a verb: "returns", "creates", "handles", "uses", "fires", "respects"
+- Describe expected behavior: `'returns silent action for non-matching tools'`
+- Error cases: `'returns error for missing file'`, `'fails if old_string not found'`
+
+## Setup and Teardown
+
+**beforeEach/afterEach for test isolation:**
+```typescript
+// File system tests — create temp dir, clean up after
+let testDir: string;
+
+beforeEach(() => {
+  testDir = mkdtempSync(join(tmpdir(), 'flynn-file-test-'));
+});
+
+afterEach(() => {
+  rmSync(testDir, { recursive: true });
+});
+```
+
+**Database tests — create store, close and clean:**
+```typescript
+let store: SessionStore;
+
+beforeEach(() => {
+  store = new SessionStore(dbPath);
+});
+
+afterEach(() => {
+  store.close();
+  if (existsSync(dbPath)) {
+    unlinkSync(dbPath);
+  }
+});
+```
+
+**Mock cleanup:**
+```typescript
+beforeEach(() => {
+  vi.clearAllMocks();
+});
+```
+
+## Mocking
+
+**Framework:** Vitest built-in `vi.fn()` and `vi.mock()`
+
+**Pattern 1: Inline mock objects (most common):**
+```typescript
+const createMockClient = (): ModelClient => ({
+  chat: vi.fn().mockResolvedValue({
+    content: 'Hello!',
+    stopReason: 'end_turn',
+    usage: { inputTokens: 10, outputTokens: 5 },
+  } satisfies ChatResponse),
+});
+```
+
+**Pattern 2: Mock factory functions for reusable test doubles:**
+```typescript
+function mockMemoryStore(results: SearchResult[]): MemoryStore {
+  return {
+    search: vi.fn(() => results),
+    read: vi.fn(() => ''),
+    write: vi.fn(),
+    listNamespaces: vi.fn(() => []),
+  } as unknown as MemoryStore;
+}
+
+function mockVectorStore(results: VectorSearchResult[]): VectorStore {
+  return {
+    search: vi.fn(() => results),
+    upsertChunks: vi.fn(),
+    deleteNamespace: vi.fn(),
+  } as unknown as VectorStore;
+}
+```
+
+**Pattern 3: `vi.mock()` for module mocking:**
+```typescript
+vi.mock('./docker.js', () => ({
+  DockerSandbox: vi.fn().mockImplementation(() => ({
+    create: vi.fn().mockResolvedValue(undefined),
+    destroy: vi.fn().mockResolvedValue(undefined),
+    exec: vi.fn().mockResolvedValue({ stdout: '', stderr: '' }),
+  })),
+}));
+```
+
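+**Pattern 4: `vi.mock()` with shared module-level mocks** (`vi.mock()` is hoisted above these declarations, so the factory may only reference them lazily, as inside `mockImplementation` here; use `vi.hoisted()` if the factory must read them eagerly):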
+```typescript
+const mockGenerateContent = vi.fn();
+const mockGetGenerativeModel = vi.fn().mockReturnValue({
+  generateContent: mockGenerateContent,
+});
+
+vi.mock('@google/generative-ai', () => ({
+  GoogleGenerativeAI: vi.fn().mockImplementation(() => ({
+    getGenerativeModel: mockGetGenerativeModel,
+  })),
+}));
+```
+
+**Pattern 5: Sequential mock returns (multi-step interactions):**
+```typescript
+let callCount = 0;
+const mockClient: ModelClient = {
+  chat: vi.fn().mockImplementation(() => {
+    callCount++;
+    if (callCount === 1) {
+      return {
+        content: '',
+        stopReason: 'tool_use',
+        toolCalls: [{ id: 'call_1', name: 'test.echo', args: { text: 'hello' } }],
+        usage: { inputTokens: 10, outputTokens: 5 },
+      };
+    }
+    return {
+      content: 'The tool returned: hello',
+      stopReason: 'end_turn',
+      usage: { inputTokens: 15, outputTokens: 10 },
+    };
+  }),
+};
+```
+
+**Pattern 6: Spying on console methods:**
+```typescript
+const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
+// ... test code ...
+expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('Output channel'));
+warnSpy.mockRestore();
+```
+
+**Pattern 7: `as any` / `as unknown as T` for partial mocks:**
+```typescript
+scheduler = new CronScheduler(jobs, mockChannelRegistry as any);
+
+const mockClient = { chat: vi.fn() } as unknown as ModelClient;
+```
+
+**What to Mock:**
+- External API clients (Anthropic, OpenAI, Gemini SDKs)
+- Docker/container interactions
+- Model clients in agent/orchestrator tests
+- Channel adapters and registries in integration tests
+- `console.warn`/`console.error` when testing warning paths
+
+**What NOT to Mock:**
+- Zod schema validation — test against real schemas
+- In-memory data structures (Maps, Sets)
+- Pure functions (formatters, parsers)
+- File system when the test IS about file operations (use temp dirs instead)
+
+## Fixtures and Factories
+
+**Test Data — Helper functions over fixtures:**
+```typescript
+// Factory function for config objects
+function defaultConfig(overrides: Partial<ToolsConfig> = {}): ToolsConfig {
+  return {
+    profile: 'full',
+    allow: [],
+    deny: [],
+    agents: {},
+    providers: {},
+    ...overrides,
+  };
+}
+
+// Factory function for domain objects
+function makeTool(name: string): Tool {
+  return {
+    name,
+    description: `Mock ${name}`,
+    inputSchema: { type: 'object', properties: {} },
+    execute: async () => ({ success: true, output: '' }),
+  };
+}
+
+// Factory for test messages
+function makeMessages(count: number): Message[] {
+  const msgs: Message[] = [];
+  for (let i = 0; i < count; i++) {
+    msgs.push({
+      role: i % 2 === 0 ? 'user' : 'assistant',
+      content: `Message ${i}`,
+    });
+  }
+  return msgs;
+}
+```
+
+**Minimal configs for schema tests:**
+```typescript
+const minimalConfig = {
+  telegram: { bot_token: 'test', allowed_chat_ids: [1] },
+  models: { default: { provider: 'anthropic', model: 'claude-3' } },
+};
+```
+
+**Location:**
+- No separate fixtures directory — helper functions defined at top of each test file
+- No shared test utilities file — each test file is self-contained
+
+## Coverage
+
+**Requirements:** No coverage thresholds enforced. No coverage config detected.
+
+**View Coverage:**
+```bash
+pnpm test:run --coverage       # Requires @vitest/coverage-v8 to be installed
+```
+
+## Test Types
+
+**Unit Tests (majority):**
+- Test individual classes and functions in isolation
+- Mock external dependencies
+- Files: `src/models/router.test.ts`, `src/tools/policy.test.ts`, `src/hooks/engine.test.ts`
+
+**Integration Tests (some):**
+- Test real interactions between components
+- File system tools use real temp directories: `src/tools/builtin/file.test.ts`
+- Session store uses real SQLite: `src/session/store.test.ts`
+- Gateway tests spin up real WebSocket server: `src/gateway/server.test.ts`
+
+**E2E Tests:**
+- Not present — no end-to-end test framework
+
+## Common Patterns
+
+**Async Testing:**
+```typescript
+it('processes messages', async () => {
+  const response = await agent.process('Hi');
+  expect(response).toBe('Hello!');
+});
+```
+
+**Error/Rejection Testing:**
+```typescript
+it('throws when all providers fail', async () => {
+  await expect(router.chat({ messages: [{ role: 'user', content: 'Hi' }] }))
+    .rejects.toThrow('All model providers failed');
+});
+```
+
+**Zod Schema Rejection Testing:**
+```typescript
+it('rejects cron job with empty name', () => {
+  expect(() => configSchema.parse({
+    ...baseConfig,
+    automation: {
+      cron: [{ name: '', schedule: '0 9 * * *', ... }],
+    },
+  })).toThrow();
+});
+```
+
+**Testing Callback Invocation:**
+```typescript
+it('calls onToolUse callback on start and end', async () => {
+  const onToolUse = vi.fn();
+  // ... setup ...
+  await agent.process('echo hi');
+
+  expect(onToolUse).toHaveBeenCalledTimes(2);
+  expect(onToolUse).toHaveBeenNthCalledWith(1, expect.objectContaining({
+    type: 'start',
+    tool: 'test.echo',
+  }));
+});
+```
+
+**Testing with `satisfies` for type-safe mocks:**
+```typescript
+chat: vi.fn().mockResolvedValue({
+  content: 'Hello!',
+  stopReason: 'end_turn',
+  usage: { inputTokens: 10, outputTokens: 5 },
+} satisfies ChatResponse),
+```
+
+**Testing Collection Contents:**
+```typescript
+const names = result.map(t => t.name);
+expect(names).toContain('file.read');
+expect(names).not.toContain('shell.exec');
+```
+
+**Async Stream Testing:**
+```typescript
+it('streams from primary client', async () => {
+  const mockStream = async function* (): AsyncIterable<ChatStreamEvent> {
+    yield { type: 'content', content: 'Hello' };
+    yield { type: 'done', usage: { inputTokens: 5, outputTokens: 3 } };
+  };
+
+  const mockClient = {
+    chat: vi.fn(),
+    chatStream: vi.fn().mockReturnValue(mockStream()),
+  };
+  // ... setup: construct `router` with mockClient as its primary client ...
+
+  const chunks: string[] = [];
+  for await (const event of router.chatStream({ messages: [] })) {
+    if (event.type === 'content' && event.content) {
+      chunks.push(event.content);
+    }
+  }
+  expect(chunks).toEqual(['Hello']);
+});
+```
+
+**Integration Test with Real Server (beforeAll/afterAll):**
+```typescript
+let server: GatewayServer;
+
+beforeAll(async () => {
+  server = new GatewayServer(config);
+  await server.start();
+});
+
+afterAll(async () => {
+  await server.stop();
+});
+
+function createClient(): Promise<WebSocket> {
+  return new Promise((resolve, reject) => {
+    const ws = new WebSocket(`ws://127.0.0.1:${TEST_PORT}`);
+    ws.on('open', () => resolve(ws));
+    ws.on('error', reject);
+  });
+}
+```
+
+## Conventions Summary
+
+When writing tests for Flynn:
+
+1. **Import from vitest:** `import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';`
+2. **Import source with `.js` extension:** `import { ClassName } from './source-file.js';`
+3. **Co-locate test file** next to source file as `source-file.test.ts`
+4. **Use `describe`/`it` blocks** with descriptive behavior-focused names
+5. **Create mock factories** as functions at the top of the test file
+6. **Use `vi.fn()` for mocks**, `vi.mock()` for module mocking, `as unknown as T` for partial mocks
+7. **Clean up resources** in `afterEach`: temp dirs, database files, mock spies
+8. **Test both success and failure paths** — every feature should have at least one error test
+9. **Use helper factories** to build test data, not shared fixture files
+10. **Keep tests self-contained** — each test file should be independently understandable
+
+---
+
+*Testing analysis: 2026-02-09*