Use local Whisper compose stack

2026-02-13 22:22:04 -08:00
parent 151b48310e
commit 5f833e84a6
3 changed files with 49 additions and 30 deletions
@@ -7,7 +7,8 @@

 telegram:
  bot_token: ${FLYNN_TELEGRAM_TOKEN}
-  allowed_chat_ids: []  # Add your Telegram chat ID
+  allowed_chat_ids: [-1003673132186]
+  require_mention: false

 server:
  tailscale_only: true
@@ -167,10 +168,9 @@ hooks:
 #     --host 0.0.0.0 --port 8080 --convert --language en \
 #     --inference-path /v1/audio/transcriptions
 #
-# audio:
-#   enabled: true
-#   provider:
-#     type: custom                            # openai, groq, ollama, llamacpp, custom
-#     endpoint: "http://localhost:18801/v1/audio/transcriptions"
-#     api_key: "${WHISPER_API_KEY}"            # Optional Bearer token
-#     model: "whisper-1"                       # Model name (default: whisper-1)
+audio:
+  enabled: true
+  provider:
+    type: custom
+    endpoint: "http://whisper-server:8080/v1/audio/transcriptions"
+    model: "whisper-1"
@@ -1,3 +1,5 @@
+version: '3.9'
+
 services:
  flynn:
    build: .
@@ -29,28 +31,39 @@ services:
      timeout: 5s
      start_period: 15s
      retries: 3
+    depends_on:
+      whisper-server:
+        condition: service_healthy
+    networks:
+      - flynn-net

  # Optional: Whisper server for audio transcription
-  # Uncomment and adjust as needed for voice message support
-  # whisper-server:
-  #   image: ghcr.io/ggml-org/whisper.cpp:main
-  #   container_name: whisper-server
-  #   restart: unless-stopped
-  #   ports:
-  #     - "18801:8080"
-  #   command: whisper-server
-  #     --model /app/models/ggml-base.en.bin
-  #     --host 0.0.0.0
-  #     --port 8080
-  #     --convert
-  #     --language en
-  #     --inference-path /v1/audio/transcriptions
-  #   healthcheck:
-  #     test: ["CMD-SHELL", "curl", "-f", "http://localhost:8080/"]
-  #     interval: 30s
-  #     timeout: 5s
-  #     start_period: 15s
-  #     retries: 3
+  whisper-server:
+    image: ghcr.io/ggml-org/whisper.cpp:main
+    container_name: whisper-server
+    restart: unless-stopped
+    ports:
+      - "18801:8080"
+    command: >
+      whisper-server
+      --model /app/models/ggml-base.en.bin
+      --host 0.0.0.0
+      --port 8080
+      --convert
+      --language en
+      --inference-path /v1/audio/transcriptions
+    volumes:
+      - ./whisper-models:/app/models:ro
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8080/"]  # exec form: CMD-SHELL takes ONE command string, so the old list ran bare `curl` and never reported healthy
+      interval: 30s
+      timeout: 5s
+      start_period: 15s
+      retries: 3
+    networks:
+      - flynn-net

 volumes:
  flynn-data:
+networks:
+  flynn-net:
@@ -56,7 +56,10 @@ export class SessionStore {
    const stmt = this.db.prepare(
      'INSERT INTO messages (session_id, role, content, metadata) VALUES (?, ?, ?, ?)',
    );
-    stmt.run(sessionId, message.role, message.content, metadata ? JSON.stringify(metadata) : null);
+    const contentString = typeof message.content === 'string'
+      ? message.content
+      : JSON.stringify(message.content);
+    stmt.run(sessionId, message.role, contentString, metadata ? JSON.stringify(metadata) : null);
  }

  getMessages(sessionId: string): Message[] {
@@ -84,7 +87,10 @@ export class SessionStore {
        'INSERT INTO messages (session_id, role, content, metadata) VALUES (?, ?, ?, ?)',
      );
      for (const msg of messages) {
-        insert.run(sessionId, msg.role, msg.content, null);
+        const contentString = typeof msg.content === 'string'
+          ? msg.content
+          : JSON.stringify(msg.content);
+        insert.run(sessionId, msg.role, contentString, null);
      }
    });
    transaction();