chore(openclaw): sync automation state

This commit is contained in:
William Valentin
2026-05-20 17:36:42 -07:00
parent d8f9640564
commit 6a5365feee
8 changed files with 3661 additions and 139 deletions
+1 -1
View File
@@ -4,7 +4,7 @@
# ── VM provisioning ────────────────────────────────────────────────────────
vm_domain: "zap [claw]"
vm_hostname: zap
vm_memory_mib: 4096
vm_memory_mib: 6144
vm_vcpus: 4
vm_disk_path: /var/lib/libvirt/images/claw.qcow2
vm_disk_size: "60G"
+29 -16
View File
@@ -92,7 +92,8 @@
"cacheWrite": 0
},
"contextWindow": 202800,
"maxTokens": 131100
"maxTokens": 131100,
"api": "openai-completions"
},
{
"id": "glm-5-turbo",
@@ -108,7 +109,8 @@
"cacheWrite": 0
},
"contextWindow": 202800,
"maxTokens": 131100
"maxTokens": 131100,
"api": "openai-completions"
},
{
"id": "glm-4.7",
@@ -124,7 +126,8 @@
"cacheWrite": 0
},
"contextWindow": 204800,
"maxTokens": 131072
"maxTokens": 131072,
"api": "openai-completions"
},
{
"id": "glm-4.7-flash",
@@ -140,7 +143,8 @@
"cacheWrite": 0
},
"contextWindow": 200000,
"maxTokens": 131072
"maxTokens": 131072,
"api": "openai-completions"
},
{
"id": "glm-4.7-flashx",
@@ -156,7 +160,8 @@
"cacheWrite": 0
},
"contextWindow": 200000,
"maxTokens": 128000
"maxTokens": 128000,
"api": "openai-completions"
},
{
"id": "glm-4.6",
@@ -172,7 +177,8 @@
"cacheWrite": 0
},
"contextWindow": 204800,
"maxTokens": 131072
"maxTokens": 131072,
"api": "openai-completions"
},
{
"id": "glm-4.6v",
@@ -189,7 +195,8 @@
"cacheWrite": 0
},
"contextWindow": 128000,
"maxTokens": 32768
"maxTokens": 32768,
"api": "openai-completions"
},
{
"id": "glm-4.5",
@@ -205,7 +212,8 @@
"cacheWrite": 0
},
"contextWindow": 131072,
"maxTokens": 98304
"maxTokens": 98304,
"api": "openai-completions"
},
{
"id": "glm-4.5-air",
@@ -221,7 +229,8 @@
"cacheWrite": 0
},
"contextWindow": 131072,
"maxTokens": 98304
"maxTokens": 98304,
"api": "openai-completions"
},
{
"id": "glm-4.5-flash",
@@ -237,7 +246,8 @@
"cacheWrite": 0
},
"contextWindow": 131072,
"maxTokens": 98304
"maxTokens": 98304,
"api": "openai-completions"
},
{
"id": "glm-4.5v",
@@ -254,7 +264,8 @@
"cacheWrite": 0
},
"contextWindow": 64000,
"maxTokens": 16384
"maxTokens": 16384,
"api": "openai-completions"
},
{
"id": "glm-5.1",
@@ -270,19 +281,21 @@
"cacheWrite": 0
},
"contextWindow": 204800,
"maxTokens": 131072
"maxTokens": 131072,
"api": "openai-completions"
}
]
},
"local": {
"baseUrl": "http://192.168.153.130:18806/v1",
"apiKey": "not-needed",
"api": "openai-completions",
"models": [
{
"id": "gemma-3-12b-local",
"name": "Gemma 3 12B Instruct (local)",
"id": "gemma-4-26b-local",
"name": "Gemma 4 26B A4B Instruct (local)",
"api": "openai-completions",
"reasoning": false,
"reasoning": true,
"input": [
"text"
],
@@ -292,7 +305,7 @@
"cacheRead": 0,
"cacheWrite": 0
},
"contextWindow": 131072,
"contextWindow": 262144,
"maxTokens": 8192
}
]
+9 -5
View File
@@ -160,11 +160,15 @@
"kind": "cron",
"expr": "0 3 * * *"
},
"sessionTarget": "main",
"sessionTarget": "isolated",
"wakeMode": "now",
"payload": {
"kind": "systemEvent",
"text": "__openclaw_memory_core_short_term_promotion_dream__"
"kind": "agentTurn",
"message": "__openclaw_memory_core_short_term_promotion_dream__",
"lightContext": true
},
"delivery": {
"mode": "none"
},
"state": {}
},
@@ -174,7 +178,7 @@
"sessionKey": "agent:main:discord:channel:1494746041604509848",
"name": "n8n Gmail trigger failure monitor",
"description": "Read-only monitor for n8n Gmail workflow whtdorf7yJMVYeHm. Runs 2x/day and alerts #ops-alerts only for factual failure states: workflow inactive, Gmail Trigger node missing, or recent failed executions. Do not alert on mere inactivity/idle time alone.",
"enabled": true,
"enabled": false,
"createdAtMs": 1776975556829,
"schedule": {
"kind": "cron",
@@ -186,7 +190,7 @@
"payload": {
"kind": "agentTurn",
"message": "Run `/home/openclaw/.openclaw/workspace/scripts/check-n8n-gmail-trigger.sh` from the workspace. If it prints an ALERT line or exits non-zero, send a concise factual alert to this Discord channel summarizing only the confirmed condition from the script output. Do not speculate about OAuth, deactivation, or root cause unless the script output explicitly proves it. If it prints OK, stay quiet. Do not modify workflows or credentials.",
"model": "local/gemma-3-12b-local",
"model": "local/gemma-4-26b-local",
"thinking": "off",
"timeoutSeconds": 120,
"toolsAllow": [
+1 -1
View File
@@ -1 +1 @@
/home/openclaw/.local/share/pnpm/5/node_modules/.pnpm/openclaw@2026.4.9_@napi-rs+canvas@0.1.97_@types+express@5.0.6_apache-arrow@18.1.0/node_modules/openclaw
/home/openclaw/.local/share/pnpm/5/.pnpm/openclaw@2026.4.29/node_modules/openclaw
+57 -58
View File
@@ -1,11 +1,11 @@
{
"meta": {
"lastTouchedVersion": "2026.4.23",
"lastTouchedAt": "2026-04-29T16:46:42.428Z"
"lastTouchedVersion": "2026.4.29",
"lastTouchedAt": "2026-05-01T23:21:29.865Z"
},
"wizard": {
"lastRunAt": "2026-04-09T22:23:03.566Z",
"lastRunVersion": "2026.4.9",
"lastRunAt": "2026-05-01T20:32:33.527Z",
"lastRunVersion": "2026.4.29",
"lastRunCommand": "doctor",
"lastRunMode": "local"
},
@@ -253,10 +253,10 @@
"api": "openai-completions",
"models": [
{
"id": "gemma-3-12b-local",
"name": "Gemma 3 12B Instruct (local)",
"id": "gemma-4-26b-local",
"name": "Gemma 4 26B A4B Instruct (local)",
"api": "openai-completions",
"reasoning": false,
"reasoning": true,
"input": [
"text"
],
@@ -266,48 +266,19 @@
"cacheRead": 0,
"cacheWrite": 0
},
"contextWindow": 131072,
"contextWindow": 262144,
"maxTokens": 8192
}
]
],
"apiKey": "not-needed"
}
}
},
"agents": {
"defaults": {
"model": {
"primary": "github-copilot/claude-sonnet-4.6",
"fallbacks": [
"zai/glm-5.1",
"zai/glm-5",
"zai/glm-4.7",
"openai-codex/gpt-5.3-codex",
"openai-codex/gpt-5.4",
"openai-codex/gpt-5.3-codex-spark",
"openai/gpt-5.2",
"openai/gpt-5.3-codex",
"github-copilot/gpt-5.3-codex",
"github-copilot/gpt-5.4",
"github-copilot/grok-code-fast-1",
"github-copilot/claude-opus-4.5",
"github-copilot/claude-opus-4.6",
"github-copilot/claude-sonnet-4.5",
"github-copilot/claude-sonnet-4.6",
"github-copilot/gemini-2.5-pro",
"github-copilot/gemini-3-flash-preview",
"github-copilot/gemini-3-pro-preview",
"github-copilot/gemini-3.1-pro-preview",
"github-copilot/gpt-5",
"github-copilot/gpt-5-mini",
"github-copilot/gpt-5.1",
"github-copilot/gpt-5.1-codex",
"github-copilot/gpt-5.1-codex-max",
"github-copilot/gpt-5.1-codex-mini",
"github-copilot/gpt-5.2-codex",
"github-copilot/gpt-5.2",
"anthropic/claude-opus-4-6",
"anthropic/claude-sonnet-4-6"
]
"primary": "openai-codex/gpt-5.5",
"fallbacks": []
},
"models": {
"openai-codex/gpt-5.4": {
@@ -332,7 +303,7 @@
"alias": "GPT-5.3 Codex"
},
"openai/gpt-5.5": {
"alias": "GPT-5.5"
"alias": "GPT-5.5 API Key"
},
"openai/gpt-5.4-mini": {
"alias": "GPT-5.4 Mini"
@@ -411,6 +382,12 @@
},
"zai/glm-4.5v": {
"alias": "GLM-4.5V"
},
"local/gemma-4-26b-local": {
"alias": "Gemma 4 26B (local)"
},
"openai-codex/gpt-5.5": {
"alias": "GPT-5.5"
}
},
"memorySearch": {
@@ -473,9 +450,8 @@
"id": "automation",
"name": "Automation",
"model": {
"primary": "local/gemma-3-12b-local",
"primary": "local/gemma-4-26b-local",
"fallbacks": [
"openai-codex/gpt-5.4-nano",
"openai-codex/gpt-5.4-mini",
"zai/glm-4.7-flash"
]
@@ -486,11 +462,8 @@
"default": true,
"name": "Main",
"model": {
"primary": "openai-codex/gpt-5.4",
"fallbacks": [
"openai-codex/gpt-5.4",
"zai/glm-5.1"
]
"primary": "openai-codex/gpt-5.5",
"fallbacks": []
},
"subagents": {
"allowAgents": [
@@ -747,6 +720,9 @@
"voice": "af_heart"
}
}
},
"groupChat": {
"visibleReplies": "automatic"
}
},
"commands": {
@@ -760,13 +736,17 @@
"maintenance": {
"mode": "warn",
"pruneAfter": "30d",
"maxEntries": 500,
"rotateBytes": "50mb"
"maxEntries": 500
}
},
"hooks": {
"internal": {
"enabled": true
"enabled": true,
"entries": {
"boot-md": {
"enabled": false
}
}
}
},
"channels": {
@@ -805,6 +785,13 @@
"groupPolicy": "allowlist",
"streaming": {
"mode": "partial"
},
"network": {
"autoSelectFamily": false,
"dnsResultOrder": "ipv4first"
},
"commands": {
"native": false
}
},
"discord": {
@@ -830,7 +817,10 @@
},
"allowFrom": [
"425208577846935553"
]
],
"commands": {
"native": false
}
}
},
"gateway": {
@@ -852,7 +842,7 @@
"config": {}
},
"brave": {
"enabled": true,
"enabled": false,
"config": {
"webSearch": {
"apiKey": "BSAgLuWVVMnrGvobOt7pDQjmVJ5u380"
@@ -860,24 +850,33 @@
}
},
"github-copilot": {
"enabled": true
"enabled": false
},
"zai": {
"enabled": true
"enabled": false
},
"openai": {
"enabled": true
},
"anthropic": {
"enabled": true
"enabled": false
},
"memory-core": {
"config": {
"dreaming": {
"enabled": true
}
}
},
"enabled": false
}
},
"allow": [
"discord",
"telegram",
"openai"
],
"slots": {
"memory": "none"
}
},
"ui": {
+57 -58
View File
@@ -1,11 +1,11 @@
{
"meta": {
"lastTouchedVersion": "2026.4.23",
"lastTouchedAt": "2026-04-29T16:46:42.428Z"
"lastTouchedVersion": "2026.4.29",
"lastTouchedAt": "2026-05-01T23:21:29.865Z"
},
"wizard": {
"lastRunAt": "2026-04-09T22:23:03.566Z",
"lastRunVersion": "2026.4.9",
"lastRunAt": "2026-05-01T20:32:33.527Z",
"lastRunVersion": "2026.4.29",
"lastRunCommand": "doctor",
"lastRunMode": "local"
},
@@ -253,10 +253,10 @@
"api": "openai-completions",
"models": [
{
"id": "gemma-3-12b-local",
"name": "Gemma 3 12B Instruct (local)",
"id": "gemma-4-26b-local",
"name": "Gemma 4 26B A4B Instruct (local)",
"api": "openai-completions",
"reasoning": false,
"reasoning": true,
"input": [
"text"
],
@@ -266,48 +266,19 @@
"cacheRead": 0,
"cacheWrite": 0
},
"contextWindow": 131072,
"contextWindow": 262144,
"maxTokens": 8192
}
]
],
"apiKey": "not-needed"
}
}
},
"agents": {
"defaults": {
"model": {
"primary": "github-copilot/claude-sonnet-4.6",
"fallbacks": [
"zai/glm-5.1",
"zai/glm-5",
"zai/glm-4.7",
"openai-codex/gpt-5.3-codex",
"openai-codex/gpt-5.4",
"openai-codex/gpt-5.3-codex-spark",
"openai/gpt-5.2",
"openai/gpt-5.3-codex",
"github-copilot/gpt-5.3-codex",
"github-copilot/gpt-5.4",
"github-copilot/grok-code-fast-1",
"github-copilot/claude-opus-4.5",
"github-copilot/claude-opus-4.6",
"github-copilot/claude-sonnet-4.5",
"github-copilot/claude-sonnet-4.6",
"github-copilot/gemini-2.5-pro",
"github-copilot/gemini-3-flash-preview",
"github-copilot/gemini-3-pro-preview",
"github-copilot/gemini-3.1-pro-preview",
"github-copilot/gpt-5",
"github-copilot/gpt-5-mini",
"github-copilot/gpt-5.1",
"github-copilot/gpt-5.1-codex",
"github-copilot/gpt-5.1-codex-max",
"github-copilot/gpt-5.1-codex-mini",
"github-copilot/gpt-5.2-codex",
"github-copilot/gpt-5.2",
"anthropic/claude-opus-4-6",
"anthropic/claude-sonnet-4-6"
]
"primary": "openai-codex/gpt-5.5",
"fallbacks": []
},
"models": {
"openai-codex/gpt-5.4": {
@@ -332,7 +303,7 @@
"alias": "GPT-5.3 Codex"
},
"openai/gpt-5.5": {
"alias": "GPT-5.5"
"alias": "GPT-5.5 API Key"
},
"openai/gpt-5.4-mini": {
"alias": "GPT-5.4 Mini"
@@ -411,6 +382,12 @@
},
"zai/glm-4.5v": {
"alias": "GLM-4.5V"
},
"local/gemma-4-26b-local": {
"alias": "Gemma 4 26B (local)"
},
"openai-codex/gpt-5.5": {
"alias": "GPT-5.5"
}
},
"memorySearch": {
@@ -473,9 +450,8 @@
"id": "automation",
"name": "Automation",
"model": {
"primary": "local/gemma-3-12b-local",
"primary": "local/gemma-4-26b-local",
"fallbacks": [
"openai-codex/gpt-5.4-nano",
"openai-codex/gpt-5.4-mini",
"zai/glm-4.7-flash"
]
@@ -486,11 +462,8 @@
"default": true,
"name": "Main",
"model": {
"primary": "openai-codex/gpt-5.4",
"fallbacks": [
"openai-codex/gpt-5.4",
"zai/glm-5.1"
]
"primary": "openai-codex/gpt-5.5",
"fallbacks": []
},
"subagents": {
"allowAgents": [
@@ -747,6 +720,9 @@
"voice": "af_heart"
}
}
},
"groupChat": {
"visibleReplies": "automatic"
}
},
"commands": {
@@ -760,13 +736,17 @@
"maintenance": {
"mode": "warn",
"pruneAfter": "30d",
"maxEntries": 500,
"rotateBytes": "50mb"
"maxEntries": 500
}
},
"hooks": {
"internal": {
"enabled": true
"enabled": true,
"entries": {
"boot-md": {
"enabled": false
}
}
}
},
"channels": {
@@ -805,6 +785,13 @@
"groupPolicy": "allowlist",
"streaming": {
"mode": "partial"
},
"network": {
"autoSelectFamily": false,
"dnsResultOrder": "ipv4first"
},
"commands": {
"native": false
}
},
"discord": {
@@ -830,7 +817,10 @@
},
"allowFrom": [
"425208577846935553"
]
],
"commands": {
"native": false
}
}
},
"gateway": {
@@ -852,7 +842,7 @@
"config": {}
},
"brave": {
"enabled": true,
"enabled": false,
"config": {
"webSearch": {
"apiKey": "BSAgLuWVVMnrGvobOt7pDQjmVJ5u380"
@@ -860,24 +850,33 @@
}
},
"github-copilot": {
"enabled": true
"enabled": false
},
"zai": {
"enabled": true
"enabled": false
},
"openai": {
"enabled": true
},
"anthropic": {
"enabled": true
"enabled": false
},
"memory-core": {
"config": {
"dreaming": {
"enabled": true
}
}
},
"enabled": false
}
},
"allow": [
"discord",
"telegram",
"openai"
],
"slots": {
"memory": "none"
}
},
"ui": {
File diff suppressed because it is too large Load Diff
+299
View File
@@ -0,0 +1,299 @@
#!/usr/bin/env python3
"""Voice Memo Pipeline Service - native voice ingress + Kokoro TTS read-back."""
from __future__ import annotations
import base64, json, os, re, time
import urllib.error, urllib.request, uuid
from http.server import HTTPServer, BaseHTTPRequestHandler
from pathlib import Path
# ── Runtime configuration ────────────────────────────────────────────────────
# Every setting is read once, at import time, from the environment; the second
# argument to os.environ.get() is the default used when the variable is unset.

# TCP port the HTTP server in main() listens on.
PORT = int(os.environ.get("VOICE_MEMO_PORT", "18813"))
# Base URL of the speech-to-text service; "/v1/audio/transcriptions" is appended.
WHISPER_URL = os.environ.get("WHISPER_BASE_URL", "http://127.0.0.1:18811")
# Base URL of the chat-completions LLM; "/v1/chat/completions" is appended.
LLM_URL = os.environ.get("LLAMA_CPP_BASE_URL", "http://127.0.0.1:18806")
# Base URL of the Kokoro text-to-speech service; "/v1/audio/speech" is appended.
KOKORO_URL = os.environ.get("KOKORO_BASE_URL", "http://127.0.0.1:18805")
# Bot credentials. An empty string means "not configured": Telegram downloads
# then raise, and Discord downloads are attempted without an Authorization header.
TELEGRAM_BOT_TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN", "")
DISCORD_BOT_TOKEN = os.environ.get("DISCORD_BOT_TOKEN", "")
# Default TTS voice, used when a request does not name one.
KOKORO_VOICE = os.environ.get("KOKORO_VOICE", "af_heart")
# Directory where generated read-back audio is written and later served from.
AUDIO_DIR = Path(os.environ.get("VOICE_MEMO_AUDIO_DIR", "/tmp/voice-memo-audio"))
# Value sent as the "model" field of the summarisation request.
LLM_MODEL = os.environ.get("VOICE_MEMO_LLM_MODEL", "local")
# Import-time side effect: make sure the audio directory exists before serving.
AUDIO_DIR.mkdir(parents=True, exist_ok=True)
def log(msg):
    """Write ``msg`` to stdout behind a ``[voice-memo] HH:MM:SS`` prefix.

    The stream is flushed on every call so lines show up immediately when
    stdout is piped to a service manager or log collector.
    """
    stamp = time.strftime("%H:%M:%S")
    print(f"[voice-memo] {stamp} {msg}", flush=True)
def encode_multipart(fields, files):
    """Serialise form fields and file parts into a ``multipart/form-data`` body.

    Args:
        fields: mapping of field name -> value; each value is rendered with
            ``str()`` and UTF-8 encoded.
        files: mapping of field name -> ``(filename, data, content_type)``,
            where ``data`` is the raw bytes of the file part.

    Returns:
        A ``(body, content_type)`` tuple: the encoded request body as bytes and
        the matching ``Content-Type`` header value carrying the generated
        boundary.
    """
    def _escape(value):
        # Names and filenames sit inside a quoted header parameter. Escape the
        # three bytes that could terminate the quoted string or the header line
        # (same substitutions browsers apply for multipart/form-data), so a
        # caller-influenced filename cannot inject extra part headers.
        return (str(value)
                .replace('"', "%22")
                .replace("\r", "%0D")
                .replace("\n", "%0A"))

    boundary = "----voice-memo-" + uuid.uuid4().hex
    delimiter = f"--{boundary}\r\n".encode()
    parts = []
    for name, value in fields.items():
        parts.append(delimiter)
        parts.append(f'Content-Disposition: form-data; name="{_escape(name)}"\r\n\r\n'.encode())
        parts.append(str(value).encode())
        parts.append(b"\r\n")
    for name, (filename, data, content_type) in files.items():
        # A part Content-Type is a bare header value: drop any line breaks.
        safe_type = str(content_type).replace("\r", "").replace("\n", "")
        parts.append(delimiter)
        parts.append(
            f'Content-Disposition: form-data; name="{_escape(name)}"; '
            f'filename="{_escape(filename)}"\r\n'.encode()
        )
        parts.append(f"Content-Type: {safe_type}\r\n\r\n".encode())
        parts.append(data)
        parts.append(b"\r\n")
    parts.append(f"--{boundary}--\r\n".encode())
    return b"".join(parts), f"multipart/form-data; boundary={boundary}"
def http_get_json(url, headers=None, timeout=30):
    """Issue a GET request and return the response body parsed as JSON.

    Args:
        url: address to fetch.
        headers: optional mapping of extra request headers.
        timeout: socket timeout in seconds passed to ``urlopen``.

    Returns:
        The decoded JSON value.
    """
    request = urllib.request.Request(url, method="GET")
    if headers:
        for name in headers:
            request.add_header(name, headers[name])
    with urllib.request.urlopen(request, timeout=timeout) as response:
        text = response.read().decode()
    return json.loads(text)
def http_download(url, headers=None, timeout=120):
    """Issue a GET request and return the raw response body.

    Args:
        url: address to fetch.
        headers: optional mapping of extra request headers.
        timeout: socket timeout in seconds passed to ``urlopen``.

    Returns:
        The complete response body as bytes.
    """
    extra_headers = dict(headers or {})
    request = urllib.request.Request(url, headers=extra_headers, method="GET")
    with urllib.request.urlopen(request, timeout=timeout) as response:
        payload = response.read()
    return payload
def download_telegram_voice(file_id):
    """Fetch the bytes of a Telegram file through the Bot API.

    The file id is first resolved to a server-side path with ``getFile``; that
    path is then downloaded from the Bot API file endpoint.

    Args:
        file_id: Telegram file identifier taken from the incoming request.

    Returns:
        The downloaded file content as bytes.

    Raises:
        ValueError: if ``TELEGRAM_BOT_TOKEN`` is empty, if ``getFile`` reports
            ``ok`` as false, or if its result carries no ``file_path``.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    if not TELEGRAM_BOT_TOKEN:
        raise ValueError("TELEGRAM_BOT_TOKEN not configured")
    base = f"https://api.telegram.org/bot{TELEGRAM_BOT_TOKEN}"
    # file_id comes from the request body: percent-encode it so characters such
    # as "&", "#" or spaces cannot add query parameters or break the URL.
    info = http_get_json(f"{base}/getFile?file_id={quote(str(file_id), safe='')}")
    if not info.get("ok"):
        raise ValueError(f"Telegram getFile failed: {info}")
    fp = (info.get("result") or {}).get("file_path")
    if not fp:
        # Report a readable error instead of a bare KeyError when the path is absent.
        raise ValueError(f"Telegram getFile returned no file_path: {info}")
    # Keep "/" unescaped: file_path is a relative path below the file endpoint.
    return http_download(
        f"https://api.telegram.org/file/bot{TELEGRAM_BOT_TOKEN}/{quote(str(fp), safe='/')}"
    )
def download_discord_attachment(url):
    """Download a Discord attachment and return its bytes.

    The bot token is attached as an ``Authorization`` header only when ``url``
    is an HTTPS address on a Discord-owned domain. ``url`` comes straight from
    the request body, so sending the token unconditionally would hand the
    credential to whatever host a caller names.

    Args:
        url: attachment address supplied by the caller.

    Returns:
        The downloaded content as bytes.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlsplit

    discord_domains = ("discord.com", "discordapp.com", "discordapp.net")
    target = urlsplit(str(url))
    host = (target.hostname or "").lower()
    is_discord_host = target.scheme == "https" and any(
        host == domain or host.endswith("." + domain) for domain in discord_domains
    )

    hdrs = {}
    if DISCORD_BOT_TOKEN and is_discord_host:
        hdrs["Authorization"] = f"Bot {DISCORD_BOT_TOKEN}"
    return http_download(url, headers=hdrs)
def transcribe_audio(audio_data, filename="audio.ogg", language="en"):
    """Send audio to the Whisper endpoint and return the transcript text.

    Args:
        audio_data: raw audio bytes.
        filename: filename reported for the uploaded part.
        language: language hint; a falsy value falls back to ``"en"``.

    Returns:
        The transcript with surrounding whitespace removed.

    Raises:
        RuntimeError: if Whisper answers with an HTTP error status, or if the
            response contains no transcript text.
    """
    fields = {"response_format": "json", "language": language or "en", "temperature": "0.0"}
    files = {"file": (filename, audio_data, "application/octet-stream")}
    body, ct = encode_multipart(fields, files)
    url = WHISPER_URL.rstrip("/") + "/v1/audio/transcriptions"
    req = urllib.request.Request(url, data=body, headers={"Content-Type": ct}, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=300) as r:
            raw = r.read().decode()
    except urllib.error.HTTPError as e:
        # errors="replace": an undecodable error body must not mask the HTTP error.
        detail = e.read().decode(errors="replace")[:300]
        raise RuntimeError(f"Whisper HTTP {e.code}: {detail}") from e
    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        # The server answered with plain text instead of JSON; use it as-is.
        data = None
    if isinstance(data, dict):
        # "or" chaining treats a null/empty "text" like a missing one, so a
        # {"text": null} reply is not turned into the literal string "None".
        text = str(data.get("text") or data.get("transcript") or "").strip()
    elif isinstance(data, str):
        text = data.strip()
    else:
        text = raw.strip()
    if not text:
        raise RuntimeError("Whisper returned no transcript")
    return text
# User-message template for the summarisation request. It is filled in with
# str.format(transcript=...), so "{transcript}" is the only placeholder; any
# literal brace added to this text later must be doubled ("{{" / "}}").
SUMMARY_PROMPT = """You process voice memos. Given the transcript, produce a JSON object with:
- "summary": 2-4 sentence summary
- "action_items": list of tasks/reminders/follow-ups (empty list if none)
Output ONLY valid JSON.
TRANSCRIPT:
{transcript}"""
def summarize_transcript(transcript):
    """Ask the chat-completions endpoint to summarise a transcript.

    Args:
        transcript: transcript text to summarise.

    Returns:
        A dict with ``"summary"`` and ``"action_items"`` (always a list). When
        the reply contains no parseable JSON object, the raw reply text is
        returned as the summary with an empty action list.

    Raises:
        RuntimeError: if the LLM endpoint answers with an HTTP error status.
    """
    payload = {
        "model": LLM_MODEL,
        "messages": [
            {"role": "system", "content": "You output only valid JSON."},
            {"role": "user", "content": SUMMARY_PROMPT.format(transcript=transcript)},
        ],
        "temperature": 0.3, "max_tokens": 1024, "stream": False,
    }
    url = LLM_URL.rstrip("/") + "/v1/chat/completions"
    req = urllib.request.Request(url, data=json.dumps(payload).encode(),
                                 headers={"Content-Type": "application/json"}, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=120) as r:
            result = json.loads(r.read().decode())
    except urllib.error.HTTPError as e:
        detail = e.read().decode(errors="replace")[:300]
        raise RuntimeError(f"LLM HTTP {e.code}: {detail}") from e

    # Walk the response defensively: an empty "choices" list or a null
    # "content" must degrade to an empty summary rather than raise
    # IndexError / AttributeError.
    choices = result.get("choices") if isinstance(result, dict) else None
    first = choices[0] if isinstance(choices, list) and choices else {}
    message = first.get("message") if isinstance(first, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    content = content.strip() if isinstance(content, str) else ""

    # The model may wrap its JSON in prose or code fences; grab the outermost {...}.
    m = re.search(r"\{[\s\S]*\}", content)
    if m:
        try:
            p = json.loads(m.group())
        except json.JSONDecodeError:
            p = None
        if isinstance(p, dict):
            items = p.get("action_items")
            if items is None:
                items = []
            elif not isinstance(items, list):
                # A single bare item is still an item; keep the list contract.
                items = [items]
            return {"summary": p.get("summary") or content, "action_items": items}
    return {"summary": content, "action_items": []}
def generate_tts(text, voice=None, fmt="mp3", speed=1.0):
    """Synthesise speech for ``text`` through the Kokoro endpoint.

    Args:
        text: text to speak.
        voice: voice name; a falsy value selects ``KOKORO_VOICE``.
        fmt: value sent as ``response_format``.
        speed: value sent as ``speed``.

    Returns:
        The response body (the generated audio) as bytes.

    Raises:
        RuntimeError: if Kokoro answers with an HTTP error status.
    """
    endpoint = KOKORO_URL.rstrip("/") + "/v1/audio/speech"
    request_body = json.dumps({
        "model": "kokoro",
        "input": text,
        "voice": voice or KOKORO_VOICE,
        "response_format": fmt,
        "speed": speed,
        "stream": False,
    }).encode()
    request = urllib.request.Request(endpoint, data=request_body, method="POST")
    request.add_header("Content-Type", "application/json")
    request.add_header("Accept", "audio/*")
    try:
        with urllib.request.urlopen(request, timeout=120) as response:
            audio = response.read()
    except urllib.error.HTTPError as err:
        raise RuntimeError(f"Kokoro HTTP {err.code}: {err.read().decode()[:300]}")
    return audio
def save_audio(data, fmt="mp3"):
    """Store audio bytes under a fresh random name in ``AUDIO_DIR``.

    Args:
        data: audio bytes to write.
        fmt: file extension to use, without the dot.

    Returns:
        The URL path ``/audio/<name>`` under which the file is served.

    Raises:
        ValueError: if ``fmt`` is not a plain alphanumeric extension.
    """
    ext = str(fmt)
    # fmt originates from the request ("tts_format"). Refuse anything that is
    # not a simple extension so it can never contribute path separators to the
    # file name written below.
    if not re.fullmatch(r"[A-Za-z0-9]+", ext):
        raise ValueError(f"Unsupported audio format: {fmt!r}")
    fname = f"{uuid.uuid4().hex}.{ext}"
    (AUDIO_DIR / fname).write_bytes(data)
    return f"/audio/{fname}"
def process_memo(body, uploaded_audio=None):
    """Run one voice memo through download, transcription, summary and optional TTS.

    Args:
        body: dict of request options. Keys read here: ``language``,
            ``audio_format``, ``tts_readback``, ``tts_voice``, ``tts_format``,
            ``source``, ``metadata``, plus one audio reference out of
            ``telegram_file_id``, ``discord_audio_url``, ``audio_url`` or
            ``audio_base64`` (checked in that order).
        uploaded_audio: audio bytes already read from the request; when
            non-empty they take precedence over any reference in ``body``.

    Returns:
        A dict with ``ok``, ``transcript``, ``summary``, ``action_items``,
        ``audio_url`` (``None`` unless a read-back was generated), ``source``,
        ``duration_s`` and ``metadata``.

    Raises:
        ValueError: if ``body`` is not a dict, no audio source is given, or the
            audio is empty. Download, transcription and summarisation errors
            propagate; TTS failures are logged and otherwise ignored.
    """
    if not isinstance(body, dict):
        raise ValueError("Request body must be a JSON object")
    t0 = time.time()
    language = body.get("language", "en")
    audio_fmt = body.get("audio_format", "ogg")
    tts_readback = body.get("tts_readback", False)
    tts_voice = body.get("tts_voice", KOKORO_VOICE)
    tts_format = body.get("tts_format", "mp3")
    # No default here on purpose: each branch below labels the memo with its
    # ingress kind when the caller did not supply a source. (Defaulting to the
    # truthy "unknown" would make every `source or "<kind>"` fallback dead code.)
    source = body.get("source")

    if uploaded_audio:
        audio_data = uploaded_audio
        source = source or "upload"
    elif body.get("telegram_file_id"):
        log(f"Downloading Telegram voice: {str(body['telegram_file_id'])[:20]}...")
        audio_data = download_telegram_voice(body["telegram_file_id"])
        source = "telegram"
    elif body.get("discord_audio_url"):
        log("Downloading Discord attachment...")
        audio_data = download_discord_attachment(body["discord_audio_url"])
        source = "discord"
    elif body.get("audio_url"):
        log("Downloading audio URL...")
        audio_data = http_download(body["audio_url"])
        source = source or "url"
    elif body.get("audio_base64"):
        audio_data = base64.b64decode(body["audio_base64"])
        source = source or "base64"
    else:
        raise ValueError("No audio source. Send: audio_url, telegram_file_id, discord_audio_url, audio_base64, or upload.")
    if not audio_data:
        raise ValueError("Audio data is empty")
    log(f"Got {len(audio_data)} bytes from {source}")

    # The extension becomes part of the filename sent to Whisper. Telegram voice
    # notes are always labelled ogg; otherwise accept only a plain alphanumeric
    # extension from the request and fall back to ogg for anything else.
    if source == "telegram":
        ext = "ogg"
    else:
        ext = str(audio_fmt)
        if not re.fullmatch(r"[A-Za-z0-9]{1,10}", ext):
            ext = "ogg"

    log("Transcribing...")
    transcript = transcribe_audio(audio_data, filename=f"voice_memo.{ext}", language=language)
    log(f"Transcript ({len(transcript)} chars)")
    log("Summarizing...")
    result = summarize_transcript(transcript)

    audio_url = None
    if tts_readback and result.get("summary"):
        log("Generating TTS read-back...")
        try:
            tts_data = generate_tts(result["summary"], voice=tts_voice, fmt=tts_format)
            audio_url = save_audio(tts_data, fmt=tts_format)
            log(f"TTS saved: {audio_url}")
        except Exception as exc:
            # Read-back is a nice-to-have: never fail the memo because of it.
            log(f"TTS failed (non-fatal): {exc}")

    elapsed = round(time.time() - t0, 2)
    log(f"Done in {elapsed}s")
    return {"ok": True, "transcript": transcript, "summary": result.get("summary", ""),
            "action_items": result.get("action_items", []), "audio_url": audio_url,
            "source": source, "duration_s": elapsed, "metadata": body.get("metadata", {})}
class VoiceMemoHandler(BaseHTTPRequestHandler):
    """HTTP front-end for the voice memo pipeline.

    Routes (query strings and trailing slashes are ignored):
        GET  /healthz       -> small status JSON
        GET  /audio/<name>  -> a file previously written into ``AUDIO_DIR``
        POST /memo          -> JSON body passed to ``process_memo``
        POST /memo/upload   -> raw or multipart audio passed to ``process_memo``
    Anything else answers 404 with a JSON error.
    """

    # Extensions accepted from an uploaded filename; anything else keeps "ogg".
    _UPLOAD_EXTS = ("ogg", "mp3", "wav", "webm", "m4a", "flac", "opus")
    # Content types for served audio; unknown extensions use octet-stream.
    _AUDIO_MIME = {"mp3": "audio/mpeg", "ogg": "audio/ogg", "wav": "audio/wav",
                   "flac": "audio/flac", "opus": "audio/opus"}

    def do_GET(self):
        """Dispatch GET requests to the health check or the audio file server."""
        path = self.path.split("?")[0].rstrip("/")
        if path == "/healthz":
            self._json({"status": "ok", "service": "voice-memo", "port": PORT})
        elif path.startswith("/audio/"):
            self._serve_audio(path)
        else:
            self._json({"error": "not found"}, 404)

    def do_POST(self):
        """Dispatch POST requests to the JSON or upload memo handlers."""
        path = self.path.split("?")[0].rstrip("/")
        if path == "/memo":
            self._handle_json()
        elif path == "/memo/upload":
            self._handle_upload()
        else:
            self._json({"error": "not found"}, 404)

    def _read_body(self):
        """Read exactly ``Content-Length`` bytes of request body.

        Raises:
            ValueError: if the header is not a non-negative integer. A negative
                length would otherwise make ``rfile.read`` block until EOF.
        """
        try:
            length = int(self.headers.get("Content-Length", 0))
        except (TypeError, ValueError):
            raise ValueError("invalid Content-Length header") from None
        if length < 0:
            raise ValueError("negative Content-Length header")
        return self.rfile.read(length)

    def _handle_json(self):
        """Handle ``POST /memo``: parse the JSON object body and run the pipeline."""
        try:
            body = json.loads(self._read_body().decode())
        except Exception as e:
            return self._json({"ok": False, "error": f"Bad body: {e}"}, 400)
        if not isinstance(body, dict):
            # Valid JSON but not an object (e.g. a list): this is a client
            # error, not a pipeline failure, so answer 400 instead of 500.
            return self._json({"ok": False, "error": "Bad body: expected a JSON object"}, 400)
        try:
            self._json(process_memo(body))
        except Exception as e:
            log(f"Error: {e}")
            self._json({"ok": False, "error": str(e)}, 500)

    @staticmethod
    def _parse_upload(raw, content_type):
        """Extract ``(audio_bytes, extension)`` from an upload request body.

        A non-multipart body is treated as the audio itself. For multipart
        bodies the part whose field name is ``file`` or ``audio`` is used (the
        last one wins) and the extension is taken from its filename when it is
        one of ``_UPLOAD_EXTS``. ``audio_bytes`` is ``None`` when no such part
        exists; the extension defaults to ``"ogg"``.
        """
        audio_data, audio_fmt = None, "ogg"
        if "multipart/form-data" not in content_type.lower():
            return raw, audio_fmt
        # Accept both boundary=abc and boundary="abc", and stop at any
        # parameter that follows (e.g. "; charset=utf-8").
        bm = re.search(r'boundary=(?:"([^"]+)"|([^;\s]+))', content_type, re.IGNORECASE)
        if not bm:
            return audio_data, audio_fmt
        delimiter = b"--" + (bm.group(1) or bm.group(2)).encode()
        for part in raw.split(delimiter):
            # Skip the preamble before the first delimiter and the closing "--".
            if not part or part.strip() == b"--":
                continue
            hend = part.find(b"\r\n\r\n")
            if hend < 0:
                continue
            hdrs = part[:hend].decode("utf-8", errors="replace")
            bdata = part[hend + 4:]
            if bdata.endswith(b"\r\n"):
                bdata = bdata[:-2]
            # \b keeps this from matching the tail of filename="file".
            if not re.search(r'\bname="(?:file|audio)"', hdrs):
                continue
            audio_data = bdata
            fm = re.search(r'filename="([^"]+)"', hdrs)
            if fm:
                ext = fm.group(1).rsplit(".", 1)[-1].lower()
                if ext in VoiceMemoHandler._UPLOAD_EXTS:
                    audio_fmt = ext
        return audio_data, audio_fmt

    def _handle_upload(self):
        """Handle ``POST /memo/upload``: raw or multipart audio in the body."""
        try:
            ct = self.headers.get("Content-Type", "")
            audio_data, audio_fmt = self._parse_upload(self._read_body(), ct)
            self._json(process_memo({"source": "upload", "audio_format": audio_fmt}, uploaded_audio=audio_data))
        except Exception as e:
            log(f"Upload error: {e}")
            self._json({"ok": False, "error": str(e)}, 500)

    def _serve_audio(self, path):
        """Send the file named by the last segment of ``path`` from ``AUDIO_DIR``."""
        fname = path.split("/")[-1]
        fpath = AUDIO_DIR / fname
        # is_file() rather than exists(): "/audio/.." names a directory, which
        # exists but would make read_bytes() raise instead of answering 404.
        if not fpath.is_file():
            return self._json({"error": "audio not found"}, 404)
        ext = fname.rsplit(".", 1)[-1].lower()
        mime = self._AUDIO_MIME.get(ext, "application/octet-stream")
        data = fpath.read_bytes()
        self.send_response(200)
        self.send_header("Content-Type", mime)
        self.send_header("Content-Length", str(len(data)))
        self.end_headers()
        self.wfile.write(data)

    def _json(self, data, status=200):
        """Send ``data`` as an indented JSON response with the given status."""
        body = json.dumps(data, indent=2, ensure_ascii=False).encode()
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.send_header("Access-Control-Allow-Origin", "*")
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, fmt, *args):
        """Suppress the base class's per-request logging."""
        pass
def main():
    """Start the HTTP server and block until it is interrupted.

    The bind address comes from ``VOICE_MEMO_HOST`` and defaults to
    ``0.0.0.0`` (all interfaces). The service performs no authentication, so
    set it to ``127.0.0.1`` when only local callers should reach it.
    """
    host = os.environ.get("VOICE_MEMO_HOST", "0.0.0.0")
    srv = HTTPServer((host, PORT), VoiceMemoHandler)
    log(f"Voice Memo Service on {host}:{PORT}")
    log(f" Whisper: {WHISPER_URL} LLM: {LLM_URL} Kokoro: {KOKORO_URL}")
    try:
        srv.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        # Release the listening socket on every exit path, not only Ctrl-C.
        srv.server_close()


if __name__ == "__main__":
    main()