/**
 * Embedding provider interface and implementations for multiple backends.
 */

import type { EmbeddingConfig } from '../config/schema.js';

/**
 * Interface for embedding providers that convert text to vectors.
 */
export interface EmbeddingProvider {
  /** Generate embeddings for one or more texts. Returns one vector per text. */
  embed(texts: string[]): Promise<number[][]>;
  /** The dimensionality of the embedding vectors. */
  dimensions: number;
}

// ---------------------------------------------------------------------------
// OpenAI
// ---------------------------------------------------------------------------

/**
 * Embedding provider backed by the `openai` SDK, which is imported lazily on
 * the first non-empty `embed` call.
 *
 * Configuration fallbacks: `dimensions` defaults to 1536; the API key is taken
 * from `config.api_key`, then the `OPENAI_API_KEY` environment variable, then
 * an empty string. `config.endpoint`, when set, is passed as the client's
 * `baseURL`.
 */
export class OpenAIEmbeddingProvider implements EmbeddingProvider {
  private readonly _model: string;
  private readonly _dimensions: number;
  private readonly _apiKey: string;
  private readonly _endpoint?: string;

  constructor(config: EmbeddingConfig) {
    this._model = config.model;
    this._dimensions = config.dimensions ?? 1536;
    this._apiKey = config.api_key ?? process.env.OPENAI_API_KEY ?? '';
    this._endpoint = config.endpoint;
  }

  /** The configured (or default) embedding dimensionality. */
  get dimensions(): number {
    return this._dimensions;
  }

  /**
   * Embed `texts` with a single `embeddings.create` request.
   *
   * @param texts - Texts to embed.
   * @returns One vector per input text, ordered like `texts`. An empty input
   *   yields an empty array without loading the SDK or issuing a request.
   */
  async embed(texts: string[]): Promise<number[][]> {
    if (texts.length === 0) {
      return [];
    }

    const { default: OpenAI } = await import('openai');
    const client = new OpenAI({
      apiKey: this._apiKey,
      ...(this._endpoint ? { baseURL: this._endpoint } : {}),
    });

    const response = await client.embeddings.create({
      model: this._model,
      input: texts,
      // A dimensions value of 0 is treated as "unset" and not forwarded.
      ...(this._dimensions ? { dimensions: this._dimensions } : {}),
    });

    // Sort a copy by index so the output order matches the input order
    // without mutating the SDK's response object.
    const sorted = [...response.data].sort((a, b) => a.index - b.index);
    return sorted.map((item) => item.embedding);
  }
}

// ---------------------------------------------------------------------------
// Gemini
// ---------------------------------------------------------------------------

/**
 * Embedding provider backed by the `@google/generative-ai` SDK, which is
 * imported lazily on the first non-empty `embed` call.
 *
 * Configuration fallbacks: `dimensions` defaults to 768; the API key is taken
 * from `config.api_key`, then the `GOOGLE_API_KEY` environment variable, then
 * an empty string.
 */
export class GeminiEmbeddingProvider implements EmbeddingProvider {
  private readonly _model: string;
  private readonly _dimensions: number;
  private readonly _apiKey: string;

  constructor(config: EmbeddingConfig) {
    this._model = config.model;
    this._dimensions = config.dimensions ?? 768;
    this._apiKey = config.api_key ?? process.env.GOOGLE_API_KEY ?? '';
  }

  /** The configured (or default) embedding dimensionality. */
  get dimensions(): number {
    return this._dimensions;
  }

  /**
   * Embed `texts` with a single `batchEmbedContents` request.
   *
   * @param texts - Texts to embed.
   * @returns The `values` of each returned embedding, in response order. An
   *   empty input yields an empty array without loading the SDK.
   */
  async embed(texts: string[]): Promise<number[][]> {
    if (texts.length === 0) {
      return [];
    }

    const { GoogleGenerativeAI } = await import('@google/generative-ai');
    const genAI = new GoogleGenerativeAI(this._apiKey);
    const model = genAI.getGenerativeModel({ model: this._model });

    // Use batchEmbedContents for efficiency
    const requests = texts.map((text) => ({
      content: { role: 'user' as const, parts: [{ text }] },
    }));

    const response = await model.batchEmbedContents({ requests });
    return response.embeddings.map((e) => e.values);
  }
}

// ---------------------------------------------------------------------------
// Ollama
// ---------------------------------------------------------------------------

/**
 * Embedding provider backed by the `ollama` client, which is imported lazily
 * on the first non-empty `embed` call.
 *
 * `dimensions` defaults to 768; `config.endpoint` is passed to the client as
 * its `host` (left undefined when not configured).
 */
export class OllamaEmbeddingProvider implements EmbeddingProvider {
  private readonly _model: string;
  private readonly _dimensions: number;
  private readonly _host?: string;

  constructor(config: EmbeddingConfig) {
    this._model = config.model;
    this._dimensions = config.dimensions ?? 768;
    this._host = config.endpoint;
  }

  /** The configured (or default) embedding dimensionality. */
  get dimensions(): number {
    return this._dimensions;
  }

  /**
   * Embed `texts` with a single `embed` request.
   *
   * @param texts - Texts to embed.
   * @returns The `embeddings` array of the client response. An empty input
   *   yields an empty array without loading the client.
   */
  async embed(texts: string[]): Promise<number[][]> {
    if (texts.length === 0) {
      return [];
    }

    const { Ollama } = await import('ollama');
    const client = new Ollama({ host: this._host });

    const response = await client.embed({
      model: this._model,
      input: texts,
    });

    return response.embeddings;
  }
}

// ---------------------------------------------------------------------------
// LlamaCpp
// ---------------------------------------------------------------------------

/**
 * Embedding provider that POSTs each text to `<endpoint>/embedding` over HTTP
 * using the global `fetch`.
 *
 * `dimensions` defaults to 768 and `endpoint` to `http://localhost:8080`.
 * Trailing slashes on the endpoint are removed so the request path never
 * contains a double slash.
 */
export class LlamaCppEmbeddingProvider implements EmbeddingProvider {
  private readonly _dimensions: number;
  private readonly _endpoint: string;

  constructor(config: EmbeddingConfig) {
    this._dimensions = config.dimensions ?? 768;
    this._endpoint = (config.endpoint ?? 'http://localhost:8080').replace(/\/+$/, '');
  }

  /** The configured (or default) embedding dimensionality. */
  get dimensions(): number {
    return this._dimensions;
  }

  /**
   * Embed `texts` one request at a time, in input order.
   *
   * @param texts - Texts to embed.
   * @returns One vector per input text, ordered like `texts`.
   * @throws Error when a response has a non-OK status or its JSON body has no
   *   `embedding` array.
   */
  async embed(texts: string[]): Promise<number[][]> {
    const results: number[][] = [];

    // Requests are issued sequentially; the first failure aborts the batch.
    for (const text of texts) {
      const response = await fetch(`${this._endpoint}/embedding`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ content: text }),
      });

      if (!response.ok) {
        throw new Error(`LlamaCpp embedding request failed: ${response.status} ${response.statusText}`);
      }

      // Validate the shape instead of trusting the cast, so a malformed body
      // surfaces here rather than as an `undefined` vector downstream.
      const data = (await response.json()) as { embedding?: unknown } | null;
      const embedding = data?.embedding;
      if (!Array.isArray(embedding)) {
        throw new Error("LlamaCpp embedding response did not contain an 'embedding' array");
      }
      results.push(embedding as number[]);
    }

    return results;
  }
}

// ---------------------------------------------------------------------------
// Factory
// ---------------------------------------------------------------------------

/**
 * Create an embedding provider from config.
 *
 * @param config - Embedding configuration; `config.provider` selects the backend
 *   (`'openai'`, `'gemini'`, `'ollama'` or `'llamacpp'`).
 * @returns A provider instance constructed from `config`.
 * @throws Error when `config.provider` is not one of the supported values.
 */
export function createEmbeddingProvider(config: EmbeddingConfig): EmbeddingProvider {
  switch (config.provider) {
    case 'openai':
      return new OpenAIEmbeddingProvider(config);
    case 'gemini':
      return new GeminiEmbeddingProvider(config);
    case 'ollama':
      return new OllamaEmbeddingProvider(config);
    case 'llamacpp':
      return new LlamaCppEmbeddingProvider(config);
    default:
      throw new Error(`Unknown embedding provider: ${(config as Record<string, unknown>).provider}`);
  }
}