Merged
53 changes: 38 additions & 15 deletions src/embeddings/ollama.ts
@@ -6,7 +6,10 @@ import { fetchWithRetry } from './retry.js';
 const DEFAULT_BATCH_SIZE = 100;
 
 /** Default concurrency for Ollama (parallel batch requests) */
-const DEFAULT_CONCURRENCY = 100;
+const DEFAULT_CONCURRENCY = 10;
+
+/** Default timeout for embedding requests (5 minutes) */
+const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000;
 
 /** Default Ollama model optimized for code search */
 export const DEFAULT_OLLAMA_MODEL = 'qwen3-embedding:0.6b';
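
Two things change here: the default concurrency drops from 100 to 10 parallel batch requests, and a five-minute request timeout (5 * 60 * 1000 = 300 000 ms) is introduced. The second hunk wires that constant to each request through an AbortController. A minimal standalone sketch of that pattern; fetchWithTimeout is a hypothetical helper name, not part of this PR, and it assumes a runtime with a global fetch (Node 18+ or browsers):

// Sketch of the AbortController timeout pattern the second hunk applies
// inline. fetchWithTimeout is a hypothetical helper, not part of this PR.
async function fetchWithTimeout(
  url: string,
  init: RequestInit,
  timeoutMs: number
): Promise<Response> {
  const controller = new AbortController();
  // Abort the request if it has not settled within timeoutMs
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
  try {
    return await fetch(url, { ...init, signal: controller.signal });
  } finally {
    // Always clear the timer so it cannot fire after the request completes
    clearTimeout(timeoutId);
  }
}
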
@@ -93,28 +96,48 @@ export class OllamaBackend implements EmbeddingBackend {
     const batches = chunkArray(texts, this.batchSize);
     const results: number[][] = new Array(texts.length);
 
+    console.error(
+      `[lance-context] Embedding ${texts.length} texts in ${batches.length} batches (concurrency: ${this.concurrency})`
+    );
+
     // Process batches in parallel groups controlled by concurrency
     for (let i = 0; i < batches.length; i += this.concurrency) {
       const batchGroup = batches.slice(i, i + this.concurrency);
+      const groupStart = Date.now();
 
       const batchPromises = batchGroup.map(async (batch, groupIndex) => {
-        const response = await fetchWithRetry(`${this.baseUrl}/api/embed`, {
-          method: 'POST',
-          headers: { 'Content-Type': 'application/json' },
-          body: JSON.stringify({
-            model: this.model,
-            input: batch,
-          }),
-        });
-
-        if (!response.ok) {
-          throw new Error(`Ollama embedding failed: ${response.status}`);
-        }
-
-        const data = (await response.json()) as { embeddings: number[][] };
-        return { batchIndex: i + groupIndex, embeddings: data.embeddings };
+        // Create abort controller with timeout
+        const controller = new AbortController();
+        const timeoutId = setTimeout(() => controller.abort(), DEFAULT_TIMEOUT_MS);
+
+        try {
+          const response = await fetchWithRetry(`${this.baseUrl}/api/embed`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({
+              model: this.model,
+              input: batch,
+            }),
+            signal: controller.signal,
+          });
+
+          if (!response.ok) {
+            throw new Error(`Ollama embedding failed: ${response.status}`);
+          }
+
+          const data = (await response.json()) as { embeddings: number[][] };
+          return { batchIndex: i + groupIndex, embeddings: data.embeddings };
+        } finally {
+          clearTimeout(timeoutId);
+        }
       });
 
       const batchResults = await Promise.all(batchPromises);
+      const groupElapsed = ((Date.now() - groupStart) / 1000).toFixed(1);
+      const processedSoFar = Math.min((i + this.concurrency) * this.batchSize, texts.length);
+      console.error(
+        `[lance-context] Embedded batch group ${Math.floor(i / this.concurrency) + 1}/${Math.ceil(batches.length / this.concurrency)} (${processedSoFar}/${texts.length} texts) in ${groupElapsed}s`
+      );
 
       // Place results in correct positions
       for (const { batchIndex, embeddings } of batchResults) {