19 changes: 10 additions & 9 deletions src/__tests__/embeddings/ollama.test.ts
@@ -250,7 +250,7 @@ describe('OllamaBackend', () => {
 expect(result).toEqual([[0.1], [0.2], [0.3], [0.4], [0.5]]);
 });

-it('should use default batch size of 10', async () => {
+it('should use default batch size of 50', async () => {
 const mockFetch = vi.fn().mockImplementation(async (_url, options) => {
 const body = JSON.parse(options.body as string);
 const inputLen = body.input.length;
@@ -260,14 +260,14 @@ describe('OllamaBackend', () => {
 });
 vi.stubGlobal('fetch', mockFetch);

-// Create backend without custom batchSize (should use default 10)
+// Create backend without custom batchSize (should use default 50)
 const backend = new OllamaBackend({ backend: 'ollama' });
-const texts = Array.from({ length: 25 }, (_, i) => `text${i}`);
+const texts = Array.from({ length: 120 }, (_, i) => `text${i}`);
 const result = await backend.embedBatch(texts);

-// Should make 3 batch requests (10+10+5)
+// Should make 3 batch requests (50+50+20)
 expect(mockFetch).toHaveBeenCalledTimes(3);
-expect(result).toHaveLength(25);
+expect(result).toHaveLength(120);
 });

 it('should process batches in parallel based on concurrency', async () => {
@@ -292,16 +292,17 @@ describe('OllamaBackend', () => {
 expect(result).toHaveLength(4);
 });

-it('should use default concurrency of 4', async () => {
+it('should use default concurrency of 20', async () => {
 const mockFetch = vi.fn().mockResolvedValue(createOllamaBatchEmbeddingResponse([[0.1]]));
 vi.stubGlobal('fetch', mockFetch);

 // Create backend with small batchSize to trigger multiple batches
 const backend = new OllamaBackend({ backend: 'ollama', batchSize: 1 });
-await backend.embedBatch(['t1', 't2', 't3', 't4', 't5', 't6', 't7', 't8']);
+const texts = Array.from({ length: 25 }, (_, i) => `t${i}`);
+await backend.embedBatch(texts);

-// With concurrency=4 and 8 items with batchSize=1, should still make 8 requests
-expect(mockFetch).toHaveBeenCalledTimes(8);
+// With concurrency=20 and 25 items with batchSize=1, should make 25 requests
+expect(mockFetch).toHaveBeenCalledTimes(25);
 });

 it('should preserve result order with parallel processing', async () => {
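A note on the updated expectations: the request count is just the chunk count, ceil(texts / batchSize). With the new default of 50, 120 texts split into 3 requests (50 + 50 + 20); with batchSize: 1, 25 texts always produce 25 requests, since concurrency caps how many run at once, not how many are made. A minimal sketch of that arithmetic in TypeScript (expectedRequestCount is an illustrative helper, not part of the codebase):

// Illustrative helper mirroring the arithmetic behind the assertions above.
function expectedRequestCount(textCount: number, batchSize: number): number {
  return Math.ceil(textCount / batchSize);
}

expectedRequestCount(120, 50); // => 3   (50 + 50 + 20)
expectedRequestCount(25, 1);   // => 25  (concurrency limits parallelism, not the total)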
10 changes: 5 additions & 5 deletions src/embeddings/ollama.ts
@@ -2,14 +2,14 @@ import type { EmbeddingBackend, EmbeddingConfig } from './types.js';
 import { chunkArray } from './types.js';
 import { fetchWithRetry } from './retry.js';

-/** Default batch size for Ollama (texts per batch request) */
-const DEFAULT_BATCH_SIZE = 10;
+/** Default batch size for Ollama (texts per request) */
+const DEFAULT_BATCH_SIZE = 50;

 /** Default concurrency for Ollama (parallel batch requests) */
-const DEFAULT_CONCURRENCY = 4;
+const DEFAULT_CONCURRENCY = 20;

-/** Default timeout for embedding requests (2 minutes per batch) */
-const DEFAULT_TIMEOUT_MS = 2 * 60 * 1000;
+/** Default timeout for embedding requests (5 minutes per batch) */
+const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000;

 /** Default Ollama model optimized for code search */
 export const DEFAULT_OLLAMA_MODEL = 'qwen3-embedding:0.6b';
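Taken together, these defaults presumably mean embedBatch splits its input into chunks of DEFAULT_BATCH_SIZE, keeps at most DEFAULT_CONCURRENCY chunk requests in flight, and applies DEFAULT_TIMEOUT_MS to each request. The sketch below shows one plausible shape for such a worker pool under those assumptions; it is not this file's actual implementation, and runBatches/embedChunk are invented names:

// Hedged sketch: a concurrency-limited batch runner (assumed design, not the real code).
async function runBatches<T>(
  chunks: T[][],
  concurrency: number,
  embedChunk: (chunk: T[]) => Promise<number[][]>
): Promise<number[][]> {
  const results: number[][][] = new Array(chunks.length);
  let next = 0;
  // Spawn up to `concurrency` workers; each claims the next unprocessed chunk.
  const workers = Array.from({ length: Math.min(concurrency, chunks.length) }, async () => {
    while (next < chunks.length) {
      const i = next++; // claiming is synchronous on the event loop, so workers never collide
      results[i] = await embedChunk(chunks[i]); // writing by index preserves input order
    }
  });
  await Promise.all(workers);
  return results.flat();
}

The bump from 10/4 to 50/20 raises the maximum in-flight texts from 40 to 1,000, which likely motivates stretching the per-request timeout from 2 to 5 minutes.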
3 changes: 3 additions & 0 deletions src/search/indexer.ts
@@ -1438,6 +1438,9 @@ export class CodeIndexer {
 for (let i = 0; i < chunks.length; i += embeddingBatchSize) {
 const batch = chunks.slice(i, i + embeddingBatchSize);
 const texts = batch.map((c) => c.content);
+console.error(
+  `[lance-context] Sending ${texts.length} texts to embedding backend (batch ${Math.floor(i / embeddingBatchSize) + 1}/${Math.ceil(chunks.length / embeddingBatchSize)})...`
+);
 const embeddings = await this.embeddingBackend.embedBatch(texts);
 batch.forEach((chunk, idx) => {
 chunk.embedding = embeddings[idx];
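The new log writes to stderr rather than stdout, presumably to keep stdout clean for protocol output, and reports the indexer's own batching granularity (embeddingBatchSize), which the backend's embedBatch appears to re-chunk further per the Ollama defaults above. With illustrative counts of 500 texts in the second of seven batches, the emitted line would read:

[lance-context] Sending 500 texts to embedding backend (batch 2/7)...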