Skip to content

Commit 312c8af

Browse files
authored
Merge pull request #1 from techiejd/multitables
Adds multiple tables ability for embedding
2 parents 3cad568 + 16b7db8 commit 312c8af

File tree

15 files changed

+539
-258
lines changed

15 files changed

+539
-258
lines changed

README.md

Lines changed: 141 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ A Payload CMS plugin that adds vector search capabilities to your collections us
1111
- 🎯 **Flexible Chunking**: You provide the custom chunkers for different field types (text, rich text, etc.)
1212
- 🔧 **Configurable**: Choose which collections and fields to vectorize
1313
- 🌐 **REST API**: Built-in vector-search endpoint for querying vectorized content
14+
- 🏊 **Multiple Knowledge Pools**: Separate knowledge pools with independent configurations (dims, ivfflatLists, embedding functions)
1415

1516
## Prerequisites
1617

@@ -65,12 +66,19 @@ const chunkRichText = async (richText: SerializedEditorState,
6566
return /* string array */
6667
}
6768

68-
// Create the integration
69+
// Create the integration with static configs (dims, ivfflatLists)
6970
const { afterSchemaInitHook, payloadcmsVectorize } = createVectorizeIntegration({
7071
// Known limitation: changing these values after the initial setup is not
7172
// handled automatically; a manual database migration is necessary.
72-
dims: 1536, // Vector dimensions
73-
ivfflatLists: 100, // IVFFLAT index parameter
73+
main: {
74+
dims: 1536, // Vector dimensions
75+
ivfflatLists: 100, // IVFFLAT index parameter
76+
},
77+
// You can add more knowledge pools with different static configs
78+
// products: {
79+
// dims: 384,
80+
// ivfflatLists: 50,
81+
// },
7482
})
7583

7684
export default buildConfig({
@@ -83,18 +91,37 @@ export default buildConfig({
8391
}),
8492
plugins: [
8593
payloadcmsVectorize({
86-
// The collection-fields you want vectorized
87-
collections: {
88-
posts: {
89-
fields: {
90-
title: { chunker: chunkText },
91-
content: { chunker: chunkRichText },
94+
// Knowledge pools - dynamic configs (collections, embedding functions)
95+
knowledgePools: {
96+
main: {
97+
// The collection-fields you want vectorized in this pool
98+
collections: {
99+
posts: {
100+
fields: {
101+
title: { chunker: chunkText },
102+
content: { chunker: chunkRichText },
103+
},
104+
},
92105
},
106+
embedDocs,
107+
embedQuery,
108+
embeddingVersion: 'v1.0.0',
93109
},
110+
// You can add more knowledge pools with different dynamic configs
111+
// products: {
112+
// collections: { ... },
113+
// embedDocs: differentEmbedDocs,
114+
// embedQuery: differentEmbedQuery,
115+
// embeddingVersion: 'v2.0.0',
116+
// },
94117
},
95-
embedDocs,
96-
embedQuery,
97-
embeddingVersion: 'v1.0.0',
118+
// Optional plugin options:
119+
// queueName: 'custom-queue',
120+
// endpointOverrides: {
121+
// path: '/custom-vector-search',
122+
// enabled: true,
123+
// },
124+
// disabled: false,
98125
}),
99126
],
100127
})
@@ -108,7 +135,10 @@ The plugin automatically creates a `/api/vector-search` endpoint:
108135
const response = await fetch('/api/vector-search', {
109136
method: 'POST',
110137
headers: { 'Content-Type': 'application/json' },
111-
body: JSON.stringify({ query: 'What is machine learning?' }),
138+
body: JSON.stringify({
139+
query: 'What is machine learning?', // Required: the search query text
140+
knowledgePool: 'main', // Required: specify which knowledge pool to search
141+
}),
112142
})
113143

114144
const results = await response.json()
@@ -119,15 +149,30 @@ const results = await response.json()
119149

120150
### Plugin Options
121151

122-
| Option | Type | Required | Description |
123-
| ------------------- | ------------------------------------------- | -------- | ----------------------------------------- |
124-
| `collections` | `Record<string, CollectionVectorizeOption>` | ✅ | Collections and fields to vectorize |
125-
| `embedDocs` | `EmbedDocsFn` | ✅ | Function to embed multiple documents |
126-
| `embedQuery` | `EmbedQueryFn` | ✅ | Function to embed search queries |
127-
| `embeddingVersion` | `string` | ✅ | Version string for tracking model changes |
128-
| `queueName` | `string` | ❌ | Custom queue name for background jobs |
129-
| `endpointOverrides` | `object` | ❌ | Customize the search endpoint |
130-
| `disabled` | `boolean` | ❌ | Disable plugin while keeping schema |
152+
| Option | Type | Required | Description |
153+
| ------------------- | -------------------------------------------- | -------- | ---------------------------------------- |
154+
| `knowledgePools` | `Record<KnowledgePool, KnowledgePoolConfig>` | ✅ | Knowledge pools and their configurations |
155+
| `queueName` | `string` | ❌ | Custom queue name for background jobs |
156+
| `endpointOverrides` | `object` | ❌ | Customize the search endpoint |
157+
| `disabled` | `boolean` | ❌ | Disable plugin while keeping schema |
158+
159+
### Knowledge Pool Config
160+
161+
Knowledge pools are configured in two steps. Static configs define the database schema, so changing them requires a migration; dynamic configs define runtime behavior and can be changed without a migration.
162+
163+
**1. Static Config** (passed to `createVectorizeIntegration`):
164+
165+
- `dims`: `number` - Vector dimensions for pgvector column
166+
- `ivfflatLists`: `number` - IVFFLAT index parameter
167+
168+
The embeddings collection name will be the same as the knowledge pool name.
169+
170+
**2. Dynamic Config** (passed to `payloadcmsVectorize`):
171+
172+
- `collections`: `Record<string, CollectionVectorizeOption>` - Collections and fields to vectorize
173+
- `embedDocs`: `EmbedDocsFn` - Function to embed multiple documents
174+
- `embedQuery`: `EmbedQueryFn` - Function to embed search queries
175+
- `embeddingVersion`: `string` - Version string for tracking model changes
131176

132177
## Chunkers
133178

@@ -187,10 +232,16 @@ Search for similar content using vector similarity.
187232

188233
```json
189234
{
190-
"query": "Your search query"
235+
"query": "Your search query",
236+
"knowledgePool": "main"
191237
}
192238
```
193239

240+
**Parameters:**
241+
242+
- `query` (required): The search query string
243+
- `knowledgePool` (required): The knowledge pool identifier to search in
244+
194245
**Response:**
195246

196247
```json
@@ -210,6 +261,73 @@ Search for similar content using vector similarity.
210261
}
211262
```
212263

264+
## Migration from v0.1.0 to v0.2.0
265+
266+
Version 0.2.0 introduces support for multiple knowledge pools. This is a **breaking change** that requires updating your configuration.
267+
268+
### Before (v0.1.0):
269+
270+
```typescript
271+
const { afterSchemaInitHook, payloadcmsVectorize } = createVectorizeIntegration({
272+
dims: 1536,
273+
ivfflatLists: 100,
274+
})
275+
276+
payloadcmsVectorize({
277+
collections: {
278+
posts: { fields: { ... } },
279+
},
280+
embedDocs,
281+
embedQuery,
282+
embeddingVersion: 'v1.0.0',
283+
})
284+
```
285+
286+
### After (v0.2.0):
287+
288+
```typescript
289+
// Static configs (schema-related) passed to createVectorizeIntegration
290+
const { afterSchemaInitHook, payloadcmsVectorize } = createVectorizeIntegration({
291+
main: {
292+
dims: 1536,
293+
ivfflatLists: 100,
294+
},
295+
})
296+
297+
// Dynamic configs (runtime behavior) passed to payloadcmsVectorize
298+
payloadcmsVectorize({
299+
knowledgePools: {
300+
main: {
301+
collections: {
302+
posts: { fields: { ... } },
303+
},
304+
embedDocs,
305+
embedQuery,
306+
embeddingVersion: 'v1.0.0',
307+
},
308+
},
309+
})
310+
```
311+
312+
### API Changes
313+
314+
The vector search endpoint now requires a `knowledgePool` parameter:
315+
316+
```typescript
317+
// Before
318+
{ query: 'search term' }
319+
320+
// After
321+
{ query: 'search term', knowledgePool: 'main' }
322+
```
323+
324+
### Benefits of Multiple Knowledge Pools
325+
326+
- **Separate knowledge domains**: Keep different types of content in separate pools
327+
- **Different technical requirements**: Each pool can have different `dims`, `ivfflatLists`, and embedding functions
328+
- **Flexible organization**: Collections can appear in multiple pools if needed
329+
- **Independent versioning**: Each pool can track its own embedding model version
330+
213331
## Requirements
214332

215333
- Payload CMS ^3.37.0

dev/payload-types.ts

Lines changed: 8 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,7 @@ export interface Config {
6868
blocks: {};
6969
collections: {
7070
posts: Post;
71-
media: Media;
72-
embeddings: Embedding;
71+
default: Default;
7372
users: User;
7473
'payload-jobs': PayloadJob;
7574
'payload-locked-documents': PayloadLockedDocument;
@@ -79,8 +78,7 @@ export interface Config {
7978
collectionsJoins: {};
8079
collectionsSelect: {
8180
posts: PostsSelect<false> | PostsSelect<true>;
82-
media: MediaSelect<false> | MediaSelect<true>;
83-
embeddings: EmbeddingsSelect<false> | EmbeddingsSelect<true>;
81+
default: DefaultSelect<false> | DefaultSelect<true>;
8482
users: UsersSelect<false> | UsersSelect<true>;
8583
'payload-jobs': PayloadJobsSelect<false> | PayloadJobsSelect<true>;
8684
'payload-locked-documents': PayloadLockedDocumentsSelect<false> | PayloadLockedDocumentsSelect<true>;
@@ -150,31 +148,13 @@ export interface Post {
150148
updatedAt: string;
151149
createdAt: string;
152150
}
153-
/**
154-
* This interface was referenced by `Config`'s JSON-Schema
155-
* via the `definition` "media".
156-
*/
157-
export interface Media {
158-
id: number;
159-
updatedAt: string;
160-
createdAt: string;
161-
url?: string | null;
162-
thumbnailURL?: string | null;
163-
filename?: string | null;
164-
mimeType?: string | null;
165-
filesize?: number | null;
166-
width?: number | null;
167-
height?: number | null;
168-
focalX?: number | null;
169-
focalY?: number | null;
170-
}
171151
/**
172152
* Vector embeddings for search and similarity queries. Created by the payloadcms-vectorize plugin. Embeddings cannot be added or modified, only deleted, through the admin panel. No other restrictions enforced.
173153
*
174154
* This interface was referenced by `Config`'s JSON-Schema
175-
* via the `definition` "embeddings".
155+
* via the `definition` "default".
176156
*/
177-
export interface Embedding {
157+
export interface Default {
178158
id: number;
179159
/**
180160
* The collection that this embedding belongs to
@@ -324,12 +304,8 @@ export interface PayloadLockedDocument {
324304
value: number | Post;
325305
} | null)
326306
| ({
327-
relationTo: 'media';
328-
value: number | Media;
329-
} | null)
330-
| ({
331-
relationTo: 'embeddings';
332-
value: number | Embedding;
307+
relationTo: 'default';
308+
value: number | Default;
333309
} | null)
334310
| ({
335311
relationTo: 'users';
@@ -393,26 +369,9 @@ export interface PostsSelect<T extends boolean = true> {
393369
}
394370
/**
395371
* This interface was referenced by `Config`'s JSON-Schema
396-
* via the `definition` "media_select".
397-
*/
398-
export interface MediaSelect<T extends boolean = true> {
399-
updatedAt?: T;
400-
createdAt?: T;
401-
url?: T;
402-
thumbnailURL?: T;
403-
filename?: T;
404-
mimeType?: T;
405-
filesize?: T;
406-
width?: T;
407-
height?: T;
408-
focalX?: T;
409-
focalY?: T;
410-
}
411-
/**
412-
* This interface was referenced by `Config`'s JSON-Schema
413-
* via the `definition` "embeddings_select".
372+
* via the `definition` "default_select".
414373
*/
415-
export interface EmbeddingsSelect<T extends boolean = true> {
374+
export interface DefaultSelect<T extends boolean = true> {
416375
sourceCollection?: T;
417376
docId?: T;
418377
fieldPath?: T;

dev/payload.config.ts

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,11 @@ import { postgresAdapter } from '@payloadcms/db-postgres'
22
import { lexicalEditor } from '@payloadcms/richtext-lexical'
33
import path from 'path'
44
import { buildConfig } from 'payload'
5-
import { createVectorizeIntegration, StaticIntegrationConfig } from 'payloadcms-vectorize'
5+
import { createVectorizeIntegration } from 'payloadcms-vectorize'
66
import {
77
makeDummyEmbedDocs,
88
testEmbeddingVersion,
99
voyageEmbedDocs,
10-
voyageEmbedDims,
1110
voyageEmbedQuery,
1211
makeDummyEmbedQuery,
1312
} from './helpers/embed.js'
@@ -43,12 +42,12 @@ const ssl =
4342
}
4443
: undefined
4544

46-
const integrationConfig: StaticIntegrationConfig = {
47-
dims,
48-
ivfflatLists, // Rule of thumb: ivfflatLists = sqrt(total_number_of_vectors). Helps with working memory usage.
49-
}
50-
51-
const { afterSchemaInitHook, payloadcmsVectorize } = createVectorizeIntegration(integrationConfig)
45+
const { afterSchemaInitHook, payloadcmsVectorize } = createVectorizeIntegration({
46+
default: {
47+
dims,
48+
ivfflatLists, // Rule of thumb: ivfflatLists = sqrt(total_number_of_vectors). Helps with working memory usage.
49+
},
50+
})
5251

5352
const buildConfigWithPostgres = async () => {
5453
return buildConfig({
@@ -100,17 +99,21 @@ const buildConfigWithPostgres = async () => {
10099
},
101100
plugins: [
102101
payloadcmsVectorize({
103-
collections: {
104-
posts: {
105-
fields: {
106-
title: { chunker: chunkText },
107-
content: { chunker: chunkRichText },
102+
knowledgePools: {
103+
default: {
104+
collections: {
105+
posts: {
106+
fields: {
107+
title: { chunker: chunkText },
108+
content: { chunker: chunkRichText },
109+
},
110+
},
108111
},
112+
embedDocs,
113+
embedQuery,
114+
embeddingVersion: testEmbeddingVersion,
109115
},
110116
},
111-
embedDocs,
112-
embedQuery,
113-
embeddingVersion: testEmbeddingVersion,
114117
}),
115118
],
116119
secret: process.env.PAYLOAD_SECRET || 'test-secret_key',

0 commit comments

Comments
 (0)