feat: Allow using Vector Stores directly as Tools (#12311)

Co-authored-by: Oleg Ivaniv <me@olegivaniv.com>
This commit is contained in:
Mutasem Aldmour
2025-01-06 09:07:59 +01:00
committed by GitHub
parent d7cc789d79
commit 76dded4bea
16 changed files with 402 additions and 43 deletions

View File

@@ -15,15 +15,15 @@ import { getConnectionHintNoticeField } from '@utils/sharedFields';
export class ToolVectorStore implements INodeType {
description: INodeTypeDescription = {
displayName: 'Vector Store Tool',
displayName: 'Vector Store Question Answer Tool',
name: 'toolVectorStore',
icon: 'fa:database',
iconColor: 'black',
group: ['transform'],
version: [1],
description: 'Retrieve context from vector store',
description: 'Answer questions with a vector store',
defaults: {
name: 'Vector Store Tool',
name: 'Answer questions with a vector store',
},
codex: {
categories: ['AI'],
@@ -60,20 +60,23 @@ export class ToolVectorStore implements INodeType {
properties: [
getConnectionHintNoticeField([NodeConnectionType.AiAgent]),
{
displayName: 'Name',
displayName: 'Data Name',
name: 'name',
type: 'string',
default: '',
placeholder: 'e.g. company_knowledge_base',
placeholder: 'e.g. users_info',
validateType: 'string-alphanumeric',
description: 'Name of the vector store',
description:
'Name of the data in vector store. This will be used to fill this tool description: Useful for when you need to answer questions about [name]. Whenever you need information about [data description], you should ALWAYS use this. Input should be a fully formed question.',
},
{
displayName: 'Description',
displayName: 'Description of Data',
name: 'description',
type: 'string',
default: '',
placeholder: 'Retrieves data about [insert information about your data here]...',
placeholder: "[Describe your data here, e.g. a user's name, email, etc.]",
description:
'Describe the data in vector store. This will be used to fill this tool description: Useful for when you need to answer questions about [name]. Whenever you need information about [data description], you should ALWAYS use this. Input should be a fully formed question.',
typeOptions: {
rows: 3,
},

View File

@@ -228,7 +228,7 @@ export class VectorStorePGVector extends createVectorStoreNode({
testedBy: 'postgresConnectionTest',
},
],
operationModes: ['load', 'insert', 'retrieve'],
operationModes: ['load', 'insert', 'retrieve', 'retrieve-as-tool'],
},
sharedFields,
insertFields,

View File

@@ -65,7 +65,7 @@ export class VectorStorePinecone extends createVectorStoreNode({
required: true,
},
],
operationModes: ['load', 'insert', 'retrieve', 'update'],
operationModes: ['load', 'insert', 'retrieve', 'update', 'retrieve-as-tool'],
},
methods: { listSearch: { pineconeIndexSearch } },
retrieveFields,

View File

@@ -55,7 +55,7 @@ export class VectorStoreSupabase extends createVectorStoreNode({
required: true,
},
],
operationModes: ['load', 'insert', 'retrieve', 'update'],
operationModes: ['load', 'insert', 'retrieve', 'update', 'retrieve-as-tool'],
},
methods: {
listSearch: { supabaseTableNameSearch },

View File

@@ -0,0 +1,161 @@
import type { DocumentInterface } from '@langchain/core/documents';
import type { Embeddings } from '@langchain/core/embeddings';
import type { VectorStore } from '@langchain/core/vectorstores';
import { mock } from 'jest-mock-extended';
import type { DynamicTool } from 'langchain/tools';
import type { ISupplyDataFunctions, NodeParameterValueType } from 'n8n-workflow';
import type { VectorStoreNodeConstructorArgs } from './createVectorStoreNode';
import { createVectorStoreNode } from './createVectorStoreNode';
// Replace the real logWrapper with a stub that returns its first argument under
// the `logWrapped` key, so each test can unwrap `data.response.logWrapped` and
// inspect the exact object (vector store or tool) the node passed to logWrapper.
jest.mock('@utils/logWrapper', () => ({
logWrapper: jest.fn().mockImplementation((val: DynamicTool) => ({ logWrapped: val })),
}));
// Node parameters shared by every test case; individual tests spread these and add `mode` etc.
const DEFAULT_PARAMETERS = { options: {}, topK: 1 };

// Canned similarity-search result: [document, score] pairs returned by the mocked vector store.
const MOCK_DOCUMENTS: Array<[DocumentInterface, number]> = [
	[{ pageContent: 'first page', metadata: { id: 123 } }, 0],
	[{ pageContent: 'second page', metadata: { id: 567 } }, 0],
];

// Query string handed to the tool under test.
const MOCK_SEARCH_VALUE = 'search value';

// Vector the mocked embeddings model resolves to for any query.
const MOCK_EMBEDDED_SEARCH_VALUE = [1, 2, 3];
/**
 * Tests for the node class produced by `createVectorStoreNode`, exercising
 * `supplyData` in the `retrieve` and `retrieve-as-tool` operation modes.
 *
 * `logWrapper` is mocked at module level to return `{ logWrapped: value }`, so
 * each test unwraps `data.response.logWrapped` to reach the supplied object.
 */
describe('createVectorStoreNode', () => {
	// Vector store stub: every similarity search resolves to the fixed MOCK_DOCUMENTS pairs.
	const vectorStore = mock<VectorStore>({
		similaritySearchVectorWithScore: jest.fn().mockResolvedValue(MOCK_DOCUMENTS),
	});

	// Constructor args with no extra fields; the client factory always hands back the stub above.
	const vectorStoreNodeArgs = mock<VectorStoreNodeConstructorArgs>({
		sharedFields: [],
		insertFields: [],
		loadFields: [],
		retrieveFields: [],
		updateFields: [],
		getVectorStoreClient: jest.fn().mockReturnValue(vectorStore),
	});

	// Embeddings stub: any query embeds to MOCK_EMBEDDED_SEARCH_VALUE.
	const embeddings = mock<Embeddings>({
		embedQuery: jest.fn().mockResolvedValue(MOCK_EMBEDDED_SEARCH_VALUE),
	});

	// Execution context stub; each test installs its own getNodeParameter implementation.
	const context = mock<ISupplyDataFunctions>({
		getNodeParameter: jest.fn(),
		getInputConnectionData: jest.fn().mockReturnValue(embeddings),
	});

	describe('retrieve mode', () => {
		it('supplies vector store as data', async () => {
			// ARRANGE
			const parameters: Record<string, NodeParameterValueType | object> = {
				...DEFAULT_PARAMETERS,
				mode: 'retrieve',
			};
			context.getNodeParameter.mockImplementation(
				(parameterName: string): NodeParameterValueType | object => parameters[parameterName],
			);

			// ACT
			const VectorStoreNodeType = createVectorStoreNode(vectorStoreNodeArgs);
			const nodeType = new VectorStoreNodeType();
			const data = await nodeType.supplyData.call(context, 1);
			const wrappedVectorStore = (data.response as { logWrapped: VectorStore }).logWrapped;

			// ASSERT
			expect(wrappedVectorStore).toEqual(vectorStore);
			expect(vectorStoreNodeArgs.getVectorStoreClient).toHaveBeenCalled();
		});
	});

	describe('retrieve-as-tool mode', () => {
		it('supplies DynamicTool that queries vector store and returns documents with metadata', async () => {
			// ARRANGE
			const parameters: Record<string, NodeParameterValueType | object> = {
				...DEFAULT_PARAMETERS,
				mode: 'retrieve-as-tool',
				// Key must be `toolDescription`: that is the parameter name the node reads and
				// the one asserted below. Under any other key both sides of the description
				// assertion would be `undefined` and the check would pass vacuously.
				toolDescription: 'tool description',
				toolName: 'tool name',
				includeDocumentMetadata: true,
			};
			context.getNodeParameter.mockImplementation(
				(parameterName: string): NodeParameterValueType | object => parameters[parameterName],
			);

			// ACT
			const VectorStoreNodeType = createVectorStoreNode(vectorStoreNodeArgs);
			const nodeType = new VectorStoreNodeType();
			const data = await nodeType.supplyData.call(context, 1);
			const tool = (data.response as { logWrapped: DynamicTool }).logWrapped;
			const output = await tool?.func(MOCK_SEARCH_VALUE);

			// ASSERT
			expect(tool?.getName()).toEqual(parameters.toolName);
			expect(tool?.description).toEqual(parameters.toolDescription);
			expect(embeddings.embedQuery).toHaveBeenCalledWith(MOCK_SEARCH_VALUE);
			expect(vectorStore.similaritySearchVectorWithScore).toHaveBeenCalledWith(
				MOCK_EMBEDDED_SEARCH_VALUE,
				parameters.topK,
				parameters.filter,
			);
			// With metadata enabled, each document is serialized in full.
			expect(output).toEqual([
				{ type: 'text', text: JSON.stringify(MOCK_DOCUMENTS[0][0]) },
				{ type: 'text', text: JSON.stringify(MOCK_DOCUMENTS[1][0]) },
			]);
		});

		it('supplies DynamicTool that queries vector store and returns documents without metadata', async () => {
			// ARRANGE
			const parameters: Record<string, NodeParameterValueType | object> = {
				...DEFAULT_PARAMETERS,
				mode: 'retrieve-as-tool',
				// Same key requirement as in the previous test: the node reads `toolDescription`.
				toolDescription: 'tool description',
				toolName: 'tool name',
				includeDocumentMetadata: false,
			};
			context.getNodeParameter.mockImplementation(
				(parameterName: string): NodeParameterValueType | object => parameters[parameterName],
			);

			// ACT
			const VectorStoreNodeType = createVectorStoreNode(vectorStoreNodeArgs);
			const nodeType = new VectorStoreNodeType();
			const data = await nodeType.supplyData.call(context, 1);
			const tool = (data.response as { logWrapped: DynamicTool }).logWrapped;
			const output = await tool?.func(MOCK_SEARCH_VALUE);

			// ASSERT
			expect(tool?.getName()).toEqual(parameters.toolName);
			expect(tool?.description).toEqual(parameters.toolDescription);
			expect(embeddings.embedQuery).toHaveBeenCalledWith(MOCK_SEARCH_VALUE);
			expect(vectorStore.similaritySearchVectorWithScore).toHaveBeenCalledWith(
				MOCK_EMBEDDED_SEARCH_VALUE,
				parameters.topK,
				parameters.filter,
			);
			// With metadata disabled, only pageContent is serialized.
			expect(output).toEqual([
				{ type: 'text', text: JSON.stringify({ pageContent: MOCK_DOCUMENTS[0][0].pageContent }) },
				{ type: 'text', text: JSON.stringify({ pageContent: MOCK_DOCUMENTS[1][0].pageContent }) },
			]);
		});
	});
});

View File

@@ -3,6 +3,7 @@
import type { Document } from '@langchain/core/documents';
import type { Embeddings } from '@langchain/core/embeddings';
import type { VectorStore } from '@langchain/core/vectorstores';
import { DynamicTool } from 'langchain/tools';
import { NodeConnectionType, NodeOperationError } from 'n8n-workflow';
import type {
IExecuteFunctions,
@@ -28,9 +29,14 @@ import { getConnectionHintNoticeField } from '@utils/sharedFields';
import { processDocument } from './processDocuments';
type NodeOperationMode = 'insert' | 'load' | 'retrieve' | 'update';
type NodeOperationMode = 'insert' | 'load' | 'retrieve' | 'update' | 'retrieve-as-tool';
const DEFAULT_OPERATION_MODES: NodeOperationMode[] = ['load', 'insert', 'retrieve'];
const DEFAULT_OPERATION_MODES: NodeOperationMode[] = [
'load',
'insert',
'retrieve',
'retrieve-as-tool',
];
interface NodeMeta {
displayName: string;
@@ -43,7 +49,7 @@ interface NodeMeta {
operationModes?: NodeOperationMode[];
}
interface VectorStoreNodeConstructorArgs {
export interface VectorStoreNodeConstructorArgs {
meta: NodeMeta;
methods?: {
listSearch?: {
@@ -102,10 +108,18 @@ function getOperationModeOptions(args: VectorStoreNodeConstructorArgs): INodePro
action: 'Add documents to vector store',
},
{
name: 'Retrieve Documents (For Agent/Chain)',
name: 'Retrieve Documents (As Vector Store for AI Agent)',
value: 'retrieve',
description: 'Retrieve documents from vector store to be used with AI nodes',
action: 'Retrieve documents for AI processing',
description: 'Retrieve documents from vector store to be used as vector store with AI nodes',
action: 'Retrieve documents for AI processing as Vector Store',
outputConnectionType: NodeConnectionType.AiVectorStore,
},
{
name: 'Retrieve Documents (As Tool for AI Agent)',
value: 'retrieve-as-tool',
description: 'Retrieve documents from vector store to be used as tool with AI nodes',
action: 'Retrieve documents for AI processing as Tool',
outputConnectionType: NodeConnectionType.AiTool,
},
{
name: 'Update Documents',
@@ -136,7 +150,8 @@ export const createVectorStoreNode = (args: VectorStoreNodeConstructorArgs) =>
codex: {
categories: ['AI'],
subcategories: {
AI: ['Vector Stores', 'Root Nodes'],
AI: ['Vector Stores', 'Tools', 'Root Nodes'],
Tools: ['Other Tools'],
},
resources: {
primaryDocumentation: [
@@ -153,6 +168,10 @@ export const createVectorStoreNode = (args: VectorStoreNodeConstructorArgs) =>
const mode = parameters?.mode;
const inputs = [{ displayName: "Embedding", type: "${NodeConnectionType.AiEmbedding}", required: true, maxConnections: 1}]
if (mode === 'retrieve-as-tool') {
return inputs;
}
if (['insert', 'load', 'update'].includes(mode)) {
inputs.push({ displayName: "", type: "${NodeConnectionType.Main}"})
}
@@ -166,6 +185,11 @@ export const createVectorStoreNode = (args: VectorStoreNodeConstructorArgs) =>
outputs: `={{
((parameters) => {
const mode = parameters?.mode ?? 'retrieve';
if (mode === 'retrieve-as-tool') {
return [{ displayName: "Tool", type: "${NodeConnectionType.AiTool}"}]
}
if (mode === 'retrieve') {
return [{ displayName: "Vector Store", type: "${NodeConnectionType.AiVectorStore}"}]
}
@@ -189,6 +213,37 @@ export const createVectorStoreNode = (args: VectorStoreNodeConstructorArgs) =>
},
},
},
{
displayName: 'Name',
name: 'toolName',
type: 'string',
default: '',
required: true,
description: 'Name of the vector store',
placeholder: 'e.g. company_knowledge_base',
validateType: 'string-alphanumeric',
displayOptions: {
show: {
mode: ['retrieve-as-tool'],
},
},
},
{
displayName: 'Description',
name: 'toolDescription',
type: 'string',
default: '',
required: true,
typeOptions: { rows: 2 },
description:
'Explain to the LLM what this tool does, a good, specific description would allow LLMs to produce expected results much more often',
placeholder: `e.g. ${args.meta.description}`,
displayOptions: {
show: {
mode: ['retrieve-as-tool'],
},
},
},
...args.sharedFields,
...transformDescriptionForOperationMode(args.insertFields ?? [], 'insert'),
// Prompt and topK are always used for the load operation
@@ -214,7 +269,19 @@ export const createVectorStoreNode = (args: VectorStoreNodeConstructorArgs) =>
description: 'Number of top results to fetch from vector store',
displayOptions: {
show: {
mode: ['load'],
mode: ['load', 'retrieve-as-tool'],
},
},
},
{
displayName: 'Include Metadata',
name: 'includeDocumentMetadata',
type: 'boolean',
default: true,
description: 'Whether or not to include document metadata',
displayOptions: {
show: {
mode: ['load', 'retrieve-as-tool'],
},
},
},
@@ -271,10 +338,16 @@ export const createVectorStoreNode = (args: VectorStoreNodeConstructorArgs) =>
filter,
);
const includeDocumentMetadata = this.getNodeParameter(
'includeDocumentMetadata',
itemIndex,
true,
) as boolean;
const serializedDocs = docs.map(([doc, score]) => {
const document = {
metadata: doc.metadata,
pageContent: doc.pageContent,
...(includeDocumentMetadata ? { metadata: doc.metadata } : {}),
};
return {
@@ -381,12 +454,12 @@ export const createVectorStoreNode = (args: VectorStoreNodeConstructorArgs) =>
throw new NodeOperationError(
this.getNode(),
'Only the "load" and "insert" operation modes are supported with execute',
'Only the "load", "update" and "insert" operation modes are supported with execute',
);
}
async supplyData(this: ISupplyDataFunctions, itemIndex: number): Promise<SupplyData> {
const mode = this.getNodeParameter('mode', 0) as 'load' | 'insert' | 'retrieve';
const mode = this.getNodeParameter('mode', 0) as NodeOperationMode;
const filter = getMetadataFiltersValues(this, itemIndex);
const embeddings = (await this.getInputConnectionData(
NodeConnectionType.AiEmbedding,
@@ -400,9 +473,54 @@ export const createVectorStoreNode = (args: VectorStoreNodeConstructorArgs) =>
};
}
if (mode === 'retrieve-as-tool') {
const toolDescription = this.getNodeParameter('toolDescription', itemIndex) as string;
const toolName = this.getNodeParameter('toolName', itemIndex) as string;
const topK = this.getNodeParameter('topK', itemIndex, 4) as number;
const includeDocumentMetadata = this.getNodeParameter(
'includeDocumentMetadata',
itemIndex,
true,
) as boolean;
const vectorStoreTool = new DynamicTool({
name: toolName,
description: toolDescription,
func: async (input) => {
const vectorStore = await args.getVectorStoreClient(
this,
filter,
embeddings,
itemIndex,
);
const embeddedPrompt = await embeddings.embedQuery(input);
const documents = await vectorStore.similaritySearchVectorWithScore(
embeddedPrompt,
topK,
filter,
);
return documents
.map((document) => {
if (includeDocumentMetadata) {
return { type: 'text', text: JSON.stringify(document[0]) };
}
return {
type: 'text',
text: JSON.stringify({ pageContent: document[0].pageContent }),
};
})
.filter((document) => !!document);
},
});
return {
response: logWrapper(vectorStoreTool, this),
};
}
throw new NodeOperationError(
this.getNode(),
'Only the "retrieve" operation mode is supported to supply data',
'Only the "retrieve" and "retrieve-as-tool" operation mode is supported to supply data',
);
}
};