Mirror of https://github.com/Abdulazizzn/n8n-enterprise-unlocked.git
feat(n8n Evaluation Node): Add pre-defined metrics to the "Set Metrics" operation (#17127)
@@ -1,9 +1,9 @@
 import { WebhookAuthorizationError } from 'n8n-nodes-base/dist/nodes/Webhook/error';
 import { validateWebhookAuthentication } from 'n8n-nodes-base/dist/nodes/Webhook/utils';
 import type { INodeTypeDescription, IWebhookFunctions, IWebhookResponseData } from 'n8n-workflow';
-import { NodeConnectionTypes, Node } from 'n8n-workflow';
+import { NodeConnectionTypes, Node, nodeNameToToolName } from 'n8n-workflow';

-import { getConnectedTools, nodeNameToToolName } from '@utils/helpers';
+import { getConnectedTools } from '@utils/helpers';

 import type { CompressionResponse } from './FlushingTransport';
 import { McpServerManager } from './McpServer';
@@ -12,7 +12,12 @@ import type {
   ExecutionError,
   IDataObject,
 } from 'n8n-workflow';
-import { jsonParse, NodeConnectionTypes, NodeOperationError } from 'n8n-workflow';
+import {
+  jsonParse,
+  NodeConnectionTypes,
+  NodeOperationError,
+  nodeNameToToolName,
+} from 'n8n-workflow';

 import {
   buildInputSchemaField,
@@ -20,7 +25,6 @@ import {
   buildJsonSchemaExampleNotice,
   schemaTypeField,
 } from '@utils/descriptions';
-import { nodeNameToToolName } from '@utils/helpers';
 import { convertJsonSchemaToZod, generateSchemaFromExample } from '@utils/schemaParsing';
 import { getConnectionHintNoticeField } from '@utils/sharedFields';
@@ -8,9 +8,8 @@ import type {
   ISupplyDataFunctions,
   SupplyData,
 } from 'n8n-workflow';
-import { NodeConnectionTypes } from 'n8n-workflow';
+import { NodeConnectionTypes, nodeNameToToolName } from 'n8n-workflow';

-import { nodeNameToToolName } from '@utils/helpers';
 import { logWrapper } from '@utils/logWrapper';
 import { getConnectionHintNoticeField } from '@utils/sharedFields';
@@ -6,7 +6,7 @@ import type {
   INodeTypeDescription,
 } from 'n8n-workflow';

-import { nodeNameToToolName } from '@utils/helpers';
+import { nodeNameToToolName } from 'n8n-workflow';

 import { localResourceMapping } from './methods';
 import { WorkflowToolService } from './utils/WorkflowToolService';
@@ -4,7 +4,8 @@ import type { VectorStore } from '@langchain/core/vectorstores';
 import { DynamicTool } from 'langchain/tools';
 import { NodeConnectionTypes, type ISupplyDataFunctions, type SupplyData } from 'n8n-workflow';

-import { getMetadataFiltersValues, nodeNameToToolName } from '@utils/helpers';
+import { getMetadataFiltersValues } from '@utils/helpers';
+import { nodeNameToToolName } from 'n8n-workflow';
 import { logWrapper } from '@utils/logWrapper';

 import type { VectorStoreNodeConstructorArgs } from '../types';
@@ -8,7 +8,6 @@ import type { BaseChatMemory } from 'langchain/memory';
 import { NodeConnectionTypes, NodeOperationError, jsonStringify } from 'n8n-workflow';
 import type {
   AiEvent,
-  INode,
   IDataObject,
   IExecuteFunctions,
   ISupplyDataFunctions,
@@ -251,14 +250,6 @@ export function unwrapNestedOutput(output: Record<string, unknown>): Record<string, unknown>
   return output;
 }

-/**
- * Converts a node name to a valid tool name by replacing special characters with underscores
- * and collapsing consecutive underscores into a single one.
- */
-export function nodeNameToToolName(node: INode): string {
-  return node.name.replace(/[\s.?!=+#@&*()[\]{}:;,<>\/\\'"^%$]/g, '_').replace(/_+/g, '_');
-}
-
 /**
  * Detects if a text contains a character that repeats sequentially for a specified threshold.
  * This is used to prevent performance issues with tiktoken on highly repetitive content.
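For orientation, a minimal usage sketch of the relocated helper, assuming it is now exported from n8n-workflow as the import changes above indicate (the node object here is a hypothetical stub, not a fixture from this diff):

import type { INode } from 'n8n-workflow';
import { nodeNameToToolName } from 'n8n-workflow';

// Hypothetical stub; only `name` matters to the helper.
const node = { name: 'Get Events? (v2)' } as INode;

// Every special character becomes '_', then runs of '_' collapse to one:
// 'Get Events? (v2)' -> 'Get_Events_v2_'
console.log(nodeNameToToolName(node));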
@@ -9,59 +9,10 @@ import {
   escapeSingleCurlyBrackets,
   getConnectedTools,
   hasLongSequentialRepeat,
-  nodeNameToToolName,
   unwrapNestedOutput,
 } from '../helpers';
 import { N8nTool } from '../N8nTool';

-describe('nodeNameToToolName', () => {
-  const getNodeWithName = (name: string): INode => ({
-    id: 'test-node',
-    name,
-    type: 'test',
-    typeVersion: 1,
-    position: [0, 0] as [number, number],
-    parameters: {},
-  });
-
-  it('should replace spaces with underscores', () => {
-    expect(nodeNameToToolName(getNodeWithName('Test Node'))).toBe('Test_Node');
-  });
-
-  it('should replace dots with underscores', () => {
-    expect(nodeNameToToolName(getNodeWithName('Test.Node'))).toBe('Test_Node');
-  });
-
-  it('should replace question marks with underscores', () => {
-    expect(nodeNameToToolName(getNodeWithName('Test?Node'))).toBe('Test_Node');
-  });
-
-  it('should replace exclamation marks with underscores', () => {
-    expect(nodeNameToToolName(getNodeWithName('Test!Node'))).toBe('Test_Node');
-  });
-
-  it('should replace equals signs with underscores', () => {
-    expect(nodeNameToToolName(getNodeWithName('Test=Node'))).toBe('Test_Node');
-  });
-
-  it('should replace multiple special characters with underscores', () => {
-    expect(nodeNameToToolName(getNodeWithName('Test.Node?With!Special=Chars'))).toBe(
-      'Test_Node_With_Special_Chars',
-    );
-  });
-
-  it('should handle names that already have underscores', () => {
-    expect(nodeNameToToolName(getNodeWithName('Test_Node'))).toBe('Test_Node');
-  });
-
-  it('should handle names with consecutive special characters', () => {
-    expect(nodeNameToToolName(getNodeWithName('Test..!!??==Node'))).toBe('Test_Node');
-  });
-
-  it('should replace various special characters with underscores', () => {
-    expect(nodeNameToToolName(getNodeWithName('Test#+*()[]{}:;,<>/\\\'"%$Node'))).toBe('Test_Node');
-  });
-});
-
 describe('escapeSingleCurlyBrackets', () => {
   it('should return undefined when input is undefined', () => {
     expect(escapeSingleCurlyBrackets(undefined)).toBeUndefined();
@@ -867,6 +867,7 @@ describe('TestRunnerService', () => {
           position: [0, 0],
           parameters: {
             operation: 'setMetrics',
+            metric: 'customMetrics',
             metrics: {
               assignments: [
                 {
@@ -930,6 +931,7 @@ describe('TestRunnerService', () => {
           position: [0, 0],
           parameters: {
             operation: 'setMetrics',
+            metric: 'customMetrics',
             metrics: undefined,
           },
         },
@@ -961,6 +963,7 @@ describe('TestRunnerService', () => {
           position: [0, 0],
           parameters: {
             operation: 'setMetrics',
+            metric: 'customMetrics',
             metrics: {
               assignments: [],
             },
@@ -994,6 +997,7 @@ describe('TestRunnerService', () => {
           position: [0, 0],
           parameters: {
             operation: 'setMetrics',
+            metric: 'customMetrics',
             metrics: {
               assignments: [
                 {
@@ -1073,6 +1077,7 @@ describe('TestRunnerService', () => {
           position: [0, 0],
           parameters: {
             operation: 'setMetrics',
+            metric: 'customMetrics',
             metrics: {
               assignments: [
                 {
@@ -1112,6 +1117,7 @@ describe('TestRunnerService', () => {
           position: [0, 0],
           parameters: {
             operation: 'setMetrics',
+            metric: 'customMetrics',
             metrics: {
               assignments: [
                 {
@@ -1131,6 +1137,7 @@ describe('TestRunnerService', () => {
           position: [100, 0],
           parameters: {
             operation: 'setMetrics',
+            metric: 'customMetrics',
             metrics: {
               assignments: [
                 {
@@ -1150,6 +1157,250 @@ describe('TestRunnerService', () => {
         (testRunnerService as any).validateSetMetricsNodes(workflow);
       }).not.toThrow();
     });
+
+    describe('Version-based validation', () => {
+      it('should pass for version < 4.7 with valid custom metrics (no metric parameter needed)', () => {
+        const workflow = mock<IWorkflowBase>({
+          nodes: [
+            {
+              id: 'node1',
+              name: 'Set Metrics',
+              type: EVALUATION_NODE_TYPE,
+              typeVersion: 4.6,
+              position: [0, 0],
+              parameters: {
+                operation: 'setMetrics',
+                // No metric parameter - this is expected for versions < 4.7
+                metrics: {
+                  assignments: [
+                    {
+                      id: '1',
+                      name: 'accuracy',
+                      value: 0.95,
+                    },
+                  ],
+                },
+              },
+            },
+          ],
+          connections: {},
+        });
+
+        expect(() => {
+          (testRunnerService as any).validateSetMetricsNodes(workflow);
+        }).not.toThrow();
+      });
+
+      it('should fail for version < 4.7 with invalid custom metrics configuration', () => {
+        const workflow = mock<IWorkflowBase>({
+          nodes: [
+            {
+              id: 'node1',
+              name: 'Set Metrics',
+              type: EVALUATION_NODE_TYPE,
+              typeVersion: 4.6,
+              position: [0, 0],
+              parameters: {
+                operation: 'setMetrics',
+                // No metric parameter - this is expected for versions < 4.7
+                metrics: {
+                  assignments: [], // Empty assignments should fail
+                },
+              },
+            },
+          ],
+          connections: {},
+        });
+
+        expect(() => {
+          (testRunnerService as any).validateSetMetricsNodes(workflow);
+        }).toThrow(TestRunError);
+
+        try {
+          (testRunnerService as any).validateSetMetricsNodes(workflow);
+        } catch (error) {
+          expect(error).toBeInstanceOf(TestRunError);
+          expect(error.code).toBe('SET_METRICS_NODE_NOT_CONFIGURED');
+          expect(error.extra).toEqual({ node_name: 'Set Metrics' });
+        }
+      });
+
+      it('should fail for version >= 4.7 with missing metric parameter', () => {
+        const workflow = mock<IWorkflowBase>({
+          nodes: [
+            {
+              id: 'node1',
+              name: 'Set Metrics',
+              type: EVALUATION_NODE_TYPE,
+              typeVersion: 4.7,
+              position: [0, 0],
+              parameters: {
+                operation: 'setMetrics',
+                metrics: {
+                  assignments: [
+                    {
+                      id: '1',
+                      name: 'accuracy',
+                      value: 0.95,
+                    },
+                  ],
+                },
+              },
+            },
+          ],
+          connections: {},
+        });
+
+        // Missing metric parameter - this should fail for versions >= 4.7
+        workflow.nodes[0].parameters.metric = undefined;
+
+        expect(() => {
+          (testRunnerService as any).validateSetMetricsNodes(workflow);
+        }).toThrow(TestRunError);
+
+        try {
+          (testRunnerService as any).validateSetMetricsNodes(workflow);
+        } catch (error) {
+          expect(error).toBeInstanceOf(TestRunError);
+          expect(error.code).toBe('SET_METRICS_NODE_NOT_CONFIGURED');
+          expect(error.extra).toEqual({ node_name: 'Set Metrics' });
+        }
+      });
+
+      it('should pass for version >= 4.7 with valid customMetrics configuration', () => {
+        const workflow = mock<IWorkflowBase>({
+          nodes: [
+            {
+              id: 'node1',
+              name: 'Set Metrics',
+              type: EVALUATION_NODE_TYPE,
+              typeVersion: 4.7,
+              position: [0, 0],
+              parameters: {
+                operation: 'setMetrics',
+                metric: 'customMetrics',
+                metrics: {
+                  assignments: [
+                    {
+                      id: '1',
+                      name: 'accuracy',
+                      value: 0.95,
+                    },
+                  ],
+                },
+              },
+            },
+          ],
+          connections: {},
+        });
+
+        expect(() => {
+          (testRunnerService as any).validateSetMetricsNodes(workflow);
+        }).not.toThrow();
+      });
+
+      it('should pass for version >= 4.7 with non-customMetrics metric (no metrics validation needed)', () => {
+        const workflow = mock<IWorkflowBase>({
+          nodes: [
+            {
+              id: 'node1',
+              name: 'Set Metrics',
+              type: EVALUATION_NODE_TYPE,
+              typeVersion: 4.7,
+              position: [0, 0],
+              parameters: {
+                operation: 'setMetrics',
+                metric: 'correctness',
+                // No metrics parameter needed for non-customMetrics
+              },
+            },
+          ],
+          connections: {},
+        });
+
+        expect(() => {
+          (testRunnerService as any).validateSetMetricsNodes(workflow);
+        }).not.toThrow();
+      });
+
+      it('should fail for version >= 4.7 with customMetrics but invalid metrics configuration', () => {
+        const workflow = mock<IWorkflowBase>({
+          nodes: [
+            {
+              id: 'node1',
+              name: 'Set Metrics',
+              type: EVALUATION_NODE_TYPE,
+              typeVersion: 4.7,
+              position: [0, 0],
+              parameters: {
+                operation: 'setMetrics',
+                metric: 'customMetrics',
+                metrics: {
+                  assignments: [], // Empty assignments should fail
+                },
+              },
+            },
+          ],
+          connections: {},
+        });
+
+        expect(() => {
+          (testRunnerService as any).validateSetMetricsNodes(workflow);
+        }).toThrow(TestRunError);
+
+        try {
+          (testRunnerService as any).validateSetMetricsNodes(workflow);
+        } catch (error) {
+          expect(error).toBeInstanceOf(TestRunError);
+          expect(error.code).toBe('SET_METRICS_NODE_NOT_CONFIGURED');
+          expect(error.extra).toEqual({ node_name: 'Set Metrics' });
+        }
+      });
+
+      it('should handle mixed versions correctly', () => {
+        const workflow = mock<IWorkflowBase>({
+          nodes: [
+            {
+              id: 'node1',
+              name: 'Set Metrics Old',
+              type: EVALUATION_NODE_TYPE,
+              typeVersion: 4.6,
+              position: [0, 0],
+              parameters: {
+                operation: 'setMetrics',
+                // No metric parameter for old version
+                metrics: {
+                  assignments: [
+                    {
+                      id: '1',
+                      name: 'accuracy',
+                      value: 0.95,
+                    },
+                  ],
+                },
+              },
+            },
+            {
+              id: 'node2',
+              name: 'Set Metrics New',
+              type: EVALUATION_NODE_TYPE,
+              typeVersion: 4.7,
+              position: [100, 0],
+              parameters: {
+                operation: 'setMetrics',
+                metric: 'correctness',
+                // No metrics parameter needed for non-customMetrics
+              },
+            },
+          ],
+          connections: {},
+        });
+
+        expect(() => {
+          (testRunnerService as any).validateSetMetricsNodes(workflow);
+        }).not.toThrow();
+      });
+    });
   });

   describe('validateSetOutputsNodes', () => {
@@ -106,16 +106,34 @@ export class TestRunnerService {
       throw new TestRunError('SET_METRICS_NODE_NOT_FOUND');
     }

-    const unconfiguredMetricsNode = metricsNodes.find(
-      (node) =>
-        node.disabled === true ||
-        !node.parameters ||
-        !node.parameters.metrics ||
-        (node.parameters.metrics as AssignmentCollectionValue).assignments?.length === 0 ||
-        (node.parameters.metrics as AssignmentCollectionValue).assignments?.some(
-          (assignment) => !assignment.name || assignment.value === null,
-        ),
-    );
+    const unconfiguredMetricsNode = metricsNodes.find((node) => {
+      if (node.disabled === true || !node.parameters) {
+        return true;
+      }
+
+      // For versions 4.7+, check if metric parameter is missing
+      if (node.typeVersion >= 4.7 && !node.parameters.metric) {
+        return true;
+      }
+
+      // Check customMetrics configuration if:
+      // - Version 4.7+ and metric is 'customMetrics'
+      // - Version < 4.7 (customMetrics is default)
+      const isCustomMetricsMode =
+        node.typeVersion >= 4.7 ? node.parameters.metric === 'customMetrics' : true;
+
+      if (isCustomMetricsMode) {
+        return (
+          !node.parameters.metrics ||
+          (node.parameters.metrics as AssignmentCollectionValue).assignments?.length === 0 ||
+          (node.parameters.metrics as AssignmentCollectionValue).assignments?.some(
+            (assignment) => !assignment.name || assignment.value === null,
+          )
+        );
+      }
+
+      return false;
+    });

     if (unconfiguredMetricsNode) {
       throw new TestRunError('SET_METRICS_NODE_NOT_CONFIGURED', {
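Concretely, a minimal sketch of configurations the refactored check accepts or rejects (these object literals mirror the test fixtures above; they are illustrations, not a public API):

// Passes on 4.6: customMetrics is the implicit default, so only assignments are checked.
const legacy = {
  typeVersion: 4.6,
  parameters: { operation: 'setMetrics', metrics: { assignments: [{ id: '1', name: 'accuracy', value: 0.95 }] } },
};

// Fails on 4.7+: the metric parameter itself is now mandatory.
const missingMetric = { typeVersion: 4.7, parameters: { operation: 'setMetrics' } };

// Passes on 4.7+: a pre-defined metric needs no assignments at all.
const predefined = { typeVersion: 4.7, parameters: { operation: 'setMetrics', metric: 'correctness' } };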
@@ -5,6 +5,7 @@ import {
   NodeOperationError,
   traverseNodeParameters,
   NodeHelpers,
+  nodeNameToToolName,
 } from 'n8n-workflow';
 import { z } from 'zod';
@@ -88,15 +89,6 @@ function getSchema(node: INode) {
   return z.object(schemaObj).required();
 }

-/**
- * Converts a node name to a valid tool name by replacing special characters with underscores
- * and collapsing consecutive underscores into a single one.
- * This method is copied from `packages/@n8n/nodes-langchain/utils/helpers.ts`.
- */
-export function nodeNameToToolName(node: INode): string {
-  return node.name.replace(/[\s.?!=+#@&*()[\]{}:;,<>\/\\'"^%$]/g, '_').replace(/_+/g, '_');
-}
-
 /**
  * Creates a DynamicStructuredTool from a node.
  * @returns A DynamicStructuredTool instance.
@@ -11,7 +11,11 @@ export const useCanvasStore = defineStore('canvas', () => {
   const newNodeInsertPosition = ref<XYPosition | null>(null);
   const nodes = computed<INodeUi[]>(() => workflowStore.allNodes);
   const aiNodes = computed<INodeUi[]>(() =>
-    nodes.value.filter((node) => node.type.includes('langchain')),
+    nodes.value.filter(
+      (node) =>
+        node.type.includes('langchain') ||
+        (node.type === 'n8n-nodes-base.evaluation' && node.parameters?.operation === 'setMetrics'),
+    ),
   );
   const hasRangeSelection = ref(false);
@@ -0,0 +1,154 @@
export const CORRECTNESS_PROMPT = `You are an expert factual evaluator assessing the accuracy of answers compared to established ground truths.

Evaluate the factual correctness of a given output compared to the provided ground truth on a scale from 1 to 5. Use detailed reasoning to thoroughly analyze all claims before determining the final score.

# Scoring Criteria

- 5: Highly similar - The output and ground truth are nearly identical, with only minor, insignificant differences.
- 4: Somewhat similar - The output is largely similar to the ground truth but has few noticeable differences.
- 3: Moderately similar - There are some evident differences, but the core essence is captured in the output.
- 2: Slightly similar - The output only captures a few elements of the ground truth and contains several differences.
- 1: Not similar - The output is significantly different from the ground truth, with few or no matching elements.

# Evaluation Steps

1. Identify and list the key elements present in both the output and the ground truth.
2. Compare these key elements to evaluate their similarities and differences, considering both content and structure.
3. Analyze the semantic meaning conveyed by both the output and the ground truth, noting any significant deviations.
4. Consider factual accuracy of specific details, including names, dates, numbers, and relationships.
5. Assess whether the output maintains the factual integrity of the ground truth, even if phrased differently.
6. Determine the overall level of similarity and accuracy according to the defined criteria.

# Output Format

Provide:
- A detailed analysis of the comparison (extended reasoning)
- A one-sentence summary highlighting key differences (not similarities)
- The final similarity score as an integer (1, 2, 3, 4, or 5)

Always follow the JSON format below and return nothing else:
{
  "extended_reasoning": "<detailed step-by-step analysis of factual accuracy and similarity>",
  "reasoning_summary": "<one sentence summary focusing on key differences>",
  "score": <number: integer from 1 to 5>
}

# Examples

**Example 1:**

Input:
- Output: "The cat sat on the mat."
- Ground Truth: "The feline is sitting on the rug."

Expected Output:
{
  "extended_reasoning": "I need to compare 'The cat sat on the mat' with 'The feline is sitting on the rug.' First, let me identify the key elements: both describe an animal ('cat' vs 'feline') in a position ('sat' vs 'sitting') on a surface ('mat' vs 'rug'). The subject is semantically identical - 'cat' and 'feline' refer to the same animal. The action is also semantically equivalent - 'sat' and 'sitting' both describe the same position, though one is past tense and one is present continuous. The location differs in specific wording ('mat' vs 'rug') but both refer to floor coverings that serve the same function. The basic structure and meaning of both sentences are preserved, though they use different vocabulary and slightly different tense. The core information being conveyed is the same, but there are noticeable wording differences.",
  "reasoning_summary": "The sentences differ in vocabulary choice ('cat' vs 'feline', 'mat' vs 'rug') and verb tense ('sat' vs 'is sitting').",
  "score": 3
}

**Example 2:**

Input:
- Output: "The quick brown fox jumps over the lazy dog."
- Ground Truth: "A fast brown animal leaps over a sleeping canine."

Expected Output:
{
  "extended_reasoning": "I need to compare 'The quick brown fox jumps over the lazy dog' with 'A fast brown animal leaps over a sleeping canine.' Starting with the subjects: 'quick brown fox' vs 'fast brown animal'. Both describe the same entity (a fox is a type of animal) with the same attributes (quick/fast and brown). The action is described as 'jumps' vs 'leaps', which are synonymous verbs describing the same motion. The object in both sentences is a dog, described as 'lazy' in one and 'sleeping' in the other, which are related concepts (a sleeping dog could be perceived as lazy). The structure follows the same pattern: subject + action + over + object. The sentences convey the same scene with slightly different word choices that maintain the core meaning. The level of specificity differs slightly ('fox' vs 'animal', 'dog' vs 'canine'), but the underlying information and imagery remain very similar.",
  "reasoning_summary": "The sentences use different but synonymous terminology ('quick' vs 'fast', 'jumps' vs 'leaps', 'lazy' vs 'sleeping') and varying levels of specificity ('fox' vs 'animal', 'dog' vs 'canine').",
  "score": 4
}

# Notes

- Focus primarily on factual accuracy and semantic similarity, not writing style or phrasing differences.
- Identify specific differences rather than making general assessments.
- Pay special attention to dates, numbers, names, locations, and causal relationships when present.
- Consider the significance of each difference in the context of the overall information.
- Be consistent in your scoring approach across different evaluations.`;

export const CORRECTNESS_INPUT_PROMPT: string[] = [
  `Output: {actual_answer}

Ground truth: {expected_answer}`,
  'Requires the placeholders <code>{actual_answer}</code> and <code>{expected_answer}</code>',
];

export const HELPFULNESS_PROMPT = `You are an expert evaluator assessing the helpfulness of responses to user queries.

Evaluate how helpful and useful a given response is to the user's question or request on a scale from 1 to 5. Consider whether the response addresses the user's needs, provides actionable information, and is relevant to their query.

# Scoring Criteria

- 5: Extremely helpful - The response fully addresses the user's needs, provides comprehensive and actionable information, and goes above and beyond to be useful.
- 4: Very helpful - The response addresses most of the user's needs, provides useful information, and is highly relevant.
- 3: Moderately helpful - The response addresses some of the user's needs, provides some useful information, but may lack completeness or depth.
- 2: Slightly helpful - The response provides minimal useful information and only partially addresses the user's needs.
- 1: Not helpful - The response fails to address the user's needs, provides no useful information, or is irrelevant.

# Evaluation Steps

1. Analyze the user's question or request to understand what they're looking for.
2. Evaluate how well the response addresses the specific needs expressed in the query.
3. Assess the completeness and quality of the information provided.
4. Consider the relevance and applicability of the response to the user's situation.
5. Evaluate whether the response provides actionable insights or next steps.
6. Determine the overall helpfulness according to the defined criteria.

# Output Format

Provide:
- A detailed analysis of the response's helpfulness (extended reasoning)
- A one-sentence summary highlighting the key strengths or weaknesses
- The final helpfulness score as an integer (1, 2, 3, 4, or 5)

Always follow the JSON format below and return nothing else:
{
  "extended_reasoning": "<detailed step-by-step analysis of the response's helpfulness>",
  "reasoning_summary": "<one sentence summary of the response's helpfulness>",
  "score": <number: integer from 1 to 5>
}

# Examples

**Example 1:**

Input:
- Query: "How do I fix a leaky faucet?"
- Response: "A leaky faucet is usually caused by a worn washer or O-ring. Turn off the water supply, remove the handle, replace the washer or O-ring, and reassemble. If the leak persists, you may need to replace the entire cartridge."

Expected Output:
{
  "extended_reasoning": "The user asked for help fixing a leaky faucet, which is a practical home maintenance question. The response directly addresses the query by identifying the most common cause (worn washer or O-ring) and provides a clear step-by-step solution. It includes important safety information (turning off water supply) and offers a backup solution if the initial fix doesn't work. The response is concise, actionable, and comprehensive for this common problem.",
  "reasoning_summary": "The response provides a complete, actionable solution with clear steps and troubleshooting advice.",
  "score": 5
}

**Example 2:**

Input:
- Query: "What's the weather like?"
- Response: "Weather can be sunny, rainy, cloudy, or snowy depending on various atmospheric conditions."

Expected Output:
{
  "extended_reasoning": "The user asked about the weather, which typically implies they want current weather conditions for their location or a specific place. However, the response provides only generic information about weather types without addressing the specific query. It doesn't provide current conditions, forecasts, or ask for location clarification. The response is factually correct but completely unhelpful for the user's actual need.",
  "reasoning_summary": "The response provides generic weather information instead of addressing the user's likely need for current conditions.",
  "score": 1
}

# Notes

- Focus on practical utility and how well the response serves the user's actual needs
- Consider completeness, accuracy, and actionability of the information
- Pay attention to whether the response asks for clarification when needed
- Evaluate whether the response is appropriately detailed for the query complexity`;

export const HELPFULNESS_INPUT_PROMPT: string[] = [
  `Query: {user_query}

Response: {actual_answer}`,
  'Requires the placeholders <code>{user_query}</code> and <code>{actual_answer}</code>',
];
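The input prompts use single-brace placeholders in LangChain's template style, and the new test file imports LangChain prompt types, so a plausible sketch of how they could be filled (the node's actual plumbing is not shown in this diff, so treat the wiring as an assumption):

import { PromptTemplate } from '@langchain/core/prompts';
import { CORRECTNESS_INPUT_PROMPT } from './CannedMetricPrompts.ee';

// Element [0] is the template itself; element [1] is only the UI hint about required placeholders.
const template = PromptTemplate.fromTemplate(CORRECTNESS_INPUT_PROMPT[0]);
const filled = await template.format({
  actual_answer: 'The cat sat on the mat.',
  expected_answer: 'The feline is sitting on the rug.',
});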
@@ -1,5 +1,11 @@
 import type { INodeProperties } from 'n8n-workflow';

+import {
+  CORRECTNESS_PROMPT,
+  CORRECTNESS_INPUT_PROMPT,
+  HELPFULNESS_PROMPT,
+  HELPFULNESS_INPUT_PROMPT,
+} from './CannedMetricPrompts.ee';
 import { document, sheet } from '../../Google/Sheet/GoogleSheetsTrigger.node';

 export const setOutputProperties: INodeProperties[] = [
@@ -80,10 +86,188 @@ export const setCheckIfEvaluatingProperties: INodeProperties[] = [
   },
 ];

+const correctnessFields: INodeProperties[] = [
+  {
+    displayName: 'Expected Answer',
+    name: 'expectedAnswer',
+    type: 'string',
+    default: '',
+    description: 'The expected output defined in your evaluation dataset, used as ground truth',
+    displayOptions: {
+      show: {
+        operation: ['setMetrics'],
+        metric: ['correctness', 'stringSimilarity', 'categorization'],
+      },
+    },
+  },
+  {
+    displayName: 'Actual Answer',
+    name: 'actualAnswer',
+    type: 'string',
+    default: '',
+    description: 'The real response generated by AI (e.g. an agent or LLM in the workflow)',
+    displayOptions: {
+      show: {
+        operation: ['setMetrics'],
+        metric: ['correctness', 'stringSimilarity', 'categorization'],
+      },
+    },
+  },
+];
+
+const helpfulnessFields: INodeProperties[] = [
+  {
+    displayName: 'User Query',
+    name: 'userQuery',
+    type: 'string',
+    default: '',
+    description: 'The original input or question submitted by the user',
+    displayOptions: {
+      show: {
+        operation: ['setMetrics'],
+        metric: ['helpfulness'],
+      },
+    },
+  },
+  {
+    displayName: 'Response',
+    name: 'actualAnswer',
+    type: 'string',
+    default: '',
+    description: 'The response generated by AI (e.g. an agent or LLM in the workflow)',
+    displayOptions: {
+      show: {
+        operation: ['setMetrics'],
+        metric: ['helpfulness'],
+      },
+    },
+  },
+];
+
+function promptFieldForMetric(metric: string, prompt: string): INodeProperties[] {
+  return [
+    {
+      displayName: 'Prompt',
+      name: 'prompt',
+      type: 'string',
+      default: prompt,
+      description: `Instruction used to guide the model in scoring the actual answer’s ${metric} against the expected answer`,
+      typeOptions: {
+        rows: 4,
+      },
+      displayOptions: {
+        show: {
+          operation: ['setMetrics'],
+          metric: [metric],
+        },
+      },
+    },
+  ];
+}
+
+function optionsForMetric(
+  metric: string,
+  prompt: string[],
+  defaultName: string,
+): INodeProperties[] {
+  return [
+    {
+      displayName: 'Options',
+      name: 'options',
+      type: 'collection',
+      default: {},
+      placeholder: 'Add Option',
+      options: [
+        {
+          displayName: 'Metric Name',
+          name: 'metricName',
+          type: 'string',
+          default: defaultName,
+          description: 'Set this parameter if you want to set a custom name to the metric',
+        },
+        // eslint-disable-next-line n8n-nodes-base/node-param-default-missing
+        {
+          displayName: 'Input Prompt',
+          name: 'inputPrompt',
+          type: 'string',
+          default: prompt[0] ?? '',
+          typeOptions: {
+            rows: 4,
+          },
+          hint: prompt[1],
+        },
+      ],
+      displayOptions: {
+        show: {
+          operation: ['setMetrics'],
+          metric: [metric],
+        },
+      },
+    },
+  ];
+}
+
+function optionsForMetricBasic(metric: string, defaultName: string): INodeProperties[] {
+  return [
+    {
+      displayName: 'Options',
+      name: 'options',
+      type: 'collection',
+      default: {},
+      placeholder: 'Add Option',
+      options: [
+        {
+          displayName: 'Metric Name',
+          name: 'metricName',
+          type: 'string',
+          default: defaultName,
+        },
+      ],
+      displayOptions: {
+        show: {
+          operation: ['setMetrics'],
+          metric: [metric],
+        },
+      },
+    },
+  ];
+}
+
+const toolsUsedFields: INodeProperties[] = [
+  {
+    displayName: 'Expected Tools',
+    name: 'expectedTools',
+    type: 'string',
+    default: '',
+    description: 'Enter the name(s) of the tool(s) you expect the AI to call (separated by commas)',
+    placeholder: 'Get Events, Send Email, Search Database',
+    displayOptions: {
+      show: {
+        operation: ['setMetrics'],
+        metric: ['toolsUsed'],
+      },
+    },
+  },
+  {
+    displayName: 'Intermediate Steps (of Agent)',
+    name: 'intermediateSteps',
+    type: 'string',
+    default: '',
+    hint: 'The output field of the agent containing the tools called. To see it, enable returning intermediate steps in the agent’s options',
+    displayOptions: {
+      show: {
+        operation: ['setMetrics'],
+        metric: ['toolsUsed'],
+      },
+    },
+  },
+];
+
 export const setMetricsProperties: INodeProperties[] = [
   {
     displayName:
-      "Calculate the score(s) for the evaluation, then map them into this node. They will be displayed in the ‘evaluations’ tab, not the Google Sheet. <a href='https://docs.n8n.io/advanced-ai/evaluations/metric-based-evaluations/#2-calculate-metrics' target='_blank'>View metric examples</a>",
+      'Metrics measure the quality of an execution. They will be displayed in the ‘evaluations’ tab, not the Google Sheet.',
+    // "Calculate the score(s) for the evaluation, then map them into this node. They will be displayed in the ‘evaluations’ tab, not the Google Sheet. <a href='https://docs.n8n.io/advanced-ai/evaluations/metric-based-evaluations/#2-calculate-metrics' target='_blank'>View metric examples</a>",
     name: 'notice',
     type: 'notice',
     default: '',
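Since the field arrays and factory functions above all gate visibility the same way, the net effect is easiest to see expanded. A sketch of the single property that promptFieldForMetric('correctness', CORRECTNESS_PROMPT) yields, derived directly from the function body above:

const expanded: INodeProperties = {
  displayName: 'Prompt',
  name: 'prompt',
  type: 'string',
  default: CORRECTNESS_PROMPT,
  description: "Instruction used to guide the model in scoring the actual answer’s correctness against the expected answer",
  typeOptions: { rows: 4 },
  // Only rendered when the user picks the matching metric on the Set Metrics operation.
  displayOptions: { show: { operation: ['setMetrics'], metric: ['correctness'] } },
};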
@@ -93,6 +277,88 @@ export const setMetricsProperties: INodeProperties[] = [
       },
     },
   },
+  {
+    displayName: 'Metric',
+    name: 'metric',
+    type: 'hidden',
+    default: 'customMetrics',
+    displayOptions: {
+      show: {
+        operation: ['setMetrics'],
+        '@version': [4.6],
+      },
+    },
+  },
+  {
+    displayName: 'Metric',
+    name: 'metric',
+    type: 'options',
+    noDataExpression: true,
+    // eslint-disable-next-line n8n-nodes-base/node-param-options-type-unsorted-items
+    options: [
+      {
+        // eslint-disable-next-line n8n-nodes-base/node-param-display-name-miscased
+        name: 'Correctness (AI-based)',
+        value: 'correctness',
+        description:
+          'Whether the answer’s meaning is consistent with a reference answer. Uses a scale of 1 (worst) to 5 (best).',
+      },
+      {
+        // eslint-disable-next-line n8n-nodes-base/node-param-display-name-miscased
+        name: 'Helpfulness (AI-based)',
+        value: 'helpfulness',
+        description:
+          'Whether the response addresses the query. Uses a scale of 1 (worst) to 5 (best).',
+      },
+      {
+        name: 'String Similarity',
+        value: 'stringSimilarity',
+        description:
+          'How close the answer is to a reference answer, measured character-by-character (edit distance). Returns a score between 0 and 1.',
+      },
+      {
+        name: 'Categorization',
+        value: 'categorization',
+        description:
+          'Whether the answer exactly matches the reference answer. Returns 1 if so and 0 otherwise.',
+      },
+      {
+        name: 'Tools Used',
+        value: 'toolsUsed',
+        description: 'Whether tool(s) were used or not. Returns a score between 0 and 1.',
+      },
+      {
+        name: 'Custom Metrics',
+        value: 'customMetrics',
+        description: 'Define your own metric(s)',
+      },
+    ],
+    default: 'correctness',
+    displayOptions: {
+      show: {
+        operation: ['setMetrics'],
+        '@version': [{ _cnd: { gte: 4.7 } }],
+      },
+    },
+  },
+  ...correctnessFields,
+  ...helpfulnessFields,
+  ...toolsUsedFields,
+  ...promptFieldForMetric('correctness', CORRECTNESS_PROMPT),
+  ...promptFieldForMetric('helpfulness', HELPFULNESS_PROMPT),
+  {
+    displayName:
+      "Calculate the custom metrics before this node, then map them below. <a href='https://docs.n8n.io/advanced-ai/evaluations/metric-based-evaluations/#2-calculate-metrics' target='_blank'>View metric examples</a>",
+    name: 'notice',
+    type: 'notice',
+    default: '',
+    displayOptions: {
+      show: {
+        operation: ['setMetrics'],
+        metric: ['customMetrics'],
+      },
+    },
+  },
   {
     displayName: 'Metrics to Return',
     name: 'metrics',
@@ -115,7 +381,13 @@ export const setMetricsProperties: INodeProperties[] = [
     displayOptions: {
       show: {
         operation: ['setMetrics'],
+        metric: ['customMetrics'],
       },
     },
   },
+  ...optionsForMetric('correctness', CORRECTNESS_INPUT_PROMPT, 'Correctness'),
+  ...optionsForMetric('helpfulness', HELPFULNESS_INPUT_PROMPT, 'Helpfulness'),
+  ...optionsForMetricBasic('categorization', 'Categorization'),
+  ...optionsForMetricBasic('stringSimilarity', 'String similarity'),
+  ...optionsForMetricBasic('toolsUsed', 'Tools Used'),
 ];
@@ -5,7 +5,6 @@ import type {
   INodeTypeDescription,
   INodeExecutionData,
 } from 'n8n-workflow';
-import { NodeConnectionTypes } from 'n8n-workflow';

 import {
   setCheckIfEvaluatingProperties,
@@ -14,7 +13,13 @@ import {
 } from './Description.node';
 import { authentication } from '../../Google/Sheet/v2/actions/versionDescription';
 import { listSearch, loadOptions, credentialTest } from '../methods';
-import { checkIfEvaluating, setMetrics, setOutputs, setOutput } from '../utils/evaluationUtils';
+import {
+  checkIfEvaluating,
+  setMetrics,
+  setInputs,
+  setOutputs,
+  setOutput,
+} from '../utils/evaluationUtils';

 export class Evaluation implements INodeType {
   description: INodeTypeDescription = {
@@ -22,7 +27,7 @@ export class Evaluation implements INodeType {
     icon: 'fa:check-double',
     name: 'evaluation',
     group: ['transform'],
-    version: 4.6,
+    version: [4.6, 4.7],
     description: 'Runs an evaluation',
     eventTriggerDescription: '',
     subtitle: '={{$parameter["operation"]}}',
@@ -30,7 +35,7 @@ export class Evaluation implements INodeType {
       name: 'Evaluation',
       color: '#c3c9d5',
     },
-    inputs: [NodeConnectionTypes.Main],
+    inputs: `={{(${setInputs})($parameter)}}`,
     outputs: `={{(${setOutputs})($parameter)}}`,
    codex: {
       alias: ['Test', 'Metrics', 'Evals', 'Set Output', 'Set Metrics'],
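The inputs expression relies on function-to-string interpolation: `setInputs` is serialized into an n8n expression and re-evaluated by the editor with the node's current `$parameter`. The real `setInputs` lives in `../utils/evaluationUtils` and is not shown in this diff, so the following is a hypothetical sketch of the shape such a function takes, under that assumption:

// Hypothetical illustration of the pattern only; not the actual implementation.
const setInputsSketch = (parameter: { operation?: string; metric?: string }) => {
  // AI-based metrics plausibly need a language-model input in addition to the main one.
  if (
    parameter.operation === 'setMetrics' &&
    ['correctness', 'helpfulness'].includes(parameter.metric ?? '')
  ) {
    return [
      { type: 'main' },
      { type: 'ai_languageModel', displayName: 'Model', required: true, maxConnections: 1 },
    ];
  }
  return [{ type: 'main' }];
};
// `={{(${setInputsSketch})($parameter)}}` embeds the function source in the expression,
// so the connector list updates whenever the selected metric changes.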
@@ -195,6 +195,9 @@ describe('Test Evaluation', () => {
         if (param === 'operation') {
           return 'setMetrics';
         }
+        if (param === 'metric') {
+          return 'customMetrics';
+        }
         return param;
       }),
packages/nodes-base/nodes/Evaluation/test/metricHandlers.test.ts (new file, 762 lines)
@@ -0,0 +1,762 @@
|
||||
import { mock } from 'jest-mock-extended';
|
||||
import { NodeOperationError } from 'n8n-workflow';
|
||||
import type { IExecuteFunctions, INode, AssignmentCollectionValue } from 'n8n-workflow';
|
||||
import type { BaseLanguageModel } from '@langchain/core/language_models/base';
|
||||
import { ChatPromptTemplate } from '@langchain/core/prompts';
|
||||
import type { Runnable } from '@langchain/core/runnables';
|
||||
|
||||
import { metricHandlers } from '../utils/metricHandlers';
|
||||
|
||||
// Mock the validateEntry function
|
||||
jest.mock('../../Set/v2/helpers/utils', () => ({
|
||||
validateEntry: jest.fn((name: string, _type: string, value: any) => ({
|
||||
name,
|
||||
value,
|
||||
})),
|
||||
}));
|
||||
|
||||
describe('metricHandlers', () => {
|
||||
let mockExecuteFunctions: jest.Mocked<IExecuteFunctions>;
|
||||
let mockNode: INode;
|
||||
|
||||
beforeEach(() => {
|
||||
mockExecuteFunctions = mock<IExecuteFunctions>();
|
||||
mockNode = {
|
||||
id: 'test-node',
|
||||
name: 'Test Node',
|
||||
type: 'n8n-nodes-base.evaluation',
|
||||
typeVersion: 1,
|
||||
position: [0, 0],
|
||||
parameters: {},
|
||||
};
|
||||
mockExecuteFunctions.getNode.mockReturnValue(mockNode);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
jest.clearAllMocks();
|
||||
});
|
||||
|
||||
describe('customMetrics', () => {
|
||||
it('should process valid custom metrics', async () => {
|
||||
const metricsData: AssignmentCollectionValue = {
|
||||
assignments: [
|
||||
{ id: '1', name: 'Metric1', value: 5, type: 'number' },
|
||||
{ id: '2', name: 'Metric2', value: '10', type: 'number' },
|
||||
{ id: '3', name: 'Metric3', value: 7.5, type: 'number' },
|
||||
],
|
||||
};
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockReturnValue(metricsData);
|
||||
|
||||
const result = await metricHandlers.customMetrics.call(mockExecuteFunctions, 0);
|
||||
|
||||
expect(result).toEqual({
|
||||
Metric1: 5,
|
||||
Metric2: 10,
|
||||
Metric3: 7.5,
|
||||
});
|
||||
});
|
||||
|
||||
it('should throw error for non-numeric values', async () => {
|
||||
const metricsData: AssignmentCollectionValue = {
|
||||
assignments: [{ id: '1', name: 'Metric1', value: 'not-a-number', type: 'number' }],
|
||||
};
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockReturnValue(metricsData);
|
||||
|
||||
await expect(metricHandlers.customMetrics.call(mockExecuteFunctions, 0)).rejects.toThrow(
|
||||
NodeOperationError,
|
||||
);
|
||||
});
|
||||
|
||||
it('should throw error for missing metric name', async () => {
|
||||
const metricsData: AssignmentCollectionValue = {
|
||||
assignments: [{ id: '1', name: '', value: 5, type: 'number' }],
|
||||
};
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockReturnValue(metricsData);
|
||||
|
||||
await expect(metricHandlers.customMetrics.call(mockExecuteFunctions, 0)).rejects.toThrow(
|
||||
NodeOperationError,
|
||||
);
|
||||
});
|
||||
|
||||
it('should handle empty assignments array', async () => {
|
||||
const metricsData: AssignmentCollectionValue = {
|
||||
assignments: [],
|
||||
};
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockReturnValue(metricsData);
|
||||
|
||||
const result = await metricHandlers.customMetrics.call(mockExecuteFunctions, 0);
|
||||
|
||||
expect(result).toEqual({});
|
||||
});
|
||||
|
||||
it('should handle undefined assignments', async () => {
|
||||
const metricsData: AssignmentCollectionValue = { assignments: [] };
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockReturnValue(metricsData);
|
||||
|
||||
const result = await metricHandlers.customMetrics.call(mockExecuteFunctions, 0);
|
||||
|
||||
expect(result).toEqual({});
|
||||
});
|
||||
});
|
||||
|
||||
describe('toolsUsed', () => {
|
||||
it('should return correct tool usage metrics', async () => {
|
||||
const expectedTools = 'calculator, search';
|
||||
const intermediateSteps = [
|
||||
{ action: { tool: 'calculator' } },
|
||||
{ action: { tool: 'calculator' } },
|
||||
{ action: { tool: 'search' } },
|
||||
];
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
|
||||
if (paramName === 'expectedTools') return expectedTools;
|
||||
if (paramName === 'intermediateSteps') return intermediateSteps;
|
||||
if (paramName === 'options.metricName') return 'Tools Used';
|
||||
return undefined;
|
||||
});
|
||||
|
||||
const result = await metricHandlers.toolsUsed.call(mockExecuteFunctions, 0);
|
||||
|
||||
expect(result).toEqual({
|
||||
'Tools Used': 1,
|
||||
});
|
||||
});
|
||||
|
||||
it('should return 0 for unused tools', async () => {
|
||||
const expectedTools = 'calculator, search';
|
||||
const intermediateSteps = [{ action: { tool: 'calculator' } }];
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
|
||||
if (paramName === 'expectedTools') return expectedTools;
|
||||
if (paramName === 'intermediateSteps') return intermediateSteps;
|
||||
if (paramName === 'options.metricName') return 'Tools Used';
|
||||
return undefined;
|
||||
});
|
||||
|
||||
const result = await metricHandlers.toolsUsed.call(mockExecuteFunctions, 0);
|
||||
|
||||
expect(result).toEqual({
|
||||
'Tools Used': 0.5,
|
||||
});
|
||||
});
|
||||
|
||||
it('should handle tool names with spaces and special characters', async () => {
|
||||
const expectedTools = 'Get Events, Send Email, Search Database';
|
||||
const intermediateSteps = [
|
||||
{ action: { tool: 'Get_Events' } },
|
||||
{ action: { tool: 'Send_Email' } },
|
||||
{ action: { tool: 'Search_Database' } },
|
||||
];
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
|
||||
if (paramName === 'expectedTools') return expectedTools;
|
||||
if (paramName === 'intermediateSteps') return intermediateSteps;
|
||||
if (paramName === 'options.metricName') return 'Tools Used';
|
||||
return undefined;
|
||||
});
|
||||
|
||||
const result = await metricHandlers.toolsUsed.call(mockExecuteFunctions, 0);
|
||||
|
||||
expect(result).toEqual({
|
||||
'Tools Used': 1,
|
||||
});
|
||||
});
|
||||
|
||||
it('should work case-insensitively', async () => {
|
||||
const expectedTools = 'Get Events, send email, SEARCH DATABASE';
|
||||
const intermediateSteps = [
|
||||
{ action: { tool: 'get_events' } },
|
||||
{ action: { tool: 'SEND_EMAIL' } },
|
||||
{ action: { tool: 'Search_Database' } },
|
||||
];
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
|
||||
if (paramName === 'expectedTools') return expectedTools;
|
||||
if (paramName === 'intermediateSteps') return intermediateSteps;
|
||||
if (paramName === 'options.metricName') return 'Tools Used';
|
||||
return undefined;
|
||||
});
|
||||
|
||||
const result = await metricHandlers.toolsUsed.call(mockExecuteFunctions, 0);
|
||||
|
||||
expect(result).toEqual({
|
||||
'Tools Used': 1,
|
||||
});
|
||||
});
|
||||
|
||||
it('should handle mixed case and format variations', async () => {
|
||||
const expectedTools = 'calculator tool, Search Engine, data-processor';
|
||||
const intermediateSteps = [
|
||||
{ action: { tool: 'Calculator_Tool' } },
|
||||
{ action: { tool: 'search_engine' } },
|
||||
// data-processor is not used, so partial match
|
||||
];
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
|
||||
if (paramName === 'expectedTools') return expectedTools;
|
||||
if (paramName === 'intermediateSteps') return intermediateSteps;
|
||||
if (paramName === 'options.metricName') return 'Tools Used';
|
||||
return undefined;
|
||||
});
|
||||
|
||||
const result = await metricHandlers.toolsUsed.call(mockExecuteFunctions, 0);
|
||||
|
||||
// 2 out of 3 tools used = 2/3 ≈ 0.6667
|
||||
expect(result).toEqual({
|
||||
'Tools Used': 2 / 3,
|
||||
});
|
||||
});
|
||||
|
||||
it('should throw error for missing expected tools', async () => {
|
||||
const expectedTools = '';
|
||||
const intermediateSteps: any[] = [];
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
|
||||
if (paramName === 'expectedTools') return expectedTools;
|
||||
if (paramName === 'intermediateSteps') return intermediateSteps;
|
||||
return undefined;
|
||||
});
|
||||
|
||||
await expect(metricHandlers.toolsUsed.call(mockExecuteFunctions, 0)).rejects.toThrow(
|
||||
NodeOperationError,
|
||||
);
|
||||
});
|
||||
|
||||
it('should throw error for undefined expected tools', async () => {
|
||||
const expectedTools = undefined;
|
||||
const intermediateSteps: any[] = [];
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
|
||||
if (paramName === 'expectedTools') return expectedTools;
|
||||
if (paramName === 'intermediateSteps') return intermediateSteps;
|
||||
return undefined;
|
||||
});
|
||||
|
||||
await expect(metricHandlers.toolsUsed.call(mockExecuteFunctions, 0)).rejects.toThrow(
|
||||
NodeOperationError,
|
||||
);
|
||||
});
|
||||
|
||||
describe('intermediate steps validation', () => {
|
||||
it('should throw error for missing intermediate steps parameter', async () => {
|
||||
const expectedTools = 'calculator';
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
|
||||
if (paramName === 'expectedTools') return expectedTools;
|
||||
if (paramName === 'intermediateSteps') return undefined;
|
||||
return undefined;
|
||||
});
|
||||
|
||||
await expect(metricHandlers.toolsUsed.call(mockExecuteFunctions, 0)).rejects.toThrow(
|
||||
new NodeOperationError(mockNode, 'Intermediate steps missing', {
|
||||
description:
|
||||
"Make sure to enable returning intermediate steps in your agent node's options, then map them in here",
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('should throw error for empty object intermediate steps', async () => {
|
||||
const expectedTools = 'calculator';
|
||||
const intermediateSteps = {};
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
|
||||
if (paramName === 'expectedTools') return expectedTools;
|
||||
if (paramName === 'intermediateSteps') return intermediateSteps;
|
||||
return undefined;
|
||||
});
|
||||
|
||||
await expect(metricHandlers.toolsUsed.call(mockExecuteFunctions, 0)).rejects.toThrow(
|
||||
NodeOperationError,
|
||||
);
|
||||
});
|
||||
|
||||
it('should throw error for string intermediate steps', async () => {
|
||||
const expectedTools = 'calculator';
|
||||
const intermediateSteps = 'not an array';
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
|
||||
if (paramName === 'expectedTools') return expectedTools;
|
||||
if (paramName === 'intermediateSteps') return intermediateSteps;
|
||||
return undefined;
|
||||
});
|
||||
|
||||
await expect(metricHandlers.toolsUsed.call(mockExecuteFunctions, 0)).rejects.toThrow(
|
||||
NodeOperationError,
|
||||
);
|
||||
});
|
||||
|
||||
it('should throw error for null intermediate steps', async () => {
|
||||
const expectedTools = 'calculator';
|
||||
const intermediateSteps = null;
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
|
||||
if (paramName === 'expectedTools') return expectedTools;
|
||||
if (paramName === 'intermediateSteps') return intermediateSteps;
|
||||
return undefined;
|
||||
});
|
||||
|
||||
await expect(metricHandlers.toolsUsed.call(mockExecuteFunctions, 0)).rejects.toThrow(
|
||||
NodeOperationError,
|
||||
);
|
||||
});
|
||||
|
||||
it('should handle empty array intermediate steps gracefully', async () => {
|
||||
const expectedTools = 'calculator, search';
|
||||
const intermediateSteps: any[] = [];
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
|
||||
if (paramName === 'expectedTools') return expectedTools;
|
||||
if (paramName === 'intermediateSteps') return intermediateSteps;
|
||||
if (paramName === 'options.metricName') return 'Tools Used';
|
||||
return undefined;
|
||||
});
|
||||
|
||||
const result = await metricHandlers.toolsUsed.call(mockExecuteFunctions, 0);
|
||||
|
||||
expect(result).toEqual({
|
||||
'Tools Used': 0,
|
||||
});
|
||||
});
|
||||
|
||||
it('should handle malformed intermediate steps objects', async () => {
|
||||
const expectedTools = 'calculator, search';
|
||||
const intermediateSteps = [
|
||||
{ action: { tool: 'calculator' } }, // valid
|
||||
{ action: {} }, // missing tool property
|
||||
{ notAction: { tool: 'search' } }, // wrong structure
|
||||
{}, // completely empty
|
||||
];
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
|
||||
if (paramName === 'expectedTools') return expectedTools;
|
||||
if (paramName === 'intermediateSteps') return intermediateSteps;
|
||||
if (paramName === 'options.metricName') return 'Tools Used';
|
||||
return undefined;
|
||||
});
|
||||
|
||||
const result = await metricHandlers.toolsUsed.call(mockExecuteFunctions, 0);
|
||||
|
||||
// Only 'calculator' should match (1 out of 2 expected tools)
|
||||
expect(result).toEqual({
|
||||
'Tools Used': 0.5,
|
||||
});
|
||||
});
|
||||
|
||||
it('should handle intermediate steps with null/undefined tool names', async () => {
|
||||
const expectedTools = 'calculator, search';
|
||||
const intermediateSteps = [
|
||||
{ action: { tool: 'calculator' } }, // valid
|
||||
{ action: { tool: null } }, // null tool
|
||||
{ action: { tool: undefined } }, // undefined tool
|
||||
{ action: { tool: '' } }, // empty string tool
|
||||
];
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
|
||||
if (paramName === 'expectedTools') return expectedTools;
|
||||
if (paramName === 'intermediateSteps') return intermediateSteps;
|
||||
if (paramName === 'options.metricName') return 'Tools Used';
|
||||
return undefined;
|
||||
});
|
||||
|
||||
const result = await metricHandlers.toolsUsed.call(mockExecuteFunctions, 0);
|
||||
|
||||
// Only 'calculator' should match (1 out of 2 expected tools)
|
||||
expect(result).toEqual({
|
||||
'Tools Used': 0.5,
|
||||
});
|
||||
});
|
||||
|
||||
it('should handle intermediate steps with non-string tool names', async () => {
|
||||
const expectedTools = 'calculator, search';
|
||||
const intermediateSteps = [
|
||||
{ action: { tool: 'calculator' } }, // valid
|
||||
{ action: { tool: 123 } }, // number
|
||||
{ action: { tool: { name: 'search' } } }, // object
|
||||
{ action: { tool: ['search'] } }, // array
|
||||
];
|
||||
|
||||
mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
|
||||
if (paramName === 'expectedTools') return expectedTools;
|
||||
if (paramName === 'intermediateSteps') return intermediateSteps;
|
||||
if (paramName === 'options.metricName') return 'Tools Used';
|
||||
return undefined;
|
||||
});
|
||||
|
||||
// This should not throw an error, but might have unexpected behavior
|
||||
// depending on how the comparison works
|
||||
const result = await metricHandlers.toolsUsed.call(mockExecuteFunctions, 0);
|
||||
|
||||
// Only 'calculator' should match reliably (1 out of 2 expected tools)
|
||||
expect(result).toEqual({
|
||||
'Tools Used': 0.5,
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
  describe('categorization', () => {
    it('should return 1 for exact match', async () => {
      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'expectedAnswer') return 'expected answer';
        if (paramName === 'actualAnswer') return 'expected answer';
        if (paramName === 'options.metricName') return 'Categorization';
        return undefined;
      });

      const result = await metricHandlers.categorization.call(mockExecuteFunctions, 0);

      expect(result).toEqual({ Categorization: 1 });
    });

    it('should return 0 for non-match', async () => {
      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'expectedAnswer') return 'expected answer';
        if (paramName === 'actualAnswer') return 'different answer';
        if (paramName === 'options.metricName') return 'Categorization';
        return undefined;
      });

      const result = await metricHandlers.categorization.call(mockExecuteFunctions, 0);

      expect(result).toEqual({ Categorization: 0 });
    });

    it('should use custom metric name', async () => {
      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'expectedAnswer') return 'expected answer';
        if (paramName === 'actualAnswer') return 'expected answer';
        if (paramName === 'options.metricName') return 'Custom Categorization';
        return undefined;
      });

      const result = await metricHandlers.categorization.call(mockExecuteFunctions, 0);

      expect(result).toEqual({ 'Custom Categorization': 1 });
    });

    it('should handle whitespace trimming', async () => {
      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'expectedAnswer') return ' expected answer ';
        if (paramName === 'actualAnswer') return 'expected answer';
        if (paramName === 'options.metricName') return 'Categorization';
        return undefined;
      });

      const result = await metricHandlers.categorization.call(mockExecuteFunctions, 0);

      expect(result).toEqual({ Categorization: 1 });
    });

    it('should throw error for missing expected answer', async () => {
      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'expectedAnswer') return '';
        if (paramName === 'actualAnswer') return 'actual answer';
        return undefined;
      });

      await expect(metricHandlers.categorization.call(mockExecuteFunctions, 0)).rejects.toThrow(
        NodeOperationError,
      );
    });

    it('should throw error for missing actual answer', async () => {
      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'expectedAnswer') return 'expected answer';
        if (paramName === 'actualAnswer') return '';
        return undefined;
      });

      await expect(metricHandlers.categorization.call(mockExecuteFunctions, 0)).rejects.toThrow(
        NodeOperationError,
      );
    });
  });

  describe('stringSimilarity', () => {
    it('should return Levenshtein distance', async () => {
      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'expectedAnswer') return 'hello';
        if (paramName === 'actualAnswer') return 'helo';
        if (paramName === 'options.metricName') return 'String similarity';
        return undefined;
      });

      const result = await metricHandlers.stringSimilarity.call(mockExecuteFunctions, 0);

      expect(result).toEqual({ 'String similarity': 1 });
    });

    it('should return 0 for identical strings', async () => {
      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'expectedAnswer') return 'hello';
        if (paramName === 'actualAnswer') return 'hello';
        if (paramName === 'options.metricName') return 'String similarity';
        return undefined;
      });

      const result = await metricHandlers.stringSimilarity.call(mockExecuteFunctions, 0);

      expect(result).toEqual({ 'String similarity': 0 });
    });

    it('should handle whitespace trimming', async () => {
      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'expectedAnswer') return ' hello ';
        if (paramName === 'actualAnswer') return 'hello';
        if (paramName === 'options.metricName') return 'String similarity';
        return undefined;
      });

      const result = await metricHandlers.stringSimilarity.call(mockExecuteFunctions, 0);

      expect(result).toEqual({ 'String similarity': 0 });
    });

    it('should throw error for missing expected answer', async () => {
      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'expectedAnswer') return '';
        if (paramName === 'actualAnswer') return 'actual answer';
        return undefined;
      });

      await expect(metricHandlers.stringSimilarity.call(mockExecuteFunctions, 0)).rejects.toThrow(
        NodeOperationError,
      );
    });

    it('should throw error for missing actual answer', async () => {
      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'expectedAnswer') return 'expected answer';
        if (paramName === 'actualAnswer') return '';
        return undefined;
      });

      await expect(metricHandlers.stringSimilarity.call(mockExecuteFunctions, 0)).rejects.toThrow(
        NodeOperationError,
      );
    });
  });

  describe('helpfulness', () => {
    let mockLLM: jest.Mocked<BaseLanguageModel>;

    beforeEach(() => {
      mockLLM = mock<BaseLanguageModel>();
      mockExecuteFunctions.getInputConnectionData.mockResolvedValue(mockLLM);
    });

    it('should return helpfulness score from LLM', async () => {
      const mockResponse = {
        extended_reasoning: 'The response is very helpful...',
        reasoning_summary: 'Response directly addresses the query',
        score: 4,
      };

      // Mock the LLM with withStructuredOutput
      const mockLLMWithStructuredOutput = mock<Runnable>();
      mockLLMWithStructuredOutput.invoke.mockResolvedValue(mockResponse);

      mockLLM.withStructuredOutput = jest.fn().mockReturnValue(mockLLMWithStructuredOutput);

      // Mock ChatPromptTemplate.fromMessages to return a chain that can be piped
      const mockChatPromptTemplate = mock<ChatPromptTemplate>();
      mockChatPromptTemplate.pipe.mockReturnValue(mockLLMWithStructuredOutput);

      // Mock the static method
      jest.spyOn(ChatPromptTemplate, 'fromMessages').mockReturnValue(mockChatPromptTemplate);

      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'userQuery') return 'What is the capital of France?';
        if (paramName === 'actualAnswer') return 'Paris is the capital of France.';
        if (paramName === 'prompt') return 'You are an AI assistant...';
        if (paramName === 'options.inputPrompt')
          return 'Query: {user_query}\\nResponse: {actual_answer}';
        if (paramName === 'options.metricName') return 'Helpfulness';
        return undefined;
      });

      const result = await metricHandlers.helpfulness.call(mockExecuteFunctions, 0);

      expect(result).toEqual({ Helpfulness: 4 });
    });

    it('should throw error for missing user query', async () => {
      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'userQuery') return '';
        if (paramName === 'actualAnswer') return 'Some response';
        return undefined;
      });

      await expect(metricHandlers.helpfulness.call(mockExecuteFunctions, 0)).rejects.toThrow(
        NodeOperationError,
      );
    });

    it('should throw error for missing actual answer', async () => {
      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'userQuery') return 'Some query';
        if (paramName === 'actualAnswer') return '';
        return undefined;
      });

      await expect(metricHandlers.helpfulness.call(mockExecuteFunctions, 0)).rejects.toThrow(
        NodeOperationError,
      );
    });

    it('should throw error when no LLM is connected', async () => {
      mockExecuteFunctions.getInputConnectionData.mockResolvedValue(null);

      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'userQuery') return 'What is the capital of France?';
        if (paramName === 'actualAnswer') return 'Paris is the capital of France.';
        return undefined;
      });

      await expect(metricHandlers.helpfulness.call(mockExecuteFunctions, 0)).rejects.toThrow(
        NodeOperationError,
      );
    });

    it('should handle LLM errors gracefully', async () => {
      const mockError = new Error('LLM processing failed');
      const mockFinalChain = mock<Runnable>();
      mockFinalChain.invoke.mockRejectedValue(mockError);

      const mockMiddleChain = mock<Runnable>();
      mockMiddleChain.pipe.mockReturnValue(mockFinalChain);

      const mockChatPromptTemplate = mock<ChatPromptTemplate>();
      mockChatPromptTemplate.pipe.mockReturnValue(mockMiddleChain);

      jest.spyOn(ChatPromptTemplate, 'fromMessages').mockReturnValue(mockChatPromptTemplate);

      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'userQuery') return 'What is the capital of France?';
        if (paramName === 'actualAnswer') return 'Paris is the capital of France.';
        if (paramName === 'prompt') return 'You are an AI assistant...';
        if (paramName === 'options.inputPrompt')
          return 'Query: {user_query}\\nResponse: {actual_answer}';
        if (paramName === 'options.metricName') return 'Helpfulness';
        return undefined;
      });

      await expect(metricHandlers.helpfulness.call(mockExecuteFunctions, 0)).rejects.toThrow(
        NodeOperationError,
      );
    });
  });

  describe('correctness', () => {
    let mockLLM: jest.Mocked<BaseLanguageModel>;

    beforeEach(() => {
      mockLLM = mock<BaseLanguageModel>();
      mockExecuteFunctions.getInputConnectionData.mockResolvedValue(mockLLM);
    });

    it('should return correctness score from LLM', async () => {
      const mockResponse = {
        extended_reasoning: 'The response is factually correct...',
        reasoning_summary: 'Response matches expected answer',
        score: 5,
      };

      // Mock the LLM with withStructuredOutput
      const mockLLMWithStructuredOutput = mock<Runnable>();
      mockLLMWithStructuredOutput.invoke.mockResolvedValue(mockResponse);

      mockLLM.withStructuredOutput = jest.fn().mockReturnValue(mockLLMWithStructuredOutput);

      const mockChatPromptTemplate = mock<ChatPromptTemplate>();
      mockChatPromptTemplate.pipe.mockReturnValue(mockLLMWithStructuredOutput);

      jest.spyOn(ChatPromptTemplate, 'fromMessages').mockReturnValue(mockChatPromptTemplate);

      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'expectedAnswer') return 'Paris';
        if (paramName === 'actualAnswer') return 'Paris is the capital of France.';
        if (paramName === 'prompt') return 'You are an AI assistant...';
        if (paramName === 'options.inputPrompt')
          return 'Expected: {expected_answer}\\nActual: {actual_answer}';
        if (paramName === 'options.metricName') return 'Correctness';
        return undefined;
      });

      const result = await metricHandlers.correctness.call(mockExecuteFunctions, 0);

      expect(result).toEqual({ Correctness: 5 });
    });

    it('should throw error for missing expected answer', async () => {
      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'expectedAnswer') return '';
        if (paramName === 'actualAnswer') return 'Some response';
        return undefined;
      });

      await expect(metricHandlers.correctness.call(mockExecuteFunctions, 0)).rejects.toThrow(
        NodeOperationError,
      );
    });

    it('should throw error for missing actual answer', async () => {
      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'expectedAnswer') return 'Expected answer';
        if (paramName === 'actualAnswer') return '';
        return undefined;
      });

      await expect(metricHandlers.correctness.call(mockExecuteFunctions, 0)).rejects.toThrow(
        NodeOperationError,
      );
    });

    it('should throw error when no LLM is connected', async () => {
      mockExecuteFunctions.getInputConnectionData.mockResolvedValue(null);

      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'expectedAnswer') return 'Paris';
        if (paramName === 'actualAnswer') return 'Paris is the capital of France.';
        return undefined;
      });

      await expect(metricHandlers.correctness.call(mockExecuteFunctions, 0)).rejects.toThrow(
        NodeOperationError,
      );
    });

    it('should handle LLM errors gracefully', async () => {
      const mockError = new Error('LLM processing failed');
      const mockFinalChain = mock<Runnable>();
      mockFinalChain.invoke.mockRejectedValue(mockError);

      const mockMiddleChain = mock<Runnable>();
      mockMiddleChain.pipe.mockReturnValue(mockFinalChain);

      const mockChatPromptTemplate = mock<ChatPromptTemplate>();
      mockChatPromptTemplate.pipe.mockReturnValue(mockMiddleChain);

      jest.spyOn(ChatPromptTemplate, 'fromMessages').mockReturnValue(mockChatPromptTemplate);

      mockExecuteFunctions.getNodeParameter.mockImplementation((paramName: string) => {
        if (paramName === 'expectedAnswer') return 'Paris';
        if (paramName === 'actualAnswer') return 'Paris is the capital of France.';
        if (paramName === 'prompt') return 'You are an AI assistant...';
        if (paramName === 'options.inputPrompt')
          return 'Expected: {expected_answer}\\nActual: {actual_answer}';
        if (paramName === 'options.metricName') return 'Correctness';
        return undefined;
      });

      await expect(metricHandlers.correctness.call(mockExecuteFunctions, 0)).rejects.toThrow(
        NodeOperationError,
      );
    });
  });
});

@@ -1,15 +1,14 @@
import { NodeOperationError, UserError } from 'n8n-workflow';
import { UserError, NodeOperationError } from 'n8n-workflow';
import type {
  FieldType,
  INodeParameters,
  AssignmentCollectionValue,
  IDataObject,
  IExecuteFunctions,
  INodeExecutionData,
} from 'n8n-workflow';

import { metricHandlers } from './metricHandlers';
import { getGoogleSheet, getSheet } from './evaluationTriggerUtils';
import { composeReturnItem, validateEntry } from '../../Set/v2/helpers/utils';
import { composeReturnItem } from '../../Set/v2/helpers/utils';

export async function setOutput(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
  const evaluationNode = this.getNode();
@@ -96,46 +95,16 @@ export async function setMetrics(this: IExecuteFunctions): Promise<INodeExecutio
  const metrics: INodeExecutionData[] = [];

  for (let i = 0; i < items.length; i++) {
    const dataToSave = this.getNodeParameter('metrics', i, {}) as AssignmentCollectionValue;
    const metric = this.getNodeParameter('metric', i, {}) as keyof typeof metricHandlers;
    if (!metricHandlers.hasOwnProperty(metric)) {
      throw new NodeOperationError(this.getNode(), 'Unknown metric');
    }
    const newData = await metricHandlers[metric].call(this, i);

    const newItem: INodeExecutionData = {
      json: {},
      pairedItem: { item: i },
    };
    const newData = Object.fromEntries(
      (dataToSave?.assignments ?? []).map((assignment) => {
        const assignmentValue =
          typeof assignment.value === 'number' ? assignment.value : Number(assignment.value);

        if (isNaN(assignmentValue)) {
          throw new NodeOperationError(
            this.getNode(),
            `Value for '${assignment.name}' isn't a number`,
            {
              description: `It’s currently '${assignment.value}'. Metrics must be numeric.`,
            },
          );
        }

        if (!assignment.name || isNaN(assignmentValue)) {
          throw new NodeOperationError(this.getNode(), 'Metric name missing', {
            description: 'Make sure each metric you define has a name',
          });
        }

        const { name, value } = validateEntry(
          assignment.name,
          assignment.type as FieldType,
          assignmentValue,
          this.getNode(),
          i,
          false,
          1,
        );

        return [name, value];
      }),
    );

    const returnItem = composeReturnItem.call(
      this,
@@ -180,3 +149,17 @@ export function setOutputs(parameters: INodeParameters) {

  return [{ type: 'main' }];
}

export function setInputs(parameters: INodeParameters) {
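  // Correctness and helpfulness are judged by an LLM, so they expose an extra model input.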
  if (
    parameters.operation === 'setMetrics' &&
    ['correctness', 'helpfulness'].includes(parameters.metric as string)
  ) {
    return [
      { type: 'main' },
      { type: 'ai_languageModel', displayName: 'Model', maxConnections: 1 },
    ];
  }

  return [{ type: 'main' }];
}

packages/nodes-base/nodes/Evaluation/utils/metricHandlers.ts (new file, 353 lines)
@@ -0,0 +1,353 @@
import {
  ChatPromptTemplate,
  SystemMessagePromptTemplate,
  HumanMessagePromptTemplate,
} from '@langchain/core/prompts';
import type { BaseLanguageModel } from '@langchain/core/language_models/base';
import { distance } from 'fastest-levenshtein';
import { NodeOperationError, nodeNameToToolName } from 'n8n-workflow';
import type {
  FieldType,
  AssignmentCollectionValue,
  IDataObject,
  IExecuteFunctions,
} from 'n8n-workflow';
import { z } from 'zod';

import { validateEntry } from '../../Set/v2/helpers/utils';
import {
  CORRECTNESS_PROMPT,
  CORRECTNESS_INPUT_PROMPT,
  HELPFULNESS_PROMPT,
  HELPFULNESS_INPUT_PROMPT,
} from '../Evaluation/CannedMetricPrompts.ee';

export const metricHandlers = {
  async customMetrics(this: IExecuteFunctions, i: number): Promise<IDataObject> {
    const dataToSave = this.getNodeParameter('metrics', i, {}) as AssignmentCollectionValue;
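    // Each assignment becomes one numeric metric, e.g. a (hypothetical) assignment
    // named 'Accuracy' with value '0.95' yields { Accuracy: 0.95 }.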

    return Object.fromEntries(
      (dataToSave?.assignments ?? []).map((assignment) => {
        const assignmentValue =
          typeof assignment.value === 'number' ? assignment.value : Number(assignment.value);

        if (isNaN(assignmentValue)) {
          throw new NodeOperationError(
            this.getNode(),
            `Value for '${assignment.name}' isn't a number`,
            {
              description: `It's currently '${assignment.value}'. Metrics must be numeric.`,
            },
          );
        }

        if (!assignment.name || isNaN(assignmentValue)) {
          throw new NodeOperationError(this.getNode(), 'Metric name missing', {
            description: 'Make sure each metric you define has a name',
          });
        }

        const { name, value } = validateEntry(
          assignment.name,
          assignment.type as FieldType,
          assignmentValue,
          this.getNode(),
          i,
          false,
          1,
        );

        return [name, value];
      }),
    );
  },

  async toolsUsed(this: IExecuteFunctions, i: number): Promise<IDataObject> {
    const expectedToolsParam = this.getNodeParameter('expectedTools', i, '');
    const expectedToolsString = (expectedToolsParam as string)?.trim() || '';
    const expectedTools: string[] = expectedToolsString
      ? expectedToolsString
          .split(',')
          .map((tool) => tool.trim())
          .filter((tool) => tool !== '')
      : [];

    const intermediateSteps = this.getNodeParameter('intermediateSteps', i, {}) as Array<{
      action: { tool: string };
    }>;

    if (!expectedTools || expectedTools.length === 0) {
      throw new NodeOperationError(this.getNode(), 'Expected tool name missing', {
        description:
          'Make sure you add at least one expected tool name (comma-separated if multiple)',
      });
    }
    if (!intermediateSteps || !Array.isArray(intermediateSteps)) {
      throw new NodeOperationError(this.getNode(), 'Intermediate steps missing', {
        description:
          "Make sure to enable returning intermediate steps in your agent node's options, then map them in here",
      });
    }

    // Convert user-entered tool names to the format used in intermediate steps (case-insensitive)
    const normalizedExpectedTools = expectedTools.map((tool) =>
      nodeNameToToolName(tool).toLowerCase(),
    );

    // Calculate individual tool usage (1 if used, 0 if not used)
    const toolUsageScores = normalizedExpectedTools.map((normalizedTool) => {
      return intermediateSteps.some((step) => {
        // Handle malformed intermediate steps gracefully
        if (!step || !step.action || typeof step.action.tool !== 'string') {
          return false;
        }
        return step.action.tool.toLowerCase() === normalizedTool;
      })
        ? 1
        : 0;
    });

    // Calculate the average of all tool usage scores
    const averageScore =
      toolUsageScores.reduce((sum: number, score: number) => sum + score, 0) /
      toolUsageScores.length;
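    // e.g. expectedTools 'calculator, search' with only 'calculator' appearing in
    // intermediateSteps scores [1, 0], giving an average of 0.5 (see the tests above).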

    const metricName = this.getNodeParameter('options.metricName', i, 'Tools Used') as string;

    return {
      [metricName]: averageScore,
    };
  },

  async categorization(this: IExecuteFunctions, i: number): Promise<IDataObject> {
    const expectedAnswer = (this.getNodeParameter('expectedAnswer', i, '') as string)
      .toString()
      .trim();
    const actualAnswer = (this.getNodeParameter('actualAnswer', i, '') as string).toString().trim();

    if (!expectedAnswer) {
      throw new NodeOperationError(this.getNode(), 'Expected answer is missing', {
        description: 'Make sure to fill in an expected answer',
      });
    }
    if (!actualAnswer) {
      throw new NodeOperationError(this.getNode(), 'Actual answer is missing', {
        description: 'Make sure to fill in an actual answer',
      });
    }

    const metricName = this.getNodeParameter('options.metricName', i, 'Categorization') as string;

    return {
      [metricName]: expectedAnswer === actualAnswer ? 1 : 0,
    };
  },

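  // stringSimilarity below returns the raw Levenshtein edit distance, so 0 means the
  // trimmed strings are identical; 'hello' vs 'helo' gives 1 (see the tests above).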
  async stringSimilarity(this: IExecuteFunctions, i: number): Promise<IDataObject> {
    const expectedAnswer = (this.getNodeParameter('expectedAnswer', i, '') as string)
      .toString()
      .trim();
    const actualAnswer = (this.getNodeParameter('actualAnswer', i, '') as string).toString().trim();

    if (!expectedAnswer) {
      throw new NodeOperationError(this.getNode(), 'Expected answer is missing', {
        description: 'Make sure to fill in an expected answer',
      });
    }
    if (!actualAnswer) {
      throw new NodeOperationError(this.getNode(), 'Actual answer is missing', {
        description: 'Make sure to fill in an actual answer',
      });
    }

    const metricName = this.getNodeParameter(
      'options.metricName',
      i,
      'String similarity',
    ) as string;

    return {
      [metricName]: distance(expectedAnswer, actualAnswer),
    };
  },

  async helpfulness(this: IExecuteFunctions, i: number): Promise<IDataObject> {
    const userQuery = (this.getNodeParameter('userQuery', i, '') as string).toString().trim();
    const actualAnswer = (this.getNodeParameter('actualAnswer', i, '') as string).toString().trim();

    if (!userQuery) {
      throw new NodeOperationError(this.getNode(), 'User query is missing', {
        description: 'Make sure to fill in the user query in the User Query field',
      });
    }
    if (!actualAnswer) {
      throw new NodeOperationError(this.getNode(), 'Response is missing', {
        description: 'Make sure to fill in the response to evaluate in the Response field',
      });
    }

    // Get the connected LLM model
    const llm = (await this.getInputConnectionData('ai_languageModel', 0)) as BaseLanguageModel;

    if (!llm) {
      throw new NodeOperationError(this.getNode(), 'No language model connected', {
        description: 'Connect a language model to the Model input to use the helpfulness metric',
      });
    }

    // Get the system prompt and input prompt template, using defaults if not provided
    const systemPrompt = this.getNodeParameter('prompt', i, HELPFULNESS_PROMPT) as string;
    const inputPromptTemplate = this.getNodeParameter(
      'options.inputPrompt',
      i,
      HELPFULNESS_INPUT_PROMPT[0],
    ) as string;

    // Define the expected response schema
    const responseSchema = z.object({
      extended_reasoning: z
        .string()
        .describe('detailed step-by-step analysis of the response helpfulness'),
      reasoning_summary: z.string().describe('one sentence summary of the response helpfulness'),
      score: z
        .number()
        .int()
        .min(1)
        .max(5)
        .describe('integer from 1 to 5 representing the helpfulness score'),
    });

    // Create LangChain prompt templates
    const systemMessageTemplate = SystemMessagePromptTemplate.fromTemplate('{systemPrompt}');
    const humanMessageTemplate = HumanMessagePromptTemplate.fromTemplate(inputPromptTemplate);

    // Create the chat prompt template
    const chatPrompt = ChatPromptTemplate.fromMessages([
      systemMessageTemplate,
      humanMessageTemplate,
    ]);

    // Create chain with structured output
    if (!llm.withStructuredOutput) {
      throw new NodeOperationError(
        this.getNode(),
        'Language model does not support structured output',
        {
          description:
            'The connected language model does not support structured output. Please use a compatible model.',
        },
      );
    }
    const chain = chatPrompt.pipe(llm.withStructuredOutput(responseSchema));
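    // The chain resolves to an object matching responseSchema, e.g.
    // { extended_reasoning: '...', reasoning_summary: '...', score: 4 }.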

    try {
      const response = await chain.invoke({
        systemPrompt,
        user_query: userQuery,
        actual_answer: actualAnswer,
      });

      const metricName = this.getNodeParameter('options.metricName', i, 'Helpfulness') as string;

      // Return the score as the main metric
      return {
        [metricName]: response.score,
      };
    } catch (error) {
      throw new NodeOperationError(this.getNode(), 'Failed to evaluate helpfulness', {
        description: `Error from language model: ${error instanceof Error ? error.message : String(error)}`,
      });
    }
  },

  async correctness(this: IExecuteFunctions, i: number): Promise<IDataObject> {
    const expectedAnswer = (this.getNodeParameter('expectedAnswer', i, '') as string)
      .toString()
      .trim();
    const actualAnswer = (this.getNodeParameter('actualAnswer', i, '') as string).toString().trim();

    if (!expectedAnswer) {
      throw new NodeOperationError(this.getNode(), 'Expected answer is missing', {
        description: 'Make sure to fill in an expected answer',
      });
    }
    if (!actualAnswer) {
      throw new NodeOperationError(this.getNode(), 'Actual answer is missing', {
        description: 'Make sure to fill in an actual answer',
      });
    }

    // Get the connected LLM model
    const llm = (await this.getInputConnectionData('ai_languageModel', 0)) as BaseLanguageModel;

    if (!llm) {
      throw new NodeOperationError(this.getNode(), 'No language model connected', {
        description: 'Connect a language model to the Model input to use the correctness metric',
      });
    }

    // Get the system prompt and input prompt template, using defaults if not provided
    const systemPrompt = this.getNodeParameter('prompt', i, CORRECTNESS_PROMPT) as string;
    const inputPromptTemplate = this.getNodeParameter(
      'options.inputPrompt',
      i,
      CORRECTNESS_INPUT_PROMPT[0],
    ) as string;

    // Define the expected response schema
    const responseSchema = z.object({
      extended_reasoning: z
        .string()
        .describe('detailed step-by-step analysis of factual accuracy and similarity'),
      reasoning_summary: z.string().describe('one sentence summary focusing on key differences'),
      score: z
        .number()
        .int()
        .min(1)
        .max(5)
        .describe('integer from 1 to 5 representing the similarity score'),
    });

    // Create LangChain prompt templates
    const systemMessageTemplate = SystemMessagePromptTemplate.fromTemplate('{systemPrompt}');
    const humanMessageTemplate = HumanMessagePromptTemplate.fromTemplate(inputPromptTemplate);

    // Create the chat prompt template
    const chatPrompt = ChatPromptTemplate.fromMessages([
      systemMessageTemplate,
      humanMessageTemplate,
    ]);

    // Create chain with structured output
    if (!llm.withStructuredOutput) {
      throw new NodeOperationError(
        this.getNode(),
        'Language model does not support structured output',
        {
          description:
            'The connected language model does not support structured output. Please use a compatible model.',
        },
      );
    }
    const chain = chatPrompt.pipe(llm.withStructuredOutput(responseSchema));

    try {
      const response = await chain.invoke({
        systemPrompt,
        actual_answer: actualAnswer,
        expected_answer: expectedAnswer,
      });

      const metricName = this.getNodeParameter('options.metricName', i, 'Correctness') as string;

      // Return the score as the main metric
      return {
        [metricName]: response.score,
      };
    } catch (error) {
      throw new NodeOperationError(this.getNode(), 'Failed to evaluate correctness', {
        description: `Error from language model: ${error instanceof Error ? error.message : String(error)}`,
      });
    }
  },
};

@@ -896,6 +896,7 @@
    "currency-codes": "2.1.0",
    "eventsource": "2.0.2",
    "fast-glob": "catalog:",
    "fastest-levenshtein": "^1.0.16",
    "fflate": "0.7.4",
    "generate-schema": "2.6.0",
    "get-system-fonts": "2.0.2",

@@ -15,6 +15,7 @@ export * from './expression';
export * from './expressions/expression-helpers';
export * from './from-ai-parse-utils';
export * from './node-helpers';
export * from './tool-helpers';
export * from './node-reference-parser-utils';
export * from './metadata-utils';
export * from './workflow';

packages/workflow/src/tool-helpers.ts (new file, 10 lines)
@@ -0,0 +1,10 @@
import type { INode } from './interfaces';

/**
 * Converts a node name to a valid tool name by replacing special characters with underscores
 * and collapsing consecutive underscores into a single one.
 */
export function nodeNameToToolName(nodeOrName: INode | string): string {
  const name = typeof nodeOrName === 'string' ? nodeOrName : nodeOrName.name;
  return name.replace(/[\s.?!=+#@&*()[\]{}:;,<>\/\\'"^%$_]+/g, '_');
}
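// e.g. nodeNameToToolName('Test.Node?With!Special=Chars') === 'Test_Node_With_Special_Chars'
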
packages/workflow/test/tool-helpers.test.ts (new file, 75 lines)
@@ -0,0 +1,75 @@
import type { INode } from '../src/interfaces';
import { nodeNameToToolName } from '../src/tool-helpers';

describe('nodeNameToToolName', () => {
  const getNodeWithName = (name: string): INode => ({
    id: 'test-node',
    name,
    type: 'test',
    typeVersion: 1,
    position: [0, 0] as [number, number],
    parameters: {},
  });

  it('should replace spaces with underscores', () => {
    expect(nodeNameToToolName(getNodeWithName('Test Node'))).toBe('Test_Node');
  });

  it('should replace dots with underscores', () => {
    expect(nodeNameToToolName(getNodeWithName('Test.Node'))).toBe('Test_Node');
  });

  it('should replace question marks with underscores', () => {
    expect(nodeNameToToolName(getNodeWithName('Test?Node'))).toBe('Test_Node');
  });

  it('should replace exclamation marks with underscores', () => {
    expect(nodeNameToToolName(getNodeWithName('Test!Node'))).toBe('Test_Node');
  });

  it('should replace equals signs with underscores', () => {
    expect(nodeNameToToolName(getNodeWithName('Test=Node'))).toBe('Test_Node');
  });

  it('should replace multiple special characters with underscores', () => {
    expect(nodeNameToToolName(getNodeWithName('Test.Node?With!Special=Chars'))).toBe(
      'Test_Node_With_Special_Chars',
    );
  });

  it('should handle names that already have underscores', () => {
    expect(nodeNameToToolName(getNodeWithName('Test_Node'))).toBe('Test_Node');
  });

  it('should handle names with consecutive special characters', () => {
    expect(nodeNameToToolName(getNodeWithName('Test..!!??==Node'))).toBe('Test_Node');
  });

  it('should replace various special characters with underscores', () => {
    expect(nodeNameToToolName(getNodeWithName('Test#+*()[]{}:;,<>/\\\'"%$Node'))).toBe('Test_Node');
  });

  describe('when passed a string directly', () => {
    it('should replace spaces with underscores', () => {
      expect(nodeNameToToolName('Test Node')).toBe('Test_Node');
    });

    it('should replace dots with underscores', () => {
      expect(nodeNameToToolName('Test.Node')).toBe('Test_Node');
    });

    it('should replace multiple special characters with underscores', () => {
      expect(nodeNameToToolName('Test.Node?With!Special=Chars')).toBe(
        'Test_Node_With_Special_Chars',
      );
    });

    it('should handle consecutive special characters', () => {
      expect(nodeNameToToolName('Test..!!??==Node')).toBe('Test_Node');
    });

    it('should replace various special characters with underscores', () => {
      expect(nodeNameToToolName('Test#+*()[]{}:;,<>/\\\'"%$Node')).toBe('Test_Node');
    });
  });
});

pnpm-lock.yaml (generated, 31 lines changed)
@@ -871,7 +871,7 @@ importers:
        version: 4.3.0
      '@getzep/zep-cloud':
        specifier: 1.0.12
        version: 1.0.12(1a792e11aeaf9de4c46582c2a158f676)
        version: 1.0.12(bf0746fe28f165a259d7498318b00157)
      '@getzep/zep-js':
        specifier: 0.9.0
        version: 0.9.0
@@ -898,7 +898,7 @@ importers:
        version: 0.3.4(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(encoding@0.1.13)
      '@langchain/community':
        specifier: 'catalog:'
        version: 0.3.47(fe5c91724b6df225451a5efa63588a7e)
        version: 0.3.47(316b601a5b191eb3f62cba3a28205aa9)
      '@langchain/core':
        specifier: 'catalog:'
        version: 0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))
@@ -1009,7 +1009,7 @@ importers:
        version: 23.0.1
      langchain:
        specifier: 0.3.29
        version: 0.3.29(@langchain/anthropic@0.3.23(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/aws@0.1.11(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/cohere@0.3.4(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(encoding@0.1.13))(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(@langchain/google-genai@0.2.13(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/google-vertexai@0.2.13(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/groq@0.2.3(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(encoding@0.1.13))(@langchain/mistralai@0.2.1(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(zod@3.25.67))(@langchain/ollama@0.2.3(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(axios@1.10.0)(cheerio@1.0.0)(handlebars@4.7.8)(openai@5.8.1(ws@8.18.2)(zod@3.25.67))(ws@8.18.2)
        version: 0.3.29(@langchain/anthropic@0.3.23(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/aws@0.1.11(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/cohere@0.3.4(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(encoding@0.1.13))(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(@langchain/google-genai@0.2.13(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/google-vertexai@0.2.13(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/groq@0.2.3(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(encoding@0.1.13))(@langchain/mistralai@0.2.1(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(zod@3.25.67))(@langchain/ollama@0.2.3(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(axios@1.10.0(debug@4.4.1))(cheerio@1.0.0)(handlebars@4.7.8)(openai@5.8.1(ws@8.18.2)(zod@3.25.67))(ws@8.18.2)
      lodash:
        specifier: 'catalog:'
        version: 4.17.21
@@ -2660,6 +2660,9 @@ importers:
      fast-glob:
        specifier: 'catalog:'
        version: 3.2.12
      fastest-levenshtein:
        specifier: ^1.0.16
        version: 1.0.16
      fflate:
        specifier: 0.7.4
        version: 0.7.4
@@ -18056,7 +18059,7 @@ snapshots:
  '@gar/promisify@1.1.3':
    optional: true

  '@getzep/zep-cloud@1.0.12(1a792e11aeaf9de4c46582c2a158f676)':
  '@getzep/zep-cloud@1.0.12(bf0746fe28f165a259d7498318b00157)':
    dependencies:
      form-data: 4.0.0
      node-fetch: 2.7.0(encoding@0.1.13)
@@ -18065,7 +18068,7 @@ snapshots:
      zod: 3.25.67
    optionalDependencies:
      '@langchain/core': 0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))
      langchain: 0.3.29(@langchain/anthropic@0.3.23(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/aws@0.1.11(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/cohere@0.3.4(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(encoding@0.1.13))(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(@langchain/google-genai@0.2.13(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/google-vertexai@0.2.13(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/groq@0.2.3(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(encoding@0.1.13))(@langchain/mistralai@0.2.1(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(zod@3.25.67))(@langchain/ollama@0.2.3(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(axios@1.10.0)(cheerio@1.0.0)(handlebars@4.7.8)(openai@5.8.1(ws@8.18.2)(zod@3.25.67))(ws@8.18.2)
      langchain: 0.3.29(@langchain/anthropic@0.3.23(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/aws@0.1.11(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/cohere@0.3.4(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(encoding@0.1.13))(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(@langchain/google-genai@0.2.13(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/google-vertexai@0.2.13(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/groq@0.2.3(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(encoding@0.1.13))(@langchain/mistralai@0.2.1(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(zod@3.25.67))(@langchain/ollama@0.2.3(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(axios@1.10.0(debug@4.4.1))(cheerio@1.0.0)(handlebars@4.7.8)(openai@5.8.1(ws@8.18.2)(zod@3.25.67))(ws@8.18.2)
    transitivePeerDependencies:
      - encoding

@@ -18619,7 +18622,7 @@ snapshots:
      - aws-crt
      - encoding

  '@langchain/community@0.3.47(fe5c91724b6df225451a5efa63588a7e)':
  '@langchain/community@0.3.47(316b601a5b191eb3f62cba3a28205aa9)':
    dependencies:
      '@browserbasehq/stagehand': 1.9.0(@playwright/test@1.53.0)(deepmerge@4.3.1)(dotenv@16.5.0)(encoding@0.1.13)(openai@5.8.1(ws@8.18.2)(zod@3.25.67))(zod@3.25.67)
      '@ibm-cloud/watsonx-ai': 1.1.2
@@ -18631,7 +18634,7 @@ snapshots:
      flat: 5.0.2
      ibm-cloud-sdk-core: 5.3.2
      js-yaml: 4.1.0
      langchain: 0.3.29(@langchain/anthropic@0.3.23(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/aws@0.1.11(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/cohere@0.3.4(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(encoding@0.1.13))(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(@langchain/google-genai@0.2.13(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/google-vertexai@0.2.13(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/groq@0.2.3(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(encoding@0.1.13))(@langchain/mistralai@0.2.1(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(zod@3.25.67))(@langchain/ollama@0.2.3(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(axios@1.10.0)(cheerio@1.0.0)(handlebars@4.7.8)(openai@5.8.1(ws@8.18.2)(zod@3.25.67))(ws@8.18.2)
      langchain: 0.3.29(@langchain/anthropic@0.3.23(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/aws@0.1.11(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/cohere@0.3.4(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(encoding@0.1.13))(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(@langchain/google-genai@0.2.13(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/google-vertexai@0.2.13(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/groq@0.2.3(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(encoding@0.1.13))(@langchain/mistralai@0.2.1(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(zod@3.25.67))(@langchain/ollama@0.2.3(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(axios@1.10.0(debug@4.4.1))(cheerio@1.0.0)(handlebars@4.7.8)(openai@5.8.1(ws@8.18.2)(zod@3.25.67))(ws@8.18.2)
      langsmith: 0.3.33(openai@5.8.1(ws@8.18.2)(zod@3.25.67))
      openai: 5.8.1(ws@8.18.2)(zod@3.25.67)
      uuid: 10.0.0
@@ -18645,7 +18648,7 @@ snapshots:
      '@aws-sdk/credential-provider-node': 3.808.0
      '@azure/storage-blob': 12.26.0
      '@browserbasehq/sdk': 2.6.0(encoding@0.1.13)
      '@getzep/zep-cloud': 1.0.12(1a792e11aeaf9de4c46582c2a158f676)
      '@getzep/zep-cloud': 1.0.12(bf0746fe28f165a259d7498318b00157)
      '@getzep/zep-js': 0.9.0
      '@google-ai/generativelanguage': 2.6.0(encoding@0.1.13)
      '@google-cloud/storage': 7.12.1(encoding@0.1.13)
@@ -22303,7 +22306,7 @@ snapshots:

  axios@1.10.0(debug@4.4.1):
    dependencies:
      follow-redirects: 1.15.9(debug@4.4.1)
      follow-redirects: 1.15.9(debug@4.3.6)
      form-data: 4.0.2
      proxy-from-env: 1.1.0
    transitivePeerDependencies:
@@ -24693,10 +24696,6 @@ snapshots:
    optionalDependencies:
      debug: 4.4.0

  follow-redirects@1.15.9(debug@4.4.1):
    optionalDependencies:
      debug: 4.4.1(supports-color@8.1.1)

  for-each@0.3.3:
    dependencies:
      is-callable: 1.2.7
@@ -25380,7 +25379,7 @@ snapshots:
      isstream: 0.1.2
      jsonwebtoken: 9.0.2
      mime-types: 2.1.35
      retry-axios: 2.6.0(axios@1.10.0)
      retry-axios: 2.6.0(axios@1.10.0(debug@4.4.1))
      tough-cookie: 4.1.4
    transitivePeerDependencies:
      - supports-color
@@ -26588,7 +26587,7 @@ snapshots:

  kuler@2.0.0: {}

  langchain@0.3.29(@langchain/anthropic@0.3.23(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/aws@0.1.11(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/cohere@0.3.4(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(encoding@0.1.13))(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(@langchain/google-genai@0.2.13(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/google-vertexai@0.2.13(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/groq@0.2.3(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(encoding@0.1.13))(@langchain/mistralai@0.2.1(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(zod@3.25.67))(@langchain/ollama@0.2.3(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(axios@1.10.0)(cheerio@1.0.0)(handlebars@4.7.8)(openai@5.8.1(ws@8.18.2)(zod@3.25.67))(ws@8.18.2):
  langchain@0.3.29(@langchain/anthropic@0.3.23(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/aws@0.1.11(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/cohere@0.3.4(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(encoding@0.1.13))(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(@langchain/google-genai@0.2.13(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/google-vertexai@0.2.13(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(@langchain/groq@0.2.3(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(encoding@0.1.13))(@langchain/mistralai@0.2.1(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(zod@3.25.67))(@langchain/ollama@0.2.3(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))))(axios@1.10.0(debug@4.4.1))(cheerio@1.0.0)(handlebars@4.7.8)(openai@5.8.1(ws@8.18.2)(zod@3.25.67))(ws@8.18.2):
    dependencies:
      '@langchain/core': 0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67))
      '@langchain/openai': 0.5.16(@langchain/core@0.3.61(openai@5.8.1(ws@8.18.2)(zod@3.25.67)))(ws@8.18.2)
@@ -29090,7 +29089,7 @@ snapshots:
      onetime: 5.1.2
      signal-exit: 3.0.7

  retry-axios@2.6.0(axios@1.10.0):
  retry-axios@2.6.0(axios@1.10.0(debug@4.4.1)):
    dependencies:
      axios: 1.10.0(debug@4.4.1)