From 03c75c365bb7d384924ccaf1114fc267c7569327 Mon Sep 17 00:00:00 2001
From: jeanpaul
Date: Fri, 8 Aug 2025 12:38:58 +0200
Subject: [PATCH] fix(core): Fix metric default value handling and add AI
 model connection validation for setMetrics operation in Evaluation (#18088)

---
 .../__tests__/test-runner.service.ee.test.ts  | 279 ++++++++++++++++--
 .../test-runner/test-runner.service.ee.ts     |  28 +-
 .../Evaluation/Evaluation/Description.node.ts |   3 +-
 .../Evaluation/Evaluation.node.ee.ts          |   4 +-
 .../nodes/Evaluation/utils/evaluationUtils.ts |   7 +-
 packages/workflow/src/evaluation-helpers.ts   |  25 ++
 packages/workflow/src/index.ts                |   1 +
 7 files changed, 307 insertions(+), 40 deletions(-)
 create mode 100644 packages/workflow/src/evaluation-helpers.ts

diff --git a/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts b/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts
index cafed71322..a17c0dc834 100644
--- a/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts
+++ b/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts
@@ -9,7 +9,11 @@ import { readFileSync } from 'fs';
 import type { Mock } from 'jest-mock';
 import { mock } from 'jest-mock-extended';
 import type { ErrorReporter } from 'n8n-core';
-import { EVALUATION_NODE_TYPE, EVALUATION_TRIGGER_NODE_TYPE } from 'n8n-workflow';
+import {
+	EVALUATION_NODE_TYPE,
+	EVALUATION_TRIGGER_NODE_TYPE,
+	NodeConnectionTypes,
+} from 'n8n-workflow';
 import type { IWorkflowBase, IRun, ExecutionError } from 'n8n-workflow';
 import path from 'path';
@@ -1225,46 +1229,45 @@ describe('TestRunnerService', () => {
 			}
 		});

-		it('should fail for version >= 4.7 with missing metric parameter', () => {
+		it('should pass for version >= 4.7 with missing metric parameter (uses default correctness) when model connected', () => {
 			const workflow = mock<IWorkflowBase>({
 				nodes: [
+					{
+						id: 'model1',
+						name: 'OpenAI Model',
+						type: '@n8n/n8n-nodes-langchain.lmChatOpenAi',
+						typeVersion: 1,
+						position: [0, 0],
+						parameters: {},
+					},
 					{
 						id: 'node1',
 						name: 'Set Metrics',
 						type: EVALUATION_NODE_TYPE,
 						typeVersion: 4.7,
-						position: [0, 0],
+						position: [100, 0],
 						parameters: {
 							operation: 'setMetrics',
-							metrics: {
-								assignments: [
-									{
-										id: '1',
-										name: 'accuracy',
-										value: 0.95,
-									},
-								],
-							},
+							// metric parameter is undefined, which means it uses the default value 'correctness'
+							// This should pass, since 'correctness' is valid and a model is connected
 						},
 					},
 				],
-				connections: {},
+				connections: {
+					'OpenAI Model': {
+						[NodeConnectionTypes.AiLanguageModel]: [
+							[{ node: 'Set Metrics', type: 'ai_languageModel', index: 0 }],
+						],
+					},
+				},
 			});

-			// Missing metric parameter - this should fail for versions >= 4.7
-			workflow.nodes[0].parameters.metric = undefined;
+			// Missing metric parameter - this should pass for versions >= 4.7, since it defaults to 'correctness' and a model is connected
+			workflow.nodes[1].parameters.metric = undefined;

 			expect(() => {
 				(testRunnerService as any).validateSetMetricsNodes(workflow);
-			}).toThrow(TestRunError);
-
-			try {
-				(testRunnerService as any).validateSetMetricsNodes(workflow);
-			} catch (error) {
-				expect(error).toBeInstanceOf(TestRunError);
-				expect(error.code).toBe('SET_METRICS_NODE_NOT_CONFIGURED');
-				expect(error.extra).toEqual({ node_name: 'Set Metrics' });
-			}
+			}).not.toThrow();
 		});

 		it('should pass for version >= 4.7 with valid customMetrics configuration', () => {
@@ -1299,7 +1302,7 @@
 			}).not.toThrow();
 		});

-		it('should pass for version >= 4.7 with non-customMetrics metric (no metrics validation needed)', () => {
+		it('should pass for version >= 4.7 with non-AI metric (no model connection needed)', () => {
 			const workflow = mock<IWorkflowBase>({
 				nodes: [
 					{
 						id: 'node1',
 						name: 'Set Metrics',
 						type: EVALUATION_NODE_TYPE,
 						typeVersion: 4.7,
 						position: [0, 0],
 						parameters: {
 							operation: 'setMetrics',
-							metric: 'correctness',
-							// No metrics parameter needed for non-customMetrics
+							metric: 'stringSimilarity',
+							// Non-AI metrics don't need model connection
 						},
 					},
 				],
@@ -1360,12 +1363,20 @@
 		it('should handle mixed versions correctly', () => {
 			const workflow = mock<IWorkflowBase>({
 				nodes: [
+					{
+						id: 'model1',
+						name: 'OpenAI Model',
+						type: '@n8n/n8n-nodes-langchain.lmChatOpenAi',
+						typeVersion: 1,
+						position: [0, 0],
+						parameters: {},
+					},
 					{
 						id: 'node1',
 						name: 'Set Metrics Old',
 						type: EVALUATION_NODE_TYPE,
 						typeVersion: 4.6,
-						position: [0, 0],
+						position: [100, 0],
 						parameters: {
 							operation: 'setMetrics',
 							// No metric parameter for old version
@@ -1385,21 +1396,227 @@
 						name: 'Set Metrics New',
 						type: EVALUATION_NODE_TYPE,
 						typeVersion: 4.7,
-						position: [100, 0],
+						position: [200, 0],
 						parameters: {
 							operation: 'setMetrics',
 							metric: 'correctness',
-							// No metrics parameter needed for non-customMetrics
+							// Correctness needs model connection for version 4.7+
 						},
 					},
 				],
-				connections: {},
+				connections: {
+					'OpenAI Model': {
+						[NodeConnectionTypes.AiLanguageModel]: [
+							[{ node: 'Set Metrics New', type: 'ai_languageModel', index: 0 }],
+						],
+					},
+				},
 			});

 			expect(() => {
 				(testRunnerService as any).validateSetMetricsNodes(workflow);
 			}).not.toThrow();
 		});
+
+		describe('Model connection validation', () => {
+			it('should pass when correctness metric has model connected', () => {
+				const workflow = mock<IWorkflowBase>({
+					nodes: [
+						{
+							id: 'model1',
+							name: 'OpenAI Model',
+							type: '@n8n/n8n-nodes-langchain.lmChatOpenAi',
+							typeVersion: 1,
+							position: [0, 0],
+							parameters: {},
+						},
+						{
+							id: 'metrics1',
+							name: 'Set Metrics',
+							type: EVALUATION_NODE_TYPE,
+							typeVersion: 4.7,
+							position: [100, 0],
+							parameters: {
+								operation: 'setMetrics',
+								metric: 'correctness',
+							},
+						},
+					],
+					connections: {
+						'OpenAI Model': {
+							[NodeConnectionTypes.AiLanguageModel]: [
+								[{ node: 'Set Metrics', type: 'ai_languageModel', index: 0 }],
+							],
+						},
+					},
+				});
+
+				expect(() => {
+					(testRunnerService as any).validateSetMetricsNodes(workflow);
+				}).not.toThrow();
+			});
+
+			it('should fail when correctness metric has no model connected', () => {
+				const workflow = mock<IWorkflowBase>({
+					nodes: [
+						{
+							id: 'metrics1',
+							name: 'Set Metrics',
+							type: EVALUATION_NODE_TYPE,
+							typeVersion: 4.7,
+							position: [0, 0],
+							parameters: {
+								operation: 'setMetrics',
+								metric: 'correctness',
+							},
+						},
+					],
+					connections: {},
+				});
+
+				expect(() => {
+					(testRunnerService as any).validateSetMetricsNodes(workflow);
+				}).toThrow(TestRunError);
+			});
+
+			it('should pass when helpfulness metric has model connected', () => {
+				const workflow = mock<IWorkflowBase>({
+					nodes: [
+						{
+							id: 'model1',
+							name: 'OpenAI Model',
+							type: '@n8n/n8n-nodes-langchain.lmChatOpenAi',
+							typeVersion: 1,
+							position: [0, 0],
+							parameters: {},
+						},
+						{
+							id: 'metrics1',
+							name: 'Set Metrics',
+							type: EVALUATION_NODE_TYPE,
+							typeVersion: 4.7,
+							position: [100, 0],
+							parameters: {
+								operation: 'setMetrics',
+								metric: 'helpfulness',
+							},
+						},
+					],
+					connections: {
+						'OpenAI Model': {
+							[NodeConnectionTypes.AiLanguageModel]: [
+								[{ node: 'Set Metrics', type: 'ai_languageModel', index: 0 }],
+							],
+						},
+					},
+				});
+
+				expect(() => {
+					(testRunnerService as any).validateSetMetricsNodes(workflow);
+				}).not.toThrow();
+			});
+
+			it('should fail when helpfulness metric has no model connected', () => {
+				const workflow = mock<IWorkflowBase>({
+					nodes: [
+						{
+							id: 'metrics1',
+							name: 'Set Metrics',
+							type: EVALUATION_NODE_TYPE,
+							typeVersion: 4.7,
+							position: [0, 0],
+							parameters: {
+								operation: 'setMetrics',
+								metric: 'helpfulness',
+							},
+						},
+					],
+					connections: {},
+				});
+
+				expect(() => {
+					(testRunnerService as any).validateSetMetricsNodes(workflow);
+				}).toThrow(TestRunError);
+			});
+
+			it('should fail when default correctness metric (undefined) has no model connected', () => {
+				const workflow = mock<IWorkflowBase>({
+					nodes: [
+						{
+							id: 'metrics1',
+							name: 'Set Metrics',
+							type: EVALUATION_NODE_TYPE,
+							typeVersion: 4.7,
+							position: [0, 0],
+							parameters: {
+								operation: 'setMetrics',
+								metric: undefined, // explicitly set to undefined to test default behavior
+							},
+						},
+					],
+					connections: {},
+				});
+
+				expect(() => {
+					(testRunnerService as any).validateSetMetricsNodes(workflow);
+				}).toThrow(TestRunError);
+			});
+
+			it('should pass when non-AI metrics (customMetrics) have no model connected', () => {
+				const workflow = mock<IWorkflowBase>({
+					nodes: [
+						{
+							id: 'metrics1',
+							name: 'Set Metrics',
+							type: EVALUATION_NODE_TYPE,
+							typeVersion: 4.7,
+							position: [0, 0],
+							parameters: {
+								operation: 'setMetrics',
+								metric: 'customMetrics',
+								metrics: {
+									assignments: [
+										{
+											id: '1',
+											name: 'accuracy',
+											value: 0.95,
+										},
+									],
+								},
+							},
+						},
+					],
+					connections: {},
+				});
+
+				expect(() => {
+					(testRunnerService as any).validateSetMetricsNodes(workflow);
+				}).not.toThrow();
+			});
+
+			it('should pass when stringSimilarity metric has no model connected', () => {
+				const workflow = mock<IWorkflowBase>({
+					nodes: [
+						{
+							id: 'metrics1',
+							name: 'Set Metrics',
+							type: EVALUATION_NODE_TYPE,
+							typeVersion: 4.7,
+							position: [0, 0],
+							parameters: {
+								operation: 'setMetrics',
+								metric: 'stringSimilarity',
+							},
+						},
+					],
+					connections: {},
+				});
+
+				expect(() => {
+					(testRunnerService as any).validateSetMetricsNodes(workflow);
+				}).not.toThrow();
+			});
+		});
 	});
 });
diff --git a/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts b/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts
index 8c79d7af41..68d1b605f4 100644
--- a/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts
+++ b/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts
@@ -8,6 +8,8 @@ import {
 	EVALUATION_TRIGGER_NODE_TYPE,
 	ExecutionCancelledError,
 	NodeConnectionTypes,
+	metricRequiresModelConnection,
+	DEFAULT_EVALUATION_METRIC,
 } from 'n8n-workflow';
 import type {
 	IDataObject,
@@ -102,6 +104,16 @@ export class TestRunnerService {
 	 * Checks if the Evaluation Set Metrics nodes are present in the workflow
 	 * and are configured correctly.
 	 */
+	private hasModelNodeConnected(workflow: IWorkflowBase, targetNodeName: string): boolean {
+		// Check if there's a node connected to the target node via an ai_languageModel connection
+		return Object.keys(workflow.connections).some((sourceNodeName) => {
+			const connections = workflow.connections[sourceNodeName];
+			return connections?.[NodeConnectionTypes.AiLanguageModel]?.[0]?.some(
+				(connection) => connection.node === targetNodeName,
+			);
+		});
+	}
+
 	private validateSetMetricsNodes(workflow: IWorkflowBase) {
 		const metricsNodes = TestRunnerService.getEvaluationMetricsNodes(workflow);
 		if (metricsNodes.length === 0) {
@@ -113,11 +125,6 @@
 				return true;
 			}

-			// For versions 4.7+, check if metric parameter is missing
-			if (node.typeVersion >= 4.7 && !node.parameters.metric) {
-				return true;
-			}
-
 			// Check customMetrics configuration if:
 			// - Version 4.7+ and metric is 'customMetrics'
 			// - Version < 4.7 (customMetrics is default)
@@ -134,6 +141,17 @@
 				);
 			}

+			// For version 4.7+, check whether an AI-based metric is missing its model connection
+			if (node.typeVersion >= 4.7) {
+				const metric = (node.parameters.metric ?? DEFAULT_EVALUATION_METRIC) as string;
+				if (
+					metricRequiresModelConnection(metric) && // See packages/workflow/src/evaluation-helpers.ts
+					!this.hasModelNodeConnected(workflow, node.name)
+				) {
+					return true;
+				}
+			}
+
 			return false;
 		});
diff --git a/packages/nodes-base/nodes/Evaluation/Evaluation/Description.node.ts b/packages/nodes-base/nodes/Evaluation/Evaluation/Description.node.ts
index b6aceebdce..385aa01a4d 100644
--- a/packages/nodes-base/nodes/Evaluation/Evaluation/Description.node.ts
+++ b/packages/nodes-base/nodes/Evaluation/Evaluation/Description.node.ts
@@ -1,4 +1,5 @@
 import type { INodeProperties } from 'n8n-workflow';
+import { DEFAULT_EVALUATION_METRIC } from 'n8n-workflow';

 import {
 	CORRECTNESS_PROMPT,
@@ -386,7 +387,7 @@ export const setMetricsProperties: INodeProperties[] = [
 				description: 'Define your own metric(s)',
 			},
 		],
-		default: 'correctness',
+		default: DEFAULT_EVALUATION_METRIC,
 		displayOptions: {
 			show: {
 				operation: ['setMetrics'],
diff --git a/packages/nodes-base/nodes/Evaluation/Evaluation/Evaluation.node.ee.ts b/packages/nodes-base/nodes/Evaluation/Evaluation/Evaluation.node.ee.ts
index af2e39af3e..e89e3ff41b 100644
--- a/packages/nodes-base/nodes/Evaluation/Evaluation/Evaluation.node.ee.ts
+++ b/packages/nodes-base/nodes/Evaluation/Evaluation/Evaluation.node.ee.ts
@@ -22,6 +22,7 @@ import {
 	setOutputs,
 	setInputs,
 } from '../utils/evaluationUtils';
+import { metricRequiresModelConnection } from 'n8n-workflow'; // See packages/workflow/src/evaluation-helpers.ts

 export class Evaluation implements INodeType {
 	description: INodeTypeDescription = {
@@ -37,7 +38,8 @@ export class Evaluation implements INodeType {
 			name: 'Evaluation',
 			color: '#c3c9d5',
 		},
-		inputs: `={{(${getInputConnectionTypes})($parameter)}}`,
+		// Pass the function in explicitly, since the expression context doesn't allow imports inside getInputConnectionTypes
+		inputs: `={{(${getInputConnectionTypes})($parameter, ${metricRequiresModelConnection})}}`,
 		outputs: `={{(${getOutputConnectionTypes})($parameter)}}`,
 		codex: {
 			alias: ['Test', 'Metrics', 'Evals', 'Set Output', 'Set Metrics'],
diff --git a/packages/nodes-base/nodes/Evaluation/utils/evaluationUtils.ts b/packages/nodes-base/nodes/Evaluation/utils/evaluationUtils.ts
index a46af3e19b..8275abd69a 100644
--- a/packages/nodes-base/nodes/Evaluation/utils/evaluationUtils.ts
+++ b/packages/nodes-base/nodes/Evaluation/utils/evaluationUtils.ts
@@ -240,10 +240,13 @@ export function getOutputConnectionTypes(parameters: INodeParameters) {
 	return [{ type: 'main' }];
 }

-export function getInputConnectionTypes(parameters: INodeParameters) {
+export function getInputConnectionTypes(
+	parameters: INodeParameters,
+	metricRequiresModelConnectionFn: (metric: string) => boolean,
+) {
 	if (
 		parameters.operation === 'setMetrics' &&
-		['correctness', 'helpfulness'].includes(parameters.metric as string)
+		metricRequiresModelConnectionFn(parameters.metric as string)
 	) {
 		return [
 			{ type: 'main' },
diff --git a/packages/workflow/src/evaluation-helpers.ts b/packages/workflow/src/evaluation-helpers.ts
new file mode 100644
index 0000000000..fb3a064fe6
--- /dev/null
+++ b/packages/workflow/src/evaluation-helpers.ts
@@ -0,0 +1,25 @@
+/**
+ * Evaluation-related utility functions
+ *
+ * This file contains utilities that need to be shared between different packages
+ * to avoid circular dependencies. For example, the evaluation test-runner (in CLI package)
+ * and the Evaluation node (in nodes-base package) both need to know which metrics
+ * require AI model connections, but they can't import from each other directly.
+ *
+ * By placing shared utilities here in the workflow package (which both packages depend on),
+ * we avoid circular dependency issues.
+ */
+
+/**
+ * Default metric type used in evaluations
+ */
+export const DEFAULT_EVALUATION_METRIC = 'correctness';
+
+/**
+ * Determines if a given evaluation metric requires an AI model connection
+ * @param metric The metric name to check
+ * @returns true if the metric requires an AI model connection
+ */
+export function metricRequiresModelConnection(metric: string): boolean {
+	return ['correctness', 'helpfulness'].includes(metric);
+}
diff --git a/packages/workflow/src/index.ts b/packages/workflow/src/index.ts
index 4be0cbb82a..9676365ee0 100644
--- a/packages/workflow/src/index.ts
+++ b/packages/workflow/src/index.ts
@@ -67,6 +67,7 @@ export { ExpressionExtensions } from './extensions';
 export * as ExpressionParser from './extensions/expression-parser';
 export { NativeMethods } from './native-methods';
 export * from './node-parameters/filter-parameter';
+export * from './evaluation-helpers';

 export type {
 	DocMetadata,
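
The validation flow this patch introduces condenses to the following standalone sketch. The types here are simplified stand-ins rather than the real n8n-workflow interfaces, and the string literal 'ai_languageModel' stands in for NodeConnectionTypes.AiLanguageModel; it illustrates how the default metric and the connection check compose, not the shipped implementation.

// Simplified stand-ins for the n8n connection structures.
type Connection = { node: string; type: string; index: number };
type Connections = Record<string, Record<string, Array<Connection[] | null>>>;

// Mirrors packages/workflow/src/evaluation-helpers.ts
const DEFAULT_EVALUATION_METRIC = 'correctness';
function metricRequiresModelConnection(metric: string): boolean {
	return ['correctness', 'helpfulness'].includes(metric);
}

// Mirrors hasModelNodeConnected(): does any source node feed the target
// node through an ai_languageModel connection?
function hasModelNodeConnected(connections: Connections, targetNodeName: string): boolean {
	return Object.keys(connections).some((sourceNodeName) =>
		connections[sourceNodeName]?.ai_languageModel?.[0]?.some(
			(connection) => connection.node === targetNodeName,
		),
	);
}

// A Set Metrics node without an explicit metric falls back to the default.
const configuredMetric: string | undefined = undefined;
const metric = configuredMetric ?? DEFAULT_EVALUATION_METRIC; // 'correctness'

const connections: Connections = {
	'OpenAI Model': {
		ai_languageModel: [[{ node: 'Set Metrics', type: 'ai_languageModel', index: 0 }]],
	},
};

// 'correctness' is AI-based, so the node is valid only when a model is attached.
const misconfigured =
	metricRequiresModelConnection(metric) && !hasModelNodeConnected(connections, 'Set Metrics');
console.log(misconfigured); // false: a model is connected, so validation passes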
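
Likewise, a sketch of why Evaluation.node.ee.ts passes the helper into the inputs expression instead of importing it inside getInputConnectionTypes: both functions are stringified into the expression, so the inner one runs in a context where module imports don't exist. The second input entry below is simplified; the real node describes the model input in more detail.

// Stringified into the node's `inputs` expression, so it cannot rely on
// anything imported at module scope; collaborators must be passed in.
function getInputConnectionTypes(
	parameters: { operation?: string; metric?: string },
	metricRequiresModelConnectionFn: (metric: string) => boolean,
) {
	if (
		parameters.operation === 'setMetrics' &&
		metricRequiresModelConnectionFn(parameters.metric as string)
	) {
		return [{ type: 'main' }, { type: 'ai_languageModel' }]; // model input simplified here
	}
	return [{ type: 'main' }];
}

function metricRequiresModelConnection(metric: string): boolean {
	return ['correctness', 'helpfulness'].includes(metric);
}

// Both function bodies travel inside the expression string:
const inputs = `={{(${getInputConnectionTypes})($parameter, ${metricRequiresModelConnection})}}`;

// Exercising the logic directly, outside the expression context:
console.log(
	getInputConnectionTypes({ operation: 'setMetrics', metric: 'helpfulness' }, metricRequiresModelConnection),
);
// -> [{ type: 'main' }, { type: 'ai_languageModel' }]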