From 03c75c365bb7d384924ccaf1114fc267c7569327 Mon Sep 17 00:00:00 2001
From: jeanpaul
Date: Fri, 8 Aug 2025 12:38:58 +0200
Subject: [PATCH] fix(core): Fix metric default value handling and add AI
 model connection validation for setMetrics operation in Evaluation (#18088)

---
 .../__tests__/test-runner.service.ee.test.ts  | 279 ++++++++++++++++--
 .../test-runner/test-runner.service.ee.ts     |  28 +-
 .../Evaluation/Evaluation/Description.node.ts |   3 +-
 .../Evaluation/Evaluation.node.ee.ts          |   4 +-
 .../nodes/Evaluation/utils/evaluationUtils.ts |   7 +-
 packages/workflow/src/evaluation-helpers.ts   |  25 ++
 packages/workflow/src/index.ts                |   1 +
 7 files changed, 307 insertions(+), 40 deletions(-)
 create mode 100644 packages/workflow/src/evaluation-helpers.ts

diff --git a/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts b/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts
index cafed71322..a17c0dc834 100644
--- a/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts
+++ b/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts
@@ -9,7 +9,11 @@ import { readFileSync } from 'fs';
 import type { Mock } from 'jest-mock';
 import { mock } from 'jest-mock-extended';
 import type { ErrorReporter } from 'n8n-core';
-import { EVALUATION_NODE_TYPE, EVALUATION_TRIGGER_NODE_TYPE } from 'n8n-workflow';
+import {
+	EVALUATION_NODE_TYPE,
+	EVALUATION_TRIGGER_NODE_TYPE,
+	NodeConnectionTypes,
+} from 'n8n-workflow';
 import type { IWorkflowBase, IRun, ExecutionError } from 'n8n-workflow';
 import path from 'path';
@@ -1225,46 +1229,45 @@ describe('TestRunnerService', () => {
 			}
 		});

-		it('should fail for version >= 4.7 with missing metric parameter', () => {
+		it('should pass for version >= 4.7 with missing metric parameter (uses default correctness) when model connected', () => {
 			const workflow = mock<IWorkflowBase>({
 				nodes: [
+					{
+						id: 'model1',
+						name: 'OpenAI Model',
+						type: '@n8n/n8n-nodes-langchain.lmChatOpenAi',
+						typeVersion: 1,
+						position: [0, 0],
+						parameters: {},
+					},
 					{
 						id: 'node1',
 						name: 'Set Metrics',
 						type: EVALUATION_NODE_TYPE,
 						typeVersion: 4.7,
-						position: [0, 0],
+						position: [100, 0],
 						parameters: {
 							operation: 'setMetrics',
-							metrics: {
-								assignments: [
-									{
-										id: '1',
-										name: 'accuracy',
-										value: 0.95,
-									},
-								],
-							},
+							// metric parameter is undefined, which means it uses the default value 'correctness'
+							// This should pass, since 'correctness' is valid and a model is connected
 						},
 					},
 				],
-				connections: {},
+				connections: {
+					'OpenAI Model': {
+						[NodeConnectionTypes.AiLanguageModel]: [
+							[{ node: 'Set Metrics', type: 'ai_languageModel', index: 0 }],
+						],
+					},
+				},
 			});

-			// Missing metric parameter - this should fail for versions >= 4.7
-			workflow.nodes[0].parameters.metric = undefined;
+			// Missing metric parameter - this should pass for versions >= 4.7, since it defaults to 'correctness' and a model is connected
+			workflow.nodes[1].parameters.metric = undefined;

 			expect(() => {
 				(testRunnerService as any).validateSetMetricsNodes(workflow);
-			}).toThrow(TestRunError);
-
-			try {
-				(testRunnerService as any).validateSetMetricsNodes(workflow);
-			} catch (error) {
-				expect(error).toBeInstanceOf(TestRunError);
-				expect(error.code).toBe('SET_METRICS_NODE_NOT_CONFIGURED');
-				expect(error.extra).toEqual({ node_name: 'Set Metrics' });
-			}
+			}).not.toThrow();
 		});

 		it('should pass for version >= 4.7 with valid customMetrics configuration', () => {
@@ -1299,7 +1302,7 @@
 			}).not.toThrow();
 		});

-		it('should pass for version >= 4.7 with non-customMetrics metric (no metrics validation needed)', () => {
+		it('should pass for version >= 4.7 with non-AI metric (no model connection needed)', () => {
 			const workflow = mock<IWorkflowBase>({
 				nodes: [
 					{
 						id: 'node1',
 						name: 'Set Metrics',
 						type: EVALUATION_NODE_TYPE,
 						typeVersion: 4.7,
 						position: [0, 0],
 						parameters: {
 							operation: 'setMetrics',
-							metric: 'correctness',
-							// No metrics parameter needed for non-customMetrics
+							metric: 'stringSimilarity',
+							// Non-AI metrics don't need model connection
 						},
 					},
 				],
@@ -1360,12 +1363,20 @@
 		it('should handle mixed versions correctly', () => {
 			const workflow = mock<IWorkflowBase>({
 				nodes: [
+					{
+						id: 'model1',
+						name: 'OpenAI Model',
+						type: '@n8n/n8n-nodes-langchain.lmChatOpenAi',
+						typeVersion: 1,
+						position: [0, 0],
+						parameters: {},
+					},
 					{
 						id: 'node1',
 						name: 'Set Metrics Old',
 						type: EVALUATION_NODE_TYPE,
 						typeVersion: 4.6,
-						position: [0, 0],
+						position: [100, 0],
 						parameters: {
 							operation: 'setMetrics',
 							// No metric parameter for old version
@@ -1385,21 +1396,227 @@
 						name: 'Set Metrics New',
 						type: EVALUATION_NODE_TYPE,
 						typeVersion: 4.7,
-						position: [100, 0],
+						position: [200, 0],
 						parameters: {
 							operation: 'setMetrics',
 							metric: 'correctness',
-							// No metrics parameter needed for non-customMetrics
+							// Correctness needs model connection for version 4.7+
 						},
 					},
 				],
-				connections: {},
+				connections: {
+					'OpenAI Model': {
+						[NodeConnectionTypes.AiLanguageModel]: [
+							[{ node: 'Set Metrics New', type: 'ai_languageModel', index: 0 }],
+						],
+					},
+				},
 			});

 			expect(() => {
 				(testRunnerService as any).validateSetMetricsNodes(workflow);
 			}).not.toThrow();
 		});
+
+		describe('Model connection validation', () => {
+			it('should pass when correctness metric has model connected', () => {
+				const workflow = mock<IWorkflowBase>({
+					nodes: [
+						{
+							id: 'model1',
+							name: 'OpenAI Model',
+							type: '@n8n/n8n-nodes-langchain.lmChatOpenAi',
+							typeVersion: 1,
+							position: [0, 0],
+							parameters: {},
+						},
+						{
+							id: 'metrics1',
+							name: 'Set Metrics',
+							type: EVALUATION_NODE_TYPE,
+							typeVersion: 4.7,
+							position: [100, 0],
+							parameters: {
+								operation: 'setMetrics',
+								metric: 'correctness',
+							},
+						},
+					],
+					connections: {
+						'OpenAI Model': {
+							[NodeConnectionTypes.AiLanguageModel]: [
+								[{ node: 'Set Metrics', type: 'ai_languageModel', index: 0 }],
+							],
+						},
+					},
+				});
+
+				expect(() => {
+					(testRunnerService as any).validateSetMetricsNodes(workflow);
+				}).not.toThrow();
+			});
+
+			it('should fail when correctness metric has no model connected', () => {
+				const workflow = mock<IWorkflowBase>({
+					nodes: [
+						{
+							id: 'metrics1',
+							name: 'Set Metrics',
+							type: EVALUATION_NODE_TYPE,
+							typeVersion: 4.7,
+							position: [0, 0],
+							parameters: {
+								operation: 'setMetrics',
+								metric: 'correctness',
+							},
+						},
+					],
+					connections: {},
+				});
+
+				expect(() => {
+					(testRunnerService as any).validateSetMetricsNodes(workflow);
+				}).toThrow(TestRunError);
+			});
+
+			it('should pass when helpfulness metric has model connected', () => {
+				const workflow = mock<IWorkflowBase>({
+					nodes: [
+						{
+							id: 'model1',
+							name: 'OpenAI Model',
+							type: '@n8n/n8n-nodes-langchain.lmChatOpenAi',
+							typeVersion: 1,
+							position: [0, 0],
+							parameters: {},
+						},
+						{
+							id: 'metrics1',
+							name: 'Set Metrics',
+							type: EVALUATION_NODE_TYPE,
+							typeVersion: 4.7,
+							position: [100, 0],
+							parameters: {
+								operation: 'setMetrics',
+								metric: 'helpfulness',
+							},
+						},
+					],
+					connections: {
+						'OpenAI Model': {
+							[NodeConnectionTypes.AiLanguageModel]: [
+								[{ node: 'Set Metrics', type: 'ai_languageModel', index: 0 }],
+							],
+						},
+					},
+				});
+
+				expect(() => {
+					(testRunnerService as any).validateSetMetricsNodes(workflow);
+				}).not.toThrow();
+			});
+
+			it('should fail when helpfulness metric has no model connected', () => {
+				const workflow = mock<IWorkflowBase>({
+					nodes: [
+						{
+							id: 'metrics1',
+							name: 'Set Metrics',
+							type: EVALUATION_NODE_TYPE,
+							typeVersion: 4.7,
+							position: [0, 0],
+							parameters: {
+								operation: 'setMetrics',
+								metric: 'helpfulness',
+							},
+						},
+					],
+					connections: {},
+				});
+
+				expect(() => {
+					(testRunnerService as any).validateSetMetricsNodes(workflow);
+				}).toThrow(TestRunError);
+			});
+
+			it('should fail when default correctness metric (undefined) has no model connected', () => {
+				const workflow = mock<IWorkflowBase>({
+					nodes: [
+						{
+							id: 'metrics1',
+							name: 'Set Metrics',
+							type: EVALUATION_NODE_TYPE,
+							typeVersion: 4.7,
+							position: [0, 0],
+							parameters: {
+								operation: 'setMetrics',
+								metric: undefined, // explicitly set to undefined to test default behavior
+							},
+						},
+					],
+					connections: {},
+				});
+
+				expect(() => {
+					(testRunnerService as any).validateSetMetricsNodes(workflow);
+				}).toThrow(TestRunError);
+			});
+
+			it('should pass when non-AI metrics (customMetrics) have no model connected', () => {
+				const workflow = mock<IWorkflowBase>({
+					nodes: [
+						{
+							id: 'metrics1',
+							name: 'Set Metrics',
+							type: EVALUATION_NODE_TYPE,
+							typeVersion: 4.7,
+							position: [0, 0],
+							parameters: {
+								operation: 'setMetrics',
+								metric: 'customMetrics',
+								metrics: {
+									assignments: [
+										{
+											id: '1',
+											name: 'accuracy',
+											value: 0.95,
+										},
+									],
+								},
+							},
+						},
+					],
+					connections: {},
+				});
+
+				expect(() => {
+					(testRunnerService as any).validateSetMetricsNodes(workflow);
+				}).not.toThrow();
+			});
+
+			it('should pass when stringSimilarity metric has no model connected', () => {
+				const workflow = mock<IWorkflowBase>({
+					nodes: [
+						{
+							id: 'metrics1',
+							name: 'Set Metrics',
+							type: EVALUATION_NODE_TYPE,
+							typeVersion: 4.7,
+							position: [0, 0],
+							parameters: {
+								operation: 'setMetrics',
+								metric: 'stringSimilarity',
+							},
+						},
+					],
+					connections: {},
+				});
+
+				expect(() => {
+					(testRunnerService as any).validateSetMetricsNodes(workflow);
+				}).not.toThrow();
+			});
+		});
 	});
 });
diff --git a/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts b/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts
index 8c79d7af41..68d1b605f4 100644
--- a/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts
+++ b/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts
@@ -8,6 +8,8 @@ import {
 	EVALUATION_TRIGGER_NODE_TYPE,
 	ExecutionCancelledError,
 	NodeConnectionTypes,
+	metricRequiresModelConnection,
+	DEFAULT_EVALUATION_METRIC,
 } from 'n8n-workflow';
 import type {
 	IDataObject,
@@ -102,6 +104,16 @@ export class TestRunnerService {
 	 * Checks if the Evaluation Set Metrics nodes are present in the workflow
 	 * and are configured correctly.
 	 */
+	private hasModelNodeConnected(workflow: IWorkflowBase, targetNodeName: string): boolean {
+		// Check if there's a node connected to the target node via an ai_languageModel connection
+		return Object.keys(workflow.connections).some((sourceNodeName) => {
+			const connections = workflow.connections[sourceNodeName];
+			return connections?.[NodeConnectionTypes.AiLanguageModel]?.[0]?.some(
+				(connection) => connection.node === targetNodeName,
+			);
+		});
+	}
+
 	private validateSetMetricsNodes(workflow: IWorkflowBase) {
 		const metricsNodes = TestRunnerService.getEvaluationMetricsNodes(workflow);
 		if (metricsNodes.length === 0) {
@@ -113,11 +125,6 @@
 				return true;
 			}

-			// For versions 4.7+, check if metric parameter is missing
-			if (node.typeVersion >= 4.7 && !node.parameters.metric) {
-				return true;
-			}
-
 			// Check customMetrics configuration if:
 			// - Version 4.7+ and metric is 'customMetrics'
 			// - Version < 4.7 (customMetrics is default)
@@ -134,6 +141,17 @@
 				);
 			}

+			// For version 4.7+, check whether an AI-based metric is missing its model connection
+			if (node.typeVersion >= 4.7) {
+				const metric = (node.parameters.metric ?? DEFAULT_EVALUATION_METRIC) as string;
+				if (
+					metricRequiresModelConnection(metric) && // See packages/workflow/src/evaluation-helpers.ts
+					!this.hasModelNodeConnected(workflow, node.name)
+				) {
+					return true;
+				}
+			}
+
 			return false;
 		});
diff --git a/packages/nodes-base/nodes/Evaluation/Evaluation/Description.node.ts b/packages/nodes-base/nodes/Evaluation/Evaluation/Description.node.ts
index b6aceebdce..385aa01a4d 100644
--- a/packages/nodes-base/nodes/Evaluation/Evaluation/Description.node.ts
+++ b/packages/nodes-base/nodes/Evaluation/Evaluation/Description.node.ts
@@ -1,4 +1,5 @@
 import type { INodeProperties } from 'n8n-workflow';
+import { DEFAULT_EVALUATION_METRIC } from 'n8n-workflow';

 import {
 	CORRECTNESS_PROMPT,
@@ -386,7 +387,7 @@ export const setMetricsProperties: INodeProperties[] = [
 				description: 'Define your own metric(s)',
 			},
 		],
-		default: 'correctness',
+		default: DEFAULT_EVALUATION_METRIC,
 		displayOptions: {
 			show: {
 				operation: ['setMetrics'],
diff --git a/packages/nodes-base/nodes/Evaluation/Evaluation/Evaluation.node.ee.ts b/packages/nodes-base/nodes/Evaluation/Evaluation/Evaluation.node.ee.ts
index af2e39af3e..e89e3ff41b 100644
--- a/packages/nodes-base/nodes/Evaluation/Evaluation/Evaluation.node.ee.ts
+++ b/packages/nodes-base/nodes/Evaluation/Evaluation/Evaluation.node.ee.ts
@@ -22,6 +22,7 @@ import {
 	setOutputs,
 	setInputs,
 } from '../utils/evaluationUtils';
+import { metricRequiresModelConnection } from 'n8n-workflow'; // See packages/workflow/src/evaluation-helpers.ts

 export class Evaluation implements INodeType {
 	description: INodeTypeDescription = {
@@ -37,7 +38,8 @@ export class Evaluation implements INodeType {
 			name: 'Evaluation',
 			color: '#c3c9d5',
 		},
-		inputs: `={{(${getInputConnectionTypes})($parameter)}}`,
+		// Pass the function in explicitly, since the expression context doesn't allow imports inside getInputConnectionTypes
+		inputs: `={{(${getInputConnectionTypes})($parameter, ${metricRequiresModelConnection})}}`,
 		outputs: `={{(${getOutputConnectionTypes})($parameter)}}`,
 		codex: {
 			alias: ['Test', 'Metrics', 'Evals', 'Set Output', 'Set Metrics'],
diff --git a/packages/nodes-base/nodes/Evaluation/utils/evaluationUtils.ts b/packages/nodes-base/nodes/Evaluation/utils/evaluationUtils.ts
index a46af3e19b..8275abd69a 100644
--- a/packages/nodes-base/nodes/Evaluation/utils/evaluationUtils.ts
+++ b/packages/nodes-base/nodes/Evaluation/utils/evaluationUtils.ts
@@ -240,10 +240,13 @@ export function getOutputConnectionTypes(parameters: INodeParameters) {
 	return [{ type: 'main' }];
 }

-export function getInputConnectionTypes(parameters: INodeParameters) {
+export function getInputConnectionTypes(
+	parameters: INodeParameters,
+	metricRequiresModelConnectionFn: (metric: string) => boolean,
+) {
 	if (
 		parameters.operation === 'setMetrics' &&
-		['correctness', 'helpfulness'].includes(parameters.metric as string)
+		metricRequiresModelConnectionFn(parameters.metric as string)
 	) {
 		return [
 			{ type: 'main' },
diff --git a/packages/workflow/src/evaluation-helpers.ts b/packages/workflow/src/evaluation-helpers.ts
new file mode 100644
index 0000000000..fb3a064fe6
--- /dev/null
+++ b/packages/workflow/src/evaluation-helpers.ts
@@ -0,0 +1,25 @@
+/**
+ * Evaluation-related utility functions
+ *
+ * This file contains utilities that need to be shared between different packages
+ * to avoid circular dependencies. For example, the evaluation test-runner (in CLI package)
+ * and the Evaluation node (in nodes-base package) both need to know which metrics
+ * require AI model connections, but they can't import from each other directly.
+ *
+ * By placing shared utilities here in the workflow package (which both packages depend on),
+ * we avoid circular dependency issues.
+ */
+
+/**
+ * Default metric type used in evaluations
+ */
+export const DEFAULT_EVALUATION_METRIC = 'correctness';
+
+/**
+ * Determines if a given evaluation metric requires an AI model connection
+ * @param metric The metric name to check
+ * @returns true if the metric requires an AI model connection
+ */
+export function metricRequiresModelConnection(metric: string): boolean {
+	return ['correctness', 'helpfulness'].includes(metric);
+}
diff --git a/packages/workflow/src/index.ts b/packages/workflow/src/index.ts
index 4be0cbb82a..9676365ee0 100644
--- a/packages/workflow/src/index.ts
+++ b/packages/workflow/src/index.ts
@@ -67,6 +67,7 @@ export { ExpressionExtensions } from './extensions';
 export * as ExpressionParser from './extensions/expression-parser';
 export { NativeMethods } from './native-methods';
 export * from './node-parameters/filter-parameter';
+export * from './evaluation-helpers';

 export type {
 	DocMetadata,
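
The validation flow this patch introduces condenses to the following standalone sketch. The types here are simplified stand-ins rather than the real n8n-workflow interfaces, and the string literal 'ai_languageModel' stands in for NodeConnectionTypes.AiLanguageModel; it illustrates how the default metric and the connection check compose, not the shipped implementation.

// Simplified stand-ins for the n8n connection structures.
type Connection = { node: string; type: string; index: number };
type Connections = Record<string, Record<string, Array<Connection[] | null>>>;

// Mirrors packages/workflow/src/evaluation-helpers.ts
const DEFAULT_EVALUATION_METRIC = 'correctness';
function metricRequiresModelConnection(metric: string): boolean {
	return ['correctness', 'helpfulness'].includes(metric);
}

// Mirrors hasModelNodeConnected(): does any source node feed the target
// node through an ai_languageModel connection?
function hasModelNodeConnected(connections: Connections, targetNodeName: string): boolean {
	return Object.keys(connections).some((sourceNodeName) =>
		connections[sourceNodeName]?.ai_languageModel?.[0]?.some(
			(connection) => connection.node === targetNodeName,
		),
	);
}

// A Set Metrics node without an explicit metric falls back to the default.
const configuredMetric: string | undefined = undefined;
const metric = configuredMetric ?? DEFAULT_EVALUATION_METRIC; // 'correctness'

const connections: Connections = {
	'OpenAI Model': {
		ai_languageModel: [[{ node: 'Set Metrics', type: 'ai_languageModel', index: 0 }]],
	},
};

// 'correctness' is AI-based, so the node is valid only when a model is attached.
const misconfigured =
	metricRequiresModelConnection(metric) && !hasModelNodeConnected(connections, 'Set Metrics');
console.log(misconfigured); // false: a model is connected, so validation passes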
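
Likewise, a sketch of why Evaluation.node.ee.ts passes the helper into the inputs expression instead of importing it inside getInputConnectionTypes: both functions are stringified into the expression, so the inner one runs in a context where module imports don't exist. The second input entry below is simplified; the real node describes the model input in more detail.

// Stringified into the node's `inputs` expression, so it cannot rely on
// anything imported at module scope; collaborators must be passed in.
function getInputConnectionTypes(
	parameters: { operation?: string; metric?: string },
	metricRequiresModelConnectionFn: (metric: string) => boolean,
) {
	if (
		parameters.operation === 'setMetrics' &&
		metricRequiresModelConnectionFn(parameters.metric as string)
	) {
		return [{ type: 'main' }, { type: 'ai_languageModel' }]; // model input simplified here
	}
	return [{ type: 'main' }];
}

function metricRequiresModelConnection(metric: string): boolean {
	return ['correctness', 'helpfulness'].includes(metric);
}

// Both function bodies travel inside the expression string:
const inputs = `={{(${getInputConnectionTypes})($parameter, ${metricRequiresModelConnection})}}`;

// Exercising the logic directly, outside the expression context:
console.log(
	getInputConnectionTypes({ operation: 'setMetrics', metric: 'helpfulness' }, metricRequiresModelConnection),
);
// -> [{ type: 'main' }, { type: 'ai_languageModel' }]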