feat(editor): Evaluations frontend (no-changelog) (#15550)

Co-authored-by: Yiorgis Gozadinos <yiorgis@n8n.io>
Co-authored-by: JP van Oosten <jp@n8n.io>
Co-authored-by: Giulio Andreini <g.andreini@gmail.com>
Co-authored-by: Michael Kret <michael.k@radency.com>
Author: Eugene
Date: 2025-05-26 12:26:28 +02:00
Committed by: GitHub
Parent: 3ee15a8331
Commit: ca8f087a47
87 changed files with 3460 additions and 5103 deletions

View File

@@ -157,6 +157,3 @@ export const WsStatusCodes = {
} as const;
export const FREE_AI_CREDITS_CREDENTIAL_NAME = 'n8n free OpenAI API credits';
export const EVALUATION_NODE = `${NODE_PACKAGE_PREFIX}base.evaluation`;
export const EVALUATION_DATASET_TRIGGER_NODE = `${NODE_PACKAGE_PREFIX}base.evaluationTrigger`;

View File

@@ -117,6 +117,7 @@ export class E2EController {
[LICENSE_QUOTAS.INSIGHTS_MAX_HISTORY_DAYS]: 7,
[LICENSE_QUOTAS.INSIGHTS_RETENTION_MAX_AGE_DAYS]: 30,
[LICENSE_QUOTAS.INSIGHTS_RETENTION_PRUNE_INTERVAL_DAYS]: 180,
[LICENSE_QUOTAS.WORKFLOWS_WITH_EVALUATION_LIMIT]: 1,
};
private numericFeatures: Record<NumericLicenseFeature, number> = {
@@ -137,6 +138,8 @@ export class E2EController {
E2EController.numericFeaturesDefaults[LICENSE_QUOTAS.INSIGHTS_RETENTION_MAX_AGE_DAYS],
[LICENSE_QUOTAS.INSIGHTS_RETENTION_PRUNE_INTERVAL_DAYS]:
E2EController.numericFeaturesDefaults[LICENSE_QUOTAS.INSIGHTS_RETENTION_PRUNE_INTERVAL_DAYS],
[LICENSE_QUOTAS.WORKFLOWS_WITH_EVALUATION_LIMIT]:
E2EController.numericFeaturesDefaults[LICENSE_QUOTAS.WORKFLOWS_WITH_EVALUATION_LIMIT],
};
constructor(

View File

@@ -5,12 +5,12 @@ import type { WorkflowRepository } from '@n8n/db';
import { readFileSync } from 'fs';
import { mock } from 'jest-mock-extended';
import type { ErrorReporter } from 'n8n-core';
import { EVALUATION_NODE_TYPE, EVALUATION_TRIGGER_NODE_TYPE } from 'n8n-workflow';
import type { IWorkflowBase } from 'n8n-workflow';
import type { IRun } from 'n8n-workflow';
import type { IRun, ExecutionError } from 'n8n-workflow';
import path from 'path';
import type { ActiveExecutions } from '@/active-executions';
import { EVALUATION_DATASET_TRIGGER_NODE } from '@/constants';
import { TestRunError } from '@/evaluation.ee/test-runner/errors.ee';
import { LoadNodesAndCredentials } from '@/load-nodes-and-credentials';
import type { Telemetry } from '@/telemetry';
@@ -59,7 +59,7 @@ describe('TestRunnerService', () => {
jest.resetAllMocks();
});
describe('findTriggerNode', () => {
describe('findEvaluationTriggerNode', () => {
test('should find the trigger node in a workflow', () => {
// Setup a test workflow with a trigger node
const workflowWithTrigger = mock<IWorkflowBase>({
@@ -67,7 +67,7 @@ describe('TestRunnerService', () => {
{
id: 'node1',
name: 'Dataset Trigger',
type: EVALUATION_DATASET_TRIGGER_NODE,
type: EVALUATION_TRIGGER_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {},
@@ -85,11 +85,11 @@ describe('TestRunnerService', () => {
});
// Use the protected method via any type casting
const result = (testRunnerService as any).findTriggerNode(workflowWithTrigger);
const result = (testRunnerService as any).findEvaluationTriggerNode(workflowWithTrigger);
// Assert the result is the correct node
expect(result).toBeDefined();
expect(result.type).toBe(EVALUATION_DATASET_TRIGGER_NODE);
expect(result.type).toBe(EVALUATION_TRIGGER_NODE_TYPE);
expect(result.name).toBe('Dataset Trigger');
});
@@ -118,16 +118,16 @@ describe('TestRunnerService', () => {
});
// Call the function and expect undefined result
const result = (testRunnerService as any).findTriggerNode(workflowWithoutTrigger);
const result = (testRunnerService as any).findEvaluationTriggerNode(workflowWithoutTrigger);
expect(result).toBeUndefined();
});
test('should work with the actual workflow.under-test.json', () => {
const result = (testRunnerService as any).findTriggerNode(wfUnderTestJson);
const result = (testRunnerService as any).findEvaluationTriggerNode(wfUnderTestJson);
// Assert the result is the correct node
expect(result).toBeDefined();
expect(result.type).toBe(EVALUATION_DATASET_TRIGGER_NODE);
expect(result.type).toBe(EVALUATION_TRIGGER_NODE_TYPE);
expect(result.name).toBe('When fetching a dataset row');
});
});
@@ -140,7 +140,7 @@ describe('TestRunnerService', () => {
{
id: 'triggerNodeId',
name: 'TriggerNode',
type: EVALUATION_DATASET_TRIGGER_NODE,
type: EVALUATION_TRIGGER_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {},
@@ -164,6 +164,7 @@ describe('TestRunnerService', () => {
data: {
main: [mockOutputItems],
},
error: undefined,
},
],
},
@@ -185,7 +186,7 @@ describe('TestRunnerService', () => {
{
id: 'triggerNodeId',
name: 'TriggerNode',
type: EVALUATION_DATASET_TRIGGER_NODE,
type: EVALUATION_TRIGGER_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {},
@@ -217,6 +218,51 @@ describe('TestRunnerService', () => {
}
});
test('should throw an error if evaluation trigger could not fetch data', () => {
// Create workflow with a trigger node
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'triggerNodeId',
name: 'TriggerNode',
type: EVALUATION_TRIGGER_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
// Create execution data with missing output
const execution = mock<IRun>({
data: {
resultData: {
runData: {
TriggerNode: [
{
error: mock<ExecutionError>(),
},
],
},
},
},
});
// Expect the method to throw an error
expect(() => {
(testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
}).toThrow(TestRunError);
// Verify the error has the correct code
try {
(testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
} catch (error) {
expect(error).toBeInstanceOf(TestRunError);
expect(error.code).toBe('CANT_FETCH_TEST_CASES');
}
});
test('should throw an error if trigger node output is empty list', () => {
// Create workflow with a trigger node
const workflow = mock<IWorkflowBase>({
@@ -224,7 +270,7 @@ describe('TestRunnerService', () => {
{
id: 'triggerNodeId',
name: 'TriggerNode',
type: EVALUATION_DATASET_TRIGGER_NODE,
type: EVALUATION_TRIGGER_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {},
@@ -243,6 +289,7 @@ describe('TestRunnerService', () => {
data: {
main: [[]], // Empty list
},
error: undefined,
},
],
},
@@ -271,7 +318,7 @@ describe('TestRunnerService', () => {
{
id: 'triggerNodeId',
name: "When clicking 'Execute workflow'",
type: EVALUATION_DATASET_TRIGGER_NODE,
type: EVALUATION_TRIGGER_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {},
@@ -297,6 +344,7 @@ describe('TestRunnerService', () => {
data: {
main: [expectedItems],
},
error: undefined,
},
],
},
@@ -374,7 +422,7 @@ describe('TestRunnerService', () => {
{
id: 'node1',
name: triggerNodeName,
type: EVALUATION_DATASET_TRIGGER_NODE,
type: EVALUATION_TRIGGER_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {},
@@ -427,7 +475,7 @@ describe('TestRunnerService', () => {
{
id: 'node1',
name: triggerNodeName,
type: EVALUATION_DATASET_TRIGGER_NODE,
type: EVALUATION_TRIGGER_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {},
@@ -531,7 +579,7 @@ describe('TestRunnerService', () => {
{
id: 'node1',
name: triggerNodeName,
type: EVALUATION_DATASET_TRIGGER_NODE,
type: EVALUATION_TRIGGER_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {},
@@ -586,7 +634,7 @@ describe('TestRunnerService', () => {
{
id: 'node1',
name: triggerNodeName,
type: EVALUATION_DATASET_TRIGGER_NODE,
type: EVALUATION_TRIGGER_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {},
@@ -632,4 +680,554 @@ describe('TestRunnerService', () => {
}
});
});
describe('validateSetMetricsNodes', () => {
it('should pass when metrics nodes are properly configured', () => {
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Set Metrics',
type: EVALUATION_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {
operation: 'setMetrics',
metrics: {
assignments: [
{
id: '1',
name: 'accuracy',
value: 0.95,
},
{
id: '2',
name: 'precision',
value: 0.87,
},
],
},
},
},
],
connections: {},
});
expect(() => {
(testRunnerService as any).validateSetMetricsNodes(workflow);
}).not.toThrow();
});
it('should throw SET_METRICS_NODE_NOT_FOUND when no metrics nodes exist', () => {
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Regular Node',
type: 'n8n-nodes-base.noOp',
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
expect(() => {
(testRunnerService as any).validateSetMetricsNodes(workflow);
}).toThrow(TestRunError);
try {
(testRunnerService as any).validateSetMetricsNodes(workflow);
} catch (error) {
expect(error).toBeInstanceOf(TestRunError);
expect(error.code).toBe('SET_METRICS_NODE_NOT_FOUND');
}
});
it('should throw SET_METRICS_NODE_NOT_CONFIGURED when metrics node has no parameters', () => {
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Set Metrics',
type: EVALUATION_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {
operation: 'setMetrics',
metrics: undefined,
},
},
],
connections: {},
});
expect(() => {
(testRunnerService as any).validateSetMetricsNodes(workflow);
}).toThrow(TestRunError);
try {
(testRunnerService as any).validateSetMetricsNodes(workflow);
} catch (error) {
expect(error).toBeInstanceOf(TestRunError);
expect(error.code).toBe('SET_METRICS_NODE_NOT_CONFIGURED');
expect(error.extra).toEqual({ node_name: 'Set Metrics' });
}
});
it('should throw SET_METRICS_NODE_NOT_CONFIGURED when metrics node has empty assignments', () => {
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Set Metrics',
type: EVALUATION_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {
operation: 'setMetrics',
metrics: {
assignments: [],
},
},
},
],
connections: {},
});
expect(() => {
(testRunnerService as any).validateSetMetricsNodes(workflow);
}).toThrow(TestRunError);
try {
(testRunnerService as any).validateSetMetricsNodes(workflow);
} catch (error) {
expect(error).toBeInstanceOf(TestRunError);
expect(error.code).toBe('SET_METRICS_NODE_NOT_CONFIGURED');
expect(error.extra).toEqual({ node_name: 'Set Metrics' });
}
});
it('should throw SET_METRICS_NODE_NOT_CONFIGURED when assignment has no name', () => {
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Set Metrics',
type: EVALUATION_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {
operation: 'setMetrics',
metrics: {
assignments: [
{
id: '1',
name: '',
value: 0.95,
},
],
},
},
},
],
connections: {},
});
expect(() => {
(testRunnerService as any).validateSetMetricsNodes(workflow);
}).toThrow(TestRunError);
try {
(testRunnerService as any).validateSetMetricsNodes(workflow);
} catch (error) {
expect(error).toBeInstanceOf(TestRunError);
expect(error.code).toBe('SET_METRICS_NODE_NOT_CONFIGURED');
expect(error.extra).toEqual({ node_name: 'Set Metrics' });
}
});
it('should throw SET_METRICS_NODE_NOT_CONFIGURED when assignment has null value', () => {
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Set Metrics',
type: EVALUATION_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {
operation: 'setMetrics',
metrics: {
assignments: [
{
id: '1',
name: 'accuracy',
value: null,
},
],
},
},
},
],
connections: {},
});
expect(() => {
(testRunnerService as any).validateSetMetricsNodes(workflow);
}).toThrow(TestRunError);
try {
(testRunnerService as any).validateSetMetricsNodes(workflow);
} catch (error) {
expect(error).toBeInstanceOf(TestRunError);
expect(error.code).toBe('SET_METRICS_NODE_NOT_CONFIGURED');
expect(error.extra).toEqual({ node_name: 'Set Metrics' });
}
});
it('should validate multiple metrics nodes successfully', () => {
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Set Metrics 1',
type: EVALUATION_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {
operation: 'setMetrics',
metrics: {
assignments: [
{
id: '1',
name: 'accuracy',
value: 0.95,
},
],
},
},
},
{
id: 'node2',
name: 'Set Metrics 2',
type: EVALUATION_NODE_TYPE,
typeVersion: 1,
position: [100, 0],
parameters: {
operation: 'setMetrics',
metrics: {
assignments: [
{
id: '2',
name: 'precision',
value: 0.87,
},
],
},
},
},
],
connections: {},
});
expect(() => {
(testRunnerService as any).validateSetMetricsNodes(workflow);
}).not.toThrow();
});
});
describe('validateSetOutputsNodes', () => {
it('should pass when outputs nodes are properly configured', () => {
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Set Outputs',
type: EVALUATION_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {
operation: 'setOutputs',
outputs: {
assignments: [
{
id: '1',
name: 'result',
value: 'success',
},
{
id: '2',
name: 'score',
value: 95,
},
],
},
},
},
],
connections: {},
});
expect(() => {
(testRunnerService as any).validateSetOutputsNodes(workflow);
}).not.toThrow();
});
it('should pass when operation is default (undefined)', () => {
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Set Outputs',
type: EVALUATION_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {
operation: undefined,
outputs: {
assignments: [
{
id: '1',
name: 'result',
value: 'success',
},
{
id: '2',
name: 'score',
value: 95,
},
],
},
},
},
],
connections: {},
});
expect(() => {
(testRunnerService as any).validateSetOutputsNodes(workflow);
}).not.toThrow();
});
it('should throw SET_OUTPUTS_NODE_NOT_FOUND when no outputs nodes exist', () => {
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Regular Node',
type: 'n8n-nodes-base.noOp',
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
expect(() => {
(testRunnerService as any).validateSetOutputsNodes(workflow);
}).toThrow(TestRunError);
try {
(testRunnerService as any).validateSetOutputsNodes(workflow);
} catch (error) {
expect(error).toBeInstanceOf(TestRunError);
expect(error.code).toBe('SET_OUTPUTS_NODE_NOT_FOUND');
}
});
it('should throw SET_OUTPUTS_NODE_NOT_CONFIGURED when outputs node has no parameters', () => {
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Set Outputs',
type: EVALUATION_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {
operation: 'setOutputs',
outputs: undefined,
},
},
],
connections: {},
});
expect(() => {
(testRunnerService as any).validateSetOutputsNodes(workflow);
}).toThrow(TestRunError);
try {
(testRunnerService as any).validateSetOutputsNodes(workflow);
} catch (error) {
expect(error).toBeInstanceOf(TestRunError);
expect(error.code).toBe('SET_OUTPUTS_NODE_NOT_CONFIGURED');
expect(error.extra).toEqual({ node_name: 'Set Outputs' });
}
});
it('should throw SET_OUTPUTS_NODE_NOT_CONFIGURED when outputs node has empty assignments', () => {
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Set Outputs',
type: EVALUATION_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {
operation: 'setOutputs',
outputs: {
assignments: [],
},
},
},
],
connections: {},
});
expect(() => {
(testRunnerService as any).validateSetOutputsNodes(workflow);
}).toThrow(TestRunError);
try {
(testRunnerService as any).validateSetOutputsNodes(workflow);
} catch (error) {
expect(error).toBeInstanceOf(TestRunError);
expect(error.code).toBe('SET_OUTPUTS_NODE_NOT_CONFIGURED');
expect(error.extra).toEqual({ node_name: 'Set Outputs' });
}
});
it('should throw SET_OUTPUTS_NODE_NOT_CONFIGURED when assignment has no name', () => {
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Set Outputs',
type: EVALUATION_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {
operation: 'setOutputs',
outputs: {
assignments: [
{
id: '1',
name: '',
value: 'result',
},
],
},
},
},
],
connections: {},
});
expect(() => {
(testRunnerService as any).validateSetOutputsNodes(workflow);
}).toThrow(TestRunError);
try {
(testRunnerService as any).validateSetOutputsNodes(workflow);
} catch (error) {
expect(error).toBeInstanceOf(TestRunError);
expect(error.code).toBe('SET_OUTPUTS_NODE_NOT_CONFIGURED');
expect(error.extra).toEqual({ node_name: 'Set Outputs' });
}
});
it('should throw SET_OUTPUTS_NODE_NOT_CONFIGURED when assignment has null value', () => {
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Set Outputs',
type: EVALUATION_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {
operation: 'setOutputs',
outputs: {
assignments: [
{
id: '1',
name: 'result',
value: null,
},
],
},
},
},
],
connections: {},
});
expect(() => {
(testRunnerService as any).validateSetOutputsNodes(workflow);
}).toThrow(TestRunError);
try {
(testRunnerService as any).validateSetOutputsNodes(workflow);
} catch (error) {
expect(error).toBeInstanceOf(TestRunError);
expect(error.code).toBe('SET_OUTPUTS_NODE_NOT_CONFIGURED');
expect(error.extra).toEqual({ node_name: 'Set Outputs' });
}
});
it('should validate multiple outputs nodes successfully', () => {
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Set Outputs 1',
type: EVALUATION_NODE_TYPE,
typeVersion: 1,
position: [0, 0],
parameters: {
operation: 'setOutputs',
outputs: {
assignments: [
{
id: '1',
name: 'result',
value: 'success',
},
],
},
},
},
{
id: 'node2',
name: 'Set Outputs 2',
type: EVALUATION_NODE_TYPE,
typeVersion: 1,
position: [100, 0],
parameters: {
operation: 'setOutputs',
outputs: {
assignments: [
{
id: '2',
name: 'score',
value: 95,
},
],
},
},
},
],
connections: {},
});
expect(() => {
(testRunnerService as any).validateSetOutputsNodes(workflow);
}).not.toThrow();
});
});
});

View File

@@ -2,7 +2,11 @@ import type { User, TestRun } from '@n8n/db';
import { TestCaseExecutionRepository, TestRunRepository, WorkflowRepository } from '@n8n/db';
import { Service } from '@n8n/di';
import { ErrorReporter, Logger } from 'n8n-core';
import { ExecutionCancelledError } from 'n8n-workflow';
import {
EVALUATION_NODE_TYPE,
EVALUATION_TRIGGER_NODE_TYPE,
ExecutionCancelledError,
} from 'n8n-workflow';
import type {
IDataObject,
IRun,
@@ -10,13 +14,14 @@ import type {
IWorkflowExecutionDataProcess,
IExecuteData,
INodeExecutionData,
AssignmentCollectionValue,
} from 'n8n-workflow';
import assert from 'node:assert';
import { ActiveExecutions } from '@/active-executions';
import config from '@/config';
import { EVALUATION_DATASET_TRIGGER_NODE, EVALUATION_NODE } from '@/constants';
import { TestCaseExecutionError, TestRunError } from '@/evaluation.ee/test-runner/errors.ee';
import { checkNodeParameterNotEmpty } from '@/evaluation.ee/test-runner/utils.ee';
import { Telemetry } from '@/telemetry';
import { WorkflowRunner } from '@/workflow-runner';
@@ -59,8 +64,97 @@ export class TestRunnerService {
/**
* Finds the dataset trigger node in the workflow
*/
private findTriggerNode(workflow: IWorkflowBase) {
return workflow.nodes.find((node) => node.type === EVALUATION_DATASET_TRIGGER_NODE);
private findEvaluationTriggerNode(workflow: IWorkflowBase) {
return workflow.nodes.find((node) => node.type === EVALUATION_TRIGGER_NODE_TYPE);
}
/**
* Validates the evaluation trigger node is present in the workflow
* and is configured correctly.
*/
private validateEvaluationTriggerNode(workflow: IWorkflowBase) {
const triggerNode = this.findEvaluationTriggerNode(workflow);
if (!triggerNode) {
throw new TestRunError('EVALUATION_TRIGGER_NOT_FOUND');
}
if (
!triggerNode.credentials ||
!checkNodeParameterNotEmpty(triggerNode.parameters?.documentId) ||
!checkNodeParameterNotEmpty(triggerNode.parameters?.sheetName)
) {
throw new TestRunError('EVALUATION_TRIGGER_NOT_CONFIGURED', { node_name: triggerNode.name });
}
if (triggerNode?.disabled) {
throw new TestRunError('EVALUATION_TRIGGER_DISABLED');
}
}
/**
* Checks if the Evaluation Set Metrics nodes are present in the workflow
* and are configured correctly.
*/
private validateSetMetricsNodes(workflow: IWorkflowBase) {
const metricsNodes = TestRunnerService.getEvaluationMetricsNodes(workflow);
if (metricsNodes.length === 0) {
throw new TestRunError('SET_METRICS_NODE_NOT_FOUND');
}
const unconfiguredMetricsNode = metricsNodes.find(
(node) =>
!node.parameters ||
!node.parameters.metrics ||
(node.parameters.metrics as AssignmentCollectionValue).assignments?.length === 0 ||
(node.parameters.metrics as AssignmentCollectionValue).assignments?.some(
(assignment) => !assignment.name || assignment.value === null,
),
);
if (unconfiguredMetricsNode) {
throw new TestRunError('SET_METRICS_NODE_NOT_CONFIGURED', {
node_name: unconfiguredMetricsNode.name,
});
}
}
/**
* Checks if the Evaluation Set Outputs nodes are present in the workflow
* and are configured correctly.
*/
private validateSetOutputsNodes(workflow: IWorkflowBase) {
const setOutputsNodes = TestRunnerService.getEvaluationSetOutputsNodes(workflow);
if (setOutputsNodes.length === 0) {
throw new TestRunError('SET_OUTPUTS_NODE_NOT_FOUND');
}
const unconfiguredSetOutputsNode = setOutputsNodes.find(
(node) =>
!node.parameters ||
!node.parameters.outputs ||
(node.parameters.outputs as AssignmentCollectionValue).assignments?.length === 0 ||
(node.parameters.outputs as AssignmentCollectionValue).assignments?.some(
(assignment) => !assignment.name || assignment.value === null,
),
);
if (unconfiguredSetOutputsNode) {
throw new TestRunError('SET_OUTPUTS_NODE_NOT_CONFIGURED', {
node_name: unconfiguredSetOutputsNode.name,
});
}
}
/**
* Validates workflow configuration for evaluation
* Throws appropriate TestRunError if validation fails
*/
private validateWorkflowConfiguration(workflow: IWorkflowBase): void {
this.validateEvaluationTriggerNode(workflow);
this.validateSetOutputsNodes(workflow);
this.validateSetMetricsNodes(workflow);
}
/**
@@ -83,7 +177,7 @@ export class TestRunnerService {
// Evaluation executions should run the same way as manual,
// because they need pinned data and partial execution logic
const triggerNode = this.findTriggerNode(workflow);
const triggerNode = this.findEvaluationTriggerNode(workflow);
assert(triggerNode);
const pinData = {
@@ -148,7 +242,7 @@ export class TestRunnerService {
// Evaluation executions should run the same way as manual,
// because they need pinned data and partial execution logic
const triggerNode = this.findTriggerNode(workflow);
const triggerNode = this.findEvaluationTriggerNode(workflow);
if (!triggerNode) {
throw new TestRunError('EVALUATION_TRIGGER_NOT_FOUND');
@@ -219,11 +313,22 @@ export class TestRunnerService {
}
/**
* Get the evaluation metrics nodes from a workflow.
* Get the evaluation set metrics nodes from a workflow.
*/
static getEvaluationMetricsNodes(workflow: IWorkflowBase) {
return workflow.nodes.filter(
(node) => node.type === EVALUATION_NODE && node.parameters.operation === 'setMetrics',
(node) => node.type === EVALUATION_NODE_TYPE && node.parameters.operation === 'setMetrics',
);
}
/**
* Get the evaluation set outputs nodes from a workflow.
*/
static getEvaluationSetOutputsNodes(workflow: IWorkflowBase) {
return workflow.nodes.filter(
(node) =>
node.type === EVALUATION_NODE_TYPE &&
(node.parameters.operation === 'setOutputs' || node.parameters.operation === undefined),
);
}
@@ -231,10 +336,17 @@ export class TestRunnerService {
* Extract the dataset trigger output
*/
private extractDatasetTriggerOutput(execution: IRun, workflow: IWorkflowBase) {
const triggerNode = this.findTriggerNode(workflow);
const triggerNode = this.findEvaluationTriggerNode(workflow);
assert(triggerNode);
const triggerOutputData = execution.data.resultData.runData[triggerNode.name][0];
if (triggerOutputData?.error) {
throw new TestRunError('CANT_FETCH_TEST_CASES', {
message: triggerOutputData.error.message,
});
}
const triggerOutput = triggerOutputData?.data?.main?.[0];
if (!triggerOutput || triggerOutput.length === 0) {
@@ -248,16 +360,16 @@ export class TestRunnerService {
* Evaluation result is collected from all Evaluation Metrics nodes
*/
private extractEvaluationResult(execution: IRun, workflow: IWorkflowBase): IDataObject {
// TODO: Do not fail if not all metric nodes were executed
const metricsNodes = TestRunnerService.getEvaluationMetricsNodes(workflow);
const metricsRunData = metricsNodes.flatMap(
(node) => execution.data.resultData.runData[node.name],
);
// If a metrics node did not execute, ignore it.
const metricsRunData = metricsNodes
.flatMap((node) => execution.data.resultData.runData[node.name])
.filter((data) => data !== undefined);
const metricsData = metricsRunData
.reverse()
.map((data) => data.data?.main?.[0]?.[0]?.json ?? {});
const metricsResult = metricsData.reduce((acc, curr) => ({ ...acc, ...curr }), {});
return metricsResult;
}
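To make the merge semantics of the rewritten extractEvaluationResult concrete, here is a minimal standalone sketch; the node names and metric values are illustrative, not taken from the diff:
// Per-node outputs in the order produced by the reversed run data (values are made up).
const metricsData: Array<Record<string, number>> = [
  { accuracy: 0.95 },             // e.g. from a 'Set Metrics 1' node
  { precision: 0.87 },            // e.g. from a 'Set Metrics 2' node
  { accuracy: 0.9, recall: 0.8 }, // a later entry for the same run
];
// Same reduce as in extractEvaluationResult: spread order means later entries win on key collisions.
const metricsResult = metricsData.reduce((acc, curr) => ({ ...acc, ...curr }), {});
// => { accuracy: 0.9, precision: 0.87, recall: 0.8 }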
@@ -294,6 +406,9 @@ export class TestRunnerService {
// Update test run status
await this.testRunRepository.markAsRunning(testRun.id);
// Check if the workflow is ready for evaluation
this.validateWorkflowConfiguration(workflow);
this.telemetry.track('User ran test', {
user_id: user.id,
run_id: testRun.id,
@@ -377,19 +492,31 @@ export class TestRunnerService {
this.extractEvaluationResult(testCaseExecution, workflow),
);
this.logger.debug('Test case metrics extracted', addedMetrics);
// Create a new test case execution in DB
await this.testCaseExecutionRepository.createTestCaseExecution({
executionId: testCaseExecutionId,
testRun: {
id: testRun.id,
},
runAt,
completedAt,
status: 'success',
metrics: addedMetrics,
});
if (Object.keys(addedMetrics).length === 0) {
await this.testCaseExecutionRepository.createTestCaseExecution({
executionId: testCaseExecutionId,
testRun: {
id: testRun.id,
},
runAt,
completedAt,
status: 'error',
errorCode: 'NO_METRICS_COLLECTED',
});
} else {
this.logger.debug('Test case metrics extracted', addedMetrics);
// Create a new test case execution in DB
await this.testCaseExecutionRepository.createTestCaseExecution({
executionId: testCaseExecutionId,
testRun: {
id: testRun.id,
},
runAt,
completedAt,
status: 'success',
metrics: addedMetrics,
});
}
} catch (e) {
const completedAt = new Date();
// FIXME: this is a temporary log
@@ -500,7 +627,7 @@ export class TestRunnerService {
} else {
const { manager: dbManager } = this.testRunRepository;
// If there is no abort controller - just mark the test run and all its' pending test case executions as cancelled
// If there is no abort controller - just mark the test run and all its pending test case executions as cancelled
await dbManager.transaction(async (trx) => {
await this.testRunRepository.markAsCancelled(testRunId, trx);
await this.testCaseExecutionRepository.markAllPendingAsCancelled(testRunId, trx);

View File

@@ -0,0 +1,19 @@
import type { NodeParameterValueType, INodeParameterResourceLocator } from 'n8n-workflow';
function isRlcValue(value: NodeParameterValueType): value is INodeParameterResourceLocator {
return Boolean(
typeof value === 'object' && value && 'value' in value && '__rl' in value && value.__rl,
);
}
export function checkNodeParameterNotEmpty(value: NodeParameterValueType) {
if (value === undefined || value === null || value === '') {
return false;
}
if (isRlcValue(value)) {
return checkNodeParameterNotEmpty(value.value);
}
return true;
}
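A brief usage sketch of this helper (the import path matches the one used by the test runner service above; parameter values are illustrative):
import { checkNodeParameterNotEmpty } from '@/evaluation.ee/test-runner/utils.ee';
// Plain values: undefined, null and '' are treated as empty.
checkNodeParameterNotEmpty('My Sheet'); // true
checkNodeParameterNotEmpty('');         // false
// Resource locator (RLC) values: the nested `value` field is what gets checked.
checkNodeParameterNotEmpty({ __rl: true, mode: 'list', value: '' });                    // false
checkNodeParameterNotEmpty({ __rl: true, mode: 'list', value: 'illustrative-doc-id' }); // true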

View File

@@ -200,6 +200,10 @@ export interface ILicenseReadResponse {
value: number;
warningThreshold: number;
};
workflowsHavingEvaluations: {
limit: number;
value: number;
};
};
license: {
planId: string;

View File

@@ -1,3 +1,4 @@
import type { LicenseState } from '@n8n/backend-common';
import type { WorkflowRepository } from '@n8n/db';
import type { TEntitlement } from '@n8n_io/license-sdk';
import axios, { AxiosError } from 'axios';
@@ -12,12 +13,14 @@ jest.mock('axios');
describe('LicenseService', () => {
const license = mock<License>();
const licenseState = mock<LicenseState>();
const workflowRepository = mock<WorkflowRepository>();
const entitlement = mock<TEntitlement>({ productId: '123' });
const eventService = mock<EventService>();
const licenseService = new LicenseService(
mock(),
license,
licenseState,
workflowRepository,
mock(),
eventService,
@@ -26,7 +29,9 @@ describe('LicenseService', () => {
license.getMainPlan.mockReturnValue(entitlement);
license.getTriggerLimit.mockReturnValue(400);
license.getPlanName.mockReturnValue('Test Plan');
licenseState.getMaxWorkflowsWithEvaluations.mockReturnValue(2);
workflowRepository.getActiveTriggerCount.mockResolvedValue(7);
workflowRepository.getWorkflowsWithEvaluationCount.mockResolvedValue(1);
beforeEach(() => jest.clearAllMocks());
@@ -46,6 +51,10 @@ describe('LicenseService', () => {
value: 7,
warningThreshold: 0.8,
},
workflowsHavingEvaluations: {
limit: 2,
value: 1,
},
},
license: {
planId: '123',

View File

@@ -1,3 +1,4 @@
import { LicenseState } from '@n8n/backend-common';
import type { User } from '@n8n/db';
import { WorkflowRepository } from '@n8n/db';
import { Service } from '@n8n/di';
@@ -26,6 +27,7 @@ export class LicenseService {
constructor(
private readonly logger: Logger,
private readonly license: License,
private readonly licenseState: LicenseState,
private readonly workflowRepository: WorkflowRepository,
private readonly urlService: UrlService,
private readonly eventService: EventService,
@@ -33,6 +35,8 @@ export class LicenseService {
async getLicenseData() {
const triggerCount = await this.workflowRepository.getActiveTriggerCount();
const workflowsWithEvaluationsCount =
await this.workflowRepository.getWorkflowsWithEvaluationCount();
const mainPlan = this.license.getMainPlan();
return {
@@ -42,6 +46,10 @@ export class LicenseService {
limit: this.license.getTriggerLimit(),
warningThreshold: 0.8,
},
workflowsHavingEvaluations: {
value: workflowsWithEvaluationsCount,
limit: this.licenseState.getMaxWorkflowsWithEvaluations(),
},
},
license: {
planId: mainPlan?.productId ?? '',

View File

@@ -37,7 +37,11 @@ describe('LicenseMetricsService', () => {
describe('collectUsageMetrics', () => {
test('should return an array of expected usage metrics', async () => {
const mockActiveTriggerCount = 1234;
const mockWorkflowsWithEvaluationsCount = 5;
workflowRepository.getActiveTriggerCount.mockResolvedValue(mockActiveTriggerCount);
workflowRepository.getWorkflowsWithEvaluationCount.mockResolvedValue(
mockWorkflowsWithEvaluationsCount,
);
const mockRenewalMetrics = {
activeWorkflows: 100,
@@ -48,6 +52,7 @@ describe('LicenseMetricsService', () => {
productionExecutions: 600,
productionRootExecutions: 550,
manualExecutions: 700,
evaluations: 5,
};
licenseMetricsRespository.getLicenseRenewalMetrics.mockResolvedValue(mockRenewalMetrics);
@@ -67,6 +72,7 @@ describe('LicenseMetricsService', () => {
},
{ name: 'manualExecutions', value: mockRenewalMetrics.manualExecutions },
{ name: 'activeWorkflowTriggers', value: mockActiveTriggerCount },
{ name: 'evaluations', value: mockRenewalMetrics.evaluations },
]);
});
});

View File

@@ -20,7 +20,10 @@ export class LicenseMetricsService {
manualExecutions,
} = await this.licenseMetricsRepository.getLicenseRenewalMetrics();
const activeTriggerCount = await this.workflowRepository.getActiveTriggerCount();
const [activeTriggerCount, workflowsWithEvaluationsCount] = await Promise.all([
this.workflowRepository.getActiveTriggerCount(),
this.workflowRepository.getWorkflowsWithEvaluationCount(),
]);
return [
{ name: 'activeWorkflows', value: activeWorkflows },
@@ -32,6 +35,7 @@ export class LicenseMetricsService {
{ name: 'productionRootExecutions', value: productionRootExecutions },
{ name: 'manualExecutions', value: manualExecutions },
{ name: 'activeWorkflowTriggers', value: activeTriggerCount },
{ name: 'evaluations', value: workflowsWithEvaluationsCount },
];
}

View File

@@ -256,6 +256,9 @@ export class FrontendService {
logsView: {
enabled: false,
},
evaluation: {
quota: this.licenseState.getMaxWorkflowsWithEvaluations(),
},
};
}
@@ -395,6 +398,9 @@ export class FrontendService {
this.settings.logsView.enabled = config.get('logs_view.enabled');
// Refresh evaluation settings
this.settings.evaluation.quota = this.licenseState.getMaxWorkflowsWithEvaluations();
return this.settings;
}

View File

@@ -1,6 +1,7 @@
import { WorkflowRepository } from '@n8n/db';
import { Container } from '@n8n/di';
import { createTestRun } from '../../shared/db/evaluation';
import {
createWorkflowWithTrigger,
createWorkflow,
@@ -115,4 +116,73 @@ describe('WorkflowRepository', () => {
expect(activeIds).toHaveLength(1);
});
});
describe('getWorkflowsWithEvaluationCount', () => {
it('should return 0 when no workflows have test runs', async () => {
//
// ARRANGE
//
const workflowRepository = Container.get(WorkflowRepository);
await createWorkflow();
await createWorkflow();
//
// ACT
//
const count = await workflowRepository.getWorkflowsWithEvaluationCount();
//
// ASSERT
//
expect(count).toBe(0);
});
it('should return correct count when some workflows have test runs', async () => {
//
// ARRANGE
//
const workflowRepository = Container.get(WorkflowRepository);
const workflow1 = await createWorkflow();
await createWorkflow();
const workflow3 = await createWorkflow();
await createTestRun(workflow1.id);
await createTestRun(workflow3.id);
//
// ACT
//
const count = await workflowRepository.getWorkflowsWithEvaluationCount();
//
// ASSERT
//
expect(count).toBe(2);
});
it('should count each workflow only once even with multiple test runs', async () => {
//
// ARRANGE
//
const workflowRepository = Container.get(WorkflowRepository);
const workflow1 = await createWorkflow();
const workflow2 = await createWorkflow();
await createTestRun(workflow1.id);
await createTestRun(workflow1.id);
await createTestRun(workflow1.id);
await createTestRun(workflow2.id);
await createTestRun(workflow2.id);
//
// ACT
//
const count = await workflowRepository.getWorkflowsWithEvaluationCount();
//
// ASSERT
//
expect(count).toBe(2);
});
});
});
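The getWorkflowsWithEvaluationCount implementation itself is not part of this diff; the tests above only pin down its behaviour (a distinct count of workflows that have at least one test run). A rough TypeORM-style sketch of that behaviour follows; the join target and column names are assumptions, not taken from the source:
// Hypothetical sketch only; the real method lives in @n8n/db and may differ.
async function getWorkflowsWithEvaluationCount(this: WorkflowRepository): Promise<number> {
  const row = await this.createQueryBuilder('workflow')
    .innerJoin('test_run', 'testRun', 'testRun.workflowId = workflow.id') // assumed join column
    .select('COUNT(DISTINCT workflow.id)', 'count')
    .getRawOne<{ count: string }>();
  return Number(row?.count ?? 0);
}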

View File

@@ -83,6 +83,7 @@ describe('LicenseMetricsRepository', () => {
productionExecutions: 3,
productionRootExecutions: 3,
manualExecutions: 2,
evaluations: 0,
});
});
@@ -100,6 +101,7 @@ describe('LicenseMetricsRepository', () => {
productionExecutions: 0, // not NaN
productionRootExecutions: 0, // not NaN
manualExecutions: 0, // not NaN
evaluations: 0,
});
});
});

View File

@@ -119,6 +119,10 @@ const DEFAULT_LICENSE_RESPONSE: { data: ILicenseReadResponse } = {
limit: -1,
warningThreshold: 0.8,
},
workflowsHavingEvaluations: {
value: 0,
limit: 0,
},
},
license: {
planId: '',
@@ -135,6 +139,10 @@ const DEFAULT_POST_RESPONSE: { data: ILicensePostResponse } = {
limit: -1,
warningThreshold: 0.8,
},
workflowsHavingEvaluations: {
value: 0,
limit: 0,
},
},
license: {
planId: '',