mirror of https://github.com/Abdulazizzn/n8n-enterprise-unlocked.git (synced 2025-12-17 10:02:05 +00:00)

feat(core): Evaluations backend (no-changelog) (#15542)
feat(core): Evaluations backend (no-changelog) (#15542)
Co-authored-by: Yiorgis Gozadinos <yiorgis@n8n.io>
Co-authored-by: Mutasem Aldmour <4711238+mutdmour@users.noreply.github.com>
@@ -283,19 +283,15 @@ export type FolderWithWorkflowAndSubFolderCountAndPath = FolderWithWorkflowAndSu
 export type TestRunFinalResult = 'success' | 'error' | 'warning';

 export type TestRunErrorCode =
-	| 'PAST_EXECUTIONS_NOT_FOUND'
-	| 'EVALUATION_WORKFLOW_NOT_FOUND'
+	| 'TEST_CASES_NOT_FOUND'
 	| 'INTERRUPTED'
-	| 'UNKNOWN_ERROR';
+	| 'UNKNOWN_ERROR'
+	| 'EVALUATION_TRIGGER_NOT_FOUND';

 export type TestCaseExecutionErrorCode =
-	| 'MOCKED_NODE_DOES_NOT_EXIST'
-	| 'TRIGGER_NO_LONGER_EXISTS'
+	| 'MOCKED_NODE_NOT_FOUND' // This will be used when node mocking will be implemented
 	| 'FAILED_TO_EXECUTE_WORKFLOW'
-	| 'EVALUATION_WORKFLOW_DOES_NOT_EXIST'
-	| 'FAILED_TO_EXECUTE_EVALUATION_WORKFLOW'
 	| 'INVALID_METRICS'
-	| 'PAYLOAD_LIMIT_EXCEEDED'
 	| 'UNKNOWN_ERROR';

 export type AggregatedTestRunMetrics = Record<string, number | boolean>;
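A minimal sketch of how these codes are meant to flow, assuming the TestRunError class from evaluation.ee/test-runner/errors.ee carries a code of this union type plus optional extra details, as the service changes further down indicate:

import type { IDataObject } from 'n8n-workflow';

// Sketch only: the real class lives in evaluation.ee/test-runner/errors.ee.
type TestRunErrorCode =
	| 'TEST_CASES_NOT_FOUND'
	| 'INTERRUPTED'
	| 'UNKNOWN_ERROR'
	| 'EVALUATION_TRIGGER_NOT_FOUND';

class TestRunError extends Error {
	constructor(
		readonly code: TestRunErrorCode,
		readonly extra?: IDataObject,
	) {
		super(code);
	}
}

// The runner throws e.g. new TestRunError('TEST_CASES_NOT_FOUND') and its catch
// block persists the code via testRunRepository.markAsError(testRun.id, e.code, e.extra).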
@@ -32,6 +32,12 @@ export class TestCaseExecutionRepository extends Repository<TestCaseExecution> {
 		super(TestCaseExecution, dataSource.manager);
 	}

+	async createTestCaseExecution(testCaseExecutionProps: DeepPartial<TestCaseExecution>) {
+		const testCaseExecution = this.create(testCaseExecutionProps);
+
+		return await this.save(testCaseExecution);
+	}
+
 	async createBatch(testRunId: string, testCases: string[]) {
 		const mappings = this.create(
 			testCases.map<DeepPartial<TestCaseExecution>>(() => ({
@@ -46,7 +46,7 @@ export class TestRunRepository extends Repository<TestRun> {

 	async markAsCancelled(id: string, trx?: EntityManager) {
 		trx = trx ?? this.manager;
-		return await trx.update(TestRun, id, { status: 'cancelled' });
+		return await trx.update(TestRun, id, { status: 'cancelled', completedAt: new Date() });
 	}

 	async markAsError(id: string, errorCode: TestRunErrorCode, errorDetails?: IDataObject) {
@@ -54,13 +54,14 @@ export class TestRunRepository extends Repository<TestRun> {
 			status: 'error',
 			errorCode,
 			errorDetails,
+			completedAt: new Date(),
 		});
 	}

 	async markAllIncompleteAsFailed() {
 		return await this.update(
 			{ status: In(['new', 'running']) },
-			{ status: 'error', errorCode: 'INTERRUPTED' },
+			{ status: 'error', errorCode: 'INTERRUPTED', completedAt: new Date() },
 		);
 	}

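With these repository changes every terminal transition (cancelled, error, interrupted) now stamps completedAt. An illustrative helper, assuming the TestRun entity exposes runAt and completedAt timestamps (not part of this commit):

// Illustrative only.
function testRunDurationMs(run: { runAt: Date | null; completedAt: Date | null }): number | null {
	if (!run.runAt || !run.completedAt) return null;
	return run.completedAt.getTime() - run.runAt.getTime();
}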
@@ -158,4 +158,5 @@ export const WsStatusCodes = {

 export const FREE_AI_CREDITS_CREDENTIAL_NAME = 'n8n free OpenAI API credits';

-export const EVALUATION_METRICS_NODE = `${NODE_PACKAGE_PREFIX}base.evaluationMetrics`;
+export const EVALUATION_NODE = `${NODE_PACKAGE_PREFIX}base.evaluation`;
+export const EVALUATION_DATASET_TRIGGER_NODE = `${NODE_PACKAGE_PREFIX}base.evaluationTrigger`;
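These constants resolve to the literal node types 'n8n-nodes-base.evaluation' and 'n8n-nodes-base.evaluationTrigger' (assuming NODE_PACKAGE_PREFIX is 'n8n-nodes-'; the workflow fixture below uses exactly these strings). A sketch of how they are consumed, mirroring findTriggerNode and getEvaluationMetricsNodes later in this commit:

import type { IWorkflowBase } from 'n8n-workflow';

const EVALUATION_NODE = 'n8n-nodes-base.evaluation';
const EVALUATION_DATASET_TRIGGER_NODE = 'n8n-nodes-base.evaluationTrigger';

// Locate the single dataset trigger node of a workflow under test.
const findTriggerNode = (workflow: IWorkflowBase) =>
	workflow.nodes.find((node) => node.type === EVALUATION_DATASET_TRIGGER_NODE);

// Locate every Evaluation node configured to write metrics.
const getMetricsNodes = (workflow: IWorkflowBase) =>
	workflow.nodes.filter(
		(node) => node.type === EVALUATION_NODE && node.parameters.operation === 'setMetrics',
	);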
@@ -1,104 +0,0 @@
-import { readFileSync } from 'fs';
-import path from 'path';
-
-import { createPinData } from '../utils.ee';
-
-const wfUnderTestJson = JSON.parse(
-	readFileSync(path.join(__dirname, './mock-data/workflow.under-test.json'), { encoding: 'utf-8' }),
-);
-
-const wfUnderTestRenamedNodesJson = JSON.parse(
-	readFileSync(path.join(__dirname, './mock-data/workflow.under-test-renamed-nodes.json'), {
-		encoding: 'utf-8',
-	}),
-);
-
-const executionDataJson = JSON.parse(
-	readFileSync(path.join(__dirname, './mock-data/execution-data.json'), { encoding: 'utf-8' }),
-);
-
-describe('createPinData', () => {
-	test('should create pin data from past execution data', () => {
-		const mockedNodes = [
-			{
-				id: '72256d90-3a67-4e29-b032-47df4e5768af',
-				name: 'When clicking ‘Execute workflow’',
-			},
-		];
-
-		const pinData = createPinData(wfUnderTestJson, mockedNodes, executionDataJson);
-
-		expect(pinData).toEqual(
-			expect.objectContaining({
-				'When clicking ‘Execute workflow’': expect.anything(),
-			}),
-		);
-	});
-
-	test('should not create pin data for non-existing mocked nodes', () => {
-		const mockedNodes = ['non-existing-ID'].map((id) => ({ id }));
-
-		const pinData = createPinData(wfUnderTestJson, mockedNodes, executionDataJson);
-
-		expect(pinData).toEqual({});
-	});
-
-	test('should create pin data for all mocked nodes', () => {
-		const mockedNodes = [
-			{
-				id: '72256d90-3a67-4e29-b032-47df4e5768af', // 'When clicking ‘Execute workflow’'
-			},
-			{
-				id: '319f29bc-1dd4-4122-b223-c584752151a4', // 'Edit Fields'
-			},
-			{
-				id: 'd2474215-63af-40a4-a51e-0ea30d762621', // 'Code'
-			},
-		];
-
-		const pinData = createPinData(wfUnderTestJson, mockedNodes, executionDataJson);
-
-		expect(pinData).toEqual(
-			expect.objectContaining({
-				'When clicking ‘Execute workflow’': expect.anything(),
-				'Edit Fields': expect.anything(),
-				Code: expect.anything(),
-			}),
-		);
-	});
-
-	test('should return empty object if no mocked nodes are provided', () => {
-		const pinData = createPinData(wfUnderTestJson, [], executionDataJson);
-
-		expect(pinData).toEqual({});
-	});
-
-	test('should create pin data for all mocked nodes with renamed nodes', () => {
-		const mockedNodes = [
-			{
-				id: '72256d90-3a67-4e29-b032-47df4e5768af', // 'Manual Run'
-			},
-			{
-				id: '319f29bc-1dd4-4122-b223-c584752151a4', // 'Set Attribute'
-			},
-			{
-				id: 'd2474215-63af-40a4-a51e-0ea30d762621', // 'Code'
-			},
-		];
-
-		const pinData = createPinData(
-			wfUnderTestRenamedNodesJson,
-			mockedNodes,
-			executionDataJson,
-			wfUnderTestJson, // Pass original workflow JSON as pastWorkflowData
-		);
-
-		expect(pinData).toEqual(
-			expect.objectContaining({
-				'Manual Run': expect.anything(),
-				'Set Attribute': expect.anything(),
-				Code: expect.anything(),
-			}),
-		);
-	});
-});
@@ -1,40 +0,0 @@
-import { readFileSync } from 'fs';
-import path from 'path';
-
-import { getPastExecutionTriggerNode } from '../utils.ee';
-
-const executionDataJson = JSON.parse(
-	readFileSync(path.join(__dirname, './mock-data/execution-data.json'), { encoding: 'utf-8' }),
-);
-
-const executionDataMultipleTriggersJson = JSON.parse(
-	readFileSync(path.join(__dirname, './mock-data/execution-data.multiple-triggers.json'), {
-		encoding: 'utf-8',
-	}),
-);
-
-const executionDataMultipleTriggersJson2 = JSON.parse(
-	readFileSync(path.join(__dirname, './mock-data/execution-data.multiple-triggers-2.json'), {
-		encoding: 'utf-8',
-	}),
-);
-
-describe('getPastExecutionStartNode', () => {
-	test('should return the start node of the past execution', () => {
-		const startNode = getPastExecutionTriggerNode(executionDataJson);
-
-		expect(startNode).toEqual('When clicking ‘Execute workflow’');
-	});
-
-	test('should return the start node of the past execution with multiple triggers', () => {
-		const startNode = getPastExecutionTriggerNode(executionDataMultipleTriggersJson);
-
-		expect(startNode).toEqual('When clicking ‘Execute workflow’');
-	});
-
-	test('should return the start node of the past execution with multiple triggers - chat trigger', () => {
-		const startNode = getPastExecutionTriggerNode(executionDataMultipleTriggersJson2);
-
-		expect(startNode).toEqual('When chat message received');
-	});
-});
@@ -1,78 +1,126 @@
 {
-	"name": "Workflow Under Test",
 	"nodes": [
 		{
-			"parameters": {},
-			"type": "n8n-nodes-base.manualTrigger",
-			"typeVersion": 1,
-			"position": [-80, 0],
-			"id": "72256d90-3a67-4e29-b032-47df4e5768af",
-			"name": "When clicking ‘Execute workflow’"
+			"parameters": {
+				"documentId": {
+					"__rl": true,
+					"value": "mock",
+					"mode": "list"
+				},
+				"sheetName": {
+					"__rl": true,
+					"value": "mock",
+					"mode": "list"
+				}
+			},
+			"type": "n8n-nodes-base.evaluationTrigger",
+			"typeVersion": 4.6,
+			"position": [0, 0],
+			"id": "3c9068ec-4880-4fbe-a1c8-f7a1cb3f13e9",
+			"name": "When fetching a dataset row",
+			"credentials": {
+				"googleSheetsOAuth2Api": {
+					"id": "mock",
+					"name": "Google Sheets account"
+				}
+			}
 		},
 		{
 			"parameters": {
-				"assignments": {
-					"assignments": [
+				"documentId": {
+					"__rl": true,
+					"value": "mock",
+					"mode": "list"
+				},
+				"sheetName": {
+					"__rl": true,
+					"value": "mock",
+					"mode": "list"
+				},
+				"outputs": {
+					"values": [
 						{
-							"id": "acfeecbe-443c-4220-b63b-d44d69216902",
-							"name": "foo",
-							"value": "bar",
-							"type": "string"
+							"outputName": "reply",
+							"outputValue": "test"
 						}
 					]
-				},
-				"options": {}
+				}
 			},
-			"type": "n8n-nodes-base.set",
-			"typeVersion": 3.4,
-			"position": [140, 0],
-			"id": "319f29bc-1dd4-4122-b223-c584752151a4",
-			"name": "Edit Fields"
+			"type": "n8n-nodes-base.evaluation",
+			"typeVersion": 4.6,
+			"position": [440, 0],
+			"id": "9e0be4fb-faa3-4344-ba80-e18ceb1d22f1",
+			"name": "Set Outputs",
+			"credentials": {
+				"googleSheetsOAuth2Api": {
+					"id": "mock",
+					"name": "Google Sheets account"
+				}
+			}
+		},
+		{
+			"parameters": {},
+			"type": "n8n-nodes-base.noOp",
+			"typeVersion": 1,
+			"position": [220, 0],
+			"id": "335047aa-fb77-43a1-8135-873d34e7ccc1",
+			"name": "No Operation, do nothing"
 		},
 		{
 			"parameters": {
-				"jsCode": "for (const item of $input.all()) {\n item.json.random = Math.random();\n}\n\nreturn $input.all();"
+				"operation": "setMetrics",
+				"metrics": {
+					"assignments": [
+						{
+							"name": "test",
+							"value": 0,
+							"type": "number",
+							"id": "cc598090-09c8-489d-84d5-e7b9ee5576b5"
+						}
+					]
+				}
 			},
-			"type": "n8n-nodes-base.code",
-			"typeVersion": 2,
-			"position": [380, 0],
-			"id": "d2474215-63af-40a4-a51e-0ea30d762621",
-			"name": "Code"
+			"type": "n8n-nodes-base.evaluation",
+			"typeVersion": 4.6,
+			"position": [660, 0],
+			"id": "7a795bad-08e1-4e5c-bfe4-2c43129b6da5",
+			"name": "Set Metrics"
 		}
 	],
 	"connections": {
-		"When clicking ‘Execute workflow’": {
+		"When fetching a dataset row": {
 			"main": [
 				[
 					{
-						"node": "Edit Fields",
+						"node": "No Operation, do nothing",
 						"type": "main",
 						"index": 0
 					}
 				]
 			]
 		},
-		"Edit Fields": {
+		"Set Outputs": {
 			"main": [
 				[
 					{
-						"node": "Wait",
+						"node": "Set Metrics",
 						"type": "main",
 						"index": 0
 					}
 				]
 			]
 		},
-		"Wait": {
+		"No Operation, do nothing": {
 			"main": [
 				[
 					{
-						"node": "Code",
+						"node": "Set Outputs",
 						"type": "main",
 						"index": 0
 					}
 				]
 			]
 		}
-	}
+	},
+	"pinData": {}
 }
@@ -0,0 +1,635 @@
+import type { TestRun } from '@n8n/db';
+import type { TestCaseExecutionRepository } from '@n8n/db';
+import type { TestRunRepository } from '@n8n/db';
+import type { WorkflowRepository } from '@n8n/db';
+import { readFileSync } from 'fs';
+import { mock } from 'jest-mock-extended';
+import type { ErrorReporter } from 'n8n-core';
+import type { IWorkflowBase } from 'n8n-workflow';
+import type { IRun } from 'n8n-workflow';
+import path from 'path';
+
+import type { ActiveExecutions } from '@/active-executions';
+import { EVALUATION_DATASET_TRIGGER_NODE } from '@/constants';
+import { TestRunError } from '@/evaluation.ee/test-runner/errors.ee';
+import { LoadNodesAndCredentials } from '@/load-nodes-and-credentials';
+import type { Telemetry } from '@/telemetry';
+import type { WorkflowRunner } from '@/workflow-runner';
+import { mockInstance, mockLogger } from '@test/mocking';
+import { mockNodeTypesData } from '@test-integration/utils/node-types-data';
+
+import { TestRunnerService } from '../test-runner.service.ee';
+
+const wfUnderTestJson = JSON.parse(
+	readFileSync(path.join(__dirname, './mock-data/workflow.under-test.json'), { encoding: 'utf-8' }),
+);
+
+const errorReporter = mock<ErrorReporter>();
+const logger = mockLogger();
+const telemetry = mock<Telemetry>();
+
+describe('TestRunnerService', () => {
+	const workflowRepository = mock<WorkflowRepository>();
+	const workflowRunner = mock<WorkflowRunner>();
+	const activeExecutions = mock<ActiveExecutions>();
+	const testRunRepository = mock<TestRunRepository>();
+	const testCaseExecutionRepository = mock<TestCaseExecutionRepository>();
+	let testRunnerService: TestRunnerService;
+
+	mockInstance(LoadNodesAndCredentials, {
+		loadedNodes: mockNodeTypesData(['manualTrigger', 'set', 'if', 'code', 'evaluation']),
+	});
+
+	beforeEach(() => {
+		testRunnerService = new TestRunnerService(
+			logger,
+			telemetry,
+			workflowRepository,
+			workflowRunner,
+			activeExecutions,
+			testRunRepository,
+			testCaseExecutionRepository,
+			errorReporter,
+		);
+
+		testRunRepository.createTestRun.mockResolvedValue(mock<TestRun>({ id: 'test-run-id' }));
+	});
+
+	afterEach(() => {
+		jest.resetAllMocks();
+	});
+
+	describe('findTriggerNode', () => {
+		test('should find the trigger node in a workflow', () => {
+			// Setup a test workflow with a trigger node
+			const workflowWithTrigger = mock<IWorkflowBase>({
+				nodes: [
+					{
+						id: 'node1',
+						name: 'Dataset Trigger',
+						type: EVALUATION_DATASET_TRIGGER_NODE,
+						typeVersion: 1,
+						position: [0, 0],
+						parameters: {},
+					},
+					{
+						id: 'node2',
+						name: 'Regular Node',
+						type: 'n8n-nodes-base.noOp',
+						typeVersion: 1,
+						position: [100, 0],
+						parameters: {},
+					},
+				],
+				connections: {},
+			});
+
+			// Use the protected method via any type casting
+			const result = (testRunnerService as any).findTriggerNode(workflowWithTrigger);
+
+			// Assert the result is the correct node
+			expect(result).toBeDefined();
+			expect(result.type).toBe(EVALUATION_DATASET_TRIGGER_NODE);
+			expect(result.name).toBe('Dataset Trigger');
+		});
+
+		test('should return undefined when no trigger node is found', () => {
+			// Setup a test workflow without a trigger node
+			const workflowWithoutTrigger = mock<IWorkflowBase>({
+				nodes: [
+					{
+						id: 'node1',
+						name: 'Regular Node 1',
+						type: 'n8n-nodes-base.noOp',
+						typeVersion: 1,
+						position: [0, 0],
+						parameters: {},
+					},
+					{
+						id: 'node2',
+						name: 'Regular Node 2',
+						type: 'n8n-nodes-base.set',
+						typeVersion: 1,
+						position: [100, 0],
+						parameters: {},
+					},
+				],
+				connections: {},
+			});
+
+			// Call the function and expect undefined result
+			const result = (testRunnerService as any).findTriggerNode(workflowWithoutTrigger);
+			expect(result).toBeUndefined();
+		});
+
+		test('should work with the actual workflow.under-test.json', () => {
+			const result = (testRunnerService as any).findTriggerNode(wfUnderTestJson);
+
+			// Assert the result is the correct node
+			expect(result).toBeDefined();
+			expect(result.type).toBe(EVALUATION_DATASET_TRIGGER_NODE);
+			expect(result.name).toBe('When fetching a dataset row');
+		});
+	});
+
+	describe('extractDatasetTriggerOutput', () => {
+		test('should extract trigger output data from execution', () => {
+			// Create workflow with a trigger node
+			const workflow = mock<IWorkflowBase>({
+				nodes: [
+					{
+						id: 'triggerNodeId',
+						name: 'TriggerNode',
+						type: EVALUATION_DATASET_TRIGGER_NODE,
+						typeVersion: 1,
+						position: [0, 0],
+						parameters: {},
+					},
+				],
+				connections: {},
+			});
+
+			// Create execution data with output for the trigger node
+			const mockOutputItems = [
+				{ json: { id: 1, name: 'Test 1' } },
+				{ json: { id: 2, name: 'Test 2' } },
+			];
+
+			const execution = mock<IRun>({
+				data: {
+					resultData: {
+						runData: {
+							TriggerNode: [
+								{
+									data: {
+										main: [mockOutputItems],
+									},
+								},
+							],
+						},
+					},
+				},
+			});
+
+			// Call the method
+			const result = (testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
+
+			// Verify results
+			expect(result).toEqual(mockOutputItems);
+		});
+
+		test('should throw an error if trigger node output is not present', () => {
+			// Create workflow with a trigger node
+			const workflow = mock<IWorkflowBase>({
+				nodes: [
+					{
+						id: 'triggerNodeId',
+						name: 'TriggerNode',
+						type: EVALUATION_DATASET_TRIGGER_NODE,
+						typeVersion: 1,
+						position: [0, 0],
+						parameters: {},
+					},
+				],
+				connections: {},
+			});
+
+			// Create execution data with missing output
+			const execution = mock<IRun>({
+				data: {
+					resultData: {
+						runData: {},
+					},
+				},
+			});
+
+			// Expect the method to throw an error
+			expect(() => {
+				(testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
+			}).toThrow(TestRunError);
+
+			// Verify the error has the correct code
+			try {
+				(testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
+			} catch (error) {
+				expect(error).toBeInstanceOf(TestRunError);
+				expect(error.code).toBe('TEST_CASES_NOT_FOUND');
+			}
+		});
+
+		test('should throw an error if trigger node output is empty list', () => {
+			// Create workflow with a trigger node
+			const workflow = mock<IWorkflowBase>({
+				nodes: [
+					{
+						id: 'triggerNodeId',
+						name: 'TriggerNode',
+						type: EVALUATION_DATASET_TRIGGER_NODE,
+						typeVersion: 1,
+						position: [0, 0],
+						parameters: {},
+					},
+				],
+				connections: {},
+			});
+
+			// Create execution data with missing output
+			const execution = mock<IRun>({
+				data: {
+					resultData: {
+						runData: {
+							TriggerNode: [
+								{
+									data: {
+										main: [[]], // Empty list
+									},
+								},
+							],
+						},
+					},
+				},
+			});
+
+			// Expect the method to throw an error
+			expect(() => {
+				(testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
+			}).toThrow(TestRunError);
+
+			// Verify the error has the correct code
+			try {
+				(testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
+			} catch (error) {
+				expect(error).toBeInstanceOf(TestRunError);
+				expect(error.code).toBe('TEST_CASES_NOT_FOUND');
+			}
+		});
+
+		test('should work with actual execution data format', () => {
+			// Create workflow with a trigger node that matches the name in the actual data
+			const workflow = mock<IWorkflowBase>({
+				nodes: [
+					{
+						id: 'triggerNodeId',
+						name: "When clicking 'Execute workflow'",
+						type: EVALUATION_DATASET_TRIGGER_NODE,
+						typeVersion: 1,
+						position: [0, 0],
+						parameters: {},
+					},
+				],
+				connections: {},
+			});
+
+			// Mock execution data similar to actual format
+			const expectedItems = [
+				{ json: { query: 'First item' }, pairedItem: { item: 0 } },
+				{ json: { query: 'Second item' }, pairedItem: { item: 0 } },
+				{ json: { query: 'Third item' }, pairedItem: { item: 0 } },
+			];
+
+			// TODO: change with actual data
+			const execution = mock<IRun>({
+				data: {
+					resultData: {
+						runData: {
+							"When clicking 'Execute workflow'": [
+								{
+									data: {
+										main: [expectedItems],
+									},
+								},
+							],
+						},
+					},
+				},
+			});
+
+			// Call the method
+			const result = (testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
+
+			// Verify results
+			expect(result).toEqual(expectedItems);
+		});
+	});
+
+	describe('runDatasetTrigger', () => {
+		beforeEach(() => {
+			// Setup mock execution response
+			const mockExecutionId = 'mock-execution-id';
+			const mockExecutionData = mock<IRun>({
+				data: {
+					resultData: {
+						runData: {},
+					},
+				},
+			});
+
+			// Setup workflowRunner mock
+			workflowRunner.run.mockResolvedValue(mockExecutionId);
+
+			// Setup activeExecutions mock
+			activeExecutions.getPostExecutePromise.mockResolvedValue(mockExecutionData);
+		});
+
+		test('should throw an error if trigger node is not found', async () => {
+			// Create workflow without a trigger node
+			const workflow = mock<IWorkflowBase>({
+				nodes: [
+					{
+						id: 'node1',
+						name: 'Regular Node',
+						type: 'n8n-nodes-base.noOp',
+						typeVersion: 1,
+						position: [0, 0],
+						parameters: {},
+					},
+				],
+				connections: {},
+			});
+
+			const metadata = {
+				testRunId: 'test-run-id',
+				userId: 'user-id',
+			};
+
+			// Call the method and expect it to throw an error
+			await expect(
+				(testRunnerService as any).runDatasetTrigger(workflow, metadata),
+			).rejects.toThrow(TestRunError);
+
+			// Verify the error has the correct code
+			try {
+				await (testRunnerService as any).runDatasetTrigger(workflow, metadata);
+			} catch (error) {
+				expect(error).toBeInstanceOf(TestRunError);
+				expect(error.code).toBe('EVALUATION_TRIGGER_NOT_FOUND');
+			}
+		});
+
+		test('should call workflowRunner.run with correct data', async () => {
+			// Create workflow with a trigger node
+			const triggerNodeName = 'Dataset Trigger';
+			const workflow = mock<IWorkflowBase>({
+				nodes: [
+					{
+						id: 'node1',
+						name: triggerNodeName,
+						type: EVALUATION_DATASET_TRIGGER_NODE,
+						typeVersion: 1,
+						position: [0, 0],
+						parameters: {},
+					},
+				],
+				connections: {},
+				settings: {
+					saveDataErrorExecution: 'all',
+				},
+			});
+
+			const metadata = {
+				testRunId: 'test-run-id',
+				userId: 'user-id',
+			};
+
+			// Call the method
+			await (testRunnerService as any).runDatasetTrigger(workflow, metadata);
+
+			// Verify workflowRunner.run was called
+			expect(workflowRunner.run).toHaveBeenCalledTimes(1);
+
+			// Get the argument passed to workflowRunner.run
+			const runCallArg = workflowRunner.run.mock.calls[0][0];
+
+			// Verify it has the correct structure
+			expect(runCallArg).toHaveProperty('destinationNode', triggerNodeName);
+			expect(runCallArg).toHaveProperty('executionMode', 'manual');
+			expect(runCallArg).toHaveProperty('workflowData.settings.saveManualExecutions', false);
+			expect(runCallArg).toHaveProperty('workflowData.settings.saveDataErrorExecution', 'none');
+			expect(runCallArg).toHaveProperty('workflowData.settings.saveDataSuccessExecution', 'none');
+			expect(runCallArg).toHaveProperty('workflowData.settings.saveExecutionProgress', false);
+			expect(runCallArg).toHaveProperty('userId', metadata.userId);
+			expect(runCallArg).toHaveProperty('partialExecutionVersion', 2);
+
+			// Verify node execution stack contains the requestDataset flag
+			expect(runCallArg).toHaveProperty('executionData.executionData.nodeExecutionStack');
+			const nodeExecutionStack = runCallArg.executionData?.executionData?.nodeExecutionStack;
+			expect(nodeExecutionStack).toBeInstanceOf(Array);
+			expect(nodeExecutionStack).toHaveLength(1);
+			expect(nodeExecutionStack?.[0]).toHaveProperty('node.name', triggerNodeName);
+			expect(nodeExecutionStack?.[0]).toHaveProperty('data.main[0][0].json.requestDataset', true);
+		});
+
+		test('should wait for execution to finish and return result', async () => {
+			// Create workflow with a trigger node
+			const triggerNodeName = 'Dataset Trigger';
+			const workflow = mock<IWorkflowBase>({
+				nodes: [
+					{
+						id: 'node1',
+						name: triggerNodeName,
+						type: EVALUATION_DATASET_TRIGGER_NODE,
+						typeVersion: 1,
+						position: [0, 0],
+						parameters: {},
+					},
+				],
+				connections: {},
+			});
+
+			const metadata = {
+				testRunId: 'test-run-id',
+				userId: 'user-id',
+			};
+
+			// Setup mock for execution ID and result
+			const mockExecutionId = 'dataset-execution-id';
+			const mockExecutionResult = mock<IRun>({
+				data: {
+					resultData: {
+						runData: {
+							[triggerNodeName]: [
+								{
+									data: {
+										main: [[{ json: { test: 'data1' } }, { json: { test: 'data2' } }]],
+									},
+								},
+							],
+						},
+					},
+				},
+			});
+
+			workflowRunner.run.mockResolvedValue(mockExecutionId);
+			activeExecutions.getPostExecutePromise.mockResolvedValue(mockExecutionResult);
+
+			// Call the method
+			const result = await (testRunnerService as any).runDatasetTrigger(workflow, metadata);
+
+			// Verify the execution was waited for
+			expect(activeExecutions.getPostExecutePromise).toHaveBeenCalledWith(mockExecutionId);
+
+			// Verify the result is correct
+			expect(result).toEqual(mockExecutionResult);
+		});
+	});
+
+	describe('runTestCase', () => {
+		beforeEach(() => {
+			// Setup mock execution response
+			const mockExecutionId = 'mock-execution-id';
+			const mockExecutionData = mock<IRun>({
+				data: {
+					resultData: {
+						runData: {},
+					},
+				},
+			});
+
+			// Setup workflowRunner mock
+			workflowRunner.run.mockResolvedValue(mockExecutionId);
+
+			// Setup activeExecutions mock
+			activeExecutions.getPostExecutePromise.mockResolvedValue(mockExecutionData);
+		});
+
+		test('should return undefined if abortSignal is aborted', async () => {
+			// Create an aborted signal
+			const abortController = new AbortController();
+			abortController.abort();
+
+			// Create test data
+			const workflow = mock<IWorkflowBase>({
+				nodes: [],
+				connections: {},
+			});
+
+			const metadata = {
+				testRunId: 'test-run-id',
+				userId: 'user-id',
+			};
+
+			const testCase = { json: { id: 1, name: 'Test 1' } };
+
+			// Call the method
+			const result = await (testRunnerService as any).runTestCase(
+				workflow,
+				metadata,
+				testCase,
+				abortController.signal,
+			);
+
+			// Verify results
+			expect(result).toBeUndefined();
+			expect(workflowRunner.run).not.toHaveBeenCalled();
+		});
+
+		test('should call workflowRunner.run with correct data', async () => {
+			// Setup test data
+			const triggerNodeName = 'TriggerNode';
+			const workflow = mock<IWorkflowBase>({
+				nodes: [
+					{
+						id: 'node1',
+						name: triggerNodeName,
+						type: EVALUATION_DATASET_TRIGGER_NODE,
+						typeVersion: 1,
+						position: [0, 0],
+						parameters: {},
+					},
+				],
+				connections: {},
+			});
+
+			const metadata = {
+				testRunId: 'test-run-id',
+				userId: 'user-id',
+			};
+
+			const testCase = { json: { id: 1, name: 'Test 1' } };
+			const abortController = new AbortController();
+
+			// Call the method
+			await (testRunnerService as any).runTestCase(
+				workflow,
+				metadata,
+				testCase,
+				abortController.signal,
+			);
+
+			// Verify workflowRunner.run was called with the correct data
+			expect(workflowRunner.run).toHaveBeenCalledTimes(1);
+
+			const runCallArg = workflowRunner.run.mock.calls[0][0];
+
+			// Verify the expected structure
+			expect(runCallArg).toEqual(
+				expect.objectContaining({
+					executionMode: 'evaluation',
+					pinData: {
+						[triggerNodeName]: [testCase],
+					},
+					workflowData: workflow,
+					userId: metadata.userId,
+					partialExecutionVersion: 2,
+					triggerToStartFrom: {
+						name: triggerNodeName,
+					},
+				}),
+			);
+		});
+
+		test('should register abort event listener and return execution results', async () => {
+			// Setup test data
+			const triggerNodeName = 'TriggerNode';
+			const workflow = mock<IWorkflowBase>({
+				nodes: [
+					{
+						id: 'node1',
+						name: triggerNodeName,
+						type: EVALUATION_DATASET_TRIGGER_NODE,
+						typeVersion: 1,
+						position: [0, 0],
+						parameters: {},
+					},
+				],
+				connections: {},
+			});
+
+			const metadata = {
+				testRunId: 'test-run-id',
+				userId: 'user-id',
+			};
+
+			const testCase = { json: { id: 1, name: 'Test 1' } };
+			const abortController = new AbortController();
+
+			// Mock addEventListener on AbortSignal
+			const mockAddEventListener = jest.fn();
+			const originalAddEventListener = abortController.signal.addEventListener;
+			abortController.signal.addEventListener = mockAddEventListener;
+
+			try {
+				// Call the method
+				const result = await (testRunnerService as any).runTestCase(
+					workflow,
+					metadata,
+					testCase,
+					abortController.signal,
+				);
+
+				// Verify addEventListener was called
+				expect(mockAddEventListener).toHaveBeenCalledTimes(1);
+				expect(mockAddEventListener.mock.calls[0][0]).toBe('abort');
+
+				// Verify the expected result structure
+				expect(result).toHaveProperty('executionData');
+				expect(result.executionData?.data).toBeDefined();
+				expect(result).toHaveProperty('executionId');
+				expect(result.executionId).toEqual(expect.any(String));
+			} finally {
+				// Restore original method
+				abortController.signal.addEventListener = originalAddEventListener;
+			}
+		});
+	});
+});
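The tests above reach the service's private methods by casting to any. A slightly narrower cast (a sketch, not taken from the commit, reusing the testRunnerService and wfUnderTestJson defined in the test file above) keeps the rest of the surface typed:

import type { INode, IWorkflowBase } from 'n8n-workflow';

// Hypothetical helper type describing only the private surface exercised in these tests.
type TestRunnerInternals = {
	findTriggerNode(workflow: IWorkflowBase): INode | undefined;
};

const internals = testRunnerService as unknown as TestRunnerInternals;
const trigger = internals.findTriggerNode(wfUnderTestJson);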
@@ -3,28 +3,43 @@ import { TestCaseExecutionRepository, TestRunRepository, WorkflowRepository } fr
 import { Service } from '@n8n/di';
 import { ErrorReporter, Logger } from 'n8n-core';
 import { ExecutionCancelledError } from 'n8n-workflow';
-import type { IRun, IWorkflowBase, IWorkflowExecutionDataProcess } from 'n8n-workflow';
+import type {
+	IDataObject,
+	IRun,
+	IWorkflowBase,
+	IWorkflowExecutionDataProcess,
+	IExecuteData,
+	INodeExecutionData,
+} from 'n8n-workflow';
 import assert from 'node:assert';

 import { ActiveExecutions } from '@/active-executions';
 import config from '@/config';
-import { EVALUATION_METRICS_NODE } from '@/constants';
+import { EVALUATION_DATASET_TRIGGER_NODE, EVALUATION_NODE } from '@/constants';
 import { TestCaseExecutionError, TestRunError } from '@/evaluation.ee/test-runner/errors.ee';
 import { Telemetry } from '@/telemetry';
 import { WorkflowRunner } from '@/workflow-runner';

+import { EvaluationMetrics } from './evaluation-metrics.ee';
+
 export interface TestRunMetadata {
 	testRunId: string;
 	userId: string;
 }

+export interface TestCaseExecutionResult {
+	executionData: IRun;
+	executionId: string;
+}
+
 /**
- * This service orchestrates the running of test cases.
- * It uses the test definitions to find
- * past executions, creates pin data from them,
- * and runs the workflow-under-test with the pin data.
- * After the workflow-under-test finishes, it runs the evaluation workflow
- * with the original and new run data, and collects the metrics.
+ * This service orchestrates the running of evaluations.
+ * It makes a partial execution of the workflow under test to get the dataset
+ * by running the evaluation trigger node only and capturing the output.
+ * Then it iterates over test cases (the items of a list produced by evaluation trigger node)
+ * and runs the workflow under test with each test case as input.
+ * After running each test case, it collects the metrics from the evaluation nodes output.
+ * After all test cases are run, it aggregates the metrics and saves them to the database.
  */
 @Service()
 export class TestRunnerService {
@@ -42,57 +57,48 @@ export class TestRunnerService {
 	) {}

 	/**
-	 * Prepares the start nodes and trigger node data props for the `workflowRunner.run` method input.
+	 * Finds the dataset trigger node in the workflow
 	 */
-	private getStartNodesData(
-		workflow: IWorkflowBase,
-	): Pick<IWorkflowExecutionDataProcess, 'triggerToStartFrom'> {
-		// Find the dataset trigger node
-		// TODO: replace with dataset trigger node
-		const triggerNode = workflow.nodes.find(
-			(node) => node.type === 'n8n-nodes-base.executeWorkflowTrigger',
-		);
-		if (!triggerNode) {
-			// TODO: Change error
-			throw new TestCaseExecutionError('TRIGGER_NO_LONGER_EXISTS');
-		}
-
-		const triggerToStartFrom = {
-			name: triggerNode.name,
-		};
-
-		return {
-			triggerToStartFrom,
-		};
+	private findTriggerNode(workflow: IWorkflowBase) {
+		return workflow.nodes.find((node) => node.type === EVALUATION_DATASET_TRIGGER_NODE);
 	}

 	/**
-	 * Runs a test case with the given pin data.
+	 * Runs a test case with the given input.
+	 * Injects the input data as pinned data of evaluation trigger node.
 	 * Waits for the workflow under test to finish execution.
 	 */
 	private async runTestCase(
 		workflow: IWorkflowBase,
 		metadata: TestRunMetadata,
+		testCase: INodeExecutionData,
 		abortSignal: AbortSignal,
-	): Promise<IRun | undefined> {
+	): Promise<TestCaseExecutionResult | undefined> {
 		// Do not run if the test run is cancelled
 		if (abortSignal.aborted) {
 			return;
 		}

-		const startNodesData = this.getStartNodesData(workflow);
-
 		// Prepare the data to run the workflow
 		// Evaluation executions should run the same way as manual,
 		// because they need pinned data and partial execution logic
+
+		const triggerNode = this.findTriggerNode(workflow);
+		assert(triggerNode);
+
+		const pinData = {
+			[triggerNode.name]: [testCase],
+		};
+
 		const data: IWorkflowExecutionDataProcess = {
-			...startNodesData,
 			executionMode: 'evaluation',
-			runData: {},
-			// pinData,
+			pinData,
 			workflowData: workflow,
 			userId: metadata.userId,
 			partialExecutionVersion: 2,
+			triggerToStartFrom: {
+				name: triggerNode.name,
+			},
 		};

 		// When in queue mode, we need to pass additional data to the execution
@@ -109,7 +115,9 @@ export class TestRunnerService {
 				manualData: {
 					userId: metadata.userId,
 					partialExecutionVersion: 2,
-					triggerToStartFrom: startNodesData.triggerToStartFrom,
+					triggerToStartFrom: {
+						name: triggerNode.name,
+					},
 				},
 			};
 		}
@@ -123,7 +131,86 @@ export class TestRunnerService {
 			this.activeExecutions.stopExecution(executionId);
 		});

-		// TODO: Update status of the test run execution
+		// Wait for the execution to finish
+		const executionData = await this.activeExecutions.getPostExecutePromise(executionId);
+
+		assert(executionData);
+
+		return { executionId, executionData };
+	}
+
+	/**
+	 * This method creates a partial workflow execution to run the dataset trigger only
+	 * to get the whole dataset.
+	 */
+	private async runDatasetTrigger(workflow: IWorkflowBase, metadata: TestRunMetadata) {
+		// Prepare the data to run the workflow
+		// Evaluation executions should run the same way as manual,
+		// because they need pinned data and partial execution logic
+
+		const triggerNode = this.findTriggerNode(workflow);
+
+		if (!triggerNode) {
+			throw new TestRunError('EVALUATION_TRIGGER_NOT_FOUND');
+		}
+
+		// Initialize the input data for dataset trigger
+		// Provide a flag indicating that we want to get the whole dataset
+		const nodeExecutionStack: IExecuteData[] = [];
+		nodeExecutionStack.push({
+			node: triggerNode,
+			data: {
+				main: [[{ json: { requestDataset: true } }]],
+			},
+			source: null,
+		});
+
+		const data: IWorkflowExecutionDataProcess = {
+			destinationNode: triggerNode.name,
+			executionMode: 'manual',
+			runData: {},
+			workflowData: {
+				...workflow,
+				settings: {
+					...workflow.settings,
+					saveManualExecutions: false,
+					saveDataErrorExecution: 'none',
+					saveDataSuccessExecution: 'none',
+					saveExecutionProgress: false,
+				},
+			},
+			userId: metadata.userId,
+			partialExecutionVersion: 2,
+			executionData: {
+				startData: {
+					destinationNode: triggerNode.name,
+				},
+				resultData: {
+					runData: {},
+				},
+				executionData: {
+					contextData: {},
+					metadata: {},
+					nodeExecutionStack,
+					waitingExecution: {},
+					waitingExecutionSource: {},
+				},
+				manualData: {
+					userId: metadata.userId,
+					partialExecutionVersion: 2,
+					triggerToStartFrom: {
+						name: triggerNode.name,
+					},
+				},
+			},
+			triggerToStartFrom: {
+				name: triggerNode.name,
+			},
+		};
+
+		// Trigger the workflow under test with mocked data
+		const executionId = await this.workflowRunner.run(data);
+		assert(executionId);
+
 		// Wait for the execution to finish
 		const executePromise = this.activeExecutions.getPostExecutePromise(executionId);
@@ -135,11 +222,47 @@ export class TestRunnerService {
 	 * Get the evaluation metrics nodes from a workflow.
 	 */
 	static getEvaluationMetricsNodes(workflow: IWorkflowBase) {
-		return workflow.nodes.filter((node) => node.type === EVALUATION_METRICS_NODE);
+		return workflow.nodes.filter(
+			(node) => node.type === EVALUATION_NODE && node.parameters.operation === 'setMetrics',
+		);
 	}

 	/**
-	 * Creates a new test run for the given test definition.
+	 * Extract the dataset trigger output
+	 */
+	private extractDatasetTriggerOutput(execution: IRun, workflow: IWorkflowBase) {
+		const triggerNode = this.findTriggerNode(workflow);
+		assert(triggerNode);
+
+		const triggerOutputData = execution.data.resultData.runData[triggerNode.name][0];
+		const triggerOutput = triggerOutputData?.data?.main?.[0];
+
+		if (!triggerOutput || triggerOutput.length === 0) {
+			throw new TestRunError('TEST_CASES_NOT_FOUND');
+		}
+
+		return triggerOutput;
+	}
+
+	/**
+	 * Evaluation result is collected from all Evaluation Metrics nodes
+	 */
+	private extractEvaluationResult(execution: IRun, workflow: IWorkflowBase): IDataObject {
+		// TODO: Do not fail if not all metric nodes were executed
+		const metricsNodes = TestRunnerService.getEvaluationMetricsNodes(workflow);
+		const metricsRunData = metricsNodes.flatMap(
+			(node) => execution.data.resultData.runData[node.name],
+		);
+		const metricsData = metricsRunData
+			.reverse()
+			.map((data) => data.data?.main?.[0]?.[0]?.json ?? {});
+		const metricsResult = metricsData.reduce((acc, curr) => ({ ...acc, ...curr }), {});
+
+		return metricsResult;
+	}
+
+	/**
+	 * Creates a new test run for the given workflow
 	 */
 	async runTest(user: User, workflowId: string): Promise<void> {
 		this.logger.debug('Starting new test run', { workflowId });
@@ -148,7 +271,6 @@ export class TestRunnerService {
 		assert(workflow, 'Workflow not found');

 		// 0. Create new Test Run
-		// TODO: Check that createTestRun takes workflowId as an argument
 		const testRun = await this.testRunRepository.createTestRun(workflowId);
 		assert(testRun, 'Unable to create a test run');

@@ -169,36 +291,8 @@ export class TestRunnerService {
 		const { manager: dbManager } = this.testRunRepository;

 		try {
-			///
-			// 1. Make test cases list
-			///
-
-			// TODO: Get the test cases from the dataset trigger node
-			const testCases = [{ id: 1 }];
-
-			this.logger.debug('Found test cases', { count: testCases.length });
-
-			if (testCases.length === 0) {
-				// TODO: Change error
-				throw new TestRunError('PAST_EXECUTIONS_NOT_FOUND');
-			}
-
-			// Add all past executions mappings to the test run.
-			// This will be used to track the status of each test case and keep the connection between test run and all related executions (past, current, and evaluation).
-			// await this.testCaseExecutionRepository.createBatch(
-			// testRun.id,
-			// testCases.map((e) => e.id),
-			// );
-
-			// TODO: Collect metric names from evaluation nodes of the workflow
-			// const testMetricNames = new Set<string>();
-
-			// 2. Run over all the test cases
-			// const pastExecutionIds = pastExecutions.map((e) => e.id);
-
 			// Update test run status
-			// TODO: mark test run as running
-			// await this.testRunRepository.markAsRunning(testRun.id);
+			await this.testRunRepository.markAsRunning(testRun.id);

 			this.telemetry.track('User ran test', {
 				user_id: user.id,
@@ -206,23 +300,39 @@ export class TestRunnerService {
 				workflow_id: workflowId,
 			});

+			///
+			// 1. Make test cases list
+			///
+
+			const datasetFetchExecution = await this.runDatasetTrigger(workflow, testRunMetadata);
+			assert(datasetFetchExecution);
+
+			const datasetTriggerOutput = this.extractDatasetTriggerOutput(
+				datasetFetchExecution,
+				workflow,
+			);
+
+			const testCases = datasetTriggerOutput.map((items) => ({ json: items.json }));
+
+			this.logger.debug('Found test cases', { count: testCases.length });
+
 			// Initialize object to collect the results of the evaluation workflow executions
-			// const metrics = new EvaluationMetrics();
+			const metrics = new EvaluationMetrics();

 			///
 			// 2. Run over all the test cases
 			///

-			for (const _testCase of testCases) {
+			for (const testCase of testCases) {
 				if (abortSignal.aborted) {
 					this.logger.debug('Test run was cancelled', {
 						workflowId,
-						// stoppedOn: pastExecutionId,
 					});
 					break;
 				}

 				this.logger.debug('Running test case');
+				const runAt = new Date();
+
 				try {
 					const testCaseMetadata = {
@@ -230,22 +340,58 @@ export class TestRunnerService {
 					};

 					// Run the test case and wait for it to finish
-					const testCaseExecution = await this.runTestCase(workflow, testCaseMetadata, abortSignal);
+					const testCaseResult = await this.runTestCase(
+						workflow,
+						testCaseMetadata,
+						testCase,
+						abortSignal,
+					);
+					assert(testCaseResult);
+
+					const { executionId: testCaseExecutionId, executionData: testCaseExecution } =
+						testCaseResult;
+
+					assert(testCaseExecution);
+					assert(testCaseExecutionId);
+
 					this.logger.debug('Test case execution finished');

 					// In case of a permission check issue, the test case execution will be undefined.
 					// If that happens, or if the test case execution produced an error, mark the test case as failed.
 					if (!testCaseExecution || testCaseExecution.data.resultData.error) {
-						// TODO: add failed test case execution to DB
+						// Save the failed test case execution in DB
+						await this.testCaseExecutionRepository.createTestCaseExecution({
+							executionId: testCaseExecutionId,
+							testRun: {
+								id: testRun.id,
+							},
+							status: 'error',
+							errorCode: 'FAILED_TO_EXECUTE_WORKFLOW',
+							metrics: {},
+						});
 						continue;
 					}
+					const completedAt = new Date();

-					// TODO: extract metrics
+					const { addedMetrics } = metrics.addResults(
+						this.extractEvaluationResult(testCaseExecution, workflow),
+					);
+
+					this.logger.debug('Test case metrics extracted', addedMetrics);
+
 					// Create a new test case execution in DB
-					// TODO: add successful test case execution to DB
+					await this.testCaseExecutionRepository.createTestCaseExecution({
+						executionId: testCaseExecutionId,
+						testRun: {
+							id: testRun.id,
+						},
+						runAt,
+						completedAt,
+						status: 'success',
+						metrics: addedMetrics,
+					});
 				} catch (e) {
+					const completedAt = new Date();
 					// FIXME: this is a temporary log
 					this.logger.error('Test case execution failed', {
 						workflowId,
@@ -255,9 +401,26 @@ export class TestRunnerService {

 					// In case of an unexpected error save it as failed test case execution and continue with the next test case
 					if (e instanceof TestCaseExecutionError) {
-						// TODO: add failed test case execution to DB
+						await this.testCaseExecutionRepository.createTestCaseExecution({
+							testRun: {
+								id: testRun.id,
+							},
+							runAt,
+							completedAt,
+							status: 'error',
+							errorCode: e.code,
+							errorDetails: e.extra as IDataObject,
+						});
 					} else {
-						// TODO: add failed test case execution to DB
+						await this.testCaseExecutionRepository.createTestCaseExecution({
+							testRun: {
+								id: testRun.id,
+							},
+							runAt,
+							completedAt,
+							status: 'error',
+							errorCode: 'UNKNOWN_ERROR',
+						});

 						// Report unexpected errors
 						this.errorReporter.error(e);
@@ -268,16 +431,17 @@ export class TestRunnerService {
 			// Mark the test run as completed or cancelled
 			if (abortSignal.aborted) {
 				await dbManager.transaction(async (trx) => {
-					// TODO: mark test run as cancelled
-					// await this.testRunRepository.markAsCancelled(testRun.id, trx);
+					await this.testRunRepository.markAsCancelled(testRun.id, trx);
 					await this.testCaseExecutionRepository.markAllPendingAsCancelled(testRun.id, trx);

 					testRunEndStatusForTelemetry = 'cancelled';
 				});
 			} else {
-				// const aggregatedMetrics = metrics.getAggregatedMetrics();
+				const aggregatedMetrics = metrics.getAggregatedMetrics();

-				// TODO: mark test run as completed in DB and save metrics
+				this.logger.debug('Aggregated metrics', aggregatedMetrics);

+				await this.testRunRepository.markAsCompleted(testRun.id, aggregatedMetrics);
+
 				this.logger.debug('Test run finished', { workflowId, testRunId: testRun.id });

@@ -291,16 +455,16 @@ export class TestRunnerService {
 			});

 			await dbManager.transaction(async (trx) => {
-				// TODO: mark test run as cancelled in DB
+				await this.testRunRepository.markAsCancelled(testRun.id, trx);
 				await this.testCaseExecutionRepository.markAllPendingAsCancelled(testRun.id, trx);
 			});

 			testRunEndStatusForTelemetry = 'cancelled';
 		} else if (e instanceof TestRunError) {
-			// TODO: mark test run as error
+			await this.testRunRepository.markAsError(testRun.id, e.code, e.extra as IDataObject);
 			testRunEndStatusForTelemetry = 'error';
 		} else {
-			// TODO: mark test run as error
+			await this.testRunRepository.markAsError(testRun.id, 'UNKNOWN_ERROR');
 			testRunEndStatusForTelemetry = 'error';
 			throw e;
 		}
@@ -338,7 +502,7 @@ export class TestRunnerService {

 		// If there is no abort controller - just mark the test run and all its' pending test case executions as cancelled
 		await dbManager.transaction(async (trx) => {
-			// TODO: mark test run as cancelled in DB
|
await this.testRunRepository.markAsCancelled(testRunId, trx);
|
||||||
await this.testCaseExecutionRepository.markAllPendingAsCancelled(testRunId, trx);
|
await this.testCaseExecutionRepository.markAllPendingAsCancelled(testRunId, trx);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -1,67 +0,0 @@
import assert from 'assert';
import type { IRunExecutionData, IPinData, IWorkflowBase } from 'n8n-workflow';

import { TestCaseExecutionError } from '@/evaluation.ee/test-runner/errors.ee';

// Entity representing a node in a workflow under test, for which data should be mocked during test execution
export type MockedNodeItem = {
name?: string;
id: string;
};

/**
* Extracts the execution data from the past execution
* and creates a pin data object from it for the given workflow.
* It uses a list of mocked nodes defined in a test definition
* to decide which nodes to pin.
*/
export function createPinData(
workflow: IWorkflowBase,
mockedNodes: MockedNodeItem[],
executionData: IRunExecutionData,
pastWorkflowData?: IWorkflowBase,
) {
const pinData = {} as IPinData;

const workflowNodeIds = new Map(workflow.nodes.map((node) => [node.id, node.name]));

// If the past workflow data is provided, use it to create a map between node IDs and node names
const pastWorkflowNodeIds = new Map<string, string>();
if (pastWorkflowData) {
for (const node of pastWorkflowData.nodes) {
pastWorkflowNodeIds.set(node.id, node.name);
}
}

for (const mockedNode of mockedNodes) {
assert(mockedNode.id, 'Mocked node ID is missing');

const nodeName = workflowNodeIds.get(mockedNode.id);

// If mocked node is still present in the workflow
if (nodeName) {
// Try to restore node name from past execution data (it might have been renamed between past execution and up-to-date workflow)
const pastNodeName = pastWorkflowNodeIds.get(mockedNode.id) ?? nodeName;
const nodeData = executionData.resultData.runData[pastNodeName];

if (nodeData?.[0]?.data?.main?.[0]) {
pinData[nodeName] = nodeData[0]?.data?.main?.[0];
} else {
throw new TestCaseExecutionError('MOCKED_NODE_DOES_NOT_EXIST');
}
}
}

return pinData;
}

/**
* Returns the trigger node of the past execution.
* The trigger node is the node that has no source and has run data.
*/
export function getPastExecutionTriggerNode(executionData: IRunExecutionData) {
return Object.keys(executionData.resultData.runData).find((nodeName) => {
const data = executionData.resultData.runData[nodeName];
return !data[0].source || data[0].source.length === 0 || data[0].source[0] === null;
});
}
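For context on the removal above, a hypothetical usage sketch of the deleted createPinData helper (call site and variable names are illustrative, not from the diff):

// Build pin data so mocked nodes replay their captured output instead of executing.
const pinData = createPinData(
  currentWorkflow,            // up-to-date workflow under test
  [{ id: 'node-uuid' }],      // nodes to mock, identified by node ID
  pastExecution.data,         // IRunExecutionData of a past execution
  pastExecution.workflowData, // optional snapshot, used to resolve renamed nodes
);
// Throws TestCaseExecutionError('MOCKED_NODE_DOES_NOT_EXIST') when a mocked node
// has no captured main output in the past execution.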
@@ -65,7 +65,7 @@ export class TestRunsController {
}
}

@Get('/:workflowId/test-runs/:id/cases')
@Get('/:workflowId/test-runs/:id/test-cases')
async getTestCases(req: TestRunsRequest.GetCases) {
await this.getTestRun(req.params.id, req.params.workflowId, req.user);

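The route rename above means per-case results are now fetched under /test-cases. A minimal call against the new path, mirroring the integration tests below (the agent and IDs are placeholders):

// Assumes an authenticated test agent as used in the integration tests below.
const resp = await authOwnerAgent.get(
  `/workflows/${workflowId}/test-runs/${testRunId}/test-cases`,
);
// resp.body.data is the list of test case executions (status, metrics, errorCode, ...).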
@@ -6,6 +6,7 @@ import { mockInstance } from 'n8n-core/test/utils';
import type { IWorkflowBase } from 'n8n-workflow';

import { TestRunnerService } from '@/evaluation.ee/test-runner/test-runner.service.ee';
import { createTestRun, createTestCaseExecution } from '@test-integration/db/evaluation';
import { createUserShell } from '@test-integration/db/users';
import { createWorkflow } from '@test-integration/db/workflows';
import * as testDb from '@test-integration/test-db';
@@ -18,6 +19,7 @@ let otherWorkflow: IWorkflowBase;
let ownerShell: User;

const testRunner = mockInstance(TestRunnerService);
let testRunRepository: TestRunRepository;

const testServer = utils.setupTestServer({
endpointGroups: ['workflows', 'evaluation'],
@@ -30,7 +32,9 @@ beforeAll(async () => {
});

beforeEach(async () => {
await testDb.truncate(['TestRun', 'WorkflowEntity', 'SharedWorkflow']);
await testDb.truncate(['TestRun', 'TestCaseExecution', 'WorkflowEntity', 'SharedWorkflow']);

testRunRepository = Container.get(TestRunRepository);

workflowUnderTest = await createWorkflow({ name: 'workflow-under-test' }, ownerShell);
otherWorkflow = await createWorkflow({ name: 'other-workflow' });
@@ -51,17 +55,15 @@ describe('GET /workflows/:workflowId/test-runs', () => {
// expect(resp.statusCode).toBe(404);
// });

// TODO: replace with workflow that is not accessible to the user
test('should return 404 if user does not have access to workflow', async () => {
// test('should return 404 if user does not have access to test definition', async () => {
const testRun = await testRunRepository.createTestRun(otherWorkflow.id);
// const resp = await authOwnerAgent.get(
// `/evaluation/test-definitions/${otherTestDefinition.id}/runs`,
// );
//
// expect(resp.statusCode).toBe(404);
// });

test('should retrieve list of runs for a workflow', async () => {
const resp = await authOwnerAgent.get(`/workflows/${otherWorkflow.id}/test-runs/${testRun.id}`);
const testRunRepository = Container.get(TestRunRepository);
expect(resp.statusCode).toBe(404);
});

test('should retrieve list of test runs for a workflow', async () => {
const testRun = await testRunRepository.createTestRun(workflowUnderTest.id);

const resp = await authOwnerAgent.get(`/workflows/${workflowUnderTest.id}/test-runs`);
@@ -78,7 +80,6 @@ describe('GET /workflows/:workflowId/test-runs', () => {
});

test('should retrieve list of test runs for a workflow with pagination', async () => {
const testRunRepository = Container.get(TestRunRepository);
const testRun1 = await testRunRepository.createTestRun(workflowUnderTest.id);
// Mark as running just to make a slight delay between the runs
await testRunRepository.markAsRunning(testRun1.id);
@@ -112,11 +113,34 @@ describe('GET /workflows/:workflowId/test-runs', () => {
}),
]);
});

test('should retrieve list of test runs for a shared workflow', async () => {
const memberShell = await createUserShell('global:member');
const memberAgent = testServer.authAgentFor(memberShell);
const memberPersonalProject = await Container.get(
ProjectRepository,
).getPersonalProjectForUserOrFail(memberShell.id);

// Share workflow with a member
const sharingResponse = await authOwnerAgent
.put(`/workflows/${workflowUnderTest.id}/share`)
.send({ shareWithIds: [memberPersonalProject.id] });

expect(sharingResponse.statusCode).toBe(200);

// Create a test run for the shared workflow
await testRunRepository.createTestRun(workflowUnderTest.id);

// Check if member can retrieve the test runs of a shared workflow
const resp = await memberAgent.get(`/workflows/${workflowUnderTest.id}/test-runs`);

expect(resp.statusCode).toBe(200);
expect(resp.body.data).toHaveLength(1);
});
});

describe('GET /workflows/:workflowId/test-runs/:id', () => {
test('should retrieve specific test run for a workflow', async () => {
const testRunRepository = Container.get(TestRunRepository);
const testRun = await testRunRepository.createTestRun(workflowUnderTest.id);

const resp = await authOwnerAgent.get(
@@ -141,7 +165,6 @@ describe('GET /workflows/:workflowId/test-runs/:id', () => {
});

test('should return 404 if user does not have access to the workflow', async () => {
const testRunRepository = Container.get(TestRunRepository);
const testRun = await testRunRepository.createTestRun(otherWorkflow.id);

const resp = await authOwnerAgent.get(`/workflows/${otherWorkflow.id}/test-runs/${testRun.id}`);
@@ -164,7 +187,6 @@ describe('GET /workflows/:workflowId/test-runs/:id', () => {
expect(sharingResponse.statusCode).toBe(200);

// Create a test run for the shared workflow
const testRunRepository = Container.get(TestRunRepository);
const testRun = await testRunRepository.createTestRun(workflowUnderTest.id);

// Check if member can retrieve the test run of a shared workflow
@@ -181,9 +203,8 @@ describe('GET /workflows/:workflowId/test-runs/:id', () => {
});
});

describe('DELETE /evaluation/test-definitions/:testDefinitionId/runs/:id', () => {
describe('DELETE /workflows/:workflowId/test-runs/:id', () => {
test('should delete test run for a test definition', async () => {
test('should delete test run of a workflow', async () => {
const testRunRepository = Container.get(TestRunRepository);
const testRun = await testRunRepository.createTestRun(workflowUnderTest.id);

const resp = await authOwnerAgent.delete(
@@ -203,8 +224,7 @@ describe('DELETE /evaluation/test-definitions/:testDefinitionId/runs/:id', () =>
expect(resp.statusCode).toBe(404);
});

test('should return 404 if user does not have access to test definition', async () => {
test('should return 404 if user does not have access to workflow', async () => {
const testRunRepository = Container.get(TestRunRepository);
const testRun = await testRunRepository.createTestRun(otherWorkflow.id);

const resp = await authOwnerAgent.delete(
@@ -215,9 +235,8 @@ describe('DELETE /evaluation/test-definitions/:testDefinitionId/runs/:id', () =>
});
});

describe('POST /evaluation/test-definitions/:testDefinitionId/runs/:id/cancel', () => {
describe('POST /workflows/:workflowId/test-runs/:id/cancel', () => {
test('should cancel test run', async () => {
const testRunRepository = Container.get(TestRunRepository);
const testRun = await testRunRepository.createTestRun(workflowUnderTest.id);

jest.spyOn(testRunRepository, 'markAsCancelled');
@@ -247,7 +266,6 @@ describe('POST /evaluation/test-definitions/:testDefinitionId/runs/:id/cancel',
});

test('should return 404 if user does not have access to the workflow', async () => {
const testRunRepository = Container.get(TestRunRepository);
const testRun = await testRunRepository.createTestRun(otherWorkflow.id);

const resp = await authOwnerAgent.post(
@@ -257,3 +275,111 @@ describe('POST /evaluation/test-definitions/:testDefinitionId/runs/:id/cancel',
expect(resp.statusCode).toBe(404);
});
});

describe('GET /workflows/:workflowId/test-runs/:id/test-cases', () => {
test('should retrieve test cases for a specific test run', async () => {
// Create a test run
const testRun = await createTestRun(workflowUnderTest.id);

// Create some test case executions for this test run
await createTestCaseExecution(testRun.id, {
status: 'success',
runAt: new Date(),
completedAt: new Date(),
metrics: { accuracy: 0.95 },
});

await createTestCaseExecution(testRun.id, {
status: 'error',
errorCode: 'UNKNOWN_ERROR',
runAt: new Date(),
completedAt: new Date(),
});

const resp = await authOwnerAgent.get(
`/workflows/${workflowUnderTest.id}/test-runs/${testRun.id}/test-cases`,
);

expect(resp.statusCode).toBe(200);
expect(resp.body.data).toHaveLength(2);
expect(resp.body.data).toEqual(
expect.arrayContaining([
expect.objectContaining({
status: 'success',
metrics: { accuracy: 0.95 },
}),
expect.objectContaining({
status: 'error',
errorCode: 'UNKNOWN_ERROR',
}),
]),
);
});

test('should return empty array when no test cases exist for a test run', async () => {
// Create a test run with no test cases
const testRun = await createTestRun(workflowUnderTest.id);

const resp = await authOwnerAgent.get(
`/workflows/${workflowUnderTest.id}/test-runs/${testRun.id}/test-cases`,
);

expect(resp.statusCode).toBe(200);
expect(resp.body.data).toEqual([]);
});

test('should return 404 if test run does not exist', async () => {
const resp = await authOwnerAgent.get(
`/workflows/${workflowUnderTest.id}/test-runs/non-existent-id/test-cases`,
);

expect(resp.statusCode).toBe(404);
});

test('should return 404 if user does not have access to the workflow', async () => {
const testRun = await createTestRun(otherWorkflow.id);

const resp = await authOwnerAgent.get(
`/workflows/${otherWorkflow.id}/test-runs/${testRun.id}/test-cases`,
);

expect(resp.statusCode).toBe(404);
});

test('should return test cases for a shared workflow', async () => {
const memberShell = await createUserShell('global:member');
const memberAgent = testServer.authAgentFor(memberShell);
const memberPersonalProject = await Container.get(
ProjectRepository,
).getPersonalProjectForUserOrFail(memberShell.id);

// Share workflow with a member
const sharingResponse = await authOwnerAgent
.put(`/workflows/${workflowUnderTest.id}/share`)
.send({ shareWithIds: [memberPersonalProject.id] });

expect(sharingResponse.statusCode).toBe(200);

// Create a test run with test cases
const testRun = await createTestRun(workflowUnderTest.id);

await createTestCaseExecution(testRun.id, {
status: 'success',
runAt: new Date(),
completedAt: new Date(),
metrics: { precision: 0.87 },
});

// Check if member can retrieve the test cases of a shared workflow
const resp = await memberAgent.get(
`/workflows/${workflowUnderTest.id}/test-runs/${testRun.id}/test-cases`,
);

expect(resp.statusCode).toBe(200);
expect(resp.body.data).toHaveLength(1);
expect(resp.body.data[0]).toMatchObject({
status: 'success',
metrics: { precision: 0.87 },
});
});
});
packages/cli/test/integration/shared/db/evaluation.ts (new file, 68 lines)
@@ -0,0 +1,68 @@
import { TestRunRepository, TestCaseExecutionRepository } from '@n8n/db';
import type { TestRun } from '@n8n/db';
import type { TestCaseExecution } from '@n8n/db';
import type { AggregatedTestRunMetrics } from '@n8n/db';
import type { TestCaseExecutionErrorCode, TestRunErrorCode } from '@n8n/db';
import { Container } from '@n8n/di';
import type { IDataObject } from 'n8n-workflow';

/**
* Creates a test run for a workflow
*/
export const createTestRun = async (
workflowId: string,
options: {
status?: TestRun['status'];
runAt?: Date | null;
completedAt?: Date | null;
metrics?: AggregatedTestRunMetrics;
errorCode?: TestRunErrorCode;
errorDetails?: IDataObject;
} = {},
) => {
const testRunRepository = Container.get(TestRunRepository);

const testRun = testRunRepository.create({
workflow: { id: workflowId },
status: options.status ?? 'new',
runAt: options.runAt ?? null,
completedAt: options.completedAt ?? null,
metrics: options.metrics ?? {},
errorCode: options.errorCode,
errorDetails: options.errorDetails,
});

return await testRunRepository.save(testRun);
};

/**
* Creates a test case execution for a test run
*/
export const createTestCaseExecution = async (
testRunId: string,
options: {
status?: TestCaseExecution['status'];
runAt?: Date | null;
completedAt?: Date | null;
metrics?: Record<string, number>;
errorCode?: TestCaseExecutionErrorCode;
errorDetails?: IDataObject;
executionId?: string;
pastExecutionId?: string;
} = {},
) => {
const testCaseExecutionRepository = Container.get(TestCaseExecutionRepository);

const testCaseExecution = testCaseExecutionRepository.create({
testRun: { id: testRunId },
status: options.status ?? 'success',
runAt: options.runAt ?? null,
completedAt: options.completedAt ?? null,
metrics: options.metrics ?? {},
errorCode: options.errorCode,
errorDetails: options.errorDetails,
executionId: options.executionId,
});

return await testCaseExecutionRepository.save(testCaseExecution);
};
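These factories keep integration-test setup terse: a test seeds a run and its case executions directly through the repositories, as the repository test below does. A minimal usage sketch (field values are illustrative):

// Seed one completed run with a single successful case for an assertion.
const testRun = await createTestRun(workflow.id, { status: 'completed' });
await createTestCaseExecution(testRun.id, { status: 'success', metrics: { accuracy: 1 } });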
packages/cli/test/integration/test-run.repository.ee.test.ts (new file, 66 lines)
@@ -0,0 +1,66 @@
import { TestRunRepository } from '@n8n/db';
import type { IWorkflowDb, WorkflowEntity } from '@n8n/db';
import { Container } from '@n8n/di';

import { createTestCaseExecution, createTestRun } from '@test-integration/db/evaluation';

import { createWorkflow } from './shared/db/workflows';
import * as testDb from './shared/test-db';

describe('TestRunRepository', () => {
let testRunRepository: TestRunRepository;

beforeAll(async () => {
await testDb.init();

testRunRepository = Container.get(TestRunRepository);
});

afterEach(async () => {
await testDb.truncate(['User', 'WorkflowEntity', 'TestRun', 'TestCaseExecution']);
});

afterAll(async () => {
await testDb.terminate();
});

describe('getTestRunSummaryById', () => {
let workflow: IWorkflowDb & WorkflowEntity;

beforeAll(async () => {
workflow = await createWorkflow();
});

it('should return the final result of a test run', async () => {
const testRun = await createTestRun(workflow.id, {
status: 'completed',
runAt: new Date(),
completedAt: new Date(),
metrics: { total: 1, success: 1 },
});

await Promise.all([
createTestCaseExecution(testRun.id, {
status: 'success',
}),
createTestCaseExecution(testRun.id, {
status: 'success',
}),
]);

const result = await testRunRepository.getTestRunSummaryById(testRun.id);

expect(result).toEqual(
expect.objectContaining({
id: testRun.id,
workflowId: workflow.id,
status: 'completed',
finalResult: 'success',
runAt: expect.any(Date),
completedAt: expect.any(Date),
metrics: { total: 1, success: 1 },
}),
);
});
});
});
@@ -1,14 +0,0 @@
{
"node": "n8n-nodes-base.evaluationMetrics",
"nodeVersion": "1.0",
"codexVersion": "1.0",
"categories": ["Evaluation", "Core Nodes"],
"resources": {
"primaryDocumentation": [
{
"url": "https://docs.n8n.io/integrations/builtin/core-nodes/n8n-nodes-base.evaluationmetrics/"
}
]
},
"alias": ["Metric"]
}
@@ -1,109 +0,0 @@
import type {
AssignmentCollectionValue,
FieldType,
IExecuteFunctions,
INodeExecutionData,
INodeType,
INodeTypeDescription,
} from 'n8n-workflow';
import { NodeConnectionTypes, NodeOperationError } from 'n8n-workflow';

import { composeReturnItem, validateEntry } from '../Set/v2/helpers/utils';

export class EvaluationMetrics implements INodeType {
description: INodeTypeDescription = {
displayName: 'Evaluation Metrics',
name: 'evaluationMetrics',
icon: 'fa:check-double',
group: ['input'],
iconColor: 'light-green',
version: 1,
description: 'Define the metrics returned for workflow evaluation',
defaults: {
name: 'Evaluation Metrics',
color: '#29A568',
},
inputs: [NodeConnectionTypes.Main],
outputs: [NodeConnectionTypes.Main],
properties: [
{
displayName:
"Define the evaluation metrics returned in your report. Only numeric values are supported. <a href='https://docs.n8n.io/integrations/builtin/core-nodes/n8n-nodes-base.evaluationmetric/' target='_blank'>More Info</a>",
name: 'notice',
type: 'notice',
default: '',
},
{
displayName: 'Metrics to Return',
name: 'metrics',
type: 'assignmentCollection',
default: {
assignments: [
{
name: '',
value: '',
type: 'number',
},
],
},
typeOptions: {
assignment: {
disableType: true,
defaultType: 'number',
},
},
},
],
};

async execute(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
const items = this.getInputData();
const metrics: INodeExecutionData[] = [];

for (let i = 0; i < items.length; i++) {
const dataToSave = this.getNodeParameter('metrics', i, {}) as AssignmentCollectionValue;

const newItem: INodeExecutionData = {
json: {},
pairedItem: { item: i },
};
const newData = Object.fromEntries(
(dataToSave?.assignments ?? []).map((assignment) => {
const assignmentValue =
typeof assignment.value === 'number' ? assignment.value : Number(assignment.value);

if (isNaN(assignmentValue)) {
throw new NodeOperationError(
this.getNode(),
`Invalid numeric value: "${assignment.value}". Please provide a valid number.`,
);
}

const { name, value } = validateEntry(
assignment.name,
assignment.type as FieldType,
assignmentValue,
this.getNode(),
i,
false,
1,
);

return [name, value];
}),
);

const returnItem = composeReturnItem.call(
this,
i,
newItem,
newData,
{ dotNotation: false, include: 'none' },
1,
);
metrics.push(returnItem);
}

return [metrics];
}
}
@@ -1,111 +0,0 @@
import { mock } from 'jest-mock-extended';
import type { INodeTypes, IExecuteFunctions, AssignmentCollectionValue } from 'n8n-workflow';
import { NodeOperationError } from 'n8n-workflow';

import { EvaluationMetrics } from '../EvaluationMetrics.node';

describe('EvaluationMetrics Node', () => {
const nodeTypes = mock<INodeTypes>();
const evaluationMetricsNode = new EvaluationMetrics();

let mockExecuteFunction: IExecuteFunctions;

function getMockExecuteFunction(metrics: AssignmentCollectionValue['assignments']) {
return {
getInputData: jest.fn().mockReturnValue([{}]),

getNodeParameter: jest.fn().mockReturnValueOnce({
assignments: metrics,
}),

getNode: jest.fn().mockReturnValue({
typeVersion: 1,
}),
} as unknown as IExecuteFunctions;
}

beforeAll(() => {
mockExecuteFunction = getMockExecuteFunction([
{
id: '1',
name: 'Accuracy',
value: 0.95,
type: 'number',
},
{
id: '2',
name: 'Latency',
value: 100,
type: 'number',
},
]);
nodeTypes.getByName.mockReturnValue(evaluationMetricsNode);
jest.clearAllMocks();
});

describe('execute', () => {
it('should output the defined metrics', async () => {
const result = await evaluationMetricsNode.execute.call(mockExecuteFunction);

expect(result).toHaveLength(1);
expect(result[0]).toHaveLength(1);

const outputItem = result[0][0].json;
expect(outputItem).toEqual({
Accuracy: 0.95,
Latency: 100,
});
});

it('should handle no metrics defined', async () => {
const result = await evaluationMetricsNode.execute.call(mockExecuteFunction);

expect(result).toHaveLength(1);
expect(result[0]).toHaveLength(1);
expect(result[0][0].json).toEqual({});
});

it('should convert string values to numbers', async () => {
const mockExecuteWithStringValues = getMockExecuteFunction([
{
id: '1',
name: 'Accuracy',
value: '0.95',
type: 'number',
},
{
id: '2',
name: 'Latency',
value: '100',
type: 'number',
},
]);

const result = await evaluationMetricsNode.execute.call(mockExecuteWithStringValues);

expect(result).toHaveLength(1);
expect(result[0]).toHaveLength(1);

const outputItem = result[0][0].json;
expect(outputItem).toEqual({
Accuracy: 0.95,
Latency: 100,
});
});

it('should throw error for non-numeric string values', async () => {
const mockExecuteWithInvalidValue = getMockExecuteFunction([
{
id: '1',
name: 'Accuracy',
value: 'not-a-number',
type: 'number',
},
]);

await expect(evaluationMetricsNode.execute.call(mockExecuteWithInvalidValue)).rejects.toThrow(
NodeOperationError,
);
});
});
});
@@ -514,7 +514,6 @@
"dist/nodes/ExecuteWorkflow/ExecuteWorkflow/ExecuteWorkflow.node.js",
"dist/nodes/ExecuteWorkflow/ExecuteWorkflowTrigger/ExecuteWorkflowTrigger.node.js",
"dist/nodes/ExecutionData/ExecutionData.node.js",
"dist/nodes/EvaluationMetrics/EvaluationMetrics.node.js",
"dist/nodes/Facebook/FacebookGraphApi.node.js",
"dist/nodes/Facebook/FacebookTrigger.node.js",
"dist/nodes/FacebookLeadAds/FacebookLeadAdsTrigger.node.js",