feat(core): Evaluations backend (no-changelog) (#15542)

Co-authored-by: Yiorgis Gozadinos <yiorgis@n8n.io>
Co-authored-by: Mutasem Aldmour <4711238+mutdmour@users.noreply.github.com>
Eugene
2025-05-23 09:05:13 +02:00
committed by GitHub
parent cf8b611d14
commit fa620f2d5b
18 changed files with 1266 additions and 601 deletions

View File

@@ -283,19 +283,15 @@ export type FolderWithWorkflowAndSubFolderCountAndPath = FolderWithWorkflowAndSu
export type TestRunFinalResult = 'success' | 'error' | 'warning';
export type TestRunErrorCode =
| 'PAST_EXECUTIONS_NOT_FOUND'
| 'EVALUATION_WORKFLOW_NOT_FOUND'
| 'TEST_CASES_NOT_FOUND'
| 'INTERRUPTED'
| 'UNKNOWN_ERROR';
| 'UNKNOWN_ERROR'
| 'EVALUATION_TRIGGER_NOT_FOUND';
export type TestCaseExecutionErrorCode =
| 'MOCKED_NODE_DOES_NOT_EXIST'
| 'TRIGGER_NO_LONGER_EXISTS'
| 'MOCKED_NODE_NOT_FOUND' // This will be used when node mocking is implemented
| 'FAILED_TO_EXECUTE_WORKFLOW'
| 'EVALUATION_WORKFLOW_DOES_NOT_EXIST'
| 'FAILED_TO_EXECUTE_EVALUATION_WORKFLOW'
| 'INVALID_METRICS'
| 'PAYLOAD_LIMIT_EXCEEDED'
| 'UNKNOWN_ERROR';
export type AggregatedTestRunMetrics = Record<string, number | boolean>;
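For orientation, the new 'EVALUATION_TRIGGER_NOT_FOUND' code is thrown by the test runner further down in this diff when a workflow has no evaluation trigger node. A minimal sketch of how these codes surface to callers, assuming only the TestRunError class from '@/evaluation.ee/test-runner/errors.ee' that this change already uses (the helper function is hypothetical):

import type { TestRunErrorCode } from '@n8n/db';
import { TestRunError } from '@/evaluation.ee/test-runner/errors.ee';

// Hypothetical helper: fail a run when the evaluation trigger is missing.
function assertEvaluationTrigger(triggerNode: unknown): asserts triggerNode {
  if (!triggerNode) throw new TestRunError('EVALUATION_TRIGGER_NOT_FOUND');
}

try {
  assertEvaluationTrigger(undefined);
} catch (e) {
  if (e instanceof TestRunError) {
    const code: TestRunErrorCode = e.code; // 'EVALUATION_TRIGGER_NOT_FOUND'
    console.log(code);
  }
}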

View File

@@ -32,6 +32,12 @@ export class TestCaseExecutionRepository extends Repository<TestCaseExecution> {
super(TestCaseExecution, dataSource.manager);
}
async createTestCaseExecution(testCaseExecutionProps: DeepPartial<TestCaseExecution>) {
const testCaseExecution = this.create(testCaseExecutionProps);
return await this.save(testCaseExecution);
}
async createBatch(testRunId: string, testCases: string[]) {
const mappings = this.create(
testCases.map<DeepPartial<TestCaseExecution>>(() => ({

View File

@@ -46,7 +46,7 @@ export class TestRunRepository extends Repository<TestRun> {
async markAsCancelled(id: string, trx?: EntityManager) {
trx = trx ?? this.manager;
return await trx.update(TestRun, id, { status: 'cancelled' });
return await trx.update(TestRun, id, { status: 'cancelled', completedAt: new Date() });
}
async markAsError(id: string, errorCode: TestRunErrorCode, errorDetails?: IDataObject) {
@@ -54,13 +54,14 @@ export class TestRunRepository extends Repository<TestRun> {
status: 'error',
errorCode,
errorDetails,
completedAt: new Date(),
});
}
async markAllIncompleteAsFailed() {
return await this.update(
{ status: In(['new', 'running']) },
{ status: 'error', errorCode: 'INTERRUPTED' },
{ status: 'error', errorCode: 'INTERRUPTED', completedAt: new Date() },
);
}

View File

@@ -158,4 +158,5 @@ export const WsStatusCodes = {
export const FREE_AI_CREDITS_CREDENTIAL_NAME = 'n8n free OpenAI API credits';
export const EVALUATION_METRICS_NODE = `${NODE_PACKAGE_PREFIX}base.evaluationMetrics`;
export const EVALUATION_NODE = `${NODE_PACKAGE_PREFIX}base.evaluation`;
export const EVALUATION_DATASET_TRIGGER_NODE = `${NODE_PACKAGE_PREFIX}base.evaluationTrigger`;
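For reference, this constant resolves to 'n8n-nodes-base.evaluationTrigger' (the node type used by the mock workflow elsewhere in this diff), and the test runner locates the dataset trigger by comparing node types against it. A minimal sketch of that lookup, mirroring the findTriggerNode method added below:

import type { IWorkflowBase } from 'n8n-workflow';
import { EVALUATION_DATASET_TRIGGER_NODE } from '@/constants';

// Returns the evaluation (dataset) trigger node of a workflow, or undefined if none exists.
const findEvaluationTrigger = (workflow: IWorkflowBase) =>
  workflow.nodes.find((node) => node.type === EVALUATION_DATASET_TRIGGER_NODE);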

View File

@@ -1,104 +0,0 @@
import { readFileSync } from 'fs';
import path from 'path';
import { createPinData } from '../utils.ee';
const wfUnderTestJson = JSON.parse(
readFileSync(path.join(__dirname, './mock-data/workflow.under-test.json'), { encoding: 'utf-8' }),
);
const wfUnderTestRenamedNodesJson = JSON.parse(
readFileSync(path.join(__dirname, './mock-data/workflow.under-test-renamed-nodes.json'), {
encoding: 'utf-8',
}),
);
const executionDataJson = JSON.parse(
readFileSync(path.join(__dirname, './mock-data/execution-data.json'), { encoding: 'utf-8' }),
);
describe('createPinData', () => {
test('should create pin data from past execution data', () => {
const mockedNodes = [
{
id: '72256d90-3a67-4e29-b032-47df4e5768af',
name: 'When clicking Execute workflow',
},
];
const pinData = createPinData(wfUnderTestJson, mockedNodes, executionDataJson);
expect(pinData).toEqual(
expect.objectContaining({
'When clicking Execute workflow': expect.anything(),
}),
);
});
test('should not create pin data for non-existing mocked nodes', () => {
const mockedNodes = ['non-existing-ID'].map((id) => ({ id }));
const pinData = createPinData(wfUnderTestJson, mockedNodes, executionDataJson);
expect(pinData).toEqual({});
});
test('should create pin data for all mocked nodes', () => {
const mockedNodes = [
{
id: '72256d90-3a67-4e29-b032-47df4e5768af', // 'When clicking Execute workflow'
},
{
id: '319f29bc-1dd4-4122-b223-c584752151a4', // 'Edit Fields'
},
{
id: 'd2474215-63af-40a4-a51e-0ea30d762621', // 'Code'
},
];
const pinData = createPinData(wfUnderTestJson, mockedNodes, executionDataJson);
expect(pinData).toEqual(
expect.objectContaining({
'When clicking Execute workflow': expect.anything(),
'Edit Fields': expect.anything(),
Code: expect.anything(),
}),
);
});
test('should return empty object if no mocked nodes are provided', () => {
const pinData = createPinData(wfUnderTestJson, [], executionDataJson);
expect(pinData).toEqual({});
});
test('should create pin data for all mocked nodes with renamed nodes', () => {
const mockedNodes = [
{
id: '72256d90-3a67-4e29-b032-47df4e5768af', // 'Manual Run'
},
{
id: '319f29bc-1dd4-4122-b223-c584752151a4', // 'Set Attribute'
},
{
id: 'd2474215-63af-40a4-a51e-0ea30d762621', // 'Code'
},
];
const pinData = createPinData(
wfUnderTestRenamedNodesJson,
mockedNodes,
executionDataJson,
wfUnderTestJson, // Pass original workflow JSON as pastWorkflowData
);
expect(pinData).toEqual(
expect.objectContaining({
'Manual Run': expect.anything(),
'Set Attribute': expect.anything(),
Code: expect.anything(),
}),
);
});
});

View File

@@ -1,40 +0,0 @@
import { readFileSync } from 'fs';
import path from 'path';
import { getPastExecutionTriggerNode } from '../utils.ee';
const executionDataJson = JSON.parse(
readFileSync(path.join(__dirname, './mock-data/execution-data.json'), { encoding: 'utf-8' }),
);
const executionDataMultipleTriggersJson = JSON.parse(
readFileSync(path.join(__dirname, './mock-data/execution-data.multiple-triggers.json'), {
encoding: 'utf-8',
}),
);
const executionDataMultipleTriggersJson2 = JSON.parse(
readFileSync(path.join(__dirname, './mock-data/execution-data.multiple-triggers-2.json'), {
encoding: 'utf-8',
}),
);
describe('getPastExecutionStartNode', () => {
test('should return the start node of the past execution', () => {
const startNode = getPastExecutionTriggerNode(executionDataJson);
expect(startNode).toEqual('When clicking Execute workflow');
});
test('should return the start node of the past execution with multiple triggers', () => {
const startNode = getPastExecutionTriggerNode(executionDataMultipleTriggersJson);
expect(startNode).toEqual('When clicking Execute workflow');
});
test('should return the start node of the past execution with multiple triggers - chat trigger', () => {
const startNode = getPastExecutionTriggerNode(executionDataMultipleTriggersJson2);
expect(startNode).toEqual('When chat message received');
});
});

View File

@@ -1,78 +1,126 @@
{
"name": "Workflow Under Test",
"nodes": [
{
"parameters": {},
"type": "n8n-nodes-base.manualTrigger",
"typeVersion": 1,
"position": [-80, 0],
"id": "72256d90-3a67-4e29-b032-47df4e5768af",
"name": "When clicking Execute workflow"
"parameters": {
"documentId": {
"__rl": true,
"value": "mock",
"mode": "list"
},
"sheetName": {
"__rl": true,
"value": "mock",
"mode": "list"
}
},
"type": "n8n-nodes-base.evaluationTrigger",
"typeVersion": 4.6,
"position": [0, 0],
"id": "3c9068ec-4880-4fbe-a1c8-f7a1cb3f13e9",
"name": "When fetching a dataset row",
"credentials": {
"googleSheetsOAuth2Api": {
"id": "mock",
"name": "Google Sheets account"
}
}
},
{
"parameters": {
"assignments": {
"assignments": [
"documentId": {
"__rl": true,
"value": "mock",
"mode": "list"
},
"sheetName": {
"__rl": true,
"value": "mock",
"mode": "list"
},
"outputs": {
"values": [
{
"id": "acfeecbe-443c-4220-b63b-d44d69216902",
"name": "foo",
"value": "bar",
"type": "string"
"outputName": "reply",
"outputValue": "test"
}
]
},
"options": {}
}
},
"type": "n8n-nodes-base.set",
"typeVersion": 3.4,
"position": [140, 0],
"id": "319f29bc-1dd4-4122-b223-c584752151a4",
"name": "Edit Fields"
"type": "n8n-nodes-base.evaluation",
"typeVersion": 4.6,
"position": [440, 0],
"id": "9e0be4fb-faa3-4344-ba80-e18ceb1d22f1",
"name": "Set Outputs",
"credentials": {
"googleSheetsOAuth2Api": {
"id": "mock",
"name": "Google Sheets account"
}
}
},
{
"parameters": {},
"type": "n8n-nodes-base.noOp",
"typeVersion": 1,
"position": [220, 0],
"id": "335047aa-fb77-43a1-8135-873d34e7ccc1",
"name": "No Operation, do nothing"
},
{
"parameters": {
"jsCode": "for (const item of $input.all()) {\n item.json.random = Math.random();\n}\n\nreturn $input.all();"
"operation": "setMetrics",
"metrics": {
"assignments": [
{
"name": "test",
"value": 0,
"type": "number",
"id": "cc598090-09c8-489d-84d5-e7b9ee5576b5"
}
]
}
},
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [380, 0],
"id": "d2474215-63af-40a4-a51e-0ea30d762621",
"name": "Code"
"type": "n8n-nodes-base.evaluation",
"typeVersion": 4.6,
"position": [660, 0],
"id": "7a795bad-08e1-4e5c-bfe4-2c43129b6da5",
"name": "Set Metrics"
}
],
"connections": {
"When clicking Execute workflow": {
"When fetching a dataset row": {
"main": [
[
{
"node": "Edit Fields",
"node": "No Operation, do nothing",
"type": "main",
"index": 0
}
]
]
},
"Edit Fields": {
"Set Outputs": {
"main": [
[
{
"node": "Wait",
"node": "Set Metrics",
"type": "main",
"index": 0
}
]
]
},
"Wait": {
"No Operation, do nothing": {
"main": [
[
{
"node": "Code",
"node": "Set Outputs",
"type": "main",
"index": 0
}
]
]
}
}
},
"pinData": {}
}

View File

@@ -0,0 +1,635 @@
import type { TestRun } from '@n8n/db';
import type { TestCaseExecutionRepository } from '@n8n/db';
import type { TestRunRepository } from '@n8n/db';
import type { WorkflowRepository } from '@n8n/db';
import { readFileSync } from 'fs';
import { mock } from 'jest-mock-extended';
import type { ErrorReporter } from 'n8n-core';
import type { IWorkflowBase } from 'n8n-workflow';
import type { IRun } from 'n8n-workflow';
import path from 'path';
import type { ActiveExecutions } from '@/active-executions';
import { EVALUATION_DATASET_TRIGGER_NODE } from '@/constants';
import { TestRunError } from '@/evaluation.ee/test-runner/errors.ee';
import { LoadNodesAndCredentials } from '@/load-nodes-and-credentials';
import type { Telemetry } from '@/telemetry';
import type { WorkflowRunner } from '@/workflow-runner';
import { mockInstance, mockLogger } from '@test/mocking';
import { mockNodeTypesData } from '@test-integration/utils/node-types-data';
import { TestRunnerService } from '../test-runner.service.ee';
const wfUnderTestJson = JSON.parse(
readFileSync(path.join(__dirname, './mock-data/workflow.under-test.json'), { encoding: 'utf-8' }),
);
const errorReporter = mock<ErrorReporter>();
const logger = mockLogger();
const telemetry = mock<Telemetry>();
describe('TestRunnerService', () => {
const workflowRepository = mock<WorkflowRepository>();
const workflowRunner = mock<WorkflowRunner>();
const activeExecutions = mock<ActiveExecutions>();
const testRunRepository = mock<TestRunRepository>();
const testCaseExecutionRepository = mock<TestCaseExecutionRepository>();
let testRunnerService: TestRunnerService;
mockInstance(LoadNodesAndCredentials, {
loadedNodes: mockNodeTypesData(['manualTrigger', 'set', 'if', 'code', 'evaluation']),
});
beforeEach(() => {
testRunnerService = new TestRunnerService(
logger,
telemetry,
workflowRepository,
workflowRunner,
activeExecutions,
testRunRepository,
testCaseExecutionRepository,
errorReporter,
);
testRunRepository.createTestRun.mockResolvedValue(mock<TestRun>({ id: 'test-run-id' }));
});
afterEach(() => {
jest.resetAllMocks();
});
describe('findTriggerNode', () => {
test('should find the trigger node in a workflow', () => {
// Setup a test workflow with a trigger node
const workflowWithTrigger = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Dataset Trigger',
type: EVALUATION_DATASET_TRIGGER_NODE,
typeVersion: 1,
position: [0, 0],
parameters: {},
},
{
id: 'node2',
name: 'Regular Node',
type: 'n8n-nodes-base.noOp',
typeVersion: 1,
position: [100, 0],
parameters: {},
},
],
connections: {},
});
// Use the protected method via any type casting
const result = (testRunnerService as any).findTriggerNode(workflowWithTrigger);
// Assert the result is the correct node
expect(result).toBeDefined();
expect(result.type).toBe(EVALUATION_DATASET_TRIGGER_NODE);
expect(result.name).toBe('Dataset Trigger');
});
test('should return undefined when no trigger node is found', () => {
// Setup a test workflow without a trigger node
const workflowWithoutTrigger = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Regular Node 1',
type: 'n8n-nodes-base.noOp',
typeVersion: 1,
position: [0, 0],
parameters: {},
},
{
id: 'node2',
name: 'Regular Node 2',
type: 'n8n-nodes-base.set',
typeVersion: 1,
position: [100, 0],
parameters: {},
},
],
connections: {},
});
// Call the function and expect undefined result
const result = (testRunnerService as any).findTriggerNode(workflowWithoutTrigger);
expect(result).toBeUndefined();
});
test('should work with the actual workflow.under-test.json', () => {
const result = (testRunnerService as any).findTriggerNode(wfUnderTestJson);
// Assert the result is the correct node
expect(result).toBeDefined();
expect(result.type).toBe(EVALUATION_DATASET_TRIGGER_NODE);
expect(result.name).toBe('When fetching a dataset row');
});
});
describe('extractDatasetTriggerOutput', () => {
test('should extract trigger output data from execution', () => {
// Create workflow with a trigger node
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'triggerNodeId',
name: 'TriggerNode',
type: EVALUATION_DATASET_TRIGGER_NODE,
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
// Create execution data with output for the trigger node
const mockOutputItems = [
{ json: { id: 1, name: 'Test 1' } },
{ json: { id: 2, name: 'Test 2' } },
];
const execution = mock<IRun>({
data: {
resultData: {
runData: {
TriggerNode: [
{
data: {
main: [mockOutputItems],
},
},
],
},
},
},
});
// Call the method
const result = (testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
// Verify results
expect(result).toEqual(mockOutputItems);
});
test('should throw an error if trigger node output is not present', () => {
// Create workflow with a trigger node
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'triggerNodeId',
name: 'TriggerNode',
type: EVALUATION_DATASET_TRIGGER_NODE,
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
// Create execution data with missing output
const execution = mock<IRun>({
data: {
resultData: {
runData: {},
},
},
});
// Expect the method to throw an error
expect(() => {
(testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
}).toThrow(TestRunError);
// Verify the error has the correct code
try {
(testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
} catch (error) {
expect(error).toBeInstanceOf(TestRunError);
expect(error.code).toBe('TEST_CASES_NOT_FOUND');
}
});
test('should throw an error if trigger node output is empty list', () => {
// Create workflow with a trigger node
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'triggerNodeId',
name: 'TriggerNode',
type: EVALUATION_DATASET_TRIGGER_NODE,
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
// Create execution data with missing output
const execution = mock<IRun>({
data: {
resultData: {
runData: {
TriggerNode: [
{
data: {
main: [[]], // Empty list
},
},
],
},
},
},
});
// Expect the method to throw an error
expect(() => {
(testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
}).toThrow(TestRunError);
// Verify the error has the correct code
try {
(testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
} catch (error) {
expect(error).toBeInstanceOf(TestRunError);
expect(error.code).toBe('TEST_CASES_NOT_FOUND');
}
});
test('should work with actual execution data format', () => {
// Create workflow with a trigger node that matches the name in the actual data
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'triggerNodeId',
name: "When clicking 'Execute workflow'",
type: EVALUATION_DATASET_TRIGGER_NODE,
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
// Mock execution data similar to actual format
const expectedItems = [
{ json: { query: 'First item' }, pairedItem: { item: 0 } },
{ json: { query: 'Second item' }, pairedItem: { item: 0 } },
{ json: { query: 'Third item' }, pairedItem: { item: 0 } },
];
// TODO: change with actual data
const execution = mock<IRun>({
data: {
resultData: {
runData: {
"When clicking 'Execute workflow'": [
{
data: {
main: [expectedItems],
},
},
],
},
},
},
});
// Call the method
const result = (testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
// Verify results
expect(result).toEqual(expectedItems);
});
});
describe('runDatasetTrigger', () => {
beforeEach(() => {
// Setup mock execution response
const mockExecutionId = 'mock-execution-id';
const mockExecutionData = mock<IRun>({
data: {
resultData: {
runData: {},
},
},
});
// Setup workflowRunner mock
workflowRunner.run.mockResolvedValue(mockExecutionId);
// Setup activeExecutions mock
activeExecutions.getPostExecutePromise.mockResolvedValue(mockExecutionData);
});
test('should throw an error if trigger node is not found', async () => {
// Create workflow without a trigger node
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Regular Node',
type: 'n8n-nodes-base.noOp',
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
const metadata = {
testRunId: 'test-run-id',
userId: 'user-id',
};
// Call the method and expect it to throw an error
await expect(
(testRunnerService as any).runDatasetTrigger(workflow, metadata),
).rejects.toThrow(TestRunError);
// Verify the error has the correct code
try {
await (testRunnerService as any).runDatasetTrigger(workflow, metadata);
} catch (error) {
expect(error).toBeInstanceOf(TestRunError);
expect(error.code).toBe('EVALUATION_TRIGGER_NOT_FOUND');
}
});
test('should call workflowRunner.run with correct data', async () => {
// Create workflow with a trigger node
const triggerNodeName = 'Dataset Trigger';
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: triggerNodeName,
type: EVALUATION_DATASET_TRIGGER_NODE,
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
settings: {
saveDataErrorExecution: 'all',
},
});
const metadata = {
testRunId: 'test-run-id',
userId: 'user-id',
};
// Call the method
await (testRunnerService as any).runDatasetTrigger(workflow, metadata);
// Verify workflowRunner.run was called
expect(workflowRunner.run).toHaveBeenCalledTimes(1);
// Get the argument passed to workflowRunner.run
const runCallArg = workflowRunner.run.mock.calls[0][0];
// Verify it has the correct structure
expect(runCallArg).toHaveProperty('destinationNode', triggerNodeName);
expect(runCallArg).toHaveProperty('executionMode', 'manual');
expect(runCallArg).toHaveProperty('workflowData.settings.saveManualExecutions', false);
expect(runCallArg).toHaveProperty('workflowData.settings.saveDataErrorExecution', 'none');
expect(runCallArg).toHaveProperty('workflowData.settings.saveDataSuccessExecution', 'none');
expect(runCallArg).toHaveProperty('workflowData.settings.saveExecutionProgress', false);
expect(runCallArg).toHaveProperty('userId', metadata.userId);
expect(runCallArg).toHaveProperty('partialExecutionVersion', 2);
// Verify node execution stack contains the requestDataset flag
expect(runCallArg).toHaveProperty('executionData.executionData.nodeExecutionStack');
const nodeExecutionStack = runCallArg.executionData?.executionData?.nodeExecutionStack;
expect(nodeExecutionStack).toBeInstanceOf(Array);
expect(nodeExecutionStack).toHaveLength(1);
expect(nodeExecutionStack?.[0]).toHaveProperty('node.name', triggerNodeName);
expect(nodeExecutionStack?.[0]).toHaveProperty('data.main[0][0].json.requestDataset', true);
});
test('should wait for execution to finish and return result', async () => {
// Create workflow with a trigger node
const triggerNodeName = 'Dataset Trigger';
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: triggerNodeName,
type: EVALUATION_DATASET_TRIGGER_NODE,
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
const metadata = {
testRunId: 'test-run-id',
userId: 'user-id',
};
// Setup mock for execution ID and result
const mockExecutionId = 'dataset-execution-id';
const mockExecutionResult = mock<IRun>({
data: {
resultData: {
runData: {
[triggerNodeName]: [
{
data: {
main: [[{ json: { test: 'data1' } }, { json: { test: 'data2' } }]],
},
},
],
},
},
},
});
workflowRunner.run.mockResolvedValue(mockExecutionId);
activeExecutions.getPostExecutePromise.mockResolvedValue(mockExecutionResult);
// Call the method
const result = await (testRunnerService as any).runDatasetTrigger(workflow, metadata);
// Verify the execution was waited for
expect(activeExecutions.getPostExecutePromise).toHaveBeenCalledWith(mockExecutionId);
// Verify the result is correct
expect(result).toEqual(mockExecutionResult);
});
});
describe('runTestCase', () => {
beforeEach(() => {
// Setup mock execution response
const mockExecutionId = 'mock-execution-id';
const mockExecutionData = mock<IRun>({
data: {
resultData: {
runData: {},
},
},
});
// Setup workflowRunner mock
workflowRunner.run.mockResolvedValue(mockExecutionId);
// Setup activeExecutions mock
activeExecutions.getPostExecutePromise.mockResolvedValue(mockExecutionData);
});
test('should return undefined if abortSignal is aborted', async () => {
// Create an aborted signal
const abortController = new AbortController();
abortController.abort();
// Create test data
const workflow = mock<IWorkflowBase>({
nodes: [],
connections: {},
});
const metadata = {
testRunId: 'test-run-id',
userId: 'user-id',
};
const testCase = { json: { id: 1, name: 'Test 1' } };
// Call the method
const result = await (testRunnerService as any).runTestCase(
workflow,
metadata,
testCase,
abortController.signal,
);
// Verify results
expect(result).toBeUndefined();
expect(workflowRunner.run).not.toHaveBeenCalled();
});
test('should call workflowRunner.run with correct data', async () => {
// Setup test data
const triggerNodeName = 'TriggerNode';
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: triggerNodeName,
type: EVALUATION_DATASET_TRIGGER_NODE,
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
const metadata = {
testRunId: 'test-run-id',
userId: 'user-id',
};
const testCase = { json: { id: 1, name: 'Test 1' } };
const abortController = new AbortController();
// Call the method
await (testRunnerService as any).runTestCase(
workflow,
metadata,
testCase,
abortController.signal,
);
// Verify workflowRunner.run was called with the correct data
expect(workflowRunner.run).toHaveBeenCalledTimes(1);
const runCallArg = workflowRunner.run.mock.calls[0][0];
// Verify the expected structure
expect(runCallArg).toEqual(
expect.objectContaining({
executionMode: 'evaluation',
pinData: {
[triggerNodeName]: [testCase],
},
workflowData: workflow,
userId: metadata.userId,
partialExecutionVersion: 2,
triggerToStartFrom: {
name: triggerNodeName,
},
}),
);
});
test('should register abort event listener and return execution results', async () => {
// Setup test data
const triggerNodeName = 'TriggerNode';
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: triggerNodeName,
type: EVALUATION_DATASET_TRIGGER_NODE,
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
const metadata = {
testRunId: 'test-run-id',
userId: 'user-id',
};
const testCase = { json: { id: 1, name: 'Test 1' } };
const abortController = new AbortController();
// Mock addEventListener on AbortSignal
const mockAddEventListener = jest.fn();
const originalAddEventListener = abortController.signal.addEventListener;
abortController.signal.addEventListener = mockAddEventListener;
try {
// Call the method
const result = await (testRunnerService as any).runTestCase(
workflow,
metadata,
testCase,
abortController.signal,
);
// Verify addEventListener was called
expect(mockAddEventListener).toHaveBeenCalledTimes(1);
expect(mockAddEventListener.mock.calls[0][0]).toBe('abort');
// Verify the expected result structure
expect(result).toHaveProperty('executionData');
expect(result.executionData?.data).toBeDefined();
expect(result).toHaveProperty('executionId');
expect(result.executionId).toEqual(expect.any(String));
} finally {
// Restore original method
abortController.signal.addEventListener = originalAddEventListener;
}
});
});
});

View File

@@ -3,28 +3,43 @@ import { TestCaseExecutionRepository, TestRunRepository, WorkflowRepository } fr
import { Service } from '@n8n/di';
import { ErrorReporter, Logger } from 'n8n-core';
import { ExecutionCancelledError } from 'n8n-workflow';
import type { IRun, IWorkflowBase, IWorkflowExecutionDataProcess } from 'n8n-workflow';
import type {
IDataObject,
IRun,
IWorkflowBase,
IWorkflowExecutionDataProcess,
IExecuteData,
INodeExecutionData,
} from 'n8n-workflow';
import assert from 'node:assert';
import { ActiveExecutions } from '@/active-executions';
import config from '@/config';
import { EVALUATION_METRICS_NODE } from '@/constants';
import { EVALUATION_DATASET_TRIGGER_NODE, EVALUATION_NODE } from '@/constants';
import { TestCaseExecutionError, TestRunError } from '@/evaluation.ee/test-runner/errors.ee';
import { Telemetry } from '@/telemetry';
import { WorkflowRunner } from '@/workflow-runner';
import { EvaluationMetrics } from './evaluation-metrics.ee';
export interface TestRunMetadata {
testRunId: string;
userId: string;
}
export interface TestCaseExecutionResult {
executionData: IRun;
executionId: string;
}
/**
* This service orchestrates the running of test cases.
* It uses the test definitions to find
* past executions, creates pin data from them,
* and runs the workflow-under-test with the pin data.
* After the workflow-under-test finishes, it runs the evaluation workflow
* with the original and new run data, and collects the metrics.
* This service orchestrates the running of evaluations.
* It performs a partial execution of the workflow under test to fetch the dataset,
* running only the evaluation trigger node and capturing its output.
* It then iterates over the test cases (the items produced by the evaluation trigger node)
* and runs the workflow under test with each test case as input.
* After each test case run, it collects the metrics from the output of the evaluation nodes.
* Once all test cases have run, it aggregates the metrics and saves them to the database.
*/
@Service()
export class TestRunnerService {
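Taken together with the doc comment above, the new run flow can be read as the following simplified driver. This is an illustration assembled from the private methods introduced below (persistence, telemetry, and error handling omitted), not the actual implementation; the unit tests in this diff call the same private methods via an 'as any' cast.

import type { IWorkflowBase, INodeExecutionData } from 'n8n-workflow';

// Illustrative outline of the evaluation run, not part of this change.
async function outlineRunTest(
  service: any, // TestRunnerService instance
  workflow: IWorkflowBase,
  metadata: { testRunId: string; userId: string },
  abortSignal: AbortSignal,
) {
  // 1. Run only the evaluation trigger to fetch the whole dataset.
  const datasetFetch = await service.runDatasetTrigger(workflow, metadata);
  // 2. Every item the trigger emitted becomes one test case.
  const testCases: INodeExecutionData[] = service
    .extractDatasetTriggerOutput(datasetFetch, workflow)
    .map((item: INodeExecutionData) => ({ json: item.json }));
  // 3. Run the workflow under test once per case, pinning the case on the trigger node.
  for (const testCase of testCases) {
    if (abortSignal.aborted) break;
    const result = await service.runTestCase(workflow, metadata, testCase, abortSignal);
    // 4. Metrics come from Evaluation nodes configured with the 'setMetrics' operation.
    console.log(service.extractEvaluationResult(result.executionData, workflow));
  }
}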
@@ -42,57 +57,48 @@ export class TestRunnerService {
) {}
/**
* Prepares the start nodes and trigger node data props for the `workflowRunner.run` method input.
* Finds the dataset trigger node in the workflow
*/
private getStartNodesData(
workflow: IWorkflowBase,
): Pick<IWorkflowExecutionDataProcess, 'triggerToStartFrom'> {
// Find the dataset trigger node
// TODO: replace with dataset trigger node
const triggerNode = workflow.nodes.find(
(node) => node.type === 'n8n-nodes-base.executeWorkflowTrigger',
);
if (!triggerNode) {
// TODO: Change error
throw new TestCaseExecutionError('TRIGGER_NO_LONGER_EXISTS');
}
const triggerToStartFrom = {
name: triggerNode.name,
};
return {
triggerToStartFrom,
};
private findTriggerNode(workflow: IWorkflowBase) {
return workflow.nodes.find((node) => node.type === EVALUATION_DATASET_TRIGGER_NODE);
}
/**
* Runs a test case with the given pin data.
* Runs a test case with the given input.
* Injects the input data as pinned data of the evaluation trigger node.
* Waits for the workflow under test to finish execution.
*/
private async runTestCase(
workflow: IWorkflowBase,
metadata: TestRunMetadata,
testCase: INodeExecutionData,
abortSignal: AbortSignal,
): Promise<IRun | undefined> {
): Promise<TestCaseExecutionResult | undefined> {
// Do not run if the test run is cancelled
if (abortSignal.aborted) {
return;
}
const startNodesData = this.getStartNodesData(workflow);
// Prepare the data to run the workflow
// Evaluation executions should run the same way as manual,
// because they need pinned data and partial execution logic
const triggerNode = this.findTriggerNode(workflow);
assert(triggerNode);
const pinData = {
[triggerNode.name]: [testCase],
};
const data: IWorkflowExecutionDataProcess = {
...startNodesData,
executionMode: 'evaluation',
runData: {},
// pinData,
pinData,
workflowData: workflow,
userId: metadata.userId,
partialExecutionVersion: 2,
triggerToStartFrom: {
name: triggerNode.name,
},
};
// When in queue mode, we need to pass additional data to the execution
@@ -109,7 +115,9 @@ export class TestRunnerService {
manualData: {
userId: metadata.userId,
partialExecutionVersion: 2,
triggerToStartFrom: startNodesData.triggerToStartFrom,
triggerToStartFrom: {
name: triggerNode.name,
},
},
};
}
@@ -123,7 +131,86 @@ export class TestRunnerService {
this.activeExecutions.stopExecution(executionId);
});
// TODO: Update status of the test run execution
// Wait for the execution to finish
const executionData = await this.activeExecutions.getPostExecutePromise(executionId);
assert(executionData);
return { executionId, executionData };
}
/**
* This method creates a partial workflow execution to run the dataset trigger only
* to get the whole dataset.
*/
private async runDatasetTrigger(workflow: IWorkflowBase, metadata: TestRunMetadata) {
// Prepare the data to run the workflow
// Evaluation executions should run the same way as manual,
// because they need pinned data and partial execution logic
const triggerNode = this.findTriggerNode(workflow);
if (!triggerNode) {
throw new TestRunError('EVALUATION_TRIGGER_NOT_FOUND');
}
// Initialize the input data for dataset trigger
// Provide a flag indicating that we want to get the whole dataset
const nodeExecutionStack: IExecuteData[] = [];
nodeExecutionStack.push({
node: triggerNode,
data: {
main: [[{ json: { requestDataset: true } }]],
},
source: null,
});
const data: IWorkflowExecutionDataProcess = {
destinationNode: triggerNode.name,
executionMode: 'manual',
runData: {},
workflowData: {
...workflow,
settings: {
...workflow.settings,
saveManualExecutions: false,
saveDataErrorExecution: 'none',
saveDataSuccessExecution: 'none',
saveExecutionProgress: false,
},
},
userId: metadata.userId,
partialExecutionVersion: 2,
executionData: {
startData: {
destinationNode: triggerNode.name,
},
resultData: {
runData: {},
},
executionData: {
contextData: {},
metadata: {},
nodeExecutionStack,
waitingExecution: {},
waitingExecutionSource: {},
},
manualData: {
userId: metadata.userId,
partialExecutionVersion: 2,
triggerToStartFrom: {
name: triggerNode.name,
},
},
},
triggerToStartFrom: {
name: triggerNode.name,
},
};
// Trigger the workflow under test with mocked data
const executionId = await this.workflowRunner.run(data);
assert(executionId);
// Wait for the execution to finish
const executePromise = this.activeExecutions.getPostExecutePromise(executionId);
@@ -135,11 +222,47 @@ export class TestRunnerService {
* Get the evaluation metrics nodes from a workflow.
*/
static getEvaluationMetricsNodes(workflow: IWorkflowBase) {
return workflow.nodes.filter((node) => node.type === EVALUATION_METRICS_NODE);
return workflow.nodes.filter(
(node) => node.type === EVALUATION_NODE && node.parameters.operation === 'setMetrics',
);
}
/**
* Creates a new test run for the given test definition.
* Extract the dataset trigger output
*/
private extractDatasetTriggerOutput(execution: IRun, workflow: IWorkflowBase) {
const triggerNode = this.findTriggerNode(workflow);
assert(triggerNode);
const triggerOutputData = execution.data.resultData.runData[triggerNode.name][0];
const triggerOutput = triggerOutputData?.data?.main?.[0];
if (!triggerOutput || triggerOutput.length === 0) {
throw new TestRunError('TEST_CASES_NOT_FOUND');
}
return triggerOutput;
}
/**
* Evaluation result is collected from all Evaluation Metrics nodes
*/
private extractEvaluationResult(execution: IRun, workflow: IWorkflowBase): IDataObject {
// TODO: Do not fail if not all metric nodes were executed
const metricsNodes = TestRunnerService.getEvaluationMetricsNodes(workflow);
const metricsRunData = metricsNodes.flatMap(
(node) => execution.data.resultData.runData[node.name],
);
const metricsData = metricsRunData
.reverse()
.map((data) => data.data?.main?.[0]?.[0]?.json ?? {});
const metricsResult = metricsData.reduce((acc, curr) => ({ ...acc, ...curr }), {});
return metricsResult;
}
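// Illustrative only (not part of this change): with two 'setMetrics' nodes whose json
// outputs are { accuracy: 0.95 } and { latency: 120 } (hypothetical metric names),
// the reverse-and-reduce merge above behaves like the following standalone snippet:
const exampleMetricsData = [{ accuracy: 0.95 }, { latency: 120 }].reverse();
const exampleMerged = exampleMetricsData.reduce((acc, curr) => ({ ...acc, ...curr }), {});
// exampleMerged contains { latency: 120, accuracy: 0.95 }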
/**
* Creates a new test run for the given workflow
*/
async runTest(user: User, workflowId: string): Promise<void> {
this.logger.debug('Starting new test run', { workflowId });
@@ -148,7 +271,6 @@ export class TestRunnerService {
assert(workflow, 'Workflow not found');
// 0. Create new Test Run
// TODO: Check that createTestRun takes workflowId as an argument
const testRun = await this.testRunRepository.createTestRun(workflowId);
assert(testRun, 'Unable to create a test run');
@@ -169,36 +291,8 @@ export class TestRunnerService {
const { manager: dbManager } = this.testRunRepository;
try {
///
// 1. Make test cases list
///
// TODO: Get the test cases from the dataset trigger node
const testCases = [{ id: 1 }];
this.logger.debug('Found test cases', { count: testCases.length });
if (testCases.length === 0) {
// TODO: Change error
throw new TestRunError('PAST_EXECUTIONS_NOT_FOUND');
}
// Add all past executions mappings to the test run.
// This will be used to track the status of each test case and keep the connection between test run and all related executions (past, current, and evaluation).
// await this.testCaseExecutionRepository.createBatch(
// testRun.id,
// testCases.map((e) => e.id),
// );
// TODO: Collect metric names from evaluation nodes of the workflow
// const testMetricNames = new Set<string>();
// 2. Run over all the test cases
// const pastExecutionIds = pastExecutions.map((e) => e.id);
// Update test run status
// TODO: mark test run as running
// await this.testRunRepository.markAsRunning(testRun.id);
await this.testRunRepository.markAsRunning(testRun.id);
this.telemetry.track('User ran test', {
user_id: user.id,
@@ -206,23 +300,39 @@ export class TestRunnerService {
workflow_id: workflowId,
});
///
// 1. Make test cases list
///
const datasetFetchExecution = await this.runDatasetTrigger(workflow, testRunMetadata);
assert(datasetFetchExecution);
const datasetTriggerOutput = this.extractDatasetTriggerOutput(
datasetFetchExecution,
workflow,
);
const testCases = datasetTriggerOutput.map((items) => ({ json: items.json }));
this.logger.debug('Found test cases', { count: testCases.length });
// Initialize object to collect the results of the evaluation workflow executions
// const metrics = new EvaluationMetrics();
const metrics = new EvaluationMetrics();
///
// 2. Run over all the test cases
///
for (const _testCase of testCases) {
for (const testCase of testCases) {
if (abortSignal.aborted) {
this.logger.debug('Test run was cancelled', {
workflowId,
// stoppedOn: pastExecutionId,
});
break;
}
this.logger.debug('Running test case');
const runAt = new Date();
try {
const testCaseMetadata = {
@@ -230,22 +340,58 @@ export class TestRunnerService {
};
// Run the test case and wait for it to finish
const testCaseExecution = await this.runTestCase(workflow, testCaseMetadata, abortSignal);
const testCaseResult = await this.runTestCase(
workflow,
testCaseMetadata,
testCase,
abortSignal,
);
assert(testCaseResult);
const { executionId: testCaseExecutionId, executionData: testCaseExecution } =
testCaseResult;
assert(testCaseExecution);
assert(testCaseExecutionId);
this.logger.debug('Test case execution finished');
// In case of a permission check issue, the test case execution will be undefined.
// If that happens, or if the test case execution produced an error, mark the test case as failed.
if (!testCaseExecution || testCaseExecution.data.resultData.error) {
// TODO: add failed test case execution to DB
// Save the failed test case execution in DB
await this.testCaseExecutionRepository.createTestCaseExecution({
executionId: testCaseExecutionId,
testRun: {
id: testRun.id,
},
status: 'error',
errorCode: 'FAILED_TO_EXECUTE_WORKFLOW',
metrics: {},
});
continue;
}
const completedAt = new Date();
// TODO: extract metrics
const { addedMetrics } = metrics.addResults(
this.extractEvaluationResult(testCaseExecution, workflow),
);
this.logger.debug('Test case metrics extracted', addedMetrics);
// Create a new test case execution in DB
// TODO: add successful test case execution to DB
await this.testCaseExecutionRepository.createTestCaseExecution({
executionId: testCaseExecutionId,
testRun: {
id: testRun.id,
},
runAt,
completedAt,
status: 'success',
metrics: addedMetrics,
});
} catch (e) {
const completedAt = new Date();
// FIXME: this is a temporary log
this.logger.error('Test case execution failed', {
workflowId,
@@ -255,9 +401,26 @@ export class TestRunnerService {
// In case of an unexpected error save it as failed test case execution and continue with the next test case
if (e instanceof TestCaseExecutionError) {
// TODO: add failed test case execution to DB
await this.testCaseExecutionRepository.createTestCaseExecution({
testRun: {
id: testRun.id,
},
runAt,
completedAt,
status: 'error',
errorCode: e.code,
errorDetails: e.extra as IDataObject,
});
} else {
// TODO: add failed test case execution to DB
await this.testCaseExecutionRepository.createTestCaseExecution({
testRun: {
id: testRun.id,
},
runAt,
completedAt,
status: 'error',
errorCode: 'UNKNOWN_ERROR',
});
// Report unexpected errors
this.errorReporter.error(e);
@@ -268,16 +431,17 @@ export class TestRunnerService {
// Mark the test run as completed or cancelled
if (abortSignal.aborted) {
await dbManager.transaction(async (trx) => {
// TODO: mark test run as cancelled
// await this.testRunRepository.markAsCancelled(testRun.id, trx);
await this.testRunRepository.markAsCancelled(testRun.id, trx);
await this.testCaseExecutionRepository.markAllPendingAsCancelled(testRun.id, trx);
testRunEndStatusForTelemetry = 'cancelled';
});
} else {
// const aggregatedMetrics = metrics.getAggregatedMetrics();
const aggregatedMetrics = metrics.getAggregatedMetrics();
// TODO: mark test run as completed in DB and save metrics
this.logger.debug('Aggregated metrics', aggregatedMetrics);
await this.testRunRepository.markAsCompleted(testRun.id, aggregatedMetrics);
this.logger.debug('Test run finished', { workflowId, testRunId: testRun.id });
@@ -291,16 +455,16 @@ export class TestRunnerService {
});
await dbManager.transaction(async (trx) => {
// TODO: mark test run as cancelled in DB
await this.testRunRepository.markAsCancelled(testRun.id, trx);
await this.testCaseExecutionRepository.markAllPendingAsCancelled(testRun.id, trx);
});
testRunEndStatusForTelemetry = 'cancelled';
} else if (e instanceof TestRunError) {
// TODO: mark test run as error
await this.testRunRepository.markAsError(testRun.id, e.code, e.extra as IDataObject);
testRunEndStatusForTelemetry = 'error';
} else {
// TODO: mark test run as error
await this.testRunRepository.markAsError(testRun.id, 'UNKNOWN_ERROR');
testRunEndStatusForTelemetry = 'error';
throw e;
}
@@ -338,7 +502,7 @@ export class TestRunnerService {
// If there is no abort controller, just mark the test run and all its pending test case executions as cancelled
await dbManager.transaction(async (trx) => {
// TODO: mark test run as cancelled in DB
await this.testRunRepository.markAsCancelled(testRunId, trx);
await this.testCaseExecutionRepository.markAllPendingAsCancelled(testRunId, trx);
});
}

View File

@@ -1,67 +0,0 @@
import assert from 'assert';
import type { IRunExecutionData, IPinData, IWorkflowBase } from 'n8n-workflow';
import { TestCaseExecutionError } from '@/evaluation.ee/test-runner/errors.ee';
// Entity representing a node in a workflow under test, for which data should be mocked during test execution
export type MockedNodeItem = {
name?: string;
id: string;
};
/**
* Extracts the execution data from the past execution
* and creates a pin data object from it for the given workflow.
* It uses a list of mocked nodes defined in a test definition
* to decide which nodes to pin.
*/
export function createPinData(
workflow: IWorkflowBase,
mockedNodes: MockedNodeItem[],
executionData: IRunExecutionData,
pastWorkflowData?: IWorkflowBase,
) {
const pinData = {} as IPinData;
const workflowNodeIds = new Map(workflow.nodes.map((node) => [node.id, node.name]));
// If the past workflow data is provided, use it to create a map between node IDs and node names
const pastWorkflowNodeIds = new Map<string, string>();
if (pastWorkflowData) {
for (const node of pastWorkflowData.nodes) {
pastWorkflowNodeIds.set(node.id, node.name);
}
}
for (const mockedNode of mockedNodes) {
assert(mockedNode.id, 'Mocked node ID is missing');
const nodeName = workflowNodeIds.get(mockedNode.id);
// If mocked node is still present in the workflow
if (nodeName) {
// Try to restore node name from past execution data (it might have been renamed between past execution and up-to-date workflow)
const pastNodeName = pastWorkflowNodeIds.get(mockedNode.id) ?? nodeName;
const nodeData = executionData.resultData.runData[pastNodeName];
if (nodeData?.[0]?.data?.main?.[0]) {
pinData[nodeName] = nodeData[0]?.data?.main?.[0];
} else {
throw new TestCaseExecutionError('MOCKED_NODE_DOES_NOT_EXIST');
}
}
}
return pinData;
}
/**
* Returns the trigger node of the past execution.
* The trigger node is the node that has no source and has run data.
*/
export function getPastExecutionTriggerNode(executionData: IRunExecutionData) {
return Object.keys(executionData.resultData.runData).find((nodeName) => {
const data = executionData.resultData.runData[nodeName];
return !data[0].source || data[0].source.length === 0 || data[0].source[0] === null;
});
}

View File

@@ -65,7 +65,7 @@ export class TestRunsController {
}
}
@Get('/:workflowId/test-runs/:id/cases')
@Get('/:workflowId/test-runs/:id/test-cases')
async getTestCases(req: TestRunsRequest.GetCases) {
await this.getTestRun(req.params.id, req.params.workflowId, req.user);

View File

@@ -6,6 +6,7 @@ import { mockInstance } from 'n8n-core/test/utils';
import type { IWorkflowBase } from 'n8n-workflow';
import { TestRunnerService } from '@/evaluation.ee/test-runner/test-runner.service.ee';
import { createTestRun, createTestCaseExecution } from '@test-integration/db/evaluation';
import { createUserShell } from '@test-integration/db/users';
import { createWorkflow } from '@test-integration/db/workflows';
import * as testDb from '@test-integration/test-db';
@@ -18,6 +19,7 @@ let otherWorkflow: IWorkflowBase;
let ownerShell: User;
const testRunner = mockInstance(TestRunnerService);
let testRunRepository: TestRunRepository;
const testServer = utils.setupTestServer({
endpointGroups: ['workflows', 'evaluation'],
@@ -30,7 +32,9 @@ beforeAll(async () => {
});
beforeEach(async () => {
await testDb.truncate(['TestRun', 'WorkflowEntity', 'SharedWorkflow']);
await testDb.truncate(['TestRun', 'TestCaseExecution', 'WorkflowEntity', 'SharedWorkflow']);
testRunRepository = Container.get(TestRunRepository);
workflowUnderTest = await createWorkflow({ name: 'workflow-under-test' }, ownerShell);
otherWorkflow = await createWorkflow({ name: 'other-workflow' });
@@ -51,17 +55,15 @@ describe('GET /workflows/:workflowId/test-runs', () => {
// expect(resp.statusCode).toBe(404);
// });
// TODO: replace with workflow that is not accessible to the user
// test('should return 404 if user does not have access to test definition', async () => {
// const resp = await authOwnerAgent.get(
// `/evaluation/test-definitions/${otherTestDefinition.id}/runs`,
// );
//
// expect(resp.statusCode).toBe(404);
// });
test('should return 404 if user does not have access to workflow', async () => {
const testRun = await testRunRepository.createTestRun(otherWorkflow.id);
test('should retrieve list of runs for a workflow', async () => {
const testRunRepository = Container.get(TestRunRepository);
const resp = await authOwnerAgent.get(`/workflows/${otherWorkflow.id}/test-runs/${testRun.id}`);
expect(resp.statusCode).toBe(404);
});
test('should retrieve list of test runs for a workflow', async () => {
const testRun = await testRunRepository.createTestRun(workflowUnderTest.id);
const resp = await authOwnerAgent.get(`/workflows/${workflowUnderTest.id}/test-runs`);
@@ -78,7 +80,6 @@ describe('GET /workflows/:workflowId/test-runs', () => {
});
test('should retrieve list of test runs for a workflow with pagination', async () => {
const testRunRepository = Container.get(TestRunRepository);
const testRun1 = await testRunRepository.createTestRun(workflowUnderTest.id);
// Mark as running just to make a slight delay between the runs
await testRunRepository.markAsRunning(testRun1.id);
@@ -112,11 +113,34 @@ describe('GET /workflows/:workflowId/test-runs', () => {
}),
]);
});
test('should retrieve list of test runs for a shared workflow', async () => {
const memberShell = await createUserShell('global:member');
const memberAgent = testServer.authAgentFor(memberShell);
const memberPersonalProject = await Container.get(
ProjectRepository,
).getPersonalProjectForUserOrFail(memberShell.id);
// Share workflow with a member
const sharingResponse = await authOwnerAgent
.put(`/workflows/${workflowUnderTest.id}/share`)
.send({ shareWithIds: [memberPersonalProject.id] });
expect(sharingResponse.statusCode).toBe(200);
// Create a test run for the shared workflow
await testRunRepository.createTestRun(workflowUnderTest.id);
// Check if member can retrieve the test runs of a shared workflow
const resp = await memberAgent.get(`/workflows/${workflowUnderTest.id}/test-runs`);
expect(resp.statusCode).toBe(200);
expect(resp.body.data).toHaveLength(1);
});
});
describe('GET /workflows/:workflowId/test-runs/:id', () => {
test('should retrieve specific test run for a workflow', async () => {
const testRunRepository = Container.get(TestRunRepository);
const testRun = await testRunRepository.createTestRun(workflowUnderTest.id);
const resp = await authOwnerAgent.get(
@@ -141,7 +165,6 @@ describe('GET /workflows/:workflowId/test-runs/:id', () => {
});
test('should return 404 if user does not have access to the workflow', async () => {
const testRunRepository = Container.get(TestRunRepository);
const testRun = await testRunRepository.createTestRun(otherWorkflow.id);
const resp = await authOwnerAgent.get(`/workflows/${otherWorkflow.id}/test-runs/${testRun.id}`);
@@ -164,7 +187,6 @@ describe('GET /workflows/:workflowId/test-runs/:id', () => {
expect(sharingResponse.statusCode).toBe(200);
// Create a test run for the shared workflow
const testRunRepository = Container.get(TestRunRepository);
const testRun = await testRunRepository.createTestRun(workflowUnderTest.id);
// Check if member can retrieve the test run of a shared workflow
@@ -181,9 +203,8 @@ describe('GET /workflows/:workflowId/test-runs/:id', () => {
});
});
describe('DELETE /evaluation/test-definitions/:testDefinitionId/runs/:id', () => {
test('should delete test run for a test definition', async () => {
const testRunRepository = Container.get(TestRunRepository);
describe('DELETE /workflows/:workflowId/test-runs/:id', () => {
test('should delete test run of a workflow', async () => {
const testRun = await testRunRepository.createTestRun(workflowUnderTest.id);
const resp = await authOwnerAgent.delete(
@@ -203,8 +224,7 @@ describe('DELETE /evaluation/test-definitions/:testDefinitionId/runs/:id', () =>
expect(resp.statusCode).toBe(404);
});
test('should return 404 if user does not have access to test definition', async () => {
const testRunRepository = Container.get(TestRunRepository);
test('should return 404 if user does not have access to workflow', async () => {
const testRun = await testRunRepository.createTestRun(otherWorkflow.id);
const resp = await authOwnerAgent.delete(
@@ -215,9 +235,8 @@ describe('DELETE /evaluation/test-definitions/:testDefinitionId/runs/:id', () =>
});
});
describe('POST /evaluation/test-definitions/:testDefinitionId/runs/:id/cancel', () => {
describe('POST /workflows/:workflowId/test-runs/:id/cancel', () => {
test('should cancel test run', async () => {
const testRunRepository = Container.get(TestRunRepository);
const testRun = await testRunRepository.createTestRun(workflowUnderTest.id);
jest.spyOn(testRunRepository, 'markAsCancelled');
@@ -247,7 +266,6 @@ describe('POST /evaluation/test-definitions/:testDefinitionId/runs/:id/cancel',
});
test('should return 404 if user does not have access to the workflow', async () => {
const testRunRepository = Container.get(TestRunRepository);
const testRun = await testRunRepository.createTestRun(otherWorkflow.id);
const resp = await authOwnerAgent.post(
@@ -257,3 +275,111 @@ describe('POST /evaluation/test-definitions/:testDefinitionId/runs/:id/cancel',
expect(resp.statusCode).toBe(404);
});
});
describe('GET /workflows/:workflowId/test-runs/:id/test-cases', () => {
test('should retrieve test cases for a specific test run', async () => {
// Create a test run
const testRun = await createTestRun(workflowUnderTest.id);
// Create some test case executions for this test run
await createTestCaseExecution(testRun.id, {
status: 'success',
runAt: new Date(),
completedAt: new Date(),
metrics: { accuracy: 0.95 },
});
await createTestCaseExecution(testRun.id, {
status: 'error',
errorCode: 'UNKNOWN_ERROR',
runAt: new Date(),
completedAt: new Date(),
});
const resp = await authOwnerAgent.get(
`/workflows/${workflowUnderTest.id}/test-runs/${testRun.id}/test-cases`,
);
expect(resp.statusCode).toBe(200);
expect(resp.body.data).toHaveLength(2);
expect(resp.body.data).toEqual(
expect.arrayContaining([
expect.objectContaining({
status: 'success',
metrics: { accuracy: 0.95 },
}),
expect.objectContaining({
status: 'error',
errorCode: 'UNKNOWN_ERROR',
}),
]),
);
});
test('should return empty array when no test cases exist for a test run', async () => {
// Create a test run with no test cases
const testRun = await createTestRun(workflowUnderTest.id);
const resp = await authOwnerAgent.get(
`/workflows/${workflowUnderTest.id}/test-runs/${testRun.id}/test-cases`,
);
expect(resp.statusCode).toBe(200);
expect(resp.body.data).toEqual([]);
});
test('should return 404 if test run does not exist', async () => {
const resp = await authOwnerAgent.get(
`/workflows/${workflowUnderTest.id}/test-runs/non-existent-id/test-cases`,
);
expect(resp.statusCode).toBe(404);
});
test('should return 404 if user does not have access to the workflow', async () => {
const testRun = await createTestRun(otherWorkflow.id);
const resp = await authOwnerAgent.get(
`/workflows/${otherWorkflow.id}/test-runs/${testRun.id}/test-cases`,
);
expect(resp.statusCode).toBe(404);
});
test('should return test cases for a shared workflow', async () => {
const memberShell = await createUserShell('global:member');
const memberAgent = testServer.authAgentFor(memberShell);
const memberPersonalProject = await Container.get(
ProjectRepository,
).getPersonalProjectForUserOrFail(memberShell.id);
// Share workflow with a member
const sharingResponse = await authOwnerAgent
.put(`/workflows/${workflowUnderTest.id}/share`)
.send({ shareWithIds: [memberPersonalProject.id] });
expect(sharingResponse.statusCode).toBe(200);
// Create a test run with test cases
const testRun = await createTestRun(workflowUnderTest.id);
await createTestCaseExecution(testRun.id, {
status: 'success',
runAt: new Date(),
completedAt: new Date(),
metrics: { precision: 0.87 },
});
// Check if member can retrieve the test cases of a shared workflow
const resp = await memberAgent.get(
`/workflows/${workflowUnderTest.id}/test-runs/${testRun.id}/test-cases`,
);
expect(resp.statusCode).toBe(200);
expect(resp.body.data).toHaveLength(1);
expect(resp.body.data[0]).toMatchObject({
status: 'success',
metrics: { precision: 0.87 },
});
});
});

View File

@@ -0,0 +1,68 @@
import { TestRunRepository, TestCaseExecutionRepository } from '@n8n/db';
import type { TestRun } from '@n8n/db';
import type { TestCaseExecution } from '@n8n/db';
import type { AggregatedTestRunMetrics } from '@n8n/db';
import type { TestCaseExecutionErrorCode, TestRunErrorCode } from '@n8n/db';
import { Container } from '@n8n/di';
import type { IDataObject } from 'n8n-workflow';
/**
* Creates a test run for a workflow
*/
export const createTestRun = async (
workflowId: string,
options: {
status?: TestRun['status'];
runAt?: Date | null;
completedAt?: Date | null;
metrics?: AggregatedTestRunMetrics;
errorCode?: TestRunErrorCode;
errorDetails?: IDataObject;
} = {},
) => {
const testRunRepository = Container.get(TestRunRepository);
const testRun = testRunRepository.create({
workflow: { id: workflowId },
status: options.status ?? 'new',
runAt: options.runAt ?? null,
completedAt: options.completedAt ?? null,
metrics: options.metrics ?? {},
errorCode: options.errorCode,
errorDetails: options.errorDetails,
});
return await testRunRepository.save(testRun);
};
/**
* Creates a test case execution for a test run
*/
export const createTestCaseExecution = async (
testRunId: string,
options: {
status?: TestCaseExecution['status'];
runAt?: Date | null;
completedAt?: Date | null;
metrics?: Record<string, number>;
errorCode?: TestCaseExecutionErrorCode;
errorDetails?: IDataObject;
executionId?: string;
pastExecutionId?: string;
} = {},
) => {
const testCaseExecutionRepository = Container.get(TestCaseExecutionRepository);
const testCaseExecution = testCaseExecutionRepository.create({
testRun: { id: testRunId },
status: options.status ?? 'success',
runAt: options.runAt ?? null,
completedAt: options.completedAt ?? null,
metrics: options.metrics ?? {},
errorCode: options.errorCode,
errorDetails: options.errorDetails,
executionId: options.executionId,
});
return await testCaseExecutionRepository.save(testCaseExecution);
};

View File

@@ -0,0 +1,66 @@
import { TestRunRepository } from '@n8n/db';
import type { IWorkflowDb, WorkflowEntity } from '@n8n/db';
import { Container } from '@n8n/di';
import { createTestCaseExecution, createTestRun } from '@test-integration/db/evaluation';
import { createWorkflow } from './shared/db/workflows';
import * as testDb from './shared/test-db';
describe('TestRunRepository', () => {
let testRunRepository: TestRunRepository;
beforeAll(async () => {
await testDb.init();
testRunRepository = Container.get(TestRunRepository);
});
afterEach(async () => {
await testDb.truncate(['User', 'WorkflowEntity', 'TestRun', 'TestCaseExecution']);
});
afterAll(async () => {
await testDb.terminate();
});
describe('getTestRunSummaryById', () => {
let workflow: IWorkflowDb & WorkflowEntity;
beforeAll(async () => {
workflow = await createWorkflow();
});
it('should return the final result of a test run', async () => {
const testRun = await createTestRun(workflow.id, {
status: 'completed',
runAt: new Date(),
completedAt: new Date(),
metrics: { total: 1, success: 1 },
});
await Promise.all([
createTestCaseExecution(testRun.id, {
status: 'success',
}),
createTestCaseExecution(testRun.id, {
status: 'success',
}),
]);
const result = await testRunRepository.getTestRunSummaryById(testRun.id);
expect(result).toEqual(
expect.objectContaining({
id: testRun.id,
workflowId: workflow.id,
status: 'completed',
finalResult: 'success',
runAt: expect.any(Date),
completedAt: expect.any(Date),
metrics: { total: 1, success: 1 },
}),
);
});
});
});

View File

@@ -1,14 +0,0 @@
{
"node": "n8n-nodes-base.evaluationMetrics",
"nodeVersion": "1.0",
"codexVersion": "1.0",
"categories": ["Evaluation", "Core Nodes"],
"resources": {
"primaryDocumentation": [
{
"url": "https://docs.n8n.io/integrations/builtin/core-nodes/n8n-nodes-base.evaluationmetrics/"
}
]
},
"alias": ["Metric"]
}

View File

@@ -1,109 +0,0 @@
import type {
AssignmentCollectionValue,
FieldType,
IExecuteFunctions,
INodeExecutionData,
INodeType,
INodeTypeDescription,
} from 'n8n-workflow';
import { NodeConnectionTypes, NodeOperationError } from 'n8n-workflow';
import { composeReturnItem, validateEntry } from '../Set/v2/helpers/utils';
export class EvaluationMetrics implements INodeType {
description: INodeTypeDescription = {
displayName: 'Evaluation Metrics',
name: 'evaluationMetrics',
icon: 'fa:check-double',
group: ['input'],
iconColor: 'light-green',
version: 1,
description: 'Define the metrics returned for workflow evaluation',
defaults: {
name: 'Evaluation Metrics',
color: '#29A568',
},
inputs: [NodeConnectionTypes.Main],
outputs: [NodeConnectionTypes.Main],
properties: [
{
displayName:
"Define the evaluation metrics returned in your report. Only numeric values are supported. <a href='https://docs.n8n.io/integrations/builtin/core-nodes/n8n-nodes-base.evaluationmetric/' target='_blank'>More Info</a>",
name: 'notice',
type: 'notice',
default: '',
},
{
displayName: 'Metrics to Return',
name: 'metrics',
type: 'assignmentCollection',
default: {
assignments: [
{
name: '',
value: '',
type: 'number',
},
],
},
typeOptions: {
assignment: {
disableType: true,
defaultType: 'number',
},
},
},
],
};
async execute(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
const items = this.getInputData();
const metrics: INodeExecutionData[] = [];
for (let i = 0; i < items.length; i++) {
const dataToSave = this.getNodeParameter('metrics', i, {}) as AssignmentCollectionValue;
const newItem: INodeExecutionData = {
json: {},
pairedItem: { item: i },
};
const newData = Object.fromEntries(
(dataToSave?.assignments ?? []).map((assignment) => {
const assignmentValue =
typeof assignment.value === 'number' ? assignment.value : Number(assignment.value);
if (isNaN(assignmentValue)) {
throw new NodeOperationError(
this.getNode(),
`Invalid numeric value: "${assignment.value}". Please provide a valid number.`,
);
}
const { name, value } = validateEntry(
assignment.name,
assignment.type as FieldType,
assignmentValue,
this.getNode(),
i,
false,
1,
);
return [name, value];
}),
);
const returnItem = composeReturnItem.call(
this,
i,
newItem,
newData,
{ dotNotation: false, include: 'none' },
1,
);
metrics.push(returnItem);
}
return [metrics];
}
}

View File

@@ -1,111 +0,0 @@
import { mock } from 'jest-mock-extended';
import type { INodeTypes, IExecuteFunctions, AssignmentCollectionValue } from 'n8n-workflow';
import { NodeOperationError } from 'n8n-workflow';
import { EvaluationMetrics } from '../EvaluationMetrics.node';
describe('EvaluationMetrics Node', () => {
const nodeTypes = mock<INodeTypes>();
const evaluationMetricsNode = new EvaluationMetrics();
let mockExecuteFunction: IExecuteFunctions;
function getMockExecuteFunction(metrics: AssignmentCollectionValue['assignments']) {
return {
getInputData: jest.fn().mockReturnValue([{}]),
getNodeParameter: jest.fn().mockReturnValueOnce({
assignments: metrics,
}),
getNode: jest.fn().mockReturnValue({
typeVersion: 1,
}),
} as unknown as IExecuteFunctions;
}
beforeAll(() => {
mockExecuteFunction = getMockExecuteFunction([
{
id: '1',
name: 'Accuracy',
value: 0.95,
type: 'number',
},
{
id: '2',
name: 'Latency',
value: 100,
type: 'number',
},
]);
nodeTypes.getByName.mockReturnValue(evaluationMetricsNode);
jest.clearAllMocks();
});
describe('execute', () => {
it('should output the defined metrics', async () => {
const result = await evaluationMetricsNode.execute.call(mockExecuteFunction);
expect(result).toHaveLength(1);
expect(result[0]).toHaveLength(1);
const outputItem = result[0][0].json;
expect(outputItem).toEqual({
Accuracy: 0.95,
Latency: 100,
});
});
it('should handle no metrics defined', async () => {
const result = await evaluationMetricsNode.execute.call(mockExecuteFunction);
expect(result).toHaveLength(1);
expect(result[0]).toHaveLength(1);
expect(result[0][0].json).toEqual({});
});
it('should convert string values to numbers', async () => {
const mockExecuteWithStringValues = getMockExecuteFunction([
{
id: '1',
name: 'Accuracy',
value: '0.95',
type: 'number',
},
{
id: '2',
name: 'Latency',
value: '100',
type: 'number',
},
]);
const result = await evaluationMetricsNode.execute.call(mockExecuteWithStringValues);
expect(result).toHaveLength(1);
expect(result[0]).toHaveLength(1);
const outputItem = result[0][0].json;
expect(outputItem).toEqual({
Accuracy: 0.95,
Latency: 100,
});
});
it('should throw error for non-numeric string values', async () => {
const mockExecuteWithInvalidValue = getMockExecuteFunction([
{
id: '1',
name: 'Accuracy',
value: 'not-a-number',
type: 'number',
},
]);
await expect(evaluationMetricsNode.execute.call(mockExecuteWithInvalidValue)).rejects.toThrow(
NodeOperationError,
);
});
});
});

View File

@@ -514,7 +514,6 @@
"dist/nodes/ExecuteWorkflow/ExecuteWorkflow/ExecuteWorkflow.node.js",
"dist/nodes/ExecuteWorkflow/ExecuteWorkflowTrigger/ExecuteWorkflowTrigger.node.js",
"dist/nodes/ExecutionData/ExecutionData.node.js",
"dist/nodes/EvaluationMetrics/EvaluationMetrics.node.js",
"dist/nodes/Facebook/FacebookGraphApi.node.js",
"dist/nodes/Facebook/FacebookTrigger.node.js",
"dist/nodes/FacebookLeadAds/FacebookLeadAdsTrigger.node.js",