diff --git a/packages/cli/src/constants.ts b/packages/cli/src/constants.ts index fe662c9ddf..dc557dec76 100644 --- a/packages/cli/src/constants.ts +++ b/packages/cli/src/constants.ts @@ -195,3 +195,5 @@ export const WsStatusCodes = { } as const; export const FREE_AI_CREDITS_CREDENTIAL_NAME = 'n8n free OpenAI API credits'; + +export const EVALUATION_METRICS_NODE = `${NODE_PACKAGE_PREFIX}base.evaluationMetrics`; diff --git a/packages/cli/src/evaluation.ee/metrics.controller.ts b/packages/cli/src/evaluation.ee/metrics.controller.ts deleted file mode 100644 index 5d27931166..0000000000 --- a/packages/cli/src/evaluation.ee/metrics.controller.ts +++ /dev/null @@ -1,141 +0,0 @@ -import express from 'express'; - -import { TestMetricRepository } from '@/databases/repositories/test-metric.repository.ee'; -import { Delete, Get, Patch, Post, RestController } from '@/decorators'; -import { NotFoundError } from '@/errors/response-errors/not-found.error'; -import { - testMetricCreateRequestBodySchema, - testMetricPatchRequestBodySchema, -} from '@/evaluation.ee/metric.schema'; -import { getSharedWorkflowIds } from '@/public-api/v1/handlers/workflows/workflows.service'; -import { Telemetry } from '@/telemetry'; - -import { TestDefinitionService } from './test-definition.service.ee'; -import { TestMetricsRequest } from './test-definitions.types.ee'; - -@RestController('/evaluation/test-definitions') -export class TestMetricsController { - constructor( - private readonly testDefinitionService: TestDefinitionService, - private readonly testMetricRepository: TestMetricRepository, - private readonly telemetry: Telemetry, - ) {} - - // This method is used in multiple places in the controller to get the test definition - // (or just check that it exists and the user has access to it). 
- private async getTestDefinition( - req: - | TestMetricsRequest.GetOne - | TestMetricsRequest.GetMany - | TestMetricsRequest.Patch - | TestMetricsRequest.Delete - | TestMetricsRequest.Create, - ) { - const { testDefinitionId } = req.params; - - const userAccessibleWorkflowIds = await getSharedWorkflowIds(req.user, ['workflow:read']); - - const testDefinition = await this.testDefinitionService.findOne( - testDefinitionId, - userAccessibleWorkflowIds, - ); - - if (!testDefinition) throw new NotFoundError('Test definition not found'); - - return testDefinition; - } - - @Get('/:testDefinitionId/metrics') - async getMany(req: TestMetricsRequest.GetMany) { - const { testDefinitionId } = req.params; - - await this.getTestDefinition(req); - - return await this.testMetricRepository.find({ - where: { testDefinition: { id: testDefinitionId } }, - }); - } - - @Get('/:testDefinitionId/metrics/:id') - async getOne(req: TestMetricsRequest.GetOne) { - const { id: metricId, testDefinitionId } = req.params; - - await this.getTestDefinition(req); - - const metric = await this.testMetricRepository.findOne({ - where: { id: metricId, testDefinition: { id: testDefinitionId } }, - }); - - if (!metric) throw new NotFoundError('Metric not found'); - - return metric; - } - - @Post('/:testDefinitionId/metrics') - async create(req: TestMetricsRequest.Create, res: express.Response) { - const bodyParseResult = testMetricCreateRequestBodySchema.safeParse(req.body); - if (!bodyParseResult.success) { - res.status(400).json({ errors: bodyParseResult.error.errors }); - return; - } - - const testDefinition = await this.getTestDefinition(req); - - const metric = this.testMetricRepository.create({ - ...req.body, - testDefinition, - }); - - return await this.testMetricRepository.save(metric); - } - - @Patch('/:testDefinitionId/metrics/:id') - async patch(req: TestMetricsRequest.Patch, res: express.Response) { - const { id: metricId, testDefinitionId } = req.params; - - const bodyParseResult = testMetricPatchRequestBodySchema.safeParse(req.body); - if (!bodyParseResult.success) { - res.status(400).json({ errors: bodyParseResult.error.errors }); - return; - } - - await this.getTestDefinition(req); - - const metric = await this.testMetricRepository.findOne({ - where: { id: metricId, testDefinition: { id: testDefinitionId } }, - }); - - if (!metric) throw new NotFoundError('Metric not found'); - - const updateResult = await this.testMetricRepository.update(metricId, bodyParseResult.data); - - // Send telemetry event if the metric was updated - if (updateResult.affected === 1 && metric.name !== bodyParseResult.data.name) { - this.telemetry.track('User added metrics to test', { - metric_id: metricId, - metric_name: bodyParseResult.data.name, - test_id: testDefinitionId, - }); - } - - // Respond with the updated metric - return await this.testMetricRepository.findOneBy({ id: metricId }); - } - - @Delete('/:testDefinitionId/metrics/:id') - async delete(req: TestMetricsRequest.Delete) { - const { id: metricId, testDefinitionId } = req.params; - - await this.getTestDefinition(req); - - const metric = await this.testMetricRepository.findOne({ - where: { id: metricId, testDefinition: { id: testDefinitionId } }, - }); - - if (!metric) throw new NotFoundError('Metric not found'); - - await this.testMetricRepository.delete(metricId); - - return { success: true }; - } -} diff --git a/packages/cli/src/evaluation.ee/test-definitions.types.ee.ts b/packages/cli/src/evaluation.ee/test-definitions.types.ee.ts index 98feea7e5d..eb19f964f7 100644 --- 
a/packages/cli/src/evaluation.ee/test-definitions.types.ee.ts +++ b/packages/cli/src/evaluation.ee/test-definitions.types.ee.ts @@ -47,36 +47,6 @@ export declare namespace TestDefinitionsRequest { >; } -// ---------------------------------- -// /test-definitions/:testDefinitionId/metrics -// ---------------------------------- - -export declare namespace TestMetricsRequest { - namespace RouteParams { - type TestDefinitionId = { - testDefinitionId: string; - }; - - type TestMetricId = { - id: string; - }; - } - - type GetOne = AuthenticatedRequest; - - type GetMany = AuthenticatedRequest; - - type Create = AuthenticatedRequest; - - type Patch = AuthenticatedRequest< - RouteParams.TestDefinitionId & RouteParams.TestMetricId, - {}, - { name: string } - >; - - type Delete = AuthenticatedRequest; -} - // ---------------------------------- // /test-definitions/:testDefinitionId/runs // ---------------------------------- diff --git a/packages/cli/src/evaluation.ee/test-runner/__tests__/evaluation-metrics.ee.test.ts b/packages/cli/src/evaluation.ee/test-runner/__tests__/evaluation-metrics.ee.test.ts index d7bb9ec910..d9ddbde162 100644 --- a/packages/cli/src/evaluation.ee/test-runner/__tests__/evaluation-metrics.ee.test.ts +++ b/packages/cli/src/evaluation.ee/test-runner/__tests__/evaluation-metrics.ee.test.ts @@ -24,14 +24,6 @@ describe('EvaluationMetrics', () => { ); }); - test('should throw when missing values', () => { - const testMetricNames = new Set(['metric1', 'metric2']); - const metrics = new EvaluationMetrics(testMetricNames); - - expect(() => metrics.addResults({ metric1: 1 })).toThrow('METRICS_MISSING'); - expect(() => metrics.addResults({ metric2: 0.2 })).toThrow('METRICS_MISSING'); - }); - test('should handle empty metrics', () => { const testMetricNames = new Set(['metric1', 'metric2']); const metrics = new EvaluationMetrics(testMetricNames); diff --git a/packages/cli/src/evaluation.ee/test-runner/__tests__/mock-data/workflow.evaluation-middle.json b/packages/cli/src/evaluation.ee/test-runner/__tests__/mock-data/workflow.evaluation-middle.json new file mode 100644 index 0000000000..ba203bbac5 --- /dev/null +++ b/packages/cli/src/evaluation.ee/test-runner/__tests__/mock-data/workflow.evaluation-middle.json @@ -0,0 +1,85 @@ +{ + "nodes": [ + { + "parameters": {}, + "id": "6dde1608-135f-441d-8438-40f605e4dae3", + "name": "Execute Workflow Trigger", + "type": "n8n-nodes-base.executeWorkflowTrigger", + "typeVersion": 1, + "position": [-180, -40] + }, + { + "parameters": { + "metrics": { + "assignments": [ + { + "id": "f1cab1e4-dabc-4750-a1f7-6669a60213c9", + "name": "metric1", + "value": 20, + "type": "number" + }, + { + "id": "a66ed953-5341-47f6-8565-47640d987f5f", + "name": "metric2", + "value": 30, + "type": "number" + } + ] + } + }, + "id": "dfc71b70-7b7a-4dde-914f-cc4ffc99b18c", + "name": "Success", + "type": "n8n-nodes-base.evaluationMetrics", + "typeVersion": 1, + "position": [620, -40] + }, + { + "parameters": { + "metrics": { + "assignments": [ + { + "id": "1e1153da-77c0-4cb5-804a-3f4bc40833dd", + "name": "metric2", + "value": 10, + "type": "number" + } + ] + } + }, + "id": "5ef5be33-37e0-4c95-81c8-3fd677bdca88", + "name": "First Metric", + "type": "n8n-nodes-base.evaluationMetrics", + "typeVersion": 1, + "position": [160, -40] + } + ], + "connections": { + "Execute Workflow Trigger": { + "main": [ + [ + { + "node": "First Metric", + "type": "main", + "index": 0 + } + ] + ] + }, + "First Metric": { + "main": [ + [ + { + "node": "Success", + "type": "main", + "index": 0 + } + ] + 
]
+    }
+  },
+  "pinData": {},
+  "meta": {
+    "templateCredsSetupCompleted": true,
+    "instanceId": "27cc9b56542ad45b38725555722c50a1c3fee1670bbb67980558314ee08517c4"
+  }
+}
diff --git a/packages/cli/src/evaluation.ee/test-runner/__tests__/mock-data/workflow.evaluation.json b/packages/cli/src/evaluation.ee/test-runner/__tests__/mock-data/workflow.evaluation.json
index 6ec7f2c386..4db9e2bd6d 100644
--- a/packages/cli/src/evaluation.ee/test-runner/__tests__/mock-data/workflow.evaluation.json
+++ b/packages/cli/src/evaluation.ee/test-runner/__tests__/mock-data/workflow.evaluation.json
@@ -50,7 +50,7 @@
     },
     {
       "parameters": {
-        "assignments": {
+        "metrics": {
           "assignments": [
             {
               "id": "3b65d55a-158f-40c6-9853-a1c44b7ba1e5",
@@ -70,13 +70,13 @@
       },
       "id": "0c7a1ee8-0cf0-4d7f-99a3-186bbcd8815a",
       "name": "Success",
-      "type": "n8n-nodes-base.set",
-      "typeVersion": 3.4,
+      "type": "n8n-nodes-base.evaluationMetrics",
+      "typeVersion": 1,
       "position": [980, 220]
     },
     {
       "parameters": {
-        "assignments": {
+        "metrics": {
           "assignments": [
             {
               "id": "6cc8b402-4a30-4873-b825-963a1f1b8b82",
@@ -90,8 +90,8 @@
       },
       "id": "50d3f84a-d99f-4e04-bdbd-3e8c2668e708",
       "name": "Fail",
-      "type": "n8n-nodes-base.set",
-      "typeVersion": 3.4,
+      "type": "n8n-nodes-base.evaluationMetrics",
+      "typeVersion": 1,
       "position": [980, 420]
     }
   ],
diff --git a/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts b/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts
index d91a2d0ee7..8d61fcb901 100644
--- a/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts
+++ b/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts
@@ -46,6 +46,12 @@ const wfEvaluationJson = JSON.parse(
 	readFileSync(path.join(__dirname, './mock-data/workflow.evaluation.json'), { encoding: 'utf-8' }),
 );
 
+const wfEvaluationMiddleJson = JSON.parse(
+	readFileSync(path.join(__dirname, './mock-data/workflow.evaluation-middle.json'), {
+		encoding: 'utf-8',
+	}),
+);
+
 const wfMultipleTriggersJson = JSON.parse(
 	readFileSync(path.join(__dirname, './mock-data/workflow.multiple-triggers.json'), {
 		encoding: 'utf-8',
@@ -131,9 +137,22 @@ function mockEvaluationExecutionData(metrics: Record) {
 	return mock<IRun>({
 		data: {
 			resultData: {
-				lastNodeExecuted: 'lastNode',
+				lastNodeExecuted: 'Success',
 				runData: {
-					lastNode: [
+					Success: [
+						{
+							data: {
+								main: [
+									[
+										{
+											json: metrics,
+										},
+									],
+								],
+							},
+						},
+					],
+					Fail: [
 						{
 							data: {
 								main: [
@@ -155,6 +174,52 @@ function mockEvaluationExecutionData(metrics: Record) { return mock({ }); }
+function mockEvaluationMiddleExecutionData(
+	firstMetrics: Record,
+	secondMetrics: Record,
+) {
+	// Build the run data for the two evaluation metrics nodes ('First Metric' and 'Success')
+	// so the tests can assert how metrics reported by multiple nodes are combined
+	const runData: Record = {
+		'First Metric': [
+			{
+				data: {
+					main: [
+						[
+							{
+								json: firstMetrics,
+							},
+						],
+					],
+				},
+			},
+		],
+		Success: [
+			{
+				data: {
+					main: [
+						[
+							{
+								json: secondMetrics,
+							},
+						],
+					],
+				},
+			},
+		],
+	};
+
+	return mock<IRun>({
+		data: {
+			resultData: {
+				lastNodeExecuted: 'Success',
+				runData,
+				error: undefined,
+			},
+		},
+	});
+}
+
 const errorReporter = mock();
 const logger = mockLogger();
 const telemetry = mock();
@@ -363,7 +428,6 @@ describe('TestRunnerService', () => {
 		expect(testRunRepository.markAsCompleted).toHaveBeenCalledTimes(1);
 		expect(testRunRepository.markAsCompleted).toHaveBeenCalledWith('test-run-id', {
 			metric1: 0.75,
-			metric2: 50,
 		});
 
 		expect(testRunRepository.incrementPassed).toHaveBeenCalledTimes(2);
@@ -868,6 +932,218 @@ describe('TestRunnerService', () => {
 		expect(workflowRunner.run).toHaveBeenCalledTimes(1);
 	});
 
+	test('should run workflow with metrics defined in the middle of the workflow', async () => {
+		const testRunnerService = new TestRunnerService(
+			logger,
+			telemetry,
+			workflowRepository,
+			workflowRunner,
+			executionRepository,
+			activeExecutions,
+			testRunRepository,
+			testCaseExecutionRepository,
+			testMetricRepository,
+			mockNodeTypes,
+			errorReporter,
+		);
+
+		workflowRepository.findById.calledWith('workflow-under-test-id').mockResolvedValueOnce({
+			id: 'workflow-under-test-id',
+			...wfUnderTestJson,
+		});
+
+		workflowRepository.findById.calledWith('evaluation-workflow-id').mockResolvedValueOnce({
+			id: 'evaluation-workflow-id',
+			...wfEvaluationMiddleJson,
+		});
+
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id');
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id-2');
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id-3');
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id-4');
+
+		// Mock executions of workflow under test
+		activeExecutions.getPostExecutePromise
+			.calledWith('some-execution-id')
+			.mockResolvedValue(mockExecutionData());
+
+		activeExecutions.getPostExecutePromise
+			.calledWith('some-execution-id-3')
+			.mockResolvedValue(mockExecutionData());
+
+		// Mock executions of evaluation workflow
+		activeExecutions.getPostExecutePromise
+			.calledWith('some-execution-id-2')
+			.mockResolvedValue(mockEvaluationMiddleExecutionData({ metric2: 1 }, { metric1: 1 }));
+
+		activeExecutions.getPostExecutePromise
+			.calledWith('some-execution-id-4')
+			.mockResolvedValue(mockEvaluationMiddleExecutionData({ metric2: 2 }, { metric1: 0.5 }));
+
+		await testRunnerService.runTest(
+			mock<User>(),
+			mock<TestDefinition>({
+				workflowId: 'workflow-under-test-id',
+				evaluationWorkflowId: 'evaluation-workflow-id',
+				mockedNodes: [{ id: '72256d90-3a67-4e29-b032-47df4e5768af' }],
+			}),
+		);
+
+		expect(workflowRunner.run).toHaveBeenCalledTimes(4);
+
+		// Check workflow under test was executed
+		expect(workflowRunner.run).toHaveBeenCalledWith(
+			expect.objectContaining({
+				executionMode: 'evaluation',
+				pinData: {
+					'When clicking ‘Test workflow’':
+						executionDataJson.resultData.runData['When clicking ‘Test workflow’'][0].data.main[0],
+				},
+				workflowData: expect.objectContaining({
+					id: 'workflow-under-test-id',
+				}),
+			}),
+		);
+
+		// Check evaluation workflow was executed
+		expect(workflowRunner.run).toHaveBeenCalledWith(
+			expect.objectContaining({
+				executionMode: 'integrated',
+				executionData: expect.objectContaining({
+					executionData: expect.objectContaining({
+						nodeExecutionStack: expect.arrayContaining([
+							expect.objectContaining({ data: expect.anything() }),
+						]),
+					}),
+				}),
+				workflowData: expect.objectContaining({
+					id: 'evaluation-workflow-id',
+				}),
+			}),
+		);
+
+		// Check Test Run status was updated correctly
+		expect(testRunRepository.createTestRun).toHaveBeenCalledTimes(1);
+		expect(testRunRepository.markAsRunning).toHaveBeenCalledTimes(1);
+		expect(testRunRepository.markAsRunning).toHaveBeenCalledWith('test-run-id', expect.any(Number));
+		expect(testRunRepository.markAsCompleted).toHaveBeenCalledTimes(1);
+		expect(testRunRepository.markAsCompleted).toHaveBeenCalledWith('test-run-id', {
+			metric1: 0.75,
+			metric2: 1.5,
+		});
+
+		expect(testRunRepository.incrementPassed).toHaveBeenCalledTimes(2);
+		expect(testRunRepository.incrementFailed).not.toHaveBeenCalled();
+	});
+
+	test('should properly override metrics from earlier nodes with later 
ones', async () => { + const testRunnerService = new TestRunnerService( + logger, + telemetry, + workflowRepository, + workflowRunner, + executionRepository, + activeExecutions, + testRunRepository, + testCaseExecutionRepository, + testMetricRepository, + mockNodeTypes, + errorReporter, + ); + + workflowRepository.findById.calledWith('workflow-under-test-id').mockResolvedValueOnce({ + id: 'workflow-under-test-id', + ...wfUnderTestJson, + }); + + workflowRepository.findById.calledWith('evaluation-workflow-id').mockResolvedValueOnce({ + id: 'evaluation-workflow-id', + ...wfEvaluationMiddleJson, + }); + + workflowRunner.run.mockResolvedValueOnce('some-execution-id'); + workflowRunner.run.mockResolvedValueOnce('some-execution-id-2'); + workflowRunner.run.mockResolvedValueOnce('some-execution-id-3'); + workflowRunner.run.mockResolvedValueOnce('some-execution-id-4'); + + // Mock executions of workflow under test + activeExecutions.getPostExecutePromise + .calledWith('some-execution-id') + .mockResolvedValue(mockExecutionData()); + + activeExecutions.getPostExecutePromise + .calledWith('some-execution-id-3') + .mockResolvedValue(mockExecutionData()); + + // Mock executions of evaluation workflow + activeExecutions.getPostExecutePromise + .calledWith('some-execution-id-2') + .mockResolvedValue( + mockEvaluationMiddleExecutionData({ metric2: 5 }, { metric1: 1, metric2: 5 }), + ); + + activeExecutions.getPostExecutePromise + .calledWith('some-execution-id-4') + .mockResolvedValue( + mockEvaluationMiddleExecutionData({ metric2: 10 }, { metric1: 0.5, metric2: 10 }), + ); + + await testRunnerService.runTest( + mock(), + mock({ + workflowId: 'workflow-under-test-id', + evaluationWorkflowId: 'evaluation-workflow-id', + mockedNodes: [{ id: '72256d90-3a67-4e29-b032-47df4e5768af' }], + }), + ); + + expect(workflowRunner.run).toHaveBeenCalledTimes(4); + + // Check workflow under test was executed + expect(workflowRunner.run).toHaveBeenCalledWith( + expect.objectContaining({ + executionMode: 'evaluation', + pinData: { + 'When clicking ‘Test workflow’': + executionDataJson.resultData.runData['When clicking ‘Test workflow’'][0].data.main[0], + }, + workflowData: expect.objectContaining({ + id: 'workflow-under-test-id', + }), + }), + ); + + // Check evaluation workflow was executed + expect(workflowRunner.run).toHaveBeenCalledWith( + expect.objectContaining({ + executionMode: 'integrated', + executionData: expect.objectContaining({ + executionData: expect.objectContaining({ + nodeExecutionStack: expect.arrayContaining([ + expect.objectContaining({ data: expect.anything() }), + ]), + }), + }), + workflowData: expect.objectContaining({ + id: 'evaluation-workflow-id', + }), + }), + ); + + // Check Test Run status was updated correctly + expect(testRunRepository.createTestRun).toHaveBeenCalledTimes(1); + expect(testRunRepository.markAsRunning).toHaveBeenCalledTimes(1); + expect(testRunRepository.markAsRunning).toHaveBeenCalledWith('test-run-id', expect.any(Number)); + expect(testRunRepository.markAsCompleted).toHaveBeenCalledTimes(1); + expect(testRunRepository.markAsCompleted).toHaveBeenCalledWith('test-run-id', { + metric1: 0.75, + metric2: 7.5, + }); + + expect(testRunRepository.incrementPassed).toHaveBeenCalledTimes(2); + expect(testRunRepository.incrementFailed).not.toHaveBeenCalled(); + }); + describe('Test Run cancellation', () => { beforeAll(() => { jest.useFakeTimers(); diff --git a/packages/cli/src/evaluation.ee/test-runner/errors.ee.ts b/packages/cli/src/evaluation.ee/test-runner/errors.ee.ts index 
0b2df294b2..bd11ba479e 100644 --- a/packages/cli/src/evaluation.ee/test-runner/errors.ee.ts +++ b/packages/cli/src/evaluation.ee/test-runner/errors.ee.ts @@ -6,8 +6,6 @@ export type TestCaseExecutionErrorCode = | 'FAILED_TO_EXECUTE_WORKFLOW' | 'EVALUATION_WORKFLOW_DOES_NOT_EXIST' | 'FAILED_TO_EXECUTE_EVALUATION_WORKFLOW' - | 'METRICS_MISSING' - | 'UNKNOWN_METRICS' | 'INVALID_METRICS' | 'PAYLOAD_LIMIT_EXCEEDED' | 'UNKNOWN_ERROR'; diff --git a/packages/cli/src/evaluation.ee/test-runner/evaluation-metrics.ee.ts b/packages/cli/src/evaluation.ee/test-runner/evaluation-metrics.ee.ts index b2422f4b6b..1e063e262a 100644 --- a/packages/cli/src/evaluation.ee/test-runner/evaluation-metrics.ee.ts +++ b/packages/cli/src/evaluation.ee/test-runner/evaluation-metrics.ee.ts @@ -1,4 +1,3 @@ -import difference from 'lodash/difference'; import type { IDataObject } from 'n8n-workflow'; import { TestCaseExecutionError } from '@/evaluation.ee/test-runner/errors.ee'; @@ -43,16 +42,6 @@ export class EvaluationMetrics { } } - // Check that result contains all expected metrics - if ( - difference(Array.from(this.metricNames), Object.keys(addResultsInfo.addedMetrics)).length > 0 - ) { - throw new TestCaseExecutionError('METRICS_MISSING', { - expectedMetrics: Array.from(this.metricNames).sort(), - receivedMetrics: Object.keys(addResultsInfo.addedMetrics).sort(), - }); - } - return addResultsInfo; } diff --git a/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts b/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts index 628349bf85..c987523a7d 100644 --- a/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts +++ b/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts @@ -1,8 +1,10 @@ import { Service } from '@n8n/di'; import { parse } from 'flatted'; +import difference from 'lodash/difference'; import { ErrorReporter, Logger } from 'n8n-core'; import { ExecutionCancelledError, NodeConnectionTypes, Workflow } from 'n8n-workflow'; import type { + AssignmentCollectionValue, IDataObject, IRun, IRunExecutionData, @@ -13,6 +15,7 @@ import assert from 'node:assert'; import { ActiveExecutions } from '@/active-executions'; import config from '@/config'; +import { EVALUATION_METRICS_NODE } from '@/constants'; import type { ExecutionEntity } from '@/databases/entities/execution-entity'; import type { MockedNodeItem, TestDefinition } from '@/databases/entities/test-definition.ee'; import type { TestRun } from '@/databases/entities/test-run.ee'; @@ -225,6 +228,50 @@ export class TestRunnerService { return await executePromise; } + /** + * Sync the metrics of the test definition with the evaluation workflow. 
+	 */
+	async syncMetrics(
+		testDefinitionId: string,
+		evaluationWorkflow: IWorkflowBase,
+	): Promise<Set<string>> {
+		const usedTestMetricNames = await this.getUsedTestMetricNames(evaluationWorkflow);
+		const existingTestMetrics = await this.testMetricRepository.find({
+			where: {
+				testDefinition: { id: testDefinitionId },
+			},
+		});
+
+		const existingMetricNames = new Set(existingTestMetrics.map((metric) => metric.name));
+		const metricsToAdd = difference(
+			Array.from(usedTestMetricNames),
+			Array.from(existingMetricNames),
+		);
+		const metricsToRemove = difference(
+			Array.from(existingMetricNames),
+			Array.from(usedTestMetricNames),
+		);
+
+		// Add new metrics
+		const metricsToAddEntities = metricsToAdd.map((metricName) =>
+			this.testMetricRepository.create({
+				name: metricName,
+				testDefinition: { id: testDefinitionId },
+			}),
+		);
+		await this.testMetricRepository.save(metricsToAddEntities);
+
+		// Remove no longer used metrics
+		metricsToRemove.forEach(async (metricName) => {
+			const metric = existingTestMetrics.find((m) => m.name === metricName);
+			assert(metric, 'Existing metric not found');
+
+			await this.testMetricRepository.delete(metric.id);
+		});
+
+		return usedTestMetricNames;
+	}
+
 	/**
 	 * Run the evaluation workflow with the expected and actual run data.
 	 */
@@ -265,35 +312,45 @@ export class TestRunnerService {
 		return await executePromise;
 	}
 
+	/**
+	 * Get the evaluation metrics nodes from a workflow.
+	 */
+	static getEvaluationMetricsNodes(workflow: IWorkflowBase) {
+		return workflow.nodes.filter((node) => node.type === EVALUATION_METRICS_NODE);
+	}
+
 	/**
 	 * Evaluation result is the first item in the output of the last node
 	 * executed in the evaluation workflow. Defaults to an empty object
 	 * in case the node doesn't produce any output items.
 	 */
-	private extractEvaluationResult(execution: IRun): IDataObject {
+	private extractEvaluationResult(execution: IRun, evaluationWorkflow: IWorkflowBase): IDataObject {
 		const lastNodeExecuted = execution.data.resultData.lastNodeExecuted;
 		assert(lastNodeExecuted, 'Could not find the last node executed in evaluation workflow');
 
+		// Collect the first output item of every evaluation metrics node and merge them into one object
+		const metricsNodes = TestRunnerService.getEvaluationMetricsNodes(evaluationWorkflow);
+		const metricsRunData = metricsNodes.flatMap(
+			(node) => execution.data.resultData.runData[node.name],
+		);
+		const metricsData = metricsRunData.reverse().map((data) => data.data?.main?.[0]?.[0]?.json);
+		const metricsResult = metricsData.reduce((acc, curr) => ({ ...acc, ...curr }), {}) ?? {};
 
-		// Extract the output of the last node executed in the evaluation workflow
-		// We use only the first item of a first main output
-		const lastNodeTaskData = execution.data.resultData.runData[lastNodeExecuted]?.[0];
-		const mainConnectionData = lastNodeTaskData?.data?.main?.[0];
-		return mainConnectionData?.[0]?.json ?? {};
+		return metricsResult;
 	}
 
 	/**
 	 * Get the metrics to collect from the evaluation workflow execution results.
*/ - private async getTestMetricNames(testDefinitionId: string) { - const metrics = await this.testMetricRepository.find({ - where: { - testDefinition: { - id: testDefinitionId, - }, - }, + private async getUsedTestMetricNames(evaluationWorkflow: IWorkflowBase) { + const metricsNodes = TestRunnerService.getEvaluationMetricsNodes(evaluationWorkflow); + const metrics = metricsNodes.map((node) => { + const metricsParameter = node.parameters?.metrics as AssignmentCollectionValue; + assert(metricsParameter, 'Metrics parameter not found'); + + const metricsNames = metricsParameter.assignments.map((assignment) => assignment.name); + return metricsNames; }); - return new Set(metrics.map((m) => m.name)); + return new Set(metrics.flat()); } /** @@ -329,7 +386,6 @@ export class TestRunnerService { if (!evaluationWorkflow) { throw new TestRunError('EVALUATION_WORKFLOW_NOT_FOUND'); } - /// // 1. Make test cases from previous executions /// @@ -359,8 +415,8 @@ export class TestRunnerService { pastExecutions.map((e) => e.id), ); - // Get the metrics to collect from the evaluation workflow - const testMetricNames = await this.getTestMetricNames(test.id); + // Sync the metrics of the test definition with the evaluation workflow + const testMetricNames = await this.syncMetrics(test.id, evaluationWorkflow); // 2. Run over all the test cases const pastExecutionIds = pastExecutions.map((e) => e.id); @@ -465,8 +521,8 @@ export class TestRunnerService { this.logger.debug('Evaluation execution finished', { pastExecutionId }); // Extract the output of the last node executed in the evaluation workflow - const { addedMetrics, unknownMetrics } = metrics.addResults( - this.extractEvaluationResult(evalExecution), + const { addedMetrics } = metrics.addResults( + this.extractEvaluationResult(evalExecution, evaluationWorkflow), ); if (evalExecution.data.resultData.error) { @@ -483,22 +539,12 @@ export class TestRunnerService { await Db.transaction(async (trx) => { await this.testRunRepository.incrementPassed(testRun.id, trx); - // Add warning if the evaluation workflow produced an unknown metric - if (unknownMetrics.size > 0) { - await this.testCaseExecutionRepository.markAsWarning({ - testRunId: testRun.id, - pastExecutionId, - errorCode: 'UNKNOWN_METRICS', - errorDetails: { unknownMetrics: Array.from(unknownMetrics) }, - }); - } else { - await this.testCaseExecutionRepository.markAsCompleted({ - testRunId: testRun.id, - pastExecutionId, - metrics: addedMetrics, - trx, - }); - } + await this.testCaseExecutionRepository.markAsCompleted({ + testRunId: testRun.id, + pastExecutionId, + metrics: addedMetrics, + trx, + }); }); } } catch (e) { diff --git a/packages/cli/src/server.ts b/packages/cli/src/server.ts index 9ba7aa930c..e2d05f2a66 100644 --- a/packages/cli/src/server.ts +++ b/packages/cli/src/server.ts @@ -66,7 +66,6 @@ import '@/executions/executions.controller'; import '@/external-secrets.ee/external-secrets.controller.ee'; import '@/license/license.controller'; import '@/evaluation.ee/test-definitions.controller.ee'; -import '@/evaluation.ee/metrics.controller'; import '@/evaluation.ee/test-runs.controller.ee'; import '@/workflows/workflow-history.ee/workflow-history.controller.ee'; import '@/workflows/workflows.controller'; diff --git a/packages/cli/test/integration/evaluation/metrics.api.test.ts b/packages/cli/test/integration/evaluation/metrics.api.test.ts deleted file mode 100644 index ff04aedf12..0000000000 --- a/packages/cli/test/integration/evaluation/metrics.api.test.ts +++ /dev/null @@ -1,381 +0,0 @@ 
-import { Container } from '@n8n/di'; -import type { IWorkflowBase } from 'n8n-workflow'; - -import type { TestDefinition } from '@/databases/entities/test-definition.ee'; -import type { User } from '@/databases/entities/user'; -import { TestDefinitionRepository } from '@/databases/repositories/test-definition.repository.ee'; -import { TestMetricRepository } from '@/databases/repositories/test-metric.repository.ee'; -import { createUserShell } from '@test-integration/db/users'; -import { createWorkflow } from '@test-integration/db/workflows'; -import * as testDb from '@test-integration/test-db'; -import type { SuperAgentTest } from '@test-integration/types'; -import * as utils from '@test-integration/utils'; - -let authOwnerAgent: SuperAgentTest; -let workflowUnderTest: IWorkflowBase; -let otherWorkflow: IWorkflowBase; -let testDefinition: TestDefinition; -let otherTestDefinition: TestDefinition; -let ownerShell: User; - -const testServer = utils.setupTestServer({ endpointGroups: ['evaluation'] }); - -beforeAll(async () => { - ownerShell = await createUserShell('global:owner'); - authOwnerAgent = testServer.authAgentFor(ownerShell); -}); - -beforeEach(async () => { - await testDb.truncate(['TestDefinition', 'TestMetric']); - - workflowUnderTest = await createWorkflow({ name: 'workflow-under-test' }, ownerShell); - - testDefinition = Container.get(TestDefinitionRepository).create({ - name: 'test', - workflow: { id: workflowUnderTest.id }, - }); - await Container.get(TestDefinitionRepository).save(testDefinition); - - otherWorkflow = await createWorkflow({ name: 'other-workflow' }); - - otherTestDefinition = Container.get(TestDefinitionRepository).create({ - name: 'other-test', - workflow: { id: otherWorkflow.id }, - }); - await Container.get(TestDefinitionRepository).save(otherTestDefinition); -}); - -describe('GET /evaluation/test-definitions/:testDefinitionId/metrics', () => { - test('should retrieve empty list of metrics for a test definition', async () => { - const resp = await authOwnerAgent.get( - `/evaluation/test-definitions/${testDefinition.id}/metrics`, - ); - - expect(resp.statusCode).toBe(200); - expect(resp.body.data.length).toBe(0); - }); - - test('should retrieve metrics for a test definition', async () => { - const newMetric = Container.get(TestMetricRepository).create({ - testDefinition: { id: testDefinition.id }, - name: 'metric-1', - }); - await Container.get(TestMetricRepository).save(newMetric); - - const newMetric2 = Container.get(TestMetricRepository).create({ - testDefinition: { id: testDefinition.id }, - name: 'metric-2', - }); - await Container.get(TestMetricRepository).save(newMetric2); - - const resp = await authOwnerAgent.get( - `/evaluation/test-definitions/${testDefinition.id}/metrics`, - ); - - expect(resp.statusCode).toBe(200); - expect(resp.body.data.length).toBe(2); - expect(resp.body.data).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - id: expect.any(String), - name: 'metric-1', - }), - expect.objectContaining({ - id: expect.any(String), - name: 'metric-2', - }), - ]), - ); - }); - - test('should return 404 if test definition does not exist', async () => { - const resp = await authOwnerAgent.get('/evaluation/test-definitions/999/metrics'); - - expect(resp.statusCode).toBe(404); - }); - - test('should return 404 if test definition is not accessible to the user', async () => { - const resp = await authOwnerAgent.get( - `/evaluation/test-definitions/${otherTestDefinition.id}/metrics`, - ); - - expect(resp.statusCode).toBe(404); - }); -}); 
- -describe('GET /evaluation/test-definitions/:testDefinitionId/metrics/:id', () => { - test('should retrieve a metric for a test definition', async () => { - const newMetric = Container.get(TestMetricRepository).create({ - testDefinition: { id: testDefinition.id }, - name: 'metric-1', - }); - await Container.get(TestMetricRepository).save(newMetric); - - const resp = await authOwnerAgent.get( - `/evaluation/test-definitions/${testDefinition.id}/metrics/${newMetric.id}`, - ); - - expect(resp.statusCode).toBe(200); - expect(resp.body.data).toEqual( - expect.objectContaining({ - id: newMetric.id, - name: 'metric-1', - }), - ); - }); - - test('should return 404 if metric does not exist', async () => { - const resp = await authOwnerAgent.get( - `/evaluation/test-definitions/${testDefinition.id}/metrics/999`, - ); - - expect(resp.statusCode).toBe(404); - }); - - test('should return 404 if metric is not accessible to the user', async () => { - const newMetric = Container.get(TestMetricRepository).create({ - testDefinition: { id: otherTestDefinition.id }, - name: 'metric-1', - }); - await Container.get(TestMetricRepository).save(newMetric); - - const resp = await authOwnerAgent.get( - `/evaluation/test-definitions/${otherTestDefinition.id}/metrics/${newMetric.id}`, - ); - - expect(resp.statusCode).toBe(404); - }); -}); - -describe('POST /evaluation/test-definitions/:testDefinitionId/metrics', () => { - test('should create a metric for a test definition', async () => { - const resp = await authOwnerAgent - .post(`/evaluation/test-definitions/${testDefinition.id}/metrics`) - .send({ - name: 'metric-1', - }); - - expect(resp.statusCode).toBe(200); - expect(resp.body.data).toEqual( - expect.objectContaining({ - id: expect.any(String), - name: 'metric-1', - }), - ); - - const metrics = await Container.get(TestMetricRepository).find({ - where: { testDefinition: { id: testDefinition.id } }, - }); - expect(metrics.length).toBe(1); - expect(metrics[0].name).toBe('metric-1'); - }); - - test('should return 400 if name is missing', async () => { - const resp = await authOwnerAgent - .post(`/evaluation/test-definitions/${testDefinition.id}/metrics`) - .send({}); - - expect(resp.statusCode).toBe(400); - expect(resp.body.errors).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - code: 'invalid_type', - message: 'Required', - path: ['name'], - }), - ]), - ); - }); - - test('should return 400 if name is not a string', async () => { - const resp = await authOwnerAgent - .post(`/evaluation/test-definitions/${testDefinition.id}/metrics`) - .send({ - name: 123, - }); - - expect(resp.statusCode).toBe(400); - expect(resp.body.errors).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - code: 'invalid_type', - message: 'Expected string, received number', - path: ['name'], - }), - ]), - ); - }); - - test('should return 404 if test definition does not exist', async () => { - const resp = await authOwnerAgent.post('/evaluation/test-definitions/999/metrics').send({ - name: 'metric-1', - }); - - expect(resp.statusCode).toBe(404); - }); - - test('should return 404 if test definition is not accessible to the user', async () => { - const resp = await authOwnerAgent - .post(`/evaluation/test-definitions/${otherTestDefinition.id}/metrics`) - .send({ - name: 'metric-1', - }); - - expect(resp.statusCode).toBe(404); - }); -}); - -describe('PATCH /evaluation/test-definitions/:testDefinitionId/metrics/:id', () => { - test('should update a metric for a test definition', async () => { - const newMetric = 
Container.get(TestMetricRepository).create({ - testDefinition: { id: testDefinition.id }, - name: 'metric-1', - }); - await Container.get(TestMetricRepository).save(newMetric); - - const resp = await authOwnerAgent - .patch(`/evaluation/test-definitions/${testDefinition.id}/metrics/${newMetric.id}`) - .send({ - name: 'metric-2', - }); - - expect(resp.statusCode).toBe(200); - expect(resp.body.data).toEqual( - expect.objectContaining({ - id: newMetric.id, - name: 'metric-2', - }), - ); - - const metrics = await Container.get(TestMetricRepository).find({ - where: { testDefinition: { id: testDefinition.id } }, - }); - expect(metrics.length).toBe(1); - expect(metrics[0].name).toBe('metric-2'); - }); - - test('should return 400 if name is missing', async () => { - const newMetric = Container.get(TestMetricRepository).create({ - testDefinition: { id: testDefinition.id }, - name: 'metric-1', - }); - await Container.get(TestMetricRepository).save(newMetric); - - const resp = await authOwnerAgent - .patch(`/evaluation/test-definitions/${testDefinition.id}/metrics/${newMetric.id}`) - .send({}); - - expect(resp.statusCode).toBe(400); - expect(resp.body.errors).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - code: 'invalid_type', - message: 'Required', - path: ['name'], - }), - ]), - ); - }); - - test('should return 400 if name is not a string', async () => { - const newMetric = Container.get(TestMetricRepository).create({ - testDefinition: { id: testDefinition.id }, - name: 'metric-1', - }); - await Container.get(TestMetricRepository).save(newMetric); - - const resp = await authOwnerAgent - .patch(`/evaluation/test-definitions/${testDefinition.id}/metrics/${newMetric.id}`) - .send({ - name: 123, - }); - - expect(resp.statusCode).toBe(400); - expect(resp.body.errors).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - code: 'invalid_type', - message: 'Expected string, received number', - }), - ]), - ); - }); - - test('should return 404 if metric does not exist', async () => { - const resp = await authOwnerAgent - .patch(`/evaluation/test-definitions/${testDefinition.id}/metrics/999`) - .send({ - name: 'metric-1', - }); - - expect(resp.statusCode).toBe(404); - }); - - test('should return 404 if test definition does not exist', async () => { - const resp = await authOwnerAgent.patch('/evaluation/test-definitions/999/metrics/999').send({ - name: 'metric-1', - }); - - expect(resp.statusCode).toBe(404); - }); - - test('should return 404 if metric is not accessible to the user', async () => { - const newMetric = Container.get(TestMetricRepository).create({ - testDefinition: { id: otherTestDefinition.id }, - name: 'metric-1', - }); - await Container.get(TestMetricRepository).save(newMetric); - - const resp = await authOwnerAgent - .patch(`/evaluation/test-definitions/${otherTestDefinition.id}/metrics/${newMetric.id}`) - .send({ - name: 'metric-2', - }); - - expect(resp.statusCode).toBe(404); - }); -}); - -describe('DELETE /evaluation/test-definitions/:testDefinitionId/metrics/:id', () => { - test('should delete a metric for a test definition', async () => { - const newMetric = Container.get(TestMetricRepository).create({ - testDefinition: { id: testDefinition.id }, - name: 'metric-1', - }); - await Container.get(TestMetricRepository).save(newMetric); - - const resp = await authOwnerAgent.delete( - `/evaluation/test-definitions/${testDefinition.id}/metrics/${newMetric.id}`, - ); - - expect(resp.statusCode).toBe(200); - expect(resp.body.data).toEqual({ success: true }); - - const 
metrics = await Container.get(TestMetricRepository).find({ - where: { testDefinition: { id: testDefinition.id } }, - }); - expect(metrics.length).toBe(0); - }); - - test('should return 404 if metric does not exist', async () => { - const resp = await authOwnerAgent.delete( - `/evaluation/test-definitions/${testDefinition.id}/metrics/999`, - ); - - expect(resp.statusCode).toBe(404); - }); - - test('should return 404 if metric is not accessible to the user', async () => { - const newMetric = Container.get(TestMetricRepository).create({ - testDefinition: { id: otherTestDefinition.id }, - name: 'metric-1', - }); - await Container.get(TestMetricRepository).save(newMetric); - - const resp = await authOwnerAgent.delete( - `/evaluation/test-definitions/${otherTestDefinition.id}/metrics/${newMetric.id}`, - ); - - expect(resp.statusCode).toBe(404); - }); -}); diff --git a/packages/cli/test/integration/shared/utils/test-server.ts b/packages/cli/test/integration/shared/utils/test-server.ts index b242915c92..739a89305c 100644 --- a/packages/cli/test/integration/shared/utils/test-server.ts +++ b/packages/cli/test/integration/shared/utils/test-server.ts @@ -281,7 +281,6 @@ export const setupTestServer = ({ break; case 'evaluation': - await import('@/evaluation.ee/metrics.controller'); await import('@/evaluation.ee/test-definitions.controller.ee'); await import('@/evaluation.ee/test-runs.controller.ee'); break; diff --git a/packages/frontend/editor-ui/src/api/testDefinition.ee.ts b/packages/frontend/editor-ui/src/api/testDefinition.ee.ts index c15b2bc9d3..cf4a123b10 100644 --- a/packages/frontend/editor-ui/src/api/testDefinition.ee.ts +++ b/packages/frontend/editor-ui/src/api/testDefinition.ee.ts @@ -83,8 +83,6 @@ export interface TestCaseExecutionRecord { } const endpoint = '/evaluation/test-definitions'; -const getMetricsEndpoint = (testDefinitionId: string, metricId?: string) => - `${endpoint}/${testDefinitionId}/metrics${metricId ? 
`/${metricId}` : ''}`; export async function getTestDefinitions( context: IRestApiContext, @@ -141,86 +139,6 @@ export async function getExampleEvaluationInput( ); } -// Metrics -export interface TestMetricRecord { - id: string; - name: string; - testDefinitionId: string; - createdAt?: string; - updatedAt?: string; -} - -export interface CreateTestMetricParams { - testDefinitionId: string; - name: string; -} - -export interface UpdateTestMetricParams { - name: string; - id: string; - testDefinitionId: string; -} - -export interface DeleteTestMetricParams { - testDefinitionId: string; - id: string; -} - -export const getTestMetrics = async (context: IRestApiContext, testDefinitionId: string) => { - return await makeRestApiRequest( - context, - 'GET', - getMetricsEndpoint(testDefinitionId), - ); -}; - -export const getTestMetric = async ( - context: IRestApiContext, - testDefinitionId: string, - id: string, -) => { - return await makeRestApiRequest( - context, - 'GET', - getMetricsEndpoint(testDefinitionId, id), - ); -}; - -export const createTestMetric = async ( - context: IRestApiContext, - params: CreateTestMetricParams, -) => { - return await makeRestApiRequest( - context, - 'POST', - getMetricsEndpoint(params.testDefinitionId), - { name: params.name }, - ); -}; - -export const updateTestMetric = async ( - context: IRestApiContext, - params: UpdateTestMetricParams, -) => { - return await makeRestApiRequest( - context, - 'PATCH', - getMetricsEndpoint(params.testDefinitionId, params.id), - { name: params.name }, - ); -}; - -export const deleteTestMetric = async ( - context: IRestApiContext, - params: DeleteTestMetricParams, -) => { - return await makeRestApiRequest( - context, - 'DELETE', - getMetricsEndpoint(params.testDefinitionId, params.id), - ); -}; - const getRunsEndpoint = (testDefinitionId: string, runId?: string) => `${endpoint}/${testDefinitionId}/runs${runId ? `/${runId}` : ''}`; diff --git a/packages/frontend/editor-ui/src/components/TestDefinition/EditDefinition/MetricsInput.vue b/packages/frontend/editor-ui/src/components/TestDefinition/EditDefinition/MetricsInput.vue deleted file mode 100644 index f679b6cdfa..0000000000 --- a/packages/frontend/editor-ui/src/components/TestDefinition/EditDefinition/MetricsInput.vue +++ /dev/null @@ -1,70 +0,0 @@ - - - - - diff --git a/packages/frontend/editor-ui/src/components/TestDefinition/EditDefinition/sections/ConfigSection.vue b/packages/frontend/editor-ui/src/components/TestDefinition/EditDefinition/sections/ConfigSection.vue index 038bbf71f3..c02d02064d 100644 --- a/packages/frontend/editor-ui/src/components/TestDefinition/EditDefinition/sections/ConfigSection.vue +++ b/packages/frontend/editor-ui/src/components/TestDefinition/EditDefinition/sections/ConfigSection.vue @@ -1,8 +1,6 @@