diff --git a/packages/cli/src/constants.ts b/packages/cli/src/constants.ts index fe662c9ddf..dc557dec76 100644 --- a/packages/cli/src/constants.ts +++ b/packages/cli/src/constants.ts @@ -195,3 +195,5 @@ export const WsStatusCodes = { } as const; export const FREE_AI_CREDITS_CREDENTIAL_NAME = 'n8n free OpenAI API credits'; + +export const EVALUATION_METRICS_NODE = `${NODE_PACKAGE_PREFIX}base.evaluationMetrics`; diff --git a/packages/cli/src/evaluation.ee/metrics.controller.ts b/packages/cli/src/evaluation.ee/metrics.controller.ts deleted file mode 100644 index 5d27931166..0000000000 --- a/packages/cli/src/evaluation.ee/metrics.controller.ts +++ /dev/null @@ -1,141 +0,0 @@ -import express from 'express'; - -import { TestMetricRepository } from '@/databases/repositories/test-metric.repository.ee'; -import { Delete, Get, Patch, Post, RestController } from '@/decorators'; -import { NotFoundError } from '@/errors/response-errors/not-found.error'; -import { - testMetricCreateRequestBodySchema, - testMetricPatchRequestBodySchema, -} from '@/evaluation.ee/metric.schema'; -import { getSharedWorkflowIds } from '@/public-api/v1/handlers/workflows/workflows.service'; -import { Telemetry } from '@/telemetry'; - -import { TestDefinitionService } from './test-definition.service.ee'; -import { TestMetricsRequest } from './test-definitions.types.ee'; - -@RestController('/evaluation/test-definitions') -export class TestMetricsController { - constructor( - private readonly testDefinitionService: TestDefinitionService, - private readonly testMetricRepository: TestMetricRepository, - private readonly telemetry: Telemetry, - ) {} - - // This method is used in multiple places in the controller to get the test definition - // (or just check that it exists and the user has access to it). 
- private async getTestDefinition( - req: - | TestMetricsRequest.GetOne - | TestMetricsRequest.GetMany - | TestMetricsRequest.Patch - | TestMetricsRequest.Delete - | TestMetricsRequest.Create, - ) { - const { testDefinitionId } = req.params; - - const userAccessibleWorkflowIds = await getSharedWorkflowIds(req.user, ['workflow:read']); - - const testDefinition = await this.testDefinitionService.findOne( - testDefinitionId, - userAccessibleWorkflowIds, - ); - - if (!testDefinition) throw new NotFoundError('Test definition not found'); - - return testDefinition; - } - - @Get('/:testDefinitionId/metrics') - async getMany(req: TestMetricsRequest.GetMany) { - const { testDefinitionId } = req.params; - - await this.getTestDefinition(req); - - return await this.testMetricRepository.find({ - where: { testDefinition: { id: testDefinitionId } }, - }); - } - - @Get('/:testDefinitionId/metrics/:id') - async getOne(req: TestMetricsRequest.GetOne) { - const { id: metricId, testDefinitionId } = req.params; - - await this.getTestDefinition(req); - - const metric = await this.testMetricRepository.findOne({ - where: { id: metricId, testDefinition: { id: testDefinitionId } }, - }); - - if (!metric) throw new NotFoundError('Metric not found'); - - return metric; - } - - @Post('/:testDefinitionId/metrics') - async create(req: TestMetricsRequest.Create, res: express.Response) { - const bodyParseResult = testMetricCreateRequestBodySchema.safeParse(req.body); - if (!bodyParseResult.success) { - res.status(400).json({ errors: bodyParseResult.error.errors }); - return; - } - - const testDefinition = await this.getTestDefinition(req); - - const metric = this.testMetricRepository.create({ - ...req.body, - testDefinition, - }); - - return await this.testMetricRepository.save(metric); - } - - @Patch('/:testDefinitionId/metrics/:id') - async patch(req: TestMetricsRequest.Patch, res: express.Response) { - const { id: metricId, testDefinitionId } = req.params; - - const bodyParseResult = testMetricPatchRequestBodySchema.safeParse(req.body); - if (!bodyParseResult.success) { - res.status(400).json({ errors: bodyParseResult.error.errors }); - return; - } - - await this.getTestDefinition(req); - - const metric = await this.testMetricRepository.findOne({ - where: { id: metricId, testDefinition: { id: testDefinitionId } }, - }); - - if (!metric) throw new NotFoundError('Metric not found'); - - const updateResult = await this.testMetricRepository.update(metricId, bodyParseResult.data); - - // Send telemetry event if the metric was updated - if (updateResult.affected === 1 && metric.name !== bodyParseResult.data.name) { - this.telemetry.track('User added metrics to test', { - metric_id: metricId, - metric_name: bodyParseResult.data.name, - test_id: testDefinitionId, - }); - } - - // Respond with the updated metric - return await this.testMetricRepository.findOneBy({ id: metricId }); - } - - @Delete('/:testDefinitionId/metrics/:id') - async delete(req: TestMetricsRequest.Delete) { - const { id: metricId, testDefinitionId } = req.params; - - await this.getTestDefinition(req); - - const metric = await this.testMetricRepository.findOne({ - where: { id: metricId, testDefinition: { id: testDefinitionId } }, - }); - - if (!metric) throw new NotFoundError('Metric not found'); - - await this.testMetricRepository.delete(metricId); - - return { success: true }; - } -} diff --git a/packages/cli/src/evaluation.ee/test-definitions.types.ee.ts b/packages/cli/src/evaluation.ee/test-definitions.types.ee.ts index 98feea7e5d..eb19f964f7 100644 --- 
a/packages/cli/src/evaluation.ee/test-definitions.types.ee.ts +++ b/packages/cli/src/evaluation.ee/test-definitions.types.ee.ts @@ -47,36 +47,6 @@ export declare namespace TestDefinitionsRequest { >; } -// ---------------------------------- -// /test-definitions/:testDefinitionId/metrics -// ---------------------------------- - -export declare namespace TestMetricsRequest { - namespace RouteParams { - type TestDefinitionId = { - testDefinitionId: string; - }; - - type TestMetricId = { - id: string; - }; - } - - type GetOne = AuthenticatedRequest; - - type GetMany = AuthenticatedRequest; - - type Create = AuthenticatedRequest; - - type Patch = AuthenticatedRequest< - RouteParams.TestDefinitionId & RouteParams.TestMetricId, - {}, - { name: string } - >; - - type Delete = AuthenticatedRequest; -} - // ---------------------------------- // /test-definitions/:testDefinitionId/runs // ---------------------------------- diff --git a/packages/cli/src/evaluation.ee/test-runner/__tests__/evaluation-metrics.ee.test.ts b/packages/cli/src/evaluation.ee/test-runner/__tests__/evaluation-metrics.ee.test.ts index d7bb9ec910..d9ddbde162 100644 --- a/packages/cli/src/evaluation.ee/test-runner/__tests__/evaluation-metrics.ee.test.ts +++ b/packages/cli/src/evaluation.ee/test-runner/__tests__/evaluation-metrics.ee.test.ts @@ -24,14 +24,6 @@ describe('EvaluationMetrics', () => { ); }); - test('should throw when missing values', () => { - const testMetricNames = new Set(['metric1', 'metric2']); - const metrics = new EvaluationMetrics(testMetricNames); - - expect(() => metrics.addResults({ metric1: 1 })).toThrow('METRICS_MISSING'); - expect(() => metrics.addResults({ metric2: 0.2 })).toThrow('METRICS_MISSING'); - }); - test('should handle empty metrics', () => { const testMetricNames = new Set(['metric1', 'metric2']); const metrics = new EvaluationMetrics(testMetricNames); diff --git a/packages/cli/src/evaluation.ee/test-runner/__tests__/mock-data/workflow.evaluation-middle.json b/packages/cli/src/evaluation.ee/test-runner/__tests__/mock-data/workflow.evaluation-middle.json new file mode 100644 index 0000000000..ba203bbac5 --- /dev/null +++ b/packages/cli/src/evaluation.ee/test-runner/__tests__/mock-data/workflow.evaluation-middle.json @@ -0,0 +1,85 @@ +{ + "nodes": [ + { + "parameters": {}, + "id": "6dde1608-135f-441d-8438-40f605e4dae3", + "name": "Execute Workflow Trigger", + "type": "n8n-nodes-base.executeWorkflowTrigger", + "typeVersion": 1, + "position": [-180, -40] + }, + { + "parameters": { + "metrics": { + "assignments": [ + { + "id": "f1cab1e4-dabc-4750-a1f7-6669a60213c9", + "name": "metric1", + "value": 20, + "type": "number" + }, + { + "id": "a66ed953-5341-47f6-8565-47640d987f5f", + "name": "metric2", + "value": 30, + "type": "number" + } + ] + } + }, + "id": "dfc71b70-7b7a-4dde-914f-cc4ffc99b18c", + "name": "Success", + "type": "n8n-nodes-base.evaluationMetrics", + "typeVersion": 1, + "position": [620, -40] + }, + { + "parameters": { + "metrics": { + "assignments": [ + { + "id": "1e1153da-77c0-4cb5-804a-3f4bc40833dd", + "name": "metric2", + "value": 10, + "type": "number" + } + ] + } + }, + "id": "5ef5be33-37e0-4c95-81c8-3fd677bdca88", + "name": "First Metric", + "type": "n8n-nodes-base.evaluationMetrics", + "typeVersion": 1, + "position": [160, -40] + } + ], + "connections": { + "Execute Workflow Trigger": { + "main": [ + [ + { + "node": "First Metric", + "type": "main", + "index": 0 + } + ] + ] + }, + "First Metric": { + "main": [ + [ + { + "node": "Success", + "type": "main", + "index": 0 + } + ] + 
]
+    }
+  },
+  "pinData": {},
+  "meta": {
+    "templateCredsSetupCompleted": true,
+    "instanceId": "27cc9b56542ad45b38725555722c50a1c3fee1670bbb67980558314ee08517c4"
+  }
+}
diff --git a/packages/cli/src/evaluation.ee/test-runner/__tests__/mock-data/workflow.evaluation.json b/packages/cli/src/evaluation.ee/test-runner/__tests__/mock-data/workflow.evaluation.json
index 6ec7f2c386..4db9e2bd6d 100644
--- a/packages/cli/src/evaluation.ee/test-runner/__tests__/mock-data/workflow.evaluation.json
+++ b/packages/cli/src/evaluation.ee/test-runner/__tests__/mock-data/workflow.evaluation.json
@@ -50,7 +50,7 @@
     },
     {
       "parameters": {
-        "assignments": {
+        "metrics": {
           "assignments": [
             {
               "id": "3b65d55a-158f-40c6-9853-a1c44b7ba1e5",
@@ -70,13 +70,13 @@
       },
       "id": "0c7a1ee8-0cf0-4d7f-99a3-186bbcd8815a",
       "name": "Success",
-      "type": "n8n-nodes-base.set",
-      "typeVersion": 3.4,
+      "type": "n8n-nodes-base.evaluationMetrics",
+      "typeVersion": 1,
       "position": [980, 220]
     },
     {
       "parameters": {
-        "assignments": {
+        "metrics": {
           "assignments": [
             {
               "id": "6cc8b402-4a30-4873-b825-963a1f1b8b82",
@@ -90,8 +90,8 @@
       },
       "id": "50d3f84a-d99f-4e04-bdbd-3e8c2668e708",
       "name": "Fail",
-      "type": "n8n-nodes-base.set",
-      "typeVersion": 3.4,
+      "type": "n8n-nodes-base.evaluationMetrics",
+      "typeVersion": 1,
       "position": [980, 420]
     }
   ],
diff --git a/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts b/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts
index d91a2d0ee7..8d61fcb901 100644
--- a/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts
+++ b/packages/cli/src/evaluation.ee/test-runner/__tests__/test-runner.service.ee.test.ts
@@ -46,6 +46,12 @@ const wfEvaluationJson = JSON.parse(
 	readFileSync(path.join(__dirname, './mock-data/workflow.evaluation.json'), { encoding: 'utf-8' }),
 );
 
+const wfEvaluationMiddleJson = JSON.parse(
+	readFileSync(path.join(__dirname, './mock-data/workflow.evaluation-middle.json'), {
+		encoding: 'utf-8',
+	}),
+);
+
 const wfMultipleTriggersJson = JSON.parse(
 	readFileSync(path.join(__dirname, './mock-data/workflow.multiple-triggers.json'), {
 		encoding: 'utf-8',
@@ -131,9 +137,22 @@ function mockEvaluationExecutionData(metrics: Record) {
 	return mock<IRun>({
 		data: {
 			resultData: {
-				lastNodeExecuted: 'lastNode',
+				lastNodeExecuted: 'Success',
 				runData: {
-					lastNode: [
+					Success: [
+						{
+							data: {
+								main: [
+									[
+										{
+											json: metrics,
+										},
+									],
+								],
+							},
+						},
+					],
+					Fail: [
 						{
 							data: {
 								main: [
@@ -155,6 +174,52 @@ function mockEvaluationExecutionData(metrics: Record) { return mock({ }); }
+function mockEvaluationMiddleExecutionData(
+	firstMetrics: Record,
+	secondMetrics: Record,
+) {
+	// Build the run data for the two evaluation metrics nodes ('First Metric' and 'Success')
+	// so the tests can assert how metrics reported by multiple nodes are combined
+	const runData: Record = {
+		'First Metric': [
+			{
+				data: {
+					main: [
+						[
+							{
+								json: firstMetrics,
+							},
+						],
+					],
+				},
+			},
+		],
+		Success: [
+			{
+				data: {
+					main: [
+						[
+							{
+								json: secondMetrics,
+							},
+						],
+					],
+				},
+			},
+		],
+	};
+
+	return mock<IRun>({
+		data: {
+			resultData: {
+				lastNodeExecuted: 'Success',
+				runData,
+				error: undefined,
+			},
+		},
+	});
+}
+
 const errorReporter = mock();
 const logger = mockLogger();
 const telemetry = mock();
@@ -363,7 +428,6 @@ describe('TestRunnerService', () => {
 		expect(testRunRepository.markAsCompleted).toHaveBeenCalledTimes(1);
 		expect(testRunRepository.markAsCompleted).toHaveBeenCalledWith('test-run-id', {
 			metric1: 0.75,
-			metric2: 50,
 		});
 
 		expect(testRunRepository.incrementPassed).toHaveBeenCalledTimes(2);
@@ -868,6 +932,218 @@ describe('TestRunnerService', () => {
 		expect(workflowRunner.run).toHaveBeenCalledTimes(1);
 	});
 
+	test('should run workflow with metrics defined in the middle of the workflow', async () => {
+		const testRunnerService = new TestRunnerService(
+			logger,
+			telemetry,
+			workflowRepository,
+			workflowRunner,
+			executionRepository,
+			activeExecutions,
+			testRunRepository,
+			testCaseExecutionRepository,
+			testMetricRepository,
+			mockNodeTypes,
+			errorReporter,
+		);
+
+		workflowRepository.findById.calledWith('workflow-under-test-id').mockResolvedValueOnce({
+			id: 'workflow-under-test-id',
+			...wfUnderTestJson,
+		});
+
+		workflowRepository.findById.calledWith('evaluation-workflow-id').mockResolvedValueOnce({
+			id: 'evaluation-workflow-id',
+			...wfEvaluationMiddleJson,
+		});
+
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id');
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id-2');
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id-3');
+		workflowRunner.run.mockResolvedValueOnce('some-execution-id-4');
+
+		// Mock executions of workflow under test
+		activeExecutions.getPostExecutePromise
+			.calledWith('some-execution-id')
+			.mockResolvedValue(mockExecutionData());
+
+		activeExecutions.getPostExecutePromise
+			.calledWith('some-execution-id-3')
+			.mockResolvedValue(mockExecutionData());
+
+		// Mock executions of evaluation workflow
+		activeExecutions.getPostExecutePromise
+			.calledWith('some-execution-id-2')
+			.mockResolvedValue(mockEvaluationMiddleExecutionData({ metric2: 1 }, { metric1: 1 }));
+
+		activeExecutions.getPostExecutePromise
+			.calledWith('some-execution-id-4')
+			.mockResolvedValue(mockEvaluationMiddleExecutionData({ metric2: 2 }, { metric1: 0.5 }));
+
+		await testRunnerService.runTest(
+			mock<User>(),
+			mock<TestDefinition>({
+				workflowId: 'workflow-under-test-id',
+				evaluationWorkflowId: 'evaluation-workflow-id',
+				mockedNodes: [{ id: '72256d90-3a67-4e29-b032-47df4e5768af' }],
+			}),
+		);
+
+		expect(workflowRunner.run).toHaveBeenCalledTimes(4);
+
+		// Check workflow under test was executed
+		expect(workflowRunner.run).toHaveBeenCalledWith(
+			expect.objectContaining({
+				executionMode: 'evaluation',
+				pinData: {
+					'When clicking ‘Test workflow’':
+						executionDataJson.resultData.runData['When clicking ‘Test workflow’'][0].data.main[0],
+				},
+				workflowData: expect.objectContaining({
+					id: 'workflow-under-test-id',
+				}),
+			}),
+		);
+
+		// Check evaluation workflow was executed
+		expect(workflowRunner.run).toHaveBeenCalledWith(
+			expect.objectContaining({
+				executionMode: 'integrated',
+				executionData: expect.objectContaining({
+					executionData: expect.objectContaining({
+						nodeExecutionStack: expect.arrayContaining([
+							expect.objectContaining({ data: expect.anything() }),
+						]),
+					}),
+				}),
+				workflowData: expect.objectContaining({
+					id: 'evaluation-workflow-id',
+				}),
+			}),
+		);
+
+		// Check Test Run status was updated correctly
+		expect(testRunRepository.createTestRun).toHaveBeenCalledTimes(1);
+		expect(testRunRepository.markAsRunning).toHaveBeenCalledTimes(1);
+		expect(testRunRepository.markAsRunning).toHaveBeenCalledWith('test-run-id', expect.any(Number));
+		expect(testRunRepository.markAsCompleted).toHaveBeenCalledTimes(1);
+		expect(testRunRepository.markAsCompleted).toHaveBeenCalledWith('test-run-id', {
+			metric1: 0.75,
+			metric2: 1.5,
+		});
+
+		expect(testRunRepository.incrementPassed).toHaveBeenCalledTimes(2);
+		expect(testRunRepository.incrementFailed).not.toHaveBeenCalled();
+	});
+
+	test('should properly override metrics from earlier nodes with later 
ones', async () => { + const testRunnerService = new TestRunnerService( + logger, + telemetry, + workflowRepository, + workflowRunner, + executionRepository, + activeExecutions, + testRunRepository, + testCaseExecutionRepository, + testMetricRepository, + mockNodeTypes, + errorReporter, + ); + + workflowRepository.findById.calledWith('workflow-under-test-id').mockResolvedValueOnce({ + id: 'workflow-under-test-id', + ...wfUnderTestJson, + }); + + workflowRepository.findById.calledWith('evaluation-workflow-id').mockResolvedValueOnce({ + id: 'evaluation-workflow-id', + ...wfEvaluationMiddleJson, + }); + + workflowRunner.run.mockResolvedValueOnce('some-execution-id'); + workflowRunner.run.mockResolvedValueOnce('some-execution-id-2'); + workflowRunner.run.mockResolvedValueOnce('some-execution-id-3'); + workflowRunner.run.mockResolvedValueOnce('some-execution-id-4'); + + // Mock executions of workflow under test + activeExecutions.getPostExecutePromise + .calledWith('some-execution-id') + .mockResolvedValue(mockExecutionData()); + + activeExecutions.getPostExecutePromise + .calledWith('some-execution-id-3') + .mockResolvedValue(mockExecutionData()); + + // Mock executions of evaluation workflow + activeExecutions.getPostExecutePromise + .calledWith('some-execution-id-2') + .mockResolvedValue( + mockEvaluationMiddleExecutionData({ metric2: 5 }, { metric1: 1, metric2: 5 }), + ); + + activeExecutions.getPostExecutePromise + .calledWith('some-execution-id-4') + .mockResolvedValue( + mockEvaluationMiddleExecutionData({ metric2: 10 }, { metric1: 0.5, metric2: 10 }), + ); + + await testRunnerService.runTest( + mock(), + mock({ + workflowId: 'workflow-under-test-id', + evaluationWorkflowId: 'evaluation-workflow-id', + mockedNodes: [{ id: '72256d90-3a67-4e29-b032-47df4e5768af' }], + }), + ); + + expect(workflowRunner.run).toHaveBeenCalledTimes(4); + + // Check workflow under test was executed + expect(workflowRunner.run).toHaveBeenCalledWith( + expect.objectContaining({ + executionMode: 'evaluation', + pinData: { + 'When clicking ‘Test workflow’': + executionDataJson.resultData.runData['When clicking ‘Test workflow’'][0].data.main[0], + }, + workflowData: expect.objectContaining({ + id: 'workflow-under-test-id', + }), + }), + ); + + // Check evaluation workflow was executed + expect(workflowRunner.run).toHaveBeenCalledWith( + expect.objectContaining({ + executionMode: 'integrated', + executionData: expect.objectContaining({ + executionData: expect.objectContaining({ + nodeExecutionStack: expect.arrayContaining([ + expect.objectContaining({ data: expect.anything() }), + ]), + }), + }), + workflowData: expect.objectContaining({ + id: 'evaluation-workflow-id', + }), + }), + ); + + // Check Test Run status was updated correctly + expect(testRunRepository.createTestRun).toHaveBeenCalledTimes(1); + expect(testRunRepository.markAsRunning).toHaveBeenCalledTimes(1); + expect(testRunRepository.markAsRunning).toHaveBeenCalledWith('test-run-id', expect.any(Number)); + expect(testRunRepository.markAsCompleted).toHaveBeenCalledTimes(1); + expect(testRunRepository.markAsCompleted).toHaveBeenCalledWith('test-run-id', { + metric1: 0.75, + metric2: 7.5, + }); + + expect(testRunRepository.incrementPassed).toHaveBeenCalledTimes(2); + expect(testRunRepository.incrementFailed).not.toHaveBeenCalled(); + }); + describe('Test Run cancellation', () => { beforeAll(() => { jest.useFakeTimers(); diff --git a/packages/cli/src/evaluation.ee/test-runner/errors.ee.ts b/packages/cli/src/evaluation.ee/test-runner/errors.ee.ts index 
0b2df294b2..bd11ba479e 100644 --- a/packages/cli/src/evaluation.ee/test-runner/errors.ee.ts +++ b/packages/cli/src/evaluation.ee/test-runner/errors.ee.ts @@ -6,8 +6,6 @@ export type TestCaseExecutionErrorCode = | 'FAILED_TO_EXECUTE_WORKFLOW' | 'EVALUATION_WORKFLOW_DOES_NOT_EXIST' | 'FAILED_TO_EXECUTE_EVALUATION_WORKFLOW' - | 'METRICS_MISSING' - | 'UNKNOWN_METRICS' | 'INVALID_METRICS' | 'PAYLOAD_LIMIT_EXCEEDED' | 'UNKNOWN_ERROR'; diff --git a/packages/cli/src/evaluation.ee/test-runner/evaluation-metrics.ee.ts b/packages/cli/src/evaluation.ee/test-runner/evaluation-metrics.ee.ts index b2422f4b6b..1e063e262a 100644 --- a/packages/cli/src/evaluation.ee/test-runner/evaluation-metrics.ee.ts +++ b/packages/cli/src/evaluation.ee/test-runner/evaluation-metrics.ee.ts @@ -1,4 +1,3 @@ -import difference from 'lodash/difference'; import type { IDataObject } from 'n8n-workflow'; import { TestCaseExecutionError } from '@/evaluation.ee/test-runner/errors.ee'; @@ -43,16 +42,6 @@ export class EvaluationMetrics { } } - // Check that result contains all expected metrics - if ( - difference(Array.from(this.metricNames), Object.keys(addResultsInfo.addedMetrics)).length > 0 - ) { - throw new TestCaseExecutionError('METRICS_MISSING', { - expectedMetrics: Array.from(this.metricNames).sort(), - receivedMetrics: Object.keys(addResultsInfo.addedMetrics).sort(), - }); - } - return addResultsInfo; } diff --git a/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts b/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts index 628349bf85..c987523a7d 100644 --- a/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts +++ b/packages/cli/src/evaluation.ee/test-runner/test-runner.service.ee.ts @@ -1,8 +1,10 @@ import { Service } from '@n8n/di'; import { parse } from 'flatted'; +import difference from 'lodash/difference'; import { ErrorReporter, Logger } from 'n8n-core'; import { ExecutionCancelledError, NodeConnectionTypes, Workflow } from 'n8n-workflow'; import type { + AssignmentCollectionValue, IDataObject, IRun, IRunExecutionData, @@ -13,6 +15,7 @@ import assert from 'node:assert'; import { ActiveExecutions } from '@/active-executions'; import config from '@/config'; +import { EVALUATION_METRICS_NODE } from '@/constants'; import type { ExecutionEntity } from '@/databases/entities/execution-entity'; import type { MockedNodeItem, TestDefinition } from '@/databases/entities/test-definition.ee'; import type { TestRun } from '@/databases/entities/test-run.ee'; @@ -225,6 +228,50 @@ export class TestRunnerService { return await executePromise; } + /** + * Sync the metrics of the test definition with the evaluation workflow. 
+	 */
+	async syncMetrics(
+		testDefinitionId: string,
+		evaluationWorkflow: IWorkflowBase,
+	): Promise<Set<string>> {
+		const usedTestMetricNames = await this.getUsedTestMetricNames(evaluationWorkflow);
+		const existingTestMetrics = await this.testMetricRepository.find({
+			where: {
+				testDefinition: { id: testDefinitionId },
+			},
+		});
+
+		const existingMetricNames = new Set(existingTestMetrics.map((metric) => metric.name));
+		const metricsToAdd = difference(
+			Array.from(usedTestMetricNames),
+			Array.from(existingMetricNames),
+		);
+		const metricsToRemove = difference(
+			Array.from(existingMetricNames),
+			Array.from(usedTestMetricNames),
+		);
+
+		// Add new metrics
+		const metricsToAddEntities = metricsToAdd.map((metricName) =>
+			this.testMetricRepository.create({
+				name: metricName,
+				testDefinition: { id: testDefinitionId },
+			}),
+		);
+		await this.testMetricRepository.save(metricsToAddEntities);
+
+		// Remove no longer used metrics
+		metricsToRemove.forEach(async (metricName) => {
+			const metric = existingTestMetrics.find((m) => m.name === metricName);
+			assert(metric, 'Existing metric not found');
+
+			await this.testMetricRepository.delete(metric.id);
+		});
+
+		return usedTestMetricNames;
+	}
+
 	/**
 	 * Run the evaluation workflow with the expected and actual run data.
 	 */
@@ -265,35 +312,45 @@ export class TestRunnerService {
 		return await executePromise;
 	}
 
+	/**
+	 * Get the evaluation metrics nodes from a workflow.
+	 */
+	static getEvaluationMetricsNodes(workflow: IWorkflowBase) {
+		return workflow.nodes.filter((node) => node.type === EVALUATION_METRICS_NODE);
+	}
+
 	/**
 	 * Evaluation result is the first item in the output of the last node
 	 * executed in the evaluation workflow. Defaults to an empty object
 	 * in case the node doesn't produce any output items.
 	 */
-	private extractEvaluationResult(execution: IRun): IDataObject {
+	private extractEvaluationResult(execution: IRun, evaluationWorkflow: IWorkflowBase): IDataObject {
 		const lastNodeExecuted = execution.data.resultData.lastNodeExecuted;
 		assert(lastNodeExecuted, 'Could not find the last node executed in evaluation workflow');
 
+		// Collect the first output item of every evaluation metrics node and merge them into one object
+		const metricsNodes = TestRunnerService.getEvaluationMetricsNodes(evaluationWorkflow);
+		const metricsRunData = metricsNodes.flatMap(
+			(node) => execution.data.resultData.runData[node.name],
+		);
+		const metricsData = metricsRunData.reverse().map((data) => data.data?.main?.[0]?.[0]?.json);
+		const metricsResult = metricsData.reduce((acc, curr) => ({ ...acc, ...curr }), {}) ?? {};
 
-		// Extract the output of the last node executed in the evaluation workflow
-		// We use only the first item of a first main output
-		const lastNodeTaskData = execution.data.resultData.runData[lastNodeExecuted]?.[0];
-		const mainConnectionData = lastNodeTaskData?.data?.main?.[0];
-		return mainConnectionData?.[0]?.json ?? {};
+		return metricsResult;
 	}
 
 	/**
 	 * Get the metrics to collect from the evaluation workflow execution results.
*/ - private async getTestMetricNames(testDefinitionId: string) { - const metrics = await this.testMetricRepository.find({ - where: { - testDefinition: { - id: testDefinitionId, - }, - }, + private async getUsedTestMetricNames(evaluationWorkflow: IWorkflowBase) { + const metricsNodes = TestRunnerService.getEvaluationMetricsNodes(evaluationWorkflow); + const metrics = metricsNodes.map((node) => { + const metricsParameter = node.parameters?.metrics as AssignmentCollectionValue; + assert(metricsParameter, 'Metrics parameter not found'); + + const metricsNames = metricsParameter.assignments.map((assignment) => assignment.name); + return metricsNames; }); - return new Set(metrics.map((m) => m.name)); + return new Set(metrics.flat()); } /** @@ -329,7 +386,6 @@ export class TestRunnerService { if (!evaluationWorkflow) { throw new TestRunError('EVALUATION_WORKFLOW_NOT_FOUND'); } - /// // 1. Make test cases from previous executions /// @@ -359,8 +415,8 @@ export class TestRunnerService { pastExecutions.map((e) => e.id), ); - // Get the metrics to collect from the evaluation workflow - const testMetricNames = await this.getTestMetricNames(test.id); + // Sync the metrics of the test definition with the evaluation workflow + const testMetricNames = await this.syncMetrics(test.id, evaluationWorkflow); // 2. Run over all the test cases const pastExecutionIds = pastExecutions.map((e) => e.id); @@ -465,8 +521,8 @@ export class TestRunnerService { this.logger.debug('Evaluation execution finished', { pastExecutionId }); // Extract the output of the last node executed in the evaluation workflow - const { addedMetrics, unknownMetrics } = metrics.addResults( - this.extractEvaluationResult(evalExecution), + const { addedMetrics } = metrics.addResults( + this.extractEvaluationResult(evalExecution, evaluationWorkflow), ); if (evalExecution.data.resultData.error) { @@ -483,22 +539,12 @@ export class TestRunnerService { await Db.transaction(async (trx) => { await this.testRunRepository.incrementPassed(testRun.id, trx); - // Add warning if the evaluation workflow produced an unknown metric - if (unknownMetrics.size > 0) { - await this.testCaseExecutionRepository.markAsWarning({ - testRunId: testRun.id, - pastExecutionId, - errorCode: 'UNKNOWN_METRICS', - errorDetails: { unknownMetrics: Array.from(unknownMetrics) }, - }); - } else { - await this.testCaseExecutionRepository.markAsCompleted({ - testRunId: testRun.id, - pastExecutionId, - metrics: addedMetrics, - trx, - }); - } + await this.testCaseExecutionRepository.markAsCompleted({ + testRunId: testRun.id, + pastExecutionId, + metrics: addedMetrics, + trx, + }); }); } } catch (e) { diff --git a/packages/cli/src/server.ts b/packages/cli/src/server.ts index 9ba7aa930c..e2d05f2a66 100644 --- a/packages/cli/src/server.ts +++ b/packages/cli/src/server.ts @@ -66,7 +66,6 @@ import '@/executions/executions.controller'; import '@/external-secrets.ee/external-secrets.controller.ee'; import '@/license/license.controller'; import '@/evaluation.ee/test-definitions.controller.ee'; -import '@/evaluation.ee/metrics.controller'; import '@/evaluation.ee/test-runs.controller.ee'; import '@/workflows/workflow-history.ee/workflow-history.controller.ee'; import '@/workflows/workflows.controller'; diff --git a/packages/cli/test/integration/evaluation/metrics.api.test.ts b/packages/cli/test/integration/evaluation/metrics.api.test.ts deleted file mode 100644 index ff04aedf12..0000000000 --- a/packages/cli/test/integration/evaluation/metrics.api.test.ts +++ /dev/null @@ -1,381 +0,0 @@ 
-import { Container } from '@n8n/di'; -import type { IWorkflowBase } from 'n8n-workflow'; - -import type { TestDefinition } from '@/databases/entities/test-definition.ee'; -import type { User } from '@/databases/entities/user'; -import { TestDefinitionRepository } from '@/databases/repositories/test-definition.repository.ee'; -import { TestMetricRepository } from '@/databases/repositories/test-metric.repository.ee'; -import { createUserShell } from '@test-integration/db/users'; -import { createWorkflow } from '@test-integration/db/workflows'; -import * as testDb from '@test-integration/test-db'; -import type { SuperAgentTest } from '@test-integration/types'; -import * as utils from '@test-integration/utils'; - -let authOwnerAgent: SuperAgentTest; -let workflowUnderTest: IWorkflowBase; -let otherWorkflow: IWorkflowBase; -let testDefinition: TestDefinition; -let otherTestDefinition: TestDefinition; -let ownerShell: User; - -const testServer = utils.setupTestServer({ endpointGroups: ['evaluation'] }); - -beforeAll(async () => { - ownerShell = await createUserShell('global:owner'); - authOwnerAgent = testServer.authAgentFor(ownerShell); -}); - -beforeEach(async () => { - await testDb.truncate(['TestDefinition', 'TestMetric']); - - workflowUnderTest = await createWorkflow({ name: 'workflow-under-test' }, ownerShell); - - testDefinition = Container.get(TestDefinitionRepository).create({ - name: 'test', - workflow: { id: workflowUnderTest.id }, - }); - await Container.get(TestDefinitionRepository).save(testDefinition); - - otherWorkflow = await createWorkflow({ name: 'other-workflow' }); - - otherTestDefinition = Container.get(TestDefinitionRepository).create({ - name: 'other-test', - workflow: { id: otherWorkflow.id }, - }); - await Container.get(TestDefinitionRepository).save(otherTestDefinition); -}); - -describe('GET /evaluation/test-definitions/:testDefinitionId/metrics', () => { - test('should retrieve empty list of metrics for a test definition', async () => { - const resp = await authOwnerAgent.get( - `/evaluation/test-definitions/${testDefinition.id}/metrics`, - ); - - expect(resp.statusCode).toBe(200); - expect(resp.body.data.length).toBe(0); - }); - - test('should retrieve metrics for a test definition', async () => { - const newMetric = Container.get(TestMetricRepository).create({ - testDefinition: { id: testDefinition.id }, - name: 'metric-1', - }); - await Container.get(TestMetricRepository).save(newMetric); - - const newMetric2 = Container.get(TestMetricRepository).create({ - testDefinition: { id: testDefinition.id }, - name: 'metric-2', - }); - await Container.get(TestMetricRepository).save(newMetric2); - - const resp = await authOwnerAgent.get( - `/evaluation/test-definitions/${testDefinition.id}/metrics`, - ); - - expect(resp.statusCode).toBe(200); - expect(resp.body.data.length).toBe(2); - expect(resp.body.data).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - id: expect.any(String), - name: 'metric-1', - }), - expect.objectContaining({ - id: expect.any(String), - name: 'metric-2', - }), - ]), - ); - }); - - test('should return 404 if test definition does not exist', async () => { - const resp = await authOwnerAgent.get('/evaluation/test-definitions/999/metrics'); - - expect(resp.statusCode).toBe(404); - }); - - test('should return 404 if test definition is not accessible to the user', async () => { - const resp = await authOwnerAgent.get( - `/evaluation/test-definitions/${otherTestDefinition.id}/metrics`, - ); - - expect(resp.statusCode).toBe(404); - }); -}); 
- -describe('GET /evaluation/test-definitions/:testDefinitionId/metrics/:id', () => { - test('should retrieve a metric for a test definition', async () => { - const newMetric = Container.get(TestMetricRepository).create({ - testDefinition: { id: testDefinition.id }, - name: 'metric-1', - }); - await Container.get(TestMetricRepository).save(newMetric); - - const resp = await authOwnerAgent.get( - `/evaluation/test-definitions/${testDefinition.id}/metrics/${newMetric.id}`, - ); - - expect(resp.statusCode).toBe(200); - expect(resp.body.data).toEqual( - expect.objectContaining({ - id: newMetric.id, - name: 'metric-1', - }), - ); - }); - - test('should return 404 if metric does not exist', async () => { - const resp = await authOwnerAgent.get( - `/evaluation/test-definitions/${testDefinition.id}/metrics/999`, - ); - - expect(resp.statusCode).toBe(404); - }); - - test('should return 404 if metric is not accessible to the user', async () => { - const newMetric = Container.get(TestMetricRepository).create({ - testDefinition: { id: otherTestDefinition.id }, - name: 'metric-1', - }); - await Container.get(TestMetricRepository).save(newMetric); - - const resp = await authOwnerAgent.get( - `/evaluation/test-definitions/${otherTestDefinition.id}/metrics/${newMetric.id}`, - ); - - expect(resp.statusCode).toBe(404); - }); -}); - -describe('POST /evaluation/test-definitions/:testDefinitionId/metrics', () => { - test('should create a metric for a test definition', async () => { - const resp = await authOwnerAgent - .post(`/evaluation/test-definitions/${testDefinition.id}/metrics`) - .send({ - name: 'metric-1', - }); - - expect(resp.statusCode).toBe(200); - expect(resp.body.data).toEqual( - expect.objectContaining({ - id: expect.any(String), - name: 'metric-1', - }), - ); - - const metrics = await Container.get(TestMetricRepository).find({ - where: { testDefinition: { id: testDefinition.id } }, - }); - expect(metrics.length).toBe(1); - expect(metrics[0].name).toBe('metric-1'); - }); - - test('should return 400 if name is missing', async () => { - const resp = await authOwnerAgent - .post(`/evaluation/test-definitions/${testDefinition.id}/metrics`) - .send({}); - - expect(resp.statusCode).toBe(400); - expect(resp.body.errors).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - code: 'invalid_type', - message: 'Required', - path: ['name'], - }), - ]), - ); - }); - - test('should return 400 if name is not a string', async () => { - const resp = await authOwnerAgent - .post(`/evaluation/test-definitions/${testDefinition.id}/metrics`) - .send({ - name: 123, - }); - - expect(resp.statusCode).toBe(400); - expect(resp.body.errors).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - code: 'invalid_type', - message: 'Expected string, received number', - path: ['name'], - }), - ]), - ); - }); - - test('should return 404 if test definition does not exist', async () => { - const resp = await authOwnerAgent.post('/evaluation/test-definitions/999/metrics').send({ - name: 'metric-1', - }); - - expect(resp.statusCode).toBe(404); - }); - - test('should return 404 if test definition is not accessible to the user', async () => { - const resp = await authOwnerAgent - .post(`/evaluation/test-definitions/${otherTestDefinition.id}/metrics`) - .send({ - name: 'metric-1', - }); - - expect(resp.statusCode).toBe(404); - }); -}); - -describe('PATCH /evaluation/test-definitions/:testDefinitionId/metrics/:id', () => { - test('should update a metric for a test definition', async () => { - const newMetric = 
Container.get(TestMetricRepository).create({ - testDefinition: { id: testDefinition.id }, - name: 'metric-1', - }); - await Container.get(TestMetricRepository).save(newMetric); - - const resp = await authOwnerAgent - .patch(`/evaluation/test-definitions/${testDefinition.id}/metrics/${newMetric.id}`) - .send({ - name: 'metric-2', - }); - - expect(resp.statusCode).toBe(200); - expect(resp.body.data).toEqual( - expect.objectContaining({ - id: newMetric.id, - name: 'metric-2', - }), - ); - - const metrics = await Container.get(TestMetricRepository).find({ - where: { testDefinition: { id: testDefinition.id } }, - }); - expect(metrics.length).toBe(1); - expect(metrics[0].name).toBe('metric-2'); - }); - - test('should return 400 if name is missing', async () => { - const newMetric = Container.get(TestMetricRepository).create({ - testDefinition: { id: testDefinition.id }, - name: 'metric-1', - }); - await Container.get(TestMetricRepository).save(newMetric); - - const resp = await authOwnerAgent - .patch(`/evaluation/test-definitions/${testDefinition.id}/metrics/${newMetric.id}`) - .send({}); - - expect(resp.statusCode).toBe(400); - expect(resp.body.errors).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - code: 'invalid_type', - message: 'Required', - path: ['name'], - }), - ]), - ); - }); - - test('should return 400 if name is not a string', async () => { - const newMetric = Container.get(TestMetricRepository).create({ - testDefinition: { id: testDefinition.id }, - name: 'metric-1', - }); - await Container.get(TestMetricRepository).save(newMetric); - - const resp = await authOwnerAgent - .patch(`/evaluation/test-definitions/${testDefinition.id}/metrics/${newMetric.id}`) - .send({ - name: 123, - }); - - expect(resp.statusCode).toBe(400); - expect(resp.body.errors).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - code: 'invalid_type', - message: 'Expected string, received number', - }), - ]), - ); - }); - - test('should return 404 if metric does not exist', async () => { - const resp = await authOwnerAgent - .patch(`/evaluation/test-definitions/${testDefinition.id}/metrics/999`) - .send({ - name: 'metric-1', - }); - - expect(resp.statusCode).toBe(404); - }); - - test('should return 404 if test definition does not exist', async () => { - const resp = await authOwnerAgent.patch('/evaluation/test-definitions/999/metrics/999').send({ - name: 'metric-1', - }); - - expect(resp.statusCode).toBe(404); - }); - - test('should return 404 if metric is not accessible to the user', async () => { - const newMetric = Container.get(TestMetricRepository).create({ - testDefinition: { id: otherTestDefinition.id }, - name: 'metric-1', - }); - await Container.get(TestMetricRepository).save(newMetric); - - const resp = await authOwnerAgent - .patch(`/evaluation/test-definitions/${otherTestDefinition.id}/metrics/${newMetric.id}`) - .send({ - name: 'metric-2', - }); - - expect(resp.statusCode).toBe(404); - }); -}); - -describe('DELETE /evaluation/test-definitions/:testDefinitionId/metrics/:id', () => { - test('should delete a metric for a test definition', async () => { - const newMetric = Container.get(TestMetricRepository).create({ - testDefinition: { id: testDefinition.id }, - name: 'metric-1', - }); - await Container.get(TestMetricRepository).save(newMetric); - - const resp = await authOwnerAgent.delete( - `/evaluation/test-definitions/${testDefinition.id}/metrics/${newMetric.id}`, - ); - - expect(resp.statusCode).toBe(200); - expect(resp.body.data).toEqual({ success: true }); - - const 
metrics = await Container.get(TestMetricRepository).find({ - where: { testDefinition: { id: testDefinition.id } }, - }); - expect(metrics.length).toBe(0); - }); - - test('should return 404 if metric does not exist', async () => { - const resp = await authOwnerAgent.delete( - `/evaluation/test-definitions/${testDefinition.id}/metrics/999`, - ); - - expect(resp.statusCode).toBe(404); - }); - - test('should return 404 if metric is not accessible to the user', async () => { - const newMetric = Container.get(TestMetricRepository).create({ - testDefinition: { id: otherTestDefinition.id }, - name: 'metric-1', - }); - await Container.get(TestMetricRepository).save(newMetric); - - const resp = await authOwnerAgent.delete( - `/evaluation/test-definitions/${otherTestDefinition.id}/metrics/${newMetric.id}`, - ); - - expect(resp.statusCode).toBe(404); - }); -}); diff --git a/packages/cli/test/integration/shared/utils/test-server.ts b/packages/cli/test/integration/shared/utils/test-server.ts index b242915c92..739a89305c 100644 --- a/packages/cli/test/integration/shared/utils/test-server.ts +++ b/packages/cli/test/integration/shared/utils/test-server.ts @@ -281,7 +281,6 @@ export const setupTestServer = ({ break; case 'evaluation': - await import('@/evaluation.ee/metrics.controller'); await import('@/evaluation.ee/test-definitions.controller.ee'); await import('@/evaluation.ee/test-runs.controller.ee'); break; diff --git a/packages/frontend/editor-ui/src/api/testDefinition.ee.ts b/packages/frontend/editor-ui/src/api/testDefinition.ee.ts index c15b2bc9d3..cf4a123b10 100644 --- a/packages/frontend/editor-ui/src/api/testDefinition.ee.ts +++ b/packages/frontend/editor-ui/src/api/testDefinition.ee.ts @@ -83,8 +83,6 @@ export interface TestCaseExecutionRecord { } const endpoint = '/evaluation/test-definitions'; -const getMetricsEndpoint = (testDefinitionId: string, metricId?: string) => - `${endpoint}/${testDefinitionId}/metrics${metricId ? 
`/${metricId}` : ''}`; export async function getTestDefinitions( context: IRestApiContext, @@ -141,86 +139,6 @@ export async function getExampleEvaluationInput( ); } -// Metrics -export interface TestMetricRecord { - id: string; - name: string; - testDefinitionId: string; - createdAt?: string; - updatedAt?: string; -} - -export interface CreateTestMetricParams { - testDefinitionId: string; - name: string; -} - -export interface UpdateTestMetricParams { - name: string; - id: string; - testDefinitionId: string; -} - -export interface DeleteTestMetricParams { - testDefinitionId: string; - id: string; -} - -export const getTestMetrics = async (context: IRestApiContext, testDefinitionId: string) => { - return await makeRestApiRequest( - context, - 'GET', - getMetricsEndpoint(testDefinitionId), - ); -}; - -export const getTestMetric = async ( - context: IRestApiContext, - testDefinitionId: string, - id: string, -) => { - return await makeRestApiRequest( - context, - 'GET', - getMetricsEndpoint(testDefinitionId, id), - ); -}; - -export const createTestMetric = async ( - context: IRestApiContext, - params: CreateTestMetricParams, -) => { - return await makeRestApiRequest( - context, - 'POST', - getMetricsEndpoint(params.testDefinitionId), - { name: params.name }, - ); -}; - -export const updateTestMetric = async ( - context: IRestApiContext, - params: UpdateTestMetricParams, -) => { - return await makeRestApiRequest( - context, - 'PATCH', - getMetricsEndpoint(params.testDefinitionId, params.id), - { name: params.name }, - ); -}; - -export const deleteTestMetric = async ( - context: IRestApiContext, - params: DeleteTestMetricParams, -) => { - return await makeRestApiRequest( - context, - 'DELETE', - getMetricsEndpoint(params.testDefinitionId, params.id), - ); -}; - const getRunsEndpoint = (testDefinitionId: string, runId?: string) => `${endpoint}/${testDefinitionId}/runs${runId ? `/${runId}` : ''}`; diff --git a/packages/frontend/editor-ui/src/components/TestDefinition/EditDefinition/MetricsInput.vue b/packages/frontend/editor-ui/src/components/TestDefinition/EditDefinition/MetricsInput.vue deleted file mode 100644 index f679b6cdfa..0000000000 --- a/packages/frontend/editor-ui/src/components/TestDefinition/EditDefinition/MetricsInput.vue +++ /dev/null @@ -1,70 +0,0 @@ - - - - - diff --git a/packages/frontend/editor-ui/src/components/TestDefinition/EditDefinition/sections/ConfigSection.vue b/packages/frontend/editor-ui/src/components/TestDefinition/EditDefinition/sections/ConfigSection.vue index 038bbf71f3..c02d02064d 100644 --- a/packages/frontend/editor-ui/src/components/TestDefinition/EditDefinition/sections/ConfigSection.vue +++ b/packages/frontend/editor-ui/src/components/TestDefinition/EditDefinition/sections/ConfigSection.vue @@ -1,8 +1,6 @@