feat(core): Automatically extract evaluation metrics (no-changelog) (#14051)

This commit is contained in:
oleg
2025-03-25 15:43:19 +01:00
committed by GitHub
parent 22e6569f7e
commit 53e11b19ad
28 changed files with 460 additions and 1286 deletions

View File

@@ -195,3 +195,5 @@ export const WsStatusCodes = {
} as const;
export const FREE_AI_CREDITS_CREDENTIAL_NAME = 'n8n free OpenAI API credits';
export const EVALUATION_METRICS_NODE = `${NODE_PACKAGE_PREFIX}base.evaluationMetrics`;

View File

@@ -1,141 +0,0 @@
import express from 'express';
import { TestMetricRepository } from '@/databases/repositories/test-metric.repository.ee';
import { Delete, Get, Patch, Post, RestController } from '@/decorators';
import { NotFoundError } from '@/errors/response-errors/not-found.error';
import {
testMetricCreateRequestBodySchema,
testMetricPatchRequestBodySchema,
} from '@/evaluation.ee/metric.schema';
import { getSharedWorkflowIds } from '@/public-api/v1/handlers/workflows/workflows.service';
import { Telemetry } from '@/telemetry';
import { TestDefinitionService } from './test-definition.service.ee';
import { TestMetricsRequest } from './test-definitions.types.ee';
@RestController('/evaluation/test-definitions')
export class TestMetricsController {
constructor(
private readonly testDefinitionService: TestDefinitionService,
private readonly testMetricRepository: TestMetricRepository,
private readonly telemetry: Telemetry,
) {}
// This method is used in multiple places in the controller to get the test definition
// (or just check that it exists and the user has access to it).
private async getTestDefinition(
req:
| TestMetricsRequest.GetOne
| TestMetricsRequest.GetMany
| TestMetricsRequest.Patch
| TestMetricsRequest.Delete
| TestMetricsRequest.Create,
) {
const { testDefinitionId } = req.params;
const userAccessibleWorkflowIds = await getSharedWorkflowIds(req.user, ['workflow:read']);
const testDefinition = await this.testDefinitionService.findOne(
testDefinitionId,
userAccessibleWorkflowIds,
);
if (!testDefinition) throw new NotFoundError('Test definition not found');
return testDefinition;
}
@Get('/:testDefinitionId/metrics')
async getMany(req: TestMetricsRequest.GetMany) {
const { testDefinitionId } = req.params;
await this.getTestDefinition(req);
return await this.testMetricRepository.find({
where: { testDefinition: { id: testDefinitionId } },
});
}
@Get('/:testDefinitionId/metrics/:id')
async getOne(req: TestMetricsRequest.GetOne) {
const { id: metricId, testDefinitionId } = req.params;
await this.getTestDefinition(req);
const metric = await this.testMetricRepository.findOne({
where: { id: metricId, testDefinition: { id: testDefinitionId } },
});
if (!metric) throw new NotFoundError('Metric not found');
return metric;
}
@Post('/:testDefinitionId/metrics')
async create(req: TestMetricsRequest.Create, res: express.Response) {
const bodyParseResult = testMetricCreateRequestBodySchema.safeParse(req.body);
if (!bodyParseResult.success) {
res.status(400).json({ errors: bodyParseResult.error.errors });
return;
}
const testDefinition = await this.getTestDefinition(req);
const metric = this.testMetricRepository.create({
...req.body,
testDefinition,
});
return await this.testMetricRepository.save(metric);
}
@Patch('/:testDefinitionId/metrics/:id')
async patch(req: TestMetricsRequest.Patch, res: express.Response) {
const { id: metricId, testDefinitionId } = req.params;
const bodyParseResult = testMetricPatchRequestBodySchema.safeParse(req.body);
if (!bodyParseResult.success) {
res.status(400).json({ errors: bodyParseResult.error.errors });
return;
}
await this.getTestDefinition(req);
const metric = await this.testMetricRepository.findOne({
where: { id: metricId, testDefinition: { id: testDefinitionId } },
});
if (!metric) throw new NotFoundError('Metric not found');
const updateResult = await this.testMetricRepository.update(metricId, bodyParseResult.data);
// Send telemetry event if the metric was updated
if (updateResult.affected === 1 && metric.name !== bodyParseResult.data.name) {
this.telemetry.track('User added metrics to test', {
metric_id: metricId,
metric_name: bodyParseResult.data.name,
test_id: testDefinitionId,
});
}
// Respond with the updated metric
return await this.testMetricRepository.findOneBy({ id: metricId });
}
@Delete('/:testDefinitionId/metrics/:id')
async delete(req: TestMetricsRequest.Delete) {
const { id: metricId, testDefinitionId } = req.params;
await this.getTestDefinition(req);
const metric = await this.testMetricRepository.findOne({
where: { id: metricId, testDefinition: { id: testDefinitionId } },
});
if (!metric) throw new NotFoundError('Metric not found');
await this.testMetricRepository.delete(metricId);
return { success: true };
}
}

View File

@@ -47,36 +47,6 @@ export declare namespace TestDefinitionsRequest {
>;
}
// ----------------------------------
// /test-definitions/:testDefinitionId/metrics
// ----------------------------------
export declare namespace TestMetricsRequest {
namespace RouteParams {
type TestDefinitionId = {
testDefinitionId: string;
};
type TestMetricId = {
id: string;
};
}
type GetOne = AuthenticatedRequest<RouteParams.TestDefinitionId & RouteParams.TestMetricId>;
type GetMany = AuthenticatedRequest<RouteParams.TestDefinitionId>;
type Create = AuthenticatedRequest<RouteParams.TestDefinitionId, {}, { name: string }>;
type Patch = AuthenticatedRequest<
RouteParams.TestDefinitionId & RouteParams.TestMetricId,
{},
{ name: string }
>;
type Delete = AuthenticatedRequest<RouteParams.TestDefinitionId & RouteParams.TestMetricId>;
}
// ----------------------------------
// /test-definitions/:testDefinitionId/runs
// ----------------------------------

View File

@@ -24,14 +24,6 @@ describe('EvaluationMetrics', () => {
);
});
test('should throw when missing values', () => {
const testMetricNames = new Set(['metric1', 'metric2']);
const metrics = new EvaluationMetrics(testMetricNames);
expect(() => metrics.addResults({ metric1: 1 })).toThrow('METRICS_MISSING');
expect(() => metrics.addResults({ metric2: 0.2 })).toThrow('METRICS_MISSING');
});
test('should handle empty metrics', () => {
const testMetricNames = new Set(['metric1', 'metric2']);
const metrics = new EvaluationMetrics(testMetricNames);

View File

@@ -0,0 +1,85 @@
{
"nodes": [
{
"parameters": {},
"id": "6dde1608-135f-441d-8438-40f605e4dae3",
"name": "Execute Workflow Trigger",
"type": "n8n-nodes-base.executeWorkflowTrigger",
"typeVersion": 1,
"position": [-180, -40]
},
{
"parameters": {
"metrics": {
"assignments": [
{
"id": "f1cab1e4-dabc-4750-a1f7-6669a60213c9",
"name": "metric1",
"value": 20,
"type": "number"
},
{
"id": "a66ed953-5341-47f6-8565-47640d987f5f",
"name": "metric2",
"value": 30,
"type": "number"
}
]
}
},
"id": "dfc71b70-7b7a-4dde-914f-cc4ffc99b18c",
"name": "Success",
"type": "n8n-nodes-base.evaluationMetrics",
"typeVersion": 1,
"position": [620, -40]
},
{
"parameters": {
"metrics": {
"assignments": [
{
"id": "1e1153da-77c0-4cb5-804a-3f4bc40833dd",
"name": "metric2",
"value": 10,
"type": "number"
}
]
}
},
"id": "5ef5be33-37e0-4c95-81c8-3fd677bdca88",
"name": "First Metric",
"type": "n8n-nodes-base.evaluationMetrics",
"typeVersion": 1,
"position": [160, -40]
}
],
"connections": {
"Execute Workflow Trigger": {
"main": [
[
{
"node": "First Metric",
"type": "main",
"index": 0
}
]
]
},
"First Metric": {
"main": [
[
{
"node": "Success",
"type": "main",
"index": 0
}
]
]
}
},
"pinData": {},
"meta": {
"templateCredsSetupCompleted": true,
"instanceId": "27cc9b56542ad45b38725555722c50a1c3fee1670bbb67980558314ee08517c4"
}
}

View File

@@ -50,7 +50,7 @@
},
{
"parameters": {
"assignments": {
"metrics": {
"assignments": [
{
"id": "3b65d55a-158f-40c6-9853-a1c44b7ba1e5",
@@ -70,13 +70,13 @@
},
"id": "0c7a1ee8-0cf0-4d7f-99a3-186bbcd8815a",
"name": "Success",
"type": "n8n-nodes-base.set",
"typeVersion": 3.4,
"type": "n8n-nodes-base.evaluationMetrics",
"typeVersion": 1,
"position": [980, 220]
},
{
"parameters": {
"assignments": {
"metrics": {
"assignments": [
{
"id": "6cc8b402-4a30-4873-b825-963a1f1b8b82",
@@ -90,8 +90,8 @@
},
"id": "50d3f84a-d99f-4e04-bdbd-3e8c2668e708",
"name": "Fail",
"type": "n8n-nodes-base.set",
"typeVersion": 3.4,
"type": "n8n-nodes-base.evaluationMetrics",
"typeVersion": 1,
"position": [980, 420]
}
],

View File

@@ -46,6 +46,12 @@ const wfEvaluationJson = JSON.parse(
readFileSync(path.join(__dirname, './mock-data/workflow.evaluation.json'), { encoding: 'utf-8' }),
);
const wfEvaluationMiddleJson = JSON.parse(
readFileSync(path.join(__dirname, './mock-data/workflow.evaluation-middle.json'), {
encoding: 'utf-8',
}),
);
const wfMultipleTriggersJson = JSON.parse(
readFileSync(path.join(__dirname, './mock-data/workflow.multiple-triggers.json'), {
encoding: 'utf-8',
@@ -131,9 +137,22 @@ function mockEvaluationExecutionData(metrics: Record<string, GenericValue>) {
return mock<IRun>({
data: {
resultData: {
lastNodeExecuted: 'lastNode',
lastNodeExecuted: 'Success',
runData: {
lastNode: [
Success: [
{
data: {
main: [
[
{
json: metrics,
},
],
],
},
},
],
Fail: [
{
data: {
main: [
@@ -155,6 +174,52 @@ function mockEvaluationExecutionData(metrics: Record<string, GenericValue>) {
});
}
function mockEvaluationMiddleExecutionData(
firstMetrics: Record<string, GenericValue>,
secondMetrics: Record<string, GenericValue>,
) {
// Clone the metrics to avoid modifying the passed object
// For test assertions, these run-data need special handling
const runData: Record<string, any> = {
'First Metric': [
{
data: {
main: [
[
{
json: firstMetrics,
},
],
],
},
},
],
Success: [
{
data: {
main: [
[
{
json: secondMetrics,
},
],
],
},
},
],
};
return mock<IRun>({
data: {
resultData: {
lastNodeExecuted: 'Success',
runData,
error: undefined,
},
},
});
}
const errorReporter = mock<ErrorReporter>();
const logger = mockLogger();
const telemetry = mock<Telemetry>();
@@ -363,7 +428,6 @@ describe('TestRunnerService', () => {
expect(testRunRepository.markAsCompleted).toHaveBeenCalledTimes(1);
expect(testRunRepository.markAsCompleted).toHaveBeenCalledWith('test-run-id', {
metric1: 0.75,
metric2: 50,
});
expect(testRunRepository.incrementPassed).toHaveBeenCalledTimes(2);
@@ -868,6 +932,218 @@ describe('TestRunnerService', () => {
expect(workflowRunner.run).toHaveBeenCalledTimes(1);
});
test('should run workflow with metrics defined in the middle of the workflow', async () => {
const testRunnerService = new TestRunnerService(
logger,
telemetry,
workflowRepository,
workflowRunner,
executionRepository,
activeExecutions,
testRunRepository,
testCaseExecutionRepository,
testMetricRepository,
mockNodeTypes,
errorReporter,
);
workflowRepository.findById.calledWith('workflow-under-test-id').mockResolvedValueOnce({
id: 'workflow-under-test-id',
...wfUnderTestJson,
});
workflowRepository.findById.calledWith('evaluation-workflow-id').mockResolvedValueOnce({
id: 'evaluation-workflow-id',
...wfEvaluationMiddleJson,
});
workflowRunner.run.mockResolvedValueOnce('some-execution-id');
workflowRunner.run.mockResolvedValueOnce('some-execution-id-2');
workflowRunner.run.mockResolvedValueOnce('some-execution-id-3');
workflowRunner.run.mockResolvedValueOnce('some-execution-id-4');
// Mock executions of workflow under test
activeExecutions.getPostExecutePromise
.calledWith('some-execution-id')
.mockResolvedValue(mockExecutionData());
activeExecutions.getPostExecutePromise
.calledWith('some-execution-id-3')
.mockResolvedValue(mockExecutionData());
// Mock executions of evaluation workflow
activeExecutions.getPostExecutePromise
.calledWith('some-execution-id-2')
.mockResolvedValue(mockEvaluationMiddleExecutionData({ metric2: 1 }, { metric1: 1 }));
activeExecutions.getPostExecutePromise
.calledWith('some-execution-id-4')
.mockResolvedValue(mockEvaluationMiddleExecutionData({ metric2: 2 }, { metric1: 0.5 }));
await testRunnerService.runTest(
mock<User>(),
mock<TestDefinition>({
workflowId: 'workflow-under-test-id',
evaluationWorkflowId: 'evaluation-workflow-id',
mockedNodes: [{ id: '72256d90-3a67-4e29-b032-47df4e5768af' }],
}),
);
expect(workflowRunner.run).toHaveBeenCalledTimes(4);
// Check workflow under test was executed
expect(workflowRunner.run).toHaveBeenCalledWith(
expect.objectContaining({
executionMode: 'evaluation',
pinData: {
'When clicking Test workflow':
executionDataJson.resultData.runData['When clicking Test workflow'][0].data.main[0],
},
workflowData: expect.objectContaining({
id: 'workflow-under-test-id',
}),
}),
);
// Check evaluation workflow was executed
expect(workflowRunner.run).toHaveBeenCalledWith(
expect.objectContaining({
executionMode: 'integrated',
executionData: expect.objectContaining({
executionData: expect.objectContaining({
nodeExecutionStack: expect.arrayContaining([
expect.objectContaining({ data: expect.anything() }),
]),
}),
}),
workflowData: expect.objectContaining({
id: 'evaluation-workflow-id',
}),
}),
);
// Check Test Run status was updated correctly
expect(testRunRepository.createTestRun).toHaveBeenCalledTimes(1);
expect(testRunRepository.markAsRunning).toHaveBeenCalledTimes(1);
expect(testRunRepository.markAsRunning).toHaveBeenCalledWith('test-run-id', expect.any(Number));
expect(testRunRepository.markAsCompleted).toHaveBeenCalledTimes(1);
expect(testRunRepository.markAsCompleted).toHaveBeenCalledWith('test-run-id', {
metric1: 0.75,
metric2: 1.5,
});
expect(testRunRepository.incrementPassed).toHaveBeenCalledTimes(2);
expect(testRunRepository.incrementFailed).not.toHaveBeenCalled();
});
test('should properly override metrics from earlier nodes with later ones', async () => {
const testRunnerService = new TestRunnerService(
logger,
telemetry,
workflowRepository,
workflowRunner,
executionRepository,
activeExecutions,
testRunRepository,
testCaseExecutionRepository,
testMetricRepository,
mockNodeTypes,
errorReporter,
);
workflowRepository.findById.calledWith('workflow-under-test-id').mockResolvedValueOnce({
id: 'workflow-under-test-id',
...wfUnderTestJson,
});
workflowRepository.findById.calledWith('evaluation-workflow-id').mockResolvedValueOnce({
id: 'evaluation-workflow-id',
...wfEvaluationMiddleJson,
});
workflowRunner.run.mockResolvedValueOnce('some-execution-id');
workflowRunner.run.mockResolvedValueOnce('some-execution-id-2');
workflowRunner.run.mockResolvedValueOnce('some-execution-id-3');
workflowRunner.run.mockResolvedValueOnce('some-execution-id-4');
// Mock executions of workflow under test
activeExecutions.getPostExecutePromise
.calledWith('some-execution-id')
.mockResolvedValue(mockExecutionData());
activeExecutions.getPostExecutePromise
.calledWith('some-execution-id-3')
.mockResolvedValue(mockExecutionData());
// Mock executions of evaluation workflow
activeExecutions.getPostExecutePromise
.calledWith('some-execution-id-2')
.mockResolvedValue(
mockEvaluationMiddleExecutionData({ metric2: 5 }, { metric1: 1, metric2: 5 }),
);
activeExecutions.getPostExecutePromise
.calledWith('some-execution-id-4')
.mockResolvedValue(
mockEvaluationMiddleExecutionData({ metric2: 10 }, { metric1: 0.5, metric2: 10 }),
);
await testRunnerService.runTest(
mock<User>(),
mock<TestDefinition>({
workflowId: 'workflow-under-test-id',
evaluationWorkflowId: 'evaluation-workflow-id',
mockedNodes: [{ id: '72256d90-3a67-4e29-b032-47df4e5768af' }],
}),
);
expect(workflowRunner.run).toHaveBeenCalledTimes(4);
// Check workflow under test was executed
expect(workflowRunner.run).toHaveBeenCalledWith(
expect.objectContaining({
executionMode: 'evaluation',
pinData: {
'When clicking Test workflow':
executionDataJson.resultData.runData['When clicking Test workflow'][0].data.main[0],
},
workflowData: expect.objectContaining({
id: 'workflow-under-test-id',
}),
}),
);
// Check evaluation workflow was executed
expect(workflowRunner.run).toHaveBeenCalledWith(
expect.objectContaining({
executionMode: 'integrated',
executionData: expect.objectContaining({
executionData: expect.objectContaining({
nodeExecutionStack: expect.arrayContaining([
expect.objectContaining({ data: expect.anything() }),
]),
}),
}),
workflowData: expect.objectContaining({
id: 'evaluation-workflow-id',
}),
}),
);
// Check Test Run status was updated correctly
expect(testRunRepository.createTestRun).toHaveBeenCalledTimes(1);
expect(testRunRepository.markAsRunning).toHaveBeenCalledTimes(1);
expect(testRunRepository.markAsRunning).toHaveBeenCalledWith('test-run-id', expect.any(Number));
expect(testRunRepository.markAsCompleted).toHaveBeenCalledTimes(1);
expect(testRunRepository.markAsCompleted).toHaveBeenCalledWith('test-run-id', {
metric1: 0.75,
metric2: 7.5,
});
expect(testRunRepository.incrementPassed).toHaveBeenCalledTimes(2);
expect(testRunRepository.incrementFailed).not.toHaveBeenCalled();
});
describe('Test Run cancellation', () => {
beforeAll(() => {
jest.useFakeTimers();

View File

@@ -6,8 +6,6 @@ export type TestCaseExecutionErrorCode =
| 'FAILED_TO_EXECUTE_WORKFLOW'
| 'EVALUATION_WORKFLOW_DOES_NOT_EXIST'
| 'FAILED_TO_EXECUTE_EVALUATION_WORKFLOW'
| 'METRICS_MISSING'
| 'UNKNOWN_METRICS'
| 'INVALID_METRICS'
| 'PAYLOAD_LIMIT_EXCEEDED'
| 'UNKNOWN_ERROR';

View File

@@ -1,4 +1,3 @@
import difference from 'lodash/difference';
import type { IDataObject } from 'n8n-workflow';
import { TestCaseExecutionError } from '@/evaluation.ee/test-runner/errors.ee';
@@ -43,16 +42,6 @@ export class EvaluationMetrics {
}
}
// Check that result contains all expected metrics
if (
difference(Array.from(this.metricNames), Object.keys(addResultsInfo.addedMetrics)).length > 0
) {
throw new TestCaseExecutionError('METRICS_MISSING', {
expectedMetrics: Array.from(this.metricNames).sort(),
receivedMetrics: Object.keys(addResultsInfo.addedMetrics).sort(),
});
}
return addResultsInfo;
}

View File

@@ -1,8 +1,10 @@
import { Service } from '@n8n/di';
import { parse } from 'flatted';
import difference from 'lodash/difference';
import { ErrorReporter, Logger } from 'n8n-core';
import { ExecutionCancelledError, NodeConnectionTypes, Workflow } from 'n8n-workflow';
import type {
AssignmentCollectionValue,
IDataObject,
IRun,
IRunExecutionData,
@@ -13,6 +15,7 @@ import assert from 'node:assert';
import { ActiveExecutions } from '@/active-executions';
import config from '@/config';
import { EVALUATION_METRICS_NODE } from '@/constants';
import type { ExecutionEntity } from '@/databases/entities/execution-entity';
import type { MockedNodeItem, TestDefinition } from '@/databases/entities/test-definition.ee';
import type { TestRun } from '@/databases/entities/test-run.ee';
@@ -225,6 +228,50 @@ export class TestRunnerService {
return await executePromise;
}
/**
* Sync the metrics of the test definition with the evaluation workflow.
*/
async syncMetrics(
testDefinitionId: string,
evaluationWorkflow: IWorkflowBase,
): Promise<Set<string>> {
const usedTestMetricNames = await this.getUsedTestMetricNames(evaluationWorkflow);
const existingTestMetrics = await this.testMetricRepository.find({
where: {
testDefinition: { id: testDefinitionId },
},
});
const existingMetricNames = new Set(existingTestMetrics.map((metric) => metric.name));
const metricsToAdd = difference(
Array.from(usedTestMetricNames),
Array.from(existingMetricNames),
);
const metricsToRemove = difference(
Array.from(existingMetricNames),
Array.from(usedTestMetricNames),
);
// Add new metrics
const metricsToAddEntities = metricsToAdd.map((metricName) =>
this.testMetricRepository.create({
name: metricName,
testDefinition: { id: testDefinitionId },
}),
);
await this.testMetricRepository.save(metricsToAddEntities);
// Remove no longer used metrics
metricsToRemove.forEach(async (metricName) => {
const metric = existingTestMetrics.find((m) => m.name === metricName);
assert(metric, 'Existing metric not found');
await this.testMetricRepository.delete(metric.id);
});
return usedTestMetricNames;
}
/**
* Run the evaluation workflow with the expected and actual run data.
*/
@@ -265,35 +312,45 @@ export class TestRunnerService {
return await executePromise;
}
/**
* Get the evaluation metrics nodes from a workflow.
*/
static getEvaluationMetricsNodes(workflow: IWorkflowBase) {
return workflow.nodes.filter((node) => node.type === EVALUATION_METRICS_NODE);
}
/**
* Evaluation result is the first item in the output of the last node
* executed in the evaluation workflow. Defaults to an empty object
* in case the node doesn't produce any output items.
*/
private extractEvaluationResult(execution: IRun): IDataObject {
private extractEvaluationResult(execution: IRun, evaluationWorkflow: IWorkflowBase): IDataObject {
const lastNodeExecuted = execution.data.resultData.lastNodeExecuted;
assert(lastNodeExecuted, 'Could not find the last node executed in evaluation workflow');
const metricsNodes = TestRunnerService.getEvaluationMetricsNodes(evaluationWorkflow);
const metricsRunData = metricsNodes.flatMap(
(node) => execution.data.resultData.runData[node.name],
);
const metricsData = metricsRunData.reverse().map((data) => data.data?.main?.[0]?.[0]?.json);
const metricsResult = metricsData.reduce((acc, curr) => ({ ...acc, ...curr }), {}) ?? {};
// Extract the output of the last node executed in the evaluation workflow
// We use only the first item of a first main output
const lastNodeTaskData = execution.data.resultData.runData[lastNodeExecuted]?.[0];
const mainConnectionData = lastNodeTaskData?.data?.main?.[0];
return mainConnectionData?.[0]?.json ?? {};
return metricsResult;
}
/**
* Get the metrics to collect from the evaluation workflow execution results.
*/
private async getTestMetricNames(testDefinitionId: string) {
const metrics = await this.testMetricRepository.find({
where: {
testDefinition: {
id: testDefinitionId,
},
},
private async getUsedTestMetricNames(evaluationWorkflow: IWorkflowBase) {
const metricsNodes = TestRunnerService.getEvaluationMetricsNodes(evaluationWorkflow);
const metrics = metricsNodes.map((node) => {
const metricsParameter = node.parameters?.metrics as AssignmentCollectionValue;
assert(metricsParameter, 'Metrics parameter not found');
const metricsNames = metricsParameter.assignments.map((assignment) => assignment.name);
return metricsNames;
});
return new Set(metrics.map((m) => m.name));
return new Set(metrics.flat());
}
/**
@@ -329,7 +386,6 @@ export class TestRunnerService {
if (!evaluationWorkflow) {
throw new TestRunError('EVALUATION_WORKFLOW_NOT_FOUND');
}
///
// 1. Make test cases from previous executions
///
@@ -359,8 +415,8 @@ export class TestRunnerService {
pastExecutions.map((e) => e.id),
);
// Get the metrics to collect from the evaluation workflow
const testMetricNames = await this.getTestMetricNames(test.id);
// Sync the metrics of the test definition with the evaluation workflow
const testMetricNames = await this.syncMetrics(test.id, evaluationWorkflow);
// 2. Run over all the test cases
const pastExecutionIds = pastExecutions.map((e) => e.id);
@@ -465,8 +521,8 @@ export class TestRunnerService {
this.logger.debug('Evaluation execution finished', { pastExecutionId });
// Extract the output of the last node executed in the evaluation workflow
const { addedMetrics, unknownMetrics } = metrics.addResults(
this.extractEvaluationResult(evalExecution),
const { addedMetrics } = metrics.addResults(
this.extractEvaluationResult(evalExecution, evaluationWorkflow),
);
if (evalExecution.data.resultData.error) {
@@ -483,22 +539,12 @@ export class TestRunnerService {
await Db.transaction(async (trx) => {
await this.testRunRepository.incrementPassed(testRun.id, trx);
// Add warning if the evaluation workflow produced an unknown metric
if (unknownMetrics.size > 0) {
await this.testCaseExecutionRepository.markAsWarning({
testRunId: testRun.id,
pastExecutionId,
errorCode: 'UNKNOWN_METRICS',
errorDetails: { unknownMetrics: Array.from(unknownMetrics) },
});
} else {
await this.testCaseExecutionRepository.markAsCompleted({
testRunId: testRun.id,
pastExecutionId,
metrics: addedMetrics,
trx,
});
}
await this.testCaseExecutionRepository.markAsCompleted({
testRunId: testRun.id,
pastExecutionId,
metrics: addedMetrics,
trx,
});
});
}
} catch (e) {

View File

@@ -66,7 +66,6 @@ import '@/executions/executions.controller';
import '@/external-secrets.ee/external-secrets.controller.ee';
import '@/license/license.controller';
import '@/evaluation.ee/test-definitions.controller.ee';
import '@/evaluation.ee/metrics.controller';
import '@/evaluation.ee/test-runs.controller.ee';
import '@/workflows/workflow-history.ee/workflow-history.controller.ee';
import '@/workflows/workflows.controller';

View File

@@ -1,381 +0,0 @@
import { Container } from '@n8n/di';
import type { IWorkflowBase } from 'n8n-workflow';
import type { TestDefinition } from '@/databases/entities/test-definition.ee';
import type { User } from '@/databases/entities/user';
import { TestDefinitionRepository } from '@/databases/repositories/test-definition.repository.ee';
import { TestMetricRepository } from '@/databases/repositories/test-metric.repository.ee';
import { createUserShell } from '@test-integration/db/users';
import { createWorkflow } from '@test-integration/db/workflows';
import * as testDb from '@test-integration/test-db';
import type { SuperAgentTest } from '@test-integration/types';
import * as utils from '@test-integration/utils';
let authOwnerAgent: SuperAgentTest;
let workflowUnderTest: IWorkflowBase;
let otherWorkflow: IWorkflowBase;
let testDefinition: TestDefinition;
let otherTestDefinition: TestDefinition;
let ownerShell: User;
const testServer = utils.setupTestServer({ endpointGroups: ['evaluation'] });
beforeAll(async () => {
ownerShell = await createUserShell('global:owner');
authOwnerAgent = testServer.authAgentFor(ownerShell);
});
beforeEach(async () => {
await testDb.truncate(['TestDefinition', 'TestMetric']);
workflowUnderTest = await createWorkflow({ name: 'workflow-under-test' }, ownerShell);
testDefinition = Container.get(TestDefinitionRepository).create({
name: 'test',
workflow: { id: workflowUnderTest.id },
});
await Container.get(TestDefinitionRepository).save(testDefinition);
otherWorkflow = await createWorkflow({ name: 'other-workflow' });
otherTestDefinition = Container.get(TestDefinitionRepository).create({
name: 'other-test',
workflow: { id: otherWorkflow.id },
});
await Container.get(TestDefinitionRepository).save(otherTestDefinition);
});
describe('GET /evaluation/test-definitions/:testDefinitionId/metrics', () => {
test('should retrieve empty list of metrics for a test definition', async () => {
const resp = await authOwnerAgent.get(
`/evaluation/test-definitions/${testDefinition.id}/metrics`,
);
expect(resp.statusCode).toBe(200);
expect(resp.body.data.length).toBe(0);
});
test('should retrieve metrics for a test definition', async () => {
const newMetric = Container.get(TestMetricRepository).create({
testDefinition: { id: testDefinition.id },
name: 'metric-1',
});
await Container.get(TestMetricRepository).save(newMetric);
const newMetric2 = Container.get(TestMetricRepository).create({
testDefinition: { id: testDefinition.id },
name: 'metric-2',
});
await Container.get(TestMetricRepository).save(newMetric2);
const resp = await authOwnerAgent.get(
`/evaluation/test-definitions/${testDefinition.id}/metrics`,
);
expect(resp.statusCode).toBe(200);
expect(resp.body.data.length).toBe(2);
expect(resp.body.data).toEqual(
expect.arrayContaining([
expect.objectContaining({
id: expect.any(String),
name: 'metric-1',
}),
expect.objectContaining({
id: expect.any(String),
name: 'metric-2',
}),
]),
);
});
test('should return 404 if test definition does not exist', async () => {
const resp = await authOwnerAgent.get('/evaluation/test-definitions/999/metrics');
expect(resp.statusCode).toBe(404);
});
test('should return 404 if test definition is not accessible to the user', async () => {
const resp = await authOwnerAgent.get(
`/evaluation/test-definitions/${otherTestDefinition.id}/metrics`,
);
expect(resp.statusCode).toBe(404);
});
});
describe('GET /evaluation/test-definitions/:testDefinitionId/metrics/:id', () => {
test('should retrieve a metric for a test definition', async () => {
const newMetric = Container.get(TestMetricRepository).create({
testDefinition: { id: testDefinition.id },
name: 'metric-1',
});
await Container.get(TestMetricRepository).save(newMetric);
const resp = await authOwnerAgent.get(
`/evaluation/test-definitions/${testDefinition.id}/metrics/${newMetric.id}`,
);
expect(resp.statusCode).toBe(200);
expect(resp.body.data).toEqual(
expect.objectContaining({
id: newMetric.id,
name: 'metric-1',
}),
);
});
test('should return 404 if metric does not exist', async () => {
const resp = await authOwnerAgent.get(
`/evaluation/test-definitions/${testDefinition.id}/metrics/999`,
);
expect(resp.statusCode).toBe(404);
});
test('should return 404 if metric is not accessible to the user', async () => {
const newMetric = Container.get(TestMetricRepository).create({
testDefinition: { id: otherTestDefinition.id },
name: 'metric-1',
});
await Container.get(TestMetricRepository).save(newMetric);
const resp = await authOwnerAgent.get(
`/evaluation/test-definitions/${otherTestDefinition.id}/metrics/${newMetric.id}`,
);
expect(resp.statusCode).toBe(404);
});
});
describe('POST /evaluation/test-definitions/:testDefinitionId/metrics', () => {
test('should create a metric for a test definition', async () => {
const resp = await authOwnerAgent
.post(`/evaluation/test-definitions/${testDefinition.id}/metrics`)
.send({
name: 'metric-1',
});
expect(resp.statusCode).toBe(200);
expect(resp.body.data).toEqual(
expect.objectContaining({
id: expect.any(String),
name: 'metric-1',
}),
);
const metrics = await Container.get(TestMetricRepository).find({
where: { testDefinition: { id: testDefinition.id } },
});
expect(metrics.length).toBe(1);
expect(metrics[0].name).toBe('metric-1');
});
test('should return 400 if name is missing', async () => {
const resp = await authOwnerAgent
.post(`/evaluation/test-definitions/${testDefinition.id}/metrics`)
.send({});
expect(resp.statusCode).toBe(400);
expect(resp.body.errors).toEqual(
expect.arrayContaining([
expect.objectContaining({
code: 'invalid_type',
message: 'Required',
path: ['name'],
}),
]),
);
});
test('should return 400 if name is not a string', async () => {
const resp = await authOwnerAgent
.post(`/evaluation/test-definitions/${testDefinition.id}/metrics`)
.send({
name: 123,
});
expect(resp.statusCode).toBe(400);
expect(resp.body.errors).toEqual(
expect.arrayContaining([
expect.objectContaining({
code: 'invalid_type',
message: 'Expected string, received number',
path: ['name'],
}),
]),
);
});
test('should return 404 if test definition does not exist', async () => {
const resp = await authOwnerAgent.post('/evaluation/test-definitions/999/metrics').send({
name: 'metric-1',
});
expect(resp.statusCode).toBe(404);
});
test('should return 404 if test definition is not accessible to the user', async () => {
const resp = await authOwnerAgent
.post(`/evaluation/test-definitions/${otherTestDefinition.id}/metrics`)
.send({
name: 'metric-1',
});
expect(resp.statusCode).toBe(404);
});
});
describe('PATCH /evaluation/test-definitions/:testDefinitionId/metrics/:id', () => {
test('should update a metric for a test definition', async () => {
const newMetric = Container.get(TestMetricRepository).create({
testDefinition: { id: testDefinition.id },
name: 'metric-1',
});
await Container.get(TestMetricRepository).save(newMetric);
const resp = await authOwnerAgent
.patch(`/evaluation/test-definitions/${testDefinition.id}/metrics/${newMetric.id}`)
.send({
name: 'metric-2',
});
expect(resp.statusCode).toBe(200);
expect(resp.body.data).toEqual(
expect.objectContaining({
id: newMetric.id,
name: 'metric-2',
}),
);
const metrics = await Container.get(TestMetricRepository).find({
where: { testDefinition: { id: testDefinition.id } },
});
expect(metrics.length).toBe(1);
expect(metrics[0].name).toBe('metric-2');
});
test('should return 400 if name is missing', async () => {
const newMetric = Container.get(TestMetricRepository).create({
testDefinition: { id: testDefinition.id },
name: 'metric-1',
});
await Container.get(TestMetricRepository).save(newMetric);
const resp = await authOwnerAgent
.patch(`/evaluation/test-definitions/${testDefinition.id}/metrics/${newMetric.id}`)
.send({});
expect(resp.statusCode).toBe(400);
expect(resp.body.errors).toEqual(
expect.arrayContaining([
expect.objectContaining({
code: 'invalid_type',
message: 'Required',
path: ['name'],
}),
]),
);
});
test('should return 400 if name is not a string', async () => {
const newMetric = Container.get(TestMetricRepository).create({
testDefinition: { id: testDefinition.id },
name: 'metric-1',
});
await Container.get(TestMetricRepository).save(newMetric);
const resp = await authOwnerAgent
.patch(`/evaluation/test-definitions/${testDefinition.id}/metrics/${newMetric.id}`)
.send({
name: 123,
});
expect(resp.statusCode).toBe(400);
expect(resp.body.errors).toEqual(
expect.arrayContaining([
expect.objectContaining({
code: 'invalid_type',
message: 'Expected string, received number',
}),
]),
);
});
test('should return 404 if metric does not exist', async () => {
const resp = await authOwnerAgent
.patch(`/evaluation/test-definitions/${testDefinition.id}/metrics/999`)
.send({
name: 'metric-1',
});
expect(resp.statusCode).toBe(404);
});
test('should return 404 if test definition does not exist', async () => {
const resp = await authOwnerAgent.patch('/evaluation/test-definitions/999/metrics/999').send({
name: 'metric-1',
});
expect(resp.statusCode).toBe(404);
});
test('should return 404 if metric is not accessible to the user', async () => {
const newMetric = Container.get(TestMetricRepository).create({
testDefinition: { id: otherTestDefinition.id },
name: 'metric-1',
});
await Container.get(TestMetricRepository).save(newMetric);
const resp = await authOwnerAgent
.patch(`/evaluation/test-definitions/${otherTestDefinition.id}/metrics/${newMetric.id}`)
.send({
name: 'metric-2',
});
expect(resp.statusCode).toBe(404);
});
});
describe('DELETE /evaluation/test-definitions/:testDefinitionId/metrics/:id', () => {
test('should delete a metric for a test definition', async () => {
const newMetric = Container.get(TestMetricRepository).create({
testDefinition: { id: testDefinition.id },
name: 'metric-1',
});
await Container.get(TestMetricRepository).save(newMetric);
const resp = await authOwnerAgent.delete(
`/evaluation/test-definitions/${testDefinition.id}/metrics/${newMetric.id}`,
);
expect(resp.statusCode).toBe(200);
expect(resp.body.data).toEqual({ success: true });
const metrics = await Container.get(TestMetricRepository).find({
where: { testDefinition: { id: testDefinition.id } },
});
expect(metrics.length).toBe(0);
});
test('should return 404 if metric does not exist', async () => {
const resp = await authOwnerAgent.delete(
`/evaluation/test-definitions/${testDefinition.id}/metrics/999`,
);
expect(resp.statusCode).toBe(404);
});
test('should return 404 if metric is not accessible to the user', async () => {
const newMetric = Container.get(TestMetricRepository).create({
testDefinition: { id: otherTestDefinition.id },
name: 'metric-1',
});
await Container.get(TestMetricRepository).save(newMetric);
const resp = await authOwnerAgent.delete(
`/evaluation/test-definitions/${otherTestDefinition.id}/metrics/${newMetric.id}`,
);
expect(resp.statusCode).toBe(404);
});
});

View File

@@ -281,7 +281,6 @@ export const setupTestServer = ({
break;
case 'evaluation':
await import('@/evaluation.ee/metrics.controller');
await import('@/evaluation.ee/test-definitions.controller.ee');
await import('@/evaluation.ee/test-runs.controller.ee');
break;