feat(core): Evaluations backend (no-changelog) (#15542)

Co-authored-by: Yiorgis Gozadinos <yiorgis@n8n.io>
Co-authored-by: Mutasem Aldmour <4711238+mutdmour@users.noreply.github.com>
Eugene
2025-05-23 09:05:13 +02:00
committed by GitHub
parent cf8b611d14
commit fa620f2d5b
18 changed files with 1266 additions and 601 deletions

View File

@@ -283,19 +283,15 @@ export type FolderWithWorkflowAndSubFolderCountAndPath = FolderWithWorkflowAndSu
export type TestRunFinalResult = 'success' | 'error' | 'warning';
export type TestRunErrorCode =
| 'PAST_EXECUTIONS_NOT_FOUND'
| 'EVALUATION_WORKFLOW_NOT_FOUND'
| 'TEST_CASES_NOT_FOUND'
| 'INTERRUPTED'
| 'UNKNOWN_ERROR';
| 'UNKNOWN_ERROR'
| 'EVALUATION_TRIGGER_NOT_FOUND';
export type TestCaseExecutionErrorCode =
| 'MOCKED_NODE_DOES_NOT_EXIST'
| 'TRIGGER_NO_LONGER_EXISTS'
| 'MOCKED_NODE_NOT_FOUND' // This will be used when node mocking is implemented
| 'FAILED_TO_EXECUTE_WORKFLOW'
| 'EVALUATION_WORKFLOW_DOES_NOT_EXIST'
| 'FAILED_TO_EXECUTE_EVALUATION_WORKFLOW'
| 'INVALID_METRICS'
| 'PAYLOAD_LIMIT_EXCEEDED'
| 'UNKNOWN_ERROR';
export type AggregatedTestRunMetrics = Record<string, number | boolean>;
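For orientation, the new 'EVALUATION_TRIGGER_NOT_FOUND' code is thrown by the test runner further down in this diff when a workflow has no evaluation trigger node. A minimal sketch of how these codes surface to callers, assuming only the TestRunError class from '@/evaluation.ee/test-runner/errors.ee' that this change already uses (the helper function is hypothetical):

import type { TestRunErrorCode } from '@n8n/db';
import { TestRunError } from '@/evaluation.ee/test-runner/errors.ee';

// Hypothetical helper: fail a run when the evaluation trigger is missing.
function assertEvaluationTrigger(triggerNode: unknown): asserts triggerNode {
  if (!triggerNode) throw new TestRunError('EVALUATION_TRIGGER_NOT_FOUND');
}

try {
  assertEvaluationTrigger(undefined);
} catch (e) {
  if (e instanceof TestRunError) {
    const code: TestRunErrorCode = e.code; // 'EVALUATION_TRIGGER_NOT_FOUND'
    console.log(code);
  }
}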

View File

@@ -32,6 +32,12 @@ export class TestCaseExecutionRepository extends Repository<TestCaseExecution> {
super(TestCaseExecution, dataSource.manager);
}
async createTestCaseExecution(testCaseExecutionProps: DeepPartial<TestCaseExecution>) {
const testCaseExecution = this.create(testCaseExecutionProps);
return await this.save(testCaseExecution);
}
async createBatch(testRunId: string, testCases: string[]) {
const mappings = this.create(
testCases.map<DeepPartial<TestCaseExecution>>(() => ({

View File

@@ -46,7 +46,7 @@ export class TestRunRepository extends Repository<TestRun> {
async markAsCancelled(id: string, trx?: EntityManager) {
trx = trx ?? this.manager;
return await trx.update(TestRun, id, { status: 'cancelled' });
return await trx.update(TestRun, id, { status: 'cancelled', completedAt: new Date() });
}
async markAsError(id: string, errorCode: TestRunErrorCode, errorDetails?: IDataObject) {
@@ -54,13 +54,14 @@ export class TestRunRepository extends Repository<TestRun> {
status: 'error',
errorCode,
errorDetails,
completedAt: new Date(),
});
}
async markAllIncompleteAsFailed() {
return await this.update(
{ status: In(['new', 'running']) },
{ status: 'error', errorCode: 'INTERRUPTED' },
{ status: 'error', errorCode: 'INTERRUPTED', completedAt: new Date() },
);
}

View File

@@ -158,4 +158,5 @@ export const WsStatusCodes = {
export const FREE_AI_CREDITS_CREDENTIAL_NAME = 'n8n free OpenAI API credits';
export const EVALUATION_METRICS_NODE = `${NODE_PACKAGE_PREFIX}base.evaluationMetrics`;
export const EVALUATION_NODE = `${NODE_PACKAGE_PREFIX}base.evaluation`;
export const EVALUATION_DATASET_TRIGGER_NODE = `${NODE_PACKAGE_PREFIX}base.evaluationTrigger`;
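For reference, this constant resolves to 'n8n-nodes-base.evaluationTrigger' (the node type used by the mock workflow elsewhere in this diff), and the test runner locates the dataset trigger by comparing node types against it. A minimal sketch of that lookup, mirroring the findTriggerNode method added below:

import type { IWorkflowBase } from 'n8n-workflow';
import { EVALUATION_DATASET_TRIGGER_NODE } from '@/constants';

// Returns the evaluation (dataset) trigger node of a workflow, or undefined if none exists.
const findEvaluationTrigger = (workflow: IWorkflowBase) =>
  workflow.nodes.find((node) => node.type === EVALUATION_DATASET_TRIGGER_NODE);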

View File

@@ -1,104 +0,0 @@
import { readFileSync } from 'fs';
import path from 'path';
import { createPinData } from '../utils.ee';
const wfUnderTestJson = JSON.parse(
readFileSync(path.join(__dirname, './mock-data/workflow.under-test.json'), { encoding: 'utf-8' }),
);
const wfUnderTestRenamedNodesJson = JSON.parse(
readFileSync(path.join(__dirname, './mock-data/workflow.under-test-renamed-nodes.json'), {
encoding: 'utf-8',
}),
);
const executionDataJson = JSON.parse(
readFileSync(path.join(__dirname, './mock-data/execution-data.json'), { encoding: 'utf-8' }),
);
describe('createPinData', () => {
test('should create pin data from past execution data', () => {
const mockedNodes = [
{
id: '72256d90-3a67-4e29-b032-47df4e5768af',
name: 'When clicking Execute workflow',
},
];
const pinData = createPinData(wfUnderTestJson, mockedNodes, executionDataJson);
expect(pinData).toEqual(
expect.objectContaining({
'When clicking Execute workflow': expect.anything(),
}),
);
});
test('should not create pin data for non-existing mocked nodes', () => {
const mockedNodes = ['non-existing-ID'].map((id) => ({ id }));
const pinData = createPinData(wfUnderTestJson, mockedNodes, executionDataJson);
expect(pinData).toEqual({});
});
test('should create pin data for all mocked nodes', () => {
const mockedNodes = [
{
id: '72256d90-3a67-4e29-b032-47df4e5768af', // 'When clicking Execute workflow'
},
{
id: '319f29bc-1dd4-4122-b223-c584752151a4', // 'Edit Fields'
},
{
id: 'd2474215-63af-40a4-a51e-0ea30d762621', // 'Code'
},
];
const pinData = createPinData(wfUnderTestJson, mockedNodes, executionDataJson);
expect(pinData).toEqual(
expect.objectContaining({
'When clicking Execute workflow': expect.anything(),
'Edit Fields': expect.anything(),
Code: expect.anything(),
}),
);
});
test('should return empty object if no mocked nodes are provided', () => {
const pinData = createPinData(wfUnderTestJson, [], executionDataJson);
expect(pinData).toEqual({});
});
test('should create pin data for all mocked nodes with renamed nodes', () => {
const mockedNodes = [
{
id: '72256d90-3a67-4e29-b032-47df4e5768af', // 'Manual Run'
},
{
id: '319f29bc-1dd4-4122-b223-c584752151a4', // 'Set Attribute'
},
{
id: 'd2474215-63af-40a4-a51e-0ea30d762621', // 'Code'
},
];
const pinData = createPinData(
wfUnderTestRenamedNodesJson,
mockedNodes,
executionDataJson,
wfUnderTestJson, // Pass original workflow JSON as pastWorkflowData
);
expect(pinData).toEqual(
expect.objectContaining({
'Manual Run': expect.anything(),
'Set Attribute': expect.anything(),
Code: expect.anything(),
}),
);
});
});

View File

@@ -1,40 +0,0 @@
import { readFileSync } from 'fs';
import path from 'path';
import { getPastExecutionTriggerNode } from '../utils.ee';
const executionDataJson = JSON.parse(
readFileSync(path.join(__dirname, './mock-data/execution-data.json'), { encoding: 'utf-8' }),
);
const executionDataMultipleTriggersJson = JSON.parse(
readFileSync(path.join(__dirname, './mock-data/execution-data.multiple-triggers.json'), {
encoding: 'utf-8',
}),
);
const executionDataMultipleTriggersJson2 = JSON.parse(
readFileSync(path.join(__dirname, './mock-data/execution-data.multiple-triggers-2.json'), {
encoding: 'utf-8',
}),
);
describe('getPastExecutionStartNode', () => {
test('should return the start node of the past execution', () => {
const startNode = getPastExecutionTriggerNode(executionDataJson);
expect(startNode).toEqual('When clicking Execute workflow');
});
test('should return the start node of the past execution with multiple triggers', () => {
const startNode = getPastExecutionTriggerNode(executionDataMultipleTriggersJson);
expect(startNode).toEqual('When clicking Execute workflow');
});
test('should return the start node of the past execution with multiple triggers - chat trigger', () => {
const startNode = getPastExecutionTriggerNode(executionDataMultipleTriggersJson2);
expect(startNode).toEqual('When chat message received');
});
});

View File

@@ -1,78 +1,126 @@
{
"name": "Workflow Under Test",
"nodes": [
{
"parameters": {},
"type": "n8n-nodes-base.manualTrigger",
"typeVersion": 1,
"position": [-80, 0],
"id": "72256d90-3a67-4e29-b032-47df4e5768af",
"name": "When clicking Execute workflow"
"parameters": {
"documentId": {
"__rl": true,
"value": "mock",
"mode": "list"
},
"sheetName": {
"__rl": true,
"value": "mock",
"mode": "list"
}
},
"type": "n8n-nodes-base.evaluationTrigger",
"typeVersion": 4.6,
"position": [0, 0],
"id": "3c9068ec-4880-4fbe-a1c8-f7a1cb3f13e9",
"name": "When fetching a dataset row",
"credentials": {
"googleSheetsOAuth2Api": {
"id": "mock",
"name": "Google Sheets account"
}
}
},
{
"parameters": {
"assignments": {
"assignments": [
"documentId": {
"__rl": true,
"value": "mock",
"mode": "list"
},
"sheetName": {
"__rl": true,
"value": "mock",
"mode": "list"
},
"outputs": {
"values": [
{
"id": "acfeecbe-443c-4220-b63b-d44d69216902",
"name": "foo",
"value": "bar",
"type": "string"
"outputName": "reply",
"outputValue": "test"
}
]
},
"options": {}
}
},
"type": "n8n-nodes-base.set",
"typeVersion": 3.4,
"position": [140, 0],
"id": "319f29bc-1dd4-4122-b223-c584752151a4",
"name": "Edit Fields"
"type": "n8n-nodes-base.evaluation",
"typeVersion": 4.6,
"position": [440, 0],
"id": "9e0be4fb-faa3-4344-ba80-e18ceb1d22f1",
"name": "Set Outputs",
"credentials": {
"googleSheetsOAuth2Api": {
"id": "mock",
"name": "Google Sheets account"
}
}
},
{
"parameters": {},
"type": "n8n-nodes-base.noOp",
"typeVersion": 1,
"position": [220, 0],
"id": "335047aa-fb77-43a1-8135-873d34e7ccc1",
"name": "No Operation, do nothing"
},
{
"parameters": {
"jsCode": "for (const item of $input.all()) {\n item.json.random = Math.random();\n}\n\nreturn $input.all();"
"operation": "setMetrics",
"metrics": {
"assignments": [
{
"name": "test",
"value": 0,
"type": "number",
"id": "cc598090-09c8-489d-84d5-e7b9ee5576b5"
}
]
}
},
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [380, 0],
"id": "d2474215-63af-40a4-a51e-0ea30d762621",
"name": "Code"
"type": "n8n-nodes-base.evaluation",
"typeVersion": 4.6,
"position": [660, 0],
"id": "7a795bad-08e1-4e5c-bfe4-2c43129b6da5",
"name": "Set Metrics"
}
],
"connections": {
"When clicking Execute workflow": {
"When fetching a dataset row": {
"main": [
[
{
"node": "Edit Fields",
"node": "No Operation, do nothing",
"type": "main",
"index": 0
}
]
]
},
"Edit Fields": {
"Set Outputs": {
"main": [
[
{
"node": "Wait",
"node": "Set Metrics",
"type": "main",
"index": 0
}
]
]
},
"Wait": {
"No Operation, do nothing": {
"main": [
[
{
"node": "Code",
"node": "Set Outputs",
"type": "main",
"index": 0
}
]
]
}
}
},
"pinData": {}
}

View File

@@ -0,0 +1,635 @@
import type { TestRun } from '@n8n/db';
import type { TestCaseExecutionRepository } from '@n8n/db';
import type { TestRunRepository } from '@n8n/db';
import type { WorkflowRepository } from '@n8n/db';
import { readFileSync } from 'fs';
import { mock } from 'jest-mock-extended';
import type { ErrorReporter } from 'n8n-core';
import type { IWorkflowBase } from 'n8n-workflow';
import type { IRun } from 'n8n-workflow';
import path from 'path';
import type { ActiveExecutions } from '@/active-executions';
import { EVALUATION_DATASET_TRIGGER_NODE } from '@/constants';
import { TestRunError } from '@/evaluation.ee/test-runner/errors.ee';
import { LoadNodesAndCredentials } from '@/load-nodes-and-credentials';
import type { Telemetry } from '@/telemetry';
import type { WorkflowRunner } from '@/workflow-runner';
import { mockInstance, mockLogger } from '@test/mocking';
import { mockNodeTypesData } from '@test-integration/utils/node-types-data';
import { TestRunnerService } from '../test-runner.service.ee';
const wfUnderTestJson = JSON.parse(
readFileSync(path.join(__dirname, './mock-data/workflow.under-test.json'), { encoding: 'utf-8' }),
);
const errorReporter = mock<ErrorReporter>();
const logger = mockLogger();
const telemetry = mock<Telemetry>();
describe('TestRunnerService', () => {
const workflowRepository = mock<WorkflowRepository>();
const workflowRunner = mock<WorkflowRunner>();
const activeExecutions = mock<ActiveExecutions>();
const testRunRepository = mock<TestRunRepository>();
const testCaseExecutionRepository = mock<TestCaseExecutionRepository>();
let testRunnerService: TestRunnerService;
mockInstance(LoadNodesAndCredentials, {
loadedNodes: mockNodeTypesData(['manualTrigger', 'set', 'if', 'code', 'evaluation']),
});
beforeEach(() => {
testRunnerService = new TestRunnerService(
logger,
telemetry,
workflowRepository,
workflowRunner,
activeExecutions,
testRunRepository,
testCaseExecutionRepository,
errorReporter,
);
testRunRepository.createTestRun.mockResolvedValue(mock<TestRun>({ id: 'test-run-id' }));
});
afterEach(() => {
jest.resetAllMocks();
});
describe('findTriggerNode', () => {
test('should find the trigger node in a workflow', () => {
// Setup a test workflow with a trigger node
const workflowWithTrigger = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Dataset Trigger',
type: EVALUATION_DATASET_TRIGGER_NODE,
typeVersion: 1,
position: [0, 0],
parameters: {},
},
{
id: 'node2',
name: 'Regular Node',
type: 'n8n-nodes-base.noOp',
typeVersion: 1,
position: [100, 0],
parameters: {},
},
],
connections: {},
});
// Use the protected method via any type casting
const result = (testRunnerService as any).findTriggerNode(workflowWithTrigger);
// Assert the result is the correct node
expect(result).toBeDefined();
expect(result.type).toBe(EVALUATION_DATASET_TRIGGER_NODE);
expect(result.name).toBe('Dataset Trigger');
});
test('should return undefined when no trigger node is found', () => {
// Setup a test workflow without a trigger node
const workflowWithoutTrigger = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Regular Node 1',
type: 'n8n-nodes-base.noOp',
typeVersion: 1,
position: [0, 0],
parameters: {},
},
{
id: 'node2',
name: 'Regular Node 2',
type: 'n8n-nodes-base.set',
typeVersion: 1,
position: [100, 0],
parameters: {},
},
],
connections: {},
});
// Call the function and expect undefined result
const result = (testRunnerService as any).findTriggerNode(workflowWithoutTrigger);
expect(result).toBeUndefined();
});
test('should work with the actual workflow.under-test.json', () => {
const result = (testRunnerService as any).findTriggerNode(wfUnderTestJson);
// Assert the result is the correct node
expect(result).toBeDefined();
expect(result.type).toBe(EVALUATION_DATASET_TRIGGER_NODE);
expect(result.name).toBe('When fetching a dataset row');
});
});
describe('extractDatasetTriggerOutput', () => {
test('should extract trigger output data from execution', () => {
// Create workflow with a trigger node
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'triggerNodeId',
name: 'TriggerNode',
type: EVALUATION_DATASET_TRIGGER_NODE,
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
// Create execution data with output for the trigger node
const mockOutputItems = [
{ json: { id: 1, name: 'Test 1' } },
{ json: { id: 2, name: 'Test 2' } },
];
const execution = mock<IRun>({
data: {
resultData: {
runData: {
TriggerNode: [
{
data: {
main: [mockOutputItems],
},
},
],
},
},
},
});
// Call the method
const result = (testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
// Verify results
expect(result).toEqual(mockOutputItems);
});
test('should throw an error if trigger node output is not present', () => {
// Create workflow with a trigger node
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'triggerNodeId',
name: 'TriggerNode',
type: EVALUATION_DATASET_TRIGGER_NODE,
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
// Create execution data with missing output
const execution = mock<IRun>({
data: {
resultData: {
runData: {},
},
},
});
// Expect the method to throw an error
expect(() => {
(testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
}).toThrow(TestRunError);
// Verify the error has the correct code
try {
(testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
} catch (error) {
expect(error).toBeInstanceOf(TestRunError);
expect(error.code).toBe('TEST_CASES_NOT_FOUND');
}
});
test('should throw an error if trigger node output is empty list', () => {
// Create workflow with a trigger node
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'triggerNodeId',
name: 'TriggerNode',
type: EVALUATION_DATASET_TRIGGER_NODE,
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
// Create execution data with missing output
const execution = mock<IRun>({
data: {
resultData: {
runData: {
TriggerNode: [
{
data: {
main: [[]], // Empty list
},
},
],
},
},
},
});
// Expect the method to throw an error
expect(() => {
(testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
}).toThrow(TestRunError);
// Verify the error has the correct code
try {
(testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
} catch (error) {
expect(error).toBeInstanceOf(TestRunError);
expect(error.code).toBe('TEST_CASES_NOT_FOUND');
}
});
test('should work with actual execution data format', () => {
// Create workflow with a trigger node that matches the name in the actual data
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'triggerNodeId',
name: "When clicking 'Execute workflow'",
type: EVALUATION_DATASET_TRIGGER_NODE,
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
// Mock execution data similar to actual format
const expectedItems = [
{ json: { query: 'First item' }, pairedItem: { item: 0 } },
{ json: { query: 'Second item' }, pairedItem: { item: 0 } },
{ json: { query: 'Third item' }, pairedItem: { item: 0 } },
];
// TODO: change with actual data
const execution = mock<IRun>({
data: {
resultData: {
runData: {
"When clicking 'Execute workflow'": [
{
data: {
main: [expectedItems],
},
},
],
},
},
},
});
// Call the method
const result = (testRunnerService as any).extractDatasetTriggerOutput(execution, workflow);
// Verify results
expect(result).toEqual(expectedItems);
});
});
describe('runDatasetTrigger', () => {
beforeEach(() => {
// Setup mock execution response
const mockExecutionId = 'mock-execution-id';
const mockExecutionData = mock<IRun>({
data: {
resultData: {
runData: {},
},
},
});
// Setup workflowRunner mock
workflowRunner.run.mockResolvedValue(mockExecutionId);
// Setup activeExecutions mock
activeExecutions.getPostExecutePromise.mockResolvedValue(mockExecutionData);
});
test('should throw an error if trigger node is not found', async () => {
// Create workflow without a trigger node
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: 'Regular Node',
type: 'n8n-nodes-base.noOp',
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
const metadata = {
testRunId: 'test-run-id',
userId: 'user-id',
};
// Call the method and expect it to throw an error
await expect(
(testRunnerService as any).runDatasetTrigger(workflow, metadata),
).rejects.toThrow(TestRunError);
// Verify the error has the correct code
try {
await (testRunnerService as any).runDatasetTrigger(workflow, metadata);
} catch (error) {
expect(error).toBeInstanceOf(TestRunError);
expect(error.code).toBe('EVALUATION_TRIGGER_NOT_FOUND');
}
});
test('should call workflowRunner.run with correct data', async () => {
// Create workflow with a trigger node
const triggerNodeName = 'Dataset Trigger';
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: triggerNodeName,
type: EVALUATION_DATASET_TRIGGER_NODE,
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
settings: {
saveDataErrorExecution: 'all',
},
});
const metadata = {
testRunId: 'test-run-id',
userId: 'user-id',
};
// Call the method
await (testRunnerService as any).runDatasetTrigger(workflow, metadata);
// Verify workflowRunner.run was called
expect(workflowRunner.run).toHaveBeenCalledTimes(1);
// Get the argument passed to workflowRunner.run
const runCallArg = workflowRunner.run.mock.calls[0][0];
// Verify it has the correct structure
expect(runCallArg).toHaveProperty('destinationNode', triggerNodeName);
expect(runCallArg).toHaveProperty('executionMode', 'manual');
expect(runCallArg).toHaveProperty('workflowData.settings.saveManualExecutions', false);
expect(runCallArg).toHaveProperty('workflowData.settings.saveDataErrorExecution', 'none');
expect(runCallArg).toHaveProperty('workflowData.settings.saveDataSuccessExecution', 'none');
expect(runCallArg).toHaveProperty('workflowData.settings.saveExecutionProgress', false);
expect(runCallArg).toHaveProperty('userId', metadata.userId);
expect(runCallArg).toHaveProperty('partialExecutionVersion', 2);
// Verify node execution stack contains the requestDataset flag
expect(runCallArg).toHaveProperty('executionData.executionData.nodeExecutionStack');
const nodeExecutionStack = runCallArg.executionData?.executionData?.nodeExecutionStack;
expect(nodeExecutionStack).toBeInstanceOf(Array);
expect(nodeExecutionStack).toHaveLength(1);
expect(nodeExecutionStack?.[0]).toHaveProperty('node.name', triggerNodeName);
expect(nodeExecutionStack?.[0]).toHaveProperty('data.main[0][0].json.requestDataset', true);
});
test('should wait for execution to finish and return result', async () => {
// Create workflow with a trigger node
const triggerNodeName = 'Dataset Trigger';
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: triggerNodeName,
type: EVALUATION_DATASET_TRIGGER_NODE,
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
const metadata = {
testRunId: 'test-run-id',
userId: 'user-id',
};
// Setup mock for execution ID and result
const mockExecutionId = 'dataset-execution-id';
const mockExecutionResult = mock<IRun>({
data: {
resultData: {
runData: {
[triggerNodeName]: [
{
data: {
main: [[{ json: { test: 'data1' } }, { json: { test: 'data2' } }]],
},
},
],
},
},
},
});
workflowRunner.run.mockResolvedValue(mockExecutionId);
activeExecutions.getPostExecutePromise.mockResolvedValue(mockExecutionResult);
// Call the method
const result = await (testRunnerService as any).runDatasetTrigger(workflow, metadata);
// Verify the execution was waited for
expect(activeExecutions.getPostExecutePromise).toHaveBeenCalledWith(mockExecutionId);
// Verify the result is correct
expect(result).toEqual(mockExecutionResult);
});
});
describe('runTestCase', () => {
beforeEach(() => {
// Setup mock execution response
const mockExecutionId = 'mock-execution-id';
const mockExecutionData = mock<IRun>({
data: {
resultData: {
runData: {},
},
},
});
// Setup workflowRunner mock
workflowRunner.run.mockResolvedValue(mockExecutionId);
// Setup activeExecutions mock
activeExecutions.getPostExecutePromise.mockResolvedValue(mockExecutionData);
});
test('should return undefined if abortSignal is aborted', async () => {
// Create an aborted signal
const abortController = new AbortController();
abortController.abort();
// Create test data
const workflow = mock<IWorkflowBase>({
nodes: [],
connections: {},
});
const metadata = {
testRunId: 'test-run-id',
userId: 'user-id',
};
const testCase = { json: { id: 1, name: 'Test 1' } };
// Call the method
const result = await (testRunnerService as any).runTestCase(
workflow,
metadata,
testCase,
abortController.signal,
);
// Verify results
expect(result).toBeUndefined();
expect(workflowRunner.run).not.toHaveBeenCalled();
});
test('should call workflowRunner.run with correct data', async () => {
// Setup test data
const triggerNodeName = 'TriggerNode';
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: triggerNodeName,
type: EVALUATION_DATASET_TRIGGER_NODE,
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
const metadata = {
testRunId: 'test-run-id',
userId: 'user-id',
};
const testCase = { json: { id: 1, name: 'Test 1' } };
const abortController = new AbortController();
// Call the method
await (testRunnerService as any).runTestCase(
workflow,
metadata,
testCase,
abortController.signal,
);
// Verify workflowRunner.run was called with the correct data
expect(workflowRunner.run).toHaveBeenCalledTimes(1);
const runCallArg = workflowRunner.run.mock.calls[0][0];
// Verify the expected structure
expect(runCallArg).toEqual(
expect.objectContaining({
executionMode: 'evaluation',
pinData: {
[triggerNodeName]: [testCase],
},
workflowData: workflow,
userId: metadata.userId,
partialExecutionVersion: 2,
triggerToStartFrom: {
name: triggerNodeName,
},
}),
);
});
test('should register abort event listener and return execution results', async () => {
// Setup test data
const triggerNodeName = 'TriggerNode';
const workflow = mock<IWorkflowBase>({
nodes: [
{
id: 'node1',
name: triggerNodeName,
type: EVALUATION_DATASET_TRIGGER_NODE,
typeVersion: 1,
position: [0, 0],
parameters: {},
},
],
connections: {},
});
const metadata = {
testRunId: 'test-run-id',
userId: 'user-id',
};
const testCase = { json: { id: 1, name: 'Test 1' } };
const abortController = new AbortController();
// Mock addEventListener on AbortSignal
const mockAddEventListener = jest.fn();
const originalAddEventListener = abortController.signal.addEventListener;
abortController.signal.addEventListener = mockAddEventListener;
try {
// Call the method
const result = await (testRunnerService as any).runTestCase(
workflow,
metadata,
testCase,
abortController.signal,
);
// Verify addEventListener was called
expect(mockAddEventListener).toHaveBeenCalledTimes(1);
expect(mockAddEventListener.mock.calls[0][0]).toBe('abort');
// Verify the expected result structure
expect(result).toHaveProperty('executionData');
expect(result.executionData?.data).toBeDefined();
expect(result).toHaveProperty('executionId');
expect(result.executionId).toEqual(expect.any(String));
} finally {
// Restore original method
abortController.signal.addEventListener = originalAddEventListener;
}
});
});
});

View File

@@ -3,28 +3,43 @@ import { TestCaseExecutionRepository, TestRunRepository, WorkflowRepository } fr
import { Service } from '@n8n/di';
import { ErrorReporter, Logger } from 'n8n-core';
import { ExecutionCancelledError } from 'n8n-workflow';
import type { IRun, IWorkflowBase, IWorkflowExecutionDataProcess } from 'n8n-workflow';
import type {
IDataObject,
IRun,
IWorkflowBase,
IWorkflowExecutionDataProcess,
IExecuteData,
INodeExecutionData,
} from 'n8n-workflow';
import assert from 'node:assert';
import { ActiveExecutions } from '@/active-executions';
import config from '@/config';
import { EVALUATION_METRICS_NODE } from '@/constants';
import { EVALUATION_DATASET_TRIGGER_NODE, EVALUATION_NODE } from '@/constants';
import { TestCaseExecutionError, TestRunError } from '@/evaluation.ee/test-runner/errors.ee';
import { Telemetry } from '@/telemetry';
import { WorkflowRunner } from '@/workflow-runner';
import { EvaluationMetrics } from './evaluation-metrics.ee';
export interface TestRunMetadata {
testRunId: string;
userId: string;
}
export interface TestCaseExecutionResult {
executionData: IRun;
executionId: string;
}
/**
* This service orchestrates the running of test cases.
* It uses the test definitions to find
* past executions, creates pin data from them,
* and runs the workflow-under-test with the pin data.
* After the workflow-under-test finishes, it runs the evaluation workflow
* with the original and new run data, and collects the metrics.
* This service orchestrates the running of evaluations.
* It performs a partial execution of the workflow under test to fetch the dataset,
* running only the evaluation trigger node and capturing its output.
* It then iterates over the test cases (the items produced by the evaluation trigger node)
* and runs the workflow under test with each test case as input.
* After each test case run, it collects the metrics from the output of the evaluation nodes.
* Once all test cases have run, it aggregates the metrics and saves them to the database.
*/
@Service()
export class TestRunnerService {
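Taken together with the doc comment above, the new run flow can be read as the following simplified driver. This is an illustration assembled from the private methods introduced below (persistence, telemetry, and error handling omitted), not the actual implementation; the unit tests in this diff call the same private methods via an 'as any' cast.

import type { IWorkflowBase, INodeExecutionData } from 'n8n-workflow';

// Illustrative outline of the evaluation run, not part of this change.
async function outlineRunTest(
  service: any, // TestRunnerService instance
  workflow: IWorkflowBase,
  metadata: { testRunId: string; userId: string },
  abortSignal: AbortSignal,
) {
  // 1. Run only the evaluation trigger to fetch the whole dataset.
  const datasetFetch = await service.runDatasetTrigger(workflow, metadata);
  // 2. Every item the trigger emitted becomes one test case.
  const testCases: INodeExecutionData[] = service
    .extractDatasetTriggerOutput(datasetFetch, workflow)
    .map((item: INodeExecutionData) => ({ json: item.json }));
  // 3. Run the workflow under test once per case, pinning the case on the trigger node.
  for (const testCase of testCases) {
    if (abortSignal.aborted) break;
    const result = await service.runTestCase(workflow, metadata, testCase, abortSignal);
    // 4. Metrics come from Evaluation nodes configured with the 'setMetrics' operation.
    console.log(service.extractEvaluationResult(result.executionData, workflow));
  }
}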
@@ -42,57 +57,48 @@ export class TestRunnerService {
) {}
/**
* Prepares the start nodes and trigger node data props for the `workflowRunner.run` method input.
* Finds the dataset trigger node in the workflow
*/
private getStartNodesData(
workflow: IWorkflowBase,
): Pick<IWorkflowExecutionDataProcess, 'triggerToStartFrom'> {
// Find the dataset trigger node
// TODO: replace with dataset trigger node
const triggerNode = workflow.nodes.find(
(node) => node.type === 'n8n-nodes-base.executeWorkflowTrigger',
);
if (!triggerNode) {
// TODO: Change error
throw new TestCaseExecutionError('TRIGGER_NO_LONGER_EXISTS');
}
const triggerToStartFrom = {
name: triggerNode.name,
};
return {
triggerToStartFrom,
};
private findTriggerNode(workflow: IWorkflowBase) {
return workflow.nodes.find((node) => node.type === EVALUATION_DATASET_TRIGGER_NODE);
}
/**
* Runs a test case with the given pin data.
* Runs a test case with the given input.
* Injects the input data as pinned data of the evaluation trigger node.
* Waits for the workflow under test to finish execution.
*/
private async runTestCase(
workflow: IWorkflowBase,
metadata: TestRunMetadata,
testCase: INodeExecutionData,
abortSignal: AbortSignal,
): Promise<IRun | undefined> {
): Promise<TestCaseExecutionResult | undefined> {
// Do not run if the test run is cancelled
if (abortSignal.aborted) {
return;
}
const startNodesData = this.getStartNodesData(workflow);
// Prepare the data to run the workflow
// Evaluation executions should run the same way as manual,
// because they need pinned data and partial execution logic
const triggerNode = this.findTriggerNode(workflow);
assert(triggerNode);
const pinData = {
[triggerNode.name]: [testCase],
};
const data: IWorkflowExecutionDataProcess = {
...startNodesData,
executionMode: 'evaluation',
runData: {},
// pinData,
pinData,
workflowData: workflow,
userId: metadata.userId,
partialExecutionVersion: 2,
triggerToStartFrom: {
name: triggerNode.name,
},
};
// When in queue mode, we need to pass additional data to the execution
@@ -109,7 +115,9 @@ export class TestRunnerService {
manualData: {
userId: metadata.userId,
partialExecutionVersion: 2,
triggerToStartFrom: startNodesData.triggerToStartFrom,
triggerToStartFrom: {
name: triggerNode.name,
},
},
};
}
@@ -123,7 +131,86 @@ export class TestRunnerService {
this.activeExecutions.stopExecution(executionId);
});
// TODO: Update status of the test run execution
// Wait for the execution to finish
const executionData = await this.activeExecutions.getPostExecutePromise(executionId);
assert(executionData);
return { executionId, executionData };
}
/**
* This method creates a partial workflow execution to run the dataset trigger only
* to get the whole dataset.
*/
private async runDatasetTrigger(workflow: IWorkflowBase, metadata: TestRunMetadata) {
// Prepare the data to run the workflow
// Evaluation executions should run the same way as manual,
// because they need pinned data and partial execution logic
const triggerNode = this.findTriggerNode(workflow);
if (!triggerNode) {
throw new TestRunError('EVALUATION_TRIGGER_NOT_FOUND');
}
// Initialize the input data for dataset trigger
// Provide a flag indicating that we want to get the whole dataset
const nodeExecutionStack: IExecuteData[] = [];
nodeExecutionStack.push({
node: triggerNode,
data: {
main: [[{ json: { requestDataset: true } }]],
},
source: null,
});
const data: IWorkflowExecutionDataProcess = {
destinationNode: triggerNode.name,
executionMode: 'manual',
runData: {},
workflowData: {
...workflow,
settings: {
...workflow.settings,
saveManualExecutions: false,
saveDataErrorExecution: 'none',
saveDataSuccessExecution: 'none',
saveExecutionProgress: false,
},
},
userId: metadata.userId,
partialExecutionVersion: 2,
executionData: {
startData: {
destinationNode: triggerNode.name,
},
resultData: {
runData: {},
},
executionData: {
contextData: {},
metadata: {},
nodeExecutionStack,
waitingExecution: {},
waitingExecutionSource: {},
},
manualData: {
userId: metadata.userId,
partialExecutionVersion: 2,
triggerToStartFrom: {
name: triggerNode.name,
},
},
},
triggerToStartFrom: {
name: triggerNode.name,
},
};
// Trigger the workflow under test with mocked data
const executionId = await this.workflowRunner.run(data);
assert(executionId);
// Wait for the execution to finish
const executePromise = this.activeExecutions.getPostExecutePromise(executionId);
@@ -135,11 +222,47 @@ export class TestRunnerService {
* Get the evaluation metrics nodes from a workflow.
*/
static getEvaluationMetricsNodes(workflow: IWorkflowBase) {
return workflow.nodes.filter((node) => node.type === EVALUATION_METRICS_NODE);
return workflow.nodes.filter(
(node) => node.type === EVALUATION_NODE && node.parameters.operation === 'setMetrics',
);
}
/**
* Creates a new test run for the given test definition.
* Extract the dataset trigger output
*/
private extractDatasetTriggerOutput(execution: IRun, workflow: IWorkflowBase) {
const triggerNode = this.findTriggerNode(workflow);
assert(triggerNode);
const triggerOutputData = execution.data.resultData.runData[triggerNode.name][0];
const triggerOutput = triggerOutputData?.data?.main?.[0];
if (!triggerOutput || triggerOutput.length === 0) {
throw new TestRunError('TEST_CASES_NOT_FOUND');
}
return triggerOutput;
}
/**
* Evaluation result is collected from all Evaluation Metrics nodes
*/
private extractEvaluationResult(execution: IRun, workflow: IWorkflowBase): IDataObject {
// TODO: Do not fail if not all metric nodes were executed
const metricsNodes = TestRunnerService.getEvaluationMetricsNodes(workflow);
const metricsRunData = metricsNodes.flatMap(
(node) => execution.data.resultData.runData[node.name],
);
const metricsData = metricsRunData
.reverse()
.map((data) => data.data?.main?.[0]?.[0]?.json ?? {});
const metricsResult = metricsData.reduce((acc, curr) => ({ ...acc, ...curr }), {});
return metricsResult;
}
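// Illustrative only (not part of this change): with two 'setMetrics' nodes whose json
// outputs are { accuracy: 0.95 } and { latency: 120 } (hypothetical metric names),
// the reverse-and-reduce merge above behaves like the following standalone snippet:
const exampleMetricsData = [{ accuracy: 0.95 }, { latency: 120 }].reverse();
const exampleMerged = exampleMetricsData.reduce((acc, curr) => ({ ...acc, ...curr }), {});
// exampleMerged contains { latency: 120, accuracy: 0.95 }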
/**
* Creates a new test run for the given workflow
*/
async runTest(user: User, workflowId: string): Promise<void> {
this.logger.debug('Starting new test run', { workflowId });
@@ -148,7 +271,6 @@ export class TestRunnerService {
assert(workflow, 'Workflow not found');
// 0. Create new Test Run
// TODO: Check that createTestRun takes workflowId as an argument
const testRun = await this.testRunRepository.createTestRun(workflowId);
assert(testRun, 'Unable to create a test run');
@@ -169,36 +291,8 @@ export class TestRunnerService {
const { manager: dbManager } = this.testRunRepository;
try {
///
// 1. Make test cases list
///
// TODO: Get the test cases from the dataset trigger node
const testCases = [{ id: 1 }];
this.logger.debug('Found test cases', { count: testCases.length });
if (testCases.length === 0) {
// TODO: Change error
throw new TestRunError('PAST_EXECUTIONS_NOT_FOUND');
}
// Add all past executions mappings to the test run.
// This will be used to track the status of each test case and keep the connection between test run and all related executions (past, current, and evaluation).
// await this.testCaseExecutionRepository.createBatch(
// testRun.id,
// testCases.map((e) => e.id),
// );
// TODO: Collect metric names from evaluation nodes of the workflow
// const testMetricNames = new Set<string>();
// 2. Run over all the test cases
// const pastExecutionIds = pastExecutions.map((e) => e.id);
// Update test run status
// TODO: mark test run as running
// await this.testRunRepository.markAsRunning(testRun.id);
await this.testRunRepository.markAsRunning(testRun.id);
this.telemetry.track('User ran test', {
user_id: user.id,
@@ -206,23 +300,39 @@ export class TestRunnerService {
workflow_id: workflowId,
});
///
// 1. Make test cases list
///
const datasetFetchExecution = await this.runDatasetTrigger(workflow, testRunMetadata);
assert(datasetFetchExecution);
const datasetTriggerOutput = this.extractDatasetTriggerOutput(
datasetFetchExecution,
workflow,
);
const testCases = datasetTriggerOutput.map((items) => ({ json: items.json }));
this.logger.debug('Found test cases', { count: testCases.length });
// Initialize object to collect the results of the evaluation workflow executions
// const metrics = new EvaluationMetrics();
const metrics = new EvaluationMetrics();
///
// 2. Run over all the test cases
///
for (const _testCase of testCases) {
for (const testCase of testCases) {
if (abortSignal.aborted) {
this.logger.debug('Test run was cancelled', {
workflowId,
// stoppedOn: pastExecutionId,
});
break;
}
this.logger.debug('Running test case');
const runAt = new Date();
try {
const testCaseMetadata = {
@@ -230,22 +340,58 @@ export class TestRunnerService {
};
// Run the test case and wait for it to finish
const testCaseExecution = await this.runTestCase(workflow, testCaseMetadata, abortSignal);
const testCaseResult = await this.runTestCase(
workflow,
testCaseMetadata,
testCase,
abortSignal,
);
assert(testCaseResult);
const { executionId: testCaseExecutionId, executionData: testCaseExecution } =
testCaseResult;
assert(testCaseExecution);
assert(testCaseExecutionId);
this.logger.debug('Test case execution finished');
// In case of a permission check issue, the test case execution will be undefined.
// If that happens, or if the test case execution produced an error, mark the test case as failed.
if (!testCaseExecution || testCaseExecution.data.resultData.error) {
// TODO: add failed test case execution to DB
// Save the failed test case execution in DB
await this.testCaseExecutionRepository.createTestCaseExecution({
executionId: testCaseExecutionId,
testRun: {
id: testRun.id,
},
status: 'error',
errorCode: 'FAILED_TO_EXECUTE_WORKFLOW',
metrics: {},
});
continue;
}
const completedAt = new Date();
// TODO: extract metrics
const { addedMetrics } = metrics.addResults(
this.extractEvaluationResult(testCaseExecution, workflow),
);
this.logger.debug('Test case metrics extracted', addedMetrics);
// Create a new test case execution in DB
// TODO: add successful test case execution to DB
await this.testCaseExecutionRepository.createTestCaseExecution({
executionId: testCaseExecutionId,
testRun: {
id: testRun.id,
},
runAt,
completedAt,
status: 'success',
metrics: addedMetrics,
});
} catch (e) {
const completedAt = new Date();
// FIXME: this is a temporary log
this.logger.error('Test case execution failed', {
workflowId,
@@ -255,9 +401,26 @@ export class TestRunnerService {
// In case of an unexpected error save it as failed test case execution and continue with the next test case
if (e instanceof TestCaseExecutionError) {
// TODO: add failed test case execution to DB
await this.testCaseExecutionRepository.createTestCaseExecution({
testRun: {
id: testRun.id,
},
runAt,
completedAt,
status: 'error',
errorCode: e.code,
errorDetails: e.extra as IDataObject,
});
} else {
// TODO: add failed test case execution to DB
await this.testCaseExecutionRepository.createTestCaseExecution({
testRun: {
id: testRun.id,
},
runAt,
completedAt,
status: 'error',
errorCode: 'UNKNOWN_ERROR',
});
// Report unexpected errors
this.errorReporter.error(e);
@@ -268,16 +431,17 @@ export class TestRunnerService {
// Mark the test run as completed or cancelled
if (abortSignal.aborted) {
await dbManager.transaction(async (trx) => {
// TODO: mark test run as cancelled
// await this.testRunRepository.markAsCancelled(testRun.id, trx);
await this.testRunRepository.markAsCancelled(testRun.id, trx);
await this.testCaseExecutionRepository.markAllPendingAsCancelled(testRun.id, trx);
testRunEndStatusForTelemetry = 'cancelled';
});
} else {
// const aggregatedMetrics = metrics.getAggregatedMetrics();
const aggregatedMetrics = metrics.getAggregatedMetrics();
// TODO: mark test run as completed in DB and save metrics
this.logger.debug('Aggregated metrics', aggregatedMetrics);
await this.testRunRepository.markAsCompleted(testRun.id, aggregatedMetrics);
this.logger.debug('Test run finished', { workflowId, testRunId: testRun.id });
@@ -291,16 +455,16 @@ export class TestRunnerService {
});
await dbManager.transaction(async (trx) => {
// TODO: mark test run as cancelled in DB
await this.testRunRepository.markAsCancelled(testRun.id, trx);
await this.testCaseExecutionRepository.markAllPendingAsCancelled(testRun.id, trx);
});
testRunEndStatusForTelemetry = 'cancelled';
} else if (e instanceof TestRunError) {
// TODO: mark test run as error
await this.testRunRepository.markAsError(testRun.id, e.code, e.extra as IDataObject);
testRunEndStatusForTelemetry = 'error';
} else {
// TODO: mark test run as error
await this.testRunRepository.markAsError(testRun.id, 'UNKNOWN_ERROR');
testRunEndStatusForTelemetry = 'error';
throw e;
}
@@ -338,7 +502,7 @@ export class TestRunnerService {
// If there is no abort controller, just mark the test run and all its pending test case executions as cancelled
await dbManager.transaction(async (trx) => {
// TODO: mark test run as cancelled in DB
await this.testRunRepository.markAsCancelled(testRunId, trx);
await this.testCaseExecutionRepository.markAllPendingAsCancelled(testRunId, trx);
});
}

View File

@@ -1,67 +0,0 @@
import assert from 'assert';
import type { IRunExecutionData, IPinData, IWorkflowBase } from 'n8n-workflow';
import { TestCaseExecutionError } from '@/evaluation.ee/test-runner/errors.ee';
// Entity representing a node in a workflow under test, for which data should be mocked during test execution
export type MockedNodeItem = {
name?: string;
id: string;
};
/**
* Extracts the execution data from the past execution
* and creates a pin data object from it for the given workflow.
* It uses a list of mocked nodes defined in a test definition
* to decide which nodes to pin.
*/
export function createPinData(
workflow: IWorkflowBase,
mockedNodes: MockedNodeItem[],
executionData: IRunExecutionData,
pastWorkflowData?: IWorkflowBase,
) {
const pinData = {} as IPinData;
const workflowNodeIds = new Map(workflow.nodes.map((node) => [node.id, node.name]));
// If the past workflow data is provided, use it to create a map between node IDs and node names
const pastWorkflowNodeIds = new Map<string, string>();
if (pastWorkflowData) {
for (const node of pastWorkflowData.nodes) {
pastWorkflowNodeIds.set(node.id, node.name);
}
}
for (const mockedNode of mockedNodes) {
assert(mockedNode.id, 'Mocked node ID is missing');
const nodeName = workflowNodeIds.get(mockedNode.id);
// If mocked node is still present in the workflow
if (nodeName) {
// Try to restore node name from past execution data (it might have been renamed between past execution and up-to-date workflow)
const pastNodeName = pastWorkflowNodeIds.get(mockedNode.id) ?? nodeName;
const nodeData = executionData.resultData.runData[pastNodeName];
if (nodeData?.[0]?.data?.main?.[0]) {
pinData[nodeName] = nodeData[0]?.data?.main?.[0];
} else {
throw new TestCaseExecutionError('MOCKED_NODE_DOES_NOT_EXIST');
}
}
}
return pinData;
}
/**
* Returns the trigger node of the past execution.
* The trigger node is the node that has no source and has run data.
*/
export function getPastExecutionTriggerNode(executionData: IRunExecutionData) {
return Object.keys(executionData.resultData.runData).find((nodeName) => {
const data = executionData.resultData.runData[nodeName];
return !data[0].source || data[0].source.length === 0 || data[0].source[0] === null;
});
}

View File

@@ -65,7 +65,7 @@ export class TestRunsController {
}
}
@Get('/:workflowId/test-runs/:id/cases')
@Get('/:workflowId/test-runs/:id/test-cases')
async getTestCases(req: TestRunsRequest.GetCases) {
await this.getTestRun(req.params.id, req.params.workflowId, req.user);

View File

@@ -6,6 +6,7 @@ import { mockInstance } from 'n8n-core/test/utils';
import type { IWorkflowBase } from 'n8n-workflow';
import { TestRunnerService } from '@/evaluation.ee/test-runner/test-runner.service.ee';
import { createTestRun, createTestCaseExecution } from '@test-integration/db/evaluation';
import { createUserShell } from '@test-integration/db/users';
import { createWorkflow } from '@test-integration/db/workflows';
import * as testDb from '@test-integration/test-db';
@@ -18,6 +19,7 @@ let otherWorkflow: IWorkflowBase;
let ownerShell: User;
const testRunner = mockInstance(TestRunnerService);
let testRunRepository: TestRunRepository;
const testServer = utils.setupTestServer({
endpointGroups: ['workflows', 'evaluation'],
@@ -30,7 +32,9 @@ beforeAll(async () => {
});
beforeEach(async () => {
await testDb.truncate(['TestRun', 'WorkflowEntity', 'SharedWorkflow']);
await testDb.truncate(['TestRun', 'TestCaseExecution', 'WorkflowEntity', 'SharedWorkflow']);
testRunRepository = Container.get(TestRunRepository);
workflowUnderTest = await createWorkflow({ name: 'workflow-under-test' }, ownerShell);
otherWorkflow = await createWorkflow({ name: 'other-workflow' });
@@ -51,17 +55,15 @@ describe('GET /workflows/:workflowId/test-runs', () => {
// expect(resp.statusCode).toBe(404);
// });
// TODO: replace with workflow that is not accessible to the user
// test('should return 404 if user does not have access to test definition', async () => {
// const resp = await authOwnerAgent.get(
// `/evaluation/test-definitions/${otherTestDefinition.id}/runs`,
// );
//
// expect(resp.statusCode).toBe(404);
// });
test('should return 404 if user does not have access to workflow', async () => {
const testRun = await testRunRepository.createTestRun(otherWorkflow.id);
test('should retrieve list of runs for a workflow', async () => {
const testRunRepository = Container.get(TestRunRepository);
const resp = await authOwnerAgent.get(`/workflows/${otherWorkflow.id}/test-runs/${testRun.id}`);
expect(resp.statusCode).toBe(404);
});
test('should retrieve list of test runs for a workflow', async () => {
const testRun = await testRunRepository.createTestRun(workflowUnderTest.id);
const resp = await authOwnerAgent.get(`/workflows/${workflowUnderTest.id}/test-runs`);
@@ -78,7 +80,6 @@ describe('GET /workflows/:workflowId/test-runs', () => {
});
test('should retrieve list of test runs for a workflow with pagination', async () => {
const testRunRepository = Container.get(TestRunRepository);
const testRun1 = await testRunRepository.createTestRun(workflowUnderTest.id);
// Mark as running just to make a slight delay between the runs
await testRunRepository.markAsRunning(testRun1.id);
@@ -112,11 +113,34 @@ describe('GET /workflows/:workflowId/test-runs', () => {
}),
]);
});
test('should retrieve list of test runs for a shared workflow', async () => {
const memberShell = await createUserShell('global:member');
const memberAgent = testServer.authAgentFor(memberShell);
const memberPersonalProject = await Container.get(
ProjectRepository,
).getPersonalProjectForUserOrFail(memberShell.id);
// Share workflow with a member
const sharingResponse = await authOwnerAgent
.put(`/workflows/${workflowUnderTest.id}/share`)
.send({ shareWithIds: [memberPersonalProject.id] });
expect(sharingResponse.statusCode).toBe(200);
// Create a test run for the shared workflow
await testRunRepository.createTestRun(workflowUnderTest.id);
// Check if member can retrieve the test runs of a shared workflow
const resp = await memberAgent.get(`/workflows/${workflowUnderTest.id}/test-runs`);
expect(resp.statusCode).toBe(200);
expect(resp.body.data).toHaveLength(1);
});
});
describe('GET /workflows/:workflowId/test-runs/:id', () => {
test('should retrieve specific test run for a workflow', async () => {
const testRunRepository = Container.get(TestRunRepository);
const testRun = await testRunRepository.createTestRun(workflowUnderTest.id);
const resp = await authOwnerAgent.get(
@@ -141,7 +165,6 @@ describe('GET /workflows/:workflowId/test-runs/:id', () => {
});
test('should return 404 if user does not have access to the workflow', async () => {
const testRunRepository = Container.get(TestRunRepository);
const testRun = await testRunRepository.createTestRun(otherWorkflow.id);
const resp = await authOwnerAgent.get(`/workflows/${otherWorkflow.id}/test-runs/${testRun.id}`);
@@ -164,7 +187,6 @@ describe('GET /workflows/:workflowId/test-runs/:id', () => {
expect(sharingResponse.statusCode).toBe(200);
// Create a test run for the shared workflow
const testRunRepository = Container.get(TestRunRepository);
const testRun = await testRunRepository.createTestRun(workflowUnderTest.id);
// Check if member can retrieve the test run of a shared workflow
@@ -181,9 +203,8 @@ describe('GET /workflows/:workflowId/test-runs/:id', () => {
});
});
describe('DELETE /evaluation/test-definitions/:testDefinitionId/runs/:id', () => {
test('should delete test run for a test definition', async () => {
const testRunRepository = Container.get(TestRunRepository);
describe('DELETE /workflows/:workflowId/test-runs/:id', () => {
test('should delete test run of a workflow', async () => {
const testRun = await testRunRepository.createTestRun(workflowUnderTest.id);
const resp = await authOwnerAgent.delete(
@@ -203,8 +224,7 @@ describe('DELETE /evaluation/test-definitions/:testDefinitionId/runs/:id', () =>
expect(resp.statusCode).toBe(404);
});
test('should return 404 if user does not have access to test definition', async () => {
const testRunRepository = Container.get(TestRunRepository);
test('should return 404 if user does not have access to workflow', async () => {
const testRun = await testRunRepository.createTestRun(otherWorkflow.id);
const resp = await authOwnerAgent.delete(
@@ -215,9 +235,8 @@ describe('DELETE /evaluation/test-definitions/:testDefinitionId/runs/:id', () =>
});
});
describe('POST /evaluation/test-definitions/:testDefinitionId/runs/:id/cancel', () => {
describe('POST /workflows/:workflowId/test-runs/:id/cancel', () => {
test('should cancel test run', async () => {
const testRunRepository = Container.get(TestRunRepository);
const testRun = await testRunRepository.createTestRun(workflowUnderTest.id);
jest.spyOn(testRunRepository, 'markAsCancelled');
@@ -247,7 +266,6 @@ describe('POST /evaluation/test-definitions/:testDefinitionId/runs/:id/cancel',
});
test('should return 404 if user does not have access to the workflow', async () => {
const testRunRepository = Container.get(TestRunRepository);
const testRun = await testRunRepository.createTestRun(otherWorkflow.id);
const resp = await authOwnerAgent.post(
@@ -257,3 +275,111 @@ describe('POST /evaluation/test-definitions/:testDefinitionId/runs/:id/cancel',
expect(resp.statusCode).toBe(404);
});
});
describe('GET /workflows/:workflowId/test-runs/:id/test-cases', () => {
test('should retrieve test cases for a specific test run', async () => {
// Create a test run
const testRun = await createTestRun(workflowUnderTest.id);
// Create some test case executions for this test run
await createTestCaseExecution(testRun.id, {
status: 'success',
runAt: new Date(),
completedAt: new Date(),
metrics: { accuracy: 0.95 },
});
await createTestCaseExecution(testRun.id, {
status: 'error',
errorCode: 'UNKNOWN_ERROR',
runAt: new Date(),
completedAt: new Date(),
});
const resp = await authOwnerAgent.get(
`/workflows/${workflowUnderTest.id}/test-runs/${testRun.id}/test-cases`,
);
expect(resp.statusCode).toBe(200);
expect(resp.body.data).toHaveLength(2);
expect(resp.body.data).toEqual(
expect.arrayContaining([
expect.objectContaining({
status: 'success',
metrics: { accuracy: 0.95 },
}),
expect.objectContaining({
status: 'error',
errorCode: 'UNKNOWN_ERROR',
}),
]),
);
});
test('should return empty array when no test cases exist for a test run', async () => {
// Create a test run with no test cases
const testRun = await createTestRun(workflowUnderTest.id);
const resp = await authOwnerAgent.get(
`/workflows/${workflowUnderTest.id}/test-runs/${testRun.id}/test-cases`,
);
expect(resp.statusCode).toBe(200);
expect(resp.body.data).toEqual([]);
});
test('should return 404 if test run does not exist', async () => {
const resp = await authOwnerAgent.get(
`/workflows/${workflowUnderTest.id}/test-runs/non-existent-id/test-cases`,
);
expect(resp.statusCode).toBe(404);
});
test('should return 404 if user does not have access to the workflow', async () => {
const testRun = await createTestRun(otherWorkflow.id);
const resp = await authOwnerAgent.get(
`/workflows/${otherWorkflow.id}/test-runs/${testRun.id}/test-cases`,
);
expect(resp.statusCode).toBe(404);
});
test('should return test cases for a shared workflow', async () => {
const memberShell = await createUserShell('global:member');
const memberAgent = testServer.authAgentFor(memberShell);
const memberPersonalProject = await Container.get(
ProjectRepository,
).getPersonalProjectForUserOrFail(memberShell.id);
// Share workflow with a member
const sharingResponse = await authOwnerAgent
.put(`/workflows/${workflowUnderTest.id}/share`)
.send({ shareWithIds: [memberPersonalProject.id] });
expect(sharingResponse.statusCode).toBe(200);
// Create a test run with test cases
const testRun = await createTestRun(workflowUnderTest.id);
await createTestCaseExecution(testRun.id, {
status: 'success',
runAt: new Date(),
completedAt: new Date(),
metrics: { precision: 0.87 },
});
// Check if member can retrieve the test cases of a shared workflow
const resp = await memberAgent.get(
`/workflows/${workflowUnderTest.id}/test-runs/${testRun.id}/test-cases`,
);
expect(resp.statusCode).toBe(200);
expect(resp.body.data).toHaveLength(1);
expect(resp.body.data[0]).toMatchObject({
status: 'success',
metrics: { precision: 0.87 },
});
});
});

View File

@@ -0,0 +1,68 @@
import { TestRunRepository, TestCaseExecutionRepository } from '@n8n/db';
import type { TestRun } from '@n8n/db';
import type { TestCaseExecution } from '@n8n/db';
import type { AggregatedTestRunMetrics } from '@n8n/db';
import type { TestCaseExecutionErrorCode, TestRunErrorCode } from '@n8n/db';
import { Container } from '@n8n/di';
import type { IDataObject } from 'n8n-workflow';
/**
* Creates a test run for a workflow
*/
export const createTestRun = async (
workflowId: string,
options: {
status?: TestRun['status'];
runAt?: Date | null;
completedAt?: Date | null;
metrics?: AggregatedTestRunMetrics;
errorCode?: TestRunErrorCode;
errorDetails?: IDataObject;
} = {},
) => {
const testRunRepository = Container.get(TestRunRepository);
const testRun = testRunRepository.create({
workflow: { id: workflowId },
status: options.status ?? 'new',
runAt: options.runAt ?? null,
completedAt: options.completedAt ?? null,
metrics: options.metrics ?? {},
errorCode: options.errorCode,
errorDetails: options.errorDetails,
});
return await testRunRepository.save(testRun);
};
/**
* Creates a test case execution for a test run
*/
export const createTestCaseExecution = async (
testRunId: string,
options: {
status?: TestCaseExecution['status'];
runAt?: Date | null;
completedAt?: Date | null;
metrics?: Record<string, number>;
errorCode?: TestCaseExecutionErrorCode;
errorDetails?: IDataObject;
executionId?: string;
pastExecutionId?: string;
} = {},
) => {
const testCaseExecutionRepository = Container.get(TestCaseExecutionRepository);
const testCaseExecution = testCaseExecutionRepository.create({
testRun: { id: testRunId },
status: options.status ?? 'success',
runAt: options.runAt ?? null,
completedAt: options.completedAt ?? null,
metrics: options.metrics ?? {},
errorCode: options.errorCode,
errorDetails: options.errorDetails,
executionId: options.executionId,
});
return await testCaseExecutionRepository.save(testCaseExecution);
};

View File

@@ -0,0 +1,66 @@
import { TestRunRepository } from '@n8n/db';
import type { IWorkflowDb, WorkflowEntity } from '@n8n/db';
import { Container } from '@n8n/di';
import { createTestCaseExecution, createTestRun } from '@test-integration/db/evaluation';
import { createWorkflow } from './shared/db/workflows';
import * as testDb from './shared/test-db';
describe('TestRunRepository', () => {
let testRunRepository: TestRunRepository;
beforeAll(async () => {
await testDb.init();
testRunRepository = Container.get(TestRunRepository);
});
afterEach(async () => {
await testDb.truncate(['User', 'WorkflowEntity', 'TestRun', 'TestCaseExecution']);
});
afterAll(async () => {
await testDb.terminate();
});
describe('getTestRunSummaryById', () => {
let workflow: IWorkflowDb & WorkflowEntity;
beforeAll(async () => {
workflow = await createWorkflow();
});
it('should return the final result of a test run', async () => {
const testRun = await createTestRun(workflow.id, {
status: 'completed',
runAt: new Date(),
completedAt: new Date(),
metrics: { total: 1, success: 1 },
});
await Promise.all([
createTestCaseExecution(testRun.id, {
status: 'success',
}),
createTestCaseExecution(testRun.id, {
status: 'success',
}),
]);
const result = await testRunRepository.getTestRunSummaryById(testRun.id);
expect(result).toEqual(
expect.objectContaining({
id: testRun.id,
workflowId: workflow.id,
status: 'completed',
finalResult: 'success',
runAt: expect.any(Date),
completedAt: expect.any(Date),
metrics: { total: 1, success: 1 },
}),
);
});
});
});

View File

@@ -1,14 +0,0 @@
{
"node": "n8n-nodes-base.evaluationMetrics",
"nodeVersion": "1.0",
"codexVersion": "1.0",
"categories": ["Evaluation", "Core Nodes"],
"resources": {
"primaryDocumentation": [
{
"url": "https://docs.n8n.io/integrations/builtin/core-nodes/n8n-nodes-base.evaluationmetrics/"
}
]
},
"alias": ["Metric"]
}

View File

@@ -1,109 +0,0 @@
import type {
AssignmentCollectionValue,
FieldType,
IExecuteFunctions,
INodeExecutionData,
INodeType,
INodeTypeDescription,
} from 'n8n-workflow';
import { NodeConnectionTypes, NodeOperationError } from 'n8n-workflow';
import { composeReturnItem, validateEntry } from '../Set/v2/helpers/utils';
export class EvaluationMetrics implements INodeType {
description: INodeTypeDescription = {
displayName: 'Evaluation Metrics',
name: 'evaluationMetrics',
icon: 'fa:check-double',
group: ['input'],
iconColor: 'light-green',
version: 1,
description: 'Define the metrics returned for workflow evaluation',
defaults: {
name: 'Evaluation Metrics',
color: '#29A568',
},
inputs: [NodeConnectionTypes.Main],
outputs: [NodeConnectionTypes.Main],
properties: [
{
displayName:
"Define the evaluation metrics returned in your report. Only numeric values are supported. <a href='https://docs.n8n.io/integrations/builtin/core-nodes/n8n-nodes-base.evaluationmetric/' target='_blank'>More Info</a>",
name: 'notice',
type: 'notice',
default: '',
},
{
displayName: 'Metrics to Return',
name: 'metrics',
type: 'assignmentCollection',
default: {
assignments: [
{
name: '',
value: '',
type: 'number',
},
],
},
typeOptions: {
assignment: {
disableType: true,
defaultType: 'number',
},
},
},
],
};
async execute(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
const items = this.getInputData();
const metrics: INodeExecutionData[] = [];
for (let i = 0; i < items.length; i++) {
const dataToSave = this.getNodeParameter('metrics', i, {}) as AssignmentCollectionValue;
const newItem: INodeExecutionData = {
json: {},
pairedItem: { item: i },
};
const newData = Object.fromEntries(
(dataToSave?.assignments ?? []).map((assignment) => {
const assignmentValue =
typeof assignment.value === 'number' ? assignment.value : Number(assignment.value);
if (isNaN(assignmentValue)) {
throw new NodeOperationError(
this.getNode(),
`Invalid numeric value: "${assignment.value}". Please provide a valid number.`,
);
}
const { name, value } = validateEntry(
assignment.name,
assignment.type as FieldType,
assignmentValue,
this.getNode(),
i,
false,
1,
);
return [name, value];
}),
);
const returnItem = composeReturnItem.call(
this,
i,
newItem,
newData,
{ dotNotation: false, include: 'none' },
1,
);
metrics.push(returnItem);
}
return [metrics];
}
}

View File

@@ -1,111 +0,0 @@
import { mock } from 'jest-mock-extended';
import type { INodeTypes, IExecuteFunctions, AssignmentCollectionValue } from 'n8n-workflow';
import { NodeOperationError } from 'n8n-workflow';
import { EvaluationMetrics } from '../EvaluationMetrics.node';
describe('EvaluationMetrics Node', () => {
const nodeTypes = mock<INodeTypes>();
const evaluationMetricsNode = new EvaluationMetrics();
let mockExecuteFunction: IExecuteFunctions;
function getMockExecuteFunction(metrics: AssignmentCollectionValue['assignments']) {
return {
getInputData: jest.fn().mockReturnValue([{}]),
getNodeParameter: jest.fn().mockReturnValueOnce({
assignments: metrics,
}),
getNode: jest.fn().mockReturnValue({
typeVersion: 1,
}),
} as unknown as IExecuteFunctions;
}
beforeAll(() => {
mockExecuteFunction = getMockExecuteFunction([
{
id: '1',
name: 'Accuracy',
value: 0.95,
type: 'number',
},
{
id: '2',
name: 'Latency',
value: 100,
type: 'number',
},
]);
nodeTypes.getByName.mockReturnValue(evaluationMetricsNode);
jest.clearAllMocks();
});
describe('execute', () => {
it('should output the defined metrics', async () => {
const result = await evaluationMetricsNode.execute.call(mockExecuteFunction);
expect(result).toHaveLength(1);
expect(result[0]).toHaveLength(1);
const outputItem = result[0][0].json;
expect(outputItem).toEqual({
Accuracy: 0.95,
Latency: 100,
});
});
it('should handle no metrics defined', async () => {
const result = await evaluationMetricsNode.execute.call(mockExecuteFunction);
expect(result).toHaveLength(1);
expect(result[0]).toHaveLength(1);
expect(result[0][0].json).toEqual({});
});
it('should convert string values to numbers', async () => {
const mockExecuteWithStringValues = getMockExecuteFunction([
{
id: '1',
name: 'Accuracy',
value: '0.95',
type: 'number',
},
{
id: '2',
name: 'Latency',
value: '100',
type: 'number',
},
]);
const result = await evaluationMetricsNode.execute.call(mockExecuteWithStringValues);
expect(result).toHaveLength(1);
expect(result[0]).toHaveLength(1);
const outputItem = result[0][0].json;
expect(outputItem).toEqual({
Accuracy: 0.95,
Latency: 100,
});
});
it('should throw error for non-numeric string values', async () => {
const mockExecuteWithInvalidValue = getMockExecuteFunction([
{
id: '1',
name: 'Accuracy',
value: 'not-a-number',
type: 'number',
},
]);
await expect(evaluationMetricsNode.execute.call(mockExecuteWithInvalidValue)).rejects.toThrow(
NodeOperationError,
);
});
});
});

View File

@@ -514,7 +514,6 @@
"dist/nodes/ExecuteWorkflow/ExecuteWorkflow/ExecuteWorkflow.node.js",
"dist/nodes/ExecuteWorkflow/ExecuteWorkflowTrigger/ExecuteWorkflowTrigger.node.js",
"dist/nodes/ExecutionData/ExecutionData.node.js",
"dist/nodes/EvaluationMetrics/EvaluationMetrics.node.js",
"dist/nodes/Facebook/FacebookGraphApi.node.js",
"dist/nodes/Facebook/FacebookTrigger.node.js",
"dist/nodes/FacebookLeadAds/FacebookLeadAdsTrigger.node.js",