feat: Track inputs and outputs in Evaluations (#17404)

This commit is contained in:
Mutasem Aldmour
2025-07-24 18:58:37 +02:00
committed by GitHub
parent 3f2e43e919
commit c18fabb419
20 changed files with 1431 additions and 60 deletions

View File

@@ -8,6 +8,58 @@ import {
} from './CannedMetricPrompts.ee';
import { document, sheet } from '../../Google/Sheet/GoogleSheetsTrigger.node';
/**
 * Parameter definitions displayed when the node's `operation` is `setInputs`:
 * an explanatory notice followed by a repeatable list of name/value pairs.
 */
export const setInputsProperties: INodeProperties[] = [
// Informational notice (type 'notice') explaining where the configured inputs will appear.
{
displayName:
'For adding columns from your dataset to the evaluation results. Anything you add here will be displayed in the evaluations tab, not the Google Sheet.',
name: 'setInputsNotice',
type: 'notice',
default: '',
displayOptions: {
show: {
operation: ['setInputs'],
},
},
},
// Repeatable collection of inputs; each entry holds an `inputName` and an `inputValue`.
{
displayName: 'Inputs',
name: 'inputs',
placeholder: 'Add Input',
type: 'fixedCollection',
typeOptions: {
multipleValueButtonText: 'Add Input',
multipleValues: true,
},
default: {},
options: [
{
// NOTE(review): the label reads 'Filter' although the entries are inputs — confirm this is intended.
displayName: 'Filter',
name: 'values',
values: [
{
displayName: 'Name',
name: 'inputName',
type: 'string',
default: '',
requiresDataPath: 'single',
},
{
displayName: 'Value',
name: 'inputValue',
type: 'string',
default: '',
},
],
},
],
displayOptions: {
show: {
operation: ['setInputs'],
},
},
},
];
export const setOutputProperties: INodeProperties[] = [
{
displayName: 'Credentials',
@@ -53,6 +105,7 @@ export const setOutputProperties: INodeProperties[] = [
name: 'outputName',
type: 'string',
default: '',
requiresDataPath: 'single',
},
{
displayName: 'Value',

View File

@@ -8,6 +8,7 @@ import type {
import {
setCheckIfEvaluatingProperties,
setInputsProperties,
setMetricsProperties,
setOutputProperties,
} from './Description.node';
@@ -16,9 +17,10 @@ import { listSearch, loadOptions, credentialTest } from '../methods';
import {
checkIfEvaluating,
setMetrics,
setInputs,
getInputConnectionTypes,
getOutputConnectionTypes,
setOutputs,
setOutput,
setInputs,
} from '../utils/evaluationUtils';
export class Evaluation implements INodeType {
@@ -35,8 +37,8 @@ export class Evaluation implements INodeType {
name: 'Evaluation',
color: '#c3c9d5',
},
inputs: `={{(${setInputs})($parameter)}}`,
outputs: `={{(${setOutputs})($parameter)}}`,
inputs: `={{(${getInputConnectionTypes})($parameter)}}`,
outputs: `={{(${getOutputConnectionTypes})($parameter)}}`,
codex: {
alias: ['Test', 'Metrics', 'Evals', 'Set Output', 'Set Metrics'],
},
@@ -70,6 +72,10 @@ export class Evaluation implements INodeType {
type: 'options',
noDataExpression: true,
options: [
{
name: 'Set Inputs',
value: 'setInputs',
},
{
name: 'Set Outputs',
value: 'setOutputs',
@@ -86,6 +92,7 @@ export class Evaluation implements INodeType {
default: 'setOutputs',
},
authentication,
...setInputsProperties,
...setOutputProperties,
...setMetricsProperties,
...setCheckIfEvaluatingProperties,
@@ -98,12 +105,15 @@ export class Evaluation implements INodeType {
const operation = this.getNodeParameter('operation', 0);
if (operation === 'setOutputs') {
return await setOutput.call(this);
return await setOutputs.call(this);
} else if (operation === 'setInputs') {
return setInputs.call(this);
} else if (operation === 'setMetrics') {
return await setMetrics.call(this);
} else {
// operation === 'checkIfEvaluating'
} else if (operation === 'checkIfEvaluating') {
return await checkIfEvaluating.call(this);
}
throw new Error('Unsupported Operation');
}
}

View File

@@ -0,0 +1,216 @@
import type { IExecuteFunctions } from 'n8n-workflow';
import { UserError } from 'n8n-workflow';
import { setInputs, setOutputs } from '../utils/evaluationUtils';
// Replace the Google Sheets helpers with jest mock functions so the tests never touch a real sheet;
// the describe('setOutputs') suite configures what they return.
jest.mock('../utils/evaluationTriggerUtils', () => ({
getGoogleSheet: jest.fn(),
getSheet: jest.fn(),
}));
import { getGoogleSheet, getSheet } from '../utils/evaluationTriggerUtils';
import { mockDeep } from 'jest-mock-extended';
// Unit tests for setInputs, executed against a deep-mocked IExecuteFunctions context.
describe('setInputs', () => {
// Builds the mocked execution context. The defaults describe the successful path: an evaluation
// trigger among the parent nodes, an expression evaluation that returns true, two configured
// inputs, one incoming item and 'evaluation' mode. Keys passed in `options` replace the defaults.
const mockThis = (options: Partial<any> = {}) =>
mockDeep<IExecuteFunctions>({
getNode: jest.fn().mockReturnValue({ name: 'EvalNode' }),
getParentNodes: jest
.fn()
.mockReturnValue([{ name: 'EvalTrigger', type: 'n8n-nodes-base.evaluationTrigger' }]),
evaluateExpression: jest.fn().mockReturnValue(true),
getNodeParameter: jest.fn().mockReturnValue([
{ inputName: 'foo', inputValue: 'bar' },
{ inputName: 'baz', inputValue: 'qux' },
]),
getInputData: jest.fn().mockReturnValue([{ json: { test: 1 } }]),
addExecutionHints: jest.fn(),
getMode: jest.fn().mockReturnValue('evaluation'),
...options,
});
// The configured name/value pairs are expected on the first returned item as `evaluationData`.
it('should return input data with evaluationData when inputs are provided', () => {
const context = mockThis();
const result = setInputs.call(context);
expect(result).toHaveLength(1);
expect(result[0][0].evaluationData).toEqual({ foo: 'bar', baz: 'qux' });
});
// An empty list of configured inputs is a user-facing error.
it('should throw UserError if no input fields are provided', () => {
const context = mockThis({
getNodeParameter: jest.fn().mockReturnValue([]),
});
expect(() => setInputs.call(context)).toThrow(UserError);
});
// No evaluation trigger among the parents: a hint is added and the items pass through unchanged.
it('should add execution hints and return input data if not started from evaluation trigger', () => {
const context = mockThis({
getParentNodes: jest.fn().mockReturnValue([]),
getInputData: jest.fn().mockReturnValue([{ json: { test: 2 } }]),
});
const result = setInputs.call(context);
expect(context.addExecutionHints).toHaveBeenCalledWith(
expect.objectContaining({
message: expect.stringContaining('No inputs were set'),
}),
);
expect(result).toEqual([[{ json: { test: 2 } }]]);
});
// The trigger exists, but the expression evaluation yields undefined; same pass-through outcome.
it('should add execution hints and return input data if evalTriggerOutput is falsy', () => {
const context = mockThis({
evaluateExpression: jest.fn().mockReturnValue(undefined),
getInputData: jest.fn().mockReturnValue([{ json: { test: 3 } }]),
});
const result = setInputs.call(context);
expect(context.addExecutionHints).toHaveBeenCalled();
expect(result).toEqual([[{ json: { test: 3 } }]]);
});
});
// Unit tests for setOutputs; the Google Sheets helpers are mocked, so only the calls made to
// them and the returned items are verified.
describe('setOutputs', () => {
// Stand-in for the Google Sheet helper object returned by the mocked getGoogleSheet.
const mockGoogleSheetInstance = {
updateRows: jest.fn(),
prepareDataForUpdatingByRowNumber: jest.fn().mockReturnValue({
updateData: [{ range: 'Sheet1!A2:C2', values: [['foo', 'bar']] }],
}),
batchUpdate: jest.fn(),
};
// Stand-in for the sheet resolved by the mocked getSheet.
const mockSheet = {
title: 'Sheet1',
};
// Builds the mocked execution context. By default the evaluation trigger is a parent node, the
// `isExecuted` expression evaluates to true, the `first().json` expression yields a row with
// row_number 2, two outputs are configured and the mode is 'evaluation'. Keys passed in
// `options` replace the defaults.
const mockThis = (options: Partial<any> = {}) =>
mockDeep<IExecuteFunctions>({
getNode: jest.fn().mockReturnValue({ name: 'EvalNode' }),
getParentNodes: jest
.fn()
.mockReturnValue([{ name: 'EvalTrigger', type: 'n8n-nodes-base.evaluationTrigger' }]),
evaluateExpression: jest.fn().mockImplementation((expr) => {
if (expr.includes('isExecuted')) return true;
if (expr.includes('first().json')) return { row_number: 2, inputField: 'inputValue' };
return true;
}),
getNodeParameter: jest.fn().mockReturnValue([
{ outputName: 'result', outputValue: 'success' },
{ outputName: 'score', outputValue: '95' },
]),
getInputData: jest.fn().mockReturnValue([{ json: { test: 1 } }]),
addExecutionHints: jest.fn(),
getMode: jest.fn().mockReturnValue('evaluation'),
...options,
});
// Clear recorded calls between tests so the `not.toHaveBeenCalled` assertions only see the
// current test, then wire the mocked sheet helpers.
beforeEach(() => {
jest.clearAllMocks();
(getGoogleSheet as jest.Mock).mockReturnValue(mockGoogleSheetInstance);
(getSheet as jest.Mock).mockResolvedValue(mockSheet);
});
// Successful path: header row rewritten, row data prepared and batch-updated, and the outputs
// attached to the first returned item as `evaluationData`.
it('should set outputs to Google Sheet and return evaluation data', async () => {
const context = mockThis();
const result = await setOutputs.call(context);
expect(getGoogleSheet).toHaveBeenCalled();
expect(getSheet).toHaveBeenCalledWith(mockGoogleSheetInstance);
expect(mockGoogleSheetInstance.updateRows).toHaveBeenCalledWith(
'Sheet1',
[['inputField', 'result', 'score']],
'RAW',
1,
);
expect(mockGoogleSheetInstance.prepareDataForUpdatingByRowNumber).toHaveBeenCalledWith(
[{ row_number: 2, result: 'success', score: '95' }],
'Sheet1!A:Z',
[['inputField', 'result', 'score']],
);
expect(mockGoogleSheetInstance.batchUpdate).toHaveBeenCalledWith(
[{ range: 'Sheet1!A2:C2', values: [['foo', 'bar']] }],
'RAW',
);
expect(result).toHaveLength(1);
expect(result[0][0].evaluationData).toEqual({ result: 'success', score: '95' });
});
// An empty list of configured outputs is a user-facing error with a fixed message.
it('should throw UserError if no output fields are provided', async () => {
const context = mockThis({
getNodeParameter: jest.fn().mockReturnValue([]),
});
await expect(setOutputs.call(context)).rejects.toThrow(UserError);
await expect(setOutputs.call(context)).rejects.toThrow('No outputs to set');
});
// No evaluation trigger among the parents: a hint is added, the items pass through unchanged
// and the sheet helper is never requested.
it('should add execution hints and return input data if not started from evaluation trigger', async () => {
const context = mockThis({
getParentNodes: jest.fn().mockReturnValue([]),
getInputData: jest.fn().mockReturnValue([{ json: { test: 2 } }]),
});
const result = await setOutputs.call(context);
expect(context.addExecutionHints).toHaveBeenCalledWith(
expect.objectContaining({
message: expect.stringContaining('No outputs were set'),
}),
);
expect(result).toEqual([[{ json: { test: 2 } }]]);
expect(getGoogleSheet).not.toHaveBeenCalled();
});
// The trigger exists, but its `isExecuted` expression evaluates to false; same pass-through outcome.
it('should add execution hints and return input data if evalTriggerOutput is falsy', async () => {
const context = mockThis({
evaluateExpression: jest.fn().mockImplementation((expr) => {
if (expr.includes('isExecuted')) return false;
return true;
}),
getInputData: jest.fn().mockReturnValue([{ json: { test: 3 } }]),
});
const result = await setOutputs.call(context);
expect(context.addExecutionHints).toHaveBeenCalled();
expect(result).toEqual([[{ json: { test: 3 } }]]);
expect(getGoogleSheet).not.toHaveBeenCalled();
});
// When the trigger row reports the literal string 'row_number' as its row number, the prepared
// update is expected to use row 1 instead.
it('should handle row_number as string "row_number" by using 1', async () => {
const context = mockThis({
evaluateExpression: jest.fn().mockImplementation((expr) => {
if (expr.includes('isExecuted')) return true;
if (expr.includes('first().json'))
return { row_number: 'row_number', inputField: 'inputValue' };
return true;
}),
});
const result = await setOutputs.call(context);
expect(mockGoogleSheetInstance.prepareDataForUpdatingByRowNumber).toHaveBeenCalledWith(
[{ row_number: 1, result: 'success', score: '95' }],
'Sheet1!A:Z',
[['inputField', 'result', 'score']],
);
expect(result).toHaveLength(1);
});
// An output name that is not among the row's existing columns is appended to the header row.
it('should add new column names that are not in existing columns', async () => {
const context = mockThis({
evaluateExpression: jest.fn().mockImplementation((expr) => {
if (expr.includes('isExecuted')) return true;
if (expr.includes('first().json')) return { row_number: 2, existingCol: 'value' };
return true;
}),
getNodeParameter: jest
.fn()
.mockReturnValue([{ outputName: 'newCol', outputValue: 'newValue' }]),
});
const result = await setOutputs.call(context);
expect(mockGoogleSheetInstance.updateRows).toHaveBeenCalledWith(
'Sheet1',
[['existingCol', 'newCol']],
'RAW',
1,
);
expect(result).toHaveLength(1);
});
});

View File

@@ -1,25 +1,61 @@
import { UserError, NodeOperationError } from 'n8n-workflow';
import { UserError, NodeOperationError, EVALUATION_TRIGGER_NODE_TYPE } from 'n8n-workflow';
import type {
INodeParameters,
IDataObject,
IExecuteFunctions,
INodeExecutionData,
JsonObject,
JsonValue,
} from 'n8n-workflow';
import { metricHandlers } from './metricHandlers';
import { getGoogleSheet, getSheet } from './evaluationTriggerUtils';
import { metricHandlers } from './metricHandlers';
import { composeReturnItem } from '../../Set/v2/helpers/utils';
import assert from 'node:assert';
export async function setOutput(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
/**
 * Returns the node's input items with `evaluationData` set on the first one.
 *
 * Only the first item is decorated because the test runner inspects nothing else. Outside of
 * 'evaluation' mode the property is still present but holds `undefined`. An empty input list is
 * returned as-is.
 *
 * @param data - Key/value pairs to expose as evaluation data.
 * @returns The input items, the first of which is a shallow copy carrying `evaluationData`.
 */
function withEvaluationData(this: IExecuteFunctions, data: JsonObject): INodeExecutionData[] {
	const items = this.getInputData();
	if (items.length === 0) {
		return items;
	}

	const evaluationData = this.getMode() === 'evaluation' ? data : undefined;
	const [firstItem, ...remainingItems] = items;

	return [{ ...firstItem, evaluationData }, ...remainingItems];
}
/**
 * Type guard for the `outputs.values` parameter.
 *
 * Accepts an array (possibly empty) in which every entry is a non-null object that has both an
 * `outputName` and an `outputValue` key, with `outputName` being a string. The type of
 * `outputValue` is not inspected.
 */
function isOutputsArray(
	value: unknown,
): value is Array<{ outputName: string; outputValue: JsonValue }> {
	if (!Array.isArray(value)) {
		return false;
	}

	return value.every((entry) => {
		if (typeof entry !== 'object' || entry === null) {
			return false;
		}
		const hasBothKeys = 'outputName' in entry && 'outputValue' in entry;
		return hasBothKeys && typeof entry.outputName === 'string';
	});
}
export async function setOutputs(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
const evaluationNode = this.getNode();
const parentNodes = this.getParentNodes(evaluationNode.name);
const evalTrigger = parentNodes.find((node) => node.type === 'n8n-nodes-base.evaluationTrigger');
const evalTriggerOutput = evalTrigger
const evalTrigger = parentNodes.find((node) => node.type === EVALUATION_TRIGGER_NODE_TYPE);
const isEvalTriggerExecuted = evalTrigger
? this.evaluateExpression(`{{ $('${evalTrigger?.name}').isExecuted }}`, 0)
: undefined;
: false;
if (!evalTrigger || !evalTriggerOutput) {
if (!evalTrigger || !isEvalTriggerExecuted) {
this.addExecutionHints({
message: "No outputs were set since the execution didn't start from an evaluation trigger",
location: 'outputPane',
@@ -27,10 +63,11 @@ export async function setOutput(this: IExecuteFunctions): Promise<INodeExecution
return [this.getInputData()];
}
const outputFields = this.getNodeParameter('outputs.values', 0, []) as Array<{
outputName: string;
outputValue: string;
}>;
const outputFields = this.getNodeParameter('outputs.values', 0, []);
assert(
isOutputsArray(outputFields),
'Invalid output fields format. Expected an array of objects with outputName and outputValue properties.',
);
if (outputFields.length === 0) {
throw new UserError('No outputs to set', {
@@ -66,10 +103,10 @@ export async function setOutput(this: IExecuteFunctions): Promise<INodeExecution
1, // header row
);
const outputs = outputFields.reduce((acc, { outputName, outputValue }) => {
const outputs = outputFields.reduce<JsonObject>((acc, { outputName, outputValue }) => {
acc[outputName] = outputValue;
return acc;
}, {} as IDataObject);
}, {});
const preparedData = googleSheetInstance.prepareDataForUpdatingByRowNumber(
[
@@ -87,7 +124,60 @@ export async function setOutput(this: IExecuteFunctions): Promise<INodeExecution
'RAW', // default value for Value Input Mode
);
return [this.getInputData()];
return [withEvaluationData.call(this, outputs)];
}
/**
 * Type guard for the `inputs.values` parameter.
 *
 * Accepts an array (possibly empty) in which every entry is a non-null object that has both an
 * `inputName` and an `inputValue` key, with `inputName` being a string. The type of
 * `inputValue` is not inspected.
 */
function isInputsArray(
	value: unknown,
): value is Array<{ inputName: string; inputValue: JsonValue }> {
	if (!Array.isArray(value)) {
		return false;
	}

	return value.every((entry) => {
		if (typeof entry !== 'object' || entry === null) {
			return false;
		}
		const hasBothKeys = 'inputName' in entry && 'inputValue' in entry;
		return hasBothKeys && typeof entry.inputName === 'string';
	});
}
/**
 * Handles the `setInputs` operation: collects the configured input name/value pairs and attaches
 * them, via `withEvaluationData`, to the first incoming item.
 *
 * When none of the parent nodes is an evaluation trigger, or the trigger's `isExecuted`
 * expression evaluates to a falsy value, an execution hint is added and the input items are
 * returned unchanged.
 *
 * @returns A single output branch with the (possibly decorated) input items.
 * @throws UserError when no input fields are configured.
 * @throws AssertionError when the `inputs.values` parameter does not have the expected shape.
 */
export function setInputs(this: IExecuteFunctions): INodeExecutionData[][] {
	const evaluationNode = this.getNode();
	const parentNodes = this.getParentNodes(evaluationNode.name);

	// Compare against the shared constant, as setOutputs does, rather than repeating the literal.
	const evalTrigger = parentNodes.find((node) => node.type === EVALUATION_TRIGGER_NODE_TYPE);

	// NOTE(review): the trigger's name is interpolated unescaped; a name containing a single quote
	// would yield an invalid expression — confirm whether node names can contain one.
	const isEvalTriggerExecuted = evalTrigger
		? this.evaluateExpression(`{{ $('${evalTrigger.name}').isExecuted }}`, 0)
		: false;

	if (!evalTrigger || !isEvalTriggerExecuted) {
		this.addExecutionHints({
			message: "No inputs were set since the execution didn't start from an evaluation trigger",
			location: 'outputPane',
		});
		return [this.getInputData()];
	}

	const inputFields = this.getNodeParameter('inputs.values', 0, []);
	assert(
		isInputsArray(inputFields),
		'Invalid input fields format. Expected an array of objects with inputName and inputValue properties.',
	);

	if (inputFields.length === 0) {
		throw new UserError('No inputs to set', {
			description: 'Add inputs using the Add Input button',
		});
	}

	// Fold the list of pairs into a single object keyed by input name; later duplicates win.
	const inputs = inputFields.reduce<JsonObject>((acc, { inputName, inputValue }) => {
		acc[inputName] = inputValue;
		return acc;
	}, {});

	return [withEvaluationData.call(this, inputs)];
}
export async function setMetrics(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
@@ -128,18 +218,18 @@ export async function checkIfEvaluating(this: IExecuteFunctions): Promise<INodeE
const parentNodes = this.getParentNodes(evaluationNode.name);
const evalTrigger = parentNodes.find((node) => node.type === 'n8n-nodes-base.evaluationTrigger');
const evalTriggerOutput = evalTrigger
const isEvalTriggerExecuted = evalTrigger
? this.evaluateExpression(`{{ $('${evalTrigger?.name}').isExecuted }}`, 0)
: undefined;
: false;
if (evalTriggerOutput) {
if (isEvalTriggerExecuted) {
return [this.getInputData(), normalExecutionResult];
} else {
return [evaluationExecutionResult, this.getInputData()];
}
}
export function setOutputs(parameters: INodeParameters) {
export function getOutputConnectionTypes(parameters: INodeParameters) {
if (parameters.operation === 'checkIfEvaluating') {
return [
{ type: 'main', displayName: 'Evaluation' },
@@ -150,7 +240,7 @@ export function setOutputs(parameters: INodeParameters) {
return [{ type: 'main' }];
}
export function setInputs(parameters: INodeParameters) {
export function getInputConnectionTypes(parameters: INodeParameters) {
if (
parameters.operation === 'setMetrics' &&
['correctness', 'helpfulness'].includes(parameters.metric as string)