feat: Track inputs and outputs in Evaluations (#17404)

Mutasem Aldmour
2025-07-24 18:58:37 +02:00
committed by GitHub
parent 3f2e43e919
commit c18fabb419
20 changed files with 1431 additions and 60 deletions

View File

@@ -1,5 +1,5 @@
import { Column, Entity, ManyToOne, OneToOne } from '@n8n/typeorm';
import type { IDataObject } from 'n8n-workflow';
import type { IDataObject, JsonObject } from 'n8n-workflow';
import { WithStringId, DateTimeColumn, JsonColumn } from './abstract-entity';
import type { ExecutionEntity } from './execution-entity';
@@ -54,4 +54,10 @@ export class TestCaseExecution extends WithStringId {
@JsonColumn({ nullable: true })
metrics: TestCaseRunMetrics;
@JsonColumn({ nullable: true })
inputs: JsonObject | null;
@JsonColumn({ nullable: true })
outputs: JsonObject | null;
}
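
A minimal sketch of how the two new nullable JSON columns behave; the types below are simplified stand-ins for the real entity and JsonObject, not the shipped code:

type JsonObjectSketch = { [key: string]: unknown };

interface TestCaseExecutionShape {
  id: string;
  metrics: Record<string, number> | null;
  inputs: JsonObjectSketch | null; // new nullable JSON column
  outputs: JsonObjectSketch | null; // new nullable JSON column
}

// Rows written before this change keep both columns null;
// new evaluation runs persist whatever the Evaluation node collected.
const legacyRow: TestCaseExecutionShape = { id: 'a', metrics: null, inputs: null, outputs: null };
const trackedRow: TestCaseExecutionShape = {
  id: 'b',
  metrics: { accuracy: 0.95 },
  inputs: { query: 'test query' },
  outputs: { result: 'success' },
};
console.log(legacyRow.inputs, trackedRow.outputs); // null { result: 'success' }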

View File

@@ -0,0 +1,11 @@
import type { MigrationContext, ReversibleMigration } from '../migration-types';
export class AddInputsOutputsToTestCaseExecution1752669793000 implements ReversibleMigration {
async up({ schemaBuilder: { addColumns, column } }: MigrationContext) {
await addColumns('test_case_execution', [column('inputs').json, column('outputs').json]);
}
async down({ schemaBuilder: { dropColumns } }: MigrationContext) {
await dropColumns('test_case_execution', ['inputs', 'outputs']);
}
}
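
A sketch of the migration's effect, driving the same addColumns/dropColumns calls through a hand-rolled schemaBuilder mock; the mock is an assumption for illustration, not n8n's real MigrationContext:

// Minimal schemaBuilder mock that records the calls the migration makes.
const calls: string[] = [];
const column = (name: string) => ({ json: { name, type: 'json' as const } });
const schemaBuilder = {
  column,
  addColumns: async (table: string, cols: Array<{ name: string }>) => {
    calls.push(`add ${table}: ${cols.map((c) => c.name).join(', ')}`);
  },
  dropColumns: async (table: string, names: string[]) => {
    calls.push(`drop ${table}: ${names.join(', ')}`);
  },
};

void (async () => {
  // Mirrors up() and down() from the migration above.
  await schemaBuilder.addColumns('test_case_execution', [column('inputs').json, column('outputs').json]);
  await schemaBuilder.dropColumns('test_case_execution', ['inputs', 'outputs']);
  console.log(calls);
  // ['add test_case_execution: inputs, outputs', 'drop test_case_execution: inputs, outputs']
})();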

View File

@@ -88,6 +88,7 @@ import { AddWorkflowArchivedColumn1745934666076 } from '../common/1745934666076-
import { DropRoleTable1745934666077 } from '../common/1745934666077-DropRoleTable';
import { AddProjectDescriptionColumn1747824239000 } from '../common/1747824239000-AddProjectDescriptionColumn';
import { AddLastActiveAtColumnToUser1750252139166 } from '../common/1750252139166-AddLastActiveAtColumnToUser';
import { AddInputsOutputsToTestCaseExecution1752669793000 } from '../common/1752669793000-AddInputsOutputsToTestCaseExecution';
import type { Migration } from '../migration-types';
import { UpdateParentFolderIdColumn1740445074052 } from '../mysqldb/1740445074052-UpdateParentFolderIdColumn';
@@ -183,4 +184,5 @@ export const mysqlMigrations: Migration[] = [
ClearEvaluation1745322634000,
AddProjectDescriptionColumn1747824239000,
AddLastActiveAtColumnToUser1750252139166,
AddInputsOutputsToTestCaseExecution1752669793000,
];

View File

@@ -1,4 +1,5 @@
import { AddMfaColumns1690000000030 } from './../common/1690000000040-AddMfaColumns';
import { AddInputsOutputsToTestCaseExecution1752669793000 } from './../common/1752669793000-AddInputsOutputsToTestCaseExecution';
import { InitialMigration1587669153312 } from './1587669153312-InitialMigration';
import { WebhookModel1589476000887 } from './1589476000887-WebhookModel';
import { CreateIndexStoppedAt1594828256133 } from './1594828256133-CreateIndexStoppedAt';
@@ -181,4 +182,5 @@ export const postgresMigrations: Migration[] = [
ClearEvaluation1745322634000,
AddProjectDescriptionColumn1747824239000,
AddLastActiveAtColumnToUser1750252139166,
AddInputsOutputsToTestCaseExecution1752669793000,
];

View File

@@ -85,7 +85,9 @@ import { AddWorkflowArchivedColumn1745934666076 } from '../common/1745934666076-
import { DropRoleTable1745934666077 } from '../common/1745934666077-DropRoleTable';
import { AddProjectDescriptionColumn1747824239000 } from '../common/1747824239000-AddProjectDescriptionColumn';
import { AddLastActiveAtColumnToUser1750252139166 } from '../common/1750252139166-AddLastActiveAtColumnToUser';
import { AddInputsOutputsToTestCaseExecution1752669793000 } from '../common/1752669793000-AddInputsOutputsToTestCaseExecution';
import type { Migration } from '../migration-types';
const sqliteMigrations: Migration[] = [
InitialMigration1588102412422,
WebhookModel1592445003908,
@@ -174,6 +176,7 @@ const sqliteMigrations: Migration[] = [
ClearEvaluation1745322634000,
AddProjectDescriptionColumn1747824239000,
AddLastActiveAtColumnToUser1750252139166,
AddInputsOutputsToTestCaseExecution1752669793000,
];
export { sqliteMigrations };

View File

@@ -22,7 +22,7 @@ export class TestRunRepository extends Repository<TestRun> {
super(TestRun, dataSource.manager);
}
async createTestRun(workflowId: string) {
async createTestRun(workflowId: string): Promise<TestRun> {
const testRun = this.create({
status: 'new',
workflow: {

View File

@@ -1026,7 +1026,7 @@ describe('TestRunnerService', () => {
}
});
it('should throw SET_METRICS_NODE_NOT_CONFIGURED when metrics node is disabled', () => {
it('should throw SET_METRICS_NODE_NOT_FOUND when metrics node is disabled', () => {
const workflow = mock<IWorkflowBase>({
nodes: [
{
@@ -1061,8 +1061,8 @@ describe('TestRunnerService', () => {
(testRunnerService as any).validateSetMetricsNodes(workflow);
} catch (error) {
expect(error).toBeInstanceOf(TestRunError);
expect(error.code).toBe('SET_METRICS_NODE_NOT_CONFIGURED');
expect(error.extra).toEqual({ node_name: 'Set Metrics' });
expect(error.code).toBe('SET_METRICS_NODE_NOT_FOUND');
expect(error.extra).toEqual({});
}
});

View File

@@ -7,6 +7,7 @@ import {
EVALUATION_NODE_TYPE,
EVALUATION_TRIGGER_NODE_TYPE,
ExecutionCancelledError,
NodeConnectionTypes,
} from 'n8n-workflow';
import type {
IDataObject,
@@ -31,6 +32,7 @@ import { Telemetry } from '@/telemetry';
import { WorkflowRunner } from '@/workflow-runner';
import { EvaluationMetrics } from './evaluation-metrics.ee';
import type { JsonObject } from 'n8n-workflow';
export interface TestRunMetadata {
testRunId: string;
@@ -360,21 +362,32 @@ export class TestRunnerService {
/**
* Get the evaluation nodes for a given operation from a workflow.
*/
static getEvaluationMetricsNodes(workflow: IWorkflowBase) {
static getEvaluationNodes(
workflow: IWorkflowBase,
operation: 'setMetrics' | 'setOutputs' | 'setInputs',
{ isDefaultOperation }: { isDefaultOperation: boolean } = { isDefaultOperation: false },
) {
return workflow.nodes.filter(
(node) => node.type === EVALUATION_NODE_TYPE && node.parameters.operation === 'setMetrics',
(node) =>
node.type === EVALUATION_NODE_TYPE &&
node.disabled !== true &&
(node.parameters.operation === operation ||
(isDefaultOperation && node.parameters.operation === undefined)),
);
}
/**
* Get the evaluation set metrics nodes from a workflow.
*/
static getEvaluationMetricsNodes(workflow: IWorkflowBase) {
return this.getEvaluationNodes(workflow, 'setMetrics');
}
/**
* Get the evaluation set outputs nodes from a workflow.
*/
static getEvaluationSetOutputsNodes(workflow: IWorkflowBase) {
return workflow.nodes.filter(
(node) =>
node.type === EVALUATION_NODE_TYPE &&
(node.parameters.operation === 'setOutputs' || node.parameters.operation === undefined),
);
return this.getEvaluationNodes(workflow, 'setOutputs', { isDefaultOperation: true });
}
/**
@@ -392,7 +405,7 @@ export class TestRunnerService {
});
}
const triggerOutput = triggerOutputData?.data?.main?.[0];
const triggerOutput = triggerOutputData?.data?.[NodeConnectionTypes.Main]?.[0];
if (!triggerOutput || triggerOutput.length === 0) {
throw new TestRunError('TEST_CASES_NOT_FOUND');
@@ -401,6 +414,22 @@ export class TestRunnerService {
return triggerOutput;
}
private getEvaluationData(
execution: IRun,
workflow: IWorkflowBase,
operation: 'setInputs' | 'setOutputs',
): JsonObject {
const evalNodes = TestRunnerService.getEvaluationNodes(workflow, operation);
return evalNodes.reduce<JsonObject>((accu, node) => {
const runs = execution.data.resultData.runData[node.name];
const data = runs?.[0]?.data?.[NodeConnectionTypes.Main]?.[0]?.[0]?.evaluationData ?? {};
Object.assign(accu, data);
return accu;
}, {});
}
/**
* Evaluation result is collected from all Evaluation Metrics nodes
*/
@@ -594,6 +623,9 @@ export class TestRunnerService {
...addedPredefinedMetrics,
};
const inputs = this.getEvaluationData(testCaseExecution, workflow, 'setInputs');
const outputs = this.getEvaluationData(testCaseExecution, workflow, 'setOutputs');
this.logger.debug(
'Test case metrics extracted (user-defined)',
addedUserDefinedMetrics,
@@ -609,6 +641,8 @@ export class TestRunnerService {
completedAt,
status: 'success',
metrics: combinedMetrics,
inputs,
outputs,
});
}
} catch (e) {
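
getEvaluationData above reads, for each matching Evaluation node, the evaluationData of the first item on the first main output of the node's first run, merging later nodes over earlier ones. A self-contained sketch with simplified stand-in types (the real IRun/runData shapes in n8n-workflow carry more fields):

type JsonObject = { [key: string]: unknown };
type Item = { json: JsonObject; evaluationData?: JsonObject };
type RunData = Record<string, Array<{ data?: { main?: Item[][] } }>>;

function collectEvaluationData(runData: RunData, nodeNames: string[]): JsonObject {
  return nodeNames.reduce<JsonObject>((accu, name) => {
    // First run, first 'main' branch, first item - mirroring the commit.
    const data = runData[name]?.[0]?.data?.main?.[0]?.[0]?.evaluationData ?? {};
    return Object.assign(accu, data);
  }, {});
}

const runData: RunData = {
  'Set Inputs': [{ data: { main: [[{ json: {}, evaluationData: { query: 'q1' } }]] } }],
};
console.log(collectEvaluationData(runData, ['Set Inputs', 'Missing Node'])); // { query: 'q1' }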

View File

@@ -3204,6 +3204,7 @@
"evaluation.runDetail.error.unknownError.solution": "View execution",
"evaluation.runDetail.error.noMetricsCollected": "No 'Set metrics' node executed",
"evaluation.runDetail.error.partialCasesFailed": "Finished with errors",
"evaluation.runDetail.notice.useSetInputs": "Tip: Show input columns from your dataset here by adding the evaluation node's 'set inputs' operation to your workflow",
"evaluation.runTest": "Run Test",
"evaluation.stopTest": "Stop Test",
"evaluation.cancelTestRun": "Cancel Test Run",

View File

@@ -1,5 +1,6 @@
import type { IRestApiContext } from '@n8n/rest-api-client';
import { makeRestApiRequest, request } from '@n8n/rest-api-client';
import type { JsonObject } from 'n8n-workflow';
export interface TestRunRecord {
id: string;
@@ -36,6 +37,8 @@ export interface TestCaseExecutionRecord {
metrics?: Record<string, number>;
errorCode?: string;
errorDetails?: Record<string, unknown>;
inputs?: JsonObject;
outputs?: JsonObject;
}
const getTestRunsEndpoint = (workflowId: string, runId?: string) =>
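
For illustration, a record matching the extended shape; the interface is re-declared locally (and trimmed) so the sketch stands alone:

interface TestCaseExecutionRecordSketch {
  id: string;
  metrics?: Record<string, number>;
  errorCode?: string;
  inputs?: Record<string, unknown>;
  outputs?: Record<string, unknown>;
}

const record: TestCaseExecutionRecordSketch = {
  id: 'test-case-1',
  metrics: { accuracy: 0.98 },
  inputs: { query: 'test query' },
  outputs: { result: 'success' },
};
console.log(Object.keys(record.inputs ?? {})); // ['query'] -> becomes an input column in the run detail view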

View File

@@ -173,6 +173,10 @@ defineSlots<{
overflow: hidden;
text-overflow: ellipsis;
border-bottom: 1px solid var(--border-color-light) !important;
> div {
max-height: 100px;
}
}
.cell {

View File

@@ -0,0 +1,445 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { createComponentRenderer } from '@/__tests__/render';
import { createTestingPinia } from '@pinia/testing';
import { waitFor } from '@testing-library/vue';
import { useEvaluationStore } from '@/stores/evaluation.store.ee';
import TestRunDetailView from './TestRunDetailView.vue';
import type { TestCaseExecutionRecord, TestRunRecord } from '@/api/evaluation.ee';
import type { IWorkflowDb } from '@/Interface';
import { mock } from 'vitest-mock-extended';
vi.mock('@/composables/useToast', () => ({
useToast: () => ({
showError: vi.fn(),
}),
}));
const mockRouter = {
currentRoute: {
value: {
params: {
runId: 'test-run-id',
name: 'test-workflow-id',
},
},
},
back: vi.fn(),
resolve: vi.fn(() => ({ href: '/test-execution-url' })),
};
vi.mock('vue-router', () => {
return {
RouterLink: vi.fn(),
useRoute: () => ({}),
useRouter: () => mockRouter,
};
});
const mockTestRun: TestRunRecord = {
id: 'test-run-id',
workflowId: 'test-workflow-id',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
completedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
metrics: {
accuracy: 0.95,
precision: 0.92,
},
finalResult: 'success',
};
const mockTestCases = [
mock<TestCaseExecutionRecord>({
id: 'test-case-1',
status: 'completed',
runAt: '2023-10-01T10:00:00Z',
executionId: 'execution-1',
metrics: {
accuracy: 0.98,
precision: 0.95,
},
inputs: {
input1: 'value1',
},
outputs: {
output1: 'result1',
},
}),
mock<TestCaseExecutionRecord>({
id: 'test-case-2',
status: 'error',
runAt: '2023-10-01T10:01:00Z',
executionId: 'execution-2',
errorCode: 'INTERRUPTED',
metrics: {
accuracy: 0.85,
precision: 0.88,
},
inputs: {
input1: 'value2',
},
outputs: {
output1: 'result2',
},
}),
];
const mockWorkflow = mock<IWorkflowDb>({
id: 'test-workflow-id',
name: 'Test Workflow',
active: true,
nodes: [],
connections: {},
createdAt: '2023-10-01T09:00:00Z',
updatedAt: '2023-10-01T09:00:00Z',
versionId: 'version-1',
tags: [],
settings: {},
pinData: {},
homeProject: { id: 'home-project', name: 'Home' },
sharedWithProjects: [],
scopes: [],
usedCredentials: [],
meta: {},
});
describe('TestRunDetailView', () => {
let evaluationStore: ReturnType<typeof useEvaluationStore>;
const renderComponent = createComponentRenderer(TestRunDetailView, {
pinia: createTestingPinia({
initialState: {
evaluation: {
testRunsById: {
'test-run-id': mockTestRun,
},
},
workflows: {
workflowsById: {
'test-workflow-id': mockWorkflow,
},
},
},
stubActions: false,
}),
});
beforeEach(() => {
evaluationStore = useEvaluationStore();
// Mock store methods
vi.mocked(evaluationStore.getTestRun).mockResolvedValue(mockTestRun);
vi.spyOn(evaluationStore, 'fetchTestCaseExecutions').mockResolvedValue(mockTestCases);
vi.clearAllMocks();
});
afterEach(() => {
vi.clearAllMocks();
});
it('should render component', () => {
const { container } = renderComponent();
expect(container).toBeTruthy();
});
it('should fetch test run data on mount', async () => {
renderComponent();
await waitFor(() => {
expect(evaluationStore.getTestRun).toHaveBeenCalledWith({
workflowId: 'test-workflow-id',
runId: 'test-run-id',
});
});
});
it('should display test run detail view', async () => {
const { container } = renderComponent();
await waitFor(() => {
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
});
});
it('should display summary cards', async () => {
const { container } = renderComponent();
await waitFor(() => {
const summaryCards = container.querySelectorAll('.summaryCard');
expect(summaryCards.length).toBeGreaterThan(0);
});
});
it('should handle error state', async () => {
const errorTestRun = {
...mockTestRun,
status: 'error' as const,
errorCode: 'TIMEOUT',
};
vi.mocked(evaluationStore.getTestRun).mockResolvedValue(errorTestRun);
const { container } = renderComponent();
await waitFor(() => {
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
});
});
it('should display metrics in summary', async () => {
const { container } = renderComponent();
await waitFor(() => {
const summaryCards = container.querySelectorAll('.summaryCard');
expect(summaryCards.length).toBeGreaterThan(2); // At least total cases, date, status + metrics
});
});
it('should display back navigation', async () => {
const { container } = renderComponent();
await waitFor(() => {
const backButton = container.querySelector('.backButton');
expect(backButton).toBeTruthy();
});
});
it('should display test table when data is loaded', async () => {
const { container } = renderComponent();
await waitFor(() => {
// TestTableBase component should be rendered
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
});
});
it('should handle partial failures', async () => {
// Test with cases that have errors
vi.spyOn(evaluationStore, 'fetchTestCaseExecutions').mockResolvedValue(mockTestCases);
const { container } = renderComponent();
await waitFor(() => {
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
});
});
it('should handle empty test cases', async () => {
vi.spyOn(evaluationStore, 'fetchTestCaseExecutions').mockResolvedValue([]);
const { container } = renderComponent();
await waitFor(() => {
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
});
});
it('should handle fetch errors', async () => {
const error = new Error('Failed to fetch');
vi.mocked(evaluationStore.getTestRun).mockRejectedValue(error);
const { container } = renderComponent();
await waitFor(() => {
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
});
});
it('should render scrollable summary section', async () => {
const { container } = renderComponent();
await waitFor(() => {
const scrollableSection = container.querySelector('.scrollableSummary');
expect(scrollableSection).toBeTruthy();
});
});
it('should display notice callout when no input columns and run is successful', async () => {
// Mock test cases with no inputs
const testCasesWithoutInputs = mockTestCases.map((tc) => ({
...tc,
inputs: {},
}));
vi.spyOn(evaluationStore, 'fetchTestCaseExecutions').mockResolvedValue(testCasesWithoutInputs);
const { container } = renderComponent();
await waitFor(() => {
// Should render the component
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
});
});
it('should display inputs correctly in test table', async () => {
const { container } = renderComponent();
await waitFor(() => {
expect(evaluationStore.fetchTestCaseExecutions).toHaveBeenCalledWith({
workflowId: 'test-workflow-id',
runId: 'test-run-id',
});
});
await waitFor(() => {
// Check that inputs are displayed
const testTable = container.querySelector('[data-test-id="test-definition-run-detail"]');
expect(testTable).toBeTruthy();
// Inputs should be rendered in the table
expect(container.textContent).toContain('value1');
expect(container.textContent).toContain('value2');
});
});
it('should display outputs correctly in test table', async () => {
const { container } = renderComponent();
await waitFor(() => {
expect(evaluationStore.fetchTestCaseExecutions).toHaveBeenCalledWith({
workflowId: 'test-workflow-id',
runId: 'test-run-id',
});
});
await waitFor(() => {
// Check that outputs are displayed
const testTable = container.querySelector('[data-test-id="test-definition-run-detail"]');
expect(testTable).toBeTruthy();
// Outputs should be rendered in the table
expect(container.textContent).toContain('result1');
expect(container.textContent).toContain('result2');
});
});
it('should display metrics correctly for individual test cases', async () => {
const { container } = renderComponent();
await waitFor(() => {
expect(evaluationStore.fetchTestCaseExecutions).toHaveBeenCalledWith({
workflowId: 'test-workflow-id',
runId: 'test-run-id',
});
});
await waitFor(() => {
// Check that metrics are displayed in the table
const testTable = container.querySelector('[data-test-id="test-definition-run-detail"]');
expect(testTable).toBeTruthy();
// Individual test case metrics should be shown
expect(container.textContent).toContain('0.98'); // accuracy for test-case-1
expect(container.textContent).toContain('0.95'); // precision for test-case-1
expect(container.textContent).toContain('0.85'); // accuracy for test-case-2
expect(container.textContent).toContain('0.88'); // precision for test-case-2
});
});
it('should display overall run metrics in summary cards', async () => {
const { container } = renderComponent();
await waitFor(() => {
// Check that overall metrics are displayed in summary
const summaryCards = container.querySelectorAll('.summaryCard');
expect(summaryCards.length).toBeGreaterThan(0);
// Overall run metrics should be shown
expect(container.textContent).toContain('0.95'); // overall accuracy
expect(container.textContent).toContain('0.92'); // overall precision
});
});
it('should handle test cases with missing metrics gracefully', async () => {
const testCasesWithMissingMetrics = [
mock<TestCaseExecutionRecord>({
id: 'test-case-3',
status: 'completed',
runAt: '2023-10-01T10:02:00Z',
executionId: 'execution-3',
inputs: { input1: 'value3' },
outputs: { output1: 'result3' },
// No metrics property
}),
];
vi.spyOn(evaluationStore, 'fetchTestCaseExecutions').mockResolvedValue(
testCasesWithMissingMetrics,
);
const { container } = renderComponent();
await waitFor(() => {
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
// Should still display inputs and outputs
expect(container.textContent).toContain('value3');
expect(container.textContent).toContain('result3');
});
});
it('should display error status for failed test cases', async () => {
const { container } = renderComponent();
await waitFor(() => {
expect(evaluationStore.fetchTestCaseExecutions).toHaveBeenCalledWith({
workflowId: 'test-workflow-id',
runId: 'test-run-id',
});
});
await waitFor(() => {
// Check that error status and error code are displayed
const testTable = container.querySelector('[data-test-id="test-definition-run-detail"]');
expect(testTable).toBeTruthy();
// Error status should be shown for test-case-2
expect(container.textContent).toContain('error');
expect(container.textContent).toContain('run was interrupted');
});
});
it('should handle complex input and output objects', async () => {
const testCasesWithComplexData = [
mock<TestCaseExecutionRecord>({
id: 'test-case-complex',
status: 'completed',
runAt: '2023-10-01T10:03:00Z',
executionId: 'execution-complex',
inputs: {
complexInput: {
nested: {
value: 'nested-value',
array: [1, 2, 3],
},
},
},
outputs: {
complexOutput: {
result: 'complex-result',
metadata: {
processed: true,
timestamp: '2023-10-01T10:03:00Z',
},
},
},
metrics: {
accuracy: 0.97,
precision: 0.94,
},
}),
];
vi.spyOn(evaluationStore, 'fetchTestCaseExecutions').mockResolvedValue(
testCasesWithComplexData,
);
const { container } = renderComponent();
await waitFor(() => {
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
// Complex data should be handled and displayed
expect(container.textContent).toContain('0.97');
expect(container.textContent).toContain('0.94');
expect(container.textContent).toContain('complexInput');
expect(container.textContent).toContain('complexOutput');
});
});
});

View File

@@ -15,6 +15,7 @@ import { useRouter } from 'vue-router';
import orderBy from 'lodash/orderBy';
import { statusDictionary } from '@/components/Evaluations.ee/shared/statusDictionary';
import { getErrorBaseKey } from '@/components/Evaluations.ee/shared/errorCodes';
import { getTestCasesColumns, mapToNumericColumns } from './utils';
const router = useRouter();
const toast = useToast();
@@ -65,28 +66,36 @@ const handleRowClick = (row: TestCaseExecutionRecord) => {
}
};
const inputColumns = computed(() => getTestCasesColumns(filteredTestCases.value, 'inputs'));
const columns = computed(
(): Array<TestTableColumn<TestCaseExecutionRecord & { index: number }>> => [
{
prop: 'index',
width: 100,
label: locale.baseText('evaluation.runDetail.testCase'),
sortable: true,
formatter: (row: TestCaseExecutionRecord & { index: number }) => `#${row.index}`,
},
{
prop: 'status',
label: locale.baseText('evaluation.listRuns.status'),
},
...Object.keys(run.value?.metrics ?? {}).map((metric) => ({
prop: `metrics.${metric}`,
label: metric,
sortable: true,
filter: true,
showHeaderTooltip: true,
formatter: (row: TestCaseExecutionRecord) => row.metrics?.[metric]?.toFixed(2) ?? '-',
})),
],
(): Array<TestTableColumn<TestCaseExecutionRecord & { index: number }>> => {
const specialKeys = ['promptTokens', 'completionTokens', 'totalTokens', 'executionTime'];
const metricColumns = Object.keys(run.value?.metrics ?? {}).filter(
(key) => !specialKeys.includes(key),
);
const specialColumns = specialKeys.filter((key) =>
run.value?.metrics ? key in run.value.metrics : false,
);
return [
{
prop: 'index',
width: 100,
label: locale.baseText('evaluation.runDetail.testCase'),
sortable: true,
formatter: (row: TestCaseExecutionRecord & { index: number }) => `#${row.index}`,
},
{
prop: 'status',
label: locale.baseText('evaluation.listRuns.status'),
},
...inputColumns.value,
...getTestCasesColumns(filteredTestCases.value, 'outputs'),
...mapToNumericColumns(metricColumns),
...mapToNumericColumns(specialColumns),
];
},
);
const metrics = computed(() => run.value?.metrics ?? {});
@@ -156,6 +165,7 @@ onMounted(async () => {
}}
</N8nText>
</n8n-callout>
<el-scrollbar always :class="$style.scrollableSummary" class="mb-m">
<div style="display: flex">
<div :class="$style.summaryCard">
@@ -215,6 +225,23 @@ onMounted(async () => {
</div>
</div>
</el-scrollbar>
<n8n-callout
v-if="
!isLoading &&
!inputColumns.length &&
run?.status === 'completed' &&
run?.finalResult === 'success'
"
theme="secondary"
icon="info"
class="mb-s"
>
<N8nText size="small" :class="$style.capitalized">
{{ locale.baseText('evaluation.runDetail.notice.useSetInputs') }}
</N8nText>
</n8n-callout>
<div v-if="isLoading" :class="$style.loading">
<n8n-loading :loading="true" :rows="5" />
</div>
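
The rewritten columns computed partitions run metrics so that the reserved token/time keys always render after user-defined metrics; a small worked sketch of that split:

const specialKeys = ['promptTokens', 'completionTokens', 'totalTokens', 'executionTime'];
const runMetrics: Record<string, number> = { accuracy: 0.95, totalTokens: 1200 };

const metricColumns = Object.keys(runMetrics).filter((key) => !specialKeys.includes(key));
const specialColumns = specialKeys.filter((key) => key in runMetrics);

console.log(metricColumns); // ['accuracy'] -> rendered first, after input/output columns
console.log(specialColumns); // ['totalTokens'] -> rendered last, in a fixed order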

View File

@@ -0,0 +1,421 @@
import { describe, it, expect } from 'vitest';
import { getTestCasesColumns } from './utils';
import type { TestCaseExecutionRecord } from '../../api/evaluation.ee';
import { mock } from 'vitest-mock-extended';
describe('utils', () => {
describe('getTestCasesColumns', () => {
const mockTestCases: TestCaseExecutionRecord[] = [
mock<TestCaseExecutionRecord>({
id: 'test-case-1',
testRunId: 'test-run-1',
executionId: 'execution-1',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
inputs: {
query: 'test query',
limit: 10,
category: 'test',
},
outputs: {
result: 'success',
count: 5,
},
metrics: {
accuracy: 0.95,
},
}),
mock<TestCaseExecutionRecord>({
id: 'test-case-2',
testRunId: 'test-run-1',
executionId: 'execution-2',
status: 'completed',
createdAt: '2023-10-01T10:01:00Z',
updatedAt: '2023-10-01T10:01:00Z',
runAt: '2023-10-01T10:01:00Z',
inputs: {
query: 'another query',
limit: 20,
filter: 'active',
},
outputs: {
result: 'success',
data: { items: [] },
},
metrics: {
accuracy: 0.88,
},
}),
mock<TestCaseExecutionRecord>({
id: 'test-case-3',
testRunId: 'test-run-1',
executionId: 'execution-3',
status: 'error',
createdAt: '2023-10-01T10:02:00Z',
updatedAt: '2023-10-01T10:02:00Z',
runAt: '2023-10-01T10:02:00Z',
inputs: {
query: 'error query',
timeout: 5000,
},
outputs: {
error: 'timeout occurred',
},
metrics: {
accuracy: 0.0,
},
}),
];
it('should extract input columns from test cases', () => {
const columns = getTestCasesColumns(mockTestCases, 'inputs');
expect(columns).toHaveLength(5);
const columnProps = columns.map((col) => col.prop);
expect(columnProps).toContain('inputs.query');
expect(columnProps).toContain('inputs.limit');
expect(columnProps).toContain('inputs.category');
expect(columnProps).toContain('inputs.filter');
expect(columnProps).toContain('inputs.timeout');
});
it('should extract output columns from test cases', () => {
const columns = getTestCasesColumns(mockTestCases, 'outputs');
expect(columns).toHaveLength(4);
const columnProps = columns.map((col) => col.prop);
expect(columnProps).toContain('outputs.result');
expect(columnProps).toContain('outputs.count');
expect(columnProps).toContain('outputs.data');
expect(columnProps).toContain('outputs.error');
});
it('should return columns with correct structure', () => {
const columns = getTestCasesColumns(mockTestCases, 'inputs');
const firstColumn = columns[0];
expect(firstColumn).toHaveProperty('prop');
expect(firstColumn).toHaveProperty('label');
expect(firstColumn).toHaveProperty('sortable', true);
expect(firstColumn).toHaveProperty('filter', true);
expect(firstColumn).toHaveProperty('showHeaderTooltip', true);
});
it('should set correct label for columns', () => {
const columns = getTestCasesColumns(mockTestCases, 'inputs');
const queryColumn = columns.find((col) => col.prop === 'inputs.query');
expect(queryColumn?.label).toBe('query');
});
it('should handle empty test cases array', () => {
const columns = getTestCasesColumns([], 'inputs');
expect(columns).toHaveLength(0);
});
it('should handle test cases with no inputs', () => {
const testCasesWithoutInputs: TestCaseExecutionRecord[] = [
mock<TestCaseExecutionRecord>({
id: 'test-case-1',
testRunId: 'test-run-1',
executionId: 'execution-1',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
inputs: {},
outputs: {
result: 'success',
},
metrics: {
accuracy: 0.95,
},
}),
];
const columns = getTestCasesColumns(testCasesWithoutInputs, 'inputs');
expect(columns).toHaveLength(0);
});
it('should handle test cases with no outputs', () => {
const testCasesWithoutOutputs: TestCaseExecutionRecord[] = [
mock<TestCaseExecutionRecord>({
id: 'test-case-1',
testRunId: 'test-run-1',
executionId: 'execution-1',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
inputs: {
query: 'test',
},
outputs: {},
metrics: {
accuracy: 0.95,
},
}),
];
const columns = getTestCasesColumns(testCasesWithoutOutputs, 'outputs');
expect(columns).toHaveLength(0);
});
it('should handle test cases with undefined inputs', () => {
const testCasesWithUndefinedInputs: TestCaseExecutionRecord[] = [
{
id: 'test-case-1',
testRunId: 'test-run-1',
executionId: 'execution-1',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
outputs: {
result: 'success',
},
metrics: {
accuracy: 0.95,
},
},
];
const columns = getTestCasesColumns(testCasesWithUndefinedInputs, 'inputs');
expect(columns).toHaveLength(0);
});
it('should handle test cases with undefined outputs', () => {
const testCasesWithUndefinedOutputs: TestCaseExecutionRecord[] = [
{
id: 'test-case-1',
testRunId: 'test-run-1',
executionId: 'execution-1',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
inputs: {
query: 'test',
},
metrics: {
accuracy: 0.95,
},
},
];
const columns = getTestCasesColumns(testCasesWithUndefinedOutputs, 'outputs');
expect(columns).toHaveLength(0);
});
it('should handle mixed test cases with some having empty inputs/outputs', () => {
const mixedTestCases: TestCaseExecutionRecord[] = [
mock<TestCaseExecutionRecord>({
id: 'test-case-1',
testRunId: 'test-run-1',
executionId: 'execution-1',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
inputs: {
query: 'test query',
limit: 10,
},
outputs: {
result: 'success',
},
metrics: {
accuracy: 0.95,
},
}),
mock<TestCaseExecutionRecord>({
id: 'test-case-2',
testRunId: 'test-run-1',
executionId: 'execution-2',
status: 'completed',
createdAt: '2023-10-01T10:01:00Z',
updatedAt: '2023-10-01T10:01:00Z',
runAt: '2023-10-01T10:01:00Z',
inputs: {},
outputs: {
result: 'success',
count: 5,
},
metrics: {
accuracy: 0.88,
},
}),
mock<TestCaseExecutionRecord>({
id: 'test-case-3',
testRunId: 'test-run-1',
executionId: 'execution-3',
status: 'completed',
createdAt: '2023-10-01T10:02:00Z',
updatedAt: '2023-10-01T10:02:00Z',
runAt: '2023-10-01T10:02:00Z',
inputs: {
filter: 'active',
},
outputs: {},
metrics: {
accuracy: 0.92,
},
}),
];
const inputColumns = getTestCasesColumns(mixedTestCases, 'inputs');
const outputColumns = getTestCasesColumns(mixedTestCases, 'outputs');
expect(inputColumns).toHaveLength(3);
expect(outputColumns).toHaveLength(2);
const inputProps = inputColumns.map((col) => col.prop);
expect(inputProps).toContain('inputs.query');
expect(inputProps).toContain('inputs.limit');
expect(inputProps).toContain('inputs.filter');
const outputProps = outputColumns.map((col) => col.prop);
expect(outputProps).toContain('outputs.result');
expect(outputProps).toContain('outputs.count');
});
it('should remove duplicate columns from multiple test cases', () => {
const testCasesWithDuplicates: TestCaseExecutionRecord[] = [
mock<TestCaseExecutionRecord>({
id: 'test-case-1',
testRunId: 'test-run-1',
executionId: 'execution-1',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
inputs: {
query: 'test query 1',
limit: 10,
},
outputs: {
result: 'success',
},
metrics: {
accuracy: 0.95,
},
}),
mock<TestCaseExecutionRecord>({
id: 'test-case-2',
testRunId: 'test-run-1',
executionId: 'execution-2',
status: 'completed',
createdAt: '2023-10-01T10:01:00Z',
updatedAt: '2023-10-01T10:01:00Z',
runAt: '2023-10-01T10:01:00Z',
inputs: {
query: 'test query 2',
limit: 20,
},
outputs: {
result: 'success',
},
metrics: {
accuracy: 0.88,
},
}),
];
const inputColumns = getTestCasesColumns(testCasesWithDuplicates, 'inputs');
const outputColumns = getTestCasesColumns(testCasesWithDuplicates, 'outputs');
expect(inputColumns).toHaveLength(2);
expect(outputColumns).toHaveLength(1);
});
it('should handle complex nested object keys', () => {
const testCasesWithComplexKeys: TestCaseExecutionRecord[] = [
mock<TestCaseExecutionRecord>({
id: 'test-case-1',
testRunId: 'test-run-1',
executionId: 'execution-1',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
inputs: {
'user.name': 'John Doe',
'user.email': 'john@example.com',
'config.timeout': 5000,
'config.retries': 3,
},
outputs: {
'response.status': 200,
'response.data': { success: true },
},
metrics: {
accuracy: 0.95,
},
}),
];
const inputColumns = getTestCasesColumns(testCasesWithComplexKeys, 'inputs');
const outputColumns = getTestCasesColumns(testCasesWithComplexKeys, 'outputs');
expect(inputColumns).toHaveLength(4);
expect(outputColumns).toHaveLength(2);
const inputLabels = inputColumns.map((col) => col.label);
expect(inputLabels).toContain('user.name');
expect(inputLabels).toContain('user.email');
expect(inputLabels).toContain('config.timeout');
expect(inputLabels).toContain('config.retries');
const outputLabels = outputColumns.map((col) => col.label);
expect(outputLabels).toContain('response.status');
expect(outputLabels).toContain('response.data');
});
it('should maintain consistent column order across multiple calls', () => {
const columns1 = getTestCasesColumns(mockTestCases, 'inputs');
const columns2 = getTestCasesColumns(mockTestCases, 'inputs');
expect(columns1.map((col) => col.prop)).toEqual(columns2.map((col) => col.prop));
});
it('should handle single test case', () => {
const singleTestCase: TestCaseExecutionRecord[] = [
mock<TestCaseExecutionRecord>({
id: 'test-case-1',
testRunId: 'test-run-1',
executionId: 'execution-1',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
inputs: {
query: 'single test',
},
outputs: {
result: 'success',
},
metrics: {
accuracy: 0.95,
},
}),
];
const inputColumns = getTestCasesColumns(singleTestCase, 'inputs');
const outputColumns = getTestCasesColumns(singleTestCase, 'outputs');
expect(inputColumns).toHaveLength(1);
expect(outputColumns).toHaveLength(1);
expect(inputColumns[0].prop).toBe('inputs.query');
expect(outputColumns[0].prop).toBe('outputs.result');
});
});
});

View File

@@ -0,0 +1,42 @@
import type { TestTableColumn } from '@/components/Evaluations.ee/shared/TestTableBase.vue';
import type { TestCaseExecutionRecord } from '../../api/evaluation.ee';
export function getTestCasesColumns(
cases: TestCaseExecutionRecord[],
columnType: 'inputs' | 'outputs',
): Array<TestTableColumn<TestCaseExecutionRecord & { index: number }>> {
const columnNames = cases.reduce((set, testCase) => {
Object.keys(testCase[columnType] ?? {}).forEach((key) => set.add(key));
return set;
}, new Set<string>());
return Array.from(columnNames).map((column) => ({
prop: `${columnType}.${column}`,
label: column,
sortable: true,
filter: true,
showHeaderTooltip: true,
formatter: (row: TestCaseExecutionRecord) => {
const value = row[columnType]?.[column];
if (typeof value === 'object' && value !== null) {
return JSON.stringify(value, null, 2);
}
return `${value}`;
},
}));
}
export function mapToNumericColumns(columnNames: string[]) {
return columnNames.map((metric) => ({
prop: `metrics.${metric}`,
label: metric,
sortable: true,
filter: true,
showHeaderTooltip: true,
formatter: (row: TestCaseExecutionRecord) => row.metrics?.[metric]?.toFixed(2) ?? '-',
}));
}
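
A usage sketch of getTestCasesColumns' key derivation: keys are unioned across all cases via a Set, so each distinct key yields exactly one column (record types elided for brevity):

const cases = [
  { inputs: { query: 'a', limit: 10 } },
  { inputs: { query: 'b', filter: 'active' } },
];

// Union of keys across all cases, deduplicated by the Set:
const inputKeys = Array.from(
  cases.reduce((set, c) => {
    Object.keys(c.inputs ?? {}).forEach((k) => set.add(k));
    return set;
  }, new Set<string>()),
);
console.log(inputKeys); // ['query', 'limit', 'filter'] -> props 'inputs.query', 'inputs.limit', ...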

View File

@@ -8,6 +8,58 @@ import {
} from './CannedMetricPrompts.ee';
import { document, sheet } from '../../Google/Sheet/GoogleSheetsTrigger.node';
export const setInputsProperties: INodeProperties[] = [
{
displayName:
'Adds columns from your dataset to the evaluation results. Anything you add here is displayed in the evaluations tab, not in the Google Sheet.',
name: 'setInputsNotice',
type: 'notice',
default: '',
displayOptions: {
show: {
operation: ['setInputs'],
},
},
},
{
displayName: 'Inputs',
name: 'inputs',
placeholder: 'Add Input',
type: 'fixedCollection',
typeOptions: {
multipleValueButtonText: 'Add Input',
multipleValues: true,
},
default: {},
options: [
{
displayName: 'Filter',
name: 'values',
values: [
{
displayName: 'Name',
name: 'inputName',
type: 'string',
default: '',
requiresDataPath: 'single',
},
{
displayName: 'Value',
name: 'inputValue',
type: 'string',
default: '',
},
],
},
],
displayOptions: {
show: {
operation: ['setInputs'],
},
},
},
];
export const setOutputProperties: INodeProperties[] = [
{
displayName: 'Credentials',
@@ -53,6 +105,7 @@ export const setOutputProperties: INodeProperties[] = [
name: 'outputName',
type: 'string',
default: '',
requiresDataPath: 'single',
},
{
displayName: 'Value',
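
At runtime the fixedCollection above serializes to a values array, which setInputs later reads via getNodeParameter('inputs.values', 0, []); a sketch of that shape (the expression value is a hypothetical example):

const inputsParameter = {
  values: [
    { inputName: 'query', inputValue: '={{ $json.query }}' }, // hypothetical expression
    { inputName: 'category', inputValue: 'test' },
  ],
};
// getNodeParameter('inputs.values', 0, []) resolves to inputsParameter.values,
// with expressions already evaluated against the current item.
console.log(inputsParameter.values.map((v) => v.inputName)); // ['query', 'category']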

View File

@@ -8,6 +8,7 @@ import type {
import {
setCheckIfEvaluatingProperties,
setInputsProperties,
setMetricsProperties,
setOutputProperties,
} from './Description.node';
@@ -16,9 +17,10 @@ import { listSearch, loadOptions, credentialTest } from '../methods';
import {
checkIfEvaluating,
setMetrics,
setInputs,
getInputConnectionTypes,
getOutputConnectionTypes,
setOutputs,
setOutput,
setInputs,
} from '../utils/evaluationUtils';
export class Evaluation implements INodeType {
@@ -35,8 +37,8 @@ export class Evaluation implements INodeType {
name: 'Evaluation',
color: '#c3c9d5',
},
inputs: `={{(${setInputs})($parameter)}}`,
outputs: `={{(${setOutputs})($parameter)}}`,
inputs: `={{(${getInputConnectionTypes})($parameter)}}`,
outputs: `={{(${getOutputConnectionTypes})($parameter)}}`,
codex: {
alias: ['Test', 'Metrics', 'Evals', 'Set Output', 'Set Metrics'],
},
@@ -70,6 +72,10 @@ export class Evaluation implements INodeType {
type: 'options',
noDataExpression: true,
options: [
{
name: 'Set Inputs',
value: 'setInputs',
},
{
name: 'Set Outputs',
value: 'setOutputs',
@@ -86,6 +92,7 @@ export class Evaluation implements INodeType {
default: 'setOutputs',
},
authentication,
...setInputsProperties,
...setOutputProperties,
...setMetricsProperties,
...setCheckIfEvaluatingProperties,
@@ -98,12 +105,15 @@ export class Evaluation implements INodeType {
const operation = this.getNodeParameter('operation', 0);
if (operation === 'setOutputs') {
return await setOutput.call(this);
return await setOutputs.call(this);
} else if (operation === 'setInputs') {
return setInputs.call(this);
} else if (operation === 'setMetrics') {
return await setMetrics.call(this);
} else {
// operation === 'checkIfEvaluating'
} else if (operation === 'checkIfEvaluating') {
return await checkIfEvaluating.call(this);
}
throw new Error('Unsupported Operation');
}
}
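
The renamed connection-type helpers are embedded as stringified functions in the node description and evaluated against $parameter per node instance; a sketch of that pattern, with getOutputConnectionTypes re-declared here in simplified form:

function getOutputConnectionTypes(parameters: { operation?: string }) {
  if (parameters.operation === 'checkIfEvaluating') {
    return [
      { type: 'main', displayName: 'Evaluation' },
      { type: 'main', displayName: 'Normal' },
    ];
  }
  return [{ type: 'main' }];
}

// The function body is serialized into the description string and re-evaluated by n8n:
const outputsExpression = `={{(${getOutputConnectionTypes})($parameter)}}`;
console.log(getOutputConnectionTypes({ operation: 'checkIfEvaluating' }).length); // 2
console.log(outputsExpression.startsWith('={{')); // true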

View File

@@ -0,0 +1,216 @@
import type { IExecuteFunctions } from 'n8n-workflow';
import { UserError } from 'n8n-workflow';
import { setInputs, setOutputs } from '../utils/evaluationUtils';
jest.mock('../utils/evaluationTriggerUtils', () => ({
getGoogleSheet: jest.fn(),
getSheet: jest.fn(),
}));
import { getGoogleSheet, getSheet } from '../utils/evaluationTriggerUtils';
import { mockDeep } from 'jest-mock-extended';
describe('setInputs', () => {
const mockThis = (options: Partial<any> = {}) =>
mockDeep<IExecuteFunctions>({
getNode: jest.fn().mockReturnValue({ name: 'EvalNode' }),
getParentNodes: jest
.fn()
.mockReturnValue([{ name: 'EvalTrigger', type: 'n8n-nodes-base.evaluationTrigger' }]),
evaluateExpression: jest.fn().mockReturnValue(true),
getNodeParameter: jest.fn().mockReturnValue([
{ inputName: 'foo', inputValue: 'bar' },
{ inputName: 'baz', inputValue: 'qux' },
]),
getInputData: jest.fn().mockReturnValue([{ json: { test: 1 } }]),
addExecutionHints: jest.fn(),
getMode: jest.fn().mockReturnValue('evaluation'),
...options,
});
it('should return input data with evaluationData when inputs are provided', () => {
const context = mockThis();
const result = setInputs.call(context);
expect(result).toHaveLength(1);
expect(result[0][0].evaluationData).toEqual({ foo: 'bar', baz: 'qux' });
});
it('should throw UserError if no input fields are provided', () => {
const context = mockThis({
getNodeParameter: jest.fn().mockReturnValue([]),
});
expect(() => setInputs.call(context)).toThrow(UserError);
});
it('should add execution hints and return input data if not started from evaluation trigger', () => {
const context = mockThis({
getParentNodes: jest.fn().mockReturnValue([]),
getInputData: jest.fn().mockReturnValue([{ json: { test: 2 } }]),
});
const result = setInputs.call(context);
expect(context.addExecutionHints).toHaveBeenCalledWith(
expect.objectContaining({
message: expect.stringContaining('No inputs were set'),
}),
);
expect(result).toEqual([[{ json: { test: 2 } }]]);
});
it('should add execution hints and return input data if evalTriggerOutput is falsy', () => {
const context = mockThis({
evaluateExpression: jest.fn().mockReturnValue(undefined),
getInputData: jest.fn().mockReturnValue([{ json: { test: 3 } }]),
});
const result = setInputs.call(context);
expect(context.addExecutionHints).toHaveBeenCalled();
expect(result).toEqual([[{ json: { test: 3 } }]]);
});
});
describe('setOutputs', () => {
const mockGoogleSheetInstance = {
updateRows: jest.fn(),
prepareDataForUpdatingByRowNumber: jest.fn().mockReturnValue({
updateData: [{ range: 'Sheet1!A2:C2', values: [['foo', 'bar']] }],
}),
batchUpdate: jest.fn(),
};
const mockSheet = {
title: 'Sheet1',
};
const mockThis = (options: Partial<any> = {}) =>
mockDeep<IExecuteFunctions>({
getNode: jest.fn().mockReturnValue({ name: 'EvalNode' }),
getParentNodes: jest
.fn()
.mockReturnValue([{ name: 'EvalTrigger', type: 'n8n-nodes-base.evaluationTrigger' }]),
evaluateExpression: jest.fn().mockImplementation((expr) => {
if (expr.includes('isExecuted')) return true;
if (expr.includes('first().json')) return { row_number: 2, inputField: 'inputValue' };
return true;
}),
getNodeParameter: jest.fn().mockReturnValue([
{ outputName: 'result', outputValue: 'success' },
{ outputName: 'score', outputValue: '95' },
]),
getInputData: jest.fn().mockReturnValue([{ json: { test: 1 } }]),
addExecutionHints: jest.fn(),
getMode: jest.fn().mockReturnValue('evaluation'),
...options,
});
beforeEach(() => {
jest.clearAllMocks();
(getGoogleSheet as jest.Mock).mockReturnValue(mockGoogleSheetInstance);
(getSheet as jest.Mock).mockResolvedValue(mockSheet);
});
it('should set outputs to Google Sheet and return evaluation data', async () => {
const context = mockThis();
const result = await setOutputs.call(context);
expect(getGoogleSheet).toHaveBeenCalled();
expect(getSheet).toHaveBeenCalledWith(mockGoogleSheetInstance);
expect(mockGoogleSheetInstance.updateRows).toHaveBeenCalledWith(
'Sheet1',
[['inputField', 'result', 'score']],
'RAW',
1,
);
expect(mockGoogleSheetInstance.prepareDataForUpdatingByRowNumber).toHaveBeenCalledWith(
[{ row_number: 2, result: 'success', score: '95' }],
'Sheet1!A:Z',
[['inputField', 'result', 'score']],
);
expect(mockGoogleSheetInstance.batchUpdate).toHaveBeenCalledWith(
[{ range: 'Sheet1!A2:C2', values: [['foo', 'bar']] }],
'RAW',
);
expect(result).toHaveLength(1);
expect(result[0][0].evaluationData).toEqual({ result: 'success', score: '95' });
});
it('should throw UserError if no output fields are provided', async () => {
const context = mockThis({
getNodeParameter: jest.fn().mockReturnValue([]),
});
await expect(setOutputs.call(context)).rejects.toThrow(UserError);
await expect(setOutputs.call(context)).rejects.toThrow('No outputs to set');
});
it('should add execution hints and return input data if not started from evaluation trigger', async () => {
const context = mockThis({
getParentNodes: jest.fn().mockReturnValue([]),
getInputData: jest.fn().mockReturnValue([{ json: { test: 2 } }]),
});
const result = await setOutputs.call(context);
expect(context.addExecutionHints).toHaveBeenCalledWith(
expect.objectContaining({
message: expect.stringContaining('No outputs were set'),
}),
);
expect(result).toEqual([[{ json: { test: 2 } }]]);
expect(getGoogleSheet).not.toHaveBeenCalled();
});
it('should add execution hints and return input data if evalTriggerOutput is falsy', async () => {
const context = mockThis({
evaluateExpression: jest.fn().mockImplementation((expr) => {
if (expr.includes('isExecuted')) return false;
return true;
}),
getInputData: jest.fn().mockReturnValue([{ json: { test: 3 } }]),
});
const result = await setOutputs.call(context);
expect(context.addExecutionHints).toHaveBeenCalled();
expect(result).toEqual([[{ json: { test: 3 } }]]);
expect(getGoogleSheet).not.toHaveBeenCalled();
});
it('should handle row_number as string "row_number" by using 1', async () => {
const context = mockThis({
evaluateExpression: jest.fn().mockImplementation((expr) => {
if (expr.includes('isExecuted')) return true;
if (expr.includes('first().json'))
return { row_number: 'row_number', inputField: 'inputValue' };
return true;
}),
});
const result = await setOutputs.call(context);
expect(mockGoogleSheetInstance.prepareDataForUpdatingByRowNumber).toHaveBeenCalledWith(
[{ row_number: 1, result: 'success', score: '95' }],
'Sheet1!A:Z',
[['inputField', 'result', 'score']],
);
expect(result).toHaveLength(1);
});
it('should add new column names that are not in existing columns', async () => {
const context = mockThis({
evaluateExpression: jest.fn().mockImplementation((expr) => {
if (expr.includes('isExecuted')) return true;
if (expr.includes('first().json')) return { row_number: 2, existingCol: 'value' };
return true;
}),
getNodeParameter: jest
.fn()
.mockReturnValue([{ outputName: 'newCol', outputValue: 'newValue' }]),
});
const result = await setOutputs.call(context);
expect(mockGoogleSheetInstance.updateRows).toHaveBeenCalledWith(
'Sheet1',
[['existingCol', 'newCol']],
'RAW',
1,
);
expect(result).toHaveLength(1);
});
});

View File

@@ -1,25 +1,61 @@
import { UserError, NodeOperationError } from 'n8n-workflow';
import { UserError, NodeOperationError, EVALUATION_TRIGGER_NODE_TYPE } from 'n8n-workflow';
import type {
INodeParameters,
IDataObject,
IExecuteFunctions,
INodeExecutionData,
JsonObject,
JsonValue,
} from 'n8n-workflow';
import { metricHandlers } from './metricHandlers';
import { getGoogleSheet, getSheet } from './evaluationTriggerUtils';
import { metricHandlers } from './metricHandlers';
import { composeReturnItem } from '../../Set/v2/helpers/utils';
import assert from 'node:assert';
export async function setOutput(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
function withEvaluationData(this: IExecuteFunctions, data: JsonObject): INodeExecutionData[] {
const inputData = this.getInputData();
if (!inputData.length) {
return inputData;
}
const isEvaluationMode = this.getMode() === 'evaluation';
return [
{
...inputData[0],
// The test runner only looks at the first item, so the data doesn't need duplicating for each item
evaluationData: isEvaluationMode ? data : undefined,
},
...inputData.slice(1),
];
}
function isOutputsArray(
value: unknown,
): value is Array<{ outputName: string; outputValue: JsonValue }> {
return (
Array.isArray(value) &&
value.every(
(item) =>
typeof item === 'object' &&
item !== null &&
'outputName' in item &&
'outputValue' in item &&
typeof item.outputName === 'string',
)
);
}
export async function setOutputs(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
const evaluationNode = this.getNode();
const parentNodes = this.getParentNodes(evaluationNode.name);
const evalTrigger = parentNodes.find((node) => node.type === 'n8n-nodes-base.evaluationTrigger');
const evalTriggerOutput = evalTrigger
const evalTrigger = parentNodes.find((node) => node.type === EVALUATION_TRIGGER_NODE_TYPE);
const isEvalTriggerExecuted = evalTrigger
? this.evaluateExpression(`{{ $('${evalTrigger?.name}').isExecuted }}`, 0)
: undefined;
: false;
if (!evalTrigger || !evalTriggerOutput) {
if (!evalTrigger || !isEvalTriggerExecuted) {
this.addExecutionHints({
message: "No outputs were set since the execution didn't start from an evaluation trigger",
location: 'outputPane',
@@ -27,10 +63,11 @@ export async function setOutput(this: IExecuteFunctions): Promise<INodeExecution
return [this.getInputData()];
}
const outputFields = this.getNodeParameter('outputs.values', 0, []) as Array<{
outputName: string;
outputValue: string;
}>;
const outputFields = this.getNodeParameter('outputs.values', 0, []);
assert(
isOutputsArray(outputFields),
'Invalid output fields format. Expected an array of objects with outputName and outputValue properties.',
);
if (outputFields.length === 0) {
throw new UserError('No outputs to set', {
@@ -66,10 +103,10 @@ export async function setOutput(this: IExecuteFunctions): Promise<INodeExecution
1, // header row
);
const outputs = outputFields.reduce((acc, { outputName, outputValue }) => {
const outputs = outputFields.reduce<JsonObject>((acc, { outputName, outputValue }) => {
acc[outputName] = outputValue;
return acc;
}, {} as IDataObject);
}, {});
const preparedData = googleSheetInstance.prepareDataForUpdatingByRowNumber(
[
@@ -87,7 +124,60 @@ export async function setOutput(this: IExecuteFunctions): Promise<INodeExecution
'RAW', // default value for Value Input Mode
);
return [this.getInputData()];
return [withEvaluationData.call(this, outputs)];
}
function isInputsArray(
value: unknown,
): value is Array<{ inputName: string; inputValue: JsonValue }> {
return (
Array.isArray(value) &&
value.every(
(item) =>
typeof item === 'object' &&
item !== null &&
'inputName' in item &&
'inputValue' in item &&
typeof item.inputName === 'string',
)
);
}
export function setInputs(this: IExecuteFunctions): INodeExecutionData[][] {
const evaluationNode = this.getNode();
const parentNodes = this.getParentNodes(evaluationNode.name);
const evalTrigger = parentNodes.find((node) => node.type === 'n8n-nodes-base.evaluationTrigger');
const isEvalTriggerExecuted = evalTrigger
? this.evaluateExpression(`{{ $('${evalTrigger?.name}').isExecuted }}`, 0)
: false;
if (!evalTrigger || !isEvalTriggerExecuted) {
this.addExecutionHints({
message: "No inputs were set since the execution didn't start from an evaluation trigger",
location: 'outputPane',
});
return [this.getInputData()];
}
const inputFields = this.getNodeParameter('inputs.values', 0, []);
assert(
isInputsArray(inputFields),
'Invalid input fields format. Expected an array of objects with inputName and inputValue properties.',
);
if (inputFields.length === 0) {
throw new UserError('No inputs to set', {
description: 'Add inputs using the Add Input button',
});
}
const inputs = inputFields.reduce<JsonObject>((acc, { inputName, inputValue }) => {
acc[inputName] = inputValue;
return acc;
}, {});
return [withEvaluationData.call(this, inputs)];
}
export async function setMetrics(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
@@ -128,18 +218,18 @@ export async function checkIfEvaluating(this: IExecuteFunctions): Promise<INodeE
const parentNodes = this.getParentNodes(evaluationNode.name);
const evalTrigger = parentNodes.find((node) => node.type === 'n8n-nodes-base.evaluationTrigger');
const evalTriggerOutput = evalTrigger
const isEvalTriggerExecuted = evalTrigger
? this.evaluateExpression(`{{ $('${evalTrigger?.name}').isExecuted }}`, 0)
: undefined;
: false;
if (evalTriggerOutput) {
if (isEvalTriggerExecuted) {
return [this.getInputData(), normalExecutionResult];
} else {
return [evaluationExecutionResult, this.getInputData()];
}
}
export function setOutputs(parameters: INodeParameters) {
export function getOutputConnectionTypes(parameters: INodeParameters) {
if (parameters.operation === 'checkIfEvaluating') {
return [
{ type: 'main', displayName: 'Evaluation' },
@@ -150,7 +240,7 @@ export function setOutputs(parameters: INodeParameters) {
return [{ type: 'main' }];
}
export function setInputs(parameters: INodeParameters) {
export function getInputConnectionTypes(parameters: INodeParameters) {
if (
parameters.operation === 'setMetrics' &&
['correctness', 'helpfulness'].includes(parameters.metric as string)
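
withEvaluationData tags only the first input item, and only when running in evaluation mode, so the test runner can read it from item 0 without the payload being copied onto every item. A standalone sketch with simplified types (the real helper reads getInputData()/getMode() off IExecuteFunctions):

type JsonObject = { [key: string]: unknown };
interface Item {
  json: JsonObject;
  evaluationData?: JsonObject;
}

function withEvaluationData(items: Item[], mode: string, data: JsonObject): Item[] {
  if (!items.length) return items;
  const isEvaluationMode = mode === 'evaluation';
  // Only item 0 is tagged - the test runner reads just the first item.
  return [{ ...items[0], evaluationData: isEvaluationMode ? data : undefined }, ...items.slice(1)];
}

const out = withEvaluationData([{ json: { a: 1 } }, { json: { a: 2 } }], 'evaluation', { foo: 'bar' });
console.log(out[0].evaluationData, out[1].evaluationData); // { foo: 'bar' } undefined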

View File

@@ -1214,6 +1214,7 @@ export interface INodeExecutionData {
metadata?: {
subExecution: RelatedExecution;
};
evaluationData?: Record<string, GenericValue>;
/**
* Use this key to send a message to the chat.
*