feat: Track inputs and outputs in Evaluations (#17404)

Mutasem Aldmour
2025-07-24 18:58:37 +02:00
committed by GitHub
parent 3f2e43e919
commit c18fabb419
20 changed files with 1431 additions and 60 deletions

View File: src/api/evaluation.ee.ts

@@ -1,5 +1,6 @@
import type { IRestApiContext } from '@n8n/rest-api-client';
import { makeRestApiRequest, request } from '@n8n/rest-api-client';
import type { JsonObject } from 'n8n-workflow';
export interface TestRunRecord {
id: string;
@@ -36,6 +37,8 @@ export interface TestCaseExecutionRecord {
metrics?: Record<string, number>;
errorCode?: string;
errorDetails?: Record<string, unknown>;
inputs?: JsonObject;
outputs?: JsonObject;
}
const getTestRunsEndpoint = (workflowId: string, runId?: string) =>
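
The new `inputs` and `outputs` fields are plain JSON objects keyed by column name. A minimal sketch of the added shape (the field names and values below are hypothetical, not from the PR):

import type { TestCaseExecutionRecord } from '@/api/evaluation.ee';

// Only the new fields are shown; the values are illustrative.
const io: Pick<TestCaseExecutionRecord, 'inputs' | 'outputs'> = {
	inputs: { query: 'find overdue invoices', limit: 10 },
	outputs: { matches: 3 },
};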

View File: TestTableBase.vue

@@ -173,6 +173,10 @@ defineSlots<{
overflow: hidden;
text-overflow: ellipsis;
border-bottom: 1px solid var(--border-color-light) !important;
> div {
max-height: 100px;
}
}
.cell {

View File: TestRunDetailView.test.ts

@@ -0,0 +1,445 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { createComponentRenderer } from '@/__tests__/render';
import { createTestingPinia } from '@pinia/testing';
import { waitFor } from '@testing-library/vue';
import { useEvaluationStore } from '@/stores/evaluation.store.ee';
import TestRunDetailView from './TestRunDetailView.vue';
import type { TestCaseExecutionRecord, TestRunRecord } from '@/api/evaluation.ee';
import type { IWorkflowDb } from '@/Interface';
import { mock } from 'vitest-mock-extended';
vi.mock('@/composables/useToast', () => ({
useToast: () => ({
showError: vi.fn(),
}),
}));
const mockRouter = {
currentRoute: {
value: {
params: {
runId: 'test-run-id',
name: 'test-workflow-id',
},
},
},
back: vi.fn(),
resolve: vi.fn(() => ({ href: '/test-execution-url' })),
};
vi.mock('vue-router', () => {
return {
RouterLink: vi.fn(),
useRoute: () => ({}),
useRouter: () => mockRouter,
};
});
const mockTestRun: TestRunRecord = {
id: 'test-run-id',
workflowId: 'test-workflow-id',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
completedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
metrics: {
accuracy: 0.95,
precision: 0.92,
},
finalResult: 'success',
};
const mockTestCases = [
mock<TestCaseExecutionRecord>({
id: 'test-case-1',
status: 'completed',
runAt: '2023-10-01T10:00:00Z',
executionId: 'execution-1',
metrics: {
accuracy: 0.98,
precision: 0.95,
},
inputs: {
input1: 'value1',
},
outputs: {
output1: 'result1',
},
}),
mock<TestCaseExecutionRecord>({
id: 'test-case-2',
status: 'error',
runAt: '2023-10-01T10:01:00Z',
executionId: 'execution-2',
errorCode: 'INTERRUPTED',
metrics: {
accuracy: 0.85,
precision: 0.88,
},
inputs: {
input1: 'value2',
},
outputs: {
output1: 'result2',
},
}),
];
const mockWorkflow = mock<IWorkflowDb>({
id: 'test-workflow-id',
name: 'Test Workflow',
active: true,
nodes: [],
connections: {},
createdAt: '2023-10-01T09:00:00Z',
updatedAt: '2023-10-01T09:00:00Z',
versionId: 'version-1',
tags: [],
settings: {},
pinData: {},
homeProject: { id: 'home-project', name: 'Home' },
sharedWithProjects: [],
scopes: [],
usedCredentials: [],
meta: {},
});
describe('TestRunDetailView', () => {
let evaluationStore: ReturnType<typeof useEvaluationStore>;
const renderComponent = createComponentRenderer(TestRunDetailView, {
pinia: createTestingPinia({
initialState: {
evaluation: {
testRunsById: {
'test-run-id': mockTestRun,
},
},
workflows: {
workflowsById: {
'test-workflow-id': mockWorkflow,
},
},
},
stubActions: false,
}),
});
beforeEach(() => {
evaluationStore = useEvaluationStore();
// Mock store methods
vi.mocked(evaluationStore.getTestRun).mockResolvedValue(mockTestRun);
vi.spyOn(evaluationStore, 'fetchTestCaseExecutions').mockResolvedValue(mockTestCases);
vi.clearAllMocks();
});
afterEach(() => {
vi.clearAllMocks();
});
it('should render component', () => {
const { container } = renderComponent();
expect(container).toBeTruthy();
});
it('should fetch test run data on mount', async () => {
renderComponent();
await waitFor(() => {
expect(evaluationStore.getTestRun).toHaveBeenCalledWith({
workflowId: 'test-workflow-id',
runId: 'test-run-id',
});
});
});
it('should display test run detail view', async () => {
const { container } = renderComponent();
await waitFor(() => {
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
});
});
it('should display summary cards', async () => {
const { container } = renderComponent();
await waitFor(() => {
const summaryCards = container.querySelectorAll('.summaryCard');
expect(summaryCards.length).toBeGreaterThan(0);
});
});
it('should handle error state', async () => {
const errorTestRun = {
...mockTestRun,
status: 'error' as const,
errorCode: 'TIMEOUT',
};
vi.mocked(evaluationStore.getTestRun).mockResolvedValue(errorTestRun);
const { container } = renderComponent();
await waitFor(() => {
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
});
});
it('should display metrics in summary', async () => {
const { container } = renderComponent();
await waitFor(() => {
const summaryCards = container.querySelectorAll('.summaryCard');
expect(summaryCards.length).toBeGreaterThan(2); // At least total cases, date, status + metrics
});
});
it('should display back navigation', async () => {
const { container } = renderComponent();
await waitFor(() => {
const backButton = container.querySelector('.backButton');
expect(backButton).toBeTruthy();
});
});
it('should display test table when data is loaded', async () => {
const { container } = renderComponent();
await waitFor(() => {
// TestTableBase component should be rendered
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
});
});
it('should handle partial failures', async () => {
// Test with cases that have errors
vi.spyOn(evaluationStore, 'fetchTestCaseExecutions').mockResolvedValue(mockTestCases);
const { container } = renderComponent();
await waitFor(() => {
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
});
});
it('should handle empty test cases', async () => {
vi.spyOn(evaluationStore, 'fetchTestCaseExecutions').mockResolvedValue([]);
const { container } = renderComponent();
await waitFor(() => {
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
});
});
it('should handle fetch errors', async () => {
const error = new Error('Failed to fetch');
vi.mocked(evaluationStore.getTestRun).mockRejectedValue(error);
const { container } = renderComponent();
await waitFor(() => {
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
});
});
it('should render scrollable summary section', async () => {
const { container } = renderComponent();
await waitFor(() => {
const scrollableSection = container.querySelector('.scrollableSummary');
expect(scrollableSection).toBeTruthy();
});
});
it('should display notice callout when no input columns and run is successful', async () => {
// Mock test cases with no inputs
const testCasesWithoutInputs = mockTestCases.map((tc) => ({
...tc,
inputs: {},
}));
vi.spyOn(evaluationStore, 'fetchTestCaseExecutions').mockResolvedValue(testCasesWithoutInputs);
const { container } = renderComponent();
await waitFor(() => {
// Should render the component
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
});
});
it('should display inputs correctly in test table', async () => {
const { container } = renderComponent();
await waitFor(() => {
expect(evaluationStore.fetchTestCaseExecutions).toHaveBeenCalledWith({
workflowId: 'test-workflow-id',
runId: 'test-run-id',
});
});
await waitFor(() => {
// Check that inputs are displayed
const testTable = container.querySelector('[data-test-id="test-definition-run-detail"]');
expect(testTable).toBeTruthy();
// Inputs should be rendered in the table
expect(container.textContent).toContain('value1');
expect(container.textContent).toContain('value2');
});
});
it('should display outputs correctly in test table', async () => {
const { container } = renderComponent();
await waitFor(() => {
expect(evaluationStore.fetchTestCaseExecutions).toHaveBeenCalledWith({
workflowId: 'test-workflow-id',
runId: 'test-run-id',
});
});
await waitFor(() => {
// Check that outputs are displayed
const testTable = container.querySelector('[data-test-id="test-definition-run-detail"]');
expect(testTable).toBeTruthy();
// Outputs should be rendered in the table
expect(container.textContent).toContain('result1');
expect(container.textContent).toContain('result2');
});
});
it('should display metrics correctly for individual test cases', async () => {
const { container } = renderComponent();
await waitFor(() => {
expect(evaluationStore.fetchTestCaseExecutions).toHaveBeenCalledWith({
workflowId: 'test-workflow-id',
runId: 'test-run-id',
});
});
await waitFor(() => {
// Check that metrics are displayed in the table
const testTable = container.querySelector('[data-test-id="test-definition-run-detail"]');
expect(testTable).toBeTruthy();
// Individual test case metrics should be shown
expect(container.textContent).toContain('0.98'); // accuracy for test-case-1
expect(container.textContent).toContain('0.95'); // precision for test-case-1
expect(container.textContent).toContain('0.85'); // accuracy for test-case-2
expect(container.textContent).toContain('0.88'); // precision for test-case-2
});
});
it('should display overall run metrics in summary cards', async () => {
const { container } = renderComponent();
await waitFor(() => {
// Check that overall metrics are displayed in summary
const summaryCards = container.querySelectorAll('.summaryCard');
expect(summaryCards.length).toBeGreaterThan(0);
// Overall run metrics should be shown
expect(container.textContent).toContain('0.95'); // overall accuracy
expect(container.textContent).toContain('0.92'); // overall precision
});
});
it('should handle test cases with missing metrics gracefully', async () => {
const testCasesWithMissingMetrics = [
mock<TestCaseExecutionRecord>({
id: 'test-case-3',
status: 'completed',
runAt: '2023-10-01T10:02:00Z',
executionId: 'execution-3',
inputs: { input1: 'value3' },
outputs: { output1: 'result3' },
// No metrics property
}),
];
vi.spyOn(evaluationStore, 'fetchTestCaseExecutions').mockResolvedValue(
testCasesWithMissingMetrics,
);
const { container } = renderComponent();
await waitFor(() => {
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
// Should still display inputs and outputs
expect(container.textContent).toContain('value3');
expect(container.textContent).toContain('result3');
});
});
it('should display error status for failed test cases', async () => {
const { container } = renderComponent();
await waitFor(() => {
expect(evaluationStore.fetchTestCaseExecutions).toHaveBeenCalledWith({
workflowId: 'test-workflow-id',
runId: 'test-run-id',
});
});
await waitFor(() => {
// Check that error status and error code are displayed
const testTable = container.querySelector('[data-test-id="test-definition-run-detail"]');
expect(testTable).toBeTruthy();
// Error status should be shown for test-case-2
expect(container.textContent).toContain('error');
expect(container.textContent).toContain('run was interrupted');
});
});
it('should handle complex input and output objects', async () => {
const testCasesWithComplexData = [
mock<TestCaseExecutionRecord>({
id: 'test-case-complex',
status: 'completed',
runAt: '2023-10-01T10:03:00Z',
executionId: 'execution-complex',
inputs: {
complexInput: {
nested: {
value: 'nested-value',
array: [1, 2, 3],
},
},
},
outputs: {
complexOutput: {
result: 'complex-result',
metadata: {
processed: true,
timestamp: '2023-10-01T10:03:00Z',
},
},
},
metrics: {
accuracy: 0.97,
precision: 0.94,
},
}),
];
vi.spyOn(evaluationStore, 'fetchTestCaseExecutions').mockResolvedValue(
testCasesWithComplexData,
);
const { container } = renderComponent();
await waitFor(() => {
expect(container.querySelector('[data-test-id="test-definition-run-detail"]')).toBeTruthy();
// Complex data should be handled and displayed
expect(container.textContent).toContain('0.97');
expect(container.textContent).toContain('0.94');
expect(container.textContent).toContain('complexInput');
expect(container.textContent).toContain('complexOutput');
});
});
});

View File: TestRunDetailView.vue

@@ -15,6 +15,7 @@ import { useRouter } from 'vue-router';
import orderBy from 'lodash/orderBy';
import { statusDictionary } from '@/components/Evaluations.ee/shared/statusDictionary';
import { getErrorBaseKey } from '@/components/Evaluations.ee/shared/errorCodes';
import { getTestCasesColumns, mapToNumericColumns } from './utils';
const router = useRouter();
const toast = useToast();
@@ -65,28 +66,36 @@ const handleRowClick = (row: TestCaseExecutionRecord) => {
}
};
const inputColumns = computed(() => getTestCasesColumns(filteredTestCases.value, 'inputs'));
const columns = computed(
(): Array<TestTableColumn<TestCaseExecutionRecord & { index: number }>> => [
{
prop: 'index',
width: 100,
label: locale.baseText('evaluation.runDetail.testCase'),
sortable: true,
formatter: (row: TestCaseExecutionRecord & { index: number }) => `#${row.index}`,
},
{
prop: 'status',
label: locale.baseText('evaluation.listRuns.status'),
},
...Object.keys(run.value?.metrics ?? {}).map((metric) => ({
prop: `metrics.${metric}`,
label: metric,
sortable: true,
filter: true,
showHeaderTooltip: true,
formatter: (row: TestCaseExecutionRecord) => row.metrics?.[metric]?.toFixed(2) ?? '-',
})),
],
(): Array<TestTableColumn<TestCaseExecutionRecord & { index: number }>> => {
const specialKeys = ['promptTokens', 'completionTokens', 'totalTokens', 'executionTime'];
const metricColumns = Object.keys(run.value?.metrics ?? {}).filter(
(key) => !specialKeys.includes(key),
);
const specialColumns = specialKeys.filter((key) =>
run.value?.metrics ? key in run.value.metrics : false,
);
return [
{
prop: 'index',
width: 100,
label: locale.baseText('evaluation.runDetail.testCase'),
sortable: true,
formatter: (row: TestCaseExecutionRecord & { index: number }) => `#${row.index}`,
},
{
prop: 'status',
label: locale.baseText('evaluation.listRuns.status'),
},
...inputColumns.value,
...getTestCasesColumns(filteredTestCases.value, 'outputs'),
...mapToNumericColumns(metricColumns),
...mapToNumericColumns(specialColumns),
];
},
);
const metrics = computed(() => run.value?.metrics ?? {});
@@ -156,6 +165,7 @@ onMounted(async () => {
}}
</N8nText>
</n8n-callout>
<el-scrollbar always :class="$style.scrollableSummary" class="mb-m">
<div style="display: flex">
<div :class="$style.summaryCard">
@@ -215,6 +225,23 @@ onMounted(async () => {
</div>
</div>
</el-scrollbar>
<n8n-callout
v-if="
!isLoading &&
!inputColumns.length &&
run?.status === 'completed' &&
run?.finalResult === 'success'
"
theme="secondary"
icon="info"
class="mb-s"
>
<N8nText size="small" :class="$style.capitalized">
{{ locale.baseText('evaluation.runDetail.notice.useSetInputs') }}
</N8nText>
</n8n-callout>
<div v-if="isLoading" :class="$style.loading">
<n8n-loading :loading="true" :rows="5" />
</div>
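
The rebuilt `columns` computed keeps a fixed order: index and status first, then input columns, output columns, user-defined metrics, and finally the reserved token/timing keys. The split itself can be sketched in isolation (the `runMetrics` values here are hypothetical):

// Minimal sketch of the metric split used above.
const specialKeys = ['promptTokens', 'completionTokens', 'totalTokens', 'executionTime'];
const runMetrics: Record<string, number> = { accuracy: 0.95, totalTokens: 1234 };

const metricColumns = Object.keys(runMetrics).filter((key) => !specialKeys.includes(key));
const specialColumns = specialKeys.filter((key) => key in runMetrics);
// metricColumns === ['accuracy']; specialColumns === ['totalTokens'].
// User metrics render before the reserved token/timing columns.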

View File: utils.test.ts

@@ -0,0 +1,421 @@
import { describe, it, expect } from 'vitest';
import { getTestCasesColumns } from './utils';
import type { TestCaseExecutionRecord } from '../../api/evaluation.ee';
import { mock } from 'vitest-mock-extended';
describe('utils', () => {
describe('getTestCasesColumns', () => {
const mockTestCases: TestCaseExecutionRecord[] = [
mock<TestCaseExecutionRecord>({
id: 'test-case-1',
testRunId: 'test-run-1',
executionId: 'execution-1',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
inputs: {
query: 'test query',
limit: 10,
category: 'test',
},
outputs: {
result: 'success',
count: 5,
},
metrics: {
accuracy: 0.95,
},
}),
mock<TestCaseExecutionRecord>({
id: 'test-case-2',
testRunId: 'test-run-1',
executionId: 'execution-2',
status: 'completed',
createdAt: '2023-10-01T10:01:00Z',
updatedAt: '2023-10-01T10:01:00Z',
runAt: '2023-10-01T10:01:00Z',
inputs: {
query: 'another query',
limit: 20,
filter: 'active',
},
outputs: {
result: 'success',
data: { items: [] },
},
metrics: {
accuracy: 0.88,
},
}),
mock<TestCaseExecutionRecord>({
id: 'test-case-3',
testRunId: 'test-run-1',
executionId: 'execution-3',
status: 'error',
createdAt: '2023-10-01T10:02:00Z',
updatedAt: '2023-10-01T10:02:00Z',
runAt: '2023-10-01T10:02:00Z',
inputs: {
query: 'error query',
timeout: 5000,
},
outputs: {
error: 'timeout occurred',
},
metrics: {
accuracy: 0.0,
},
}),
];
it('should extract input columns from test cases', () => {
const columns = getTestCasesColumns(mockTestCases, 'inputs');
expect(columns).toHaveLength(5);
const columnProps = columns.map((col) => col.prop);
expect(columnProps).toContain('inputs.query');
expect(columnProps).toContain('inputs.limit');
expect(columnProps).toContain('inputs.category');
expect(columnProps).toContain('inputs.filter');
expect(columnProps).toContain('inputs.timeout');
});
it('should extract output columns from test cases', () => {
const columns = getTestCasesColumns(mockTestCases, 'outputs');
expect(columns).toHaveLength(4);
const columnProps = columns.map((col) => col.prop);
expect(columnProps).toContain('outputs.result');
expect(columnProps).toContain('outputs.count');
expect(columnProps).toContain('outputs.data');
expect(columnProps).toContain('outputs.error');
});
it('should return columns with correct structure', () => {
const columns = getTestCasesColumns(mockTestCases, 'inputs');
const firstColumn = columns[0];
expect(firstColumn).toHaveProperty('prop');
expect(firstColumn).toHaveProperty('label');
expect(firstColumn).toHaveProperty('sortable', true);
expect(firstColumn).toHaveProperty('filter', true);
expect(firstColumn).toHaveProperty('showHeaderTooltip', true);
});
it('should set correct label for columns', () => {
const columns = getTestCasesColumns(mockTestCases, 'inputs');
const queryColumn = columns.find((col) => col.prop === 'inputs.query');
expect(queryColumn?.label).toBe('query');
});
it('should handle empty test cases array', () => {
const columns = getTestCasesColumns([], 'inputs');
expect(columns).toHaveLength(0);
});
it('should handle test cases with no inputs', () => {
const testCasesWithoutInputs: TestCaseExecutionRecord[] = [
mock<TestCaseExecutionRecord>({
id: 'test-case-1',
testRunId: 'test-run-1',
executionId: 'execution-1',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
inputs: {},
outputs: {
result: 'success',
},
metrics: {
accuracy: 0.95,
},
}),
];
const columns = getTestCasesColumns(testCasesWithoutInputs, 'inputs');
expect(columns).toHaveLength(0);
});
it('should handle test cases with no outputs', () => {
const testCasesWithoutOutputs: TestCaseExecutionRecord[] = [
mock<TestCaseExecutionRecord>({
id: 'test-case-1',
testRunId: 'test-run-1',
executionId: 'execution-1',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
inputs: {
query: 'test',
},
outputs: {},
metrics: {
accuracy: 0.95,
},
}),
];
const columns = getTestCasesColumns(testCasesWithoutOutputs, 'outputs');
expect(columns).toHaveLength(0);
});
it('should handle test cases with undefined inputs', () => {
const testCasesWithUndefinedInputs: TestCaseExecutionRecord[] = [
{
id: 'test-case-1',
testRunId: 'test-run-1',
executionId: 'execution-1',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
outputs: {
result: 'success',
},
metrics: {
accuracy: 0.95,
},
},
];
const columns = getTestCasesColumns(testCasesWithUndefinedInputs, 'inputs');
expect(columns).toHaveLength(0);
});
it('should handle test cases with undefined outputs', () => {
const testCasesWithUndefinedOutputs: TestCaseExecutionRecord[] = [
{
id: 'test-case-1',
testRunId: 'test-run-1',
executionId: 'execution-1',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
inputs: {
query: 'test',
},
metrics: {
accuracy: 0.95,
},
},
];
const columns = getTestCasesColumns(testCasesWithUndefinedOutputs, 'outputs');
expect(columns).toHaveLength(0);
});
it('should handle mixed test cases with some having empty inputs/outputs', () => {
const mixedTestCases: TestCaseExecutionRecord[] = [
mock<TestCaseExecutionRecord>({
id: 'test-case-1',
testRunId: 'test-run-1',
executionId: 'execution-1',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
inputs: {
query: 'test query',
limit: 10,
},
outputs: {
result: 'success',
},
metrics: {
accuracy: 0.95,
},
}),
mock<TestCaseExecutionRecord>({
id: 'test-case-2',
testRunId: 'test-run-1',
executionId: 'execution-2',
status: 'completed',
createdAt: '2023-10-01T10:01:00Z',
updatedAt: '2023-10-01T10:01:00Z',
runAt: '2023-10-01T10:01:00Z',
inputs: {},
outputs: {
result: 'success',
count: 5,
},
metrics: {
accuracy: 0.88,
},
}),
mock<TestCaseExecutionRecord>({
id: 'test-case-3',
testRunId: 'test-run-1',
executionId: 'execution-3',
status: 'completed',
createdAt: '2023-10-01T10:02:00Z',
updatedAt: '2023-10-01T10:02:00Z',
runAt: '2023-10-01T10:02:00Z',
inputs: {
filter: 'active',
},
outputs: {},
metrics: {
accuracy: 0.92,
},
}),
];
const inputColumns = getTestCasesColumns(mixedTestCases, 'inputs');
const outputColumns = getTestCasesColumns(mixedTestCases, 'outputs');
expect(inputColumns).toHaveLength(3);
expect(outputColumns).toHaveLength(2);
const inputProps = inputColumns.map((col) => col.prop);
expect(inputProps).toContain('inputs.query');
expect(inputProps).toContain('inputs.limit');
expect(inputProps).toContain('inputs.filter');
const outputProps = outputColumns.map((col) => col.prop);
expect(outputProps).toContain('outputs.result');
expect(outputProps).toContain('outputs.count');
});
it('should remove duplicate columns from multiple test cases', () => {
const testCasesWithDuplicates: TestCaseExecutionRecord[] = [
mock<TestCaseExecutionRecord>({
id: 'test-case-1',
testRunId: 'test-run-1',
executionId: 'execution-1',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
inputs: {
query: 'test query 1',
limit: 10,
},
outputs: {
result: 'success',
},
metrics: {
accuracy: 0.95,
},
}),
mock<TestCaseExecutionRecord>({
id: 'test-case-2',
testRunId: 'test-run-1',
executionId: 'execution-2',
status: 'completed',
createdAt: '2023-10-01T10:01:00Z',
updatedAt: '2023-10-01T10:01:00Z',
runAt: '2023-10-01T10:01:00Z',
inputs: {
query: 'test query 2',
limit: 20,
},
outputs: {
result: 'success',
},
metrics: {
accuracy: 0.88,
},
}),
];
const inputColumns = getTestCasesColumns(testCasesWithDuplicates, 'inputs');
const outputColumns = getTestCasesColumns(testCasesWithDuplicates, 'outputs');
expect(inputColumns).toHaveLength(2);
expect(outputColumns).toHaveLength(1);
});
it('should handle complex nested object keys', () => {
const testCasesWithComplexKeys: TestCaseExecutionRecord[] = [
mock<TestCaseExecutionRecord>({
id: 'test-case-1',
testRunId: 'test-run-1',
executionId: 'execution-1',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
inputs: {
'user.name': 'John Doe',
'user.email': 'john@example.com',
'config.timeout': 5000,
'config.retries': 3,
},
outputs: {
'response.status': 200,
'response.data': { success: true },
},
metrics: {
accuracy: 0.95,
},
}),
];
const inputColumns = getTestCasesColumns(testCasesWithComplexKeys, 'inputs');
const outputColumns = getTestCasesColumns(testCasesWithComplexKeys, 'outputs');
expect(inputColumns).toHaveLength(4);
expect(outputColumns).toHaveLength(2);
const inputLabels = inputColumns.map((col) => col.label);
expect(inputLabels).toContain('user.name');
expect(inputLabels).toContain('user.email');
expect(inputLabels).toContain('config.timeout');
expect(inputLabels).toContain('config.retries');
const outputLabels = outputColumns.map((col) => col.label);
expect(outputLabels).toContain('response.status');
expect(outputLabels).toContain('response.data');
});
it('should maintain consistent column order across multiple calls', () => {
const columns1 = getTestCasesColumns(mockTestCases, 'inputs');
const columns2 = getTestCasesColumns(mockTestCases, 'inputs');
expect(columns1.map((col) => col.prop)).toEqual(columns2.map((col) => col.prop));
});
it('should handle single test case', () => {
const singleTestCase: TestCaseExecutionRecord[] = [
mock<TestCaseExecutionRecord>({
id: 'test-case-1',
testRunId: 'test-run-1',
executionId: 'execution-1',
status: 'completed',
createdAt: '2023-10-01T10:00:00Z',
updatedAt: '2023-10-01T10:00:00Z',
runAt: '2023-10-01T10:00:00Z',
inputs: {
query: 'single test',
},
outputs: {
result: 'success',
},
metrics: {
accuracy: 0.95,
},
}),
];
const inputColumns = getTestCasesColumns(singleTestCase, 'inputs');
const outputColumns = getTestCasesColumns(singleTestCase, 'outputs');
expect(inputColumns).toHaveLength(1);
expect(outputColumns).toHaveLength(1);
expect(inputColumns[0].prop).toBe('inputs.query');
expect(outputColumns[0].prop).toBe('outputs.result');
});
});
});

View File: utils.ts

@@ -0,0 +1,42 @@
import type { TestTableColumn } from '@/components/Evaluations.ee/shared/TestTableBase.vue';
import type { TestCaseExecutionRecord } from '../../api/evaluation.ee';
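
/**
 * Builds one table column per distinct key found across the given cases'
 * `inputs` or `outputs` objects.
 */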
export function getTestCasesColumns(
cases: TestCaseExecutionRecord[],
columnType: 'inputs' | 'outputs',
): Array<TestTableColumn<TestCaseExecutionRecord & { index: number }>> {
	const columnNames = cases.reduce((set, testCase) => {
		Object.keys(testCase[columnType] ?? {}).forEach((key) => set.add(key));
		return set;
	}, new Set<string>());

	return Array.from(columnNames).map((column) => ({
prop: `${columnType}.${column}`,
label: column,
sortable: true,
filter: true,
showHeaderTooltip: true,
formatter: (row: TestCaseExecutionRecord) => {
const value = row[columnType]?.[column];
if (typeof value === 'object' && value !== null) {
return JSON.stringify(value, null, 2);
}
return `${value}`;
},
}));
}
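
/** Builds numeric metric columns; each cell shows the metric to two decimals, or '-' when missing. */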
export function mapToNumericColumns(columnNames: string[]) {
return columnNames.map((metric) => ({
prop: `metrics.${metric}`,
label: metric,
sortable: true,
filter: true,
showHeaderTooltip: true,
formatter: (row: TestCaseExecutionRecord) => row.metrics?.[metric]?.toFixed(2) ?? '-',
}));
}
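
A rough usage sketch combining the two helpers the way the run detail view does (the partial records and the cast are for illustration only):

import type { TestCaseExecutionRecord } from '../../api/evaluation.ee';
import { getTestCasesColumns, mapToNumericColumns } from './utils';

// Partial records, cast for the sketch; real records come from the evaluation store.
const cases = [
	{ inputs: { query: 'a', limit: 1 }, outputs: { result: 'ok' }, metrics: { accuracy: 0.9 } },
	{ inputs: { query: 'b' }, outputs: { result: 'ok' }, metrics: { accuracy: 0.8 } },
] as unknown as TestCaseExecutionRecord[];

const inputCols = getTestCasesColumns(cases, 'inputs'); // props: 'inputs.query', 'inputs.limit'
const outputCols = getTestCasesColumns(cases, 'outputs'); // props: 'outputs.result'
const metricCols = mapToNumericColumns(['accuracy']); // props: 'metrics.accuracy'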