// n8n-enterprise-unlocked/packages/@n8n/ai-workflow-builder.ee/evaluations/utils/evaluation-reporter.ts

import Table from 'cli-table3';
import pc from 'picocolors';

import {
	formatColoredScore,
	formatHeader,
	formatPercentage,
	formatStatusBadge,
	formatTestName,
	formatViolationType,
} from './evaluation-helpers.js';
import type { Violation } from '../types/evaluation.js';
import type { TestResult } from '../types/test-result.js';

/**
 * Generates a markdown report from evaluation results
 * @param results - Array of test results
 * @param metrics - Calculated metrics including averages and counts
 * @returns Formatted markdown report string
 */
export function generateMarkdownReport(
	results: TestResult[],
	metrics: {
		totalTests: number;
		successfulTests: number;
		averageScore: number;
		categoryAverages: Record<string, number>;
		violationCounts: { critical: number; major: number; minor: number };
	},
): string {
	const { totalTests, successfulTests, averageScore, categoryAverages, violationCounts } = metrics;

	let report = `# AI Workflow Builder Evaluation Report
## Summary
- Total Tests: ${totalTests}
- Successful: ${successfulTests}
- Failed: ${totalTests - successfulTests}
- Average Score: ${formatPercentage(averageScore)}
## Category Averages
- Functionality: ${formatPercentage(categoryAverages.functionality)}
- Connections: ${formatPercentage(categoryAverages.connections)}
- Expressions: ${formatPercentage(categoryAverages.expressions)}
- Node Configuration: ${formatPercentage(categoryAverages.nodeConfiguration)}
## Violations Summary
- Critical: ${violationCounts.critical}
- Major: ${violationCounts.major}
- Minor: ${violationCounts.minor}
## Detailed Results
`;

	results.forEach((result) => {
		report += `### ${result.testCase.name} (${result.testCase.id})
- **Score**: ${formatPercentage(result.evaluationResult.overallScore)}
- **Generation Time**: ${result.generationTime}ms
- **Nodes Generated**: ${result.generatedWorkflow.nodes.length}
- **Summary**: ${result.evaluationResult.summary}
`;

		if (
			result.evaluationResult.criticalIssues &&
			result.evaluationResult.criticalIssues.length > 0
		) {
			report += '**Critical Issues**:\n';
			result.evaluationResult.criticalIssues.forEach((issue) => {
				report += `- ${issue}\n`;
			});
			report += '\n';
		}

		const allViolations = [
			...result.evaluationResult.functionality.violations.map((v) => ({
				...v,
				category: 'Functionality',
			})),
			...result.evaluationResult.connections.violations.map((v) => ({
				...v,
				category: 'Connections',
			})),
			...result.evaluationResult.expressions.violations.map((v) => ({
				...v,
				category: 'Expressions',
			})),
			...result.evaluationResult.nodeConfiguration.violations.map((v) => ({
				...v,
				category: 'Node Configuration',
			})),
		];

		if (allViolations.length > 0) {
			report += '**Violations**:\n';
			allViolations.forEach((v) => {
				report += `- [${v.type.toUpperCase()}] ${v.category}: ${v.description}\n`;
			});
			report += '\n';
		}
	});

	return report;
}
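
// Usage sketch (illustrative only): the report is typically written to disk by whatever
// runner drives the evaluation. `runAllTestCases` and `calculateMetrics` are hypothetical
// stand-ins for code that lives outside this module; only `generateMarkdownReport` and
// `writeFileSync` (from `node:fs`) are real here. Assumes an async entry point.
//
//   import { writeFileSync } from 'node:fs';
//
//   const results = await runAllTestCases(testCases); // hypothetical evaluation runner
//   const metrics = calculateMetrics(results); // hypothetical aggregation helper
//   writeFileSync('evaluation-report.md', generateMarkdownReport(results, metrics));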

/**
 * Displays test results summary in the console
 * @param testCases - Array of test cases
 * @param results - Array of test results
 */
export function displayTestResults(
	testCases: Array<{ id: string; name: string }>,
	results: TestResult[],
): void {
	console.log();
	console.log(formatHeader('Test Results', 70));
	console.log();

	for (const testCase of testCases) {
		const result = results.find((r) => r.testCase.id === testCase.id);
		if (result) {
			const status = result.error ? 'fail' : 'pass';
			const badge = formatStatusBadge(status);
			const score = result.error ? 'N/A' : formatColoredScore(result.evaluationResult.overallScore);

			console.log(` ${badge} ${formatTestName(testCase.name, testCase.id)}`);
			console.log(
				` Score: ${score} | Nodes: ${result.generatedWorkflow?.nodes?.length ?? 'N/A'} | Time: ${result.generationTime}ms`,
			);

			if (result.error) {
				console.log(` ${pc.red('Error:')} ${pc.dim(result.error)}`);
			}
		}
	}
}

/**
 * Displays the evaluation summary table
 * @param _results - Array of test results (currently unused)
 * @param metrics - Calculated metrics
 */
export function displaySummaryTable(
	_results: TestResult[],
	metrics: {
		totalTests: number;
		successfulTests: number;
		averageScore: number;
		categoryAverages: Record<string, number>;
		violationCounts: { critical: number; major: number; minor: number };
	},
): void {
	const { totalTests, successfulTests, averageScore, categoryAverages, violationCounts } = metrics;
	const failedTests = totalTests - successfulTests;

	const summaryTable = new Table({
		head: ['Metric', 'Value'],
		style: { head: ['cyan'] },
	});

	summaryTable.push(
		['Total Tests', totalTests.toString()],
		['Successful', pc.green(successfulTests.toString())],
		['Failed', failedTests > 0 ? pc.red(failedTests.toString()) : '0'],
		['Average Score', formatColoredScore(averageScore)],
		[pc.dim('─'.repeat(20)), pc.dim('─'.repeat(20))],
		['Functionality', formatColoredScore(categoryAverages.functionality)],
		['Connections', formatColoredScore(categoryAverages.connections)],
		['Expressions', formatColoredScore(categoryAverages.expressions)],
		['Node Config', formatColoredScore(categoryAverages.nodeConfiguration)],
		[pc.dim('─'.repeat(20)), pc.dim('─'.repeat(20))],
		[
			'Critical Issues',
			violationCounts.critical > 0 ? pc.red(violationCounts.critical.toString()) : '0',
		],
		['Major Issues', violationCounts.major > 0 ? pc.yellow(violationCounts.major.toString()) : '0'],
		['Minor Issues', pc.dim(violationCounts.minor.toString())],
	);

	console.log();
	console.log(formatHeader('Summary', 70));
	console.log(summaryTable.toString());
}
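
// Sketch of how the `metrics` object consumed above might be aggregated from raw results.
// The real aggregation is not part of this file; this version assumes each category result
// exposes a numeric `score` field alongside its `violations`, which is an assumption about
// the evaluation types rather than something this module guarantees.
//
//   function calculateMetricsSketch(results: TestResult[]) {
//   	const ok = results.filter((r) => !r.error);
//   	const avg = (xs: number[]) => (xs.length ? xs.reduce((a, b) => a + b, 0) / xs.length : 0);
//   	const countByType = (type: 'critical' | 'major' | 'minor') =>
//   		ok
//   			.flatMap((r) => [
//   				...r.evaluationResult.functionality.violations,
//   				...r.evaluationResult.connections.violations,
//   				...r.evaluationResult.expressions.violations,
//   				...r.evaluationResult.nodeConfiguration.violations,
//   			])
//   			.filter((v) => v.type === type).length;
//   	return {
//   		totalTests: results.length,
//   		successfulTests: ok.length,
//   		averageScore: avg(ok.map((r) => r.evaluationResult.overallScore)),
//   		categoryAverages: {
//   			functionality: avg(ok.map((r) => r.evaluationResult.functionality.score)), // assumed field
//   			connections: avg(ok.map((r) => r.evaluationResult.connections.score)), // assumed field
//   			expressions: avg(ok.map((r) => r.evaluationResult.expressions.score)), // assumed field
//   			nodeConfiguration: avg(ok.map((r) => r.evaluationResult.nodeConfiguration.score)), // assumed field
//   		},
//   		violationCounts: {
//   			critical: countByType('critical'),
//   			major: countByType('major'),
//   			minor: countByType('minor'),
//   		},
//   	};
//   }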

/**
 * Displays detailed violations grouped by severity
 * @param results - Array of test results
 */
export function displayViolationsDetail(results: TestResult[]): void {
	// Collect all violations with test context
	const allViolations: Array<{
		violation: Violation & { category: string };
		testName: string;
	}> = [];

	results.forEach((result) => {
		if (!result.error) {
			const testViolations = [
				...result.evaluationResult.functionality.violations.map((v) => ({
					violation: { ...v, category: 'Functionality' },
					testName: result.testCase.name,
				})),
				...result.evaluationResult.connections.violations.map((v) => ({
					violation: { ...v, category: 'Connections' },
					testName: result.testCase.name,
				})),
				...result.evaluationResult.expressions.violations.map((v) => ({
					violation: { ...v, category: 'Expressions' },
					testName: result.testCase.name,
				})),
				...result.evaluationResult.nodeConfiguration.violations.map((v) => ({
					violation: { ...v, category: 'Node Config' },
					testName: result.testCase.name,
				})),
			];
			allViolations.push(...testViolations);
		}
	});

	if (allViolations.length === 0) return;

	console.log();
	console.log(formatHeader('Violations Detail', 70));

	// Group violations by severity
	const criticalViolations = allViolations.filter((v) => v.violation.type === 'critical');
	const majorViolations = allViolations.filter((v) => v.violation.type === 'major');
	const minorViolations = allViolations.filter((v) => v.violation.type === 'minor');

	// Display critical violations
	if (criticalViolations.length > 0) {
		console.log();
		console.log(pc.red('Critical Violations:'));
		criticalViolations.forEach(({ violation, testName }) => {
			console.log(
				` ${formatViolationType('critical')} [${violation.category}] ${violation.description}`,
			);
			console.log(` ${pc.dim(`Test: ${testName} | Points: -${violation.pointsDeducted}`)}`);
		});
	}

	// Display major violations
	if (majorViolations.length > 0) {
		console.log();
		console.log(pc.yellow('Major Violations:'));
		majorViolations.forEach(({ violation, testName }) => {
			console.log(
				` ${formatViolationType('major')} [${violation.category}] ${violation.description}`,
			);
			console.log(` ${pc.dim(`Test: ${testName} | Points: -${violation.pointsDeducted}`)}`);
		});
	}

	// Display minor violations
	if (minorViolations.length > 0) {
		console.log();
		console.log(pc.gray('Minor Violations:'));
		minorViolations.forEach(({ violation, testName }) => {
			console.log(
				` ${formatViolationType('minor')} [${violation.category}] ${violation.description}`,
			);
			console.log(` ${pc.dim(`Test: ${testName} | Points: -${violation.pointsDeducted}`)}`);
		});
	}
}
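
// End-to-end usage sketch: how a hypothetical evaluation runner might combine the console
// reporters with the markdown report. `testCases`, `results`, and `metrics` are assumed
// inputs (see the sketches above); none of this wiring is exported by this module.
//
//   displayTestResults(testCases, results);
//   displaySummaryTable(results, metrics);
//   displayViolationsDetail(results);
//   const report = generateMarkdownReport(results, metrics);
//   console.log(pc.dim(`Report generated (${report.length} characters)`));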