fix: Update builder to better work with loops and binary data (no-changelog) (#19040)

Mutasem Aldmour authored on 2025-09-02 11:41:26 +02:00; committed by GitHub
parent 38de3ee8a4
commit 2ba544284f
6 changed files with 46 additions and 8 deletions

View File

@@ -123,6 +123,10 @@ Violations are categorized by severity:
 # Run with default settings
 pnpm eval
 
+# Run a specific test case
+pnpm eval --test-case google-sheets-processing
+pnpm eval --test-case extract-from-file
+
 # With additional generated test cases
 GENERATE_TEST_CASES=true pnpm eval

View File

@@ -142,4 +142,16 @@ export const basicTestCases: TestCase[] = [
 		prompt:
 			'Create a multi-agent AI workflow where different AI agents collaborate to research a topic, fact-check information, and compile comprehensive reports.',
 	},
+	{
+		id: 'google-sheets-processing',
+		name: 'Process large Google Sheets data',
+		prompt:
+			'Create a workflow that reads all rows from a Google Sheets document with thousands of customer records. For each row, call an external API to get additional customer data, process the response, and update the row with the enriched information. Handle rate limiting and errors gracefully.',
+	},
+	{
+		id: 'extract-from-file',
+		name: 'Extract data from uploaded files',
+		prompt:
+			'Build a workflow that accepts file uploads through an n8n form. When users upload PDF documents, CSV files, or Excel spreadsheets, automatically extract the text content and data from these files. Transform the extracted data into a structured format and save it to a database or send it via email as a summary.',
+	},
 ];
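
Note: the shape these entries conform to can be inferred from the fields used above. A minimal sketch for orientation only, not the package's actual TestCase definition (the real interface may carry more fields):

// Inferred from usage above; the field names come from the diff, everything else is an assumption.
interface TestCase {
	id: string; // matched against the value passed to `pnpm eval --test-case <id>`
	name: string; // printed when a single test case is selected
	prompt: string; // the user request handed to the workflow builder
}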

View File

@@ -54,6 +54,7 @@ Evaluate whether the workflow correctly implements what the user EXPLICITLY requ
   - Missing core functionality explicitly requested
   - Incorrect operation logic that prevents the workflow from working
   - Workflows missing a trigger node when they need to start automatically or by some external event
+  - Using Split In Batches node
 - **Major (-15 to -25 points)**:
   - Missing explicitly required data transformations
   - Incomplete implementation of requested features

View File

@@ -24,7 +24,7 @@ import { generateMarkdownReport } from '../utils/evaluation-reporter.js';
  * Main CLI evaluation runner that executes all test cases in parallel
  * Supports concurrency control via EVALUATION_CONCURRENCY environment variable
  */
-export async function runCliEvaluation(): Promise<void> {
+export async function runCliEvaluation(testCaseFilter?: string): Promise<void> {
 	console.log(formatHeader('AI Workflow Builder Full Evaluation', 70));
 	console.log();
 	try {
@@ -34,11 +34,24 @@ export async function runCliEvaluation(): Promise<void> {
 		// Determine test cases to run
 		let testCases: TestCase[] = basicTestCases;
 
-		// Optionally generate additional test cases
-		if (shouldGenerateTestCases()) {
-			console.log(pc.blue('➔ Generating additional test cases...'));
-			const generatedCases = await generateTestCases(llm, howManyTestCasesToGenerate());
-			testCases = [...testCases, ...generatedCases];
+		// Filter to single test case if specified
+		if (testCaseFilter) {
+			const filteredCase = testCases.find((tc) => tc.id === testCaseFilter);
+			if (filteredCase) {
+				testCases = [filteredCase];
+				console.log(pc.blue(`➔ Running single test case: ${filteredCase.name}`));
+			} else {
+				console.log(pc.red(`❌ Test case '${testCaseFilter}' not found`));
+				console.log(pc.dim(`Available test cases: ${testCases.map((tc) => tc.id).join(', ')}`));
+				return;
+			}
+		} else {
+			// Optionally generate additional test cases
+			if (shouldGenerateTestCases()) {
+				console.log(pc.blue('➔ Generating additional test cases...'));
+				const generatedCases = await generateTestCases(llm, howManyTestCasesToGenerate());
+				testCases = [...testCases, ...generatedCases];
+			}
 		}
 
 		// Get concurrency from environment
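
Note: when a filter id is supplied, the GENERATE_TEST_CASES branch is skipped entirely, so generated cases never run in single-case mode. The new parameter can also be exercised programmatically; a rough sketch, assuming the export shown above (the import path is hypothetical):

// Hypothetical path; point this at wherever the CLI runner module actually lives in the package.
import { runCliEvaluation } from './evaluations/cli-runner.js';

// Runs only the matching test case; an unknown id prints the available ids and returns early.
await runCliEvaluation('google-sheets-processing');

// Omitting the argument keeps the previous behavior: all basic cases run,
// plus generated ones when GENERATE_TEST_CASES=true.
await runCliEvaluation();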

View File

@@ -14,10 +14,15 @@ export { setupTestEnvironment, createAgent } from './core/environment.js';
 async function main(): Promise<void> {
 	const useLangsmith = process.env.USE_LANGSMITH_EVAL === 'true';
 
+	// Parse command line arguments for single test case
+	const testCaseId = process.argv.includes('--test-case')
+		? process.argv[process.argv.indexOf('--test-case') + 1]
+		: undefined;
+
 	if (useLangsmith) {
 		await runLangsmithEvaluation();
 	} else {
-		await runCliEvaluation();
+		await runCliEvaluation(testCaseId);
 	}
 }
 
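
Note: the flag parsing above simply takes whatever token follows --test-case. A self-contained sketch of how it resolves (the argv values are illustrative):

// With `pnpm eval --test-case extract-from-file`, process.argv ends with the flag and its value.
const argv = ['node', 'eval.js', '--test-case', 'extract-from-file'];
const testCaseId = argv.includes('--test-case')
	? argv[argv.indexOf('--test-case') + 1]
	: undefined;
console.log(testCaseId); // -> 'extract-from-file'

If the flag is passed without a value, the expression yields undefined and the full suite runs, same as before.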

View File

@@ -238,6 +238,8 @@ Why: Unconfigured nodes WILL fail at runtime
 For AI-generated structured data, prefer Structured Output Parser nodes over Code nodes.
 Why: Purpose-built parsers are more reliable and handle edge cases better than custom code.
 
+For binary file data, use Extract From File node to extract content from files before processing.
+
 Use Code nodes only for:
 - Simple string manipulations
 - Already structured data (JSON, CSV)
@@ -321,9 +323,10 @@ Anticipate workflow needs and suggest enhancements:
 - Set nodes for data transformation between incompatible formats
 - Schedule Triggers for recurring tasks
 - Error handling for external service calls
-- Split In Batches for large dataset processing
 Why: Proactive suggestions create more robust, production-ready workflows
+
+NEVER use Split In Batches nodes.
 </proactive_design>
 
 <parameter_updates>