feat(Structured Output Parser Node): Mark all parameters as required for schemas generated from JSON example (#15935)

This commit is contained in:
Eugene
2025-06-05 12:11:21 +02:00
committed by GitHub
parent 6cf07200dc
commit 7711e4b052
10 changed files with 980 additions and 21 deletions

View File

@@ -11,8 +11,13 @@ import type {
} from 'n8n-workflow';
import type { z } from 'zod';
import { inputSchemaField, jsonSchemaExampleField, schemaTypeField } from '@utils/descriptions';
import { convertJsonSchemaToZod, generateSchema } from '@utils/schemaParsing';
import {
buildJsonSchemaExampleNotice,
inputSchemaField,
jsonSchemaExampleField,
schemaTypeField,
} from '@utils/descriptions';
import { convertJsonSchemaToZod, generateSchemaFromExample } from '@utils/schemaParsing';
import { getBatchingOptionFields } from '@utils/sharedFields';
import { SYSTEM_PROMPT_TEMPLATE } from './constants';
@@ -27,7 +32,8 @@ export class InformationExtractor implements INodeType {
icon: 'fa:project-diagram',
iconColor: 'black',
group: ['transform'],
version: [1, 1.1],
version: [1, 1.1, 1.2],
defaultVersion: 1.2,
description: 'Extract information from text in a structured format',
codex: {
alias: ['NER', 'parse', 'parsing', 'JSON', 'data extraction', 'structured'],
@@ -88,6 +94,11 @@ export class InformationExtractor implements INodeType {
"cities": ["Los Angeles", "San Francisco", "San Diego"]
}`,
},
buildJsonSchemaExampleNotice({
showExtraProps: {
'@version': [{ _cnd: { gte: 1.2 } }],
},
}),
{
...inputSchemaField,
default: `{
@@ -242,7 +253,10 @@ export class InformationExtractor implements INodeType {
if (schemaType === 'fromJson') {
const jsonExample = this.getNodeParameter('jsonSchemaExample', 0, '') as string;
jsonSchema = generateSchema(jsonExample);
// Enforce all fields to be required in the generated schema if the node version is 1.2 or higher
const jsonExampleAllFieldsRequired = this.getNode().typeVersion >= 1.2;
jsonSchema = generateSchemaFromExample(jsonExample, jsonExampleAllFieldsRequired);
} else {
const inputSchema = this.getNodeParameter('inputSchema', 0, '') as string;
jsonSchema = jsonParse<JSONSchema7>(inputSchema);

View File

@@ -1,7 +1,8 @@
import type { BaseLanguageModel } from '@langchain/core/language_models/base';
import { FakeListChatModel } from '@langchain/core/utils/testing';
import { mock } from 'jest-mock-extended';
import get from 'lodash/get';
import type { IDataObject, IExecuteFunctions } from 'n8n-workflow';
import type { IDataObject, IExecuteFunctions, INode } from 'n8n-workflow';
import { makeZodSchemaFromAttributes } from '../helpers';
import { InformationExtractor } from '../InformationExtractor.node';
@@ -88,6 +89,208 @@ describe('InformationExtractor', () => {
});
});
describe('Single Item Processing with JSON Schema from Example', () => {
// Happy path for typeVersion 1.2 with schemaType 'fromJson': the fake model
// answers with every key that appears in the JSON example, and the node is
// expected to return that same object under `json.output`.
it('should extract information using JSON schema from example - version 1.2 (required fields)', async () => {
const node = new InformationExtractor();
const inputData = [
{
json: { text: 'John lives in California and has visited Los Angeles and San Francisco' },
},
];
// Arguments: node parameters, the language model to use, and the input items.
// NOTE(review): createExecuteFunctionsMock / formatFakeLlmResponse are defined
// outside this view; their exact behavior should be confirmed there.
const mockExecuteFunctions = createExecuteFunctionsMock(
{
text: 'John lives in California and has visited Los Angeles and San Francisco',
schemaType: 'fromJson',
jsonSchemaExample: JSON.stringify({
state: 'California',
cities: ['Los Angeles', 'San Francisco'],
}),
options: {
systemPromptTemplate: '',
},
},
new FakeListChatModel({
responses: [
formatFakeLlmResponse({
state: 'California',
cities: ['Los Angeles', 'San Francisco'],
}),
],
}),
inputData,
);
// Mock version 1.2 to test required fields behavior
mockExecuteFunctions.getNode = () => mock<INode>({ typeVersion: 1.2 });
const response = await node.execute.call(mockExecuteFunctions);
// One output branch containing one item whose `output` mirrors the model answer.
expect(response).toEqual([
[
{
json: {
output: {
state: 'California',
cities: ['Los Angeles', 'San Francisco'],
},
},
},
],
]);
});
// Backward-compatibility case for typeVersion 1.1: the JSON example lists
// `state` and `cities`, the fake model returns only `state`, and execution is
// still expected to succeed with just that field in `json.output`.
it('should extract information using JSON schema from example - version 1.1 (optional fields)', async () => {
const node = new InformationExtractor();
const inputData = [{ json: { text: 'John lives in California' } }];
const mockExecuteFunctions = createExecuteFunctionsMock(
{
text: 'John lives in California',
schemaType: 'fromJson',
jsonSchemaExample: JSON.stringify({
state: 'California',
cities: ['Los Angeles', 'San Francisco'],
}),
options: {
systemPromptTemplate: '',
},
},
new FakeListChatModel({
responses: [
formatFakeLlmResponse({
state: 'California',
// cities field missing - should be allowed in v1.1
}),
],
}),
inputData,
);
// Mock version 1.1 to test optional fields behavior
mockExecuteFunctions.getNode = () => mock<INode>({ typeVersion: 1.1 });
const response = await node.execute.call(mockExecuteFunctions);
// The missing `cities` key is simply absent from the output; no error is raised.
expect(response).toEqual([
[
{
json: {
output: {
state: 'California',
},
},
},
],
]);
});
// Negative case for typeVersion 1.2: the JSON example has three keys but the
// fake model returns only `state`, so `execute` is expected to reject.
it('should throw error for incomplete model output in version 1.2 (required fields)', async () => {
const node = new InformationExtractor();
const inputData = [{ json: { text: 'John lives in California' } }];
const mockExecuteFunctions = createExecuteFunctionsMock(
{
text: 'John lives in California',
schemaType: 'fromJson',
jsonSchemaExample: JSON.stringify({
state: 'California',
cities: ['Los Angeles', 'San Francisco'],
zipCode: '90210',
}),
options: {
systemPromptTemplate: '',
},
},
new FakeListChatModel({
responses: [
formatFakeLlmResponse({
state: 'California',
// Missing cities and zipCode - should fail in v1.2 since all fields are required
}),
],
}),
inputData,
);
// Version 1.2 is the first version for which the node requests all-required schemas.
mockExecuteFunctions.getNode = () => mock<INode>({ typeVersion: 1.2 });
// Only the rejection itself is asserted; the error type and message are not checked.
await expect(node.execute.call(mockExecuteFunctions)).rejects.toThrow();
});
// Nested-object case on typeVersion 1.2: the JSON example contains objects
// inside objects plus an array of strings, and the fake model returns a
// payload with the same shape and values.
it('should extract information using complex nested JSON schema from example', async () => {
const node = new InformationExtractor();
const inputData = [
{
json: {
text: 'John Doe works at Acme Corp as a Software Engineer with 5 years experience',
},
},
];
// Despite the name, this is the example object handed to the node as
// `jsonSchemaExample`, not a JSON Schema document.
const complexSchema = {
person: {
name: 'John Doe',
company: {
name: 'Acme Corp',
position: 'Software Engineer',
},
},
experience: {
years: 5,
skills: ['JavaScript', 'TypeScript'],
},
};
const mockExecuteFunctions = createExecuteFunctionsMock(
{
text: 'John Doe works at Acme Corp as a Software Engineer with 5 years experience',
schemaType: 'fromJson',
jsonSchemaExample: JSON.stringify(complexSchema),
options: {
systemPromptTemplate: '',
},
},
new FakeListChatModel({
responses: [
formatFakeLlmResponse({
person: {
name: 'John Doe',
company: {
name: 'Acme Corp',
position: 'Software Engineer',
},
},
experience: {
years: 5,
skills: ['JavaScript', 'TypeScript'],
},
}),
],
}),
inputData,
);
mockExecuteFunctions.getNode = () => mock<INode>({ typeVersion: 1.2 });
const response = await node.execute.call(mockExecuteFunctions);
// Looser than the sibling tests: toMatchObject / arrayContaining are used
// instead of strict equality on the whole response.
expect(response[0][0].json.output).toMatchObject({
person: {
name: 'John Doe',
company: {
name: 'Acme Corp',
position: 'Software Engineer',
},
},
experience: {
years: 5,
skills: expect.arrayContaining(['JavaScript', 'TypeScript']),
},
});
});
});
describe('Batch Processing', () => {
it('should process multiple items in batches', async () => {
const node = new InformationExtractor();

View File

@@ -12,12 +12,17 @@ import {
} from 'n8n-workflow';
import type { z } from 'zod';
import { inputSchemaField, jsonSchemaExampleField, schemaTypeField } from '@utils/descriptions';
import {
buildJsonSchemaExampleNotice,
inputSchemaField,
jsonSchemaExampleField,
schemaTypeField,
} from '@utils/descriptions';
import {
N8nOutputFixingParser,
N8nStructuredOutputParser,
} from '@utils/output_parsers/N8nOutputParser';
import { convertJsonSchemaToZod, generateSchema } from '@utils/schemaParsing';
import { convertJsonSchemaToZod, generateSchemaFromExample } from '@utils/schemaParsing';
import { getConnectionHintNoticeField } from '@utils/sharedFields';
import { NAIVE_FIX_PROMPT } from './prompt';
@@ -29,8 +34,8 @@ export class OutputParserStructured implements INodeType {
icon: 'fa:code',
iconColor: 'black',
group: ['transform'],
version: [1, 1.1, 1.2],
defaultVersion: 1.2,
version: [1, 1.1, 1.2, 1.3],
defaultVersion: 1.3,
description: 'Return data in a defined JSON format',
defaults: {
name: 'Structured Output Parser',
@@ -74,6 +79,11 @@ export class OutputParserStructured implements INodeType {
"cities": ["Los Angeles", "San Francisco", "San Diego"]
}`,
},
buildJsonSchemaExampleNotice({
showExtraProps: {
'@version': [{ _cnd: { gte: 1.3 } }],
},
}),
{
...inputSchemaField,
default: `{
@@ -181,6 +191,9 @@ export class OutputParserStructured implements INodeType {
let inputSchema: string;
// Enforce all fields to be required in the generated schema if the node version is 1.3 or higher
const jsonExampleAllFieldsRequired = this.getNode().typeVersion >= 1.3;
if (this.getNode().typeVersion <= 1.1) {
inputSchema = this.getNodeParameter('jsonSchema', itemIndex, '') as string;
} else {
@@ -188,7 +201,9 @@ export class OutputParserStructured implements INodeType {
}
const jsonSchema =
schemaType === 'fromJson' ? generateSchema(jsonExample) : jsonParse<JSONSchema7>(inputSchema);
schemaType === 'fromJson'
? generateSchemaFromExample(jsonExample, jsonExampleAllFieldsRequired)
: jsonParse<JSONSchema7>(inputSchema);
const zodSchema = convertJsonSchemaToZod<z.ZodSchema<object>>(jsonSchema);
const nodeVersion = this.getNode().typeVersion;

View File

@@ -472,6 +472,272 @@ describe('OutputParserStructured', () => {
expect(parsersOutput).toEqual(outputObject);
});
});
describe('Version 1.3', () => {
// Every test in this describe block runs against a node reporting typeVersion 1.3.
beforeEach(() => {
thisArg.getNode.mockReturnValue(mock<INode>({ typeVersion: 1.3 }));
});
describe('schema from JSON example', () => {
// Builds the parser from a nested JSON example (schemaType 'fromJson') and
// checks that a payload containing every field from the example parses
// successfully and is returned unchanged.
// NOTE(review): this case only asserts acceptance of a complete payload; it
// does not itself exercise rejection of missing fields.
it('should make all fields required when generating schema from JSON example', async () => {
const jsonExample = `{
"user": {
"name": "Alice",
"email": "alice@example.com",
"profile": {
"age": 30,
"city": "New York"
}
},
"tags": ["work", "important"]
}`;
thisArg.getNodeParameter.calledWith('schemaType', 0).mockReturnValueOnce('fromJson');
thisArg.getNodeParameter
.calledWith('jsonSchemaExample', 0)
.mockReturnValueOnce(jsonExample);
const { response } = (await outputParser.supplyData.call(thisArg, 0)) as {
response: N8nStructuredOutputParser;
};
// The payload is nested under an `output` key that the JSON example does not
// contain — presumably the parser wraps the generated schema; confirm in
// N8nStructuredOutputParser.
const outputObject = {
output: {
user: {
name: 'Bob',
email: 'bob@example.com',
profile: {
age: 25,
city: 'San Francisco',
},
},
tags: ['personal'],
},
};
// The model text embeds the JSON inside a fenced ```json block after some prose.
const parsersOutput = await response.parse(`Here's the user data:
\`\`\`json
${JSON.stringify(outputObject)}
\`\`\`
`);
expect(parsersOutput).toEqual(outputObject);
});
// Builds the parser from a flat three-key JSON example and feeds it a payload
// that omits `email`; parsing is expected to reject with an error matching
// 'Required'.
it('should reject output missing required fields from JSON example', async () => {
const jsonExample = `{
"name": "Alice",
"age": 30,
"email": "alice@example.com"
}`;
thisArg.getNodeParameter.calledWith('schemaType', 0).mockReturnValueOnce('fromJson');
thisArg.getNodeParameter
.calledWith('jsonSchemaExample', 0)
.mockReturnValueOnce(jsonExample);
const { response } = (await outputParser.supplyData.call(thisArg, 0)) as {
response: N8nStructuredOutputParser;
};
const incompleteOutput = {
output: {
name: 'Bob',
age: 25,
// Missing email field
},
};
// NOTE(review): the third argument `(e) => e` looks like an error-mapping
// callback that passes the original error through — confirm against the
// signature of N8nStructuredOutputParser.parse.
await expect(
response.parse(
`Here's the incomplete output:
\`\`\`json
${JSON.stringify(incompleteOutput)}
\`\`\`
`,
undefined,
(e) => e,
),
).rejects.toThrow('Required');
});
// Same rejection check as the flat case, but the missing key (`role`) sits
// inside an object nested within an array item of the JSON example.
it('should require all fields in array items from JSON example', async () => {
const jsonExample = `{
"users": [
{
"id": 1,
"name": "Alice",
"metadata": {
"department": "Engineering",
"role": "Developer"
}
}
]
}`;
thisArg.getNodeParameter.calledWith('schemaType', 0).mockReturnValueOnce('fromJson');
thisArg.getNodeParameter
.calledWith('jsonSchemaExample', 0)
.mockReturnValueOnce(jsonExample);
const { response } = (await outputParser.supplyData.call(thisArg, 0)) as {
response: N8nStructuredOutputParser;
};
const incompleteArrayOutput = {
output: {
users: [
{
id: 2,
name: 'Bob',
metadata: {
department: 'Marketing',
// Missing role field
},
},
],
},
};
// Parsing must reject with an error matching 'Required'.
// NOTE(review): `(e) => e` is presumably an error mapper returning the error
// unchanged — confirm against N8nStructuredOutputParser.parse.
await expect(
response.parse(
`Here's the incomplete array output:
\`\`\`json
${JSON.stringify(incompleteArrayOutput)}
\`\`\`
`,
undefined,
(e) => e,
),
).rejects.toThrow('Required');
});
});
describe('manual schema mode', () => {
// Checks that schemaType 'manual' still works on typeVersion 1.3: the parser is
// built from a hand-written JSON Schema (with explicit `required` lists) and a
// conforming payload is expected to parse and be returned unchanged.
it('should work with manually defined schema in version 1.3', async () => {
const inputSchema = `{
"type": "object",
"properties": {
"result": {
"type": "object",
"properties": {
"status": { "type": "string" },
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": { "type": "number" },
"value": { "type": "string" }
},
"required": ["id", "value"]
}
}
},
"required": ["status", "data"]
}
},
"required": ["result"]
}`;
// In manual mode the schema comes from the `inputSchema` parameter rather than
// from `jsonSchemaExample`.
thisArg.getNodeParameter.calledWith('schemaType', 0).mockReturnValueOnce('manual');
thisArg.getNodeParameter.calledWith('inputSchema', 0).mockReturnValueOnce(inputSchema);
const { response } = (await outputParser.supplyData.call(thisArg, 0)) as {
response: N8nStructuredOutputParser;
};
const outputObject = {
output: {
result: {
status: 'success',
data: [
{ id: 1, value: 'first' },
{ id: 2, value: 'second' },
],
},
},
};
const parsersOutput = await response.parse(`Here's the result:
\`\`\`json
${JSON.stringify(outputObject)}
\`\`\`
`);
expect(parsersOutput).toEqual(outputObject);
});
});
describe('complex nested structures', () => {
// Stress case for nesting depth: the JSON example nests arrays of objects four
// levels deep (company -> departments -> teams -> members), and a complete
// payload of the same shape is expected to parse and be returned unchanged.
it('should handle deeply nested objects with required fields', async () => {
const jsonExample = `{
"company": {
"name": "TechCorp",
"departments": [
{
"name": "Engineering",
"teams": [
{
"name": "Backend",
"members": [
{
"id": 1,
"name": "Alice",
"skills": ["Python", "Docker"]
}
]
}
]
}
]
}
}`;
thisArg.getNodeParameter.calledWith('schemaType', 0).mockReturnValueOnce('fromJson');
thisArg.getNodeParameter
.calledWith('jsonSchemaExample', 0)
.mockReturnValueOnce(jsonExample);
const { response } = (await outputParser.supplyData.call(thisArg, 0)) as {
response: N8nStructuredOutputParser;
};
// The example lists a single member, while this payload supplies two, so the
// array length in the example is not treated as a constraint.
const complexOutput = {
output: {
company: {
name: 'StartupCorp',
departments: [
{
name: 'Product',
teams: [
{
name: 'Frontend',
members: [
{
id: 2,
name: 'Bob',
skills: ['React', 'TypeScript'],
},
{
id: 3,
name: 'Carol',
skills: ['Vue', 'CSS'],
},
],
},
],
},
],
},
},
};
const parsersOutput = await response.parse(`Here's the complex company data:
\`\`\`json
${JSON.stringify(complexOutput)}
\`\`\`
`);
expect(parsersOutput).toEqual(complexOutput);
});
});
});
});
describe('Auto-Fix', () => {

View File

@@ -18,14 +18,28 @@ import { jsonParse, NodeConnectionTypes, NodeOperationError } from 'n8n-workflow
import {
buildInputSchemaField,
buildJsonSchemaExampleField,
buildJsonSchemaExampleNotice,
schemaTypeField,
} from '@utils/descriptions';
import { nodeNameToToolName } from '@utils/helpers';
import { convertJsonSchemaToZod, generateSchema } from '@utils/schemaParsing';
import { convertJsonSchemaToZod, generateSchemaFromExample } from '@utils/schemaParsing';
import { getConnectionHintNoticeField } from '@utils/sharedFields';
import type { DynamicZodObject } from '../../../types/zod.types';
// Module-level property descriptors for the input-schema section, built once
// from the shared builders in '@utils/descriptions'. All three pass
// `specifyInputSchema: [true]` as an extra show condition.
// NOTE(review): presumably `showExtraProps` is merged into each field's
// displayOptions.show — confirm in '@utils/descriptions'.
const jsonSchemaExampleField = buildJsonSchemaExampleField({
showExtraProps: { specifyInputSchema: [true] },
});
// The notice carries an additional '@version' condition (gte 1.3), matching the
// version from which this node requests all-required schemas from JSON examples.
const jsonSchemaExampleNotice = buildJsonSchemaExampleNotice({
showExtraProps: {
specifyInputSchema: [true],
'@version': [{ _cnd: { gte: 1.3 } }],
},
});
const jsonSchemaField = buildInputSchemaField({ showExtraProps: { specifyInputSchema: [true] } });
export class ToolCode implements INodeType {
description: INodeTypeDescription = {
displayName: 'Code Tool',
@@ -33,7 +47,7 @@ export class ToolCode implements INodeType {
icon: 'fa:code',
iconColor: 'black',
group: ['transform'],
version: [1, 1.1, 1.2],
version: [1, 1.1, 1.2, 1.3],
description: 'Write a tool in JS or Python',
defaults: {
name: 'Code Tool',
@@ -173,8 +187,9 @@ export class ToolCode implements INodeType {
default: false,
},
{ ...schemaTypeField, displayOptions: { show: { specifyInputSchema: [true] } } },
buildJsonSchemaExampleField({ showExtraProps: { specifyInputSchema: [true] } }),
buildInputSchemaField({ showExtraProps: { specifyInputSchema: [true] } }),
jsonSchemaExampleField,
jsonSchemaExampleNotice,
jsonSchemaField,
],
};
@@ -275,9 +290,10 @@ export class ToolCode implements INodeType {
const inputSchema = this.getNodeParameter('inputSchema', itemIndex, '') as string;
const schemaType = this.getNodeParameter('schemaType', itemIndex) as 'fromJson' | 'manual';
const jsonSchema =
schemaType === 'fromJson'
? generateSchema(jsonExample)
? generateSchemaFromExample(jsonExample, this.getNode().typeVersion >= 1.3)
: jsonParse<JSONSchema7>(inputSchema);
const zodSchema = convertJsonSchemaToZod<DynamicZodObject>(jsonSchema);

View File

@@ -24,7 +24,7 @@ import { NodeConnectionTypes, NodeOperationError, jsonParse } from 'n8n-workflow
import { versionDescription } from './versionDescription';
import type { DynamicZodObject } from '../../../../types/zod.types';
import { convertJsonSchemaToZod, generateSchema } from '../../../../utils/schemaParsing';
import { convertJsonSchemaToZod, generateSchemaFromExample } from '../../../../utils/schemaParsing';
export class ToolWorkflowV1 implements INodeType {
description: INodeTypeDescription;
@@ -215,7 +215,7 @@ export class ToolWorkflowV1 implements INodeType {
const schemaType = this.getNodeParameter('schemaType', itemIndex) as 'fromJson' | 'manual';
const jsonSchema =
schemaType === 'fromJson'
? generateSchema(jsonExample)
? generateSchemaFromExample(jsonExample)
: jsonParse<JSONSchema7>(inputSchema);
const zodSchema = convertJsonSchemaToZod<DynamicZodObject>(jsonSchema);