feat(Structured Output Parser Node): Mark all parameters as required for schemas generated from JSON example (#15935)

This commit is contained in:
Eugene
2025-06-05 12:11:21 +02:00
committed by GitHub
parent 6cf07200dc
commit 7711e4b052
10 changed files with 980 additions and 21 deletions

View File

@@ -11,8 +11,13 @@ import type {
} from 'n8n-workflow';
import type { z } from 'zod';
import { inputSchemaField, jsonSchemaExampleField, schemaTypeField } from '@utils/descriptions';
import { convertJsonSchemaToZod, generateSchema } from '@utils/schemaParsing';
import {
buildJsonSchemaExampleNotice,
inputSchemaField,
jsonSchemaExampleField,
schemaTypeField,
} from '@utils/descriptions';
import { convertJsonSchemaToZod, generateSchemaFromExample } from '@utils/schemaParsing';
import { getBatchingOptionFields } from '@utils/sharedFields';
import { SYSTEM_PROMPT_TEMPLATE } from './constants';
@@ -27,7 +32,8 @@ export class InformationExtractor implements INodeType {
icon: 'fa:project-diagram',
iconColor: 'black',
group: ['transform'],
version: [1, 1.1],
version: [1, 1.1, 1.2],
defaultVersion: 1.2,
description: 'Extract information from text in a structured format',
codex: {
alias: ['NER', 'parse', 'parsing', 'JSON', 'data extraction', 'structured'],
@@ -88,6 +94,11 @@ export class InformationExtractor implements INodeType {
"cities": ["Los Angeles", "San Francisco", "San Diego"]
}`,
},
buildJsonSchemaExampleNotice({
showExtraProps: {
'@version': [{ _cnd: { gte: 1.2 } }],
},
}),
{
...inputSchemaField,
default: `{
@@ -242,7 +253,10 @@ export class InformationExtractor implements INodeType {
if (schemaType === 'fromJson') {
const jsonExample = this.getNodeParameter('jsonSchemaExample', 0, '') as string;
jsonSchema = generateSchema(jsonExample);
// Enforce all fields to be required in the generated schema if the node version is 1.2 or higher
const jsonExampleAllFieldsRequired = this.getNode().typeVersion >= 1.2;
jsonSchema = generateSchemaFromExample(jsonExample, jsonExampleAllFieldsRequired);
} else {
const inputSchema = this.getNodeParameter('inputSchema', 0, '') as string;
jsonSchema = jsonParse<JSONSchema7>(inputSchema);

View File

@@ -1,7 +1,8 @@
import type { BaseLanguageModel } from '@langchain/core/language_models/base';
import { FakeListChatModel } from '@langchain/core/utils/testing';
import { mock } from 'jest-mock-extended';
import get from 'lodash/get';
import type { IDataObject, IExecuteFunctions } from 'n8n-workflow';
import type { IDataObject, IExecuteFunctions, INode } from 'n8n-workflow';
import { makeZodSchemaFromAttributes } from '../helpers';
import { InformationExtractor } from '../InformationExtractor.node';
@@ -88,6 +89,208 @@ describe('InformationExtractor', () => {
});
});
describe('Single Item Processing with JSON Schema from Example', () => {
it('should extract information using JSON schema from example - version 1.2 (required fields)', async () => {
	// Each call yields a fresh object so the schema example, the fake model reply
	// and the expectation never share a reference.
	const buildLocationInfo = () => ({
		state: 'California',
		cities: ['Los Angeles', 'San Francisco'],
	});
	const sourceText = 'John lives in California and has visited Los Angeles and San Francisco';

	const extractor = new InformationExtractor();
	const items = [{ json: { text: sourceText } }];

	const executeFunctions = createExecuteFunctionsMock(
		{
			text: sourceText,
			schemaType: 'fromJson',
			jsonSchemaExample: JSON.stringify(buildLocationInfo()),
			options: { systemPromptTemplate: '' },
		},
		// The fake model answers with every field that appears in the example
		new FakeListChatModel({
			responses: [formatFakeLlmResponse(buildLocationInfo())],
		}),
		items,
	);

	// Report typeVersion 1.2 so the required-fields behaviour is exercised
	executeFunctions.getNode = () => mock<INode>({ typeVersion: 1.2 });

	const result = await extractor.execute.call(executeFunctions);

	// One output branch holding one item whose `output` equals the model reply
	expect(result).toEqual([[{ json: { output: buildLocationInfo() } }]]);
});
it('should extract information using JSON schema from example - version 1.1 (optional fields)', async () => {
	const sourceText = 'John lives in California';
	// The example declares two fields...
	const exampleJson = JSON.stringify({
		state: 'California',
		cities: ['Los Angeles', 'San Francisco'],
	});
	// ...but the fake model only returns `state`; `cities` is left out on purpose,
	// which should be allowed in v1.1.
	const fakeModel = new FakeListChatModel({
		responses: [formatFakeLlmResponse({ state: 'California' })],
	});

	const extractor = new InformationExtractor();
	const items = [{ json: { text: sourceText } }];
	const executeFunctions = createExecuteFunctionsMock(
		{
			text: sourceText,
			schemaType: 'fromJson',
			jsonSchemaExample: exampleJson,
			options: { systemPromptTemplate: '' },
		},
		fakeModel,
		items,
	);

	// Report typeVersion 1.1 so the optional-fields behaviour is exercised
	executeFunctions.getNode = () => mock<INode>({ typeVersion: 1.1 });

	const result = await extractor.execute.call(executeFunctions);

	// The partial reply is passed through as the item's `output`
	expect(result).toEqual([[{ json: { output: { state: 'California' } } }]]);
});
it('should throw error for incomplete model output in version 1.2 (required fields)', async () => {
	const sourceText = 'John lives in California';
	// Example with three fields; in v1.2 all of them are required
	const exampleJson = JSON.stringify({
		state: 'California',
		cities: ['Los Angeles', 'San Francisco'],
		zipCode: '90210',
	});
	// The model reply omits `cities` and `zipCode`
	const incompleteReply = formatFakeLlmResponse({ state: 'California' });

	const extractor = new InformationExtractor();
	const items = [{ json: { text: sourceText } }];
	const executeFunctions = createExecuteFunctionsMock(
		{
			text: sourceText,
			schemaType: 'fromJson',
			jsonSchemaExample: exampleJson,
			options: { systemPromptTemplate: '' },
		},
		new FakeListChatModel({ responses: [incompleteReply] }),
		items,
	);
	executeFunctions.getNode = () => mock<INode>({ typeVersion: 1.2 });

	// Execution must reject because required fields are missing from the reply
	const execution = extractor.execute.call(executeFunctions);
	await expect(execution).rejects.toThrow();
});
it('should extract information using complex nested JSON schema from example', async () => {
	// Factories return fresh nested objects on every call, so the schema example,
	// the fake model reply and the expectation stay independent of each other.
	const buildPerson = () => ({
		name: 'John Doe',
		company: {
			name: 'Acme Corp',
			position: 'Software Engineer',
		},
	});
	const buildProfile = () => ({
		person: buildPerson(),
		experience: {
			years: 5,
			skills: ['JavaScript', 'TypeScript'],
		},
	});
	const sourceText = 'John Doe works at Acme Corp as a Software Engineer with 5 years experience';

	const extractor = new InformationExtractor();
	const items = [{ json: { text: sourceText } }];
	const complexSchema = buildProfile();

	const executeFunctions = createExecuteFunctionsMock(
		{
			text: sourceText,
			schemaType: 'fromJson',
			jsonSchemaExample: JSON.stringify(complexSchema),
			options: { systemPromptTemplate: '' },
		},
		// The fake model replies with the complete nested structure
		new FakeListChatModel({
			responses: [formatFakeLlmResponse(buildProfile())],
		}),
		items,
	);
	executeFunctions.getNode = () => mock<INode>({ typeVersion: 1.2 });

	const result = await extractor.execute.call(executeFunctions);

	// Partial match on the first item's output; `skills` only has to contain
	// both entries, in any order.
	expect(result[0][0].json.output).toMatchObject({
		person: buildPerson(),
		experience: {
			years: 5,
			skills: expect.arrayContaining(['JavaScript', 'TypeScript']),
		},
	});
});
});
describe('Batch Processing', () => {
it('should process multiple items in batches', async () => {
const node = new InformationExtractor();