refactor(Structured Output Parser Node): Sandbox JSON schema parsing (no-changelog) (#9239)

Signed-off-by: Oleg Ivaniv <me@olegivaniv.com>
This commit is contained in:
oleg
2024-04-29 13:59:55 +02:00
committed by GitHub
parent e17e767e70
commit f84abc0586
4 changed files with 217 additions and 211 deletions

View File

@@ -8,21 +8,22 @@ import {
NodeOperationError, NodeOperationError,
NodeConnectionType, NodeConnectionType,
} from 'n8n-workflow'; } from 'n8n-workflow';
import { parseSchema } from 'json-schema-to-zod';
import { z } from 'zod'; import { z } from 'zod';
import type { JSONSchema7 } from 'json-schema'; import type { JSONSchema7 } from 'json-schema';
import { StructuredOutputParser } from 'langchain/output_parsers'; import { StructuredOutputParser } from 'langchain/output_parsers';
import { OutputParserException } from '@langchain/core/output_parsers'; import { OutputParserException } from '@langchain/core/output_parsers';
import get from 'lodash/get'; import get from 'lodash/get';
import { logWrapper } from '../../../utils/logWrapper'; import { getSandboxContext } from 'n8n-nodes-base/dist/nodes/Code/Sandbox';
import { JavaScriptSandbox } from 'n8n-nodes-base/dist/nodes/Code/JavaScriptSandbox';
import { makeResolverFromLegacyOptions } from '@n8n/vm2';
import { getConnectionHintNoticeField } from '../../../utils/sharedFields'; import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
import { logWrapper } from '../../../utils/logWrapper';
const STRUCTURED_OUTPUT_KEY = '__structured__output'; const STRUCTURED_OUTPUT_KEY = '__structured__output';
const STRUCTURED_OUTPUT_OBJECT_KEY = '__structured__output__object'; const STRUCTURED_OUTPUT_OBJECT_KEY = '__structured__output__object';
const STRUCTURED_OUTPUT_ARRAY_KEY = '__structured__output__array'; const STRUCTURED_OUTPUT_ARRAY_KEY = '__structured__output__array';
class N8nStructuredOutputParser<T extends z.ZodTypeAny> extends StructuredOutputParser<T> { export class N8nStructuredOutputParser<T extends z.ZodTypeAny> extends StructuredOutputParser<T> {
async parse(text: string): Promise<z.infer<T>> { async parse(text: string): Promise<z.infer<T>> {
try { try {
const parsed = (await super.parse(text)) as object; const parsed = (await super.parse(text)) as object;
@@ -39,26 +40,19 @@ class N8nStructuredOutputParser<T extends z.ZodTypeAny> extends StructuredOutput
} }
} }
static fromZedJsonSchema( static async fromZedJsonSchema(
schema: JSONSchema7, sandboxedSchema: JavaScriptSandbox,
nodeVersion: number, nodeVersion: number,
): StructuredOutputParser<z.ZodType<object, z.ZodTypeDef, object>> { ): Promise<StructuredOutputParser<z.ZodType<object, z.ZodTypeDef, object>>> {
// Make sure to remove the description from root schema const zodSchema = (await sandboxedSchema.runCode()) as z.ZodSchema<object>;
const { description, ...restOfSchema } = schema;
const zodSchemaString = parseSchema(restOfSchema as JSONSchema7);
// TODO: This is obviously not great and should be replaced later!!!
// eslint-disable-next-line @typescript-eslint/no-implied-eval
const itemSchema = new Function('z', `return (${zodSchemaString})`)(z) as z.ZodSchema<object>;
let returnSchema: z.ZodSchema<object>; let returnSchema: z.ZodSchema<object>;
if (nodeVersion === 1) { if (nodeVersion === 1) {
returnSchema = z.object({ returnSchema = z.object({
[STRUCTURED_OUTPUT_KEY]: z [STRUCTURED_OUTPUT_KEY]: z
.object({ .object({
[STRUCTURED_OUTPUT_OBJECT_KEY]: itemSchema.optional(), [STRUCTURED_OUTPUT_OBJECT_KEY]: zodSchema.optional(),
[STRUCTURED_OUTPUT_ARRAY_KEY]: z.array(itemSchema).optional(), [STRUCTURED_OUTPUT_ARRAY_KEY]: z.array(zodSchema).optional(),
}) })
.describe( .describe(
`Wrapper around the output data. It can only contain ${STRUCTURED_OUTPUT_OBJECT_KEY} or ${STRUCTURED_OUTPUT_ARRAY_KEY} but never both.`, `Wrapper around the output data. It can only contain ${STRUCTURED_OUTPUT_OBJECT_KEY} or ${STRUCTURED_OUTPUT_ARRAY_KEY} but never both.`,
@@ -80,7 +74,7 @@ class N8nStructuredOutputParser<T extends z.ZodTypeAny> extends StructuredOutput
}); });
} else { } else {
returnSchema = z.object({ returnSchema = z.object({
output: itemSchema.optional(), output: zodSchema.optional(),
}); });
} }
@@ -166,18 +160,64 @@ export class OutputParserStructured implements INodeType {
if (itemSchema.type === undefined) { if (itemSchema.type === undefined) {
itemSchema = { itemSchema = {
type: 'object', type: 'object',
properties: itemSchema.properties || (itemSchema as { [key: string]: JSONSchema7 }), properties: itemSchema.properties ?? (itemSchema as { [key: string]: JSONSchema7 }),
}; };
} }
} catch (error) { } catch (error) {
throw new NodeOperationError(this.getNode(), 'Error during parsing of JSON Schema.'); throw new NodeOperationError(this.getNode(), 'Error during parsing of JSON Schema.');
} }
const nodeVersion = this.getNode().typeVersion; const vmResolver = makeResolverFromLegacyOptions({
const parser = N8nStructuredOutputParser.fromZedJsonSchema(itemSchema, nodeVersion); external: {
modules: ['json-schema-to-zod', 'zod'],
transitive: false,
},
resolve(moduleName, parentDirname) {
if (moduleName === 'json-schema-to-zod') {
return require.resolve(
'@n8n/n8n-nodes-langchain/node_modules/json-schema-to-zod/dist/cjs/jsonSchemaToZod.js',
{
paths: [parentDirname],
},
);
}
if (moduleName === 'zod') {
return require.resolve('@n8n/n8n-nodes-langchain/node_modules/zod.cjs', {
paths: [parentDirname],
});
}
return;
},
builtin: [],
});
const context = getSandboxContext.call(this, itemIndex);
// Make sure to remove the description from root schema
const { description, ...restOfSchema } = itemSchema;
const sandboxedSchema = new JavaScriptSandbox(
context,
`
const { z } = require('zod');
const { parseSchema } = require('json-schema-to-zod');
const zodSchema = parseSchema(${JSON.stringify(restOfSchema)});
const itemSchema = new Function('z', 'return (' + zodSchema + ')')(z)
return itemSchema
`,
itemIndex,
this.helpers,
{ resolver: vmResolver },
);
return { const nodeVersion = this.getNode().typeVersion;
response: logWrapper(parser, this), try {
}; const parser = await N8nStructuredOutputParser.fromZedJsonSchema(
sandboxedSchema,
nodeVersion,
);
return {
response: logWrapper(parser, this),
};
} catch (error) {
throw new NodeOperationError(this.getNode(), 'Error during parsing of JSON Schema.');
}
} }
} }

View File

@@ -0,0 +1,149 @@
import type { IExecuteFunctions, IWorkflowDataProxyData } from 'n8n-workflow';
import { mock } from 'jest-mock-extended';
import { normalizeItems } from 'n8n-core';
import type { z } from 'zod';
import type { StructuredOutputParser } from 'langchain/output_parsers';
import { OutputParserStructured } from '../OutputParserStructured.node';
describe('OutputParserStructured', () => {
  /** Shape the tests cast the `response` returned by `supplyData` to. */
  type SuppliedParser = StructuredOutputParser<z.ZodType<object, z.ZodTypeDef, object>>;

  // Node instance under test; a fresh one is created before every test case.
  let node: OutputParserStructured;

  // Mocked execution context passed as `this` to `supplyData`.
  const executeFunctions = mock<IExecuteFunctions>({
    helpers: { normalizeItems },
  });
  const dataProxy = mock<IWorkflowDataProxyData>({ $input: mock() });
  executeFunctions.getWorkflowDataProxy.mockReturnValue(dataProxy);
  // The mocked node reports typeVersion 1.1; the cases below expect payloads under `output`.
  executeFunctions.getNode.mockReturnValue({ typeVersion: 1.1 });
  executeFunctions.addInputData.mockReturnValue({ index: 0 });
  executeFunctions.addOutputData.mockReturnValue();

  // JSON schema shared by the single-object cases: `name` (string) and `age` (number), both required.
  const personSchema = `{
"type": "object",
"properties": {
"name": {
"type": "string"
},
"age": {
"type": "number"
}
},
"required": ["name", "age"]
}`;

  /**
   * Serves `jsonSchema` once as the `jsonSchema` node parameter for item 0,
   * calls `supplyData` for item 0 and returns the parser from its response.
   */
  const supplyParserFor = async (jsonSchema: string): Promise<SuppliedParser> => {
    executeFunctions.getNodeParameter.calledWith('jsonSchema', 0).mockReturnValueOnce(jsonSchema);
    const supplied = (await node.supplyData.call(executeFunctions, 0)) as {
      response: SuppliedParser;
    };
    return supplied.response;
  };

  /** Builds the reply text fed to the parser: a lead-in line followed by a fenced JSON block. */
  const asModelReply = (payload: object): string => `Here's the output!
\`\`\`json
${JSON.stringify(payload)}
\`\`\`
`;

  beforeEach(() => {
    node = new OutputParserStructured();
  });

  describe('supplyData', () => {
    it('should parse a valid JSON schema', async () => {
      const parser = await supplyParserFor(personSchema);
      const expected = { output: { name: 'Mac', age: 27 } };

      const actual = await parser.parse(asModelReply(expected));

      expect(actual).toEqual(expected);
    });

    it('should handle missing required properties', async () => {
      const parser = await supplyParserFor(personSchema);
      // `age` is listed as required by the schema but is absent here.
      const incomplete = { output: { name: 'Mac' } };

      await expect(parser.parse(asModelReply(incomplete))).rejects.toThrow('Required');
    });

    it('should throw on wrong type', async () => {
      const parser = await supplyParserFor(personSchema);
      // `age` is given as a string although the schema declares it as a number.
      const mistyped = { output: { name: 'Mac', age: '27' } };

      await expect(parser.parse(asModelReply(mistyped))).rejects.toThrow(
        'Expected number, received string',
      );
    });

    it('should parse array output', async () => {
      // Schema with a required `myArr` array whose items have the same name/age shape.
      const arraySchema = `{
"type": "object",
"properties": {
"myArr": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"age": {
"type": "number"
}
},
"required": ["name", "age"]
}
}
},
"required": ["myArr"]
}`;
      const parser = await supplyParserFor(arraySchema);
      const expected = {
        output: {
          myArr: [
            { name: 'Mac', age: 27 },
            { name: 'Alice', age: 25 },
          ],
        },
      };

      const actual = await parser.parse(asModelReply(expected));

      expect(actual).toEqual(expected);
    });
  });
});

View File

@@ -156,6 +156,7 @@
"epub2": "3.0.2", "epub2": "3.0.2",
"form-data": "4.0.0", "form-data": "4.0.0",
"html-to-text": "9.0.5", "html-to-text": "9.0.5",
"jest-mock-extended": "^3.0.4",
"json-schema-to-zod": "2.0.14", "json-schema-to-zod": "2.0.14",
"langchain": "0.1.25", "langchain": "0.1.25",
"lodash": "4.17.21", "lodash": "4.17.21",

190
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff