diff --git a/packages/nodes-base/nodes/MistralAI/GenericFunctions.ts b/packages/nodes-base/nodes/MistralAI/GenericFunctions.ts new file mode 100644 index 0000000000..54fb80d342 --- /dev/null +++ b/packages/nodes-base/nodes/MistralAI/GenericFunctions.ts @@ -0,0 +1,62 @@ +import type FormData from 'form-data'; +import type { + IDataObject, + IExecuteFunctions, + IHttpRequestMethods, + IHttpRequestOptions, + JsonObject, +} from 'n8n-workflow'; +import { NodeApiError } from 'n8n-workflow'; + +import type { Page } from './types'; +/** + * Sends one authenticated JSON request to the Mistral API (host api.mistral.ai) + * using the mistralCloudApi credential. + * + * body and qs are removed from the request options when Object.keys reports + * no keys for them, so empty objects are never sent. + * + * @param method HTTP method of the request. + * @param resource Path appended verbatim to the API host. + * @param body Request payload, either a plain object or a FormData instance. + * @param qs Query-string parameters. + * @returns Whatever httpRequestWithAuthentication resolves with. + * @throws NodeApiError wrapping any error raised by the request helper. + */ +export async function mistralApiRequest( + this: IExecuteFunctions, + method: IHttpRequestMethods, + resource: string, + body: IDataObject | FormData = {}, + qs: IDataObject = {}, +): Promise { + const options: IHttpRequestOptions = { + method, + body, + qs, + url: `https://api.mistral.ai${resource}`, + json: true, + }; + + if (Object.keys(body).length === 0) { + delete options.body; + } + if (Object.keys(qs).length === 0) { + delete options.qs; + } + + try { + return await this.helpers.httpRequestWithAuthentication.call(this, 'mistralCloudApi', options); + } catch (error) { + throw new NodeApiError(this.getNode(), error as JsonObject); + } +} +/** + * Builds a base64 data URL from the binary data attached to one input item. + * + * The binary field name is read from the binaryProperty node parameter of + * that item; the MIME type and file name come from the binary metadata + * returned by assertBinaryData. + * + * @param itemIndex Index of the input item whose binary data is encoded. + * @returns dataUrl in the form data:MIME;base64,PAYLOAD and the file name + * stored with the binary data (may be undefined). + */ +export async function encodeBinaryData( + this: IExecuteFunctions, + itemIndex: number, +): Promise<{ dataUrl: string; fileName: string | undefined }> { + const binaryProperty = this.getNodeParameter('binaryProperty', itemIndex); + const binaryData = this.helpers.assertBinaryData(itemIndex, binaryProperty); + const binaryDataBuffer = await this.helpers.getBinaryDataBuffer(itemIndex, binaryProperty); + const base64Data = binaryDataBuffer.toString('base64'); + const dataUrl = `data:${binaryData.mimeType};base64,${base64Data}`; + + return { dataUrl, fileName: binaryData.fileName }; +} +/** + * Adds two convenience fields to an OCR response body. + * + * @param response OCR response; its pages entry is cast to Page[] without + * a runtime check. + * @returns A shallow copy of response plus extractedText (the markdown of + * every page joined with a blank line) and pageCount (number of pages). + * + * NOTE(review): a response without pages would throw on pages.map; confirm + * that every caller passes a body that contains pages. + */ +export function processResponseData(response: IDataObject): IDataObject { + const pages = response.pages as Page[]; + return { + ...response, + extractedText: pages.map((page) => page.markdown).join('\n\n'), +
pageCount: pages.length, + }; +} diff --git a/packages/nodes-base/nodes/MistralAI/MistralAi.node.json b/packages/nodes-base/nodes/MistralAI/MistralAi.node.json new file mode 100644 index 0000000000..d9b9b1a7eb --- /dev/null +++ b/packages/nodes-base/nodes/MistralAI/MistralAi.node.json @@ -0,0 +1,18 @@ +{ + "node": "n8n-nodes-base.mistralAi", + "nodeVersion": "1.0", + "codexVersion": "1.0", + "categories": ["Utility"], + "resources": { + "credentialDocumentation": [ + { + "url": "https://docs.n8n.io/integrations/builtin/credentials/mistral/" + } + ], + "primaryDocumentation": [ + { + "url": "https://docs.n8n.io/integrations/builtin/app-nodes/n8n-nodes-base.mistralai/" + } + ] + } +} diff --git a/packages/nodes-base/nodes/MistralAI/MistralAi.node.ts b/packages/nodes-base/nodes/MistralAI/MistralAi.node.ts new file mode 100644 index 0000000000..8b08fbcfcf --- /dev/null +++ b/packages/nodes-base/nodes/MistralAI/MistralAi.node.ts @@ -0,0 +1,316 @@ +import FormData from 'form-data'; +import chunk from 'lodash/chunk'; +import type { + IDataObject, + IExecuteFunctions, + INodeExecutionData, + INodeType, + INodeTypeDescription, +} from 'n8n-workflow'; +import { NodeApiError, NodeConnectionTypes } from 'n8n-workflow'; + +import { document } from './descriptions'; +import { encodeBinaryData, mistralApiRequest, processResponseData } from './GenericFunctions'; +import type { BatchItemResult, BatchJob } from './types'; +/** + * n8n node for the Mistral AI API. + * + * The description exposes a single resource (Document) followed by the + * properties re-exported from the descriptions module. Requests are + * authenticated with the mistralCloudApi credential, which is marked as + * required. + */ +export class MistralAi implements INodeType { + description: INodeTypeDescription = { + displayName: 'Mistral AI', + name: 'mistralAi', + icon: { + light: 'file:mistralAi.svg', + dark: 'file:mistralAi.svg', + }, + group: ['transform'], + version: 1, + subtitle: '={{ $parameter["operation"] + ": " + $parameter["resource"] }}', + description: 'Consume Mistral AI API', + defaults: { + name: 'Mistral AI', + }, + inputs: [NodeConnectionTypes.Main], + outputs: [NodeConnectionTypes.Main], + usableAsTool: true, + credentials: [ + { + name: 'mistralCloudApi', + required:
true, + }, + ], + properties: [ + { + displayName: 'Resource', + name: 'resource', + type: 'options', + noDataExpression: true, + options: [ + { + name: 'Document', + value: 'document', + }, + ], + default: 'document', + }, + + ...document.description, + ], + }; + /** + * Runs the Document > Extract Text operation. + * + * resource, operation and every batch option are read from item 0 only. + * + * Batch mode (options.batch): input items are split into chunks of + * options.batchSize, each chunk is uploaded as one JSONL file, one batch + * job is created per file, the jobs are polled until they leave the + * QUEUED and RUNNING states, and every entry of each output file becomes + * one output item paired with the input whose index is in custom_id. + * + * Otherwise every input item is sent to the OCR endpoint on its own. + * + * When continueOnFail is enabled, failures are emitted as items with an + * error field instead of being thrown. + * + * @returns A single output branch holding all produced items. + */ + async execute(this: IExecuteFunctions): Promise { + const items = this.getInputData(); + const returnData: INodeExecutionData[] = []; + const resource = this.getNodeParameter('resource', 0); + const operation = this.getNodeParameter('operation', 0); + + if (resource === 'document') { + if (operation === 'extractText') { + const enableBatch = this.getNodeParameter('options.batch', 0, false) as boolean; + + if (enableBatch) { + try { + const deleteFiles = this.getNodeParameter('options.deleteFiles', 0, true) as boolean; + const model = this.getNodeParameter('model', 0) as string; + const batchSize = this.getNodeParameter('options.batchSize', 0, 50) as number; + /* Remember the original position of every item: it is written to the batch file as custom_id and parsed back later to pair each result with its input. */ + const itemsWithIndex = items.map((item, index) => ({ + ...item, + index, + })); + + const fileIds = []; + for (const batch of chunk(itemsWithIndex, batchSize)) { + const entries = []; + for (const item of batch) { + const documentType = this.getNodeParameter('documentType', item.index) as + | 'document_url' + | 'image_url'; + /* Batch entries are always built from binary data; the inputType parameter is not read in this branch. */ const { dataUrl, fileName } = await encodeBinaryData.call(this, item.index); + + entries.push({ + custom_id: item.index.toString(), + body: { + document: { + type: documentType, + document_name: documentType === 'document_url' ?
fileName : undefined, + [documentType]: dataUrl, + }, + }, + }); + } + /* Serialize the chunk as JSONL (one entry per line) and upload it with purpose batch. */ + const formData = new FormData(); + formData.append( + 'file', + Buffer.from(entries.map((entry) => JSON.stringify(entry)).join('\n')), + { + filename: 'batch_file.jsonl', + contentType: 'application/json', + }, + ); + formData.append('purpose', 'batch'); + + const fileResponse = await mistralApiRequest.call( + this, + 'POST', + '/v1/files', + formData, + ); + fileIds.push(fileResponse.id); + } + /* One batch job per uploaded file, created in upload order. */ + const jobIds = []; + for (const fileId of fileIds) { + const body: IDataObject = { + model, + input_files: [fileId], + endpoint: '/v1/ocr', + }; + + jobIds.push((await mistralApiRequest.call(this, 'POST', '/v1/batch/jobs', body)).id); + } + /* Poll every job at 2 second intervals until it leaves QUEUED and RUNNING. NOTE(review): there is no timeout or attempt limit, so a job that never leaves those states keeps the execution waiting. */ + const jobResults: BatchJob[] = []; + for (const jobId of jobIds) { + let job = (await mistralApiRequest.call( + this, + 'GET', + `/v1/batch/jobs/${jobId}`, + )) as BatchJob; + while (job.status === 'QUEUED' || job.status === 'RUNNING') { + await new Promise((resolve) => setTimeout(resolve, 2000)); + job = (await mistralApiRequest.call( + this, + 'GET', + `/v1/batch/jobs/${jobId}`, + )) as BatchJob; + } + jobResults.push(job); + } + /* Best-effort cleanup of the uploaded input files; a failed delete is ignored. */ + if (deleteFiles) { + for (const fileId of fileIds) { + try { + await mistralApiRequest.call(this, 'DELETE', `/v1/files/${fileId}`); + } catch {} + } + } + /* jobResults follows the order of the uploaded chunks, so jobIndex identifies the chunk a job processed. A failed job is reported only for the items of its own chunk; reporting it for every input item would emit duplicate or misattributed errors as soon as more than one job runs. */ + for (const [jobIndex, jobResult] of jobResults.entries()) { + if ( + jobResult.status !== 'SUCCESS' || + (jobResult.errors && jobResult.errors.length > 0) + ) { + for (const { index: i } of chunk(itemsWithIndex, batchSize)[jobIndex] ?? []) { + if (this.continueOnFail()) { + const errorData = this.helpers.constructExecutionMetaData( + this.helpers.returnJsonArray({ + error: 'Batch job failed or returned errors', + }), + { itemData: { item: i } }, + ); + returnData.push(...errorData); + } else { + throw new NodeApiError(this.getNode(), { + message: `Batch job failed with status: ${jobResult.status}`, + }); + } + } + continue; + } else { + const fileResponse = (await mistralApiRequest.call( + this, + 'GET', +
`/v1/files/${jobResult.output_file}/content`, + )) as string | BatchItemResult; + /* The output file is deleted as soon as its content has been fetched; a failed delete is ignored. */ if (deleteFiles) { + try { + await mistralApiRequest.call( + this, + 'DELETE', + `/v1/files/${jobResult.output_file}`, + ); + } catch {} + } + /* A string payload is treated as JSONL with one result per line. Presumably a one-line payload has already been parsed into an object by the JSON-enabled request helper - TODO confirm. */ + let batchResult: BatchItemResult[]; + if (typeof fileResponse === 'string') { + batchResult = fileResponse + .trim() + .split('\n') + .map((json) => JSON.parse(json) as BatchItemResult); + } else { + // If the response is not a string, it is a single item result + batchResult = [fileResponse]; + } + /* custom_id is parsed back into the index of the input item the result is paired with. */ + for (const result of batchResult) { + const index = parseInt(result.custom_id, 10); + if (result.error) { + const executionData = this.helpers.constructExecutionMetaData( + this.helpers.returnJsonArray({ error: result.error }), + { itemData: { item: index } }, + ); + returnData.push(...executionData); + } else { + const data = processResponseData(result.response.body); + + const executionData = this.helpers.constructExecutionMetaData( + this.helpers.returnJsonArray(data), + { itemData: { item: index } }, + ); + returnData.push(...executionData); + } + } + } + } + } catch (error) { + if (this.continueOnFail()) { + const executionError = this.helpers.constructExecutionMetaData( + this.helpers.returnJsonArray({ + error: error instanceof Error ?
error.message : JSON.stringify(error), + }), + /* An exception anywhere in the batch flow is reported once, paired with item 0. */ { itemData: { item: 0 } }, + ); + returnData.push(...executionError); + } else { + throw new NodeApiError(this.getNode(), error); + } + } + } else { + /* Non-batch mode: one OCR request per input item, with all parameters resolved per item. */ let responseData: IDataObject; + + for (let i = 0; i < items.length; i++) { + try { + const model = this.getNodeParameter('model', i) as string; + const inputType = this.getNodeParameter('inputType', i) as 'binary' | 'url'; + const documentType = this.getNodeParameter('documentType', i) as + | 'document_url' + | 'image_url'; + + if (inputType === 'binary') { + const { dataUrl, fileName } = await encodeBinaryData.call(this, i); + /* Binary input is sent inline as a base64 data URL under the key named by documentType; document_name is only filled for document_url. */ + const body: IDataObject = { + model, + document: { + type: documentType, + document_name: documentType === 'document_url' ? fileName : undefined, + [documentType]: dataUrl, + }, + }; + + responseData = (await mistralApiRequest.call( + this, + 'POST', + '/v1/ocr', + body, + )) as IDataObject; + + responseData = processResponseData(responseData); + } else { + const url = this.getNodeParameter('url', i) as string; + /* URL input is passed through unchanged under the key named by documentType. */ + const body: IDataObject = { + model, + document: { + type: documentType, + [documentType]: url, + }, + }; + + responseData = (await mistralApiRequest.call( + this, + 'POST', + '/v1/ocr', + body, + )) as IDataObject; + + responseData = processResponseData(responseData); + } + + const executionData = this.helpers.constructExecutionMetaData( + this.helpers.returnJsonArray(responseData), + { itemData: { item: i } }, + ); + returnData.push(...executionData); + } catch (error) { + if (this.continueOnFail()) { + const executionError = this.helpers.constructExecutionMetaData( + this.helpers.returnJsonArray({ + error: error instanceof Error ?
error.message : JSON.stringify(error), + }), + { itemData: { item: i } }, + ); + returnData.push(...executionError); + } else { + throw new NodeApiError(this.getNode(), error); + } + } + } + } + } + } + /* Single output branch containing every collected item. */ + return [returnData]; + } +} diff --git a/packages/nodes-base/nodes/MistralAI/descriptions/document/Document.resource.ts b/packages/nodes-base/nodes/MistralAI/descriptions/document/Document.resource.ts new file mode 100644 index 0000000000..4ee2c75088 --- /dev/null +++ b/packages/nodes-base/nodes/MistralAI/descriptions/document/Document.resource.ts @@ -0,0 +1,28 @@ +import type { INodeProperties } from 'n8n-workflow'; + +import * as extractText from './extractText.operation'; +/** + * Properties of the Document resource: the Operation selector (shown while + * resource is document) followed by the fields of the Extract Text operation. + */ +export const description: INodeProperties[] = [ + { + displayName: 'Operation', + name: 'operation', + type: 'options', + noDataExpression: true, + displayOptions: { + show: { + resource: ['document'], + }, + }, + options: [ + { + name: 'Extract Text', + value: 'extractText', + description: 'Extract text from document using OCR', + action: 'Extract text', + }, + ], + default: 'extractText', + }, + + ...extractText.description, +]; diff --git a/packages/nodes-base/nodes/MistralAI/descriptions/document/extractText.operation.ts b/packages/nodes-base/nodes/MistralAI/descriptions/document/extractText.operation.ts new file mode 100644 index 0000000000..eabe5fbcc8 --- /dev/null +++ b/packages/nodes-base/nodes/MistralAI/descriptions/document/extractText.operation.ts @@ -0,0 +1,141 @@ +import type { INodeProperties } from 'n8n-workflow'; +import { updateDisplayOptions } from 'n8n-workflow'; +/** + * Fields of the Document > Extract Text operation. + * + * The documentType option values (document_url, image_url) are not only + * labels: the node also uses the selected value as the request key that + * carries the document URL or data URL. + */ +const properties: INodeProperties[] = [ + { + displayName: 'Model', + name: 'model', + type: 'options', + options: [ + { + name: 'mistral-ocr-latest', + value: 'mistral-ocr-latest', + }, + ], + description: 'The OCR model to use', + required: true, + default: 'mistral-ocr-latest', + }, + { + displayName: 'Document Type', + name: 'documentType', + type: 'options', + options: [ + { + name: 'Document', + value:
'document_url', + }, + { + name: 'Image', + value: 'image_url', + }, + ], + description: 'The type of document to process', + required: true, + default: 'document_url', + }, + { + displayName: 'Input Type', + name: 'inputType', + type: 'options', + options: [ + { + name: 'Binary Data', + value: 'binary', + }, + { + name: 'URL', + value: 'url', + }, + ], + description: 'How the document will be provided', + required: true, + default: 'binary', + /* NOTE(review): presumably disables this selector while options.batch is on. The batch branch of execute always encodes binary data and never reads inputType, so a stored value of url would be ignored there - confirm this is intended. */ disabledOptions: { + show: { + 'options.batch': [true], + }, + }, + }, + { + displayName: 'Input Binary Field', + name: 'binaryProperty', + type: 'string', + description: 'Name of the input binary field that contains the file to process', + placeholder: 'e.g. data', + hint: 'Uploaded document files must not exceed 50 MB in size and should be no longer than 1,000 pages.', + required: true, + default: 'data', + displayOptions: { + show: { + inputType: ['binary'], + }, + }, + }, + { + displayName: 'URL', + name: 'url', + type: 'string', + description: 'URL of the document or image to process', + placeholder: 'e.g.
https://example.com/document.pdf', + required: true, + default: '', + displayOptions: { + show: { + inputType: ['url'], + }, + }, + }, + { + displayName: 'Options', + name: 'options', + type: 'collection', + placeholder: 'Add Option', + default: {}, + options: [ + { + displayName: 'Enable Batch Processing', + name: 'batch', + type: 'boolean', + description: + 'Whether to process multiple documents in a single API call (more cost-efficient)', + default: false, + }, + { + displayName: 'Batch Size', + name: 'batchSize', + type: 'number', + description: 'Maximum number of documents to process in a single batch', + default: 50, + typeOptions: { maxValue: 2048 }, + required: true, + displayOptions: { + show: { + batch: [true], + }, + }, + }, + { + displayName: 'Delete Files After Processing', + name: 'deleteFiles', + type: 'boolean', + default: true, + description: 'Whether to delete the files on Mistral Cloud after processing', + displayOptions: { + show: { + batch: [true], + }, + }, + }, + ], + }, +]; + +const displayOptions = { + show: { + resource: ['document'], + operation: ['extractText'], + }, +}; + +export const description = updateDisplayOptions(displayOptions, properties); diff --git a/packages/nodes-base/nodes/MistralAI/descriptions/index.ts b/packages/nodes-base/nodes/MistralAI/descriptions/index.ts new file mode 100644 index 0000000000..41f6f17b3e --- /dev/null +++ b/packages/nodes-base/nodes/MistralAI/descriptions/index.ts @@ -0,0 +1 @@ +export * as document from './document/Document.resource'; diff --git a/packages/nodes-base/nodes/MistralAI/mistralAi.svg b/packages/nodes-base/nodes/MistralAI/mistralAi.svg new file mode 100644 index 0000000000..928faf4518 --- /dev/null +++ b/packages/nodes-base/nodes/MistralAI/mistralAi.svg @@ -0,0 +1,262 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/packages/nodes-base/nodes/MistralAI/test/GenericFunctions.test.ts b/packages/nodes-base/nodes/MistralAI/test/GenericFunctions.test.ts new file mode 100644 index 0000000000..53b5b07b16 --- /dev/null +++ b/packages/nodes-base/nodes/MistralAI/test/GenericFunctions.test.ts @@ -0,0 +1,67 @@ +import { encodeBinaryData, processResponseData } from '../GenericFunctions'; +/* Unit tests for two helpers of GenericFunctions: encodeBinaryData (run against a mocked execution context) and processResponseData (plain input objects). */ +describe('Mistral OCR Generic Functions', () => { + describe('encodeBinaryData', () => { + const binaryBuffer = Buffer.from('testdata'); + const base64 = binaryBuffer.toString('base64'); + /* Minimal stand-in for the execution context: only getNodeParameter and the two binary helpers are mocked, which are the members the assertions below check. */ + const context = { + getNodeParameter: jest.fn(), + helpers: { + assertBinaryData: jest.fn(), + getBinaryDataBuffer: jest.fn(), + }, + } as any; + + beforeEach(() => { + jest.clearAllMocks(); + }); + + it('should encode binary data to a data URL', async () => { + context.getNodeParameter.mockReturnValue('binaryProp1'); + context.helpers.assertBinaryData.mockReturnValue({ + mimeType: 'image/png', + fileName: 'file.png', + }); + context.helpers.getBinaryDataBuffer.mockResolvedValue(binaryBuffer); + + const result = await encodeBinaryData.call(context, 0); + /* The field name returned by getNodeParameter must be forwarded to both binary helpers. */ + expect(context.getNodeParameter).toHaveBeenCalledWith('binaryProperty', 0); + expect(context.helpers.assertBinaryData).toHaveBeenCalledWith(0, 'binaryProp1'); + expect(context.helpers.getBinaryDataBuffer).toHaveBeenCalledWith(0, 'binaryProp1'); + + expect(result).toEqual({ + dataUrl: `data:image/png;base64,${base64}`, + fileName: 'file.png', + }); + }); + }); + + describe('processResponseData', () => { + it('should extract text and page count from pages', () => { + const input = { + pages: [ + { markdown: 'Page 1 markdown', text: 'Page 1 text' }, + { markdown: 'Page 2 markdown', text: 'Page 2 text' }, + ], + otherProp: 'test', + }; + + const result = processResponseData(input); + /* Only the markdown of each page is joined; unrelated properties must be passed through. */ + expect(result.extractedText).toBe('Page 1 markdown\n\nPage 2 markdown'); + expect(result.pageCount).toBe(2); + expect(result.otherProp).toBe('test'); + }); + + it('should handle empty
pages array', () => { + const input = { pages: [] }; + + const result = processResponseData(input); + + expect(result.extractedText).toBe(''); + expect(result.pageCount).toBe(0); + }); + }); +}); diff --git a/packages/nodes-base/nodes/MistralAI/test/MistralAi.node.test.ts b/packages/nodes-base/nodes/MistralAI/test/MistralAi.node.test.ts new file mode 100644 index 0000000000..c27a3c71ed --- /dev/null +++ b/packages/nodes-base/nodes/MistralAI/test/MistralAi.node.test.ts @@ -0,0 +1,216 @@ +import { NodeTestHarness } from '@nodes-testing/node-test-harness'; +import nock from 'nock'; +import path from 'path'; + +import batchResult from './fixtures/batch.json'; +import documentResult from './fixtures/document.json'; +import imageResult from './fixtures/image.json'; +/* Workflow-level test: each HTTP call that the Mistral AI nodes of workflow.json make to api.mistral.ai is answered by a nock interceptor registered in beforeAll. Presumably the harness then compares the node outputs with the pinned data of the workflow - TODO confirm. */ +describe('Mistral AI Node', () => { + const credentials = { + mistralCloudApi: { + apiKey: 'API-KEY', + }, + }; + const mistralAiNock = nock('https://api.mistral.ai'); + + describe('Document -> Extract Text', () => { + beforeAll(() => { + // Document by URL + mistralAiNock + .post('/v1/ocr', { + model: 'mistral-ocr-latest', + document: { + type: 'document_url', + document_url: 'https://example.com/document.pdf', + }, + }) + .reply(200, documentResult); + + // Image by URL + mistralAiNock + .post('/v1/ocr', { + model: 'mistral-ocr-latest', + document: { + type: 'image_url', + image_url: 'https://example.com/image.jpg', + }, + }) + .reply(200, imageResult); + + // Document from binary + mistralAiNock + .post('/v1/ocr', { + model: 'mistral-ocr-latest', + document: { + type: 'document_url', + document_name: 'sample.pdf', + document_url: 'data:application/pdf;base64,abcdefgh', + }, + }) + .reply(200, documentResult); + + // Image from binary + mistralAiNock + .post('/v1/ocr', { + model: 'mistral-ocr-latest', + document: { + type: 'image_url', + image_url: 'data:image/jpeg;base64,abcdefgh', + }, + }) + .reply(200, imageResult); + /* The upload body is matched as raw text: the interceptor only applies when the payload contains the serialized document entries of the expected files. */ + // Batching + mistralAiNock + .post( + '/v1/files', + (body: string) => +
body.includes( + JSON.stringify({ + document: { + type: 'document_url', + document_name: 'sample_1.pdf', + document_url: 'data:application/pdf;base64,abcdefgh', + }, + }), + ) && + body.includes( + JSON.stringify({ + document: { + type: 'document_url', + document_name: 'sample_2.pdf', + document_url: 'data:application/pdf;base64,aaaaaaaa', + }, + }), + ), + ) + .reply(200, { id: 'input-file-1' }); + mistralAiNock + .post('/v1/files', (body: string) => + body.includes( + JSON.stringify({ + document: { + type: 'document_url', + document_name: 'sample_3.pdf', + document_url: 'data:application/pdf;base64,aaaabbbb', + }, + }), + ), + ) + .reply(200, { id: 'input-file-2' }); + mistralAiNock + .post('/v1/batch/jobs', { + model: 'mistral-ocr-latest', + input_files: ['input-file-1'], + endpoint: '/v1/ocr', + }) + .reply(200, { id: 'job-1' }); + mistralAiNock + .post('/v1/batch/jobs', { + model: 'mistral-ocr-latest', + input_files: ['input-file-2'], + endpoint: '/v1/ocr', + }) + .reply(200, { id: 'job-2' }); + /* Both jobs report SUCCESS on the first status request, so the node never enters its polling wait. */ mistralAiNock.get('/v1/batch/jobs/job-1').reply(200, { + status: 'SUCCESS', + output_file: 'output-file-1', + }); + mistralAiNock.get('/v1/batch/jobs/job-2').reply(200, { + status: 'SUCCESS', + output_file: 'output-file-2', + }); + /* First output file: two results joined into a JSONL string, which exercises the split-and-parse path of the node. */ mistralAiNock.get('/v1/files/output-file-1/content').reply( + 200, + batchResult + .slice(0, 2) + .map((item) => JSON.stringify(item)) + .join('\n'), + ); + /* Second output file: a single result object; presumably it reaches the node already parsed and exercises the non-string path - TODO confirm. */ mistralAiNock.get('/v1/files/output-file-2/content').reply(200, batchResult[2]); + + // Batching with delete files + mistralAiNock + .post( + '/v1/files', + (body: string) => + body.includes( + JSON.stringify({ + document: { + type: 'document_url', + document_name: 'sample_1.pdf', + document_url: 'data:application/pdf;base64,abcdefgh', + }, + }), + ) && + body.includes( + JSON.stringify({ + document: { + type: 'document_url', + document_name: 'sample_2.pdf', + document_url: 'data:application/pdf;base64,aaaaaaaa', + }, + }), + ), + ) + .reply(200, { id: 'input-file-1' }); + mistralAiNock
+ .post('/v1/files', (body: string) => + body.includes( + JSON.stringify({ + document: { + type: 'document_url', + document_name: 'sample_3.pdf', + document_url: 'data:application/pdf;base64,aaaabbbb', + }, + }), + ), + ) + .reply(200, { id: 'input-file-2' }); + mistralAiNock + .post('/v1/batch/jobs', { + model: 'mistral-ocr-latest', + input_files: ['input-file-1'], + endpoint: '/v1/ocr', + }) + .reply(200, { id: 'job-1' }); + mistralAiNock + .post('/v1/batch/jobs', { + model: 'mistral-ocr-latest', + input_files: ['input-file-2'], + endpoint: '/v1/ocr', + }) + .reply(200, { id: 'job-2' }); + mistralAiNock.get('/v1/batch/jobs/job-1').reply(200, { + status: 'SUCCESS', + output_file: 'output-file-1', + }); + mistralAiNock.get('/v1/batch/jobs/job-2').reply(200, { + status: 'SUCCESS', + output_file: 'output-file-2', + }); + /* This scenario leaves deleteFiles unset, so the node is expected to delete both uploaded input files and, further down, both output files. */ mistralAiNock.delete('/v1/files/input-file-1').reply(200); + mistralAiNock.delete('/v1/files/input-file-2').reply(200); + mistralAiNock.get('/v1/files/output-file-1/content').reply( + 200, + batchResult + .slice(0, 2) + .map((item) => JSON.stringify(item)) + .join('\n'), + ); + mistralAiNock.get('/v1/files/output-file-2/content').reply(200, batchResult[2]); + mistralAiNock.delete('/v1/files/output-file-1').reply(200); + mistralAiNock.delete('/v1/files/output-file-2').reply(200); + }); + /* done() fails the suite when an interceptor registered above was never used. */ + afterAll(() => mistralAiNock.done()); + /* NOTE(review): the langchain package path is presumably added so the harness can resolve the mistralCloudApi credential - confirm. */ + new NodeTestHarness({ + additionalPackagePaths: [path.join(__dirname, '../../../../@n8n/nodes-langchain')], + }).setupTests({ + credentials, + workflowFiles: ['workflow.json'], + }); + }); +}); diff --git a/packages/nodes-base/nodes/MistralAI/test/fixtures/batch.json b/packages/nodes-base/nodes/MistralAI/test/fixtures/batch.json new file mode 100644 index 0000000000..7845a32e0f --- /dev/null +++ b/packages/nodes-base/nodes/MistralAI/test/fixtures/batch.json @@ -0,0 +1,77 @@ +[ + { + "custom_id": "0", + "response": { + "body": { + "pages": [ + { + "index": 0, + "markdown": "# Dummy PDF file 1", + "images": [], + "dimensions":
{ + "dpi": 200, + "height": 2339, + "width": 1653 + } + } + ], + "model": "mistral-ocr-2505-completion", + "document_annotation": null, + "usage_info": { + "pages_processed": 1, + "doc_size_bytes": 13264 + } + } + } + }, + { + "custom_id": "1", + "response": { + "body": { + "pages": [ + { + "index": 0, + "markdown": "# Dummy PDF file 2", + "images": [], + "dimensions": { + "dpi": 200, + "height": 2339, + "width": 1653 + } + } + ], + "model": "mistral-ocr-2505-completion", + "document_annotation": null, + "usage_info": { + "pages_processed": 1, + "doc_size_bytes": 13264 + } + } + } + }, + { + "custom_id": "2", + "response": { + "body": { + "pages": [ + { + "index": 0, + "markdown": "# Dummy PDF file 3", + "images": [], + "dimensions": { + "dpi": 200, + "height": 2339, + "width": 1653 + } + } + ], + "model": "mistral-ocr-2505-completion", + "document_annotation": null, + "usage_info": { + "pages_processed": 1, + "doc_size_bytes": 13264 + } + } + } + } +] diff --git a/packages/nodes-base/nodes/MistralAI/test/fixtures/document.json b/packages/nodes-base/nodes/MistralAI/test/fixtures/document.json new file mode 100644 index 0000000000..e3729d0b38 --- /dev/null +++ b/packages/nodes-base/nodes/MistralAI/test/fixtures/document.json @@ -0,0 +1,20 @@ +{ + "pages": [ + { + "index": 0, + "markdown": "# Dummy PDF file", + "images": [], + "dimensions": { + "dpi": 200, + "height": 2339, + "width": 1653 + } + } + ], + "model": "mistral-ocr-2505-completion", + "document_annotation": null, + "usage_info": { + "pages_processed": 1, + "doc_size_bytes": 13264 + } +} diff --git a/packages/nodes-base/nodes/MistralAI/test/fixtures/image.json b/packages/nodes-base/nodes/MistralAI/test/fixtures/image.json new file mode 100644 index 0000000000..7d138b5538 --- /dev/null +++ b/packages/nodes-base/nodes/MistralAI/test/fixtures/image.json @@ -0,0 +1,20 @@ +{ + "pages": [ + { + "index": 0, + "markdown": "# EXAMPLE", + "images": [], + "dimensions": { + "dpi": 200, + "height": 408, + "width": 612 + 
} + } + ], + "model": "mistral-ocr-2505-completion", + "document_annotation": null, + "usage_info": { + "pages_processed": 1, + "doc_size_bytes": 76734 + } +} diff --git a/packages/nodes-base/nodes/MistralAI/test/workflow.json b/packages/nodes-base/nodes/MistralAI/test/workflow.json new file mode 100644 index 0000000000..ad8e54bb20 --- /dev/null +++ b/packages/nodes-base/nodes/MistralAI/test/workflow.json @@ -0,0 +1,582 @@ +{ + "name": "Mistral AI", + "nodes": [ + { + "parameters": {}, + "type": "n8n-nodes-base.manualTrigger", + "typeVersion": 1, + "position": [-400, -380], + "id": "48cf408c-de44-4ed2-8a43-d16a4fa5b95b", + "name": "When clicking ‘Execute workflow’" + }, + { + "parameters": { + "inputType": "url", + "url": "https://example.com/document.pdf" + }, + "type": "n8n-nodes-base.mistralAi", + "typeVersion": 1, + "position": [-180, -680], + "id": "d7af696f-26a4-419e-9377-02c91d0ea23d", + "name": "Document by URL", + "credentials": { + "mistralCloudApi": { + "id": "l4fJwkB8bVdo6Evk", + "name": "Mistral Cloud account" + } + } + }, + { + "parameters": { + "inputType": "url", + "documentType": "image_url", + "url": "https://example.com/image.jpg" + }, + "type": "n8n-nodes-base.mistralAi", + "typeVersion": 1, + "position": [-180, -480], + "id": "9bddff7e-00ab-412c-9cf3-52f88e941641", + "name": "Image by URL", + "credentials": { + "mistralCloudApi": { + "id": "l4fJwkB8bVdo6Evk", + "name": "Mistral Cloud account" + } + } + }, + { + "parameters": {}, + "type": "n8n-nodes-base.mistralAi", + "typeVersion": 1, + "position": [260, -380], + "id": "cd64700f-abfb-48c7-b744-c68365a7cda2", + "name": "Document from binary", + "credentials": { + "mistralCloudApi": { + "id": "l4fJwkB8bVdo6Evk", + "name": "Mistral Cloud account" + } + } + }, + { + "parameters": { + "assignments": { + "assignments": [ + { + "id": "5c5233de-3b47-4860-8598-f50059061fee", + "name": "data", + "value": "abcdefgh", + "type": "string" + } + ] + }, + "options": {} + }, + "type": "n8n-nodes-base.set", + 
"typeVersion": 3.4, + "position": [-180, -280], + "id": "ebc64cb6-524b-4ae3-8a1f-c51c33e8043b", + "name": "Set mock data" + }, + { + "parameters": { + "operation": "toBinary", + "sourceProperty": "data", + "options": { + "fileName": "sample.pdf", + "mimeType": "application/pdf" + } + }, + "type": "n8n-nodes-base.convertToFile", + "typeVersion": 1.1, + "position": [40, -380], + "id": "9f656043-c21e-4eb8-b591-a609634e487f", + "name": "Create PDF" + }, + { + "parameters": { + "documentType": "image_url" + }, + "type": "n8n-nodes-base.mistralAi", + "typeVersion": 1, + "position": [260, -180], + "id": "8f2cc502-c9b0-4561-9ff9-0734d336de6d", + "name": "Image from binary", + "credentials": { + "mistralCloudApi": { + "id": "l4fJwkB8bVdo6Evk", + "name": "Mistral Cloud account" + } + } + }, + { + "parameters": { + "operation": "toBinary", + "sourceProperty": "data", + "options": { + "fileName": "sample.jpg", + "mimeType": "image/jpeg" + } + }, + "type": "n8n-nodes-base.convertToFile", + "typeVersion": 1.1, + "position": [40, -180], + "id": "e1398740-1b36-493b-8950-caf153d6fdd9", + "name": "Create JPG" + }, + { + "parameters": { + "fieldToSplitOut": "data", + "options": {} + }, + "type": "n8n-nodes-base.splitOut", + "typeVersion": 1, + "position": [40, 20], + "id": "c6810b4b-804c-44e6-89e0-a73d585750ee", + "name": "Split Out" + }, + { + "parameters": { + "assignments": { + "assignments": [ + { + "id": "0fc6f054-a7f0-466f-b54c-612379b9c049", + "name": "data", + "value": "[\"abcdefgh\", \"aaaaaaaa\", \"aaaabbbb\"]", + "type": "array" + } + ] + }, + "options": {} + }, + "type": "n8n-nodes-base.set", + "typeVersion": 3.4, + "position": [-180, 20], + "id": "887989fe-2829-43d5-86e6-333f399035b3", + "name": "Set mock data for multiple files" + }, + { + "parameters": { + "operation": "toBinary", + "sourceProperty": "data", + "options": { + "fileName": "=sample_{{ $itemIndex + 1 }}.pdf", + "mimeType": "application/pdf" + } + }, + "type": "n8n-nodes-base.convertToFile", + 
"typeVersion": 1.1, + "position": [260, 20], + "id": "27ef8f5b-ee55-45f9-b66a-741cc6ccde21", + "name": "Create PDFs" + }, + { + "parameters": { + "options": { + "batch": true, + "batchSize": 2, + "deleteFiles": false + } + }, + "type": "n8n-nodes-base.mistralAi", + "typeVersion": 1, + "position": [480, -80], + "id": "ad4b06b9-65e7-4b9c-b06a-bcbf0df5999a", + "name": "Batching", + "credentials": { + "mistralCloudApi": { + "id": "l4fJwkB8bVdo6Evk", + "name": "Mistral Cloud account" + } + } + }, + { + "parameters": { + "options": { + "batch": true, + "batchSize": 2 + } + }, + "type": "n8n-nodes-base.mistralAi", + "typeVersion": 1, + "position": [480, 120], + "id": "ca14c489-02d2-459f-b694-d83112d95d13", + "name": "Batching with delete files", + "credentials": { + "mistralCloudApi": { + "id": "l4fJwkB8bVdo6Evk", + "name": "Mistral Cloud account" + } + } + } + ], + "pinData": { + "Document by URL": [ + { + "json": { + "pages": [ + { + "index": 0, + "markdown": "# Dummy PDF file", + "images": [], + "dimensions": { + "dpi": 200, + "height": 2339, + "width": 1653 + } + } + ], + "model": "mistral-ocr-2505-completion", + "document_annotation": null, + "usage_info": { + "pages_processed": 1, + "doc_size_bytes": 13264 + }, + "extractedText": "# Dummy PDF file", + "pageCount": 1 + } + } + ], + "Image by URL": [ + { + "json": { + "pages": [ + { + "index": 0, + "markdown": "# EXAMPLE", + "images": [], + "dimensions": { + "dpi": 200, + "height": 408, + "width": 612 + } + } + ], + "model": "mistral-ocr-2505-completion", + "document_annotation": null, + "usage_info": { + "pages_processed": 1, + "doc_size_bytes": 76734 + }, + "extractedText": "# EXAMPLE", + "pageCount": 1 + } + } + ], + "Document from binary": [ + { + "json": { + "pages": [ + { + "index": 0, + "markdown": "# Dummy PDF file", + "images": [], + "dimensions": { + "dpi": 200, + "height": 2339, + "width": 1653 + } + } + ], + "model": "mistral-ocr-2505-completion", + "document_annotation": null, + "usage_info": { + 
"pages_processed": 1, + "doc_size_bytes": 13264 + }, + "extractedText": "# Dummy PDF file", + "pageCount": 1 + } + } + ], + "Image from binary": [ + { + "json": { + "pages": [ + { + "index": 0, + "markdown": "# EXAMPLE", + "images": [], + "dimensions": { + "dpi": 200, + "height": 408, + "width": 612 + } + } + ], + "model": "mistral-ocr-2505-completion", + "document_annotation": null, + "usage_info": { + "pages_processed": 1, + "doc_size_bytes": 76734 + }, + "extractedText": "# EXAMPLE", + "pageCount": 1 + } + } + ], + "Batching": [ + { + "json": { + "pages": [ + { + "index": 0, + "markdown": "# Dummy PDF file 1", + "images": [], + "dimensions": { + "dpi": 200, + "height": 2339, + "width": 1653 + } + } + ], + "model": "mistral-ocr-2505-completion", + "document_annotation": null, + "usage_info": { + "pages_processed": 1, + "doc_size_bytes": 13264 + }, + "extractedText": "# Dummy PDF file 1", + "pageCount": 1 + } + }, + { + "json": { + "pages": [ + { + "index": 0, + "markdown": "# Dummy PDF file 2", + "images": [], + "dimensions": { + "dpi": 200, + "height": 2339, + "width": 1653 + } + } + ], + "model": "mistral-ocr-2505-completion", + "document_annotation": null, + "usage_info": { + "pages_processed": 1, + "doc_size_bytes": 13264 + }, + "extractedText": "# Dummy PDF file 2", + "pageCount": 1 + } + }, + { + "json": { + "pages": [ + { + "index": 0, + "markdown": "# Dummy PDF file 3", + "images": [], + "dimensions": { + "dpi": 200, + "height": 2339, + "width": 1653 + } + } + ], + "model": "mistral-ocr-2505-completion", + "document_annotation": null, + "usage_info": { + "pages_processed": 1, + "doc_size_bytes": 13264 + }, + "extractedText": "# Dummy PDF file 3", + "pageCount": 1 + } + } + ], + "Batching with delete files": [ + { + "json": { + "pages": [ + { + "index": 0, + "markdown": "# Dummy PDF file 1", + "images": [], + "dimensions": { + "dpi": 200, + "height": 2339, + "width": 1653 + } + } + ], + "model": "mistral-ocr-2505-completion", + "document_annotation": null, 
+ "usage_info": { + "pages_processed": 1, + "doc_size_bytes": 13264 + }, + "extractedText": "# Dummy PDF file 1", + "pageCount": 1 + } + }, + { + "json": { + "pages": [ + { + "index": 0, + "markdown": "# Dummy PDF file 2", + "images": [], + "dimensions": { + "dpi": 200, + "height": 2339, + "width": 1653 + } + } + ], + "model": "mistral-ocr-2505-completion", + "document_annotation": null, + "usage_info": { + "pages_processed": 1, + "doc_size_bytes": 13264 + }, + "extractedText": "# Dummy PDF file 2", + "pageCount": 1 + } + }, + { + "json": { + "pages": [ + { + "index": 0, + "markdown": "# Dummy PDF file 3", + "images": [], + "dimensions": { + "dpi": 200, + "height": 2339, + "width": 1653 + } + } + ], + "model": "mistral-ocr-2505-completion", + "document_annotation": null, + "usage_info": { + "pages_processed": 1, + "doc_size_bytes": 13264 + }, + "extractedText": "# Dummy PDF file 3", + "pageCount": 1 + } + } + ] + }, + "connections": { + "When clicking ‘Execute workflow’": { + "main": [ + [ + { + "node": "Document by URL", + "type": "main", + "index": 0 + }, + { + "node": "Image by URL", + "type": "main", + "index": 0 + }, + { + "node": "Set mock data", + "type": "main", + "index": 0 + }, + { + "node": "Set mock data for multiple files", + "type": "main", + "index": 0 + } + ] + ] + }, + "Set mock data": { + "main": [ + [ + { + "node": "Create PDF", + "type": "main", + "index": 0 + }, + { + "node": "Create JPG", + "type": "main", + "index": 0 + } + ] + ] + }, + "Create PDF": { + "main": [ + [ + { + "node": "Document from binary", + "type": "main", + "index": 0 + } + ] + ] + }, + "Create JPG": { + "main": [ + [ + { + "node": "Image from binary", + "type": "main", + "index": 0 + } + ] + ] + }, + "Set mock data for multiple files": { + "main": [ + [ + { + "node": "Split Out", + "type": "main", + "index": 0 + } + ] + ] + }, + "Split Out": { + "main": [ + [ + { + "node": "Create PDFs", + "type": "main", + "index": 0 + } + ] + ] + }, + "Create PDFs": { + "main": [ + [ + { 
+ "node": "Batching", + "type": "main", + "index": 0 + }, + { + "node": "Batching with delete files", + "type": "main", + "index": 0 + } + ] + ] + } + }, + "active": false, + "settings": { + "executionOrder": "v1" + }, + "versionId": "2c53fedc-f105-4f02-b1e0-8e6dcf4da1fa", + "meta": { + "templateCredsSetupCompleted": true, + "instanceId": "e115be144a6a5547dbfca93e774dfffa178aa94a181854c13e2ce5e14d195b2e" + }, + "id": "N6UCbkc3HPvoTa0M", + "tags": [] +} diff --git a/packages/nodes-base/nodes/MistralAI/types.ts b/packages/nodes-base/nodes/MistralAI/types.ts new file mode 100644 index 0000000000..535e6cd02d --- /dev/null +++ b/packages/nodes-base/nodes/MistralAI/types.ts @@ -0,0 +1,32 @@ +import type { IDataObject } from 'n8n-workflow'; + +/** Batch job record; imported as a type by MistralAi.node.ts. Carries an id, one of seven literal status values, an output_file string and a list of error objects. NOTE(review): presumably mirrors the job object returned by the Mistral batch API - confirm field meanings against the API docs. */ export interface BatchJob { + id: string; + status: + | 'QUEUED' + | 'RUNNING' + | 'SUCCESS' + | 'FAILED' + | 'TIMEOUT_EXCEEDED' + | 'CANCELLATION_REQUESTED' + | 'CANCELLED'; + /** NOTE(review): presumably the id of the file holding the results rather than the results themselves, and possibly unset until the job finishes - TODO confirm. */ output_file: string; + errors: IDataObject[]; +} + +/** One result entry of a batch job; imported as a type by MistralAi.node.ts. The OCR pages sit under response.body.pages and error is optional. NOTE(review): presumably one parsed entry of the job output file - confirm. */ export interface BatchItemResult { + id: string; + /** NOTE(review): presumably the caller-supplied id used to match a result back to its input item - TODO confirm. */ custom_id: string; + response: { + body: { + pages: Page[]; + }; + }; + error?: IDataObject; +} + +/** One page of an OCR response. The markdown field is what processResponseData in GenericFunctions.ts joins with blank lines into extractedText. The test fixtures in this diff also show a dimensions object (dpi, height, width) on every page, which is not declared here. */ export interface Page { + index: number; + markdown: string; + images: IDataObject[]; +} diff --git a/packages/nodes-base/package.json b/packages/nodes-base/package.json index a2ce9cdcea..596ab84b00 100644 --- a/packages/nodes-base/package.json +++ b/packages/nodes-base/package.json @@ -663,6 +663,7 @@ "dist/nodes/Microsoft/ToDo/MicrosoftToDo.node.js", "dist/nodes/Mindee/Mindee.node.js", "dist/nodes/Misp/Misp.node.js", + "dist/nodes/MistralAI/MistralAi.node.js", "dist/nodes/Mocean/Mocean.node.js", "dist/nodes/MondayCom/MondayCom.node.js", "dist/nodes/MongoDb/MongoDb.node.js",