From e17e767e700a74b187706552fc879c00fd551611 Mon Sep 17 00:00:00 2001
From: oleg
Date: Mon, 29 Apr 2024 13:41:48 +0200
Subject: [PATCH] feat(Ollama Chat Model Node): Add additional Ollama config parameters & fix vision (#9215)

Signed-off-by: Oleg Ivaniv
Co-authored-by: Michael Kret
---
 .../nodes/chains/ChainLLM/ChainLlm.node.ts |   6 +-
 .../llms/LMChatOllama/LmChatOllama.node.ts |   6 +-
 .../nodes/llms/LMOllama/description.ts     | 140 +++++++++++++++++-
 3 files changed, 145 insertions(+), 7 deletions(-)

diff --git a/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/ChainLlm.node.ts b/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/ChainLlm.node.ts
index 578dc791c1..b177755591 100644
--- a/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/ChainLlm.node.ts
+++ b/packages/@n8n/nodes-langchain/nodes/chains/ChainLLM/ChainLlm.node.ts
@@ -22,6 +22,7 @@ import { LLMChain } from 'langchain/chains';
 import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
 import { HumanMessage } from '@langchain/core/messages';
 import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
+import { ChatOllama } from '@langchain/community/chat_models/ollama';
 import { getTemplateNoticeField } from '../../../utils/sharedFields';
 import {
 	getOptionalOutputParsers,
@@ -81,7 +82,10 @@ async function getImageMessage(
 	)) as BaseLanguageModel;
 	const dataURI = `data:image/jpeg;base64,${bufferData.toString('base64')}`;

-	const imageUrl = model instanceof ChatGoogleGenerativeAI ? dataURI : { url: dataURI, detail };
+	const directUriModels = [ChatGoogleGenerativeAI, ChatOllama];
+	const imageUrl = directUriModels.some((i) => model instanceof i)
+		? dataURI
+		: { url: dataURI, detail };

 	return new HumanMessage({
 		content: [
diff --git a/packages/@n8n/nodes-langchain/nodes/llms/LMChatOllama/LmChatOllama.node.ts b/packages/@n8n/nodes-langchain/nodes/llms/LMChatOllama/LmChatOllama.node.ts
index b5314b067b..c5cd5eabfb 100644
--- a/packages/@n8n/nodes-langchain/nodes/llms/LMChatOllama/LmChatOllama.node.ts
+++ b/packages/@n8n/nodes-langchain/nodes/llms/LMChatOllama/LmChatOllama.node.ts
@@ -7,6 +7,7 @@ import {
 	type SupplyData,
 } from 'n8n-workflow';

+import type { ChatOllamaInput } from '@langchain/community/chat_models/ollama';
 import { ChatOllama } from '@langchain/community/chat_models/ollama';
 import { logWrapper } from '../../../utils/logWrapper';
 import { getConnectionHintNoticeField } from '../../../utils/sharedFields';
@@ -54,12 +55,13 @@ export class LmChatOllama implements INodeType {
 		const credentials = await this.getCredentials('ollamaApi');

 		const modelName = this.getNodeParameter('model', itemIndex) as string;
-		const options = this.getNodeParameter('options', itemIndex, {}) as object;
+		const options = this.getNodeParameter('options', itemIndex, {}) as ChatOllamaInput;

 		const model = new ChatOllama({
+			...options,
 			baseUrl: credentials.baseUrl as string,
 			model: modelName,
-			...options,
+			format: options.format === 'default' ? undefined : options.format,
 		});

 		return {
diff --git a/packages/@n8n/nodes-langchain/nodes/llms/LMOllama/description.ts b/packages/@n8n/nodes-langchain/nodes/llms/LMOllama/description.ts
index c9493fd573..382de60fdd 100644
--- a/packages/@n8n/nodes-langchain/nodes/llms/LMOllama/description.ts
+++ b/packages/@n8n/nodes-langchain/nodes/llms/LMOllama/description.ts
@@ -76,16 +76,16 @@ export const ollamaOptions: INodeProperties = {
 			default: 0.7,
 			typeOptions: { maxValue: 1, minValue: 0, numberPrecision: 1 },
 			description:
-				'Controls randomness: Lowering results in less random completions. As the temperature approaches zero, the model will become deterministic and repetitive.',
+				'Controls the randomness of the generated text. Lower values make the output more focused and deterministic, while higher values make it more diverse and random.',
 			type: 'number',
 		},
 		{
 			displayName: 'Top K',
 			name: 'topK',
 			default: -1,
-			typeOptions: { maxValue: 1, minValue: -1, numberPrecision: 1 },
+			typeOptions: { maxValue: 100, minValue: -1, numberPrecision: 1 },
 			description:
-				'Used to remove "long tail" low probability responses. Defaults to -1, which disables it.',
+				'Limits the number of highest probability vocabulary tokens to consider at each step. A higher value increases diversity but may reduce coherence. Set to -1 to disable.',
 			type: 'number',
 		},
 		{
@@ -94,8 +94,140 @@ export const ollamaOptions: INodeProperties = {
 			default: 1,
 			typeOptions: { maxValue: 1, minValue: 0, numberPrecision: 1 },
 			description:
-				'Controls diversity via nucleus sampling: 0.5 means half of all likelihood-weighted options are considered. We generally recommend altering this or temperature but not both.',
+				'Chooses from the smallest possible set of tokens whose cumulative probability exceeds the probability top_p. Helps generate more human-like text by reducing repetitions.',
 			type: 'number',
 		},
+		{
+			displayName: 'Frequency Penalty',
+			name: 'frequencyPenalty',
+			type: 'number',
+			default: 0.0,
+			typeOptions: { minValue: 0 },
+			description:
+				'Adjusts the penalty for tokens that have already appeared in the generated text. Higher values discourage repetition.',
+		},
+		{
+			displayName: 'Keep Alive',
+			name: 'keepAlive',
+			type: 'string',
+			default: '5m',
+			description:
+				'Specifies the duration to keep the loaded model in memory after use. Useful for frequently used models. Format: 1h30m (1 hour 30 minutes).',
+		},
+		{
+			displayName: 'Low VRAM Mode',
+			name: 'lowVram',
+			type: 'boolean',
+			default: false,
+			description:
+				'Whether to activate low VRAM mode, which reduces memory usage at the cost of slower generation speed. Useful for GPUs with limited memory.',
+		},
+		{
+			displayName: 'Main GPU ID',
+			name: 'mainGpu',
+			type: 'number',
+			default: 0,
+			description:
+				'Specifies the ID of the GPU to use for the main computation. Only change this if you have multiple GPUs.',
+		},
+		{
+			displayName: 'Context Batch Size',
+			name: 'numBatch',
+			type: 'number',
+			default: 512,
+			description:
+				'Sets the batch size for prompt processing. Larger batch sizes may improve generation speed but increase memory usage.',
+		},
+		{
+			displayName: 'Context Length',
+			name: 'numCtx',
+			type: 'number',
+			default: 2048,
+			description:
+				'The maximum number of tokens to use as context for generating the next token. Smaller values reduce memory usage, while larger values provide more context to the model.',
+		},
+		{
+			displayName: 'Number of GPUs',
+			name: 'numGpu',
+			type: 'number',
+			default: -1,
+			description:
+				'Specifies the number of GPUs to use for parallel processing. Set to -1 for auto-detection.',
+		},
+		{
+			displayName: 'Max Tokens to Generate',
+			name: 'numPredict',
+			type: 'number',
+			default: -1,
+			description:
+				'The maximum number of tokens to generate. Set to -1 for no limit. Be cautious when setting this to a large value, as it can lead to very long outputs.',
+		},
+		{
+			displayName: 'Number of CPU Threads',
+			name: 'numThread',
+			type: 'number',
+			default: 0,
+			description:
+				'Specifies the number of CPU threads to use for processing. Set to 0 for auto-detection.',
+		},
+		{
+			displayName: 'Penalize Newlines',
+			name: 'penalizeNewline',
+			type: 'boolean',
+			default: true,
+			description:
+				'Whether the model will be less likely to generate newline characters, encouraging longer continuous sequences of text',
+		},
+		{
+			displayName: 'Presence Penalty',
+			name: 'presencePenalty',
+			type: 'number',
+			default: 0.0,
+			description:
+				'Adjusts the penalty for tokens based on their presence in the generated text so far. Positive values penalize tokens that have already appeared, encouraging diversity.',
+		},
+		{
+			displayName: 'Repetition Penalty',
+			name: 'repeatPenalty',
+			type: 'number',
+			default: 1.0,
+			description:
+				'Adjusts the penalty factor for repeated tokens. Higher values more strongly discourage repetition. Set to 1.0 to disable repetition penalty.',
+		},
+		{
+			displayName: 'Use Memory Locking',
+			name: 'useMLock',
+			type: 'boolean',
+			default: false,
+			description:
+				'Whether to lock the model in memory to prevent swapping. This can improve performance but requires sufficient available memory.',
+		},
+		{
+			displayName: 'Use Memory Mapping',
+			name: 'useMMap',
+			type: 'boolean',
+			default: true,
+			description:
+				'Whether to use memory mapping for loading the model. This can reduce memory usage but may impact performance. Recommended to keep enabled.',
+		},
+		{
+			displayName: 'Load Vocabulary Only',
+			name: 'vocabOnly',
+			type: 'boolean',
+			default: false,
+			description:
+				'Whether to only load the model vocabulary without the weights. Useful for quickly testing tokenization.',
+		},
+		{
+			displayName: 'Output Format',
+			name: 'format',
+			type: 'options',
+			options: [
+				{ name: 'Default', value: 'default' },
+				{ name: 'JSON', value: 'json' },
+			],
+			default: 'default',
+			description: 'Specifies the format of the API response',
+		},
 	],
 };
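
Illustrative usage (not part of the patch): a minimal sketch of how the options exposed above flow into the ChatOllama constructor and how an image is passed after the vision fix. It assumes the option keys mirror the ChatOllamaInput interface from @langchain/community; the endpoint, model name, and data URI below are hypothetical placeholders, not values taken from the patch.

// Sketch only: mirrors the LmChatOllama and ChainLlm changes above under the
// assumptions stated in the lead-in.
import { ChatOllama } from '@langchain/community/chat_models/ollama';
import type { ChatOllamaInput } from '@langchain/community/chat_models/ollama';
import { HumanMessage } from '@langchain/core/messages';

async function main() {
	// Options roughly as the n8n node would collect them from the UI, including
	// the 'default' sentinel used by the new Output Format field.
	const options: Partial<ChatOllamaInput> & { format?: string } = {
		temperature: 0.7,
		topK: 40,
		numCtx: 4096,
		keepAlive: '5m',
		format: 'default',
	};

	const model = new ChatOllama({
		...options,
		baseUrl: 'http://localhost:11434', // hypothetical local Ollama instance
		model: 'llava', // hypothetical vision-capable model
		// 'default' is only a UI placeholder, so translate it to undefined,
		// as the LmChatOllama node now does.
		format: options.format === 'default' ? undefined : options.format,
	});

	const dataURI = 'data:image/jpeg;base64,...'; // truncated placeholder
	const message = new HumanMessage({
		content: [
			{ type: 'text', text: 'Describe the attached image in one sentence.' },
			// ChatOllama (like ChatGoogleGenerativeAI) receives the bare data URI,
			// which is what the directUriModels check in ChainLlm.node.ts selects.
			{ type: 'image_url', image_url: dataURI },
		],
	});

	const reply = await model.invoke([message]);
	console.log(reply.content);
}

void main();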