diff --git a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs index 507f041b1..112fe23f9 100644 --- a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs +++ b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs @@ -1,8 +1,7 @@ using System.Text.RegularExpressions; -using LLama.Batched; using LLama.Common; using Spectre.Console; -using LLama.Abstractions; +using LLama.Native; namespace LLama.Examples.Examples { @@ -19,12 +18,8 @@ public static async Task Run() var prompt = $"{{{modelImage}}}\nUSER:\nProvide a full description of the image.\nASSISTANT:\n"; - var parameters = new ModelParams(modelPath) - { - ContextSize = 4096, - Seed = 1337, - GpuLayerCount = 10 - }; + var parameters = new ModelParams(modelPath); + using var model = LLamaWeights.LoadFromFile(parameters); using var context = model.CreateContext(parameters); @@ -47,16 +42,16 @@ public static async Task Run() var imageMatches = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value); var imageCount = imageMatches.Count(); var hasImages = imageCount > 0; - byte[][] imageBytes = null; if (hasImages) { var imagePathsWithCurlyBraces = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value); - var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value); + var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value).ToList(); + List imageBytes; try { - imageBytes = imagePaths.Select(File.ReadAllBytes).ToArray(); + imageBytes = imagePaths.Select(File.ReadAllBytes).ToList(); } catch (IOException exception) { @@ -69,15 +64,17 @@ public static async Task Run() break; } + // Each prompt with images we clear cache + // When the prompt contains images we clear KV_CACHE to restart conversation + // See: + // https://github.com/ggerganov/llama.cpp/discussions/3620 + ex.Context.NativeHandle.KvCacheRemove( LLamaSeqId.Zero, -1, -1 ); int index = 0; foreach (var path in imagePathsWithCurlyBraces) { // First image replace to tag "); - else - prompt = prompt.Replace(path, ""); + prompt = prompt.Replace(path, index++ == 0 ? "" : ""); } @@ -102,7 +99,7 @@ public static async Task Run() // foreach (var image in imagePaths) { - ex.Images.Add(File.ReadAllBytes(image)); + ex.Images.Add(await File.ReadAllBytesAsync(image)); } } @@ -118,7 +115,7 @@ public static async Task Run() // let the user finish with exit // - if (prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase)) + if (prompt != null && prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase)) break; } diff --git a/LLama/Abstractions/ILLamaExecutor.cs b/LLama/Abstractions/ILLamaExecutor.cs index d6c8d2ce2..574a27d8e 100644 --- a/LLama/Abstractions/ILLamaExecutor.cs +++ b/LLama/Abstractions/ILLamaExecutor.cs @@ -25,7 +25,7 @@ public interface ILLamaExecutor public LLavaWeights? ClipModel { get; } /// - /// List of images: Image filen path, uri or image byte array. See ImageData. + /// List of images: List of images in byte array format. /// public List Images { get; } diff --git a/LLama/LLamaExecutorBase.cs b/LLama/LLamaExecutorBase.cs index 65c0dcb4b..c721726e8 100644 --- a/LLama/LLamaExecutorBase.cs +++ b/LLama/LLamaExecutorBase.cs @@ -79,7 +79,7 @@ public bool IsMultiModal public LLavaWeights? ClipModel { get; } /// - public List Images { get; set; } + public List Images { get; } /// /// Current "mu" value for mirostat sampling diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs index a87a0f37c..055a5f13d 100644 --- a/LLama/LLamaInteractExecutor.cs +++ b/LLama/LLamaInteractExecutor.cs @@ -11,7 +11,7 @@ using LLama.Exceptions; using LLama.Extensions; using Microsoft.Extensions.Logging; -using System.Net.Http; + namespace LLama { @@ -136,20 +136,29 @@ protected override Task PreprocessInputs(string text, InferStateArgs args) text += "\n"; } - var line_inp = Context.Tokenize(text, false); - _embed_inps.AddRange(line_inp); - args.RemainedTokens -= line_inp.Length; + if (!this.IsMultiModal) + { + var line_inp = Context.Tokenize(text, false); + _embed_inps.AddRange(line_inp); + args.RemainedTokens -= line_inp.Length; + } + else + { + PreprocessLlava(text, args, false); + } } return Task.CompletedTask; } + /// private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = true ) { int usedTokens = 0; + // If the prompt contains the tag extract this. _imageInPrompt = text.Contains(""); - if (_imageInPrompt && ClipModel != null) + if (_imageInPrompt && IsMultiModal ) { foreach (var image in Images) { @@ -170,7 +179,16 @@ private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = tru } else { - _embed_inps = Context.Tokenize(text, true).ToList(); + if (addBos) + { + _embed_inps = Context.Tokenize(text, true).ToList(); + } + else + { + var line_inp = Context.Tokenize(text, false); + _embed_inps.AddRange(line_inp); + args.RemainedTokens -= line_inp.Length; + } } return Task.CompletedTask; } @@ -239,6 +257,7 @@ protected override Task InferInternal(IInferenceParams inferenceParams, InferSta _EmbedImagePosition = -1; _imageEmbedHandles.Clear(); + Images.Clear(); } else { diff --git a/docs/Examples/LLavaInteractiveModeExecute.md b/docs/Examples/LLavaInteractiveModeExecute.md index 9c6faa4f2..826ac447b 100644 --- a/docs/Examples/LLavaInteractiveModeExecute.md +++ b/docs/Examples/LLavaInteractiveModeExecute.md @@ -2,9 +2,9 @@ ```cs using System.Text.RegularExpressions; -using LLama.Batched; using LLama.Common; using Spectre.Console; +using LLama.Native; namespace LLama.Examples.Examples { @@ -21,11 +21,8 @@ namespace LLama.Examples.Examples var prompt = $"{{{modelImage}}}\nUSER:\nProvide a full description of the image.\nASSISTANT:\n"; - var parameters = new ModelParams(modelPath) - { - ContextSize = 4096, - Seed = 1337, - }; + var parameters = new ModelParams(modelPath); + using var model = LLamaWeights.LoadFromFile(parameters); using var context = model.CreateContext(parameters); @@ -48,16 +45,16 @@ namespace LLama.Examples.Examples var imageMatches = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value); var imageCount = imageMatches.Count(); var hasImages = imageCount > 0; - byte[][] imageBytes = null; if (hasImages) { var imagePathsWithCurlyBraces = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value); - var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value); + var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value).ToList(); + List imageBytes; try { - imageBytes = imagePaths.Select(File.ReadAllBytes).ToArray(); + imageBytes = imagePaths.Select(File.ReadAllBytes).ToList(); } catch (IOException exception) { @@ -70,15 +67,17 @@ namespace LLama.Examples.Examples break; } + // Each prompt with images we clear cache + // When the prompt contains images we clear KV_CACHE to restart conversation + // See: + // https://github.com/ggerganov/llama.cpp/discussions/3620 + ex.Context.NativeHandle.KvCacheRemove( LLamaSeqId.Zero, -1, -1 ); int index = 0; foreach (var path in imagePathsWithCurlyBraces) { // First image replace to tag "); - else - prompt = prompt.Replace(path, ""); + prompt = prompt.Replace(path, index++ == 0 ? "" : ""); } @@ -101,7 +100,10 @@ namespace LLama.Examples.Examples // Initilize Images in executor // - ex.ImagePaths = imagePaths.ToList(); + foreach (var image in imagePaths) + { + ex.Images.Add(await File.ReadAllBytesAsync(image)); + } } Console.ForegroundColor = Color.White; @@ -116,7 +118,7 @@ namespace LLama.Examples.Examples // let the user finish with exit // - if (prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase)) + if (prompt != null && prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase)) break; } diff --git a/docs/Tutorials/Executors.md b/docs/Tutorials/Executors.md index d014da883..8e7ce23a2 100644 --- a/docs/Tutorials/Executors.md +++ b/docs/Tutorials/Executors.md @@ -28,9 +28,9 @@ public interface ILLamaExecutor public LLavaWeights? ClipModel { get; } /// - /// List of images: Image filename and path (jpeg images). + /// List of images: List of images in byte array format. /// - public List ImagePaths { get; set; } + public List Images { get; } ///