From d6890e4ec472fc6c4ac7a3e738849d9f01f8f13c Mon Sep 17 00:00:00 2001 From: SignalRT Date: Sat, 13 Apr 2024 11:33:41 +0200 Subject: [PATCH 1/5] Initial approach to clear images --- .../Examples/LlavaInteractiveModeExecute.cs | 7 +++-- LLama/LLamaInteractExecutor.cs | 31 +++++++++++++++---- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs index 507f041b1..fac10ef1e 100644 --- a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs +++ b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs @@ -3,6 +3,7 @@ using LLama.Common; using Spectre.Console; using LLama.Abstractions; +using LLama.Native; namespace LLama.Examples.Examples { @@ -21,9 +22,6 @@ public static async Task Run() var parameters = new ModelParams(modelPath) { - ContextSize = 4096, - Seed = 1337, - GpuLayerCount = 10 }; using var model = LLamaWeights.LoadFromFile(parameters); using var context = model.CreateContext(parameters); @@ -69,6 +67,9 @@ public static async Task Run() break; } + // Each prompt with images we clear cache + // When the prompt contains images we clear KV_CACHE to restart conversation + ex.Context.NativeHandle.KvCacheRemove( LLamaSeqId.Zero, -1, -1 ); int index = 0; foreach (var path in imagePathsWithCurlyBraces) diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs index a87a0f37c..055a5f13d 100644 --- a/LLama/LLamaInteractExecutor.cs +++ b/LLama/LLamaInteractExecutor.cs @@ -11,7 +11,7 @@ using LLama.Exceptions; using LLama.Extensions; using Microsoft.Extensions.Logging; -using System.Net.Http; + namespace LLama { @@ -136,20 +136,29 @@ protected override Task PreprocessInputs(string text, InferStateArgs args) text += "\n"; } - var line_inp = Context.Tokenize(text, false); - _embed_inps.AddRange(line_inp); - args.RemainedTokens -= line_inp.Length; + if (!this.IsMultiModal) + { + var line_inp = Context.Tokenize(text, false); + _embed_inps.AddRange(line_inp); + args.RemainedTokens -= line_inp.Length; + } + else + { + PreprocessLlava(text, args, false); + } } return Task.CompletedTask; } + /// private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = true ) { int usedTokens = 0; + // If the prompt contains the tag extract this. _imageInPrompt = text.Contains(""); - if (_imageInPrompt && ClipModel != null) + if (_imageInPrompt && IsMultiModal ) { foreach (var image in Images) { @@ -170,7 +179,16 @@ private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = tru } else { - _embed_inps = Context.Tokenize(text, true).ToList(); + if (addBos) + { + _embed_inps = Context.Tokenize(text, true).ToList(); + } + else + { + var line_inp = Context.Tokenize(text, false); + _embed_inps.AddRange(line_inp); + args.RemainedTokens -= line_inp.Length; + } } return Task.CompletedTask; } @@ -239,6 +257,7 @@ protected override Task InferInternal(IInferenceParams inferenceParams, InferSta _EmbedImagePosition = -1; _imageEmbedHandles.Clear(); + Images.Clear(); } else { From aa11562f62e74f2972885090959e0b3f26984803 Mon Sep 17 00:00:00 2001 From: SignalRT Date: Sat, 13 Apr 2024 11:59:16 +0200 Subject: [PATCH 2/5] Link the llama.cpp reference about reset llava contex --- LLama.Examples/Examples/LlavaInteractiveModeExecute.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs index fac10ef1e..0c291ec75 100644 --- a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs +++ b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs @@ -69,6 +69,8 @@ public static async Task Run() // Each prompt with images we clear cache // When the prompt contains images we clear KV_CACHE to restart conversation + // See: + // https://github.com/ggerganov/llama.cpp/discussions/3620 ex.Context.NativeHandle.KvCacheRemove( LLamaSeqId.Zero, -1, -1 ); int index = 0; From 168f697db6d05e470c3ea1556a42df84ef5550e6 Mon Sep 17 00:00:00 2001 From: SignalRT Date: Sat, 13 Apr 2024 16:34:32 +0200 Subject: [PATCH 3/5] Clean up and align documentation with the changes in the interface --- .../Examples/LlavaInteractiveModeExecute.cs | 22 +++++-------- LLama/Abstractions/ILLamaExecutor.cs | 2 +- LLama/LLamaExecutorBase.cs | 2 +- docs/Examples/LLavaInteractiveModeExecute.md | 32 ++++++++++--------- docs/Tutorials/Executors.md | 4 +-- 5 files changed, 29 insertions(+), 33 deletions(-) diff --git a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs index 0c291ec75..112fe23f9 100644 --- a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs +++ b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs @@ -1,8 +1,6 @@ using System.Text.RegularExpressions; -using LLama.Batched; using LLama.Common; using Spectre.Console; -using LLama.Abstractions; using LLama.Native; namespace LLama.Examples.Examples @@ -20,9 +18,8 @@ public static async Task Run() var prompt = $"{{{modelImage}}}\nUSER:\nProvide a full description of the image.\nASSISTANT:\n"; - var parameters = new ModelParams(modelPath) - { - }; + var parameters = new ModelParams(modelPath); + using var model = LLamaWeights.LoadFromFile(parameters); using var context = model.CreateContext(parameters); @@ -45,16 +42,16 @@ public static async Task Run() var imageMatches = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value); var imageCount = imageMatches.Count(); var hasImages = imageCount > 0; - byte[][] imageBytes = null; if (hasImages) { var imagePathsWithCurlyBraces = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value); - var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value); + var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value).ToList(); + List imageBytes; try { - imageBytes = imagePaths.Select(File.ReadAllBytes).ToArray(); + imageBytes = imagePaths.Select(File.ReadAllBytes).ToList(); } catch (IOException exception) { @@ -77,10 +74,7 @@ public static async Task Run() foreach (var path in imagePathsWithCurlyBraces) { // First image replace to tag "); - else - prompt = prompt.Replace(path, ""); + prompt = prompt.Replace(path, index++ == 0 ? "" : ""); } @@ -105,7 +99,7 @@ public static async Task Run() // foreach (var image in imagePaths) { - ex.Images.Add(File.ReadAllBytes(image)); + ex.Images.Add(await File.ReadAllBytesAsync(image)); } } @@ -121,7 +115,7 @@ public static async Task Run() // let the user finish with exit // - if (prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase)) + if (prompt != null && prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase)) break; } diff --git a/LLama/Abstractions/ILLamaExecutor.cs b/LLama/Abstractions/ILLamaExecutor.cs index d6c8d2ce2..574a27d8e 100644 --- a/LLama/Abstractions/ILLamaExecutor.cs +++ b/LLama/Abstractions/ILLamaExecutor.cs @@ -25,7 +25,7 @@ public interface ILLamaExecutor public LLavaWeights? ClipModel { get; } /// - /// List of images: Image filen path, uri or image byte array. See ImageData. + /// List of images: List of images in byte array format. /// public List Images { get; } diff --git a/LLama/LLamaExecutorBase.cs b/LLama/LLamaExecutorBase.cs index 65c0dcb4b..c721726e8 100644 --- a/LLama/LLamaExecutorBase.cs +++ b/LLama/LLamaExecutorBase.cs @@ -79,7 +79,7 @@ public bool IsMultiModal public LLavaWeights? ClipModel { get; } /// - public List Images { get; set; } + public List Images { get; } /// /// Current "mu" value for mirostat sampling diff --git a/docs/Examples/LLavaInteractiveModeExecute.md b/docs/Examples/LLavaInteractiveModeExecute.md index 9c6faa4f2..826ac447b 100644 --- a/docs/Examples/LLavaInteractiveModeExecute.md +++ b/docs/Examples/LLavaInteractiveModeExecute.md @@ -2,9 +2,9 @@ ```cs using System.Text.RegularExpressions; -using LLama.Batched; using LLama.Common; using Spectre.Console; +using LLama.Native; namespace LLama.Examples.Examples { @@ -21,11 +21,8 @@ namespace LLama.Examples.Examples var prompt = $"{{{modelImage}}}\nUSER:\nProvide a full description of the image.\nASSISTANT:\n"; - var parameters = new ModelParams(modelPath) - { - ContextSize = 4096, - Seed = 1337, - }; + var parameters = new ModelParams(modelPath); + using var model = LLamaWeights.LoadFromFile(parameters); using var context = model.CreateContext(parameters); @@ -48,16 +45,16 @@ namespace LLama.Examples.Examples var imageMatches = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value); var imageCount = imageMatches.Count(); var hasImages = imageCount > 0; - byte[][] imageBytes = null; if (hasImages) { var imagePathsWithCurlyBraces = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value); - var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value); + var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value).ToList(); + List imageBytes; try { - imageBytes = imagePaths.Select(File.ReadAllBytes).ToArray(); + imageBytes = imagePaths.Select(File.ReadAllBytes).ToList(); } catch (IOException exception) { @@ -70,15 +67,17 @@ namespace LLama.Examples.Examples break; } + // Each prompt with images we clear cache + // When the prompt contains images we clear KV_CACHE to restart conversation + // See: + // https://github.com/ggerganov/llama.cpp/discussions/3620 + ex.Context.NativeHandle.KvCacheRemove( LLamaSeqId.Zero, -1, -1 ); int index = 0; foreach (var path in imagePathsWithCurlyBraces) { // First image replace to tag "); - else - prompt = prompt.Replace(path, ""); + prompt = prompt.Replace(path, index++ == 0 ? "" : ""); } @@ -101,7 +100,10 @@ namespace LLama.Examples.Examples // Initilize Images in executor // - ex.ImagePaths = imagePaths.ToList(); + foreach (var image in imagePaths) + { + ex.Images.Add(await File.ReadAllBytesAsync(image)); + } } Console.ForegroundColor = Color.White; @@ -116,7 +118,7 @@ namespace LLama.Examples.Examples // let the user finish with exit // - if (prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase)) + if (prompt != null && prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase)) break; } diff --git a/docs/Tutorials/Executors.md b/docs/Tutorials/Executors.md index d014da883..8e7ce23a2 100644 --- a/docs/Tutorials/Executors.md +++ b/docs/Tutorials/Executors.md @@ -28,9 +28,9 @@ public interface ILLamaExecutor public LLavaWeights? ClipModel { get; } /// - /// List of images: Image filename and path (jpeg images). + /// List of images: List of images in byte array format. /// - public List ImagePaths { get; set; } + public List Images { get; } /// From f29f61ee0faa0884b2c26b51e6bbf66793091e7b Mon Sep 17 00:00:00 2001 From: SignalRT Date: Sat, 13 Apr 2024 17:38:49 +0200 Subject: [PATCH 4/5] Try MacOS ARM availability --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 26d352079..6dfde1acf 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -22,7 +22,7 @@ jobs: os: ubuntu-latest config: release - build: osx-release - os: macos-latest + os: macos-latest-xlarge config: release - build: windows-release os: windows-2019 From 0cf6073378bb0fe97437c44ce28da9f9f2269590 Mon Sep 17 00:00:00 2001 From: SignalRT Date: Sat, 13 Apr 2024 17:49:54 +0200 Subject: [PATCH 5/5] Revert "Try MacOS ARM availability" This reverts commit f29f61ee0faa0884b2c26b51e6bbf66793091e7b. --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6dfde1acf..26d352079 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -22,7 +22,7 @@ jobs: os: ubuntu-latest config: release - build: osx-release - os: macos-latest-xlarge + os: macos-latest config: release - build: windows-release os: windows-2019