From d6890e4ec472fc6c4ac7a3e738849d9f01f8f13c Mon Sep 17 00:00:00 2001
From: SignalRT <admin@signalrt.com>
Date: Sat, 13 Apr 2024 11:33:41 +0200
Subject: [PATCH 1/5] Initial approach to clear images

---
 .../Examples/LlavaInteractiveModeExecute.cs   |  7 +++--
 LLama/LLamaInteractExecutor.cs                | 31 +++++++++++++++----
 2 files changed, 29 insertions(+), 9 deletions(-)
diff --git a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
index 507f041b1..fac10ef1e 100644
--- a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
+++ b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
@@ -3,6 +3,7 @@
 using LLama.Common;
 using Spectre.Console;
 using LLama.Abstractions;
+using LLama.Native;
 
 namespace LLama.Examples.Examples
 {
@@ -21,9 +22,6 @@ public static async Task Run()
 
             var parameters = new ModelParams(modelPath)
             {
-                ContextSize = 4096,
-                Seed = 1337,
-                GpuLayerCount = 10
             };
             using var model = LLamaWeights.LoadFromFile(parameters);
             using var context = model.CreateContext(parameters);
@@ -69,6 +67,9 @@ public static async Task Run()
                         break;
                     }
 
+                    // Each prompt with images we clear cache
+                    // When the prompt contains images we clear KV_CACHE to restart conversation
+                    ex.Context.NativeHandle.KvCacheRemove( LLamaSeqId.Zero, -1, -1 );
 
                     int index = 0;
                     foreach (var path in imagePathsWithCurlyBraces)
diff --git a/LLama/LLamaInteractExecutor.cs b/LLama/LLamaInteractExecutor.cs
index a87a0f37c..055a5f13d 100644
--- a/LLama/LLamaInteractExecutor.cs
+++ b/LLama/LLamaInteractExecutor.cs
@@ -11,7 +11,7 @@
 using LLama.Exceptions;
 using LLama.Extensions;
 using Microsoft.Extensions.Logging;
-using System.Net.Http;
+
 
 namespace LLama
 {
@@ -136,20 +136,29 @@ protected override Task PreprocessInputs(string text, InferStateArgs args)
                     text += "\n";
                 }
 
-                var line_inp = Context.Tokenize(text, false);
-                _embed_inps.AddRange(line_inp);
-                args.RemainedTokens -= line_inp.Length;
+                if (!this.IsMultiModal)
+                {
+                    var line_inp = Context.Tokenize(text, false);
+                    _embed_inps.AddRange(line_inp);
+                    args.RemainedTokens -= line_inp.Length;
+                }
+                else
+                {
+                    PreprocessLlava(text, args, false);
+                }
             }
 
             return Task.CompletedTask;
         }
 
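+        /// <summary>Preprocesses a prompt for a LLaVA (multimodal) model, checking whether it contains the image tag.</summary>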
         private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = true )
         {
             int usedTokens = 0;
+            
             // If the prompt contains the tag <image> extract this.
             _imageInPrompt = text.Contains("<image>");
-            if (_imageInPrompt && ClipModel != null)
+            if (_imageInPrompt && IsMultiModal )
             {
                 foreach (var image in Images)
                 {
@@ -170,7 +179,16 @@ private Task PreprocessLlava(string text, InferStateArgs args, bool addBos = tru
             }
             else
             {
-                _embed_inps = Context.Tokenize(text, true).ToList();
+                if (addBos)
+                {
+                    _embed_inps = Context.Tokenize(text, true).ToList();
+                }
+                else
+                {
+                    var line_inp = Context.Tokenize(text, false);
+                    _embed_inps.AddRange(line_inp);
+                    args.RemainedTokens -= line_inp.Length;                    
+                }
             }
             return Task.CompletedTask;
         }
@@ -239,6 +257,7 @@ protected override Task InferInternal(IInferenceParams inferenceParams, InferSta
 
                     _EmbedImagePosition = -1;
                     _imageEmbedHandles.Clear();
+                    Images.Clear();
                 }
                 else
                 {

From aa11562f62e74f2972885090959e0b3f26984803 Mon Sep 17 00:00:00 2001
From: SignalRT <admin@signalrt.com>
Date: Sat, 13 Apr 2024 11:59:16 +0200
Subject: [PATCH 2/5] Link the llama.cpp reference about resetting the llava context

---
 LLama.Examples/Examples/LlavaInteractiveModeExecute.cs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
index fac10ef1e..0c291ec75 100644
--- a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
+++ b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
@@ -69,6 +69,8 @@ public static async Task Run()
 
                     // Each prompt with images we clear cache
                     // When the prompt contains images we clear KV_CACHE to restart conversation
+                    // See:
+                    // https://github.com/ggerganov/llama.cpp/discussions/3620
                     ex.Context.NativeHandle.KvCacheRemove( LLamaSeqId.Zero, -1, -1 );
 
                     int index = 0;

From 168f697db6d05e470c3ea1556a42df84ef5550e6 Mon Sep 17 00:00:00 2001
From: SignalRT <admin@signalrt.com>
Date: Sat, 13 Apr 2024 16:34:32 +0200
Subject: [PATCH 3/5] Clean up and align documentation with the changes in the
 interface

---
 .../Examples/LlavaInteractiveModeExecute.cs   | 22 +++++--------
 LLama/Abstractions/ILLamaExecutor.cs          |  2 +-
 LLama/LLamaExecutorBase.cs                    |  2 +-
 docs/Examples/LLavaInteractiveModeExecute.md  | 32 ++++++++++---------
 docs/Tutorials/Executors.md                   |  4 +--
 5 files changed, 29 insertions(+), 33 deletions(-)

diff --git a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
index 0c291ec75..112fe23f9 100644
--- a/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
+++ b/LLama.Examples/Examples/LlavaInteractiveModeExecute.cs
@@ -1,8 +1,6 @@
 ﻿using System.Text.RegularExpressions;
-using LLama.Batched;
 using LLama.Common;
 using Spectre.Console;
-using LLama.Abstractions;
 using LLama.Native;
 
 namespace LLama.Examples.Examples
@@ -20,9 +18,8 @@ public static async Task Run()
 
             var prompt = $"{{{modelImage}}}\nUSER:\nProvide a full description of the image.\nASSISTANT:\n";
 
-            var parameters = new ModelParams(modelPath)
-            {
-            };
+            var parameters = new ModelParams(modelPath);
+
             using var model = LLamaWeights.LoadFromFile(parameters);
             using var context = model.CreateContext(parameters);
             
@@ -45,16 +42,16 @@ public static async Task Run()
                 var imageMatches = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value);
                 var imageCount = imageMatches.Count();
                 var hasImages = imageCount > 0;
-                byte[][] imageBytes = null;
 
                 if (hasImages)
                 {
                     var imagePathsWithCurlyBraces = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value);
-                    var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value);
+                    var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value).ToList();
 
+                    List<byte[]> imageBytes;
                     try
                     {
-                        imageBytes = imagePaths.Select(File.ReadAllBytes).ToArray();
+                        imageBytes = imagePaths.Select(File.ReadAllBytes).ToList();
                     }
                     catch (IOException exception)
                     {
@@ -77,10 +74,7 @@ public static async Task Run()
                     foreach (var path in imagePathsWithCurlyBraces)
                     {
                         // First image replace to tag <image, the rest of the images delete the tag
-                        if (index++ == 0)
-                            prompt = prompt.Replace(path, "<image>");
-                        else
-                            prompt = prompt.Replace(path, "");
+                        prompt = prompt.Replace(path, index++ == 0 ? "<image>" : "");
                     }
 
                   
@@ -105,7 +99,7 @@ public static async Task Run()
                     //
                     foreach (var image in imagePaths)
                     {
-                        ex.Images.Add(File.ReadAllBytes(image));
+                        ex.Images.Add(await File.ReadAllBytesAsync(image));
                     }
                 }
 
@@ -121,7 +115,7 @@ public static async Task Run()
                 
                 // let the user finish with exit
                 //
-                if (prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase))
+                if (prompt != null && prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase))
                     break;
 
             }
diff --git a/LLama/Abstractions/ILLamaExecutor.cs b/LLama/Abstractions/ILLamaExecutor.cs
index d6c8d2ce2..574a27d8e 100644
--- a/LLama/Abstractions/ILLamaExecutor.cs
+++ b/LLama/Abstractions/ILLamaExecutor.cs
@@ -25,7 +25,7 @@ public interface ILLamaExecutor
         public LLavaWeights? ClipModel { get;  }
 
         /// <summary>
-        /// List of images: Image filen path, uri or image byte array. See ImageData.
+        /// List of images in byte array format.
         /// </summary>
         public List<byte[]> Images { get; }
 
diff --git a/LLama/LLamaExecutorBase.cs b/LLama/LLamaExecutorBase.cs
index 65c0dcb4b..c721726e8 100644
--- a/LLama/LLamaExecutorBase.cs
+++ b/LLama/LLamaExecutorBase.cs
@@ -79,7 +79,7 @@ public bool IsMultiModal
         public LLavaWeights? ClipModel { get;  }
 
         /// <inheritdoc />
-        public List<byte[]> Images { get; set; }
+        public List<byte[]> Images { get; }
 
         /// <summary>
         /// Current "mu" value for mirostat sampling
diff --git a/docs/Examples/LLavaInteractiveModeExecute.md b/docs/Examples/LLavaInteractiveModeExecute.md
index 9c6faa4f2..826ac447b 100644
--- a/docs/Examples/LLavaInteractiveModeExecute.md
+++ b/docs/Examples/LLavaInteractiveModeExecute.md
@@ -2,9 +2,9 @@
 
 ```cs
 using System.Text.RegularExpressions;
-using LLama.Batched;
 using LLama.Common;
 using Spectre.Console;
+using LLama.Native;
 
 namespace LLama.Examples.Examples
 {
@@ -21,11 +21,8 @@ namespace LLama.Examples.Examples
 
             var prompt = $"{{{modelImage}}}\nUSER:\nProvide a full description of the image.\nASSISTANT:\n";
 
-            var parameters = new ModelParams(modelPath)
-            {
-                ContextSize = 4096,
-                Seed = 1337,
-            };
+            var parameters = new ModelParams(modelPath);
+
             using var model = LLamaWeights.LoadFromFile(parameters);
             using var context = model.CreateContext(parameters);
             
@@ -48,16 +45,16 @@ namespace LLama.Examples.Examples
                 var imageMatches = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value);
                 var imageCount = imageMatches.Count();
                 var hasImages = imageCount > 0;
-                byte[][] imageBytes = null;
 
                 if (hasImages)
                 {
                     var imagePathsWithCurlyBraces = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Value);
-                    var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value);
+                    var imagePaths = Regex.Matches(prompt, "{([^}]*)}").Select(m => m.Groups[1].Value).ToList();
 
+                    List<byte[]> imageBytes;
                     try
                     {
-                        imageBytes = imagePaths.Select(File.ReadAllBytes).ToArray();
+                        imageBytes = imagePaths.Select(File.ReadAllBytes).ToList();
                     }
                     catch (IOException exception)
                     {
@@ -70,15 +67,17 @@ namespace LLama.Examples.Examples
                         break;
                     }
 
+                    // Each prompt with images we clear cache
+                    // When the prompt contains images we clear KV_CACHE to restart conversation
+                    // See:
+                    // https://github.com/ggerganov/llama.cpp/discussions/3620
+                    ex.Context.NativeHandle.KvCacheRemove( LLamaSeqId.Zero, -1, -1 );
 
                     int index = 0;
                     foreach (var path in imagePathsWithCurlyBraces)
                     {
                         // First image replace to tag <image, the rest of the images delete the tag
-                        if (index++ == 0)
-                            prompt = prompt.Replace(path, "<image>");
-                        else
-                            prompt = prompt.Replace(path, "");
+                        prompt = prompt.Replace(path, index++ == 0 ? "<image>" : "");
                     }
 
                   
@@ -101,7 +100,10 @@ namespace LLama.Examples.Examples
 
                     // Initilize Images in executor
                     //
-                    ex.ImagePaths = imagePaths.ToList();
+                    foreach (var image in imagePaths)
+                    {
+                        ex.Images.Add(await File.ReadAllBytesAsync(image));
+                    }
                 }
 
                 Console.ForegroundColor = Color.White;
@@ -116,7 +118,7 @@ namespace LLama.Examples.Examples
                 
                 // let the user finish with exit
                 //
-                if (prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase))
+                if (prompt != null && prompt.Equals("/exit", StringComparison.OrdinalIgnoreCase))
                     break;
 
             }
diff --git a/docs/Tutorials/Executors.md b/docs/Tutorials/Executors.md
index d014da883..8e7ce23a2 100644
--- a/docs/Tutorials/Executors.md
+++ b/docs/Tutorials/Executors.md
@@ -28,9 +28,9 @@ public interface ILLamaExecutor
     public LLavaWeights? ClipModel { get;  }        
 
     /// <summary>
-    /// List of images: Image filename and path (jpeg images).
+    /// List of images in byte array format.
     /// </summary>
-    public List<string> ImagePaths { get; set; }
+    public List<byte[]> Images { get; }
 
 
     /// <summary>

From f29f61ee0faa0884b2c26b51e6bbf66793091e7b Mon Sep 17 00:00:00 2001
From: SignalRT <admin@signalrt.com>
Date: Sat, 13 Apr 2024 17:38:49 +0200
Subject: [PATCH 4/5] Try MacOS ARM availability

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 26d352079..6dfde1acf 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -22,7 +22,7 @@ jobs:
             os: ubuntu-latest
             config: release
           - build: osx-release
-            os: macos-latest
+            os: macos-latest-xlarge
             config: release
           - build: windows-release
             os: windows-2019

From 0cf6073378bb0fe97437c44ce28da9f9f2269590 Mon Sep 17 00:00:00 2001
From: SignalRT <admin@signalrt.com>
Date: Sat, 13 Apr 2024 17:49:54 +0200
Subject: [PATCH 5/5] Revert "Try MacOS ARM availability"

This reverts commit f29f61ee0faa0884b2c26b51e6bbf66793091e7b.
---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 6dfde1acf..26d352079 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -22,7 +22,7 @@ jobs:
             os: ubuntu-latest
             config: release
           - build: osx-release
-            os: macos-latest-xlarge
+            os: macos-latest
             config: release
           - build: windows-release
             os: windows-2019