Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multi Context #90

Merged
merged 13 commits into from
Aug 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion LLama.Examples/NewVersion/ChatSessionStripRoleName.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public static void Run()
Console.Write("Please input your model path: ");
string modelPath = Console.ReadLine();
var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim();
InteractiveExecutor ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
InteractiveExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
ChatSession session = new ChatSession(ex).WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { "User:", "Bob:" }, redundancyLength: 8));

Console.ForegroundColor = ConsoleColor.Yellow;
Expand Down
2 changes: 1 addition & 1 deletion LLama.Examples/NewVersion/ChatSessionWithRoleName.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public static void Run()
Console.Write("Please input your model path: ");
string modelPath = Console.ReadLine();
var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim();
InteractiveExecutor ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
InteractiveExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
ChatSession session = new ChatSession(ex); // The only change is to remove the transform for the output text stream.

Console.ForegroundColor = ConsoleColor.Yellow;
Expand Down
2 changes: 1 addition & 1 deletion LLama.Examples/NewVersion/InstructModeExecute.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public static void Run()
string modelPath = Console.ReadLine();
var prompt = File.ReadAllText("Assets/dan.txt").Trim();

InstructExecutor ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 1024)));
InstructExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 1024)));

Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("The executor has been enabled. In this example, the LLM will follow your instructions. For example, you can input \"Write a story about a fox who want to " +
Expand Down
2 changes: 1 addition & 1 deletion LLama.Examples/NewVersion/InteractiveModeExecute.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public async static Task Run()
string modelPath = Console.ReadLine();
var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim();

InteractiveExecutor ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 256)));
InteractiveExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 256)));

Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("The executor has been enabled. In this example, the prompt is printed, the maximum tokens is set to 128 and the context size is 256. (an example for small scale usage)");
Expand Down
6 changes: 3 additions & 3 deletions LLama.Examples/NewVersion/LoadAndSaveSession.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public static void Run()
Console.Write("Please input your model path: ");
string modelPath = Console.ReadLine();
var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim();
InteractiveExecutor ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
InteractiveExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
ChatSession session = new ChatSession(ex); // The only change is to remove the transform for the output text stream.

Console.ForegroundColor = ConsoleColor.Yellow;
Expand Down Expand Up @@ -45,8 +45,8 @@ public static void Run()
Console.WriteLine("Saved session!");
Console.ForegroundColor = ConsoleColor.White;

ex.Model.Dispose();
ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
ex.Context.Dispose();
ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
session = new ChatSession(ex);
session.LoadSession(statePath);

Expand Down
6 changes: 3 additions & 3 deletions LLama.Examples/NewVersion/LoadAndSaveState.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public static void Run()
string modelPath = Console.ReadLine();
var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim();

InteractiveExecutor ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 256)));
InteractiveExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 256)));

Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("The executor has been enabled. In this example, the prompt is printed, the maximum tokens is set to 64 and the context size is 256. (an example for small scale usage)");
Expand All @@ -37,7 +37,7 @@ public static void Run()
{
Console.Write("Your path to save model state: ");
string modelStatePath = Console.ReadLine();
ex.Model.SaveState(modelStatePath);
ex.Context.SaveState(modelStatePath);

Console.Write("Your path to save executor state: ");
string executorStatePath = Console.ReadLine();
Expand All @@ -47,7 +47,7 @@ public static void Run()
Console.WriteLine("All states saved!");
Console.ForegroundColor = ConsoleColor.White;

var model = ex.Model;
var model = ex.Context;
model.LoadState(modelStatePath);
ex = new InteractiveExecutor(model);
ex.LoadState(executorStatePath);
Expand Down
2 changes: 1 addition & 1 deletion LLama.Examples/NewVersion/StatelessModeExecute.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public static void Run()
Console.Write("Please input your model path: ");
string modelPath = Console.ReadLine();

StatelessExecutor ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 256)));
StatelessExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 256)));

Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("The executor has been enabled. In this example, the inference is an one-time job. That says, the previous input and response has " +
Expand Down
74 changes: 74 additions & 0 deletions LLama.Examples/NewVersion/TalkToYourself.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
using System.Security.Cryptography;
using System.Text;
using LLama.Abstractions;
using LLama.Common;

namespace LLama.Examples.NewVersion
{
    /// <summary>
    /// Example: two independent contexts sharing one set of model weights hold an
    /// automatic conversation with each other ("Alice" and "Bob") until a key is pressed.
    /// </summary>
    public class TalkToYourself
    {
        public static async Task Run()
        {
            Console.Write("Please input your model path: ");
            var path = Console.ReadLine();

            // Load the weights once; a random seed makes each run produce a different dialog.
            var parameters = new ModelParams(path)
            {
                Seed = RandomNumberGenerator.GetInt32(int.MaxValue)
            };
            using var model = LLamaWeights.LoadFromFile(parameters);

            // Two separate contexts (and executors) backed by the same weights in memory.
            using var contextA = model.CreateContext(parameters, Encoding.UTF8);
            using var contextB = model.CreateContext(parameters, Encoding.UTF8);
            var alice = new InteractiveExecutor(contextA);
            var bob = new InteractiveExecutor(contextB);

            // Seed Alice with her system prompt (neither prompt nor response is echoed).
            var alicePrompt = "Transcript of a dialog, where the Alice interacts a person named Bob. Alice is friendly, kind, honest and good at writing.\nAlice: Hello";
            var aliceResponse = await Prompt(alice, ConsoleColor.Green, alicePrompt, false, false);

            // Seed Bob with his system prompt plus Alice's opening line (both echoed).
            var bobPrompt = $"Transcript of a dialog, where the Bob interacts a person named Alice. Bob is smart, intellectual and good at writing.\nAlice: Hello{aliceResponse}";
            var bobResponse = await Prompt(bob, ConsoleColor.Red, bobPrompt, true, true);

            // Feed each speaker's output to the other until the user presses any key.
            do
            {
                aliceResponse = await Prompt(alice, ConsoleColor.Green, bobResponse, false, true);
                bobResponse = await Prompt(bob, ConsoleColor.Red, aliceResponse, false, true);
            } while (!Console.KeyAvailable);
        }

        /// <summary>
        /// Run one inference turn on <paramref name="executor"/> and collect the streamed
        /// output into a string, optionally echoing the prompt and/or response to the console.
        /// </summary>
        private static async Task<string> Prompt(ILLamaExecutor executor, ConsoleColor color, string prompt, bool showPrompt, bool showResponse)
        {
            // Mirostat-2 sampling with a small anti-prompt list so turns end at a speaker tag.
            var inferenceParams = new InferenceParams
            {
                Temperature = 0.9f,
                AntiPrompts = new List<string> { "Alice:", "Bob:", "User:" },
                MaxTokens = 128,
                Mirostat = MirostatType.Mirostat2,
                MirostatTau = 10,
            };

            Console.ForegroundColor = ConsoleColor.White;
            if (showPrompt)
                Console.Write(prompt);

            Console.ForegroundColor = color;
            var response = new StringBuilder();
            await foreach (var token in executor.InferAsync(prompt, inferenceParams))
            {
                response.Append(token);
                if (showResponse)
                    Console.Write(token);
            }

            return response.ToString();
        }
    }
}
5 changes: 5 additions & 0 deletions LLama.Examples/NewVersion/TestRunner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ public static async Task Run()
Console.WriteLine("6: Load and save state of model and executor.");
Console.WriteLine("7: Get embeddings from LLama model.");
Console.WriteLine("8: Quantize the model.");
Console.WriteLine("9: Automatic conversation.");

while (true)
{
Expand Down Expand Up @@ -64,6 +65,10 @@ public static async Task Run()
{
QuantizeModel.Run();
}
else if (choice == 9)
{
await TalkToYourself.Run();
}
else
{
Console.WriteLine("Cannot parse your choice. Please select again.");
Expand Down
3 changes: 1 addition & 2 deletions LLama.Unittest/BasicTest.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
using LLama;
using LLama.Common;

namespace LLama.Unittest
Expand All @@ -8,7 +7,7 @@ public class BasicTest
[Fact]
public void LoadModel()
{
var model = new LLamaModel(new ModelParams("Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin", contextSize: 256));
var model = new LLamaContext(new ModelParams("Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin", contextSize: 256));
model.Dispose();
}
}
Expand Down
36 changes: 36 additions & 0 deletions LLama.Unittest/LLamaContextTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
using System.Text;
using LLama.Common;

namespace LLama.Unittest
{
    /// <summary>
    /// Tests for <see cref="LLamaContext"/> created from shared <see cref="LLamaWeights"/>.
    /// The weights and context are built once per test (xUnit creates a new instance of
    /// this class for every test method) and torn down in <see cref="Dispose"/>.
    /// </summary>
    public class LLamaContextTests
        : IDisposable
    {
        private readonly LLamaWeights _weights;
        private readonly LLamaContext _context;

        public LLamaContextTests()
        {
            var @params = new ModelParams("Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin")
            {
                ContextSize = 768,
            };
            _weights = LLamaWeights.LoadFromFile(@params);
            _context = _weights.CreateContext(@params, Encoding.UTF8);
        }

        public void Dispose()
        {
            // Dispose in reverse order of creation: the context holds a native handle
            // derived from the weights, so it must be released before the weights are.
            _context.Dispose();
            _weights.Dispose();
        }

        [Fact]
        public void CheckProperties()
        {
            // Context size comes from ModelParams above; embedding size and vocab
            // count are fixed characteristics of the llama-2-7b model file.
            Assert.Equal(768, _context.ContextSize);
            Assert.Equal(4096, _context.EmbeddingSize);
            Assert.Equal(32000, _context.VocabCount);
        }
    }
}
3 changes: 2 additions & 1 deletion LLama.Web/Models/ModelSession.cs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ public void Dispose()
{
_inferenceOptions = null;
_outputTransform = null;
_executor.Model?.Dispose();

_executor?.Context.Dispose();
_executor = null;
}
}
Expand Down
2 changes: 1 addition & 1 deletion LLama.Web/Services/ConnectionSessionService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public Task<IServiceResult<ModelSession>> CreateAsync(LLamaExecutorType executor
return Task.FromResult(ServiceResult.FromError<ModelSession>("Maximum model instances reached"));

// Create model
var llamaModel = new LLamaModel(modelOption);
var llamaModel = new LLamaContext(modelOption);

// Create executor
ILLamaExecutor executor = executorType switch
Expand Down
8 changes: 4 additions & 4 deletions LLama.WebAPI/Services/StatefulChatService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,21 @@ namespace LLama.WebAPI.Services;
public class StatefulChatService : IDisposable
{
private readonly ChatSession _session;
private readonly LLamaModel _model;
private readonly LLamaContext _context;
private bool _continue = false;

private const string SystemPrompt = "Transcript of a dialog, where the User interacts with an Assistant. Assistant is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.\n\n"
+ "User: ";

public StatefulChatService(IConfiguration configuration)
{
_model = new LLamaModel(new Common.ModelParams(configuration["ModelPath"], contextSize: 512));
_session = new ChatSession(new InteractiveExecutor(_model));
_context = new LLamaContext(new Common.ModelParams(configuration["ModelPath"], contextSize: 512));
_session = new ChatSession(new InteractiveExecutor(_context));
}

public void Dispose()
{
_model?.Dispose();
_context?.Dispose();
}

public string Send(SendMessageInput input)
Expand Down
6 changes: 3 additions & 3 deletions LLama.WebAPI/Services/StatelessChatService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ namespace LLama.WebAPI.Services
{
public class StatelessChatService
{
private readonly LLamaModel _model;
private readonly LLamaContext _context;
private readonly ChatSession _session;

public StatelessChatService(IConfiguration configuration)
{
_model = new LLamaModel(new ModelParams(configuration["ModelPath"], contextSize: 512));
_context = new LLamaContext(new ModelParams(configuration["ModelPath"], contextSize: 512));
// TODO: replace with a stateless executor
_session = new ChatSession(new InteractiveExecutor(_model))
_session = new ChatSession(new InteractiveExecutor(_context))
.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { "User:", "Assistant:" }, redundancyLength: 8))
.WithHistoryTransform(new HistoryTransform());
}
Expand Down
4 changes: 2 additions & 2 deletions LLama/Abstractions/ILLamaExecutor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ namespace LLama.Abstractions
public interface ILLamaExecutor
{
/// <summary>
/// The loaded model for this executor.
/// The loaded context for this executor.
/// </summary>
public LLamaModel Model { get; }
public LLamaContext Context { get; }

/// <summary>
/// Infers a response from the model.
Expand Down
4 changes: 2 additions & 2 deletions LLama/ChatSession.cs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public virtual void SaveSession(string path)
{
Directory.CreateDirectory(path);
}
_executor.Model.SaveState(Path.Combine(path, _modelStateFilename));
_executor.Context.SaveState(Path.Combine(path, _modelStateFilename));
if(Executor is StatelessExecutor)
{

Expand All @@ -116,7 +116,7 @@ public virtual void LoadSession(string path)
{
throw new FileNotFoundException($"Directory {path} does not exist.");
}
_executor.Model.LoadState(Path.Combine(path, _modelStateFilename));
_executor.Context.LoadState(Path.Combine(path, _modelStateFilename));
if (Executor is StatelessExecutor)
{

Expand Down
Loading
Loading