From 02a46fc3639cd0eb989425d9dc7db806dd0c20f5 Mon Sep 17 00:00:00 2001
From: Martin Evans <martindevans@gmail.com>
Date: Thu, 17 Aug 2023 23:26:20 +0100
Subject: [PATCH] Updated demos to use the new loading/multi context system

---
 .../NewVersion/ChatSessionStripRoleName.cs      | 17 ++++++++++-------
 .../NewVersion/ChatSessionWithRoleName.cs       | 15 ++++++++-------
 LLama.Examples/NewVersion/GetEmbeddings.cs      |  7 +------
 .../NewVersion/InstructModeExecute.cs           | 13 ++++++-------
 .../NewVersion/InteractiveModeExecute.cs        | 15 +++++++--------
 LLama.Examples/NewVersion/LoadAndSaveSession.cs | 16 ++++++++--------
 LLama.Examples/NewVersion/LoadAndSaveState.cs   | 17 ++++++++---------
 LLama.Examples/NewVersion/QuantizeModel.cs      | 14 +++++---------
 .../NewVersion/StatelessModeExecute.cs          | 11 +++++------
 LLama.Examples/NewVersion/TestRunner.cs         |  8 +-------
 10 files changed, 59 insertions(+), 74 deletions(-)
diff --git a/LLama.Examples/NewVersion/ChatSessionStripRoleName.cs b/LLama.Examples/NewVersion/ChatSessionStripRoleName.cs
index 6402e360c..230118e5a 100644
--- a/LLama.Examples/NewVersion/ChatSessionStripRoleName.cs
+++ b/LLama.Examples/NewVersion/ChatSessionStripRoleName.cs
@@ -1,9 +1,5 @@
 ﻿using LLama.Common;
-using System;
-using System.Collections.Generic;
-using System.Linq;
 using System.Text;
-using System.Threading.Tasks;
 
 namespace LLama.Examples.NewVersion
 {
@@ -12,15 +8,22 @@ public class ChatSessionStripRoleName
         public static void Run()
         {
             Console.Write("Please input your model path: ");
-            string modelPath = Console.ReadLine();
+            var modelPath = Console.ReadLine();
             var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim();
-            InteractiveExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
-            ChatSession session = new ChatSession(ex).WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { "User:", "Bob:" }, redundancyLength: 8));
+
+            var parameters = new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5);
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            using var context = model.CreateContext(parameters, Encoding.UTF8);
+            var executor = new InteractiveExecutor(context);
+
+            var session = new ChatSession(executor).WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { "User:", "Bob:" }, redundancyLength: 8));
 
             Console.ForegroundColor = ConsoleColor.Yellow;
             Console.WriteLine("The chat session has started. The role names won't be printed.");
             Console.ForegroundColor = ConsoleColor.White;
 
+            // show the prompt
+            Console.Write(prompt);
             while (true)
             {
                 foreach (var text in session.Chat(prompt, new InferenceParams() { Temperature = 0.6f, AntiPrompts = new List<string> { "User:" } }))
diff --git a/LLama.Examples/NewVersion/ChatSessionWithRoleName.cs b/LLama.Examples/NewVersion/ChatSessionWithRoleName.cs
index d1cbf34b2..a3609388d 100644
--- a/LLama.Examples/NewVersion/ChatSessionWithRoleName.cs
+++ b/LLama.Examples/NewVersion/ChatSessionWithRoleName.cs
@@ -1,9 +1,5 @@
 ﻿using LLama.Common;
-using System;
-using System.Collections.Generic;
-using System.Linq;
 using System.Text;
-using System.Threading.Tasks;
 
 namespace LLama.Examples.NewVersion
 {
@@ -12,10 +8,15 @@ public class ChatSessionWithRoleName
         public static void Run()
         {
             Console.Write("Please input your model path: ");
-            string modelPath = Console.ReadLine();
+            var modelPath = Console.ReadLine();
             var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim();
-            InteractiveExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
-            ChatSession session = new ChatSession(ex); // The only change is to remove the transform for the output text stream.
+
+            var parameters = new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5);
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            using var context = model.CreateContext(parameters, Encoding.UTF8);
+            var executor = new InteractiveExecutor(context);
+
+            var session = new ChatSession(executor);
 
             Console.ForegroundColor = ConsoleColor.Yellow;
             Console.WriteLine("The chat session has started. In this example, the prompt is printed for better visual result.");
diff --git a/LLama.Examples/NewVersion/GetEmbeddings.cs b/LLama.Examples/NewVersion/GetEmbeddings.cs
index ed12f868c..516d2da75 100644
--- a/LLama.Examples/NewVersion/GetEmbeddings.cs
+++ b/LLama.Examples/NewVersion/GetEmbeddings.cs
@@ -1,9 +1,4 @@
 ﻿using LLama.Common;
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
 
 namespace LLama.Examples.NewVersion
 {
@@ -12,7 +7,7 @@ public class GetEmbeddings
         public static void Run()
         {
             Console.Write("Please input your model path: ");
-            string modelPath = Console.ReadLine();
+            var modelPath = Console.ReadLine();
             var embedder = new LLamaEmbedder(new ModelParams(modelPath));
 
             while (true)
diff --git a/LLama.Examples/NewVersion/InstructModeExecute.cs b/LLama.Examples/NewVersion/InstructModeExecute.cs
index f81f2f587..0a3840621 100644
--- a/LLama.Examples/NewVersion/InstructModeExecute.cs
+++ b/LLama.Examples/NewVersion/InstructModeExecute.cs
@@ -1,9 +1,5 @@
 ﻿using LLama.Common;
-using System;
-using System.Collections.Generic;
-using System.Linq;
 using System.Text;
-using System.Threading.Tasks;
 
 namespace LLama.Examples.NewVersion
 {
@@ -12,10 +8,13 @@ public class InstructModeExecute
         public static void Run()
         {
             Console.Write("Please input your model path: ");
-            string modelPath = Console.ReadLine();
+            var modelPath = Console.ReadLine();
             var prompt = File.ReadAllText("Assets/dan.txt").Trim();
 
-            InstructExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 1024)));
+            var parameters = new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5);
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            using var context = model.CreateContext(parameters, Encoding.UTF8);
+            var executor = new InstructExecutor(context);
 
             Console.ForegroundColor = ConsoleColor.Yellow;
             Console.WriteLine("The executor has been enabled. In this example, the LLM will follow your instructions. For example, you can input \"Write a story about a fox who want to " +
@@ -26,7 +25,7 @@ public static void Run()
 
             while (true)
             {
-                foreach (var text in ex.Infer(prompt, inferenceParams))
+                foreach (var text in executor.Infer(prompt, inferenceParams))
                 {
                     Console.Write(text);
                 }
diff --git a/LLama.Examples/NewVersion/InteractiveModeExecute.cs b/LLama.Examples/NewVersion/InteractiveModeExecute.cs
index aaacabbed..9fee007f8 100644
--- a/LLama.Examples/NewVersion/InteractiveModeExecute.cs
+++ b/LLama.Examples/NewVersion/InteractiveModeExecute.cs
@@ -1,21 +1,20 @@
 ﻿using LLama.Common;
-using System;
-using System.Collections.Generic;
-using System.Linq;
 using System.Text;
-using System.Threading.Tasks;
 
 namespace LLama.Examples.NewVersion
 {
     public class InteractiveModeExecute
     {
-        public async static Task Run()
+        public static async Task Run()
         {
             Console.Write("Please input your model path: ");
-            string modelPath = Console.ReadLine();
-            var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim();
+            var modelPath = Console.ReadLine();
+            var prompt = (await File.ReadAllTextAsync("Assets/chat-with-bob.txt")).Trim();
 
-            InteractiveExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 256)));
+            var parameters = new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5);
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            using var context = model.CreateContext(parameters, Encoding.UTF8);
+            var ex = new InteractiveExecutor(context);
 
             Console.ForegroundColor = ConsoleColor.Yellow;
-            Console.WriteLine("The executor has been enabled. In this example, the prompt is printed, the maximum tokens is set to 128 and the context size is 256. (an example for small scale usage)");
+            Console.WriteLine("The executor has been enabled. In this example, the prompt is printed, the maximum tokens is set to 128 and the context size is 1024. (an example for small scale usage)");
diff --git a/LLama.Examples/NewVersion/LoadAndSaveSession.cs b/LLama.Examples/NewVersion/LoadAndSaveSession.cs
index cbed9179d..5e5c4252a 100644
--- a/LLama.Examples/NewVersion/LoadAndSaveSession.cs
+++ b/LLama.Examples/NewVersion/LoadAndSaveSession.cs
@@ -1,10 +1,5 @@
 ﻿using LLama.Common;
-using LLama.OldVersion;
-using System;
-using System.Collections.Generic;
-using System.Linq;
 using System.Text;
-using System.Threading.Tasks;
 
 namespace LLama.Examples.NewVersion
 {
@@ -13,10 +8,15 @@ public class SaveAndLoadSession
         public static void Run()
         {
             Console.Write("Please input your model path: ");
-            string modelPath = Console.ReadLine();
+            var modelPath = Console.ReadLine();
             var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim();
-            InteractiveExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
-            ChatSession session = new ChatSession(ex); // The only change is to remove the transform for the output text stream.
+
+            var parameters = new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5);
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            using var context = model.CreateContext(parameters, Encoding.UTF8);
+            var ex = new InteractiveExecutor(context);
+
+            var session = new ChatSession(ex);
 
             Console.ForegroundColor = ConsoleColor.Yellow;
             Console.WriteLine("The chat session has started. In this example, the prompt is printed for better visual result. Input \"save\" to save and reload the session.");
diff --git a/LLama.Examples/NewVersion/LoadAndSaveState.cs b/LLama.Examples/NewVersion/LoadAndSaveState.cs
index 15f2f815f..1a1c0d88e 100644
--- a/LLama.Examples/NewVersion/LoadAndSaveState.cs
+++ b/LLama.Examples/NewVersion/LoadAndSaveState.cs
@@ -1,9 +1,5 @@
 ﻿using LLama.Common;
-using System;
-using System.Collections.Generic;
-using System.Linq;
 using System.Text;
-using System.Threading.Tasks;
 
 namespace LLama.Examples.NewVersion
 {
@@ -12,10 +8,13 @@ public class LoadAndSaveState
         public static void Run()
         {
             Console.Write("Please input your model path: ");
-            string modelPath = Console.ReadLine();
+            var modelPath = Console.ReadLine();
             var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim();
 
-            InteractiveExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 256)));
+            var parameters = new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5);
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            using var context = model.CreateContext(parameters, Encoding.UTF8);
+            var ex = new InteractiveExecutor(context);
 
             Console.ForegroundColor = ConsoleColor.Yellow;
-            Console.WriteLine("The executor has been enabled. In this example, the prompt is printed, the maximum tokens is set to 64 and the context size is 256. (an example for small scale usage)");
+            Console.WriteLine("The executor has been enabled. In this example, the prompt is printed, the maximum tokens is set to 64 and the context size is 1024. (an example for small scale usage)");
@@ -47,9 +46,9 @@ public static void Run()
                     Console.WriteLine("All states saved!");
                     Console.ForegroundColor = ConsoleColor.White;
 
-                    var model = ex.Context;
-                    model.LoadState(modelStatePath);
-                    ex = new InteractiveExecutor(model);
+                    var ctx = ex.Context;
+                    ctx.LoadState(modelStatePath);
+                    ex = new InteractiveExecutor(ctx);
                     ex.LoadState(executorStatePath);
                     Console.ForegroundColor = ConsoleColor.Yellow;
                     Console.WriteLine("Loaded state!");
diff --git a/LLama.Examples/NewVersion/QuantizeModel.cs b/LLama.Examples/NewVersion/QuantizeModel.cs
index a5ad81d88..71966af8f 100644
--- a/LLama.Examples/NewVersion/QuantizeModel.cs
+++ b/LLama.Examples/NewVersion/QuantizeModel.cs
@@ -1,11 +1,4 @@
-﻿using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading;
-using System.Threading.Tasks;
-
-namespace LLama.Examples.NewVersion
+﻿namespace LLama.Examples.NewVersion
 {
     public class QuantizeModel
     {
@@ -13,13 +6,16 @@ public static void Run()
         {
             Console.Write("Please input your original model path: ");
             var inputPath = Console.ReadLine();
+
             Console.Write("Please input your output model path: ");
             var outputPath = Console.ReadLine();
+
             Console.Write("Please input the quantize type (one of q4_0, q4_1, q5_0, q5_1, q8_0): ");
             var quantizeType = Console.ReadLine();
+
             if (LLamaQuantizer.Quantize(inputPath, outputPath, quantizeType))
             {
-                Console.WriteLine("Quantization succeed!");
+                Console.WriteLine("Quantization succeeded!");
             }
             else
             {
diff --git a/LLama.Examples/NewVersion/StatelessModeExecute.cs b/LLama.Examples/NewVersion/StatelessModeExecute.cs
index 8ff2c0a1a..dadaf70a4 100644
--- a/LLama.Examples/NewVersion/StatelessModeExecute.cs
+++ b/LLama.Examples/NewVersion/StatelessModeExecute.cs
@@ -1,9 +1,5 @@
 ﻿using LLama.Common;
-using System;
-using System.Collections.Generic;
-using System.Linq;
 using System.Text;
-using System.Threading.Tasks;
 
 namespace LLama.Examples.NewVersion
 {
@@ -12,9 +8,12 @@ public class StatelessModeExecute
         public static void Run()
         {
             Console.Write("Please input your model path: ");
-            string modelPath = Console.ReadLine();
+            var modelPath = Console.ReadLine();
 
-            StatelessExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 256)));
+            var parameters = new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5);
+            using var model = LLamaWeights.LoadFromFile(parameters);
+            using var context = model.CreateContext(parameters, Encoding.UTF8);
+            var ex = new StatelessExecutor(context);
 
             Console.ForegroundColor = ConsoleColor.Yellow;
             Console.WriteLine("The executor has been enabled. In this example, the inference is an one-time job. That says, the previous input and response has " +
diff --git a/LLama.Examples/NewVersion/TestRunner.cs b/LLama.Examples/NewVersion/TestRunner.cs
index c90bc78de..6cc3f3dac 100644
--- a/LLama.Examples/NewVersion/TestRunner.cs
+++ b/LLama.Examples/NewVersion/TestRunner.cs
@@ -1,10 +1,4 @@
-﻿using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-
-namespace LLama.Examples.NewVersion
+﻿namespace LLama.Examples.NewVersion
 {
     public class NewVersionTestRunner
     {