BatchedExecutor.Create() method (#613)
Replaced `BatchedExecutor.Prompt(string)` method with `BatchedExecutor.Create()` method. This improves the API in two ways:
 - A conversation can be created without immediately prompting it
 - Other prompting overloads (e.g. prompt with token list) can be used without duplicating all the overloads onto `BatchedExecutor`

Added `BatchSize` property to `LLamaContext`
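
In practice the new pattern looks like this — a minimal sketch based on the updated examples in this commit; the model path and prompt text are placeholders, and the setup assumes the usual `ModelParams`/`LLamaWeights.LoadFromFile` loading path:

```csharp
using LLama;
using LLama.Batched;
using LLama.Common;

var modelPath = "path/to/model.gguf"; // placeholder

var parameters = new ModelParams(modelPath);
using var model = LLamaWeights.LoadFromFile(parameters);
using var executor = new BatchedExecutor(model, parameters);

// A conversation can now be created without being prompted immediately...
using var conversation = executor.Create();

// ...and prompted later, through whichever Conversation.Prompt overload fits.
conversation.Prompt("The quick brown fox");
await executor.Infer();
```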
martindevans authored Mar 20, 2024
1 parent e3ecc31 · commit ad682fb
Showing 5 changed files with 27 additions and 5 deletions.
LLama.Examples/Examples/BatchedExecutorFork.cs (2 additions, 1 deletion)

```diff
@@ -31,7 +31,8 @@ public static async Task Run()
         Console.WriteLine($"Created executor with model: {name}");
 
         // Evaluate the initial prompt to create one conversation
-        using var start = executor.Prompt(prompt);
+        using var start = executor.Create();
+        start.Prompt(prompt);
         await executor.Infer();
 
         // Create the root node of the tree
```
LLama.Examples/Examples/BatchedExecutorGuidance.cs (4 additions, 2 deletions)

```diff
@@ -33,8 +33,10 @@ public static async Task Run()
         Console.WriteLine($"Created executor with model: {name}");
 
         // Load the two prompts into two conversations
-        using var guided = executor.Prompt(positivePrompt);
-        using var guidance = executor.Prompt(negativePrompt);
+        using var guided = executor.Create();
+        guided.Prompt(positivePrompt);
+        using var guidance = executor.Create();
+        guidance.Prompt(negativePrompt);
 
         // Run inference to evaluate prompts
         await AnsiConsole
```
LLama.Examples/Examples/BatchedExecutorRewind.cs (2 additions, 1 deletion)

```diff
@@ -32,7 +32,8 @@ public static async Task Run()
         Console.WriteLine($"Created executor with model: {name}");
 
         // Evaluate the initial prompt to create one conversation
-        using var conversation = executor.Prompt(prompt);
+        using var conversation = executor.Create();
+        conversation.Prompt(prompt);
 
         // Create the start node wrapping the conversation
         var node = new Node(executor.Context);
```
LLama/Batched/BatchedExecutor.cs (14 additions, 1 deletion)

```diff
@@ -68,17 +68,30 @@ public BatchedExecutor(LLamaWeights model, IContextParams contextParams)
     /// </summary>
     /// <param name="prompt"></param>
     /// <returns></returns>
+    [Obsolete("Use BatchedExecutor.Create instead")]
     public Conversation Prompt(string prompt)
     {
         if (IsDisposed)
             throw new ObjectDisposedException(nameof(BatchedExecutor));
 
-        var conversation = new Conversation(this, GetNextSequenceId(), 0);
+        var conversation = Create();
         conversation.Prompt(prompt);
 
         return conversation;
     }
 
+    /// <summary>
+    /// Start a new <see cref="Conversation"/>
+    /// </summary>
+    /// <returns></returns>
+    public Conversation Create()
+    {
+        if (IsDisposed)
+            throw new ObjectDisposedException(nameof(BatchedExecutor));
+
+        return new Conversation(this, GetNextSequenceId(), 0);
+    }
+
     /// <summary>
     /// Run inference for all conversations in the batch which have pending tokens.
     ///
```
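
Because `Create()` returns an unprompted `Conversation`, the other prompting overloads mentioned in the commit description can be reached without going through `BatchedExecutor`. A sketch, continuing from the setup above and assuming `Conversation.Prompt` accepts the token array produced by `LLamaContext.Tokenize`:

```csharp
// Tokenize manually, then prompt with the token list directly.
// Assumes the token-list Prompt overload noted in the commit description.
var tokens = executor.Context.Tokenize("An example prompt");
using var conversation = executor.Create();
conversation.Prompt(tokens);
await executor.Infer();
```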
LLama/LLamaContext.cs (5 additions, 0 deletions)

```diff
@@ -85,6 +85,11 @@ public uint BatchThreads
         }
     }
 
+    /// <summary>
+    /// Get the maximum batch size for this context
+    /// </summary>
+    public uint BatchSize => NativeHandle.BatchSize;
+
     /// <summary>
     /// Create a new LLamaContext for the given LLamaWeights
     /// </summary>
```
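
One plausible use of the new property — a hypothetical guard that is not part of this commit, continuing from the sketch above with `promptText` as a placeholder — is to check that a prompt fits in a single batch before submitting it:

```csharp
// Hypothetical check (not in this commit): reject prompts longer than
// the context's configured batch size before prompting.
var tokens = executor.Context.Tokenize(promptText);
if (tokens.Length > executor.Context.BatchSize)
    throw new ArgumentException($"Prompt is {tokens.Length} tokens but batch size is {executor.Context.BatchSize}");
conversation.Prompt(tokens);
```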
