Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multi Context #90

Merged
merged 13 commits into from
Aug 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion LLama.Examples/NewVersion/ChatSessionStripRoleName.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public static void Run()
Console.Write("Please input your model path: ");
string modelPath = Console.ReadLine();
var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim();
InteractiveExecutor ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
InteractiveExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
ChatSession session = new ChatSession(ex).WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { "User:", "Bob:" }, redundancyLength: 8));

Console.ForegroundColor = ConsoleColor.Yellow;
Expand Down
2 changes: 1 addition & 1 deletion LLama.Examples/NewVersion/ChatSessionWithRoleName.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public static void Run()
Console.Write("Please input your model path: ");
string modelPath = Console.ReadLine();
var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim();
InteractiveExecutor ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
InteractiveExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
ChatSession session = new ChatSession(ex); // The only change is to remove the transform for the output text stream.

Console.ForegroundColor = ConsoleColor.Yellow;
Expand Down
2 changes: 1 addition & 1 deletion LLama.Examples/NewVersion/InstructModeExecute.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public static void Run()
string modelPath = Console.ReadLine();
var prompt = File.ReadAllText("Assets/dan.txt").Trim();

InstructExecutor ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 1024)));
InstructExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 1024)));

Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("The executor has been enabled. In this example, the LLM will follow your instructions. For example, you can input \"Write a story about a fox who want to " +
Expand Down
2 changes: 1 addition & 1 deletion LLama.Examples/NewVersion/InteractiveModeExecute.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public async static Task Run()
string modelPath = Console.ReadLine();
var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim();

InteractiveExecutor ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 256)));
InteractiveExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 256)));

Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("The executor has been enabled. In this example, the prompt is printed, the maximum tokens is set to 128 and the context size is 256. (an example for small scale usage)");
Expand Down
6 changes: 3 additions & 3 deletions LLama.Examples/NewVersion/LoadAndSaveSession.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public static void Run()
Console.Write("Please input your model path: ");
string modelPath = Console.ReadLine();
var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim();
InteractiveExecutor ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
InteractiveExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
ChatSession session = new ChatSession(ex); // The only change is to remove the transform for the output text stream.

Console.ForegroundColor = ConsoleColor.Yellow;
Expand Down Expand Up @@ -45,8 +45,8 @@ public static void Run()
Console.WriteLine("Saved session!");
Console.ForegroundColor = ConsoleColor.White;

ex.Model.Dispose();
ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
ex.Context.Dispose();
ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
session = new ChatSession(ex);
session.LoadSession(statePath);

Expand Down
6 changes: 3 additions & 3 deletions LLama.Examples/NewVersion/LoadAndSaveState.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public static void Run()
string modelPath = Console.ReadLine();
var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim();

InteractiveExecutor ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 256)));
InteractiveExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 256)));

Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("The executor has been enabled. In this example, the prompt is printed, the maximum tokens is set to 64 and the context size is 256. (an example for small scale usage)");
Expand All @@ -37,7 +37,7 @@ public static void Run()
{
Console.Write("Your path to save model state: ");
string modelStatePath = Console.ReadLine();
ex.Model.SaveState(modelStatePath);
ex.Context.SaveState(modelStatePath);

Console.Write("Your path to save executor state: ");
string executorStatePath = Console.ReadLine();
Expand All @@ -47,7 +47,7 @@ public static void Run()
Console.WriteLine("All states saved!");
Console.ForegroundColor = ConsoleColor.White;

var model = ex.Model;
var model = ex.Context;
model.LoadState(modelStatePath);
ex = new InteractiveExecutor(model);
ex.LoadState(executorStatePath);
Expand Down
2 changes: 1 addition & 1 deletion LLama.Examples/NewVersion/StatelessModeExecute.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public static void Run()
Console.Write("Please input your model path: ");
string modelPath = Console.ReadLine();

StatelessExecutor ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 256)));
StatelessExecutor ex = new(new LLamaContext(new ModelParams(modelPath, contextSize: 256)));

Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("The executor has been enabled. In this example, the inference is an one-time job. That says, the previous input and response has " +
Expand Down
74 changes: 74 additions & 0 deletions LLama.Examples/NewVersion/TalkToYourself.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
using System.Security.Cryptography;
using System.Text;
using LLama.Abstractions;
using LLama.Common;

namespace LLama.Examples.NewVersion
{
    /// <summary>
    /// Example: two independent contexts sharing one set of model weights hold an
    /// automatic conversation with each other ("Alice" and "Bob") until a key is pressed.
    /// </summary>
    public class TalkToYourself
    {
        public static async Task Run()
        {
            Console.Write("Please input your model path: ");
            var path = Console.ReadLine();

            // Load the weights once; a random seed makes each run produce a different dialog.
            var parameters = new ModelParams(path)
            {
                Seed = RandomNumberGenerator.GetInt32(int.MaxValue)
            };
            using var model = LLamaWeights.LoadFromFile(parameters);

            // Two separate contexts (and executors) backed by the same weights in memory.
            using var contextA = model.CreateContext(parameters, Encoding.UTF8);
            using var contextB = model.CreateContext(parameters, Encoding.UTF8);
            var alice = new InteractiveExecutor(contextA);
            var bob = new InteractiveExecutor(contextB);

            // Seed Alice with her system prompt (neither prompt nor response is echoed).
            var alicePrompt = "Transcript of a dialog, where the Alice interacts a person named Bob. Alice is friendly, kind, honest and good at writing.\nAlice: Hello";
            var aliceResponse = await Prompt(alice, ConsoleColor.Green, alicePrompt, false, false);

            // Seed Bob with his system prompt plus Alice's opening line (both echoed).
            var bobPrompt = $"Transcript of a dialog, where the Bob interacts a person named Alice. Bob is smart, intellectual and good at writing.\nAlice: Hello{aliceResponse}";
            var bobResponse = await Prompt(bob, ConsoleColor.Red, bobPrompt, true, true);

            // Feed each speaker's output to the other until the user presses any key.
            do
            {
                aliceResponse = await Prompt(alice, ConsoleColor.Green, bobResponse, false, true);
                bobResponse = await Prompt(bob, ConsoleColor.Red, aliceResponse, false, true);
            } while (!Console.KeyAvailable);
        }

        /// <summary>
        /// Run one inference turn on <paramref name="executor"/> and collect the streamed
        /// output into a string, optionally echoing the prompt and/or response to the console.
        /// </summary>
        private static async Task<string> Prompt(ILLamaExecutor executor, ConsoleColor color, string prompt, bool showPrompt, bool showResponse)
        {
            // Mirostat-2 sampling with a small anti-prompt list so turns end at a speaker tag.
            var inferenceParams = new InferenceParams
            {
                Temperature = 0.9f,
                AntiPrompts = new List<string> { "Alice:", "Bob:", "User:" },
                MaxTokens = 128,
                Mirostat = MirostatType.Mirostat2,
                MirostatTau = 10,
            };

            Console.ForegroundColor = ConsoleColor.White;
            if (showPrompt)
                Console.Write(prompt);

            Console.ForegroundColor = color;
            var response = new StringBuilder();
            await foreach (var token in executor.InferAsync(prompt, inferenceParams))
            {
                response.Append(token);
                if (showResponse)
                    Console.Write(token);
            }

            return response.ToString();
        }
    }
}
5 changes: 5 additions & 0 deletions LLama.Examples/NewVersion/TestRunner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ public static async Task Run()
Console.WriteLine("6: Load and save state of model and executor.");
Console.WriteLine("7: Get embeddings from LLama model.");
Console.WriteLine("8: Quantize the model.");
Console.WriteLine("9: Automatic conversation.");

while (true)
{
Expand Down Expand Up @@ -64,6 +65,10 @@ public static async Task Run()
{
QuantizeModel.Run();
}
else if (choice == 9)
{
await TalkToYourself.Run();
}
else
{
Console.WriteLine("Cannot parse your choice. Please select again.");
Expand Down
3 changes: 1 addition & 2 deletions LLama.Unittest/BasicTest.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
using LLama;
using LLama.Common;

namespace LLama.Unittest
Expand All @@ -8,7 +7,7 @@ public class BasicTest
[Fact]
public void LoadModel()
{
var model = new LLamaModel(new ModelParams("Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin", contextSize: 256));
var model = new LLamaContext(new ModelParams("Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin", contextSize: 256));
model.Dispose();
}
}
Expand Down
36 changes: 36 additions & 0 deletions LLama.Unittest/LLamaContextTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
using System.Text;
using LLama.Common;

namespace LLama.Unittest
{
    /// <summary>
    /// Tests for <see cref="LLamaContext"/> created from shared <see cref="LLamaWeights"/>.
    /// The weights and context are built once per test (xUnit creates a new instance of
    /// this class for every test method) and torn down in <see cref="Dispose"/>.
    /// </summary>
    public class LLamaContextTests
        : IDisposable
    {
        private readonly LLamaWeights _weights;
        private readonly LLamaContext _context;

        public LLamaContextTests()
        {
            var @params = new ModelParams("Models/llama-2-7b-chat.ggmlv3.q3_K_S.bin")
            {
                ContextSize = 768,
            };
            _weights = LLamaWeights.LoadFromFile(@params);
            _context = _weights.CreateContext(@params, Encoding.UTF8);
        }

        public void Dispose()
        {
            // Dispose in reverse order of creation: the context holds a native handle
            // derived from the weights, so it must be released before the weights are.
            _context.Dispose();
            _weights.Dispose();
        }

        [Fact]
        public void CheckProperties()
        {
            // Context size comes from ModelParams above; embedding size and vocab
            // count are fixed characteristics of the llama-2-7b model file.
            Assert.Equal(768, _context.ContextSize);
            Assert.Equal(4096, _context.EmbeddingSize);
            Assert.Equal(32000, _context.VocabCount);
        }
    }
}
3 changes: 2 additions & 1 deletion LLama.Web/Models/ModelSession.cs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ public void Dispose()
{
_inferenceOptions = null;
_outputTransform = null;
_executor.Model?.Dispose();

_executor?.Context.Dispose();
_executor = null;
}
}
Expand Down
2 changes: 1 addition & 1 deletion LLama.Web/Services/ConnectionSessionService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public Task<IServiceResult<ModelSession>> CreateAsync(LLamaExecutorType executor
return Task.FromResult(ServiceResult.FromError<ModelSession>("Maximum model instances reached"));

// Create model
var llamaModel = new LLamaModel(modelOption);
var llamaModel = new LLamaContext(modelOption);

// Create executor
ILLamaExecutor executor = executorType switch
Expand Down
8 changes: 4 additions & 4 deletions LLama.WebAPI/Services/StatefulChatService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,21 @@ namespace LLama.WebAPI.Services;
public class StatefulChatService : IDisposable
{
private readonly ChatSession _session;
private readonly LLamaModel _model;
private readonly LLamaContext _context;
private bool _continue = false;

private const string SystemPrompt = "Transcript of a dialog, where the User interacts with an Assistant. Assistant is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.\n\n"
+ "User: ";

public StatefulChatService(IConfiguration configuration)
{
_model = new LLamaModel(new Common.ModelParams(configuration["ModelPath"], contextSize: 512));
_session = new ChatSession(new InteractiveExecutor(_model));
_context = new LLamaContext(new Common.ModelParams(configuration["ModelPath"], contextSize: 512));
_session = new ChatSession(new InteractiveExecutor(_context));
}

public void Dispose()
{
_model?.Dispose();
_context?.Dispose();
}

public string Send(SendMessageInput input)
Expand Down
6 changes: 3 additions & 3 deletions LLama.WebAPI/Services/StatelessChatService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ namespace LLama.WebAPI.Services
{
public class StatelessChatService
{
private readonly LLamaModel _model;
private readonly LLamaContext _context;
private readonly ChatSession _session;

public StatelessChatService(IConfiguration configuration)
{
_model = new LLamaModel(new ModelParams(configuration["ModelPath"], contextSize: 512));
_context = new LLamaContext(new ModelParams(configuration["ModelPath"], contextSize: 512));
// TODO: replace with a stateless executor
_session = new ChatSession(new InteractiveExecutor(_model))
_session = new ChatSession(new InteractiveExecutor(_context))
.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { "User:", "Assistant:" }, redundancyLength: 8))
.WithHistoryTransform(new HistoryTransform());
}
Expand Down
4 changes: 2 additions & 2 deletions LLama/Abstractions/ILLamaExecutor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ namespace LLama.Abstractions
public interface ILLamaExecutor
{
/// <summary>
/// The loaded model for this executor.
/// The loaded context for this executor.
/// </summary>
public LLamaModel Model { get; }
public LLamaContext Context { get; }

/// <summary>
/// Infers a response from the model.
Expand Down
4 changes: 2 additions & 2 deletions LLama/ChatSession.cs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public virtual void SaveSession(string path)
{
Directory.CreateDirectory(path);
}
_executor.Model.SaveState(Path.Combine(path, _modelStateFilename));
_executor.Context.SaveState(Path.Combine(path, _modelStateFilename));
if(Executor is StatelessExecutor)
{

Expand All @@ -116,7 +116,7 @@ public virtual void LoadSession(string path)
{
throw new FileNotFoundException($"Directory {path} does not exist.");
}
_executor.Model.LoadState(Path.Combine(path, _modelStateFilename));
_executor.Context.LoadState(Path.Combine(path, _modelStateFilename));
if (Executor is StatelessExecutor)
{

Expand Down
Loading
Loading