forked from SciSharp/LLamaSharp
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'SciSharp:master' into master
- Loading branch information
Showing
15 changed files
with
487 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
using System.Reflection.Metadata; | ||
using System.Security.Cryptography; | ||
using System.Text; | ||
using LLama.Abstractions; | ||
using LLama.Common; | ||
using Microsoft.SemanticKernel; | ||
using Microsoft.SemanticKernel.AI.ChatCompletion; | ||
using Microsoft.SemanticKernel.AI.TextCompletion; | ||
using LLamaSharp.SemanticKernel.ChatCompletion; | ||
using LLamaSharp.SemanticKernel.TextCompletion; | ||
|
||
namespace LLama.Examples.NewVersion
{
    /// <summary>
    /// Example: drive a local LLamaSharp model through the Semantic Kernel
    /// chat-completion abstraction, mirroring the upstream SK README sample.
    /// </summary>
    public class SemanticKernelChat
    {
        public static async Task Run()
        {
            Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md");
            Console.Write("Please input your model path: ");
            var modelPath = Console.ReadLine();

            // Load weights into memory; a random seed makes each run's sampling differ.
            var parameters = new ModelParams(modelPath)
            {
                Seed = RandomNumberGenerator.GetInt32(int.MaxValue),
            };
            using var model = LLamaWeights.LoadFromFile(parameters);
            using var context = model.CreateContext(parameters);
            var executor = new InteractiveExecutor(context);

            var chatCompletion = new LLamaSharpChatCompletion(executor);

            // Seed the conversation with a system instruction.
            var chatHistory = chatCompletion.CreateNewChat("You are a librarian, expert about books");

            Console.WriteLine("Chat content:");
            Console.WriteLine("------------------------");

            chatHistory.AddUserMessage("Hi, I'm looking for book suggestions");
            await MessageOutputAsync(chatHistory);

            // First bot assistant message
            var reply = await chatCompletion.GenerateMessageAsync(chatHistory);
            chatHistory.AddAssistantMessage(reply);
            await MessageOutputAsync(chatHistory);

            // Second user message
            chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion");
            await MessageOutputAsync(chatHistory);

            // Second bot assistant message
            reply = await chatCompletion.GenerateMessageAsync(chatHistory);
            chatHistory.AddAssistantMessage(reply);
            await MessageOutputAsync(chatHistory);
        }

        /// <summary>
        /// Outputs the last message of the chat history
        /// </summary>
        private static Task MessageOutputAsync(Microsoft.SemanticKernel.AI.ChatCompletion.ChatHistory chatHistory)
        {
            var latest = chatHistory.Messages[^1];

            Console.WriteLine($"{latest.Role}: {latest.Content}");
            Console.WriteLine("------------------------");

            return Task.CompletedTask;
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
using System.Reflection.Metadata; | ||
using System.Security.Cryptography; | ||
using System.Text; | ||
using LLama.Abstractions; | ||
using LLama.Common; | ||
using Microsoft.SemanticKernel; | ||
using Microsoft.SemanticKernel.AI.ChatCompletion; | ||
using Microsoft.SemanticKernel.AI.TextCompletion; | ||
using LLamaSharp.SemanticKernel.TextCompletion; | ||
|
||
namespace LLama.Examples.NewVersion
{
    /// <summary>
    /// Example: register a LLamaSharp stateless executor as a Semantic Kernel
    /// text-completion service and run a semantic "TLDR" function over two texts.
    /// </summary>
    public class SemanticKernelPrompt
    {
        public static async Task Run()
        {
            Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example17_ChatGPT.cs");
            Console.Write("Please input your model path: ");
            var modelPath = Console.ReadLine();

            // Load weights into memory; a random seed makes each run's sampling differ.
            var parameters = new ModelParams(modelPath)
            {
                Seed = RandomNumberGenerator.GetInt32(int.MaxValue),
            };
            using var model = LLamaWeights.LoadFromFile(parameters);
            var executor = new StatelessExecutor(model, parameters);

            // Register the local model as the kernel's ITextCompletion service.
            var builder = new KernelBuilder();
            builder.WithAIService<ITextCompletion>("local-llama", new LLamaSharpTextCompletion(executor), true);
            var kernel = builder.Build();

            var prompt = @"{{$input}}
One line TLDR with the fewest words.";

            // Semantic function that summarizes whatever is bound to {{$input}}.
            var summarize = kernel.CreateSemanticFunction(prompt, maxTokens: 100);

            string text1 = @"
1st Law of Thermodynamics - Energy cannot be created or destroyed.
2nd Law of Thermodynamics - For a spontaneous process, the entropy of the universe increases.
3rd Law of Thermodynamics - A perfect crystal at zero Kelvin has zero entropy.";

            string text2 = @"
1. An object at rest remains at rest, and an object in motion remains in motion at constant speed and in a straight line unless acted on by an unbalanced force.
2. The acceleration of an object depends on the mass of the object and the amount of force applied.
3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite on the first.";

            Console.WriteLine(await summarize.InvokeAsync(text1));

            Console.WriteLine(await summarize.InvokeAsync(text2));
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
using static LLama.LLamaTransforms; | ||
|
||
namespace LLamaSharp.SemanticKernel.ChatCompletion; | ||
|
||
/// <summary>
/// Default HistoryTransform Patch
/// </summary>
public class HistoryTransform : DefaultHistoryTransform
{
    /// <inheritdoc/>
    public override string HistoryToText(global::LLama.Common.ChatHistory history)
    {
        // Append a trailing assistant tag so the model continues as the assistant.
        return $"{base.HistoryToText(history)}\nAssistant:";
    }
}
74 changes: 74 additions & 0 deletions
74
LLama.SemanticKernel/ChatCompletion/LLamaSharpChatCompletion.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
using LLama; | ||
using Microsoft.SemanticKernel.AI.ChatCompletion; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.Linq; | ||
using System.Runtime.CompilerServices; | ||
using System.Text; | ||
using System.Threading; | ||
using System.Threading.Tasks; | ||
|
||
namespace LLamaSharp.SemanticKernel.ChatCompletion; | ||
|
||
/// <summary>
/// LLamaSharp ChatCompletion — adapts a LLamaSharp <see cref="ChatSession"/>
/// to the Semantic Kernel <see cref="IChatCompletion"/> interface.
/// </summary>
public sealed class LLamaSharpChatCompletion : IChatCompletion
{
    // Role tags stripped from the model output stream by the output transform.
    private const string UserRole = "user:";
    private const string AssistantRole = "assistant:";

    // Session wrapping the executor; only assigned in the constructor.
    private readonly ChatSession session;

    public LLamaSharpChatCompletion(InteractiveExecutor model)
    {
        this.session = new ChatSession(model)
            .WithHistoryTransform(new HistoryTransform())
            .WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { UserRole, AssistantRole }));
    }

    /// <inheritdoc/>
    public ChatHistory CreateNewChat(string? instructions = "")
    {
        var history = new ChatHistory();

        // string.IsNullOrEmpty already covers the null case, so the extra
        // `instructions != null &&` guard in the original was redundant.
        if (!string.IsNullOrEmpty(instructions))
        {
            history.AddSystemMessage(instructions);
        }

        return history;
    }

    /// <inheritdoc/>
    public Task<IReadOnlyList<IChatResult>> GetChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, CancellationToken cancellationToken = default)
    {
        requestSettings ??= DefaultRequestSettings();

        // ChatAsync returns a lazy token stream; nothing is awaited here, so
        // return synchronously instead of compiling an async state machine
        // with no awaits (compiler warning CS1998 in the original).
        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);

        return Task.FromResult<IReadOnlyList<IChatResult>>(
            new List<IChatResult> { new LLamaSharpChatResult(result) }.AsReadOnly());
    }

    /// <inheritdoc/>
    public async IAsyncEnumerable<IChatStreamingResult> GetStreamingChatCompletionsAsync(ChatHistory chat, ChatRequestSettings? requestSettings = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        requestSettings ??= DefaultRequestSettings();

        var result = this.session.ChatAsync(chat.ToLLamaSharpChatHistory(), requestSettings.ToLLamaSharpInferenceParams(), cancellationToken);

        yield return new LLamaSharpChatResult(result);
    }

    /// <summary>
    /// Defaults used when the caller passes no request settings
    /// (previously duplicated in both completion methods).
    /// </summary>
    private static ChatRequestSettings DefaultRequestSettings() => new()
    {
        MaxTokens = 256,
        Temperature = 0,
        TopP = 0,
        StopSequences = new List<string>()
    };
}
14 changes: 14 additions & 0 deletions
14
LLama.SemanticKernel/ChatCompletion/LLamaSharpChatMessage.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
using Microsoft.SemanticKernel.AI.ChatCompletion; | ||
|
||
namespace LLamaSharp.SemanticKernel.ChatCompletion; | ||
|
||
/// <summary>
/// LLamaSharp Chat Message
/// </summary>
public class LLamaSharpChatMessage : ChatMessageBase
{
    /// <summary>
    /// Creates a chat message attributed to <paramref name="role"/>.
    /// </summary>
    /// <param name="role">Author of the message (system/user/assistant).</param>
    /// <param name="content">Message text.</param>
    public LLamaSharpChatMessage(AuthorRole role, string content)
        : base(role, content)
    {
    }
}
38 changes: 38 additions & 0 deletions
38
LLama.SemanticKernel/ChatCompletion/LLamaSharpChatResult.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
using Microsoft.SemanticKernel.AI.ChatCompletion; | ||
using System.Runtime.CompilerServices; | ||
using System.Text; | ||
|
||
namespace LLamaSharp.SemanticKernel.ChatCompletion; | ||
|
||
/// <summary>
/// Wraps a LLamaSharp token stream as a Semantic Kernel chat result,
/// usable both as a buffered message and as a streaming sequence.
/// </summary>
internal sealed class LLamaSharpChatResult : IChatStreamingResult
{
    private readonly IAsyncEnumerable<string> _stream;

    /// <summary>
    /// Creates a result over the given token stream.
    /// </summary>
    /// <param name="stream">Token stream produced by the chat session.</param>
    public LLamaSharpChatResult(IAsyncEnumerable<string> stream)
    {
        _stream = stream;
    }

    /// <inheritdoc/>
    public async Task<ChatMessageBase> GetChatMessageAsync(CancellationToken cancellationToken = default)
    {
        var sb = new StringBuilder();

        // Flow the caller's token into the enumeration; the original accepted
        // cancellationToken but never used it, making cancellation a no-op.
        await foreach (var token in _stream.WithCancellation(cancellationToken).ConfigureAwait(false))
        {
            sb.Append(token);
        }

        // Return directly: the original's `await Task.FromResult(...)` wrapper
        // added an allocation and an await without changing behavior.
        return new LLamaSharpChatMessage(AuthorRole.Assistant, sb.ToString());
    }

    /// <inheritdoc/>
    public async IAsyncEnumerable<ChatMessageBase> GetStreamingChatMessageAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        await foreach (var token in _stream.WithCancellation(cancellationToken).ConfigureAwait(false))
        {
            yield return new LLamaSharpChatMessage(AuthorRole.Assistant, token);
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
using Microsoft.SemanticKernel.AI.ChatCompletion; | ||
using Microsoft.SemanticKernel.AI.TextCompletion; | ||
|
||
namespace LLamaSharp.SemanticKernel; | ||
|
||
/// <summary>
/// Conversions between Semantic Kernel and LLamaSharp chat/inference types.
/// </summary>
internal static class ExtensionMethods
{
    /// <summary>
    /// Converts a Semantic Kernel chat history into a LLamaSharp chat history.
    /// </summary>
    /// <param name="chatHistory">History to convert.</param>
    /// <returns>Equivalent LLamaSharp history.</returns>
    /// <exception cref="ArgumentNullException">When <paramref name="chatHistory"/> is null.</exception>
    internal static global::LLama.Common.ChatHistory ToLLamaSharpChatHistory(this ChatHistory chatHistory)
    {
        if (chatHistory is null)
        {
            throw new ArgumentNullException(nameof(chatHistory));
        }

        var history = new global::LLama.Common.ChatHistory();

        foreach (var chat in chatHistory)
        {
            // SK role labels are lower-case ("user", "assistant", "system") while
            // the LLamaSharp enum members are PascalCase; Enum.TryParse is
            // case-sensitive by default, so the original mapped every message to
            // AuthorRole.Unknown. Parse case-insensitively instead.
            var role = Enum.TryParse<global::LLama.Common.AuthorRole>(chat.Role.Label, ignoreCase: true, out var parsedRole)
                ? parsedRole
                : global::LLama.Common.AuthorRole.Unknown;
            history.AddMessage(role, chat.Content);
        }

        return history;
    }

    /// <summary>
    /// Convert ChatRequestSettings to LLamaSharp InferenceParams
    /// </summary>
    /// <param name="requestSettings"></param>
    /// <returns></returns>
    /// <exception cref="ArgumentNullException">When <paramref name="requestSettings"/> is null.</exception>
    internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this ChatRequestSettings requestSettings)
    {
        if (requestSettings is null)
        {
            throw new ArgumentNullException(nameof(requestSettings));
        }

        // Stop generation when the model starts writing the next user turn.
        var antiPrompts = new List<string>(requestSettings.StopSequences) { AuthorRole.User.ToString() + ":" };
        return new global::LLama.Common.InferenceParams
        {
            Temperature = (float)requestSettings.Temperature,
            TopP = (float)requestSettings.TopP,
            PresencePenalty = (float)requestSettings.PresencePenalty,
            FrequencyPenalty = (float)requestSettings.FrequencyPenalty,
            AntiPrompts = antiPrompts,
            // -1 means "no limit" on the LLamaSharp side.
            MaxTokens = requestSettings.MaxTokens ?? -1
        };
    }

    /// <summary>
    /// Convert CompleteRequestSettings to LLamaSharp InferenceParams
    /// </summary>
    /// <param name="requestSettings"></param>
    /// <returns></returns>
    /// <exception cref="ArgumentNullException">When <paramref name="requestSettings"/> is null.</exception>
    internal static global::LLama.Common.InferenceParams ToLLamaSharpInferenceParams(this CompleteRequestSettings requestSettings)
    {
        if (requestSettings is null)
        {
            throw new ArgumentNullException(nameof(requestSettings));
        }

        return new global::LLama.Common.InferenceParams
        {
            Temperature = (float)requestSettings.Temperature,
            TopP = (float)requestSettings.TopP,
            PresencePenalty = (float)requestSettings.PresencePenalty,
            FrequencyPenalty = (float)requestSettings.FrequencyPenalty,
            AntiPrompts = requestSettings.StopSequences,
            MaxTokens = requestSettings.MaxTokens ?? -1
        };
    }
}
Oops, something went wrong.