Merge branch 'master' into add_kernel_memory_pkg

AsakusaRinne authored Nov 10, 2023
2 parents 4579584 + 47e0167 commit 14b5994

Showing 32 changed files with 490 additions and 267 deletions.
30 changes: 17 additions & 13 deletions .github/workflows/compile.yml
@@ -14,6 +14,10 @@ on:
   #schedule:
   #  - cron: "22 22 * * 2"
 
+env:
+  # Compiler defines common to all platforms
+  COMMON_DEFINE: -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF -DBUILD_SHARED_LIBS=ON
+
 jobs:
   compile-linux:
     name: Compile (Linux)
@@ -22,13 +26,13 @@ jobs:
       matrix:
         include:
           - build: 'noavx'
-            defines: '-DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF'
           - build: 'avx2'
-            defines: '-DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: ''
           - build: 'avx'
-            defines: '-DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_AVX2=OFF'
           - build: 'avx512'
-            defines: '-DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_AVX512=ON'
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
@@ -39,7 +43,7 @@ jobs:
       run: |
         mkdir build
         cd build
-        cmake .. ${{ matrix.defines }}
+        cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
         cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
     - uses: actions/upload-artifact@v3
       with:
@@ -53,13 +57,13 @@ jobs:
       matrix:
         include:
           - build: 'noavx'
-            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=OFF -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF'
           - build: 'avx2'
-            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: ''
           - build: 'avx'
-            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=OFF -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_AVX2=OFF'
           - build: 'avx512'
-            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=OFF -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_AVX512=ON'
     runs-on: windows-latest
     steps:
       - uses: actions/checkout@v3
@@ -71,7 +75,7 @@ jobs:
       run: |
         mkdir build
         cd build
-        cmake .. ${{ matrix.defines }}
+        cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
         cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
     - name: Upload artifacts
@@ -117,7 +121,7 @@ jobs:
       run: |
         mkdir build
         cd build
-        cmake .. -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF
+        cmake .. ${{ env.COMMON_DEFINE }} -DLLAMA_CUBLAS=ON
         cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
         ls -R
@@ -142,7 +146,7 @@ jobs:
       matrix:
         include:
           - build: 'metal'
-            defines: '-DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF -DBUILD_SHARED_LIBS=ON -DLLAMA_NATIVE=OFF -DCMAKE_OSX_ARCHITECTURES=arm64'
+            defines: '-DCMAKE_OSX_ARCHITECTURES=arm64'
     runs-on: macos-latest
     steps:
       - uses: actions/checkout@v3
@@ -157,7 +161,7 @@ jobs:
       run: |
         mkdir build
         cd build
-        cmake .. ${{ matrix.defines }}
+        cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
         cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
     - name: Upload artifacts
       uses: actions/upload-artifact@v3
13 changes: 2 additions & 11 deletions .github/workflows/main.yml
@@ -12,23 +12,14 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        build: [linux-debug, linux-release, windows-debug, windows-release]
+        build: [linux-release, windows-release]
         include:
-          - build: linux-debug
-            os: ubuntu-latest
-            config: debug
           - build: linux-release
             os: ubuntu-latest
-            config: release
-          # - build: macos-debug
-          #   os: macos-latest
-          #   config: debug
+            config: release
           # - build: macos-release
           #   os: macos-latest
           #   config: release
-          - build: windows-debug
-            os: windows-2019
-            config: debug
           - build: windows-release
             os: windows-2019
             config: release
1 change: 1 addition & 0 deletions LLama.Examples/LLama.Examples.csproj
@@ -30,6 +30,7 @@
   <ItemGroup>
     <PackageReference Include="Microsoft.Extensions.Logging.Console" Version="7.0.0" />
     <PackageReference Include="Microsoft.SemanticKernel" Version="1.0.0-beta1" />
+    <PackageReference Include="Spectre.Console" Version="0.47.0" />
   </ItemGroup>
 
   <ItemGroup>
3 changes: 2 additions & 1 deletion LLama.Examples/NewVersion/GetEmbeddings.cs
@@ -4,7 +4,7 @@ namespace LLama.Examples.NewVersion
 {
     public class GetEmbeddings
     {
-        public static void Run()
+        public static Task Run()
         {
             Console.Write("Please input your model path: ");
             var modelPath = Console.ReadLine();
@@ -23,6 +23,7 @@ public static void Run()
                 Console.WriteLine(string.Join(", ", embedder.GetEmbeddings(text)));
                 Console.WriteLine();
             }
+            return Task.CompletedTask;
         }
     }
 }
4 changes: 3 additions & 1 deletion LLama.Examples/NewVersion/QuantizeModel.cs
@@ -2,7 +2,7 @@
 {
     public class QuantizeModel
     {
-        public static void Run()
+        public static Task Run()
         {
             Console.Write("Please input your original model path: ");
             var inputPath = Console.ReadLine();
@@ -21,6 +21,8 @@ public static void Run()
             {
                 Console.WriteLine("Quantization failed!");
             }
+
+            return Task.CompletedTask;
         }
     }
 }
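Both conversions above serve the TestRunner rewrite that follows: every example, synchronous or not, must expose the same Func<Task> shape so it can live in one dictionary, and the synchronous ones simply return Task.CompletedTask rather than becoming needlessly async. A minimal, self-contained sketch of that shared delegate shape (all names below are invented for the demo, not taken from this commit):

    using System;
    using System.Threading.Tasks;

    class DelegateShapeDemo
    {
        static async Task Main()
        {
            // Synchronous work wrapped to fit Func<Task>: it completes immediately.
            Func<Task> syncExample = () =>
            {
                Console.WriteLine("synchronous work (e.g. quantization) runs here");
                return Task.CompletedTask;
            };

            // Genuinely asynchronous work fits the same shape unchanged.
            Func<Task> asyncExample = async () =>
            {
                await Task.Delay(10); // stand-in for awaited model work
                Console.WriteLine("async work done");
            };

            await syncExample();
            await asyncExample();
        }
    }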
135 changes: 40 additions & 95 deletions LLama.Examples/NewVersion/TestRunner.cs
@@ -1,109 +1,54 @@
-namespace LLama.Examples.NewVersion
+using System.Linq.Expressions;
+using Spectre.Console;
+
+namespace LLama.Examples.NewVersion
 {
     public class NewVersionTestRunner
     {
+        static Dictionary<string, Func<Task>> Examples = new Dictionary<string, Func<Task>>
+        {
+            {"Run a chat session without stripping the role names.", () => ChatSessionWithRoleName.Run()},
+            {"Run a chat session with the role names stripped.",()=> ChatSessionStripRoleName.Run()},
+            {"Interactive mode chat by using executor.",()=> InteractiveModeExecute.Run()},
+            {"Instruct mode chat by using executor.",()=> InstructModeExecute.Run()},
+            {"Stateless mode chat by using executor.",()=> StatelessModeExecute.Run()},
+            {"Load and save chat session.",()=> SaveAndLoadSession.Run()},
+            {"Load and save state of model and executor.",()=> LoadAndSaveState.Run()},
+            {"Get embeddings from LLama model.",()=> GetEmbeddings.Run()},
+            {"Quantize the model.",()=> QuantizeModel.Run()},
+            {"Automatic conversation.",()=> TalkToYourself.Run()},
+            {"Constrain response to json format using grammar.",()=> GrammarJsonResponse.Run()},
+            {"Semantic Kernel Prompt.",()=> SemanticKernelPrompt.Run()},
+            {"Semantic Kernel Chat.",()=> SemanticKernelChat.Run()},
+            {"Semantic Kernel Memory.",()=> SemanticKernelMemory.Run()},
+            {"Coding Assistant.",()=> CodingAssistant.Run()},
+            {"Batch Decoding.",()=> BatchedDecoding.Run()},
+            {"SK Kernel Memory.",()=> KernelMemory.Run()},
+            {"Exit", ()=> Task.CompletedTask}
+        };
         public static async Task Run()
         {
-            Console.WriteLine("================LLamaSharp Examples (New Version)==================\n");
 
-            Console.WriteLine("Please input a number to choose an example to run:");
-            Console.WriteLine("0: Run a chat session without stripping the role names.");
-            Console.WriteLine("1: Run a chat session with the role names stripped.");
-            Console.WriteLine("2: Interactive mode chat by using executor.");
-            Console.WriteLine("3: Instruct mode chat by using executor.");
-            Console.WriteLine("4: Stateless mode chat by using executor.");
-            Console.WriteLine("5: Load and save chat session.");
-            Console.WriteLine("6: Load and save state of model and executor.");
-            Console.WriteLine("7: Get embeddings from LLama model.");
-            Console.WriteLine("8: Quantize the model.");
-            Console.WriteLine("9: Automatic conversation.");
-            Console.WriteLine("10: Constrain response to json format using grammar.");
-            Console.WriteLine("11: Semantic Kernel Prompt.");
-            Console.WriteLine("12: Semantic Kernel Chat.");
-            Console.WriteLine("13: Semantic Kernel Memory.");
-            Console.WriteLine("14: Coding Assistant.");
-            Console.WriteLine("15: Batch Decoding.");
-            Console.WriteLine("16: SK Kernel Memory.");
+            AnsiConsole.Write(new Rule("LLamaSharp Examples"));
 
             while (true)
             {
-                Console.Write("\nYour choice: ");
-                int choice = int.Parse(Console.ReadLine());
+                var choice = AnsiConsole.Prompt(
+                    new SelectionPrompt<string>()
+                        .Title("Please choose[green] an example[/] to run: ")
+                        .AddChoices(Examples.Keys));
 
-                if (choice == 0)
-                {
-                    await ChatSessionWithRoleName.Run();
-                }
-                else if (choice == 1)
-                {
-                    await ChatSessionStripRoleName.Run();
-                }
-                else if (choice == 2)
-                {
-                    await InteractiveModeExecute.Run();
-                }
-                else if (choice == 3)
-                {
-                    await InstructModeExecute.Run();
-                }
-                else if (choice == 4)
-                {
-                    await StatelessModeExecute.Run();
-                }
-                else if (choice == 5)
-                {
-                    await SaveAndLoadSession.Run();
-                }
-                else if (choice == 6)
-                {
-                    await LoadAndSaveState.Run();
-                }
-                else if (choice == 7)
-                {
-                    GetEmbeddings.Run();
-                }
-                else if (choice == 8)
-                {
-                    QuantizeModel.Run();
-                }
-                else if (choice == 9)
-                {
-                    await TalkToYourself.Run();
-                }
-                else if (choice == 10)
-                {
-                    await GrammarJsonResponse.Run();
-                }
-                else if (choice == 11)
-                {
-                    await SemanticKernelPrompt.Run();
-                }
-                else if (choice == 12)
-                {
-                    await SemanticKernelChat.Run();
-                }
-                else if (choice == 13)
-                {
-                    await SemanticKernelMemory.Run();
-                }
-                else if (choice == 14)
-                {
-                    await CodingAssistant.Run();
-                }
-                else if (choice == 15)
-                {
-                    await BatchedDecoding.Run();
-                }
-                else if (choice == 16)
-                {
-                    await KernelMemory.Run();
-                }
-                else
+
+                if (Examples.TryGetValue(choice, out var example))
                 {
-                    Console.WriteLine("Cannot parse your choice. Please select again.");
-                    continue;
+                    if (choice == "Exit")
+                    {
+                        break;
+                    }
+                    AnsiConsole.Write(new Rule(choice));
+                    await example();
                 }
-                break;
+
+                AnsiConsole.Clear();
             }
         }
     }
 }
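The rewrite above replaces a hand-numbered menu and a seventeen-branch if/else chain with a dictionary keyed by description, and lets Spectre.Console (added to LLama.Examples.csproj earlier in this commit) render an arrow-key selection list. The pattern in miniature, as a self-contained sketch whose two entries are placeholders rather than the real example list:

    using System;
    using System.Collections.Generic;
    using System.Threading.Tasks;
    using Spectre.Console;

    internal static class MenuDemo
    {
        private static readonly Dictionary<string, Func<Task>> Actions = new()
        {
            { "Say hello", () => { Console.WriteLine("hello"); return Task.CompletedTask; } },
            { "Exit", () => Task.CompletedTask }
        };

        public static async Task Main()
        {
            while (true)
            {
                // Arrow-key selection; returns the chosen key string.
                var choice = AnsiConsole.Prompt(
                    new SelectionPrompt<string>()
                        .Title("Choose an [green]action[/]:")
                        .AddChoices(Actions.Keys));

                if (choice == "Exit")
                    break;

                await Actions[choice]();
            }
        }
    }

Adding an example is now a one-line dictionary entry, and the "Exit" sentinel keeps the value type uniformly Func<Task>.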
41 changes: 39 additions & 2 deletions LLama.KernelMemory/BuilderExtensions.cs
@@ -4,6 +4,9 @@
 using System.Linq;
 using System.Text;
 using System.Threading.Tasks;
+using LLama;
+using LLama.Common;
+using Microsoft.KernelMemory.AI;
 
 namespace LLamaSharp.KernelMemory
 {
@@ -24,6 +27,18 @@ public static KernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this Ker
             return builder;
         }
 
+        /// <summary>
+        /// Adds LLamaSharpTextEmbeddingGeneration to the KernelMemoryBuilder.
+        /// </summary>
+        /// <param name="builder">The KernelMemoryBuilder instance.</param>
+        /// <param name="textEmbeddingGeneration">The LLamaSharpTextEmbeddingGeneration instance.</param>
+        /// <returns>The KernelMemoryBuilder instance with LLamaSharpTextEmbeddingGeneration added.</returns>
+        public static KernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this KernelMemoryBuilder builder, LLamaSharpTextEmbeddingGeneration textEmbeddingGeneration)
+        {
+            builder.WithCustomEmbeddingGeneration(textEmbeddingGeneration);
+            return builder;
+        }
+
         /// <summary>
         /// Adds LLamaSharpTextGeneration to the KernelMemoryBuilder.
         /// </summary>
@@ -36,6 +51,18 @@ public static KernelMemoryBuilder WithLLamaSharpTextGeneration(this KernelMemory
             return builder;
         }
 
+        /// <summary>
+        /// Adds LLamaSharpTextGeneration to the KernelMemoryBuilder.
+        /// </summary>
+        /// <param name="builder">The KernelMemoryBuilder instance.</param>
+        /// <param name="textGeneration">The LlamaSharpTextGeneration instance.</param>
+        /// <returns>The KernelMemoryBuilder instance with LLamaSharpTextGeneration added.</returns>
+        public static KernelMemoryBuilder WithLLamaSharpTextGeneration(this KernelMemoryBuilder builder, LlamaSharpTextGeneration textGeneration)
+        {
+            builder.WithCustomTextGeneration(textGeneration);
+            return builder;
+        }
+
         /// <summary>
         /// Adds LLamaSharpTextEmbeddingGeneration and LLamaSharpTextGeneration to the KernelMemoryBuilder.
         /// </summary>
@@ -44,8 +71,18 @@ public static KernelMemoryBuilder WithLLamaSharpTextGeneration(this KernelMemory
         /// <returns>The KernelMemoryBuilder instance with LLamaSharpTextEmbeddingGeneration and LLamaSharpTextGeneration added.</returns>
         public static KernelMemoryBuilder WithLLamaSharpDefaults(this KernelMemoryBuilder builder, LLamaSharpConfig config)
         {
-            builder.WithLLamaSharpTextEmbeddingGeneration(config);
-            builder.WithLLamaSharpTextGeneration(config);
+            var parameters = new ModelParams(config.ModelPath)
+            {
+                ContextSize = config?.ContextSize ?? 2048,
+                Seed = config?.Seed ?? 0,
+                GpuLayerCount = config?.GpuLayerCount ?? 20
+            };
+            var weights = LLamaWeights.LoadFromFile(parameters);
+            var context = weights.CreateContext(parameters);
+            var executor = new StatelessExecutor(weights, parameters);
+            var embedder = new LLamaEmbedder(weights, parameters);
+            builder.WithLLamaSharpTextEmbeddingGeneration(new LLamaSharpTextEmbeddingGeneration(embedder));
+            builder.WithLLamaSharpTextGeneration(new LlamaSharpTextGeneration(weights, context, executor));
             return builder;
         }
     }
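The reworked WithLLamaSharpDefaults above is the substantive change: rather than handing the config to the two config-based overloads separately, it loads the weights once and shares them between the embedder and the text generator, applying fallbacks (context size 2048, seed 0, 20 GPU layers) when the config leaves those unset. A sketch of how a consumer might wire this up, under two assumptions that do not appear in this diff: that LLamaSharpConfig is constructed from a model path, and that the builder is finished with Kernel Memory's usual Build() call:

    using LLamaSharp.KernelMemory;
    using Microsoft.KernelMemory;

    class Program
    {
        static void Main()
        {
            // Assumed constructor shape; only config.ModelPath is read by the diff above.
            var config = new LLamaSharpConfig("path/to/model.gguf");

            var memory = new KernelMemoryBuilder()
                .WithLLamaSharpDefaults(config) // one weight load, shared by embedder and generator
                .Build();                       // assumed finishing call, per Kernel Memory samples
        }
    }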