Merge branch 'master' into add_kernel_memory_pkg

AsakusaRinne authored Nov 10, 2023
2 parents 4579584 + 47e0167 commit 14b5994

Showing 32 changed files with 490 additions and 267 deletions.
30 changes: 17 additions & 13 deletions .github/workflows/compile.yml
@@ -14,6 +14,10 @@ on:
   #schedule:
   #  - cron: "22 22 * * 2"
 
+env:
+  # Compiler defines common to all platforms
+  COMMON_DEFINE: -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF -DBUILD_SHARED_LIBS=ON
+
 jobs:
   compile-linux:
     name: Compile (Linux)
@@ -22,13 +26,13 @@ jobs:
       matrix:
         include:
           - build: 'noavx'
-            defines: '-DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF'
           - build: 'avx2'
-            defines: '-DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: ''
           - build: 'avx'
-            defines: '-DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_AVX2=OFF'
           - build: 'avx512'
-            defines: '-DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_AVX512=ON'
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
@@ -39,7 +43,7 @@ jobs:
       run: |
         mkdir build
         cd build
-        cmake .. ${{ matrix.defines }}
+        cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
         cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
     - uses: actions/upload-artifact@v3
       with:
@@ -53,13 +57,13 @@ jobs:
       matrix:
         include:
           - build: 'noavx'
-            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=OFF -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF'
           - build: 'avx2'
-            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: ''
           - build: 'avx'
-            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=OFF -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_AVX2=OFF'
           - build: 'avx512'
-            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=OFF -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_AVX512=ON'
     runs-on: windows-latest
     steps:
       - uses: actions/checkout@v3
@@ -71,7 +75,7 @@ jobs:
       run: |
         mkdir build
         cd build
-        cmake .. ${{ matrix.defines }}
+        cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
         cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
     - name: Upload artifacts
@@ -117,7 +121,7 @@ jobs:
       run: |
         mkdir build
         cd build
-        cmake .. -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF
+        cmake .. ${{ env.COMMON_DEFINE }} -DLLAMA_CUBLAS=ON
         cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
         ls -R
@@ -142,7 +146,7 @@ jobs:
       matrix:
         include:
           - build: 'metal'
-            defines: '-DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=OFF -DBUILD_SHARED_LIBS=ON -DLLAMA_NATIVE=OFF -DCMAKE_OSX_ARCHITECTURES=arm64'
+            defines: '-DCMAKE_OSX_ARCHITECTURES=arm64'
     runs-on: macos-latest
     steps:
       - uses: actions/checkout@v3
@@ -157,7 +161,7 @@ jobs:
       run: |
         mkdir build
         cd build
-        cmake .. ${{ matrix.defines }}
+        cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
         cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
     - name: Upload artifacts
       uses: actions/upload-artifact@v3
13 changes: 2 additions & 11 deletions .github/workflows/main.yml
@@ -12,23 +12,14 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        build: [linux-debug, linux-release, windows-debug, windows-release]
+        build: [linux-release, windows-release]
         include:
-          - build: linux-debug
-            os: ubuntu-latest
-            config: debug
           - build: linux-release
             os: ubuntu-latest
-            config: release
-          # - build: macos-debug
-          #   os: macos-latest
-          #   config: debug
+            config: release
           # - build: macos-release
           #   os: macos-latest
           #   config: release
-          - build: windows-debug
-            os: windows-2019
-            config: debug
           - build: windows-release
             os: windows-2019
             config: release
1 change: 1 addition & 0 deletions LLama.Examples/LLama.Examples.csproj
@@ -30,6 +30,7 @@
   <ItemGroup>
     <PackageReference Include="Microsoft.Extensions.Logging.Console" Version="7.0.0" />
     <PackageReference Include="Microsoft.SemanticKernel" Version="1.0.0-beta1" />
+    <PackageReference Include="Spectre.Console" Version="0.47.0" />
   </ItemGroup>
 
   <ItemGroup>
3 changes: 2 additions & 1 deletion LLama.Examples/NewVersion/GetEmbeddings.cs
@@ -4,7 +4,7 @@ namespace LLama.Examples.NewVersion
 {
     public class GetEmbeddings
     {
-        public static void Run()
+        public static Task Run()
         {
             Console.Write("Please input your model path: ");
             var modelPath = Console.ReadLine();
@@ -23,6 +23,7 @@ public static void Run()
                 Console.WriteLine(string.Join(", ", embedder.GetEmbeddings(text)));
                 Console.WriteLine();
             }
+            return Task.CompletedTask;
         }
     }
 }
4 changes: 3 additions & 1 deletion LLama.Examples/NewVersion/QuantizeModel.cs
@@ -2,7 +2,7 @@
 {
     public class QuantizeModel
     {
-        public static void Run()
+        public static Task Run()
         {
             Console.Write("Please input your original model path: ");
             var inputPath = Console.ReadLine();
@@ -21,6 +21,8 @@ public static void Run()
             {
                 Console.WriteLine("Quantization failed!");
             }
+
+            return Task.CompletedTask;
         }
     }
 }
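Both conversions above serve the TestRunner rewrite that follows: every example, synchronous or not, must expose the same Func<Task> shape so it can live in one dictionary, and the synchronous ones simply return Task.CompletedTask rather than becoming needlessly async. A minimal, self-contained sketch of that shared delegate shape (all names below are invented for the demo, not taken from this commit):

    using System;
    using System.Threading.Tasks;

    class DelegateShapeDemo
    {
        static async Task Main()
        {
            // Synchronous work wrapped to fit Func<Task>: it completes immediately.
            Func<Task> syncExample = () =>
            {
                Console.WriteLine("synchronous work (e.g. quantization) runs here");
                return Task.CompletedTask;
            };

            // Genuinely asynchronous work fits the same shape unchanged.
            Func<Task> asyncExample = async () =>
            {
                await Task.Delay(10); // stand-in for awaited model work
                Console.WriteLine("async work done");
            };

            await syncExample();
            await asyncExample();
        }
    }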
135 changes: 40 additions & 95 deletions LLama.Examples/NewVersion/TestRunner.cs
@@ -1,109 +1,54 @@
-namespace LLama.Examples.NewVersion
+using System.Linq.Expressions;
+using Spectre.Console;
+
+namespace LLama.Examples.NewVersion
 {
     public class NewVersionTestRunner
     {
+        static Dictionary<string, Func<Task>> Examples = new Dictionary<string, Func<Task>>
+        {
+            {"Run a chat session without stripping the role names.", () => ChatSessionWithRoleName.Run()},
+            {"Run a chat session with the role names stripped.",()=> ChatSessionStripRoleName.Run()},
+            {"Interactive mode chat by using executor.",()=> InteractiveModeExecute.Run()},
+            {"Instruct mode chat by using executor.",()=> InstructModeExecute.Run()},
+            {"Stateless mode chat by using executor.",()=> StatelessModeExecute.Run()},
+            {"Load and save chat session.",()=> SaveAndLoadSession.Run()},
+            {"Load and save state of model and executor.",()=> LoadAndSaveState.Run()},
+            {"Get embeddings from LLama model.",()=> GetEmbeddings.Run()},
+            {"Quantize the model.",()=> QuantizeModel.Run()},
+            {"Automatic conversation.",()=> TalkToYourself.Run()},
+            {"Constrain response to json format using grammar.",()=> GrammarJsonResponse.Run()},
+            {"Semantic Kernel Prompt.",()=> SemanticKernelPrompt.Run()},
+            {"Semantic Kernel Chat.",()=> SemanticKernelChat.Run()},
+            {"Semantic Kernel Memory.",()=> SemanticKernelMemory.Run()},
+            {"Coding Assistant.",()=> CodingAssistant.Run()},
+            {"Batch Decoding.",()=> BatchedDecoding.Run()},
+            {"SK Kernel Memory.",()=> KernelMemory.Run()},
+            {"Exit", ()=> Task.CompletedTask}
+        };
         public static async Task Run()
         {
-            Console.WriteLine("================LLamaSharp Examples (New Version)==================\n");
 
-            Console.WriteLine("Please input a number to choose an example to run:");
-            Console.WriteLine("0: Run a chat session without stripping the role names.");
-            Console.WriteLine("1: Run a chat session with the role names stripped.");
-            Console.WriteLine("2: Interactive mode chat by using executor.");
-            Console.WriteLine("3: Instruct mode chat by using executor.");
-            Console.WriteLine("4: Stateless mode chat by using executor.");
-            Console.WriteLine("5: Load and save chat session.");
-            Console.WriteLine("6: Load and save state of model and executor.");
-            Console.WriteLine("7: Get embeddings from LLama model.");
-            Console.WriteLine("8: Quantize the model.");
-            Console.WriteLine("9: Automatic conversation.");
-            Console.WriteLine("10: Constrain response to json format using grammar.");
-            Console.WriteLine("11: Semantic Kernel Prompt.");
-            Console.WriteLine("12: Semantic Kernel Chat.");
-            Console.WriteLine("13: Semantic Kernel Memory.");
-            Console.WriteLine("14: Coding Assistant.");
-            Console.WriteLine("15: Batch Decoding.");
-            Console.WriteLine("16: SK Kernel Memory.");
+            AnsiConsole.Write(new Rule("LLamaSharp Examples"));
 
             while (true)
             {
-                Console.Write("\nYour choice: ");
-                int choice = int.Parse(Console.ReadLine());
+                var choice = AnsiConsole.Prompt(
+                    new SelectionPrompt<string>()
+                        .Title("Please choose[green] an example[/] to run: ")
+                        .AddChoices(Examples.Keys));
 
-                if (choice == 0)
-                {
-                    await ChatSessionWithRoleName.Run();
-                }
-                else if (choice == 1)
-                {
-                    await ChatSessionStripRoleName.Run();
-                }
-                else if (choice == 2)
-                {
-                    await InteractiveModeExecute.Run();
-                }
-                else if (choice == 3)
-                {
-                    await InstructModeExecute.Run();
-                }
-                else if (choice == 4)
-                {
-                    await StatelessModeExecute.Run();
-                }
-                else if (choice == 5)
-                {
-                    await SaveAndLoadSession.Run();
-                }
-                else if (choice == 6)
-                {
-                    await LoadAndSaveState.Run();
-                }
-                else if (choice == 7)
-                {
-                    GetEmbeddings.Run();
-                }
-                else if (choice == 8)
-                {
-                    QuantizeModel.Run();
-                }
-                else if (choice == 9)
-                {
-                    await TalkToYourself.Run();
-                }
-                else if (choice == 10)
-                {
-                    await GrammarJsonResponse.Run();
-                }
-                else if (choice == 11)
-                {
-                    await SemanticKernelPrompt.Run();
-                }
-                else if (choice == 12)
-                {
-                    await SemanticKernelChat.Run();
-                }
-                else if (choice == 13)
-                {
-                    await SemanticKernelMemory.Run();
-                }
-                else if (choice == 14)
-                {
-                    await CodingAssistant.Run();
-                }
-                else if (choice == 15)
-                {
-                    await BatchedDecoding.Run();
-                }
-                else if (choice == 16)
-                {
-                    await KernelMemory.Run();
-                }
-                else
+
+                if (Examples.TryGetValue(choice, out var example))
                 {
-                    Console.WriteLine("Cannot parse your choice. Please select again.");
-                    continue;
+                    if (choice == "Exit")
+                    {
+                        break;
+                    }
+                    AnsiConsole.Write(new Rule(choice));
+                    await example();
                 }
-                break;
+
+                AnsiConsole.Clear();
             }
         }
     }
 }
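The rewrite above replaces a hand-numbered menu and a seventeen-branch if/else chain with a dictionary keyed by description, and lets Spectre.Console (added to LLama.Examples.csproj earlier in this commit) render an arrow-key selection list. The pattern in miniature, as a self-contained sketch whose two entries are placeholders rather than the real example list:

    using System;
    using System.Collections.Generic;
    using System.Threading.Tasks;
    using Spectre.Console;

    internal static class MenuDemo
    {
        private static readonly Dictionary<string, Func<Task>> Actions = new()
        {
            { "Say hello", () => { Console.WriteLine("hello"); return Task.CompletedTask; } },
            { "Exit", () => Task.CompletedTask }
        };

        public static async Task Main()
        {
            while (true)
            {
                // Arrow-key selection; returns the chosen key string.
                var choice = AnsiConsole.Prompt(
                    new SelectionPrompt<string>()
                        .Title("Choose an [green]action[/]:")
                        .AddChoices(Actions.Keys));

                if (choice == "Exit")
                    break;

                await Actions[choice]();
            }
        }
    }

Adding an example is now a one-line dictionary entry, and the "Exit" sentinel keeps the value type uniformly Func<Task>.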
41 changes: 39 additions & 2 deletions LLama.KernelMemory/BuilderExtensions.cs
@@ -4,6 +4,9 @@
 using System.Linq;
 using System.Text;
 using System.Threading.Tasks;
+using LLama;
+using LLama.Common;
+using Microsoft.KernelMemory.AI;
 
 namespace LLamaSharp.KernelMemory
 {
@@ -24,6 +27,18 @@ public static KernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this Ker
             return builder;
         }
 
+        /// <summary>
+        /// Adds LLamaSharpTextEmbeddingGeneration to the KernelMemoryBuilder.
+        /// </summary>
+        /// <param name="builder">The KernelMemoryBuilder instance.</param>
+        /// <param name="textEmbeddingGeneration">The LLamaSharpTextEmbeddingGeneration instance.</param>
+        /// <returns>The KernelMemoryBuilder instance with LLamaSharpTextEmbeddingGeneration added.</returns>
+        public static KernelMemoryBuilder WithLLamaSharpTextEmbeddingGeneration(this KernelMemoryBuilder builder, LLamaSharpTextEmbeddingGeneration textEmbeddingGeneration)
+        {
+            builder.WithCustomEmbeddingGeneration(textEmbeddingGeneration);
+            return builder;
+        }
+
         /// <summary>
         /// Adds LLamaSharpTextGeneration to the KernelMemoryBuilder.
         /// </summary>
@@ -36,6 +51,18 @@ public static KernelMemoryBuilder WithLLamaSharpTextGeneration(this KernelMemory
             return builder;
         }
 
+        /// <summary>
+        /// Adds LLamaSharpTextGeneration to the KernelMemoryBuilder.
+        /// </summary>
+        /// <param name="builder">The KernelMemoryBuilder instance.</param>
+        /// <param name="textGeneration">The LlamaSharpTextGeneration instance.</param>
+        /// <returns>The KernelMemoryBuilder instance with LLamaSharpTextGeneration added.</returns>
+        public static KernelMemoryBuilder WithLLamaSharpTextGeneration(this KernelMemoryBuilder builder, LlamaSharpTextGeneration textGeneration)
+        {
+            builder.WithCustomTextGeneration(textGeneration);
+            return builder;
+        }
+
         /// <summary>
         /// Adds LLamaSharpTextEmbeddingGeneration and LLamaSharpTextGeneration to the KernelMemoryBuilder.
         /// </summary>
@@ -44,8 +71,18 @@ public static KernelMemoryBuilder WithLLamaSharpTextGeneration(this KernelMemory
         /// <returns>The KernelMemoryBuilder instance with LLamaSharpTextEmbeddingGeneration and LLamaSharpTextGeneration added.</returns>
         public static KernelMemoryBuilder WithLLamaSharpDefaults(this KernelMemoryBuilder builder, LLamaSharpConfig config)
         {
-            builder.WithLLamaSharpTextEmbeddingGeneration(config);
-            builder.WithLLamaSharpTextGeneration(config);
+            var parameters = new ModelParams(config.ModelPath)
+            {
+                ContextSize = config?.ContextSize ?? 2048,
+                Seed = config?.Seed ?? 0,
+                GpuLayerCount = config?.GpuLayerCount ?? 20
+            };
+            var weights = LLamaWeights.LoadFromFile(parameters);
+            var context = weights.CreateContext(parameters);
+            var executor = new StatelessExecutor(weights, parameters);
+            var embedder = new LLamaEmbedder(weights, parameters);
+            builder.WithLLamaSharpTextEmbeddingGeneration(new LLamaSharpTextEmbeddingGeneration(embedder));
+            builder.WithLLamaSharpTextGeneration(new LlamaSharpTextGeneration(weights, context, executor));
             return builder;
         }
     }
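The reworked WithLLamaSharpDefaults above is the substantive change: rather than handing the config to the two config-based overloads separately, it loads the weights once and shares them between the embedder and the text generator, applying fallbacks (context size 2048, seed 0, 20 GPU layers) when the config leaves those unset. A sketch of how a consumer might wire this up, under two assumptions that do not appear in this diff: that LLamaSharpConfig is constructed from a model path, and that the builder is finished with Kernel Memory's usual Build() call:

    using LLamaSharp.KernelMemory;
    using Microsoft.KernelMemory;

    class Program
    {
        static void Main()
        {
            // Assumed constructor shape; only config.ModelPath is read by the diff above.
            var config = new LLamaSharpConfig("path/to/model.gguf");

            var memory = new KernelMemoryBuilder()
                .WithLLamaSharpDefaults(config) // one weight load, shared by embedder and generator
                .Build();                       // assumed finishing call, per Kernel Memory samples
        }
    }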